rohitkshirsagar19 committed on
Commit
ee9f80e
·
verified ·
1 Parent(s): 28ab8e9

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +16 -23
main.py CHANGED
@@ -1,5 +1,5 @@
1
  import uvicorn
2
- from fastapi import FastAPI, HTTPException, Depends
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
5
  from sentence_transformers import SentenceTransformer
@@ -9,13 +9,12 @@ import os
9
  from contextlib import asynccontextmanager
10
 
11
  # --- Environment Setup ---
12
- # It's best practice to get sensitive keys from environment variables
13
- # We will set these up in Hugging Face Spaces Secrets
14
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
15
  PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "memoria-index")
 
 
16
 
17
  # --- Global objects ---
18
- # We load these once at startup to save time and memory
19
  model = None
20
  pc = None
21
  index = None
@@ -33,8 +32,12 @@ async def lifespan(app: FastAPI):
33
  raise ValueError("PINECONE_API_KEY environment variable not set.")
34
 
35
  # 1. Load the AI Model
36
- print("Loading lightweight sentence transformer model...")
37
- model = SentenceTransformer('sentence-transformers/paraphrase-albert-small-v2')
 
 
 
 
38
  print("Model loaded.")
39
 
40
  # 2. Connect to Pinecone
@@ -47,15 +50,16 @@ async def lifespan(app: FastAPI):
47
  pc.create_index(
48
  name=PINECONE_INDEX_NAME,
49
  dimension=model.get_sentence_embedding_dimension(),
50
- metric="cosine", # Cosine similarity is great for sentence vectors
51
  spec=ServerlessSpec(cloud="aws", region="us-east-1")
52
  )
53
  index = pc.Index(PINECONE_INDEX_NAME)
54
  print("Pinecone setup complete.")
55
  yield
56
- # Cleanup logic can go here if needed on shutdown
57
  print("Application shutdown.")
58
 
 
 
59
  # --- Pydantic Models ---
60
  class Memory(BaseModel):
61
  content: str
@@ -67,13 +71,13 @@ class SearchQuery(BaseModel):
67
  app = FastAPI(
68
  title="Memoria API",
69
  description="API for storing and retrieving memories.",
70
- version="1.0.0",
71
- lifespan=lifespan # Use the lifespan context manager
72
  )
73
 
74
  app.add_middleware(
75
  CORSMiddleware,
76
- allow_origins=["*"], # Allow all origins for simplicity
77
  allow_credentials=True,
78
  allow_methods=["*"],
79
  allow_headers=["*"],
@@ -89,10 +93,7 @@ def save_memory(memory: Memory):
89
  try:
90
  embedding = model.encode(memory.content).tolist()
91
  memory_id = str(uuid.uuid4())
92
-
93
- # Upsert (update or insert) the vector into Pinecone
94
  index.upsert(vectors=[{"id": memory_id, "values": embedding, "metadata": {"text": memory.content}}])
95
-
96
  print(f"Successfully saved memory with ID: {memory_id}")
97
  return {"status": "success", "id": memory_id}
98
  except Exception as e:
@@ -103,13 +104,8 @@ def save_memory(memory: Memory):
103
  def search_memory(search: SearchQuery):
104
  try:
105
  query_embedding = model.encode(search.query).tolist()
106
-
107
- # Query Pinecone for the most similar vectors
108
  results = index.query(vector=query_embedding, top_k=5, include_metadata=True)
109
-
110
- # Extract the original text from the metadata
111
  retrieved_documents = [match['metadata']['text'] for match in results['matches']]
112
-
113
  print(f"Found {len(retrieved_documents)} results for query: '{search.query}'")
114
  return {"status": "success", "results": retrieved_documents}
115
  except Exception as e:
@@ -117,7 +113,4 @@ def search_memory(search: SearchQuery):
117
  raise HTTPException(status_code=500, detail=str(e))
118
 
119
  if __name__ == "__main__":
120
- uvicorn.run("main:app", host="127.0.0.1", port=8000, reload=True)
121
-
122
-
123
-
 
1
  import uvicorn
2
+ from fastapi import FastAPI, HTTPException
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
5
  from sentence_transformers import SentenceTransformer
 
9
  from contextlib import asynccontextmanager
10
 
11
  # --- Environment Setup ---
 
 
12
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
13
  PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "memoria-index")
14
+ # Define a writable cache directory inside our container
15
+ CACHE_DIR = "/app/model_cache"
16
 
17
  # --- Global objects ---
 
18
  model = None
19
  pc = None
20
  index = None
 
32
  raise ValueError("PINECONE_API_KEY environment variable not set.")
33
 
34
  # 1. Load the AI Model
35
+ print(f"Loading model and setting cache to: {CACHE_DIR}")
36
+ # THE FINAL FIX: Explicitly tell the library where to save the model.
37
+ model = SentenceTransformer(
38
+ 'sentence-transformers/paraphrase-albert-small-v2',
39
+ cache_folder=CACHE_DIR
40
+ )
41
  print("Model loaded.")
42
 
43
  # 2. Connect to Pinecone
 
50
  pc.create_index(
51
  name=PINECONE_INDEX_NAME,
52
  dimension=model.get_sentence_embedding_dimension(),
53
+ metric="cosine",
54
  spec=ServerlessSpec(cloud="aws", region="us-east-1")
55
  )
56
  index = pc.Index(PINECONE_INDEX_NAME)
57
  print("Pinecone setup complete.")
58
  yield
 
59
  print("Application shutdown.")
60
 
61
+ # ... (The rest of the file remains exactly the same) ...
62
+
63
  # --- Pydantic Models ---
64
  class Memory(BaseModel):
65
  content: str
 
71
  app = FastAPI(
72
  title="Memoria API",
73
  description="API for storing and retrieving memories.",
74
+ version="1.0.1", # Final deployed version
75
+ lifespan=lifespan
76
  )
77
 
78
  app.add_middleware(
79
  CORSMiddleware,
80
+ allow_origins=["*"],
81
  allow_credentials=True,
82
  allow_methods=["*"],
83
  allow_headers=["*"],
 
93
  try:
94
  embedding = model.encode(memory.content).tolist()
95
  memory_id = str(uuid.uuid4())
 
 
96
  index.upsert(vectors=[{"id": memory_id, "values": embedding, "metadata": {"text": memory.content}}])
 
97
  print(f"Successfully saved memory with ID: {memory_id}")
98
  return {"status": "success", "id": memory_id}
99
  except Exception as e:
 
104
  def search_memory(search: SearchQuery):
105
  try:
106
  query_embedding = model.encode(search.query).tolist()
 
 
107
  results = index.query(vector=query_embedding, top_k=5, include_metadata=True)
 
 
108
  retrieved_documents = [match['metadata']['text'] for match in results['matches']]
 
109
  print(f"Found {len(retrieved_documents)} results for query: '{search.query}'")
110
  return {"status": "success", "results": retrieved_documents}
111
  except Exception as e:
 
113
  raise HTTPException(status_code=500, detail=str(e))
114
 
115
  if __name__ == "__main__":
116
+ uvicorn.run("main:app", host="127.0.0.1", port=8000, reload=True)