Manju080 committed on
Commit 6416f7d · Parent: e161cb9

Fix the Optimization

Files changed (5):
  1. README.md +1 -1
  2. app.py +63 -23
  3. model_utils.py +65 -8
  4. requirements.txt +8 -6
  5. startup_test.py +136 -0
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🗄️
 colorFrom: blue
 colorTo: purple
 sdk: gradio
-sdk_version: 4.0.0
+sdk_version: 5.35.0
 app_file: app.py
 pinned: false
 ---
app.py CHANGED
@@ -5,9 +5,9 @@ from pydantic import BaseModel
 from typing import List, Optional
 import uvicorn
 import logging
-from model_utils import get_model
 import time
 import os
+import asyncio
 from contextlib import asynccontextmanager
 
 # Configure logging
@@ -16,18 +16,44 @@ logger = logging.getLogger(__name__)
 
 # Global model instance
 model = None
+model_loading = False
+model_load_error = None
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Startup
-    global model
+    global model, model_loading, model_load_error
     logger.info("Starting Text-to-SQL API...")
+
+    # Start model loading in background
+    model_loading = True
+    model_load_error = None
+
     try:
-        model = get_model()
-        logger.info("Model loaded successfully!")
+        # Import here to avoid startup delays
+        from model_utils import get_model
+
+        # Set a timeout for model loading (5 minutes)
+        try:
+            # Run model loading in a thread to avoid blocking
+            import concurrent.futures
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                future = executor.submit(get_model)
+                model = future.result(timeout=300)  # 5 minute timeout
+            logger.info("Model loaded successfully!")
+        except concurrent.futures.TimeoutError:
+            logger.error("Model loading timed out after 5 minutes")
+            model_load_error = "Model loading timed out"
+        except Exception as e:
+            logger.error(f"Failed to load model: {str(e)}")
+            model_load_error = str(e)
+
     except Exception as e:
-        logger.error(f"Failed to load model: {str(e)}")
-        raise
+        logger.error(f"Failed to import model_utils: {str(e)}")
+        model_load_error = f"Import error: {str(e)}"
+    finally:
+        model_loading = False
+
     yield
     # Shutdown
     logger.info("Shutting down Text-to-SQL API...")
@@ -62,12 +88,10 @@ class BatchResponse(BaseModel):
 class HealthResponse(BaseModel):
     status: str
     model_loaded: bool
+    model_loading: bool
+    model_error: Optional[str] = None
     timestamp: float
 
-
-
-
-
 @app.get("/", response_class=HTMLResponse)
 async def root():
     """Serve the main HTML interface"""
@@ -111,8 +135,14 @@ async def predict_sql(request: SQLRequest):
     Returns:
         SQLResponse with generated SQL query
     """
+    global model, model_loading, model_load_error
+
+    if model_loading:
+        raise HTTPException(status_code=503, detail="Model is still loading, please try again in a few minutes")
+
     if model is None:
-        raise HTTPException(status_code=503, detail="Model not loaded")
+        error_msg = model_load_error or "Model not loaded"
+        raise HTTPException(status_code=503, detail=f"Model not available: {error_msg}")
 
     start_time = time.time()
 
@@ -142,8 +172,14 @@ async def batch_predict(request: BatchRequest):
     Returns:
         BatchResponse with generated SQL queries
     """
+    global model, model_loading, model_load_error
+
+    if model_loading:
+        raise HTTPException(status_code=503, detail="Model is still loading, please try again in a few minutes")
+
     if model is None:
-        raise HTTPException(status_code=503, detail="Model not loaded")
+        error_msg = model_load_error or "Model not loaded"
+        raise HTTPException(status_code=503, detail=f"Model not available: {error_msg}")
 
     start_time = time.time()
 
@@ -197,17 +233,28 @@ async def health_check():
     Returns:
         HealthResponse with service status
     """
+    global model, model_loading, model_load_error
+
     model_loaded = model is not None and model.health_check()
 
+    if model_loaded:
+        status = "healthy"
+    elif model_loading:
+        status = "loading"
+    else:
+        status = "unhealthy"
+
     return HealthResponse(
-        status="healthy" if model_loaded else "unhealthy",
+        status=status,
         model_loaded=model_loaded,
+        model_loading=model_loading,
+        model_error=model_load_error,
         timestamp=time.time()
     )
 
 @app.get("/example")
 async def get_example():
-    """Get example request format"""
+    """Get example usage"""
     return {
         "example_request": {
             "question": "How many employees are older than 30?",
@@ -217,16 +264,9 @@ async def get_example():
             "question": "How many employees are older than 30?",
             "table_headers": ["id", "name", "age", "department", "salary"],
             "sql_query": "SELECT COUNT(*) FROM table WHERE age > 30",
-            "processing_time": 0.123
+            "processing_time": 0.5
        }
    }
 
 if __name__ == "__main__":
-    # Run the application
-    uvicorn.run(
-        "app:app",
-        host="0.0.0.0",
-        port=8000,
-        reload=False,
-        log_level="info"
-    )
+    uvicorn.run(app, host="0.0.0.0", port=8000)
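Note on this file: the server now starts serving before the model is ready, and /predict returns 503 while loading is in progress, so callers should poll /health until `status` becomes "healthy". A minimal client sketch follows; `BASE_URL` and the `requests` dependency are assumptions (requests is not in requirements.txt), and the /predict body is inferred from the /example payload shown above.

```python
# Client-side sketch: wait for the model to finish loading, then call /predict.
# BASE_URL is a placeholder; `requests` is an assumed extra dependency.
import time
import requests

BASE_URL = "http://localhost:8000"

def wait_until_ready(timeout: float = 360.0, interval: float = 5.0) -> None:
    """Poll /health until status is 'healthy'; surface model_error if loading failed."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        health = requests.get(f"{BASE_URL}/health", timeout=10).json()
        if health["status"] == "healthy":
            return
        if health["status"] == "unhealthy" and health.get("model_error"):
            raise RuntimeError(f"Model failed to load: {health['model_error']}")
        time.sleep(interval)  # status is "loading" (or not yet settled); retry
    raise TimeoutError("model did not become ready in time")

wait_until_ready()
response = requests.post(
    f"{BASE_URL}/predict",
    json={
        "question": "How many employees are older than 30?",
        "table_headers": ["id", "name", "age", "department", "salary"],
    },
    timeout=60,
)
print(response.json())
```

One caveat on the lifespan code itself: `future.result(timeout=300)` raises after five minutes, but exiting the `with ThreadPoolExecutor()` block calls `shutdown(wait=True)`, which waits for the worker thread to finish. A truly hung `get_model()` can therefore still stall startup past the nominal timeout.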
model_utils.py CHANGED
@@ -2,6 +2,8 @@ import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 from peft import PeftModel
 import logging
+import os
+import gc
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -19,22 +21,59 @@ class TextToSQLModel:
         self._load_model()
 
     def _load_model(self):
-        """Load the trained model and tokenizer"""
+        """Load the trained model and tokenizer with optimizations for HF Spaces"""
         try:
+            # Check if model directory exists
+            if not os.path.exists(self.model_dir):
+                raise FileNotFoundError(f"Model directory {self.model_dir} not found")
+
             logger.info("Loading tokenizer...")
-            self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir)
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.model_dir,
+                trust_remote_code=True,
+                use_fast=True
+            )
 
             logger.info("Loading base model...")
-            base_model = AutoModelForSeq2SeqLM.from_pretrained(self.base_model)
+            # Use lower precision and CPU if needed for memory optimization
+            device = "cpu"  # Force CPU for HF Spaces stability
+            torch_dtype = torch.float32  # Use float32 for better compatibility
+
+            base_model = AutoModelForSeq2SeqLM.from_pretrained(
+                self.base_model,
+                torch_dtype=torch_dtype,
+                device_map=device,
+                trust_remote_code=True,
+                low_cpu_mem_usage=True
+            )
 
             logger.info("Loading PEFT model...")
-            self.model = PeftModel.from_pretrained(base_model, self.model_dir)
+            self.model = PeftModel.from_pretrained(
+                base_model,
+                self.model_dir,
+                torch_dtype=torch_dtype,
+                device_map=device
+            )
+
+            # Move to CPU and set to eval mode
+            self.model = self.model.to(device)
             self.model.eval()
 
+            # Clear cache to free memory
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+            gc.collect()
+
             logger.info("Model loaded successfully!")
 
         except Exception as e:
             logger.error(f"Error loading model: {str(e)}")
+            # Clean up on error
+            self.model = None
+            self.tokenizer = None
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+            gc.collect()
             raise
 
     def predict(self, question: str, table_headers: list) -> str:
@@ -49,6 +88,9 @@ class TextToSQLModel:
             str: Generated SQL query
         """
         try:
+            if self.model is None or self.tokenizer is None:
+                raise RuntimeError("Model not properly loaded")
+
             # Format input text
             table_headers_str = ", ".join(table_headers)
             input_text = f"### Table columns:\n{table_headers_str}\n### Question:\n{question}\n### SQL:"
@@ -62,14 +104,26 @@ class TextToSQLModel:
                 max_length=self.max_length
             )
 
-            # Generate prediction
+            # Generate prediction with memory optimization
             with torch.no_grad():
-                outputs = self.model.generate(**inputs, max_length=self.max_length)
+                outputs = self.model.generate(
+                    **inputs,
+                    max_length=self.max_length,
+                    num_beams=1,  # Use greedy decoding for speed
+                    do_sample=False,
+                    pad_token_id=self.tokenizer.pad_token_id,
+                    eos_token_id=self.tokenizer.eos_token_id
+                )
 
             # Decode prediction
             sql_query = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-            return sql_query
+            # Clean up
+            del inputs, outputs
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
+            return sql_query.strip()
 
         except Exception as e:
             logger.error(f"Error generating SQL: {str(e)}")
@@ -96,6 +150,7 @@ class TextToSQLModel:
                 'status': 'success'
             })
         except Exception as e:
+            logger.error(f"Error in batch prediction for query '{query['question']}': {str(e)}")
             results.append({
                 'question': query['question'],
                 'table_headers': query['table_headers'],
@@ -108,7 +163,9 @@ class TextToSQLModel:
 
     def health_check(self) -> bool:
         """Check if model is loaded and ready"""
-        return self.model is not None and self.tokenizer is not None
+        return (self.model is not None and
+                self.tokenizer is not None and
+                hasattr(self.model, 'generate'))
 
 # Global model instance
 _model_instance = None
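Note on this file: since the LoRA adapter stays attached, every `generate()` call routes through PEFT's adapter layers. If the adapter never needs to be swapped at runtime, one further step this commit does not take is merging the adapter into the base weights with PEFT's `merge_and_unload()`. A sketch under that assumption; `base_model_name` and `adapter_dir` are hypothetical stand-ins for `self.base_model` and `self.model_dir`:

```python
# Sketch: fold the LoRA adapter into the base model once at load time so
# inference runs on a plain seq2seq model with no PEFT indirection.
import torch
from transformers import AutoModelForSeq2SeqLM
from peft import PeftModel

def load_merged_model(base_model_name: str, adapter_dir: str):
    base = AutoModelForSeq2SeqLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
    )
    peft_model = PeftModel.from_pretrained(base, adapter_dir)
    merged = peft_model.merge_and_unload()  # base model with LoRA weights merged in
    merged.eval()
    return merged
```

The trade-off: a one-time merge cost and no adapter hot-swapping, in exchange for dropping the per-call LoRA matmuls, which is a small but free latency win on CPU.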
requirements.txt CHANGED
@@ -1,8 +1,10 @@
 fastapi==0.104.1
 uvicorn[standard]==0.24.0
-torch>=2.0.0
-transformers>=4.35.0
-peft>=0.6.0
-accelerate>=0.24.0
-pydantic>=2.0.0
-python-multipart>=0.0.6
+torch==2.1.0
+transformers==4.35.0
+peft==0.6.0
+accelerate==0.24.0
+pydantic==2.5.0
+python-multipart==0.0.6
+tokenizers==0.15.0
+safetensors==0.4.0
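Two notes on the exact pins. First, pin sets can conflict internally: transformers 4.35.x declares its own bound on tokenizers (below 0.15, if memory serves), so `tokenizers==0.15.0` is worth verifying against a clean `pip install` before deploying. Second, once everything is pinned, drift is cheap to detect at startup. A standard-library sketch; the `PINS` mapping simply restates requirements.txt:

```python
# Sketch: report any installed package versions that drift from the pins.
from importlib.metadata import PackageNotFoundError, version

PINS = {
    "fastapi": "0.104.1",
    "torch": "2.1.0",
    "transformers": "4.35.0",
    "peft": "0.6.0",
    "accelerate": "0.24.0",
    "pydantic": "2.5.0",
    "tokenizers": "0.15.0",
    "safetensors": "0.4.0",
}

def check_pins() -> list[str]:
    problems = []
    for package, expected in PINS.items():
        try:
            installed = version(package)
        except PackageNotFoundError:
            problems.append(f"{package}: not installed")
            continue
        # torch builds may report local tags like "2.1.0+cpu"; compare the prefix
        if installed.split("+")[0] != expected:
            problems.append(f"{package}: pinned {expected}, installed {installed}")
    return problems

if __name__ == "__main__":
    for problem in check_pins():
        print(problem)
```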
startup_test.py ADDED
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+"""
+Startup test script for Hugging Face Spaces deployment
+This script helps debug model loading issues
+"""
+
+import os
+import sys
+import time
+import logging
+import traceback
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+def test_imports():
+    """Test if all required packages can be imported"""
+    logger.info("Testing imports...")
+
+    try:
+        import torch
+        logger.info(f"PyTorch version: {torch.__version__}")
+    except ImportError as e:
+        logger.error(f"Failed to import torch: {e}")
+        return False
+
+    try:
+        import transformers
+        logger.info(f"Transformers version: {transformers.__version__}")
+    except ImportError as e:
+        logger.error(f"Failed to import transformers: {e}")
+        return False
+
+    try:
+        import peft
+        logger.info(f"PEFT version: {peft.__version__}")
+    except ImportError as e:
+        logger.error(f"Failed to import peft: {e}")
+        return False
+
+    try:
+        import fastapi
+        logger.info(f"FastAPI version: {fastapi.__version__}")
+    except ImportError as e:
+        logger.error(f"Failed to import fastapi: {e}")
+        return False
+
+    return True
+
+def test_model_files():
+    """Test if model files exist"""
+    logger.info("Testing model files...")
+
+    model_dir = "./final-model"
+    required_files = [
+        "adapter_config.json",
+        "adapter_model.safetensors",
+        "tokenizer.json",
+        "tokenizer_config.json",
+        "vocab.json"
+    ]
+
+    if not os.path.exists(model_dir):
+        logger.error(f"Model directory {model_dir} does not exist")
+        return False
+
+    missing_files = []
+    for file in required_files:
+        file_path = os.path.join(model_dir, file)
+        if not os.path.exists(file_path):
+            missing_files.append(file)
+        else:
+            size = os.path.getsize(file_path)
+            logger.info(f"✓ {file} exists ({size} bytes)")
+
+    if missing_files:
+        logger.error(f"Missing required files: {missing_files}")
+        return False
+
+    return True
+
+def test_model_loading():
+    """Test model loading with timeout"""
+    logger.info("Testing model loading...")
+
+    try:
+        from model_utils import get_model
+
+        start_time = time.time()
+        model = get_model()
+        load_time = time.time() - start_time
+
+        logger.info(f"Model loaded successfully in {load_time:.2f} seconds")
+
+        # Test a simple prediction
+        test_question = "How many records are there?"
+        test_headers = ["id", "name", "age"]
+
+        start_time = time.time()
+        result = model.predict(test_question, test_headers)
+        predict_time = time.time() - start_time
+
+        logger.info(f"Test prediction successful in {predict_time:.2f} seconds")
+        logger.info(f"Generated SQL: {result}")
+
+        return True
+
+    except Exception as e:
+        logger.error(f"Model loading failed: {e}")
+        logger.error(traceback.format_exc())
+        return False
+
+def main():
+    """Run all tests"""
+    logger.info("Starting Hugging Face Spaces deployment tests...")
+
+    # Test 1: Imports
+    if not test_imports():
+        logger.error("Import test failed")
+        sys.exit(1)
+
+    # Test 2: Model files
+    if not test_model_files():
+        logger.error("Model files test failed")
+        sys.exit(1)
+
+    # Test 3: Model loading
+    if not test_model_loading():
+        logger.error("Model loading test failed")
+        sys.exit(1)
+
+    logger.info("All tests passed! Ready for deployment.")
+
+if __name__ == "__main__":
+    main()
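Note on this file: the docstring on `test_model_loading()` promises a timeout, but the body calls `get_model()` directly, so a hung download stalls the script indefinitely. A sketch of the same guard app.py's lifespan() uses, adapted for the standalone test; it assumes it lives in startup_test.py so `logger` and `test_model_loading` are in scope:

```python
# Sketch: bound test_model_loading with the ThreadPoolExecutor pattern from
# app.py. Note the hung worker thread cannot be killed: shutdown(wait=False)
# returns control immediately, but Python still joins worker threads at
# interpreter exit, so a subprocess is the stronger isolation if this matters.
import concurrent.futures

def test_model_loading_with_timeout(timeout_s: float = 300.0) -> bool:
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    future = executor.submit(test_model_loading)
    try:
        return future.result(timeout=timeout_s)
    except concurrent.futures.TimeoutError:
        logger.error(f"Model loading test timed out after {timeout_s:.0f}s")
        return False
    finally:
        executor.shutdown(wait=False)
```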