willwade committed on
Commit 36fcf07 · 1 Parent(s): 238c097

change to gemini

Files changed (6)
  1. app.py +181 -63
  2. llm_interface.py +345 -0
  3. print_test.py +3 -0
  4. requirements.txt +1 -0
  5. test_app.py +54 -0
  6. utils.py +552 -85
app.py CHANGED
@@ -1,29 +1,48 @@
1
  import gradio as gr
2
  import whisper
3
- import tempfile
4
- import os
5
- from utils import SocialGraphManager, SuggestionGenerator
 
6
 
7
  # Define available models
8
  AVAILABLE_MODELS = {
9
- "google/gemma-3-1b-it": "Gemma 3 1B-IT (Small, instruction-tuned)",
10
- "google/gemma-3-4b-it": "Gemma 3 4B-IT (Default, instruction-tuned)",
11
- "google/gemma-3-12b-it": "Gemma 3 12B-IT (Better quality, instruction-tuned)",
12
- "google/gemma-3-27b-it": "Gemma 3 27B-IT (Best quality, instruction-tuned)",
13
- "Qwen/Qwen1.5-0.5B": "Qwen 1.5 0.5B (Very small, efficient)",
14
- "Qwen/Qwen1.5-1.8B": "Qwen 1.5 1.8B (Small, good quality)",
15
- "TinyLlama/TinyLlama-1.1B-Chat-v1.0": "TinyLlama 1.1B (Small, chat-tuned)",
16
- "microsoft/phi-3-mini-4k-instruct": "Phi-3 Mini (Small, instruction-tuned)",
17
- "microsoft/phi-2": "Phi-2 (Small, high quality for size)",
18
- "distilgpt2": "DistilGPT2 (Fast, smaller model)",
19
- "gpt2": "GPT-2 (Medium size, better quality)",
20
  }
21
 
22
  # Initialize the social graph manager
23
  social_graph = SocialGraphManager("social_graph.json")
24
 
25
- # Initialize the suggestion generator with Gemma 3 1B (default - smaller model to save memory)
26
- suggestion_generator = SuggestionGenerator("google/gemma-3-1b-it")
27
 
28
  # Test the model to make sure it's working
29
  test_result = suggestion_generator.test_model()
@@ -137,15 +156,28 @@ def change_model(model_name, progress=gr.Progress()):
137
  # Show progress indicator
138
  progress(0, desc=f"Loading model: {model_name}")
139
 
140
- # Try to load the new model
141
- success = suggestion_generator.load_model(model_name)
142
-
143
- if success:
144
- progress(1.0, desc=f"Model loaded: {model_name}")
145
- return f"Successfully switched to model: {model_name}"
146
- else:
147
- progress(1.0, desc="Model loading failed")
148
- return f"Failed to load model: {model_name}. Using fallback responses instead."
149
 
150
 
151
  def generate_suggestions(
@@ -153,7 +185,7 @@ def generate_suggestions(
153
  user_input,
154
  suggestion_type,
155
  selected_topic=None,
156
- model_name="google/gemma-3-1b-it",
157
  temperature=0.7,
158
  mood=3,
159
  progress=gr.Progress(),
@@ -232,6 +264,9 @@ def generate_suggestions(
232
  if selected_topic:
233
  person_context["selected_topic"] = selected_topic
234
 
 
 
 
235
  # Format the output with multiple suggestions
236
  result = ""
237
 
@@ -240,31 +275,40 @@ def generate_suggestions(
240
  print("Using model for suggestions")
241
  progress(0.2, desc="Preparing to generate suggestions...")
242
 
243
- # Generate 3 different suggestions
244
- suggestions = []
245
- for i in range(3):
246
- progress_value = 0.3 + (i * 0.2) # Progress from 30% to 70%
247
- progress(progress_value, desc=f"Generating suggestion {i+1}/3")
248
- print(f"Generating suggestion {i+1}/3")
249
- try:
250
- # Add mood to person context
251
- person_context["mood"] = mood
252
- suggestion = suggestion_generator.generate_suggestion(
253
- person_context, user_input, temperature=temperature
254
- )
255
- print(f"Generated suggestion: {suggestion}")
256
- suggestions.append(suggestion)
257
- except Exception as e:
258
- print(f"Error generating suggestion: {e}")
259
- suggestions.append("Error generating suggestion")
260
 
261
- result = (
262
- f"### AI-Generated Responses (using {suggestion_generator.model_name}):\n\n"
263
- )
264
- for i, suggestion in enumerate(suggestions, 1):
265
- result += f"{i}. {suggestion}\n\n"
266
 
267
- print(f"Final result: {result[:100]}...")
268
 
269
  # If suggestion type is "common_phrases", use the person's common phrases
270
  elif clean_suggestion_type == "common_phrases":
@@ -288,23 +332,87 @@ def generate_suggestions(
288
  progress(0.3, desc="No category detected, using model instead...")
289
  try:
290
  suggestions = []
 
 
 
291
  for i in range(3):
292
  progress_value = 0.4 + (i * 0.15) # Progress from 40% to 70%
293
  progress(
294
  progress_value, desc=f"Generating fallback suggestion {i+1}/3"
295
  )
296
- # Add mood to person context
297
- person_context["mood"] = mood
298
- suggestion = suggestion_generator.generate_suggestion(
299
- person_context, user_input, temperature=temperature
300
- )
301
- suggestions.append(suggestion)
302
-
303
- result = f"### AI-Generated Responses (no category detected, using {suggestion_generator.model_name}):\n\n"
304
  for i, suggestion in enumerate(suggestions, 1):
305
  result += f"{i}. {suggestion}\n\n"
306
  except Exception as e:
307
  print(f"Error generating fallback suggestion: {e}")
 
308
  result = "### Could not generate a response:\n\n"
309
  result += "1. Sorry, I couldn't generate a suggestion at this time.\n\n"
310
 
@@ -334,13 +442,19 @@ def generate_suggestions(
334
  print(f"Result type: {type(result)}")
335
  print(f"Result length: {len(result)}")
336
 
337
- # Complete the progress
338
- progress(1.0, desc="Completed!")
339
-
340
  # Make sure we're returning a non-empty string
341
  if not result or len(result.strip()) == 0:
342
  result = "No response was generated. Please try again with different settings."
343
 
344
  return result
345
 
346
 
@@ -462,9 +576,9 @@ with gr.Blocks(title="Will's AAC Communication Aid", css="custom.css") as demo:
462
  with gr.Row():
463
  model_dropdown = gr.Dropdown(
464
  choices=list(AVAILABLE_MODELS.keys()),
465
- value="google/gemma-3-1b-it",
466
  label="Language Model",
467
- info="Select which AI model to use for generating responses",
468
  )
469
 
470
  temperature_slider = gr.Slider(
@@ -556,4 +670,8 @@ with gr.Blocks(title="Will's AAC Communication Aid", css="custom.css") as demo:
556
 
557
  # Launch the app
558
  if __name__ == "__main__":
559
- demo.launch()
1
  import gradio as gr
2
  import whisper
3
+ import random
4
+ import time
5
+ from utils import SocialGraphManager
6
+ from llm_interface import LLMInterface
7
 
8
  # Define available models
9
  AVAILABLE_MODELS = {
10
+ # Gemini models (online API)
11
+ "gemini-1.5-flash-latest": "🌐 Gemini 1.5 Flash (Online API - Fast, Recommended)",
12
+ "gemini-1.5-pro-latest": "🌐 Gemini 1.5 Pro (Online API - High quality)",
13
+ # OpenAI models (if API key is set)
14
+ "gpt-3.5-turbo": "🌐 ChatGPT 3.5 (Online API)",
15
+ "gpt-4o-mini": "🌐 GPT-4o Mini (Online API - Fast)",
16
+ # Ollama models (if installed locally)
17
+ "ollama/gemma:7b": "💻 Gemma 7B (Offline - requires Ollama)",
18
+ "ollama/llama3:8b": "💻 Llama 3 8B (Offline - requires Ollama)",
 
 
19
  }
20
 
21
  # Initialize the social graph manager
22
  social_graph = SocialGraphManager("social_graph.json")
23
 
24
+ # Initialize the suggestion generator with a fast online model by default
25
+ print("Initializing with Gemini 1.5 Flash (online model)")
26
+ suggestion_generator = LLMInterface("gemini-1.5-flash-latest")
27
+
28
+ # Test the model to make sure it's working
29
+ print("Testing model connection...")
30
+ test_result = suggestion_generator.test_model()
31
+ print(f"Model test result: {test_result}")
32
+
33
+ # If the model didn't load, try Ollama as fallback
34
+ if not suggestion_generator.model_loaded:
35
+ print("Online model not available, trying Ollama model...")
36
+ suggestion_generator = LLMInterface("ollama/gemma:7b")
37
+ test_result = suggestion_generator.test_model()
38
+ print(f"Ollama model test result: {test_result}")
39
+
40
+ # If Ollama also fails, try OpenAI as fallback
41
+ if not suggestion_generator.model_loaded:
42
+ print("Ollama not available, trying OpenAI model...")
43
+ suggestion_generator = LLMInterface("gpt-3.5-turbo")
44
+ test_result = suggestion_generator.test_model()
45
+ print(f"OpenAI model test result: {test_result}")
46
 
47
  # Test the model to make sure it's working
48
  test_result = suggestion_generator.test_model()
 
156
  # Show progress indicator
157
  progress(0, desc=f"Loading model: {model_name}")
158
 
159
+ # Create a new LLMInterface with the selected model
160
+ try:
161
+ progress(0.3, desc=f"Initializing {model_name}...")
162
+ new_generator = LLMInterface(model_name)
163
+
164
+ # Test if the model works
165
+ progress(0.6, desc="Testing model connection...")
166
+ test_result = new_generator.test_model()
167
+ print(f"Model test result: {test_result}")
168
+
169
+ if new_generator.model_loaded:
170
+ # Replace the current generator with the new one
171
+ suggestion_generator = new_generator
172
+ progress(1.0, desc=f"Model loaded: {model_name}")
173
+ return f"Successfully switched to model: {model_name}"
174
+ else:
175
+ progress(1.0, desc="Model loading failed")
176
+ return f"Failed to load model: {model_name}. Using previous model instead."
177
+ except Exception as e:
178
+ print(f"Error changing model: {e}")
179
+ progress(1.0, desc="Error loading model")
180
+ return f"Error loading model: {model_name}. Using previous model instead."
181
 
182
 
183
  def generate_suggestions(
 
185
  user_input,
186
  suggestion_type,
187
  selected_topic=None,
188
+ model_name="gemini-1.5-flash",
189
  temperature=0.7,
190
  mood=3,
191
  progress=gr.Progress(),
 
264
  if selected_topic:
265
  person_context["selected_topic"] = selected_topic
266
 
267
+ # Add mood to person context
268
+ person_context["mood"] = mood
269
+
270
  # Format the output with multiple suggestions
271
  result = ""
272
 
 
275
  print("Using model for suggestions")
276
  progress(0.2, desc="Preparing to generate suggestions...")
277
 
278
+ # Generate suggestions using the LLM interface
279
+ try:
280
+ # Use the LLM interface to generate multiple suggestions
281
+ suggestions = suggestion_generator.generate_multiple_suggestions(
282
+ person_context=person_context,
283
+ user_input=user_input,
284
+ num_suggestions=3,
285
+ temperature=temperature,
286
+ progress_callback=lambda p, desc: progress(0.2 + (p * 0.7), desc=desc),
287
+ )
288
 
289
+ # Make sure we have at least one suggestion
290
+ if not suggestions:
291
+ suggestions = ["I'm not sure what to say about that."]
292
+
293
+ # Make sure we have exactly 3 suggestions (pad with fallbacks if needed)
294
+ while len(suggestions) < 3:
295
+ suggestions.append("I'm not sure what else to say about that.")
296
+
297
+ result = f"### AI-Generated Responses (using {suggestion_generator.model_name}):\n\n"
298
+ for i, suggestion in enumerate(suggestions, 1):
299
+ result += f"{i}. {suggestion}\n\n"
300
+
301
+ print(f"Final result: {result[:100]}...")
302
 
303
+ except Exception as e:
304
+ print(f"Error generating suggestions: {e}")
305
+ result = "### Error generating suggestions:\n\n"
306
+ result += "1. I'm having trouble generating responses right now.\n\n"
307
+ result += "2. Please try again or select a different model.\n\n"
308
+ result += "3. You might want to check your internet connection if using an online model.\n\n"
309
+
310
+ # Force a complete progress update before returning
311
+ progress(0.9, desc="Finalizing suggestions...")
312
 
313
  # If suggestion type is "common_phrases", use the person's common phrases
314
  elif clean_suggestion_type == "common_phrases":
 
332
  progress(0.3, desc="No category detected, using model instead...")
333
  try:
334
  suggestions = []
335
+ # Set a timeout for each suggestion generation (10 seconds)
336
+ timeout_per_suggestion = 10
337
+
338
  for i in range(3):
339
  progress_value = 0.4 + (i * 0.15) # Progress from 40% to 70%
340
  progress(
341
  progress_value, desc=f"Generating fallback suggestion {i+1}/3"
342
  )
343
+ try:
344
+ # Add mood to person context
345
+ person_context["mood"] = mood
346
+
347
+ # Set a start time for timeout tracking
348
+ start_time = time.time()
349
+
350
+ # Try to generate a suggestion with timeout
351
+ suggestion = None
352
+
353
+ # If model isn't loaded, use fallback immediately
354
+ if not suggestion_generator.model_loaded:
355
+ print("Model not loaded, using fallback response")
356
+ suggestion = random.choice(
357
+ suggestion_generator.fallback_responses
358
+ )
359
+ else:
360
+ # Try to generate with the model
361
+ suggestion = suggestion_generator.generate_suggestion(
362
+ person_context, user_input, temperature=temperature
363
+ )
364
+
365
+ # Check if generation took too long
366
+ if time.time() - start_time > timeout_per_suggestion:
367
+ print(
368
+ f"Fallback suggestion {i+1} generation timed out, using fallback"
369
+ )
370
+ suggestion = (
371
+ "I'm not sure what to say about that right now."
372
+ )
373
+
374
+ # Only add non-empty suggestions
375
+ if suggestion and suggestion.strip():
376
+ suggestions.append(suggestion.strip())
377
+ else:
378
+ print("Empty fallback suggestion received, using default")
379
+ suggestions.append("I'm not sure what to say about that.")
380
+
381
+ # Force a progress update after each suggestion
382
+ progress(
383
+ 0.4 + (i * 0.15) + 0.05,
384
+ desc=f"Completed fallback suggestion {i+1}/3",
385
+ )
386
+
387
+ except Exception as e:
388
+ print(f"Error generating fallback suggestion {i+1}: {e}")
389
+ suggestions.append("I'm having trouble responding to that.")
390
+ # Force a progress update even after error
391
+ progress(
392
+ 0.4 + (i * 0.15) + 0.05,
393
+ desc=f"Error in fallback suggestion {i+1}/3",
394
+ )
395
+
396
+ # Small delay to ensure UI updates
397
+ time.sleep(0.2)
398
+
399
+ # Make sure we have at least one suggestion
400
+ if not suggestions:
401
+ suggestions = ["I'm not sure what to say about that."]
402
+
403
+ # Make sure we have exactly 3 suggestions (pad with fallbacks if needed)
404
+ while len(suggestions) < 3:
405
+ suggestions.append("I'm not sure what else to say about that.")
406
+
407
+ # Force a progress update
408
+ progress(0.85, desc="Finalizing fallback suggestions...")
409
+
410
+ result = "### AI-Generated Responses (no category detected):\n\n"
411
  for i, suggestion in enumerate(suggestions, 1):
412
  result += f"{i}. {suggestion}\n\n"
413
  except Exception as e:
414
  print(f"Error generating fallback suggestion: {e}")
415
+ progress(0.9, desc="Error handling...")
416
  result = "### Could not generate a response:\n\n"
417
  result += "1. Sorry, I couldn't generate a suggestion at this time.\n\n"
418
 
 
442
  print(f"Result type: {type(result)}")
443
  print(f"Result length: {len(result)}")
444
 
 
 
 
445
  # Make sure we're returning a non-empty string
446
  if not result or len(result.strip()) == 0:
447
  result = "No response was generated. Please try again with different settings."
448
 
449
+ # Always complete the progress to 100% before returning
450
+ progress(1.0, desc="Completed!")
451
+
452
+ # Add a small delay to ensure UI updates properly
453
+ time.sleep(0.5)
454
+
455
+ # Print final status
456
+ print("Generation completed successfully, returning result")
457
+
458
  return result
459
 
460
 
 
576
  with gr.Row():
577
  model_dropdown = gr.Dropdown(
578
  choices=list(AVAILABLE_MODELS.keys()),
579
+ value="gemini-1.5-flash-latest",
580
  label="Language Model",
581
+ info="Select which AI model to use (🌐 = online API, 💻 = offline model)",
582
  )
583
 
584
  temperature_slider = gr.Slider(
 
670
 
671
  # Launch the app
672
  if __name__ == "__main__":
673
+ print("Starting application...")
674
+ try:
675
+ demo.launch()
676
+ except Exception as e:
677
+ print(f"Error launching application: {e}")
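The new generate_suggestions wiring above rescales each sub-task's progress into a slice of the overall gr.Progress bar through a lambda. A minimal, self-contained sketch of that pattern follows; the `progress` function below is only a stand-in for gr.Progress, for illustration:

```python
# Sketch of the nested-progress pattern used in generate_suggestions: the
# outer callback owns the 0.0-1.0 range, and each sub-task reports its own
# 0.0-1.0 progress through a lambda that maps it into a slice (0.2-0.9 here).
def progress(value, desc=""):
    # Stand-in for gr.Progress, for illustration only.
    print(f"[{value:.2f}] {desc}")

def generate_multiple(progress_callback=None):
    for i in range(3):
        if progress_callback:
            # Sub-task reports its own fraction of completion (0.0-1.0).
            progress_callback(i / 3, desc=f"Generating suggestion {i + 1}/3")

# Map the sub-task's 0.0-1.0 range onto the 0.2-0.9 slice of the outer bar.
generate_multiple(progress_callback=lambda p, desc: progress(0.2 + p * 0.7, desc=desc))
```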
llm_interface.py ADDED
@@ -0,0 +1,345 @@
1
+ """
2
+ LLM Interface for the AAC app using Simon Willison's LLM library.
3
+ """
4
+
5
+ import subprocess
6
+ import time
7
+ from typing import List, Optional, Dict, Any
8
+
9
+
10
+ class LLMInterface:
11
+ """Interface for Simon Willison's LLM tool."""
12
+
13
+ def __init__(
14
+ self,
15
+ model_name: str = "gemini-1.5-flash",
16
+ max_length: int = 150,
17
+ temperature: float = 0.7,
18
+ ):
19
+ """Initialize the LLM interface.
20
+
21
+ Args:
22
+ model_name: Name of the model to use
23
+ max_length: Maximum length of generated text
24
+ temperature: Controls randomness (higher = more random)
25
+ """
26
+ self.model_name = model_name
27
+ self.max_length = max_length
28
+ self.temperature = temperature
29
+ self.model_loaded = self._check_llm_installed()
30
+ self.fallback_responses = [
31
+ "I'm not sure how to respond to that.",
32
+ "That's interesting. Tell me more.",
33
+ "I'd like to talk about that further.",
34
+ "I appreciate you sharing that with me.",
35
+ "Could we talk about something else?",
36
+ "I need some time to think about that.",
37
+ ]
38
+
39
+ def _check_llm_installed(self) -> bool:
40
+ """Check if the LLM tool is installed and working."""
41
+ try:
42
+ result = subprocess.run(
43
+ ["llm", "--version"],
44
+ capture_output=True,
45
+ text=True,
46
+ timeout=5, # Add a timeout to prevent hanging
47
+ )
48
+ if result.returncode == 0:
49
+ print(f"LLM tool is installed: {result.stdout.strip()}")
50
+
51
+ # Also check if the model exists
52
+ try:
53
+ # Just check if the model is in the list of available models
54
+ model_check = subprocess.run(
55
+ ["llm", "models"],
56
+ capture_output=True,
57
+ text=True,
58
+ timeout=5,
59
+ )
60
+
61
+ if model_check.returncode == 0:
62
+ if self.model_name in model_check.stdout:
63
+ print(f"Model {self.model_name} is available")
64
+ return True
65
+ else:
66
+ print(
67
+ f"Model {self.model_name} not found in available models"
68
+ )
69
+ # Try to find similar models
70
+ if "gemini" in self.model_name.lower():
71
+ print("Available Gemini models:")
72
+ for line in model_check.stdout.splitlines():
73
+ if "gemini" in line.lower():
74
+ print(f" {line}")
75
+ return False
76
+ else:
77
+ print("Error checking available models")
78
+ return False
79
+
80
+ except Exception as model_error:
81
+ print(f"Error checking model availability: {model_error}")
82
+ return False
83
+ else:
84
+ print("LLM tool returned an error.")
85
+ return False
86
+ except subprocess.TimeoutExpired:
87
+ print("Timeout checking LLM tool installation")
88
+ return False
89
+ except Exception as e:
90
+ print(f"Error checking LLM tool: {e}")
91
+ return False
92
+
93
+ def _get_max_tokens_param(self) -> str:
94
+ """Get the appropriate max tokens parameter name for the model."""
95
+ if "gemini" in self.model_name.lower():
96
+ return "max_output_tokens"
97
+ else:
98
+ return "max_tokens"
99
+
100
+ def generate_suggestion(
101
+ self,
102
+ person_context: Dict[str, Any],
103
+ user_input: Optional[str] = None,
104
+ temperature: Optional[float] = None,
105
+ progress_callback=None,
106
+ ) -> str:
107
+ """Generate a suggestion based on the person context and user input.
108
+
109
+ Args:
110
+ person_context: Context information about the person
111
+ user_input: Optional user input to consider
112
+ temperature: Controls randomness in generation (higher = more random)
113
+ progress_callback: Optional callback function to report progress
114
+
115
+ Returns:
116
+ A generated suggestion string
117
+ """
118
+ if not self.model_loaded:
119
+ import random
120
+
121
+ return random.choice(self.fallback_responses)
122
+
123
+ # Extract context information
124
+ name = person_context.get("name", "")
125
+ role = person_context.get("role", "")
126
+ topics = person_context.get("topics", [])
127
+ context = person_context.get("context", "")
128
+ selected_topic = person_context.get("selected_topic", "")
129
+ common_phrases = person_context.get("common_phrases", [])
130
+ frequency = person_context.get("frequency", "")
131
+ mood = person_context.get("mood", 3) # Default to neutral mood (3)
132
+
133
+ # Get mood description
134
+ mood_descriptions = {
135
+ 1: "I'm feeling quite down and sad today. My responses might be more subdued.",
136
+ 2: "I'm feeling a bit low today. I might be less enthusiastic than usual.",
137
+ 3: "I'm feeling okay today - neither particularly happy nor sad.",
138
+ 4: "I'm feeling pretty good today. I'm in a positive mood.",
139
+ 5: "I'm feeling really happy and upbeat today! I'm in a great mood.",
140
+ }
141
+ mood_description = mood_descriptions.get(mood, mood_descriptions[3])
142
+
143
+ # Build enhanced prompt
144
+ prompt = f"""I am Will, a 38-year-old with MND (Motor Neuron Disease) from Manchester.
145
+ I am talking to {name}, who is my {role}.
146
+ About {name}: {context}
147
+ We typically talk about: {', '.join(topics)}
148
+ We communicate {frequency}.
149
+
150
+ My current mood: {mood_description}
151
+ """
152
+
153
+ # Add communication style based on relationship
154
+ if role in ["wife", "son", "daughter", "mother", "father"]:
155
+ prompt += "I communicate with my family in a warm, loving way, sometimes using inside jokes.\n"
156
+ elif role in ["doctor", "therapist", "nurse"]:
157
+ prompt += "I communicate with healthcare providers in a direct, informative way.\n"
158
+ elif role in ["best mate", "friend"]:
159
+ prompt += "I communicate with friends casually, often with humor and sometimes swearing.\n"
160
+ elif role in ["work colleague", "boss"]:
161
+ prompt += (
162
+ "I communicate with colleagues professionally but still friendly.\n"
163
+ )
164
+
165
+ # Add topic information if provided
166
+ if selected_topic:
167
+ prompt += f"\nWe are currently discussing {selected_topic}.\n"
168
+
169
+ # Add the user's message if provided, or set up for conversation initiation
170
+ if user_input:
171
+ # If user input is provided, we're responding to something
172
+ prompt += f'\n{name} just said to me: "{user_input}"\n'
173
+ prompt += f"I want to respond directly to what {name} just said.\n"
174
+ else:
175
+ # No user input means we're initiating a conversation
176
+ if selected_topic:
177
+ # If a topic is selected, initiate conversation about that topic
178
+ prompt += f"\nI'm about to start a conversation with {name} about {selected_topic}.\n"
179
+ prompt += f"I want to initiate a conversation about {selected_topic} in a natural way.\n"
180
+ else:
181
+ # Generic conversation starter
182
+ prompt += f"\nI'm about to start a conversation with {name}.\n"
183
+ prompt += "I want to initiate a conversation in a natural way based on our relationship.\n"
184
+
185
+ # Add the response prompt with specific guidance
186
+ if user_input:
187
+ # Responding to something
188
+ prompt += f"""
189
+ I am Will, the person with MND. I want to respond to {name}'s message: "{user_input}"
190
+ My response should be natural, brief (1-2 sentences), and directly relevant to what {name} just said.
191
+ I'll use language appropriate for our relationship and speak as myself (Will).
192
+
193
+ My response to {name}:"""
194
+ else:
195
+ # Initiating a conversation
196
+ prompt += f"""
197
+ I am Will, the person with MND. I want to start a conversation with {name}.
198
+ My conversation starter should be natural, brief (1-2 sentences), and appropriate for our relationship.
199
+ I'll speak in first person as myself (Will).
200
+
201
+ My conversation starter to {name}:"""
202
+
203
+ # Use the provided temperature or default
204
+ temp = temperature if temperature is not None else self.temperature
205
+
206
+ # Update progress if callback provided
207
+ if progress_callback:
208
+ progress_callback(0.3, desc="Sending prompt to LLM...")
209
+
210
+ try:
211
+ # Get the appropriate max tokens parameter
212
+ max_tokens_param = self._get_max_tokens_param()
213
+
214
+ # Call the LLM tool
215
+ result = subprocess.run(
216
+ [
217
+ "llm",
218
+ "-m",
219
+ self.model_name,
220
+ "-s",
221
+ f"temperature={temp}",
222
+ "-s",
223
+ f"{max_tokens_param}={self.max_length}",
224
+ prompt,
225
+ ],
226
+ capture_output=True,
227
+ text=True,
228
+ timeout=15, # Add timeout to prevent hanging
229
+ )
230
+
231
+ if progress_callback:
232
+ progress_callback(0.7, desc="Processing response...")
233
+
234
+ if result.returncode == 0:
235
+ # Get the generated text
236
+ generated = result.stdout.strip()
237
+
238
+ # Clean up the response if needed
239
+ if not generated:
240
+ generated = "I'm not sure what to say about that."
241
+
242
+ if progress_callback:
243
+ progress_callback(0.9, desc="Response generated successfully")
244
+
245
+ return generated
246
+ else:
247
+ print(f"Error from LLM tool: {result.stderr}")
248
+ if progress_callback:
249
+ progress_callback(0.9, desc="Error generating response")
250
+ return "I'm having trouble responding to that right now."
251
+ except subprocess.TimeoutExpired:
252
+ print("LLM generation timed out")
253
+ if progress_callback:
254
+ progress_callback(0.9, desc="Generation timed out")
255
+ return "I need more time to think about that."
256
+ except Exception as e:
257
+ print(f"Error generating with LLM tool: {e}")
258
+ if progress_callback:
259
+ progress_callback(0.9, desc="Error generating response")
260
+ return "I'm having trouble responding to that."
261
+
262
+ def generate_multiple_suggestions(
263
+ self,
264
+ person_context: Dict[str, Any],
265
+ user_input: Optional[str] = None,
266
+ num_suggestions: int = 3,
267
+ temperature: Optional[float] = None,
268
+ progress_callback=None,
269
+ ) -> List[str]:
270
+ """Generate multiple suggestions.
271
+
272
+ Args:
273
+ person_context: Context information about the person
274
+ user_input: Optional user input to consider
275
+ num_suggestions: Number of suggestions to generate
276
+ temperature: Controls randomness in generation
277
+ progress_callback: Optional callback function to report progress
278
+
279
+ Returns:
280
+ A list of generated suggestions
281
+ """
282
+ suggestions = []
283
+
284
+ for i in range(num_suggestions):
285
+ if progress_callback:
286
+ progress_callback(
287
+ 0.1 + (i * 0.3),
288
+ desc=f"Generating suggestion {i+1}/{num_suggestions}",
289
+ )
290
+
291
+ # Vary temperature slightly for each suggestion to increase diversity
292
+ temp_variation = 0.05 * (i - 1) # -0.05, 0, 0.05
293
+ temp = (
294
+ temperature if temperature is not None else self.temperature
295
+ ) + temp_variation
296
+
297
+ suggestion = self.generate_suggestion(
298
+ person_context,
299
+ user_input,
300
+ temperature=temp,
301
+ progress_callback=lambda p, desc: (
302
+ progress_callback(0.1 + (i * 0.3) + (p * 0.3), desc=desc)
303
+ if progress_callback
304
+ else None
305
+ ),
306
+ )
307
+
308
+ suggestions.append(suggestion)
309
+
310
+ # Small delay to ensure UI updates
311
+ time.sleep(0.2)
312
+
313
+ return suggestions
314
+
315
+ def test_model(self) -> str:
316
+ """Test if the model is working correctly."""
317
+ if not self.model_loaded:
318
+ return "LLM tool not available"
319
+
320
+ try:
321
+ # Create a simple test prompt
322
+ test_prompt = "Say hello in one word."
323
+
324
+ # Call the LLM tool
325
+ result = subprocess.run(
326
+ [
327
+ "llm",
328
+ "-m",
329
+ self.model_name,
330
+ "-s",
331
+ "temperature=0.7",
332
+ test_prompt,
333
+ ],
334
+ capture_output=True,
335
+ text=True,
336
+ timeout=10,
337
+ )
338
+
339
+ if result.returncode == 0:
340
+ response = result.stdout.strip()
341
+ return f"LLM test successful: {response}"
342
+ else:
343
+ return f"LLM test failed: {result.stderr}"
344
+ except Exception as e:
345
+ return f"LLM test error: {str(e)}"
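Because LLMInterface shells out to Simon Willison's `llm` CLI, it can be exercised directly from a Python shell without Gradio. A minimal usage sketch, assuming the `llm` CLI is installed and the chosen model appears in `llm models`; the person_context values are illustrative only:

```python
# Minimal usage sketch for llm_interface.LLMInterface. Assumes the `llm` CLI
# is installed and the selected model is listed by `llm models`.
from llm_interface import LLMInterface

generator = LLMInterface("gemini-1.5-flash-latest")
print(generator.test_model())  # success message or a fallback/error string

# These keys mirror what generate_suggestion() reads from the social graph;
# the values here are illustrative only.
person_context = {
    "name": "Billy",
    "role": "son",
    "topics": ["football", "school"],
    "context": "Billy is Will's 7-year-old son.",
    "frequency": "daily",
    "mood": 4,
}

suggestions = generator.generate_multiple_suggestions(
    person_context=person_context,
    user_input="Did you watch the match last night?",
    num_suggestions=3,
    temperature=0.7,
)
for i, suggestion in enumerate(suggestions, 1):
    print(f"{i}. {suggestion}")
```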
print_test.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ print("Hello, world!")
2
+ print("This is a test script.")
3
+ print("If you can see this, the terminal output is working.")
requirements.txt CHANGED
@@ -6,3 +6,4 @@ numpy>=1.24.0
6
  openai-whisper>=20231117
7
  bitsandbytes>=0.41.0
8
  accelerate>=0.21.0
 
 
6
  openai-whisper>=20231117
7
  bitsandbytes>=0.41.0
8
  accelerate>=0.21.0
9
+ google-generativeai>=0.3.0
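The new google-generativeai requirement backs the `gemini-api:` code path added to utils.py. A minimal sketch of the underlying call that path wraps, assuming a GEMINI_API_KEY environment variable is set; the prompt is illustrative only:

```python
# Sketch of the google-generativeai call wrapped by the "gemini-api:" path
# in utils.py. Assumes GEMINI_API_KEY is set in the environment.
import os
import google.generativeai as genai

genai.configure(api_key=os.environ["GEMINI_API_KEY"])
model = genai.GenerativeModel("gemini-1.5-flash")

response = model.generate_content(
    "Say hello in one word.",
    generation_config={"temperature": 0.7, "max_output_tokens": 100},
)
print(response.text)
```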
test_app.py ADDED
@@ -0,0 +1,54 @@
1
+ import sys
2
+ import os
3
+
4
+ print("Starting test...")
5
+
6
+ # Test importing the modules
7
+ try:
8
+ import gradio as gr
9
+ import whisper
10
+ import random
11
+ import time
12
+ from utils import SocialGraphManager, SuggestionGenerator
13
+ print("All modules imported successfully")
14
+ except Exception as e:
15
+ print(f"Error importing modules: {e}")
16
+ sys.exit(1)
17
+
18
+ # Test loading the social graph
19
+ try:
20
+ social_graph = SocialGraphManager("social_graph.json")
21
+ print("Social graph loaded successfully")
22
+ except Exception as e:
23
+ print(f"Error loading social graph: {e}")
24
+ sys.exit(1)
25
+
26
+ # Test initializing the suggestion generator
27
+ try:
28
+ suggestion_generator = SuggestionGenerator("distilgpt2") # Use a simpler model for testing
29
+ print("Suggestion generator initialized successfully")
30
+ except Exception as e:
31
+ print(f"Error initializing suggestion generator: {e}")
32
+ sys.exit(1)
33
+
34
+ # Test getting people from the social graph
35
+ try:
36
+ people = social_graph.get_people_list()
37
+ print(f"Found {len(people)} people in the social graph")
38
+ if people:
39
+ print(f"First person: {people[0]['name']} ({people[0]['role']})")
40
+ except Exception as e:
41
+ print(f"Error getting people from social graph: {e}")
42
+ sys.exit(1)
43
+
44
+ # Test getting person context
45
+ try:
46
+ if people:
47
+ person_id = people[0]['id']
48
+ person_context = social_graph.get_person_context(person_id)
49
+ print(f"Got context for {person_context.get('name', 'unknown')}")
50
+ except Exception as e:
51
+ print(f"Error getting person context: {e}")
52
+ sys.exit(1)
53
+
54
+ print("All tests passed successfully!")
utils.py CHANGED
@@ -1,9 +1,10 @@
1
  import json
2
  import random
3
- from typing import Dict, List, Any, Optional, Tuple
 
 
4
  from sentence_transformers import SentenceTransformer
5
  import numpy as np
6
-
7
  from transformers import pipeline
8
 
9
 
@@ -186,10 +187,10 @@ class SuggestionGenerator:
186
  ]
187
 
188
  def load_model(self, model_name: str) -> bool:
189
- """Load a Hugging Face model.
190
 
191
  Args:
192
- model_name: Name of the HuggingFace model to use
193
 
194
  Returns:
195
  bool: True if model loaded successfully, False otherwise
@@ -204,8 +205,48 @@ class SuggestionGenerator:
204
  self.model_loaded = True
205
  return True
206
 
207
  try:
208
- print(f"Loading model: {model_name}")
209
 
210
  # Check if this is a gated model that requires authentication
211
  is_gated_model = any(
@@ -217,7 +258,9 @@ class SuggestionGenerator:
217
  # Try to get token from environment
218
  import os
219
  import torch
 
220
  from transformers import BitsAndBytesConfig
 
221
 
222
  token = os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get(
223
  "HF_TOKEN"
@@ -232,56 +275,138 @@ class SuggestionGenerator:
232
  # Explicitly pass token to pipeline
233
  from transformers import AutoTokenizer, AutoModelForCausalLM
234
 
235
- try:
236
- # Configure 4-bit quantization to save memory
237
- quantization_config = BitsAndBytesConfig(
238
- load_in_4bit=True,
239
- bnb_4bit_compute_dtype=torch.float16,
240
- bnb_4bit_quant_type="nf4",
241
- bnb_4bit_use_double_quant=True,
242
- )
243
 
244
- tokenizer = AutoTokenizer.from_pretrained(
245
- model_name, token=token
246
- )
 
 
247
 
248
- # Load model with quantization
249
- model = AutoModelForCausalLM.from_pretrained(
250
- model_name,
251
- token=token,
252
- quantization_config=quantization_config,
253
- device_map="auto",
254
- )
 
255
 
256
- self.generator = pipeline(
257
- "text-generation",
258
- model=model,
259
- tokenizer=tokenizer,
260
- torch_dtype=torch.float16,
261
- )
262
- except Exception as e:
263
- print(f"Error loading gated model with token: {e}")
264
- print(
265
- "This may be due to not having accepted the model license or insufficient permissions."
266
- )
267
- print(
268
- "Please visit the model page on Hugging Face Hub and accept the license."
269
- )
270
  # Try loading without quantization as fallback
271
  try:
272
- print("Trying to load model without quantization...")
 
 
273
  tokenizer = AutoTokenizer.from_pretrained(
274
- model_name, token=token
275
  )
276
  model = AutoModelForCausalLM.from_pretrained(
277
- model_name, token=token
 
 
 
278
  )
279
- self.generator = pipeline(
280
- "text-generation", model=model, tokenizer=tokenizer
281
  )
282
  except Exception as e2:
283
  print(f"Fallback loading also failed: {e2}")
284
- raise e
 
 
 
 
 
285
  else:
286
  print("No Hugging Face token found in environment variables.")
287
  print(
@@ -297,7 +422,12 @@ class SuggestionGenerator:
297
  raise ValueError("Authentication token required for gated model")
298
  else:
299
  # For non-gated models, use the standard pipeline
300
- self.generator = pipeline("text-generation", model=model_name)
 
 
 
 
 
301
 
302
  # Cache the loaded model
303
  self.loaded_models[model_name] = self.generator
@@ -310,6 +440,71 @@ class SuggestionGenerator:
310
  self.model_loaded = False
311
  return False
312
 
313
  def _get_mood_description(self, mood_value: int) -> str:
314
  """Convert mood value (1-5) to a descriptive string.
315
 
@@ -336,16 +531,132 @@ class SuggestionGenerator:
336
  return "Model not loaded"
337
 
338
  try:
339
- test_prompt = "I am Will. My son Billy asked about football. I respond:"
340
  print(f"Testing model with prompt: {test_prompt}")
341
- response = self.generator(test_prompt, max_new_tokens=30, do_sample=True)
342
- full_text = response[0]["generated_text"]
343
- if len(test_prompt) < len(full_text):
344
- result = full_text[len(test_prompt) :]
 
345
  else:
346
- result = "No additional text generated"
347
- print(f"Test response: {result}")
348
- return f"Model test successful: {result}"
  except Exception as e:
350
  print(f"Error testing model: {e}")
351
  return f"Model test failed: {str(e)}"
@@ -486,14 +797,42 @@ My current mood: {self._get_mood_description(mood)}
486
  for marker in ["-it", "instruct", "chat", "phi-3", "phi-2"]
487
  )
488
 
489
- if is_instruction_model:
490
  # Use instruction format for instruction-tuned models
491
  if user_input:
492
  # Responding to something
493
  prompt += f"""
494
  <instruction>
495
- Respond to {name} in a way that is natural, brief (1-2 sentences), and directly relevant to what they just said.
496
- Use language appropriate for our relationship.
 
 
497
  </instruction>
498
 
499
  My response to {name}:"""
@@ -501,55 +840,183 @@ My response to {name}:"""
501
  # Initiating a conversation
502
  prompt += f"""
503
  <instruction>
504
- Start a conversation with {name} in a natural, brief (1-2 sentences) way.
505
- Use language appropriate for our relationship.
506
- If a topic was selected, focus on that topic.
 
507
  </instruction>
508
 
509
  My conversation starter to {name}:"""
510
  else:
511
- # Use standard format for non-instruction models
512
  if user_input:
513
  # Responding to something
514
  prompt += f"""
515
- I want to respond to {name} in a way that is natural, brief (1-2 sentences), and directly relevant to what they just said. I'll use language appropriate for our relationship.
 
 
516
 
517
  My response to {name}:"""
518
  else:
519
  # Initiating a conversation
520
  prompt += f"""
521
- I want to start a conversation with {name} in a natural, brief (1-2 sentences) way. I'll use language appropriate for our relationship.
 
 
522
 
523
  My conversation starter to {name}:"""
524
 
525
  # Generate suggestion
526
  try:
527
  print(f"Generating suggestion with prompt: {prompt}")
528
- # Use max_new_tokens instead of max_length to avoid the error
529
- response = self.generator(
530
- prompt,
531
- max_new_tokens=100, # Generate more tokens to ensure we get a response
532
- temperature=temperature,
533
- do_sample=True,
534
- top_p=0.92,
535
- top_k=50,
536
- # Only use truncation if we're providing a max_length
537
- truncation=False,
538
- )
539
- # Extract only the generated part, not the prompt
540
- full_text = response[0]["generated_text"]
541
- print(f"Full generated text length: {len(full_text)}")
542
- print(f"Prompt length: {len(prompt)}")
543
-
544
- # Make sure we're not trying to slice beyond the text length
545
- if len(prompt) < len(full_text):
546
- result = full_text[len(prompt) :]
547
- print(f"Generated response: {result}")
548
549
  else:
550
- # If the model didn't generate anything beyond the prompt
551
- print("Model didn't generate text beyond prompt")
552
553
  except Exception as e:
554
  print(f"Error generating suggestion: {e}")
555
  return "Could not generate a suggestion. Please try again."
 
1
  import json
2
  import random
3
+ import threading
4
+ import time
5
+ from typing import Dict, List, Any, Optional
6
  from sentence_transformers import SentenceTransformer
7
  import numpy as np
 
8
  from transformers import pipeline
9
 
10
 
 
187
  ]
188
 
189
  def load_model(self, model_name: str) -> bool:
190
+ """Load a model (either Hugging Face model or API-based model).
191
 
192
  Args:
193
+ model_name: Name of the model to use (HuggingFace model name or API identifier)
194
 
195
  Returns:
196
  bool: True if model loaded successfully, False otherwise
 
205
  self.model_loaded = True
206
  return True
207
 
208
+ # Check if this is a Gemini API model
209
+ if model_name.startswith("gemini-api:"):
210
+ try:
211
+ import os
212
+ import google.generativeai as genai
213
+
214
+ # Get API key from environment
215
+ api_key = os.environ.get("GEMINI_API_KEY")
216
+ if not api_key:
217
+ print("No GEMINI_API_KEY found in environment variables.")
218
+ print("Please set the GEMINI_API_KEY environment variable.")
219
+ return False
220
+
221
+ # Configure the Gemini API
222
+ genai.configure(api_key=api_key)
223
+
224
+ # Extract the specific model name after the prefix
225
+ gemini_model = model_name.split(":", 1)[1]
226
+ print(f"Using Gemini API with model: {gemini_model}")
227
+
228
+ # Store the model name and API client in the generator
229
+ self.generator = {
230
+ "type": "gemini-api",
231
+ "model": gemini_model,
232
+ "client": genai,
233
+ }
234
+
235
+ # Cache the API client
236
+ self.loaded_models[model_name] = self.generator
237
+
238
+ self.model_loaded = True
239
+ print(f"Gemini API configured successfully for model: {gemini_model}")
240
+ return True
241
+
242
+ except Exception as e:
243
+ print(f"Error configuring Gemini API: {e}")
244
+ self.model_loaded = False
245
+ return False
246
+
247
+ # Otherwise, try to load a Hugging Face model
248
  try:
249
+ print(f"Loading Hugging Face model: {model_name}")
250
 
251
  # Check if this is a gated model that requires authentication
252
  is_gated_model = any(
 
258
  # Try to get token from environment
259
  import os
260
  import torch
261
+ import time
262
  from transformers import BitsAndBytesConfig
263
+ from requests.exceptions import ConnectionError, Timeout, HTTPError
264
 
265
  token = os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get(
266
  "HF_TOKEN"
 
275
  # Explicitly pass token to pipeline
276
  from transformers import AutoTokenizer, AutoModelForCausalLM
277
 
278
+ # Implement retry mechanism for network issues
279
+ max_retries = 3
280
+ retry_delay = 2 # seconds
 
 
 
 
 
281
 
282
+ for attempt in range(max_retries):
283
+ try:
284
+ print(
285
+ f"Attempt {attempt+1}/{max_retries} to load model: {model_name}"
286
+ )
287
 
288
+ # First try to load just the tokenizer to check connectivity
289
+ print(f"Loading tokenizer for {model_name}...")
290
+ tokenizer = AutoTokenizer.from_pretrained(
291
+ model_name,
292
+ token=token,
293
+ use_fast=True,
294
+ local_files_only=False,
295
+ )
296
+ print(f"Tokenizer loaded successfully for {model_name}")
297
+
298
+ # Configure 4-bit quantization to save memory
299
+ print("Configuring quantization settings...")
300
+ quantization_config = BitsAndBytesConfig(
301
+ load_in_4bit=True,
302
+ bnb_4bit_compute_dtype=torch.float16,
303
+ bnb_4bit_quant_type="nf4",
304
+ bnb_4bit_use_double_quant=True,
305
+ )
306
 
307
+ # Load model with quantization
308
+ print(f"Loading model {model_name} with quantization...")
309
+ model = AutoModelForCausalLM.from_pretrained(
310
+ model_name,
311
+ token=token,
312
+ quantization_config=quantization_config,
313
+ device_map="auto",
314
+ low_cpu_mem_usage=True,
315
+ )
316
+ print(
317
+ f"Model {model_name} loaded successfully with quantization"
318
+ )
319
+
320
+ # Create pipeline
321
+ print("Creating text generation pipeline...")
322
+ self.generator = {
323
+ "type": "huggingface",
324
+ "pipeline": pipeline(
325
+ "text-generation",
326
+ model=model,
327
+ tokenizer=tokenizer,
328
+ torch_dtype=torch.float16,
329
+ ),
330
+ }
331
+ print("Pipeline created successfully")
332
+
333
+ # If we got here, loading succeeded
334
+ break
335
+
336
+ except (ConnectionError, Timeout, HTTPError) as network_error:
337
+ # Handle network-related errors with retries
338
+ print(
339
+ f"Network error loading model (attempt {attempt+1}/{max_retries}): {network_error}"
340
+ )
341
+ if attempt < max_retries - 1:
342
+ print(f"Retrying in {retry_delay} seconds...")
343
+ time.sleep(retry_delay)
344
+ retry_delay *= 2 # Exponential backoff
345
+ else:
346
+ print(
347
+ "Maximum retries reached, falling back to alternative loading method"
348
+ )
349
+ raise network_error
350
+
351
+ except (RuntimeError, ValueError, OSError) as e:
352
+ # Handle memory errors or other issues
353
+ print(
354
+ f"Error loading gated model with token (attempt {attempt+1}/{max_retries}): {e}"
355
+ )
356
+ print(
357
+ "This may be due to memory limitations, network issues, or insufficient permissions."
358
+ )
359
+
360
+ if "CUDA out of memory" in str(
361
+ e
362
+ ) or "DefaultCPUAllocator" in str(e):
363
+ print(
364
+ "Memory error detected. Trying with more aggressive memory optimization..."
365
+ )
366
+ break # Skip to non-quantized version with CPU offloading
367
+
368
+ if attempt < max_retries - 1:
369
+ print(f"Retrying in {retry_delay} seconds...")
370
+ time.sleep(retry_delay)
371
+ retry_delay *= 2 # Exponential backoff
372
+ else:
373
+ print(
374
+ "Maximum retries reached, falling back to alternative loading method"
375
+ )
376
+
377
+ # If the loop completed without success, try alternative loading methods
378
+ if not hasattr(self, "generator") or self.generator is None:
379
  # Try loading without quantization as fallback
380
  try:
381
+ print(
382
+ "Trying to load model without quantization (CPU only)..."
383
+ )
384
  tokenizer = AutoTokenizer.from_pretrained(
385
+ model_name, token=token, use_fast=True
386
  )
387
  model = AutoModelForCausalLM.from_pretrained(
388
+ model_name,
389
+ token=token,
390
+ device_map="cpu",
391
+ low_cpu_mem_usage=True,
392
  )
393
+ self.generator = {
394
+ "type": "huggingface",
395
+ "pipeline": pipeline(
396
+ "text-generation", model=model, tokenizer=tokenizer
397
+ ),
398
+ }
399
+ print(
400
+ "Successfully loaded model on CPU without quantization"
401
  )
402
  except Exception as e2:
403
  print(f"Fallback loading also failed: {e2}")
404
+ print(
405
+ "All loading attempts failed. Please try a different model or check your connection."
406
+ )
407
+ raise RuntimeError(
408
+ f"Failed to load model after multiple attempts: {str(e2)}"
409
+ )
410
  else:
411
  print("No Hugging Face token found in environment variables.")
412
  print(
 
422
  raise ValueError("Authentication token required for gated model")
423
  else:
424
  # For non-gated models, use the standard pipeline
425
+ from transformers import pipeline
426
+
427
+ self.generator = {
428
+ "type": "huggingface",
429
+ "pipeline": pipeline("text-generation", model=model_name),
430
+ }
431
 
432
  # Cache the loaded model
433
  self.loaded_models[model_name] = self.generator
 
440
  self.model_loaded = False
441
  return False
442
 
443
+ def _clean_small_model_response(self, response: str) -> str:
444
+ """Clean up responses from small models that often repeat instructions or generate nonsense.
445
+
446
+ Args:
447
+ response: The raw response from the model
448
+
449
+ Returns:
450
+ A cleaned response
451
+ """
452
+ # If response is too short, return as is
453
+ if len(response) < 5:
454
+ return response
455
+
456
+ # Remove common instruction repetitions
457
+ patterns_to_remove = [
458
+ "I want to respond to what",
459
+ "I'll use language appropriate for our relationship",
460
+ "I should speak in first person",
461
+ "I should use language appropriate",
462
+ "I want to respond directly",
463
+ "I'll speak as myself",
464
+ "I want to initiate a conversation",
465
+ "My response should be natural",
466
+ "My response to",
467
+ "Will's response to",
468
+ "Will says to",
469
+ ]
470
+
471
+ # Check for and remove these patterns
472
+ cleaned_response = response
473
+ for pattern in patterns_to_remove:
474
+ if pattern in cleaned_response:
475
+ # Find the first occurrence and remove everything from there
476
+ index = cleaned_response.find(pattern)
477
+ if index > 10: # Keep some beginning text if available
478
+ cleaned_response = cleaned_response[:index].strip()
479
+ else:
480
+ # If pattern is at the beginning, remove just that pattern
481
+ parts = cleaned_response.split(pattern, 1)
482
+ if len(parts) > 1:
483
+ cleaned_response = parts[1].strip()
484
+
485
+ # Remove any lines that are just the name repeated
486
+ lines = cleaned_response.split("\n")
487
+ cleaned_lines = []
488
+ for line in lines:
489
+ # Skip lines that are just a name repeated
490
+ if line.strip() and not all(
491
+ word == line.split()[0] for word in line.split()
492
+ ):
493
+ cleaned_lines.append(line)
494
+
495
+ cleaned_response = "\n".join(cleaned_lines).strip()
496
+
497
+ # If we've removed too much, use a fallback
498
+ if len(cleaned_response) < 5:
499
+ return "I'm not sure what to say about that."
500
+
501
+ # Limit to first 2 sentences to avoid rambling
502
+ sentences = cleaned_response.split(".")
503
+ if len(sentences) > 2:
504
+ cleaned_response = ".".join(sentences[:2]) + "."
505
+
506
+ return cleaned_response
507
+
508
  def _get_mood_description(self, mood_value: int) -> str:
509
  """Convert mood value (1-5) to a descriptive string.
510
 
 
531
  return "Model not loaded"
532
 
533
  try:
534
+ # Create a more explicit test prompt that clearly establishes Will's identity and role
535
+ test_prompt = """I am Will, a 38-year-old with MND (Motor Neuron Disease).
536
+ I am talking to my 7-year-old son Billy.
537
+ Billy just asked me about football.
538
+ I want to respond to Billy in a natural, brief way.
539
+
540
+ My response to Billy:"""
541
  print(f"Testing model with prompt: {test_prompt}")
542
+
543
+ # Check if we're using the Gemini API or a Hugging Face model
544
+ if (
545
+ isinstance(self.generator, dict)
546
+ and self.generator.get("type") == "gemini-api"
547
+ ):
548
+ try:
549
+ # Use Gemini API
550
+ genai = self.generator["client"]
551
+ model_name = self.generator["model"]
552
+
553
+ # Create a generative model
554
+ model = genai.GenerativeModel(model_name)
555
+
556
+ # Generate content with timeout
557
+ print("Sending test request to Gemini API...")
558
+
559
+ # Set a timeout for the test
560
+ import threading
561
+ import time
562
+
563
+ result = ["No response received yet"]
564
+ generation_complete = [False]
565
+
566
+ def generate_with_timeout():
567
+ try:
568
+ print("Starting Gemini API test request...")
569
+ response = model.generate_content(test_prompt)
570
+ print(f"Received response from Gemini API: {response}")
571
+
572
+ if response and hasattr(response, "text"):
573
+ result[0] = response.text
574
+ print(f"Extracted text from response: {result[0]}")
575
+ else:
576
+ result[0] = "No text in Gemini API response"
577
+ print("Response object has no text attribute")
578
+
579
+ generation_complete[0] = True
580
+ except Exception as e:
581
+ print(f"Error in Gemini test generation: {e}")
582
+ result[0] = f"Error: {str(e)}"
583
+ generation_complete[0] = True
584
+
585
+ # Start generation in a separate thread
586
+ generation_thread = threading.Thread(target=generate_with_timeout)
587
+ generation_thread.daemon = True
588
+ generation_thread.start()
589
+
590
+ # Wait for up to 10 seconds
591
+ timeout = 10
592
+ start_time = time.time()
593
+ while (
594
+ not generation_complete[0]
595
+ and time.time() - start_time < timeout
596
+ ):
597
+ print(
598
+ f"Waiting for Gemini API response... ({int(time.time() - start_time)}s)"
599
+ )
600
+ time.sleep(1)
601
+
602
+ if not generation_complete[0]:
603
+ print("Gemini API test request timed out")
604
+ return "Gemini API test timed out after 10 seconds"
605
+
606
+ print(f"Test response from Gemini API: {result[0]}")
607
+ return f"Gemini API test successful: {result[0]}"
608
+ except Exception as e:
609
+ print(f"Error testing Gemini API: {e}")
610
+ return f"Gemini API test failed: {str(e)}"
611
+
612
+ elif (
613
+ isinstance(self.generator, dict)
614
+ and self.generator.get("type") == "huggingface"
615
+ ):
616
+ # Use Hugging Face pipeline
617
+ pipeline = self.generator["pipeline"]
618
+ response = pipeline(test_prompt, max_new_tokens=30, do_sample=True)
619
+ full_text = response[0]["generated_text"]
620
+
621
+ if len(test_prompt) < len(full_text):
622
+ result = full_text[len(test_prompt) :].strip()
623
+
624
+ # Check if this is a small model that needs cleaning
625
+ is_small_model = any(
626
+ name in self.model_name.lower()
627
+ for name in ["distilgpt2", "gpt2-small", "tiny"]
628
+ )
629
+ if is_small_model:
630
+ result = self._clean_small_model_response(result)
631
+ else:
632
+ result = "No additional text generated"
633
+
634
+ print(f"Test response from Hugging Face: {result}")
635
+ return f"Hugging Face model test successful: {result}"
636
+
637
  else:
638
+ # Legacy format (for backward compatibility)
639
+ response = self.generator(
640
+ test_prompt, max_new_tokens=30, do_sample=True
641
+ )
642
+ full_text = response[0]["generated_text"]
643
+
644
+ if len(test_prompt) < len(full_text):
645
+ result = full_text[len(test_prompt) :].strip()
646
+
647
+ # Check if this is a small model that needs cleaning
648
+ is_small_model = any(
649
+ name in self.model_name.lower()
650
+ for name in ["distilgpt2", "gpt2-small", "tiny"]
651
+ )
652
+ if is_small_model:
653
+ result = self._clean_small_model_response(result)
654
+ else:
655
+ result = "No additional text generated"
656
+
657
+ print(f"Test response: {result}")
658
+ return f"Model test successful: {result}"
659
+
660
  except Exception as e:
661
  print(f"Error testing model: {e}")
662
  return f"Model test failed: {str(e)}"
 
797
  for marker in ["-it", "instruct", "chat", "phi-3", "phi-2"]
798
  )
799
 
800
+ # Check if this is a very small model that needs simpler prompts
801
+ is_small_model = any(
802
+ name in self.model_name.lower()
803
+ for name in ["distilgpt2", "gpt2-small", "tiny"]
804
+ )
805
+
806
+ if is_small_model:
807
+ # Use a much simpler format for very small models
808
+ if user_input:
809
+ # Responding to something
810
+ prompt += f"""
811
+ {name} said: "{user_input}"
812
+
813
+ Will's response:"""
814
+ else:
815
+ # Initiating a conversation
816
+ if selected_topic:
817
+ prompt += f"""
818
+ Will starts a conversation with {name} about {selected_topic}.
819
+
820
+ Will says:"""
821
+ else:
822
+ prompt += f"""
823
+ Will starts a conversation with {name}.
824
+
825
+ Will says:"""
826
+ elif is_instruction_model:
827
  # Use instruction format for instruction-tuned models
828
  if user_input:
829
  # Responding to something
830
  prompt += f"""
831
  <instruction>
832
+ I am Will, the person with MND. I need to respond to {name}'s message: "{user_input}"
833
+ My response should be natural, brief (1-2 sentences), and directly relevant to what {name} just said.
834
+ I should use language appropriate for our relationship.
835
+ I should speak in first person as myself (Will).
836
  </instruction>
837
 
838
  My response to {name}:"""
 
840
  # Initiating a conversation
841
  prompt += f"""
842
  <instruction>
843
+ I am Will, the person with MND. I need to start a conversation with {name}.
844
+ My conversation starter should be natural, brief (1-2 sentences), and appropriate for our relationship.
845
+ If a topic was selected, I should focus on that topic.
846
+ I should speak in first person as myself (Will).
847
  </instruction>
848
 
849
  My conversation starter to {name}:"""
850
  else:
851
+ # Use standard format for other models
852
  if user_input:
853
  # Responding to something
854
  prompt += f"""
855
+ I am Will, the person with MND. I want to respond to {name}'s message: "{user_input}"
856
+ My response should be natural, brief (1-2 sentences), and directly relevant to what {name} just said.
857
+ I'll use language appropriate for our relationship and speak as myself (Will).
858
 
859
  My response to {name}:"""
860
  else:
861
  # Initiating a conversation
862
  prompt += f"""
863
+ I am Will, the person with MND. I want to start a conversation with {name}.
864
+ My conversation starter should be natural, brief (1-2 sentences), and appropriate for our relationship.
865
+ I'll speak in first person as myself (Will).
866
 
867
  My conversation starter to {name}:"""
868
 
869
  # Generate suggestion
870
  try:
871
  print(f"Generating suggestion with prompt: {prompt}")
872
+
873
+ # Check if we're using the Gemini API or a Hugging Face model
874
+ if (
875
+ isinstance(self.generator, dict)
876
+ and self.generator.get("type") == "gemini-api"
877
+ ):
878
+ try:
879
+ # Use Gemini API
880
+ try:
881
+ genai = self.generator["client"]
882
+ model_name = self.generator["model"]
883
+
884
+ # Create a generative model
885
+ model = genai.GenerativeModel(model_name)
886
+
887
+ # Set generation config
888
+ generation_config = {
889
+ "temperature": temperature,
890
+ "top_p": 0.92,
891
+ "top_k": 50,
892
+ "max_output_tokens": 100,
893
+ }
894
+
895
+ # Generate content with timeout
896
+
897
+ result = [
898
+ "I'm thinking about what to say..."
899
+ ] # Default response
900
+ generation_complete = [False]
901
+
902
+ def generate_with_gemini():
903
+ try:
904
+ response = model.generate_content(
905
+ prompt, generation_config=generation_config
906
+ )
907
+
908
+ if response and hasattr(response, "text"):
909
+ result[0] = response.text.strip()
910
+ print(f"Gemini API response: {result[0]}")
911
+ else:
912
+ print("No response from Gemini API")
913
+
914
+ generation_complete[0] = True
915
+ except Exception as e:
916
+ print(f"Error in Gemini generation thread: {e}")
917
+ generation_complete[0] = True
918
+
919
+ # Start generation in a separate thread
920
+ generation_thread = threading.Thread(
921
+ target=generate_with_gemini
922
+ )
923
+ generation_thread.daemon = True
924
+ generation_thread.start()
925
+
926
+ # Wait for up to 10 seconds
927
+ timeout = 10
928
+ start_time = time.time()
929
+ while (
930
+ not generation_complete[0]
931
+ and time.time() - start_time < timeout
932
+ ):
933
+ time.sleep(0.1)
934
+
935
+ if not generation_complete[0]:
936
+ print("Gemini API request timed out")
937
+ return "I'm thinking about what to say... (API timeout)"
938
+
939
+ return result[0]
940
+ except Exception as e:
941
+ print(f"Error setting up Gemini API: {e}")
942
+ return (
943
+ "I'm having trouble connecting to the Gemini API right now."
944
+ )
945
+
946
+ except Exception as e:
947
+ print(f"Error generating with Gemini API: {e}")
948
+ return "Could not generate a suggestion with Gemini API. Please try again."
949
+
950
+ elif (
951
+ isinstance(self.generator, dict)
952
+ and self.generator.get("type") == "huggingface"
953
+ ):
954
+ # Use Hugging Face pipeline
955
+ pipeline = self.generator["pipeline"]
956
+
957
+ # Generate with Hugging Face
958
+ response = pipeline(
959
+ prompt,
960
+ max_new_tokens=100, # Generate more tokens to ensure we get a response
961
+ temperature=temperature,
962
+ do_sample=True,
963
+ top_p=0.92,
964
+ top_k=50,
965
+ truncation=False,
966
+ )
967
+
968
+ # Extract only the generated part, not the prompt
969
+ full_text = response[0]["generated_text"]
970
+ print(f"Full generated text length: {len(full_text)}")
971
+ print(f"Prompt length: {len(prompt)}")
972
+
973
+ # Make sure we're not trying to slice beyond the text length
974
+ if len(prompt) < len(full_text):
975
+ result = full_text[len(prompt) :].strip()
976
+
977
+ # Post-process the result for small models
978
+ if is_small_model:
979
+ result = self._clean_small_model_response(result)
980
+
981
+ print(f"Generated response: {result}")
982
+ return result
983
+ else:
984
+ # If the model didn't generate anything beyond the prompt
985
+ print("Model didn't generate text beyond prompt")
986
+ return "I'm thinking about what to say..."
987
+
988
  else:
989
+ # Legacy format (for backward compatibility)
990
+ response = self.generator(
991
+ prompt,
992
+ max_new_tokens=100,
993
+ temperature=temperature,
994
+ do_sample=True,
995
+ top_p=0.92,
996
+ top_k=50,
997
+ truncation=False,
998
+ )
999
+
1000
+ # Extract only the generated part, not the prompt
1001
+ full_text = response[0]["generated_text"]
1002
+ print(f"Full generated text length: {len(full_text)}")
1003
+ print(f"Prompt length: {len(prompt)}")
1004
+
1005
+ # Make sure we're not trying to slice beyond the text length
1006
+ if len(prompt) < len(full_text):
1007
+ result = full_text[len(prompt) :].strip()
1008
+
1009
+ # Post-process the result for small models
1010
+ if is_small_model:
1011
+ result = self._clean_small_model_response(result)
1012
+
1013
+ print(f"Generated response: {result}")
1014
+ return result
1015
+ else:
1016
+ # If the model didn't generate anything beyond the prompt
1017
+ print("Model didn't generate text beyond prompt")
1018
+ return "I'm thinking about what to say..."
1019
+
1020
  except Exception as e:
1021
  print(f"Error generating suggestion: {e}")
1022
  return "Could not generate a suggestion. Please try again."
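With these changes, utils.SuggestionGenerator routes model names prefixed with `gemini-api:` to the Gemini API instead of a local Hugging Face pipeline. A minimal sketch of that flow, assuming GEMINI_API_KEY is set; the initial model mirrors test_app.py and the person id is hypothetical:

```python
# Sketch of driving the updated utils.SuggestionGenerator via the new
# "gemini-api:" prefix. Assumes GEMINI_API_KEY is set; person id is hypothetical.
from utils import SocialGraphManager, SuggestionGenerator

graph = SocialGraphManager("social_graph.json")
generator = SuggestionGenerator("distilgpt2")  # initial model, as in test_app.py

# load_model() sends "gemini-api:<model>" names to google-generativeai
# rather than loading a local Hugging Face pipeline.
if generator.load_model("gemini-api:gemini-1.5-flash"):
    print(generator.test_model())
    person_context = graph.get_person_context("billy")  # hypothetical person id
    person_context["mood"] = 4
    suggestion = generator.generate_suggestion(
        person_context, "Did you watch the match last night?", temperature=0.7
    )
    print(suggestion)
```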