Spaces:
Sleeping
Sleeping
import logging | |
import os | |
import time | |
from openai import OpenAI | |
from app.core.config import Settings | |
from app.models.document import Document | |
logger = logging.getLogger(__name__) | |
class AnswerGenerator:
    """Generate answers with an OpenAI chat model grounded in retrieved documents.

    The generator joins the content of the retrieved documents into one
    context string, embeds it in a system prompt, and sends the user query
    as the user message. Failures of the API call are logged and turned into
    a fixed apology string rather than raised.
    """

    # Returned whenever the API call fails or yields no usable content.
    _FALLBACK_ANSWER = "I apologize, but I couldn't generate an answer at this time."
    # Context sizes above this many characters trigger a warning log only.
    _LARGE_CONTEXT_CHARS = 15000
    # Nucleus-sampling value sent with every completion request.
    _TOP_P = 0.9

    def __init__(self, settings: Settings):
        """Create the OpenAI client and remember the configured model.

        Args:
            settings: Application settings; ``openai_api_key`` and
                ``llm_model`` are read from it.

        Raises:
            ValueError: If no API key is available from the settings or the
                ``OPENAI_API_KEY`` environment variable.
        """
        self.settings = settings
        self.client = OpenAI(api_key=self._resolve_api_key(settings))
        self.model = settings.llm_model
        logger.info("Initialized AnswerGenerator with model: %s", self.model)

    @staticmethod
    def _resolve_api_key(settings: Settings) -> str:
        """Return the API key to use, preferring settings over the environment.

        The key that is validated is the key that is handed to the client, so
        a key present in only one of the two places is still accepted.

        Raises:
            ValueError: If neither source provides a non-empty key.
        """
        api_key = getattr(settings, "openai_api_key", None) or os.getenv(
            "OPENAI_API_KEY"
        )
        if not api_key:
            raise ValueError("OPENAI_API_KEY environment variable is required")
        return api_key

    def generate(
        self,
        query: str,
        documents: list[Document],
        temperature: float = 0.1,
        max_tokens: int = 1000,
    ) -> str:
        """Generate an answer for ``query`` from the retrieved ``documents``.

        Args:
            query: The user question; sent verbatim as the user message.
            documents: Retrieved documents; their ``content`` values are
                joined with blank lines to form the prompt context.
            temperature: Sampling temperature forwarded to the API.
            max_tokens: Completion token limit forwarded to the API.

        Returns:
            The stripped model answer, or a fixed apology string when the
            API call raises, returns no choices, or returns ``None`` content.
        """
        # perf_counter is monotonic, so durations are immune to clock changes.
        start_time = time.perf_counter()
        logger.info(
            "Generating answer for query: '%s...' with %d documents",
            query[:100],
            len(documents),
        )
        logger.debug(
            "Generation parameters - temperature: %s, max_tokens: %s",
            temperature,
            max_tokens,
        )
        if not documents:
            logger.warning(
                "No documents supplied; generating answer without retrieved context"
            )

        context = "\n\n".join(doc.content for doc in documents)
        context_length = len(context)
        logger.debug("Combined context length: %d characters", context_length)
        if context_length > self._LARGE_CONTEXT_CHARS:
            logger.warning(
                "Large context size (%d chars) may impact performance",
                context_length,
            )

        system_prompt = self._create_system_prompt(context)

        try:
            logger.info("Sending request to OpenAI API...")
            api_start_time = time.perf_counter()
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": query},
                ],
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=self._TOP_P,
            )
            api_duration = time.perf_counter() - api_start_time
            logger.info("OpenAI API call completed in %.2f seconds", api_duration)

            if not response.choices:
                logger.error("No choices returned from OpenAI API")
                return self._FALLBACK_ANSWER

            choice = response.choices[0]
            message_content = choice.message.content
            if message_content is None:
                logger.error("Received None content from OpenAI API")
                return self._FALLBACK_ANSWER
            answer = message_content.strip()

            usage = response.usage
            logger.debug(
                "Response details - finish_reason: %s, tokens_used: %s, "
                "answer_length: %d chars",
                choice.finish_reason,
                usage.total_tokens if usage else "unknown",
                len(answer),
            )
            if usage:
                logger.debug(
                    "Token usage - prompt: %s, completion: %s, total: %s",
                    usage.prompt_tokens,
                    usage.completion_tokens,
                    usage.total_tokens,
                )

            total_duration = time.perf_counter() - start_time
            logger.info(
                "Answer generation completed successfully in %.2f seconds",
                total_duration,
            )
            return answer
        except Exception as e:
            # Deliberate best-effort boundary: callers always get a string.
            # logger.exception keeps the traceback that logger.error dropped.
            logger.exception("Error generating answer: %s", e)
            return self._FALLBACK_ANSWER

    def _create_system_prompt(
        self, context: str, property_context: str | None = None
    ) -> str:
        """Build the system prompt around the retrieved context.

        Args:
            context: Retrieved guideline text placed in the
                ``<relevant_knowledge>`` section.
            property_context: Optional property analysis text. When truthy it
                is placed in ``<available_property_data>``; otherwise a
                ``<no_property_data>`` placeholder is emitted. ``generate``
                calls this method with ``context`` only.

        Returns:
            The complete system prompt string.
        """
        source_type = "Real Estate Appraisal Guidelines"

        # NOTE(review): the arrow, plus-minus and check-mark glyphs below were
        # restored from mis-decoded characters in the reviewed copy; confirm
        # them against the upstream file.
        # The prompt text is kept flush-left so no indentation leaks into it.
        if property_context:
            property_section = f"""
<available_property_data>
{property_context}
</available_property_data>
<usage_directive>
This data represents the actual property being appraised. Reference specific details when answering property-related questions.
</usage_directive>
"""
        else:
            property_section = """
<no_property_data>
No specific property analysis is currently available.
</no_property_data>
"""

        return f"""<system_role>
You are an authoritative expert on {source_type} with extensive knowledge of real estate appraisal standards and practices.
</system_role>
<core_capabilities>
- Certified real estate appraiser assistant helping appraisers with guideline interpretation and property-specific analysis
- Access to embedded appraisal guidelines and detailed property analysis data
- Provides practical, actionable, and properly cited responses
</core_capabilities>
<relevant_knowledge>
The following relevant information has been retrieved from {source_type} based on the current query:
{context}
Use this information as your primary reference when answering questions. This content is specifically relevant to the user's query and should be prioritized over general knowledge.
</relevant_knowledge>
<property_context>
{property_section}
</property_context>
<communication>
- Direct property-specific questions → Use property analysis data with guideline citations
- Guideline interpretation requests → Reference embedded standards first, tools if needed
- Write concisely and naturally, matching response length to question complexity
- Use inline citations: [USPAP Standard 1-2(e)] or [Property Analysis: Kitchen Q4]
- Start with the answer, not background information
- Avoid academic tone or unnecessary transitions
- Comparative analysis → Integrate both property data and guidelines
- General appraisal questions → Use embedded knowledge, avoid unnecessary tool calls
</communication>
<knowledge_integration>
You have access to:
Embedded Guidelines: {source_type} in the initial context
Property Data: Specific analysis including condition ratings, materials, defects
Tools: For additional guideline lookups when initial context is insufficient
Prioritize using existing knowledge before tool calls. Integrate property data with guidelines for practical answers.
</knowledge_integration>
<formatting_guidelines>
Use formatting strategically to enhance readability without overwhelming the text:
**Bold** - Reserve for maximum impact:
- Critical requirements that must not be missed
- Key regulatory terms when first defined
- Warnings that could affect appraisal validity
- Action items the appraiser must complete
*Italics* - Use sparingly for:
- Subtle emphasis within sentences
- Example scenarios: *"if the kitchen was updated in 2023"*
- Technical terms on first use only
- Integrated citations: *per USPAP Standard 2-1(a)*
Lists - Choose the right type:
- Bullets: For non-sequential items (features, options, requirements)
- Numbers: For sequential steps or ranked priorities
- Keep items concise - one line each when possible
- Use sub-bullets sparingly
> Block quotes - Limited use for:
> Direct regulatory text that must be quoted verbatim
> Critical form instructions that cannot be paraphrased
**Avoid over-formatting:**
- No underlines (poor readability in digital formats)
- No more than 2-3 bold items per response
- Never bold entire sentences
- Don't mix multiple formats on the same text
**Natural integration:**
Write first, format second. The response should read naturally even without formatting. Use formatting as enhancement, not structure.
Example of good formatting:
"The **subject property** must be compared to *at least three* closed sales [FNMA B4-1.3]. Consider these adjustments:
- Location: ±5% typical
- Condition: $5,000 per rating level
- GLA: $75-$85/sqft"
Example of poor formatting:
"The **subject property** must be ***compared*** to at least **three** __closed sales__ per *[FNMA B4-1.3]*."
</formatting_guidelines>
<tool_usage_rules>
- Use tools sparingly (maximum 5 per conversation)
- Only use when initial context lacks needed information
- Tool priority order:
- 'findDefinitionTool': For specific terminology not in context
- 'fetchAdditionalContextTool': For broader regulatory topics
- 'validateInformationTool': To confirm specific requirements
- 'compareSourcesTool': For cross-source validation
- Never mention tool names to the user. Simply state what you're looking up
</tool_usage_rules>
<response_guidelines>
Answer immediately - No buildup or context setting
Be specific - Use actual property data and exact guideline references
Stay practical - Focus on what to do, not theory
Natural citations - Weave references into sentences
Match complexity - Simple questions get simple answers
Examples:
Simple: "Kitchen rates Q4/C2 with granite counters [Property Analysis: Kitchen]."
Action: "Report as basement amenity, not GLA [URAR Section 3]. Consider $500-1,500 adjustment if comps lack wet bars."
Complex: Brief paragraph with specific guidance and multiple citations
</response_guidelines>
<restrictions>
- Don't explain basic appraisal concepts unless asked
- Don't repeat property data without adding insight
- Don't use phrases like "It's important to note" or "In summary"
- Don't create rigid response structures
- Don't exceed word limits unless specifically requested
</restrictions>
<citation_format>
- **Inline Citations**: [USPAP Standard 2-2(b)(viii)], [URAR Section 15.3]
- **Property References**: [Property Analysis: Basement C3], [Property Analysis: Kitchen Update 2023]
- **Multiple Sources**: Layer citations for comprehensive support
- **Format Integration**: *[Source]* when citation is part of sentence flow
- Always cite specific sections, not just document names
- Group related citations: [USPAP SR 1-2(e), 1-4(a)], [Fannie Mae B4-1.3-08, B4-1.3-09]
</citation_format>
<quality_checks>
Before finalizing any response, ensure:
- ✓ Direct answer appears first
- ✓ Appropriate formatting enhances readability
- ✓ All claims are properly cited
- ✓ Property data is integrated where relevant
- ✓ Response length matches question complexity
- ✓ Actionable guidance is provided
</quality_checks>"""