import logging
from dataclasses import dataclass
from typing import Dict, List, Optional

from huggingface_hub import AsyncInferenceClient

from config.settings import Settings

# Module-level logger: emit debug output to stderr with timestamps.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch.setFormatter(formatter)
logger.addHandler(ch)


@dataclass
class LLMConfig:
    """Static configuration for the underlying chat model."""

    api_key: str
    model_name: str
    temperature: float = 0.01
    max_tokens: int = 512


class LLMService:
    def __init__(
        self,
        api_key: Optional[str] = None,
        model_name: Optional[str] = None,
    ):
        """
        LLMService that uses the Hugging Face AsyncInferenceClient for chat completions.

        Explicit arguments take precedence over values loaded from Settings.
        """
        settings = Settings()

        # Fall back to configured defaults when no explicit overrides are given.
        key = api_key or settings.hf_token
        name = model_name or settings.effective_model_name

        self.config = LLMConfig(
            api_key=key,
            model_name=name,
            temperature=settings.hf_temperature,
            max_tokens=settings.hf_max_new_tokens,
        )

        self.client = AsyncInferenceClient(token=self.config.api_key)

    async def get_chat_completion(self, messages: List[Dict[str, str]]) -> str:
        """
        Return the assistant response for a chat-style messages array.
        """
        logger.debug(f"Chat completion request with model: {self.config.model_name}")

        try:
            # chat_completion on AsyncInferenceClient is a coroutine; awaiting it
            # keeps the event loop free during the network round trip.
            response = await self.client.chat_completion(
                messages=messages,
                model=self.config.model_name,
                max_tokens=self.config.max_tokens,
                temperature=self.config.temperature,
            )

            content = response.choices[0].message.content
            logger.debug(f"Chat completion response: {content[:200]}")

            return content

        except Exception as e:
            logger.error(f"Chat completion error: {e}")
            raise RuntimeError(f"HF chat completion error: {e}") from e
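

# A minimal usage sketch, not part of the service itself. Assumptions: Settings
# can resolve a valid HF token and model name from the environment, and the
# message payload below is illustrative only.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        service = LLMService()
        reply = await service.get_chat_completion(
            [{"role": "user", "content": "Say hello in one sentence."}]
        )
        print(reply)

    asyncio.run(_demo())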