"""LangGraph Agent"""
import cmath
import os

from dotenv import load_dotenv
from langgraph.graph import START, StateGraph, MessagesState
from langgraph.prebuilt import tools_condition
from langgraph.prebuilt import ToolNode
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
from langchain_community.vectorstores import SupabaseVectorStore
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.messages import AIMessage
from langchain_core.tools import tool
from langchain_core.tools import StructuredTool
from langchain.tools.retriever import create_retriever_tool
from langchain.tools import Tool
from langchain_openai import ChatOpenAI
from supabase.client import Client, create_client
#from langchain_chroma import Chroma

from code_interpreter import CodeInterpreter
# Shared interpreter instance used by the code-execution tool defined below.
interpreter_instance = CodeInterpreter()
# Must run before the environment lookups that follow so that values from a
# local .env file are visible to them.
load_dotenv()
# Each of these is None when the corresponding variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_SERVICE_KEY = os.environ.get("SUPABASE_SERVICE_KEY")
### ======================================== MATHEMATICAL TOOLS ======================================== ###
def multiply(a: int, b: int) -> int:
    """Return the product of ``a`` and ``b``."""
    product = a * b
    return product
# ``Tool`` is a single-input wrapper: it hands one string to ``func``, so a
# two-argument function cannot be called through it. A structured tool infers
# a two-field argument schema from the function signature instead.
multiply_tool = StructuredTool.from_function(
    func=multiply,
    name="multiply",
    description="Multiply two numbers. Args (a: first int, b: second int)",
)
def add(a: int, b: int) -> int:
    """Return the sum of ``a`` and ``b``."""
    total = a + b
    return total
# ``Tool`` is a single-input wrapper: it hands one string to ``func``, so a
# two-argument function cannot be called through it. A structured tool infers
# a two-field argument schema from the function signature instead.
add_tool = StructuredTool.from_function(
    func=add,
    name="add",
    description="Add two numbers. Args (a: first int, b: second int)",
)
def substract(a: int, b: int) -> int:
    """Return ``a`` minus ``b``."""
    difference = a - b
    return difference
# ``Tool`` is a single-input wrapper: it hands one string to ``func``, so a
# two-argument function cannot be called through it. A structured tool infers
# a two-field argument schema from the function signature instead.
substract_tool = StructuredTool.from_function(
    func=substract,
    name="substract",
    description="Substract two numbers. Args (a: first int, b: second int)",
)
def divide(a: int, b: int) -> float:
    """Return ``a`` divided by ``b`` using true division.

    Args:
        a: The dividend.
        b: The divisor.

    Returns:
        The quotient as a float (``/`` never truncates, even for ints).

    Raises:
        ValueError: If ``b`` is zero.
    """
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b
# ``Tool`` is a single-input wrapper: it hands one string to ``func``, so a
# two-argument function cannot be called through it. A structured tool infers
# a two-field argument schema from the function signature instead.
divide_tool = StructuredTool.from_function(
    func=divide,
    name="divide",
    description="Divide two numbers. Args (a: first int, b: second int)",
)
def modulus(a: int, b: int) -> int:
    """Return the remainder of ``a`` divided by ``b``."""
    remainder = a % b
    return remainder
# ``Tool`` is a single-input wrapper: it hands one string to ``func``, so a
# two-argument function cannot be called through it. A structured tool infers
# a two-field argument schema from the function signature instead.
modulus_tool = StructuredTool.from_function(
    func=modulus,
    name="modulus",
    description="Modulus two numbers. Args (a: first int, b: second int)",
)
def power(a: float, b: float) -> float:
    """Return ``a`` raised to the power ``b``."""
    raised = a ** b
    return raised
# ``Tool`` is a single-input wrapper: it hands one string to ``func``, so a
# two-argument function cannot be called through it. A structured tool infers
# a two-field argument schema from the function signature instead.
power_tool = StructuredTool.from_function(
    func=power,
    name="power",
    description="Power two numbers. Args (a: first float, b: second float)",
)
def square_root(a: float) -> float | complex:
if a >= 0:
return a**0.5
return cmath.sqrt(a)
# ``Tool`` would pass the raw input string to ``square_root``, whose
# ``a >= 0`` comparison needs a number; a structured tool infers a typed
# ``a: float`` argument from the signature. The description now states what
# the function actually computes.
square_root_power = StructuredTool.from_function(
    func=square_root,
    name="square_root",
    description="Compute the square root of a number. Args (a: float)",
)
### ======================================== BROWSER TOOLS ======================================== ###
def wiki_search(query: str) -> dict[str, str]:
    """Search Wikipedia and return the text of at most two matching pages.

    Args:
        query: The search query.

    Returns:
        A dict with a single ``"wiki_results"`` key whose value is the page
        contents joined by a ``---`` separator line.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        f'\n{doc.page_content}\n' for doc in search_docs
    )
    return {"wiki_results": formatted_search_docs}
wiki_search_tool = Tool(
    name="wiki_search",
    func=wiki_search,
    description="Search Wikipedia for a query and return maximum 2 results. Args (query: the search query)"
)
def web_search(query: str) -> dict[str, str]:
    """Search the web with Tavily and return at most three results.

    Args:
        query: The search query.

    Returns:
        A dict with a single ``"web_results"`` key whose value is the result
        contents joined by a ``---`` separator line.
    """
    # ``invoke`` takes the tool input as its first positional argument; a
    # ``query=`` keyword leaves that required argument unfilled.
    search_results = TavilySearchResults(max_results=3).invoke(query)
    if isinstance(search_results, str):
        # NOTE(review): some versions of the Tavily tool appear to return an
        # error string instead of raising; pass it through unchanged rather
        # than iterating it character by character. Confirm against the
        # installed version.
        return {"web_results": search_results}
    # Tavily results are plain dicts (text under "content"), not Document
    # objects, so there is no ``page_content`` attribute to read.
    formatted_search_docs = "\n\n---\n\n".join(
        f'\n{result.get("content", "")}\n' for result in search_results
    )
    return {"web_results": formatted_search_docs}
web_search_tool = Tool(
    name="web_search",
    func=web_search,
    description="Search Tavily for a query and return maximum 3 results. Args (query: the search query)"
)
def arvix_search(query: str) -> dict[str, str]:
    """Search Arxiv for a query and return maximum 3 result.

    Args:
        query: The search query.

    Returns:
        A dict with a single ``"arvix_results"`` key whose value is the first
        1000 characters of each document, joined by a ``---`` separator line.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_search_docs = "\n\n---\n\n".join(
        f'\n{doc.page_content[:1000]}\n' for doc in search_docs
    )
    return {"arvix_results": formatted_search_docs}
arvix_search_tool = Tool(
    name="arvix_search",
    func=arvix_search,
    description="Search Arxiv for a query and return maximum 3 result. Args (query: the search query)"
)
### ======================================== CODE INTERPRETER TOOLS ======================================== ###
def execute_code_multilang(code: str, language: str = "python") -> str:
    """Execute code in multiple languages (Python, Bash, SQL, C, Java) and return results.

    Args:
        code (str): The source code to execute.
        language (str): The language of the code. Supported: "python", "bash", "sql", "c", "java".
            Matching is case-insensitive.

    Returns:
        A string summarizing the execution results (stdout, stderr, errors, plots, dataframes if any),
        or an "unsupported language" message when ``language`` is not recognised.
    """
    supported_languages = ["python", "bash", "sql", "c", "java"]
    language = language.lower()
    if language not in supported_languages:
        return f"❌ Unsupported language: {language}. Supported languages are: {', '.join(supported_languages)}"

    result = interpreter_instance.execute_code(code, language=language)
    response = []
    if result["status"] == "success":
        response.append(f"✅ Code executed successfully in **{language.upper()}**")
        if result.get("stdout"):
            response.append(
                "\n**Standard Output:**\n```\n" + result["stdout"].strip() + "\n```"
            )
        if result.get("stderr"):
            response.append(
                "\n**Standard Error (if any):**\n```\n"
                + result["stderr"].strip()
                + "\n```"
            )
        if result.get("result") is not None:
            response.append(
                "\n**Execution Result:**\n```\n"
                + str(result["result"]).strip()
                + "\n```"
            )
        if result.get("dataframes"):
            # pandas is only needed to render dataframe previews, so it is
            # imported here rather than at module import time.
            import pandas as pd

            for df_info in result["dataframes"]:
                response.append(
                    f"\n**DataFrame `{df_info['name']}` (Shape: {df_info['shape']})**"
                )
                df_preview = pd.DataFrame(df_info["head"])
                response.append("First 5 rows:\n```\n" + str(df_preview) + "\n```")
        if result.get("plots"):
            response.append(
                f"\n**Generated {len(result['plots'])} plot(s)** (Image data returned separately)"
            )
    else:
        response.append(f"❌ Code execution failed in **{language.upper()}**")
        if result.get("stderr"):
            response.append(
                "\n**Error Log:**\n```\n" + result["stderr"].strip() + "\n```"
            )
    return "\n".join(response)
# ``Tool`` accepts a single string input, so the advertised ``language``
# argument could never be supplied through it. A structured tool infers both
# ``code`` and ``language`` (default "python") from the function signature.
execute_code_multilang_tool = StructuredTool.from_function(
    func=execute_code_multilang,
    name="execute_code_multilang",
    description="""Execute code in multiple languages (Python, Bash, SQL, C, Java) and return results.
Args:
code (str): The source code to execute.
language (str): The language of the code. Supported: "python", "bash", "sql", "c", "java".
""",
)
### ======================================== DOCUMENT PROCESSING TOOLS ======================================== ###
### ======================================== RETRIEVER TOOLS ======================================== ###
# Embedding model shared by the vector store below.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
# Disabled Chroma-based alternative, kept as an inert string literal (it is
# evaluated as an expression statement and has no effect).
"""
Using chroma database
vector_store = Chroma(
collection_name="gaia_dataset",
embedding_function=embeddings,
persist_directory="./data/chroma_langchain_db", # Where to save data locally, remove if not necessary
)
# It's not going to be used later
create_retriever_tool = create_retriever_tool(
retriever=vector_store.as_retriever(),
name="Question Search",
description="A tool to retrieve similar questions from a vector store.",
)
"""
# Using supabase database
# build a retriever
# The client is created at import time from the SUPABASE_* values read above.
supabase: Client = create_client(SUPABASE_URL, SUPABASE_SERVICE_KEY)
# Vector store over the "gaia_dataset" table, queried through "match_documents".
vector_store = SupabaseVectorStore(
    embedding=embeddings,
    client=supabase,
    table_name="gaia_dataset",
    query_name="match_documents",
)
# NOTE(review): this rebinds the imported `create_retriever_tool` factory to
# the tool object it returns, so the factory can no longer be called below
# this line. The resulting tool is not added to the `tools` list in this file.
create_retriever_tool = create_retriever_tool(
    retriever=vector_store.as_retriever(),
    name="Question Search",
    description="A tool to retrieve similar questions from a vector store.",
)
# Read the system prompt text once, at import time. The relative path is
# resolved against the current working directory.
with open("system_prompt.txt", mode="r", encoding="utf-8") as f:
    system_prompt = f.read()

# Wrap the prompt text in a system message object.
sys_msg = SystemMessage(content=system_prompt)
# Tools bound to the chat model in ``build_graph``. Every entry is a tool
# object; ``square_root_power`` is the tool wrapper around the ``square_root``
# function (the bare function was listed here before, leaving the wrapper
# unused).
tools = [
    multiply_tool,
    add_tool,
    substract_tool,
    divide_tool,
    modulus_tool,
    power_tool,
    square_root_power,
    wiki_search_tool,
    web_search_tool,
    arvix_search_tool,
    execute_code_multilang_tool,
]
# Build graph function
def build_graph(provider: str = "huggingface"):
    """Build and compile the agent graph.

    The chat model for ``provider`` is created and bound to ``tools`` up
    front, but the graph compiled here contains only the ``retriever`` node,
    which answers from the closest entry in ``vector_store``.

    Args:
        provider: One of ``"google"``, ``"groq"``, ``"openai"`` or
            ``"huggingface"``.

    Returns:
        The compiled graph.

    Raises:
        ValueError: If ``provider`` is not one of the supported names.
    """
    if provider == "google":
        # Google Gemini
        chat = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
    elif provider == "groq":
        # Groq https://console.groq.com/docs/models
        chat = ChatGroq(model="qwen-qwq-32b", temperature=0)  # optional : qwen-qwq-32b gemma2-9b-it
    elif provider == "openai":
        # Set the model from openai here
        model_openai = "gpt-4o"
        chat = ChatOpenAI(
            model=model_openai,
            temperature=0,
            api_key=OPENAI_API_KEY,
        )
    elif provider == "huggingface":
        # Models tried previously:
        #   mistralai/Mistral-7B-Instruct-v0.2
        #   Qwen/Qwen2.5-Coder-32B-Instruct        -> it doesn't reply well
        #   deepseek-ai/DeepSeek-Coder-V2-Instruct -> error on StopIteration
        #   meta-llama/CodeLlama-34b-Instruct-hf   -> error on StopIteration
        repo_id = "WizardLMTeam/WizardCoder-15B-V1.0"
        chat = ChatHuggingFace(
            llm=HuggingFaceEndpoint(
                repo_id=repo_id,
                temperature=0.1,
            )
        )
    else:
        raise ValueError("Invalid provider. Choose 'google', 'groq', 'openai' or 'huggingface'.")

    # Bind tools to LLM
    chat_with_tools = chat.bind_tools(tools)

    def assistant(state: MessagesState):
        """Assistant node: run the tool-bound chat model on the conversation.

        Not wired into the graph compiled below; used only by the alternative
        wirings described in the comment further down.
        """
        return {"messages": [chat_with_tools.invoke(state["messages"])]}

    def retriever(state: MessagesState):
        """Retriever node: answer with the closest stored document.

        Uses the content of the last message as the query. If the matched
        document contains "Final answer :", only the text after its last
        occurrence is returned; otherwise the whole document text is.
        """
        query = state["messages"][-1].content
        matches = vector_store.similarity_search(query, k=1)
        if not matches:
            # An empty store or a query with no match would otherwise raise
            # IndexError on ``[0]`` and abort the whole graph run.
            return {"messages": [AIMessage(content="No similar question found in the vector store.")]}
        content = matches[0].page_content
        if "Final answer :" in content:
            answer = content.split("Final answer :")[-1].strip()
        else:
            answer = content.strip()
        return {"messages": [AIMessage(content=answer)]}

    # Alternative wirings that were tried and are currently disabled:
    #   * retriever + tools: START -> retriever -> assistant, with
    #     ``tools_condition`` routing from assistant to a ``ToolNode(tools)``
    #     node named "tools" (the tools -> assistant edge was disabled).
    #   * tools only: START -> assistant, ``tools_condition`` routing from
    #     assistant to "tools", and an edge tools -> assistant.

    builder = StateGraph(MessagesState)
    builder.add_node("retriever", retriever)
    # The retriever is both the entry point and the finish point.
    builder.set_entry_point("retriever")
    builder.set_finish_point("retriever")

    # Compile graph
    return builder.compile()