{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bde2a402",
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports live in this cell so a fresh kernel can Restart & Run All.\n",
    "import getpass\n",
    "import os\n",
    "\n",
    "import openai\n",
    "from langchain.chains import RetrievalQA\n",
    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
    "from langchain_community.document_loaders import TextLoader\n",
    "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
    "from langchain_community.vectorstores import Chroma\n",
    "from langchain_community.vectorstores.azuresearch import AzureSearch\n",
    "from langchain_openai import AzureOpenAIEmbeddings, ChatOpenAI, OpenAIEmbeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d6b66606",
   "metadata": {},
   "outputs": [],
   "source": [
    "# OpenAI-compatible endpoint that the LangChain and openai clients are pointed at.\n",
    "BASE_URL = \"https://svip.xty.app/v1\"\n",
    "\n",
    "# Never store the API key in the notebook: take it from the environment and\n",
    "# fall back to an interactive prompt whose input is not echoed or saved.\n",
    "if not os.environ.get(\"OPENAI_API_KEY\"):\n",
    "    os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API key: \")\n",
    "\n",
    "API_SECRET_KEY = os.environ[\"OPENAI_API_KEY\"]\n",
    "os.environ[\"OPENAI_API_BASE\"] = BASE_URL\n",
    "openai.api_key = API_SECRET_KEY"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d272bf45",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the source document.\n",
    "loader = TextLoader(\"term.txt\")\n",
    "documents = loader.load()\n",
    "\n",
    "# Split the document into overlapping chunks.\n",
    "text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)\n",
    "texts = text_splitter.split_documents(documents)\n",
    "\n",
    "# Embed the chunks with HuggingFace's sentence-transformers/all-mpnet-base-v2 model.\n",
    "embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\")\n",
    "\n",
    "# Build the Chroma vector store from the embedded chunks.\n",
    "db = Chroma.from_documents(texts, embeddings)\n",
    "\n",
    "# Retrieve the top matches together with their relevance scores.\n",
    "query = \"特别说明:针对有些专家学者对《意见》中的两处疑问,编者专门联系《意见》起草者\"\n",
    "results = db.similarity_search_with_relevance_scores(query, k=5)\n",
    "\n",
    "# Print each retrieved chunk and its score.\n",
    "for doc, score in results:\n",
    "    print(f\"Content: {doc.page_content}\")\n",
    "    print(f\"Relevance Score: {score}\")\n",
    "    print(\"-----\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5eee4c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate answers with GPT-4. It is a chat model, so it is driven through\n",
    "# ChatOpenAI rather than the completion-style OpenAI wrapper.\n",
    "llm = ChatOpenAI(model=\"gpt-4\")\n",
    "retriever = db.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": 5})\n",
    "qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type=\"stuff\", retriever=retriever)\n",
    "\n",
    "# Ask a question and print the answer; replace the placeholder with a real question.\n",
    "query = \"your_query\"\n",
    "result = qa_chain.invoke({\"query\": query})[\"result\"]\n",
    "print(f\"Answer: {result}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77d1ffee",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}