Commit 8dfc799 (parent: 2d84a88): add vllm deployment info
README.md CHANGED

@@ -89,6 +89,8 @@ Optionally, you can use different API providers and models.
 - `OPENAI_BASE_URL`: The base URL for any OpenAI compatible API, e.g. `https://api.openai.com/v1/`.
 - `OLLAMA_BASE_URL`: The base URL for any Ollama compatible API, e.g. `http://127.0.0.1:11434/`.
 - `HUGGINGFACE_BASE_URL`: The base URL for any Hugging Face compatible API, e.g. a TGI server or Dedicated Inference Endpoints. If you want to use serverless inference, only set the `MODEL`.
+- `VLLM_BASE_URL`: The base URL for any VLLM compatible API, e.g. `http://localhost:8000/`.
+

 SFT and Chat Data generation is only supported with Hugging Face Inference Endpoints, and you can set the following environment variables to use it with models other than Llama3 and Qwen2.

examples/vllm_deployment.py ADDED

@@ -0,0 +1,16 @@
+# pip install synthetic-dataset-generator
+# vllm serve Qwen/Qwen2.5-1.5B-Instruct
+import os
+
+from synthetic_dataset_generator import launch
+
+# os.environ["HF_TOKEN"] = "hf_..."  # push the data to huggingface
+os.environ["VLLM_BASE_URL"] = "http://127.0.0.1:8000/"  # vllm base url
+os.environ["MODEL"] = "Qwen/Qwen2.5-1.5B-Instruct"  # model id
+os.environ["TOKENIZER_ID"] = "Qwen/Qwen2.5-1.5B-Instruct"  # tokenizer id
+os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "qwen2"
+os.environ["MAX_NUM_ROWS"] = "10000"
+os.environ["DEFAULT_BATCH_SIZE"] = "2"
+os.environ["MAX_NUM_TOKENS"] = "1024"
+
+launch()
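Assuming the `vllm serve` command from the file's second comment is already running, `python examples/vllm_deployment.py` should bring up the generator UI backed by the local endpoint. Note that `MAGPIE_PRE_QUERY_TEMPLATE` must match the served model family (here `qwen2` for a Qwen2.5 model).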
src/synthetic_dataset_generator/constants.py CHANGED

@@ -18,23 +18,28 @@ TOKENIZER_ID = os.getenv(key="TOKENIZER_ID", default=None)
 OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
 OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL")
 HUGGINGFACE_BASE_URL = os.getenv("HUGGINGFACE_BASE_URL")
+VLLM_BASE_URL = os.getenv("VLLM_BASE_URL")
+
+# check if model is set correctly
 if HUGGINGFACE_BASE_URL and MODEL:
     raise ValueError(
         "`HUGGINGFACE_BASE_URL` and `MODEL` cannot be set at the same time. Use a model id for serverless inference and a base URL dedicated to Hugging Face Inference Endpoints."
     )
 if not MODEL:
-    if OPENAI_BASE_URL or OLLAMA_BASE_URL:
+    if OPENAI_BASE_URL or OLLAMA_BASE_URL or VLLM_BASE_URL:
         raise ValueError("`MODEL` is not set. Please provide a model id for inference.")

 # Check if multiple base URLs are provided
 base_urls = [
-    url for url in [OPENAI_BASE_URL, OLLAMA_BASE_URL, HUGGINGFACE_BASE_URL] if url
+    url
+    for url in [OPENAI_BASE_URL, OLLAMA_BASE_URL, HUGGINGFACE_BASE_URL, VLLM_BASE_URL]
+    if url
 ]
 if len(base_urls) > 1:
     raise ValueError(
         f"Multiple base URLs provided: {', '.join(base_urls)}. Only one base URL can be set at a time."
     )
-BASE_URL = OPENAI_BASE_URL or OLLAMA_BASE_URL or HUGGINGFACE_BASE_URL
+BASE_URL = OPENAI_BASE_URL or OLLAMA_BASE_URL or HUGGINGFACE_BASE_URL or VLLM_BASE_URL


 # API Keys
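These checks run at module import time, so a misconfiguration fails fast. A minimal sketch of the failure mode, assuming the package is installed and the environment is otherwise clean:

import os

# Hypothetical misconfiguration: two providers set at once.
os.environ["MODEL"] = "Qwen/Qwen2.5-1.5B-Instruct"
os.environ["OPENAI_BASE_URL"] = "https://api.openai.com/v1/"
os.environ["VLLM_BASE_URL"] = "http://localhost:8000/"

# Importing the module executes the validation above; with two base URLs
# set, this should raise:
#   ValueError: Multiple base URLs provided: https://api.openai.com/v1/,
#   http://localhost:8000/. Only one base URL can be set at a time.
import synthetic_dataset_generator.constants  # noqa: F401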
src/synthetic_dataset_generator/pipelines/base.py CHANGED

@@ -2,7 +2,7 @@ import math
 import random

 import gradio as gr
-from distilabel.llms import InferenceEndpointsLLM, OllamaLLM, OpenAILLM
+from distilabel.llms import ClientvLLM, InferenceEndpointsLLM, OllamaLLM, OpenAILLM
 from distilabel.steps.tasks import TextGeneration

 from synthetic_dataset_generator.constants import (

@@ -14,6 +14,7 @@ from synthetic_dataset_generator.constants import (
     OLLAMA_BASE_URL,
     OPENAI_BASE_URL,
     TOKENIZER_ID,
+    VLLM_BASE_URL,
 )

 TOKEN_INDEX = 0

@@ -109,6 +110,17 @@ def _get_llm(use_magpie_template=False, **kwargs):
             tokenizer_id=TOKENIZER_ID or MODEL,
             **kwargs,
         )
+    elif VLLM_BASE_URL:
+        if "generation_kwargs" in kwargs:
+            if "do_sample" in kwargs["generation_kwargs"]:
+                del kwargs["generation_kwargs"]["do_sample"]
+        llm = ClientvLLM(
+            base_url=VLLM_BASE_URL,
+            model=MODEL,
+            tokenizer=TOKENIZER_ID or MODEL,
+            api_key=_get_next_api_key(),
+            **kwargs,
+        )
     else:
         llm = InferenceEndpointsLLM(
             api_key=_get_next_api_key(),
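For reference, the new `elif` branch first strips `do_sample` from `generation_kwargs`, presumably because the OpenAI-compatible completions API served by vLLM has no such parameter, then builds a `ClientvLLM` client. A standalone sketch of the equivalent construction (the generation kwargs and dummy API key are illustrative, not taken from the commit):

from distilabel.llms import ClientvLLM

llm = ClientvLLM(
    base_url="http://127.0.0.1:8000/",  # the vLLM server from the example script
    model="Qwen/Qwen2.5-1.5B-Instruct",
    tokenizer="Qwen/Qwen2.5-1.5B-Instruct",
    api_key="not-used",  # vLLM accepts any key unless the server configures one
    generation_kwargs={"temperature": 0.7, "max_new_tokens": 256},
)
llm.load()
generations = llm.generate(inputs=[[{"role": "user", "content": "Hello!"}]])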