hfendpoints-images
/

text-generation-sglang-gpu

Morgan Funtowicz committed on Apr 29

Commit

96ef4b5

1 Parent(s): 746b297

feat(text-generation): initial commit

Files changed (2) hide show

Dockerfile ADDED Viewed

+# Text-generation serving image layered on top of the upstream SGLang image.
+#
+# SGLANG_TAG selects the base image tag. It defaults to `latest` (the previous
+# hard-coded value); pass `--build-arg SGLANG_TAG=<tag>` to pin a release and
+# get reproducible builds.
+ARG SGLANG_TAG=latest
+FROM lmsysorg/sglang:${SGLANG_TAG}
+
+# Runtime defaults read by entrypoint.sh and forwarded to the SGLang launcher.
+# Each one can be overridden when the container is started.
+ENV MODEL_ID="/repository" \
+    KV_CACHE_DTYPE="auto" \
+    TP_SIZE="1" \
+    QUANT_METHOD="w8a8_int8"
+
+# Port the server started by entrypoint.sh listens on.
+EXPOSE 80
+
+# Set the executable bit while copying (BuildKit) instead of in a separate
+# `RUN chmod` layer, which would store the script a second time.
+COPY --chmod=0755 entrypoint.sh /usr/local/endpoint/
+
+ENTRYPOINT ["/usr/local/endpoint/entrypoint.sh"]

entrypoint.sh ADDED Viewed

+#!/bin/bash
+python3 -m sglang.launch_server \
+  --model-path $MODEL_ID \
+  --kv-cache-dtype $KV_CACHE_DTYPE \
+  --tensor-parallel-size $TP_SIZE \
+  --expert-parallel-size $TP_SIZE \
+  --quantization $QUANT_METHOD \
+  --enable-torch-compile \
+  --enable-ep-moe \
+  --host 0.0.0.0 \
+  --port 80