Morgan Funtowicz committed on
Commit
96ef4b5
·
1 Parent(s): 746b297

feat(text-generation): initial commit

Browse files
Files changed (2) hide show
  1. Dockerfile +11 -0
  2. entrypoint.sh +12 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM lmsysorg/sglang:latest
2
+ # NOTE(review): ':latest' is a moving tag; pin a release tag or digest for reproducible builds.
3
+ # Defaults read by entrypoint.sh; override them at container start to change the served model/config.
4
+ ENV MODEL_ID="/repository"
5
+ ENV KV_CACHE_DTYPE="auto"
6
+ ENV TP_SIZE="1"
7
+ ENV QUANT_METHOD="w8a8_int8"
8
+ EXPOSE 80
9
+ # Copy the launcher with its executable bit already set, avoiding a second layer just for chmod.
10
+ COPY --chmod=755 entrypoint.sh /usr/local/endpoint/
11
+ ENTRYPOINT ["/usr/local/endpoint/entrypoint.sh"]
entrypoint.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Start the SGLang server; 'exec' replaces the shell so the server is PID 1 and receives stop signals.
3
+ exec python3 -m sglang.launch_server \
4
+ --model-path "$MODEL_ID" \
5
+ --kv-cache-dtype "$KV_CACHE_DTYPE" \
6
+ --tensor-parallel-size "$TP_SIZE" \
7
+ --expert-parallel-size "$TP_SIZE" \
8
+ --quantization "$QUANT_METHOD" \
9
+ --enable-torch-compile \
10
+ --enable-ep-moe \
11
+ --host 0.0.0.0 \
12
+ --port 80