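# NOTE(review): the original first lines read "Spaces: Sleeping Sleeping" - apparently web-page residue from where this Dockerfile was copied, not build instructions.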
# Base image: Ubuntu 22.04.
FROM ubuntu:22.04

# Install system dependencies (listed alphabetically so diffs stay small).
# DEBIAN_FRONTEND is set inline, for this command only, so that a package
# configuration prompt (tzdata is the usual culprit) cannot stall the build
# and the variable does not leak into the runtime environment.
# The apt package lists are removed in the same layer so they never reach
# the image.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        apt-transport-https \
        curl \
        gnupg \
        openjdk-11-jdk \
        python3 \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Elasticsearch release to install. Declared once and reused below, so
# bumping the version is a one-line change.
ENV ES_VERSION=8.8.0

# JVM options for Elasticsearch: fixed 1 GiB minimum and maximum heap.
ENV ES_JAVA_OPTS="-Xms1g -Xmx1g"

# Create the non-root user (UID 1000) that owns and runs the services,
# together with the application directory it owns.
RUN useradd -m -u 1000 appuser && \
    mkdir -p /app && \
    chown appuser:appuser /app

# Download, verify, unpack and configure Elasticsearch, then hand the whole
# tree to appuser - all in ONE layer. A recursive chown in a later RUN would
# copy every Elasticsearch file into a second layer and roughly double the
# space the install occupies in the image.
#   - curl -f turns an HTTP error into a build failure instead of saving the
#     error page as the "tarball".
#   - The published .sha512 file is checked before the archive is unpacked.
#   - elasticsearch.yml is overwritten with a minimal config: single-node
#     discovery, security disabled, listening on all interfaces.
#   - The data directory is created up front so it exists with the right owner.
RUN ES_TARBALL="elasticsearch-${ES_VERSION}-linux-x86_64.tar.gz" && \
    ES_URL="https://artifacts.elastic.co/downloads/elasticsearch/${ES_TARBALL}" && \
    curl -fsSLO "${ES_URL}" && \
    curl -fsSLO "${ES_URL}.sha512" && \
    sha512sum -c "${ES_TARBALL}.sha512" && \
    tar -xzf "${ES_TARBALL}" && \
    mv "elasticsearch-${ES_VERSION}" /usr/share/elasticsearch && \
    rm "${ES_TARBALL}" "${ES_TARBALL}.sha512" && \
    printf '%s\n' \
        'discovery.type: single-node' \
        'xpack.security.enabled: false' \
        'network.host: 0.0.0.0' \
        > /usr/share/elasticsearch/config/elasticsearch.yml && \
    mkdir -p /usr/share/elasticsearch/data && \
    chown -R appuser:appuser /usr/share/elasticsearch
# Application directory; the relative COPY destinations below resolve
# against it.
WORKDIR /app

# Install Python dependencies BEFORE copying the application source, so this
# (slow) layer is rebuilt only when requirements.txt itself changes rather
# than on every code edit. --no-cache-dir keeps pip's download cache out of
# the image.
COPY --chown=appuser:appuser requirements.txt ./
RUN pip3 install --no-cache-dir -r requirements.txt

# Top-level Python modules of the project.
COPY --chown=appuser:appuser app.py streamlit.py file_processing.py ingestion.py ./

# Project packages, one directory each.
COPY --chown=appuser:appuser chunking ./chunking
COPY --chown=appuser:appuser embeddings ./embeddings
COPY --chown=appuser:appuser prompting ./prompting
COPY --chown=appuser:appuser elastic ./elastic

# Copy ES data if needed - consider if this is actually necessary.
# The contents of es_data are baked into the Elasticsearch data directory.
COPY --chown=appuser:appuser es_data /usr/share/elasticsearch/data
# Runtime environment, grouped into a single instruction.
#   STREAMLIT_*  - Streamlit server settings: headless, port 7860, CORS off.
#   ES_* / ELASTICSEARCH_HOSTS - address of the Elasticsearch node on
#     localhost:9200 (presumably read by the application code; confirm
#     against the Python sources).
ENV STREAMLIT_SERVER_HEADLESS=true \
    STREAMLIT_SERVER_PORT=7860 \
    STREAMLIT_SERVER_ENABLE_CORS=false \
    ES_HOST=localhost \
    ES_PORT=9200 \
    ELASTICSEARCH_HOSTS="http://localhost:9200"

# Ports the container listens on: 9200 (Elasticsearch) and 7860 (Streamlit).
# EXPOSE only documents them; it does not publish them.
EXPOSE 9200 7860
# Switch to the non-root user: the startup script below is written as
# appuser, and the container runs as appuser.
USER appuser

# Generate /app/start.sh, the container entry script.
# Each script line is passed to printf as its own quoted argument:
#   - it does not depend on /bin/sh's echo expanding "\n" escapes, and
#   - script comments survive (a continued line that begins with '#' is
#     treated as a Dockerfile comment and dropped by the parser).
# Script behaviour:
#   1. start Elasticsearch in the background and record its PID;
#   2. poll http://localhost:9200 every 2 s until it answers, aborting with
#      exit status 1 if the Elasticsearch process dies first (otherwise the
#      loop would spin forever);
#   3. exec Streamlit so it replaces the shell and receives the signals sent
#      to the container's main process.
RUN printf '%s\n' \
        '#!/bin/bash' \
        '# Start Elasticsearch in the background and remember its PID.' \
        '/usr/share/elasticsearch/bin/elasticsearch &' \
        'ES_PID=$!' \
        '' \
        '# Wait for Elasticsearch to answer; give up if its process has died.' \
        'echo "Waiting for Elasticsearch to start..."' \
        'until curl -s http://localhost:9200 > /dev/null; do' \
        '    if ! kill -0 "$ES_PID" 2>/dev/null; then' \
        '        echo "Elasticsearch exited before becoming available." >&2' \
        '        exit 1' \
        '    fi' \
        '    sleep 2' \
        '    echo "Still waiting for Elasticsearch..."' \
        'done' \
        'echo "Elasticsearch is up and running!"' \
        '' \
        '# Replace this shell with Streamlit so it receives container signals.' \
        'echo "Starting Streamlit application..."' \
        'exec streamlit run /app/streamlit.py' \
        > /app/start.sh && \
    chmod +x /app/start.sh

# Command to run: the generated startup script (exec form, no wrapping shell).
CMD ["/app/start.sh"]