# rag_hydro_json / Dockerfile
# baderanas's picture
# Update Dockerfile
# 502a493 verified
# Base image: Ubuntu 22.04 LTS.
FROM ubuntu:22.04

# Install system dependencies.
# - DEBIAN_FRONTEND is set inline (not via ENV) so package configuration can
#   never block the build on an interactive prompt, without leaking the
#   setting into the runtime environment.
# - ca-certificates is listed explicitly because later steps download over
#   HTTPS with curl.
# - Packages are kept one per line in alphabetical order for clean diffs.
# - The apt package index is removed in the same layer to keep the image small.
# NOTE(review): recommended packages are intentionally still installed; pip may
#   rely on the build tools they pull in when a requirement has no wheel.
# NOTE(review): Elasticsearch ships with a bundled JDK, so openjdk-11-jdk is
#   presumably only needed if the Python code itself requires Java; likewise
#   wget, gnupg and apt-transport-https are not used by any step visible in
#   this file. Confirm before removing any of them.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        apt-transport-https \
        ca-certificates \
        curl \
        gnupg \
        openjdk-11-jdk \
        python3 \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Install Elasticsearch from the official Linux x86_64 tarball into
# /usr/share/elasticsearch.
# - ES_VERSION is the single source of truth for the release: bump it here and
#   the download URL, checksum file and extracted directory name all follow.
# - curl -f makes an HTTP error fail the build instead of saving an error page
#   as the "tarball".
# - The archive is verified against the SHA-512 file published next to it
#   before it is unpacked.
# - Both downloaded files are deleted in the same layer so they do not bloat
#   the image.
# NOTE(review): the artifact name hard-codes linux-x86_64, so this step will
#   not produce a working install on other CPU architectures.
ENV ES_VERSION=8.8.0
RUN es_tarball="elasticsearch-${ES_VERSION}-linux-x86_64.tar.gz" \
    && es_url="https://artifacts.elastic.co/downloads/elasticsearch/${es_tarball}" \
    && curl -fsSL -O "${es_url}" \
    && curl -fsSL -O "${es_url}.sha512" \
    && sha512sum -c "${es_tarball}.sha512" \
    && tar -xzf "${es_tarball}" \
    && mv "elasticsearch-${ES_VERSION}" /usr/share/elasticsearch \
    && rm "${es_tarball}" "${es_tarball}.sha512"
# Overwrite elasticsearch.yml with a minimal three-line configuration:
#   discovery.type: single-node      - run as a standalone node
#   xpack.security.enabled: false    - security features switched off
#   network.host: 0.0.0.0            - bind to all network interfaces
# printf emits one setting per line through a single redirect.
RUN printf '%s\n' \
        'discovery.type: single-node' \
        'xpack.security.enabled: false' \
        'network.host: 0.0.0.0' \
        > /usr/share/elasticsearch/config/elasticsearch.yml

# JVM options passed to Elasticsearch: initial and maximum heap both set to 1 GiB.
ENV ES_JAVA_OPTS="-Xms1g -Xmx1g"
# Create the unprivileged account "appuser" (UID 1000, with a home directory),
# make sure the application and Elasticsearch data directories exist, and hand
# ownership of /app and the whole Elasticsearch tree to that account.
# NOTE(review): a recursive chown in its own layer re-stores every file it
#   touches, so the Elasticsearch install is effectively stored twice in the
#   image; consider setting ownership in the layer that unpacks it.
RUN useradd --create-home --uid 1000 appuser \
    && mkdir -p /app /usr/share/elasticsearch/data \
    && chown -R appuser:appuser /app /usr/share/elasticsearch
# All following relative paths resolve against /app.
WORKDIR /app

# Install Python dependencies BEFORE copying the application sources.
# Only requirements.txt feeds this layer, so editing application code no longer
# invalidates the (slow) dependency installation; --no-cache-dir keeps pip's
# download cache out of the image.
COPY --chown=appuser:appuser requirements.txt ./
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy the project files (top-level modules first, then each package directory).
COPY --chown=appuser:appuser app.py streamlit.py file_processing.py ingestion.py ./
COPY --chown=appuser:appuser chunking ./chunking
COPY --chown=appuser:appuser embeddings ./embeddings
COPY --chown=appuser:appuser prompting ./prompting
COPY --chown=appuser:appuser elastic ./elastic

# Seed the Elasticsearch data directory from the build context.
# NOTE(review): consider whether shipping es_data inside the image is actually
#   necessary.
COPY --chown=appuser:appuser es_data /usr/share/elasticsearch/data
# Runtime environment.
# STREAMLIT_SERVER_* configure the Streamlit server (headless mode, port 7860,
# CORS disabled). ES_HOST / ES_PORT / ELASTICSEARCH_HOSTS point at the local
# Elasticsearch instance; presumably they are read by the application code -
# verify against the Python sources.
ENV STREAMLIT_SERVER_HEADLESS=true \
    STREAMLIT_SERVER_PORT=7860 \
    STREAMLIT_SERVER_ENABLE_CORS=false \
    ES_HOST=localhost \
    ES_PORT=9200 \
    ELASTICSEARCH_HOSTS="http://localhost:9200"

# Document the ports the container listens on (this does not publish them).
# Elasticsearch HTTP API
EXPOSE 9200
# Streamlit UI
EXPOSE 7860
# Drop privileges: everything below, including the container's main process,
# runs as appuser.
USER appuser

# Generate the startup script /app/start.sh.
# - printf '%s\n' writes each argument verbatim on its own line, so the result
#   does not depend on how the build shell's echo treats "\n" escapes, and the
#   script's own "#" comments are no longer dropped by the Dockerfile parser
#   (every line here starts with a quote, not with "#").
# - The wait loop aborts if the Elasticsearch process exits before answering on
#   port 9200, instead of polling forever.
# - Streamlit is started with exec so it replaces the shell and receives stop
#   signals directly.
RUN printf '%s\n' \
        '#!/bin/bash' \
        '# Start Elasticsearch in the background and remember its PID' \
        '/usr/share/elasticsearch/bin/elasticsearch &' \
        'es_pid=$!' \
        '' \
        '# Wait for Elasticsearch to become available' \
        'echo "Waiting for Elasticsearch to start..."' \
        'until curl -s http://localhost:9200 > /dev/null; do' \
        '  # Give up if Elasticsearch died during startup' \
        '  if ! kill -0 "$es_pid" 2>/dev/null; then' \
        '    echo "Elasticsearch exited before becoming available" >&2' \
        '    exit 1' \
        '  fi' \
        '  sleep 2' \
        '  echo "Still waiting for Elasticsearch..."' \
        'done' \
        'echo "Elasticsearch is up and running!"' \
        '' \
        '# Start Streamlit (exec: it takes over as the main process)' \
        'echo "Starting Streamlit application..."' \
        'exec streamlit run /app/streamlit.py' \
        > /app/start.sh \
    && chmod +x /app/start.sh

# Default command: launch Elasticsearch, wait for it, then run Streamlit.
CMD ["/app/start.sh"]