File size: 2,731 Bytes
88eff1f
4cbe4e9
88eff1f
4cbe4e9
 
88eff1f
4cbe4e9
 
 
88eff1f
 
4cbe4e9
 
88eff1f
4cbe4e9
88eff1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4cbe4e9
 
88eff1f
4cbe4e9
 
88eff1f
4cbe4e9
 
 
 
 
 
 
 
88eff1f
 
 
297954e
88eff1f
 
 
 
502a493
88eff1f
 
 
 
 
 
502a493
4cbe4e9
 
 
 
88eff1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4cbe4e9
88eff1f
4cbe4e9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Base image, pinned to the Ubuntu 22.04 tag.
FROM ubuntu:22.04

# Install system dependencies.
# - DEBIAN_FRONTEND is set inline (not via ENV) so package configuration can
#   never stall the build on an interactive prompt, and the variable does not
#   leak into the runtime environment.
# - ca-certificates is listed explicitly because the HTTPS download of
#   Elasticsearch later in this file depends on it.
# - Recommended packages are intentionally left enabled: requirements.txt is
#   not visible here and may need the build toolchain that python3-pip pulls
#   in. TODO: confirm, then consider adding --no-install-recommends.
# - NOTE(review): the Elasticsearch Linux tarball ships its own JDK, so
#   openjdk-11-jdk is presumably only needed by the Python side — confirm
#   before removing it.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    apt-transport-https \
    ca-certificates \
    curl \
    gnupg \
    openjdk-11-jdk \
    python3 \
    python3-pip \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Elasticsearch release to install. Kept as ENV (not ARG) so the value stays
# visible in the runtime environment, exactly as before.
ENV ES_VERSION=8.8.0

# Elasticsearch JVM options: fixed 1 GiB minimum and maximum heap.
ENV ES_JAVA_OPTS="-Xms1g -Xmx1g"

# Create the non-root user (UID 1000) that will own and run the services.
# This happens before the Elasticsearch install so ownership can be set in
# the same layer that creates the files.
RUN useradd -m -u 1000 appuser

# Download, verify, unpack and configure Elasticsearch in a single layer.
# - The version comes from ES_VERSION, so bumping it is a one-line change.
# - curl -f makes an HTTP error fail the build instead of saving an error
#   page; -L follows redirects.
# - The tarball is checked against the .sha512 file published next to it.
# - elasticsearch.yml is overwritten with a single-node, security-disabled
#   configuration listening on all interfaces.
# - chown runs in this same RUN so the Elasticsearch tree is not duplicated
#   into a second layer just to change its owner.
RUN ES_TARBALL="elasticsearch-${ES_VERSION}-linux-x86_64.tar.gz" && \
    ES_URL="https://artifacts.elastic.co/downloads/elasticsearch/${ES_TARBALL}" && \
    curl -fsSLO "${ES_URL}" && \
    curl -fsSLO "${ES_URL}.sha512" && \
    sha512sum -c "${ES_TARBALL}.sha512" && \
    tar -xzf "${ES_TARBALL}" && \
    rm "${ES_TARBALL}" "${ES_TARBALL}.sha512" && \
    mv "elasticsearch-${ES_VERSION}" /usr/share/elasticsearch && \
    printf '%s\n' \
        "discovery.type: single-node" \
        "xpack.security.enabled: false" \
        "network.host: 0.0.0.0" \
        > /usr/share/elasticsearch/config/elasticsearch.yml && \
    mkdir -p /app /usr/share/elasticsearch/data && \
    chown -R appuser:appuser /app /usr/share/elasticsearch

# Application working directory.
WORKDIR /app

# Install Python dependencies before copying the application source, so this
# layer is reused from the build cache when only source files change.
# --no-cache-dir keeps pip's download cache out of the image.
# NOTE(review): assumes requirements.txt does not reference local paths
# (e.g. "-e ." or "./chunking") — confirm.
COPY --chown=appuser:appuser requirements.txt ./
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy the project's top-level modules and packages, owned by appuser.
COPY --chown=appuser:appuser app.py streamlit.py file_processing.py ingestion.py ./
COPY --chown=appuser:appuser chunking ./chunking
COPY --chown=appuser:appuser embeddings ./embeddings
COPY --chown=appuser:appuser prompting ./prompting
COPY --chown=appuser:appuser elastic ./elastic

# Copy ES data if needed - consider if this is actually necessary
COPY --chown=appuser:appuser es_data /usr/share/elasticsearch/data

# Runtime environment, declared in a single instruction:
# - STREAMLIT_SERVER_*: headless mode, port 7860, CORS disabled.
# - ES_HOST / ES_PORT / ELASTICSEARCH_HOSTS: the local Elasticsearch endpoint
#   (presumably read by the application code — confirm against the app).
ENV STREAMLIT_SERVER_HEADLESS=true \
    STREAMLIT_SERVER_PORT=7860 \
    STREAMLIT_SERVER_ENABLE_CORS=false \
    ES_HOST=localhost \
    ES_PORT=9200 \
    ELASTICSEARCH_HOSTS="http://localhost:9200"

# Document the ports the container listens on: Streamlit (7860) and
# Elasticsearch (9200).
EXPOSE 7860 9200

# Switch to non-root user; everything below (including the container's main
# process) runs as appuser.
USER appuser

# Create the startup script.
# - Written with printf, one argument per script line, so the result does not
#   depend on whether the shell's echo interprets "\n" escapes.
# - The script records the Elasticsearch PID and aborts with a non-zero exit
#   code if Elasticsearch dies while we are still waiting for it, instead of
#   polling forever.
# - Streamlit is started with exec so it replaces the wrapper shell and
#   receives signals (e.g. SIGTERM from "docker stop") directly.
RUN printf '%s\n' \
    '#!/bin/bash' \
    '# Start Elasticsearch in the background' \
    '/usr/share/elasticsearch/bin/elasticsearch &' \
    'ES_PID=$!' \
    '' \
    '# Wait for Elasticsearch to become available' \
    'echo "Waiting for Elasticsearch to start..."' \
    'until curl -s http://localhost:9200 > /dev/null; do' \
    '  if ! kill -0 "$ES_PID" 2>/dev/null; then' \
    '    echo "Elasticsearch exited before becoming available" >&2' \
    '    exit 1' \
    '  fi' \
    '  sleep 2' \
    '  echo "Still waiting for Elasticsearch..."' \
    'done' \
    'echo "Elasticsearch is up and running!"' \
    '' \
    '# Start Streamlit' \
    'echo "Starting Streamlit application..."' \
    'exec streamlit run /app/streamlit.py' \
    > /app/start.sh && chmod +x /app/start.sh

# Command to run
CMD ["/app/start.sh"]