elungky committed on
Commit
8da02bd
·
1 Parent(s): a27c594

Fix "cudnn.h not found" error by dynamically locating cudnn.h in the pip-installed package and symlinking it

Browse files
Files changed (1) hide show
  1. Dockerfile +9 -8
Dockerfile CHANGED
@@ -60,22 +60,23 @@ RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
60
  torchaudio==2.3.1 \
61
  --index-url https://download.pytorch.org/whl/cu121
62
 
63
- # NEW: Ensure cudnn.h is available in the Conda environment's include path.
64
- # The base image has cudnn.h at /usr/local/cuda/include.
65
- # We explicitly copy it to the Conda environment's include directory if it's not already there.
66
  RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
67
  conda activate cosmos-predict1 && \
68
- if [ ! -f "$CONCONDA_PREFIX/include/cudnn.h" ]; then \
69
- echo "cudnn.h not found in Conda environment, copying from /usr/local/cuda/include"; \
 
70
  mkdir -p "$CONDA_PREFIX/include" && \
71
- cp /usr/local/cuda/include/cudnn.h "$CONDA_PREFIX/include/"; \
 
72
  else \
73
- echo "cudnn.h already present in Conda environment"; \
 
74
  fi
75
 
76
  # IMPORTANT: Symlink fix for Transformer Engine compilation (from INSTALL.md).
77
  # These symlinks are for other NVIDIA headers that might be in Python site-packages.
78
- # $CONDA_PREFIX is the current activated Conda environment.
79
  RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
80
  conda activate cosmos-predict1 && \
81
  ln -sf "$CONDA_PREFIX"/lib/python3.10/site-packages/nvidia/*/include/* "$CONDA_PREFIX"/include/ || true && \
 
60
  torchaudio==2.3.1 \
61
  --index-url https://download.pytorch.org/whl/cu121
62
 
63
+ # NEW: Dynamically find cudnn.h and symlink it to the Conda environment's include path.
64
+ # This ensures Transformer Engine can find it during compilation.
 
65
  RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
66
  conda activate cosmos-predict1 && \
67
+ CUDNN_HEADER_PATH=$(find "$CONDA_PREFIX/lib/python3.10/site-packages/nvidia/" -name "cudnn.h" | head -n 1) && \
68
+ if [ -f "$CUDNN_HEADER_PATH" ]; then \
69
+ echo "Found cudnn.h at: $CUDNN_HEADER_PATH"; \
70
  mkdir -p "$CONDA_PREFIX/include" && \
71
+ ln -sf "$CUDNN_HEADER_PATH" "$CONDA_PREFIX/include/cudnn.h" || \
72
+ cp "$CUDNN_HEADER_PATH" "$CONDA_PREFIX/include/cudnn.h"; \
73
  else \
74
+ echo "Error: cudnn.h not found in any expected location within Conda environment. This will likely cause compilation failures."; \
75
+ exit 1; \
76
  fi
77
 
78
  # IMPORTANT: Symlink fix for Transformer Engine compilation (from INSTALL.md).
79
  # These symlinks are for other NVIDIA headers that might be in Python site-packages.
 
80
  RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
81
  conda activate cosmos-predict1 && \
82
  ln -sf "$CONDA_PREFIX"/lib/python3.10/site-packages/nvidia/*/include/* "$CONDA_PREFIX"/include/ || true && \