# syntax=docker/dockerfile:1
#
# Builds the BitNet b1.58 inference backend (llama.cpp-based) plus a small
# Flask front-end (run_model.py) that serves queries on port 5000.

FROM python:3.11-slim

# Runtime configuration only. DEBIAN_FRONTEND is deliberately NOT set here:
# baking it into ENV leaks a build-time apt knob into the running container
# (flagged by Docker build checks); it is passed inline to apt-get below.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    OMP_NUM_THREADS=4 \
    MODEL_PATH=/models/ggml-model-i2_s.gguf \
    BITNET_DIR=/app/BitNet

# Build toolchain needed to compile BitNet's native backend.
# update + install combined in one layer; list cache removed in the same layer.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# NOTE(review): flask and huggingface_hub are unpinned — consider pinning
# exact versions (hadolint DL3013) for reproducible builds.
RUN pip install --upgrade pip && \
    pip install flask huggingface_hub

# Pin to commit 404980e: later revisions are broken, per this open issue on
# the BitNet repository: https://github.com/microsoft/BitNet/issues/470
# .git is removed in the same layer so history never lands in the image.
RUN git clone https://github.com/microsoft/BitNet.git && \
    cd BitNet && \
    git checkout 404980e && \
    git submodule update --init --recursive --force && \
    rm -rf .git

WORKDIR /app/BitNet

RUN pip install -r requirements.txt

# Generate architecture-specific kernels: TL2 on x86_64, TL1 on aarch64.
# set -eux comes first so the uname capture itself is traced and error-checked.
RUN set -eux; \
    ARCH="$(uname -m)"; \
    if [ "$ARCH" = "x86_64" ]; then \
        python3 utils/codegen_tl2.py --model bitnet_b1_58-3B \
            --BM 160,320,320 --BK 96,96,96 --bm 32,32,32; \
    elif [ "$ARCH" = "aarch64" ]; then \
        python3 utils/codegen_tl1.py --model bitnet_b1_58-3B \
            --BM 160,320,320 --BK 64,128,64 --bm 32,64,32; \
    else \
        echo "unsupported architecture: $ARCH" >&2; exit 1; \
    fi

RUN cmake -B build -DCMAKE_BUILD_TYPE=Release && \
    cmake --build build -j"$(nproc)"

WORKDIR /app

# Fetch the quantized model weights (i2_s GGUF) at build time so the
# container starts without network access. Path matches MODEL_PATH above.
RUN mkdir -p /models && \
    hf download microsoft/BitNet-b1.58-2B-4T-gguf \
        ggml-model-i2_s.gguf \
        --local-dir /models

COPY run_model.py .
COPY static ./static

# Drop root for the runtime process; all root-requiring build steps are done.
# Stable numeric UID so orchestrators (e.g. Kubernetes runAsNonRoot) can verify.
RUN useradd --system --uid 10001 --create-home --home-dir /home/app app && \
    chown -R app:app /app /models
USER app

# Documentation only: the Flask service listens on 5000.
EXPOSE 5000

CMD ["python", "run_model.py"]