Skip to content

Commit 972c787

Browse files
authored
Merge pull request #5 from stackblogger/feature/llm-v2
BitNet.Js improve LLM with web redesign
2 parents a54f9ca + e8f94ba commit 972c787

10 files changed

Lines changed: 1576 additions & 423 deletions

File tree

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
/node_modules
22
/package-lock.json
33
node_modules
4-
/apps/web/package-lock.json
4+
apps/llm/__pycache__

apps/llm/Dockerfile

Lines changed: 57 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,61 @@
1-
FROM python:3.9-alpine
1+
FROM python:3.11-slim
2+
3+
ENV DEBIAN_FRONTEND=noninteractive \
4+
PYTHONUNBUFFERED=1 \
5+
PIP_NO_CACHE_DIR=1 \
6+
OMP_NUM_THREADS=4 \
7+
MODEL_PATH=/models/ggml-model-i2_s.gguf \
8+
BITNET_DIR=/app/BitNet
9+
10+
RUN apt-get update && apt-get install -y --no-install-recommends \
11+
git \
12+
curl \
13+
ca-certificates \
14+
build-essential \
15+
cmake \
16+
&& rm -rf /var/lib/apt/lists/*
17+
18+
WORKDIR /app
19+
20+
RUN pip install --upgrade pip && \
21+
pip install flask huggingface_hub
22+
23+
# BitNet is pinned to commit 404980e because of an open issue on the BitNet repository:
24+
# https://github.com/microsoft/BitNet/issues/470
25+
RUN git clone https://github.com/microsoft/BitNet.git && \
26+
cd BitNet && \
27+
git checkout 404980e && \
28+
git submodule update --init --recursive --force && \
29+
rm -rf .git
30+
31+
WORKDIR /app/BitNet
32+
33+
RUN pip install -r requirements.txt
34+
35+
RUN ARCH="$(uname -m)" && set -eux && \
36+
if [ "$ARCH" = "x86_64" ]; then \
37+
python3 utils/codegen_tl2.py --model bitnet_b1_58-3B \
38+
--BM 160,320,320 --BK 96,96,96 --bm 32,32,32; \
39+
elif [ "$ARCH" = "aarch64" ]; then \
40+
python3 utils/codegen_tl1.py --model bitnet_b1_58-3B \
41+
--BM 160,320,320 --BK 64,128,64 --bm 32,64,32; \
42+
else \
43+
echo "unsupported architecture: $ARCH" >&2; exit 1; \
44+
fi
45+
46+
RUN cmake -B build -DCMAKE_BUILD_TYPE=Release && \
47+
cmake --build build -j$(nproc)
48+
49+
WORKDIR /app
50+
51+
RUN mkdir -p /models && \
52+
hf download microsoft/BitNet-b1.58-2B-4T-gguf \
53+
ggml-model-i2_s.gguf \
54+
--local-dir /models
255

3-
# Install necessary dependencies and tools
4-
RUN apk add --no-cache build-base cmake clang git && \
5-
rm -rf /var/cache/apk/*
6-
7-
# Clone the BitNet repository without history
8-
RUN git clone --recursive --depth 1 https://github.com/microsoft/BitNet.git && \
9-
rm -rf BitNet/.git
10-
11-
WORKDIR /BitNet
12-
13-
# Install Python dependencies
14-
RUN pip install --no-cache-dir -r requirements.txt
15-
16-
# Copy the local requirements.txt for additional dependencies
17-
COPY requirements-local.txt .
18-
19-
# Install additional dependencies from the local requirements file
20-
RUN pip install --no-cache-dir -r requirements-local.txt
21-
22-
# Run the code generation for Llama3-8B model
23-
RUN python3 utils/codegen_tl2.py --model Llama3-8B-1.58-100B-tokens --BM 256,128,256,128 --BK 96,96,96,96 --bm 32,32,32,32
24-
25-
# Build the model using cmake with specified compilers
26-
RUN cmake -B build -DBITNET_X86_TL2=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
27-
28-
RUN cmake --build build --config Release
29-
30-
# Download the Llama model from HuggingFace
31-
ADD https://huggingface.co/brunopio/Llama3-8B-1.58-100B-tokens-GGUF/resolve/main/Llama3-8B-1.58-100B-tokens-TQ2_0.gguf .
32-
33-
# Verify the integrity of the model file
34-
RUN echo "2565559c82a1d03ecd1101f536c5e99418d07e55a88bd5e391ed734f6b3989ac Llama3-8B-1.58-100B-tokens-TQ2_0.gguf" | sha256sum -c
56+
COPY run_model.py .
57+
COPY static ./static
3558

36-
# Expose port for communication with the Node.js app
3759
EXPOSE 5000
3860

39-
# Run a Python script that handles queries from the Node.js app using socket.io
40-
COPY run_model.py .
41-
42-
# Run the model in inference mode, listening for queries
43-
CMD ["python3", "run_model.py"]
61+
CMD ["python", "run_model.py"]

apps/llm/requirements-local.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,2 @@
11
flask
2-
flask-socketio
32
requests
4-
eventlet

0 commit comments

Comments
 (0)