Description
Environment Setup
1. Create conda environment
conda create -n mlc-llm-env python=3.11
conda activate mlc-llm-env
2. Install system dependencies
sudo apt-get install build-essential cmake git libxml2-dev llvm-15 llvm-15-dev
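Quick toolchain sanity check before configuring TVM (a minimal sketch; Ubuntu's llvm-15 packages install the binary as llvm-config-15):
# Confirm the toolchain is visible
llvm-config-15 --version   # expect 15.x
cmake --version            # TVM needs a reasonably recent CMake
nvcc --version             # CUDA 12.1 here; may need /usr/local/cuda/bin on PATH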
TVM Unity Build
3. Clone MLC-LLM with TVM submodule
git clone --recursive https://github.com/mlc-ai/mlc-llm.git
cd mlc-llm
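Worth confirming the recursive clone actually populated the TVM submodule (a minimal check):
# Verify --recursive fetched 3rdparty/tvm
git submodule status 3rdparty/tvm
ls 3rdparty/tvm/CMakeLists.txt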
4. Configure TVM build - created config.cmake (one way to create the file is sketched after the settings list)
Key TVM configuration (3rdparty/tvm/config.cmake):
set(CMAKE_BUILD_TYPE RelWithDebInfo)
set(HIDE_PRIVATE_SYMBOLS ON)
set(USE_LLVM "llvm-config-15") # Fixed LLVM detection
set(USE_CUDA ON)
set(USE_CUBLAS ON)
set(USE_CUTLASS OFF) # Disabled due to compilation errors
set(USE_WEBGPU ON) # Essential for WebLLM
set(USE_META_SCHEDULE OFF) # Disabled to fix linker errors
set(USE_FLASHINFER OFF) # Disabled due to compilation errors
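One way to produce that file is to start from TVM's shipped template and append the overrides, since a later set() wins (a sketch; abridged to three of the nine settings above):
cd 3rdparty/tvm
cp cmake/config.cmake config.cmake   # shipped template with documented defaults
cat >> config.cmake <<'EOF'
set(USE_LLVM "llvm-config-15")
set(USE_CUDA ON)
set(USE_WEBGPU ON)
EOF
cd ../..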
5. Build TVM libraries
cd 3rdparty/tvm
mkdir build && cd build
cmake .. && make -j$(nproc)
Generated: libtvm.so (1.3GB), libtvm_runtime.so (84MB)
6. Build MLC-LLM C++ libraries
cd ../../../
mkdir build && cd build
cmake .. && make mlc_llm -j$(nproc)
Generated: libmlc_llm.so (99MB)
7. Install MLC-LLM Python package
cd .. && pip install -e .
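A quick import check confirms the editable install picked up the freshly built libraries (a minimal sketch):
# Verify the package and the CLI entry point used in the steps below
python -c "import mlc_llm; print(mlc_llm.__file__)"
mlc_llm --help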
WebAssembly Setup
8. Install Emscripten SDK
git clone https://github.com/emscripten-core/emsdk.git
cd emsdk
./emsdk install 3.1.56
./emsdk activate 3.1.56
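emsdk only takes effect once its environment script is sourced in the current shell (step 13 below sources it again before compiling):
# Put emcc on PATH for this shell
source ./emsdk_env.sh
emcc --version   # expect 3.1.56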
9. Generate WASM runtime
cd ../mlc-llm
export TVM_SOURCE_DIR=$PWD/3rdparty/tvm
./scripts/prep_emcc_deps.sh
Generated: wasm_runtime.bc, webgpu_runtime.bc, tvmjs_runtime.wasm
Model Conversion
10. Download model (used an existing local copy of Llama-3.2-1B-Instruct)
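For a fresh setup, one way to fetch the weights is a git-lfs clone from Hugging Face (a hypothetical sketch; assumes access to the gated meta-llama/Llama-3.2-1B-Instruct repo and an accepted license):
# Not run here -- this log reused an existing local copy
git lfs install
git clone https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct ./dist/models/Llama-3.2-1B-Instruct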
11. Convert weights to MLC format
mlc_llm convert_weight ./dist/models/Llama-3.2-1B-Instruct --quantization q4f16_1 --output ./dist/models/Llama-3.2-1B-Instruct-q4f16_1-MLC
12. Generate MLC chat config
mlc_llm gen_config ./dist/models/Llama-3.2-1B-Instruct \
  --quantization q4f16_1 \
  --conv-template llama-3 \
  --context-window-size 32768 \
  --max-batch-size 1 \
  --output ./dist/models/Llama-3.2-1B-Instruct-q4f16_1-MLC
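Before compiling, a spot-check that the generated config carries the intended values (a sketch; field names assume the current mlc-chat-config.json schema):
# Print the context window and conversation template that were baked in
python -c "import json; c = json.load(open('./dist/models/Llama-3.2-1B-Instruct-q4f16_1-MLC/mlc-chat-config.json')); print(c['context_window_size'], c['conv_template'])"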
13. Compile to WebGPU WASM
source /home/paperspace/webllm/emsdk/emsdk_env.sh
mlc_llm compile ./dist/models/Llama-3.2-1B-Instruct-q4f16_1-MLC/mlc-chat-config.json \
  --device webgpu \
  --output ./dist/libs/Llama-3.2-1B-Instruct-q4f16_1-webgpu.wasm
Result: 4.6MB WASM file
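WebLLM fetches both the model library wasm and the converted weights over HTTP, so a simple local smoke test is to serve dist/ and point a web-llm appConfig at the two URLs (a sketch; the consuming page is outside this log):
# Serve artifacts for a local WebLLM page
cd dist && python -m http.server 8000
# weights: http://localhost:8000/models/Llama-3.2-1B-Instruct-q4f16_1-MLC/
# library: http://localhost:8000/libs/Llama-3.2-1B-Instruct-q4f16_1-webgpu.wasm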
Library Versions
System info
Platform: Linux 6.2.0-37-generic (Ubuntu)
Python: 3.11
LLVM: 15
CUDA: 12.1
Emscripten: 3.1.56
Key libraries (from pip list in mlc-llm-env)
mlc-llm: 0.1.0.dev0 (built from source)
tvm: 0.18.dev0 (built from source with WebGPU support)
torch: 2.1.0+cu121
transformers: 4.45.0.dev0