rangeify: don't tag consts, they are global (#12247) #49

Workflow file for this run

name: Unit Tests
env:
# increment this when downloads substantially change, to avoid hitting the internet on every run
DOWNLOAD_CACHE_VERSION: '12'
PYTHON_CACHE_VERSION: '3'
APT_CACHE_VERSION: '1'
BUILD_CACHE_VERSION: '1'
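# CAPTURE_PROCESS_REPLAY has each job record the kernels it generates; the
# "Run process replay tests" steps later in this file then (roughly) replay that
# capture against master to flag unintended codegen changes.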
CAPTURE_PROCESS_REPLAY: 1
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PYTHONPATH: ${{ github.workspace }}
on:
push:
branches:
- master
pull_request:
workflow_dispatch:
jobs:
llvmspeed:
name: LLVM Speed
runs-on: ubuntu-24.04
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: llvm-speed
deps: testing_minimal
llvm: 'true'
- name: Speed Test
run: CPU=1 CPU_LLVM=1 python3 test/speed/external_test_speed_v_torch.py
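# BEAM=2 in the next step enables tinygrad's beam search over kernel optimizations
# (beam width 2), so that run measures searched kernels rather than the default heuristics.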
- name: Speed Test (BEAM=2)
run: BEAM=2 CPU=1 CPU_LLVM=1 python3 test/speed/external_test_speed_v_torch.py
docs:
name: Docs
runs-on: ubuntu-22.04
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
deps: docs
pydeps: "capstone torch"
- name: Build wheel and show size
run: |
pip install build
python -m build --wheel --outdir dist
ls -lh dist/*.whl
- name: Use as an external package
run: |
mkdir $HOME/test_external_dir
cd $HOME/test_external_dir
python -m venv venv
source venv/bin/activate
pip install $GITHUB_WORKSPACE
python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
pip install mypy
mypy -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
- name: Run beautiful_mnist with tinygrad only
run: |
mkdir $GITHUB_WORKSPACE/test_dir
cd $GITHUB_WORKSPACE/test_dir
python -m venv venv
source venv/bin/activate
pip install $GITHUB_WORKSPACE
cp $GITHUB_WORKSPACE/examples/beautiful_mnist.py .
BS=2 STEPS=10 python beautiful_mnist.py
- name: Test Docs Build
run: python -m mkdocs build --strict
- name: Test Docs
run: |
python docs/abstractions2.py
python docs/abstractions3.py
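# The next two steps extract the ```python fenced blocks from README.md and docs/quickstart.md:
# awk sets flag=1 on an opening ```python fence, clears it on the closing ```, and prints only
# the lines in between, leaving a runnable .py file.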
- name: Test README
run: awk '/```python/{flag=1;next}/```/{flag=0}flag' README.md > README.py && python README.py
- name: Test Quickstart
run: awk '/```python/{flag=1;next}/```/{flag=0}flag' docs/quickstart.md > quickstart.py && python quickstart.py
- name: Test DEBUG
run: DEBUG=100 python3 -c "from tinygrad import Tensor; N = 1024; a, b = Tensor.rand(N, N), Tensor.rand(N, N); c = (a.reshape(N, 1, N) * b.T.reshape(1, N, N)).sum(axis=2); print((c.numpy() - (a.numpy() @ b.numpy())).mean())"
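# Sanity note on the one-liner above: c[i,j] = sum_k a[i,k] * b.T[j,k] = sum_k a[i,k] * b[k,j],
# so the broadcasted product reduced over axis=2 is exactly a @ b and the printed mean
# difference should be ~0 up to float error.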
- name: Compile EfficientNet to C and test it
run: |
CPU=1 CPU_LLVM=0 python examples/compile_efficientnet.py > recognize.c
clang -O2 recognize.c -lm -o recognize
cat test/models/efficientnet/Chicken.jpg | ./recognize | grep cock
torchbackend:
name: Torch Backend Tests
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: torch-backend-pillow-torchvision-et-pt
deps: testing_minimal
pydeps: "pillow torchvision expecttest"
llvm: 'true'
- name: Install ninja
run: |
sudo apt update || true
sudo apt install -y --no-install-recommends ninja-build
- name: Lint with ruff
run: |
pip3 install --upgrade --force-reinstall ruff==0.11.0
python3 -m ruff check extra/torch_backend/backend.py
- name: Test one op
run: FORWARD_ONLY=1 TINY_BACKEND=1 python3 test/test_ops.py TestOps.test_add
- name: Test ResNet-18
run: DEBUG=2 python3 extra/torch_backend/example.py
- name: Run custom torch backend tests
run: python3 extra/torch_backend/test.py
- name: Test one op in torch tests
run: DEBUG=2 python3 extra/torch_backend/torch_tests.py TestTinyBackendPRIVATEUSE1.test_unary_log_tiny_float32
- name: Test Ops with TINY_BACKEND
run: CPU=1 CPU_LLVM=1 LLVMOPT=0 TINY_BACKEND=1 python3 -m pytest -n auto test/test_ops.py --durations=20
- name: Test in-place operations on views
run: TORCH_DEBUG=1 python3 extra/torch_backend/test_inplace.py
- name: Test multi-gpu
run: CPU=1 CPU_LLVM=1 GPUS=4 TORCH_DEBUG=1 python3 extra/torch_backend/test_multigpu.py
torchbackendmore:
name: Torch Backend Tests More
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: torch-backend-pillow-torchvision-et-pt
deps: testing_minimal
llvm: 'true'
- name: Install ninja
run: |
sudo apt update || true
sudo apt install -y --no-install-recommends ninja-build
- name: Test beautiful_mnist in torch with TINY_BACKEND
run: SPLIT_REDUCEOP=0 FUSE_ARANGE=1 CPU=1 CPU_LLVM=1 TARGET_EVAL_ACC_PCT=96.0 TINY_BACKEND=1 python3 examples/other_mnist/beautiful_mnist_torch.py
- name: Test some torch tests (expect failure)
run: python3 -m pytest extra/torch_backend/torch_tests.py -v --tb=no || true
bepython:
name: Python Backend
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: be-minimal
deps: testing_minimal
- name: Test dtype with Python emulator
run: DEBUG=1 PYTHON=1 python3 -m pytest -n=auto test/test_dtype.py test/test_dtype_alu.py
- name: Test ops with Python emulator
run: DEBUG=2 SKIP_SLOW_TEST=1 PYTHON=1 python3 -m pytest -n=auto test/test_ops.py --durations=20
- name: Test uops with Python emulator
run: PYTHON=1 python3 -m pytest test/test_uops.py --durations=20
- name: Test symbolic with Python emulator
run: PYTHON=1 python3 test/test_symbolic_ops.py
- name: test_renderer_failures with Python emulator
run: PYTHON=1 python3 -m pytest -rA test/test_renderer_failures.py::TestRendererFailures
- name: Test IMAGE=2 support
run: |
IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_simple_conv2d
- name: Test emulated METAL tensor cores
run: |
DEBUG=2 EMULATE=METAL FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_big_gemm
DEBUG=2 EMULATE=METAL FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py
- name: Test emulated AMX tensor cores
run: DEBUG=2 AMX=1 EMULATE=AMX FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
- name: Test emulated AMD tensor cores
run: |
DEBUG=2 EMULATE=AMD FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
DEBUG=2 EMULATE=AMD FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
DEBUG=2 EMULATE=AMD FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py
DEBUG=2 EMULATE=AMD FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py
DEBUG=2 EMULATE=AMD FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py
- name: Test emulated AMD MFMA tensor cores
run: |
DEBUG=2 EMULATE=AMD_MFMA FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
DEBUG=2 EMULATE=AMD_MFMA FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py
- name: Test emulated AMD RDNA4 tensor cores
run: |
DEBUG=2 EMULATE=AMD_RDNA4 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
DEBUG=2 EMULATE=AMD_RDNA4 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
DEBUG=2 EMULATE=AMD_RDNA4 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py
DEBUG=2 EMULATE=AMD_RDNA4 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py
DEBUG=2 EMULATE=AMD_RDNA4 FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py
- name: Test emulated CUDA tensor cores
run: |
DEBUG=2 EMULATE=CUDA FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16
DEBUG=2 EMULATE=CUDA ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
DEBUG=2 EMULATE=CUDA_SM75 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16
DEBUG=2 EMULATE=CUDA ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py
- name: Test emulated INTEL OpenCL tensor cores
run: DEBUG=2 EMULATE=INTEL FORWARD_ONLY=1 PYTHON=1 HALF=1 N=64 python3 ./extra/gemm/simple_matmul.py
- name: Test emulated AMX tensor cores
run: DEBUG=2 AMX=1 EMULATE=AMX FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py
- name: Test device flop counts
run: |
DEBUG=2 EMULATE=METAL PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
DEBUG=2 EMULATE=AMD PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
DEBUG=2 EMULATE=CUDA PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
DEBUG=2 EMULATE=INTEL PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
DEBUG=2 AMX=1 EMULATE=AMX PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStats.test_simple_matmul
linter:
name: Linters
runs-on: ubuntu-latest
timeout-minutes: 10
# TODO: run the pre-commit hook to replace a lot of this
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: linting-only
python-version: '3.10'
deps: linting
- name: Lint bad-indentation and trailing-whitespace with pylint
run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' --recursive=y .
- name: Lint with ruff
run: |
pip3 install --upgrade --force-reinstall ruff==0.11.0
python3 -m ruff check .
python3 -m ruff check examples/mlperf/ --ignore E501
- name: Lint tinygrad with pylint
run: python -m pylint tinygrad/
- name: Run mypy
run: |
python -m mypy --strict-equality --lineprecision-report .
cat lineprecision.txt
- name: Run TYPED=1
run: TYPED=1 python -c "import tinygrad"
unittest:
name: Unit Tests
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: unittest-12
pydeps: "pillow"
deps: testing_unit
- name: Run unit tests
run: python -m pytest -n=auto test/unit/ --durations=20
- name: Run targeted tests on NULL backend
run: NULL=1 python3 test/test_multitensor.py TestMultiTensor.test_data_parallel_resnet_train_step
- name: Run SDXL on NULL backend
run: MAX_BUFFER_SIZE=0 NULL=1 DEBUG=1 python3 examples/sdxl.py --seed 0 --noshow --timing --fakeweights
# TODO: support fake weights
#- name: Run LLaMA 7B on 4 fake devices
# run: NULL=1 python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 3 --temperature 0 --timing
- name: Run GC tests
run: python test/external/external_uop_gc.py
- name: External Benchmark Schedule
run: python3 test/external/external_benchmark_schedule.py
- name: Run process replay tests
uses: ./.github/actions/process-replay
- name: Regen dataset on test_tiny
run: |
test/external/process_replay/reset.py
CAPTURE_PROCESS_REPLAY=1 python test/test_tiny.py TestTiny.test_plus
python extra/optimization/extract_dataset.py
gzip -c /tmp/sops > extra/datasets/sops.gz
#DEBUG=1 MIN_ASTS=1 python extra/optimization/get_action_space.py
- name: Repo line count < 18000 lines
run: MAX_LINE_COUNT=18000 python sz.py
fuzzing:
name: Fuzzing
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: fuzzing-unit
deps: testing_unit
- name: Fuzz Test symbolic
run: python test/external/fuzz_symbolic.py
- name: Fuzz Test fast idiv
run: python test/external/fuzz_fast_idiv.py
- name: Fuzz Test shapetracker
run: |
python test/external/fuzz_shapetracker.py
python test/external/fuzz_shapetracker_math.py
- name: Fuzz Test shape ops
run: python test/external/fuzz_shape_ops.py
testopenclimage:
name: CL IMAGE Tests
runs-on: ubuntu-22.04
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: gpu-image
deps: testing_minimal
opencl: 'true'
- name: Test CL IMAGE=2 ops + training
run: |
CL=1 IMAGE=2 python -m pytest -n=auto test/test_ops.py --durations=20
CL=1 IMAGE=2 python test/models/test_end2end.py TestEnd2End.test_linear_mnist
- name: Run process replay tests
uses: ./.github/actions/process-replay
testgpumisc:
name: CL Misc tests
runs-on: ubuntu-22.04
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: gen-dataset
deps: testing_minimal
opencl: 'true'
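# generate_dataset.sh below captures (as far as I understand it) kernel ASTs into the
# sops dataset at /tmp/sops.gz, which the Upload artifact step at the end of this job
# publishes for the extra/optimization tooling.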
- name: Generate Dataset
run: CL=1 extra/optimization/generate_dataset.sh
- name: Run Kernel Count Test
run: CL=1 python -m pytest -n=auto test/external/external_test_opt.py
- name: Run fused optimizer tests
run: CL=1 FUSE_OPTIM=1 python -m pytest -n=auto test/models/test_mnist.py
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: sops.gz
path: /tmp/sops.gz
testopenpilot:
name: openpilot Compile Tests
runs-on: ubuntu-22.04
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: openpilot-compile
deps: testing
opencl: 'true'
llvm: 'true'
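# The ALLOWED_KERNEL_COUNT / ALLOWED_READ_IMAGE / ALLOWED_GATED_READ_IMAGE values in the
# first step below act as regression gates: if my reading of compile3.py is right, the
# compile fails when the generated model exceeds these counts.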
- name: Test openpilot model kernel count and gate usage
run: |
ALLOWED_KERNEL_COUNT=208 ALLOWED_READ_IMAGE=2175 ALLOWED_GATED_READ_IMAGE=16 FLOAT16=0 CL=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/v0.9.4/selfdrive/modeld/models/supercombo.onnx
- name: Test openpilot alt model correctness (float32)
run: FLOAT16=0 DEBUGCL=1 CL=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/3799fe46b3a629e491d4b8498b8ae83e4c88c304/selfdrive/modeld/models/supercombo.onnx
- name: Test openpilot fastvits model correctness (float32)
run: FLOAT16=0 DEBUGCL=1 CL=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/9118973ed03c1ae1d40cf69a29507ec2cc78efd7/selfdrive/modeld/models/supercombo.onnx
# - name: Test openpilot simple_plan vision model correctness (float32)
# run: FLOAT16=0 DEBUGCL=1 CL=1 IMAGE=2 python examples/openpilot/compile3.py https://gitlab.com/commaai/openpilot-lfs.git/gitlab-lfs/objects/35ff4f4577002f2685e50c8346addae33fe8da27a41dd4d6a0f14d1f4b1af81b
- name: Test openpilot LLVM compile
run: CPU=1 CPU_LLVM=1 LLVMOPT=1 JIT=2 BEAM=0 IMAGE=0 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/9118973ed03c1ae1d40cf69a29507ec2cc78efd7/selfdrive/modeld/models/supercombo.onnx
- name: Test openpilot compile4
run: NOLOCALS=1 CL=1 IMAGE=2 FLOAT16=1 DEBUG=2 python3 examples/openpilot/compile4.py
- name: Run process replay tests
uses: ./.github/actions/process-replay
# ****** ONNX Tests ******
testonnxcpu:
name: ONNX (CPU) Tests
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: onnxoptc
deps: testing
python-version: '3.11'
llvm: 'true'
- name: Test ONNX (CPU)
run: CPU=1 CPU_LLVM=0 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test ONNX (LLVM)
run: CPU=1 CPU_LLVM=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test ONNX Runner (CPU)
run: CPU=1 CPU_LLVM=0 python3 test/external/external_test_onnx_runner.py
- name: Test Additional ONNX Ops (CPU)
run: CPU=1 CPU_LLVM=0 python3 test/external/external_test_onnx_ops.py
- name: Test Quantize ONNX
run: CPU=1 CPU_LLVM=0 python3 test/test_quantize_onnx.py
- name: Run process replay tests
uses: ./.github/actions/process-replay
testopencl:
name: ONNX (CL)+Optimization Tests
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: onnxoptl
deps: testing
pydeps: "tensorflow==2.15.1 tensorflow_addons"
python-version: '3.11'
opencl: 'true'
- name: Test ONNX (CL)
run: CL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
#- name: Test Optimization Helpers
# run: DEBUG=1 python3 extra/optimization/test_helpers.py
#- name: Test Action Space
# run: DEBUG=1 CL=1 python3 extra/optimization/get_action_space.py
- name: Test Beam Search
run: CL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
- name: Test MLPerf stuff
run: CL=1 python -m pytest -n=auto test/external/external_test_optim.py test/external/external_test_losses.py test/external/external_test_metrics.py test/external/external_test_datasets.py --durations=20
- name: Test llama 3 training
run: MAX_BUFFER_SIZE=0 DEV=NULL SAMPLES=300 BS=8 SEQLEN=512 GRADIENT_ACC_STEPS=8 FAKEDATA=1 DEFAULT_FLOAT=bfloat16 OPTIM_DTYPE=bfloat16 LLAMA3_SIZE=1B MODEL=llama3 python3 examples/mlperf/model_train.py
- name: Run process replay tests
uses: ./.github/actions/process-replay
testllm:
name: Test LLM
runs-on: ubuntu-24.04
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: apps_llm
- name: Test 1B LLM
run: echo "What's a male chicken called? Answer with only one word." | MAX_BUFFER_SIZE=0 python3 -m tinygrad.apps.llm | grep -i rooster
# ****** Models Tests ******
testmodels:
name: Models (llvm+cpu+cl)
runs-on: ubuntu-22.04
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: models
deps: testing
opencl: 'true'
llvm: 'true'
- name: Test models (llvm)
run: CPU=1 CPU_LLVM=1 python -m pytest -n=auto test/models --durations=20
- name: Test models (opencl)
run: CL=1 python -m pytest -n=auto test/models --durations=20
- name: Test models (cpu)
run: CPU=1 CPU_LLVM=0 python -m pytest -n=auto test/models --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
testmetalmodels:
name: Models (metal)
runs-on: macos-14
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: metal
deps: testing
python-version: '3.11'
- name: Test models (Metal)
run: METAL=1 python -m pytest -n=auto test/models --durations=20
- name: Test LLaMA compile speed
run: METAL=1 python test/external/external_test_speed_llama.py
# ****** Feature Tests ******
testrangeifycpu:
name: Linux (rangeify) CPU
runs-on: ubuntu-24.04
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: rangeify-minimal-llvm
deps: testing_minimal
opencl: 'true'
llvm: "true"
- name: Test CPU=1 RANGEIFY=1
# TODO: add more passing tests here
# test_embedding issue with jit
# test_load_state_dict_sharded_model_dict_same_axis issue with multi
# test_instancenorm_3d is very slow
# test_copy_from_disk issue with DISK
run: |
CPU=1 CPU_LLVM=0 RANGEIFY=1 python3 -m pytest -n auto --durations 20 \
-k "not test_embedding and not test_load_state_dict_sharded_model_dict_same_axis and not test_instancenorm_3d and not test_copy_from_disk" \
test/test_tiny.py test/test_rangeify.py test/test_ops.py test/test_symbolic_ops.py test/test_tensor_variable.py \
test/test_outerworld_range.py test/test_sample.py test/test_randomness.py test/test_nn.py test/test_arange.py test/test_tensor.py
- name: Test const folding
run: CPU=1 RANGEIFY=1 python3 -m pytest -n auto --durations 20 test/test_const_folding.py -k "not test_cast_padded and not TestReduceOpsConstFolding and not TestMultiConstFolding"
- name: Test multitensor
run: CPU=1 RANGEIFY=1 python3 test/test_multitensor.py TestMultiTensor.test_matmul_shard_1_1 TestMultiTensor.test_simple_add_W
- name: Test CPU=1 RANGEIFY=2
run: CPU=1 CPU_LLVM=0 RANGEIFY=2 python3 -m pytest -n auto test/test_tiny.py test/test_rangeify.py test/test_ops.py --durations 20
# slow (and still wrong on beautiful_mnist)
#- name: Test LLVM RANGEIFY=1 (slow tests)
# run: CPU=1 CPU_LLVM=1 RANGEIFY=1 python3 -m pytest -n auto test/models/test_mnist.py --durations 20
testrangeifycl:
name: Linux (rangeify) CL
runs-on: ubuntu-24.04
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: rangeify-cl
deps: testing
opencl: 'true'
llvm: "true"
- name: Test CL=1 RANGEIFY=1
run: CL=1 RANGEIFY=1 pytest -n auto test/test_ops.py test/test_schedule.py test/test_symbolic_ops.py --durations 20
- name: Test Fuse
run: CL=1 RANGEIFY=2 python3 -m pytest --durations 20 test/test_softmax_fusion.py -k "not test_auto_softmax"
- name: Test ONNX
run: CL=1 RANGEIFY=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
testdevectorize:
name: Linux (devectorize)
runs-on: ubuntu-24.04
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: devectorize-minimal
deps: testing_minimal
pydeps: "pillow"
llvm: "true"
- name: Test LLVM=1 DEVECTORIZE=0
run: CPU=1 CPU_LLVM=1 DEVECTORIZE=0 python3 -m pytest -n auto test/test_tiny.py test/test_ops.py -k "not test_avg_pool3d_failure"
- name: Test LLVM=1 DEVECTORIZE=0 for model
run: CPU=1 CPU_LLVM=1 DEVECTORIZE=0 python3 test/models/test_efficientnet.py
- name: Test CPU=1 DEVECTORIZE=0
run: CPU=1 CPU_LLVM=0 DEVECTORIZE=0 FUSE_ARANGE=0 python3 -m pytest -n auto test/test_tiny.py test/test_ops.py -k "not test_avg_pool3d_failure"
testdsp:
name: Linux (DSP)
runs-on: ubuntu-24.04
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: dsp-minimal
deps: testing_minimal
pydeps: "onnx==1.18.0 onnxruntime pillow"
llvm: "true"
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build QEMU Docker with cache
uses: docker/build-push-action@v4
with:
file: extra/dsp/Dockerfile
push: false
load: true
tags: qemu-hexagon:latest
cache-from: type=gha
cache-to: type=gha,mode=min
- name: Set MOCKDSP env
run: printf "MOCKDSP=1" >> $GITHUB_ENV
- name: Run test_tiny on DSP
run: DEBUG=2 DSP=1 python test/test_tiny.py
- name: Test transcendentals
run: CC=clang-20 DEBUG=2 DSP=1 python test/test_transcendental.py TestTranscendentalVectorized
- name: Test quantize onnx
run: DEBUG=2 DSP=1 python3 test/test_quantize_onnx.py
testwebgpu:
name: Linux (WebGPU)
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: webgpu-minimal
deps: testing_minimal
python-version: '3.11'
webgpu: 'true'
- name: Check Device.DEFAULT (WEBGPU) and print some source
run: |
WEBGPU=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT"
WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Run selected webgpu tests
run: |
WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Vulkan" python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit \
--ignore=test/test_copy_speed.py --ignore=test/test_rearrange_einops.py \
--ignore=test/test_fuzz_shape_ops.py --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
testamd:
strategy:
fail-fast: false
matrix:
backend: [amd, amdllvm]
name: Linux (${{ matrix.backend }})
runs-on: ubuntu-22.04
timeout-minutes: 20
env:
AMD: 1
MOCKGPU: 1
FORWARD_ONLY: 1
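# GitHub expressions have no real ternary; the expression below spells out both branches,
# resolving AMD_LLVM to '1' for the amdllvm matrix entry and '0' for plain amd.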
AMD_LLVM: ${{ matrix.backend == 'amdllvm' && '1' || matrix.backend != 'amdllvm' && '0' }}
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: ${{ matrix.backend }}-minimal
deps: testing_minimal
amd: 'true'
llvm: ${{ matrix.backend == 'amdllvm' && 'true' }}
- name: Check Device.DEFAULT and print some source
run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['AMD'], Device.DEFAULT"
DEBUG=5 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Run LLVM test
if: matrix.backend=='amdllvm'
run: python test/device/test_amd_llvm.py
- name: Run pytest (amd)
run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/test_jit.py test/test_graph.py test/test_multitensor.py test/device/test_hcq.py --durations=20
- name: Run pytest (amd)
run: python -m pytest test/external/external_test_am.py --durations=20
- name: Run TRANSCENDENTAL math
run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
- name: Run TestOps.test_add with SQTT
run: |
VIZ=1 SQTT=1 DEBUG=5 python3 test/test_ops.py TestOps.test_add
extra/sqtt/rgptool.py create "/tmp/profile.pkl.$USER" -o /tmp/gpu0.rgp
- name: Run process replay tests
uses: ./.github/actions/process-replay
testnvidia:
strategy:
fail-fast: false
matrix:
backend: [ptx, nv]
name: Linux (${{ matrix.backend }})
runs-on: ubuntu-22.04
timeout-minutes: 20
env:
MOCKGPU: 1
FORWARD_ONLY: 1
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: ${{ matrix.backend }}-minimal
deps: testing_minimal
cuda: 'true'
ocelot: 'true'
- name: Set env
run: printf "${{ matrix.backend == 'PTX' && 'CUDA=1\nCUDA_PTX=1' || matrix.backend == 'nv' && 'NV=1\nSKIP_SLOW_TEST=1' }}" >> $GITHUB_ENV
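# the printf above appends the per-backend env to $GITHUB_ENV, e.g. for the nv entry:
#   NV=1
#   SKIP_SLOW_TEST=1
# (string comparison in GitHub expressions is case-insensitive, so 'PTX' matches the ptx entry)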
- name: Check Device.DEFAULT and print some source
run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CUDA','NV'], Device.DEFAULT"
DEBUG=5 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Run pytest (cuda)
# skip multitensor because it's slow
run: python -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --ignore test/test_gc.py --ignore test/test_multitensor.py --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
testcpuopencl:
strategy:
fail-fast: false
matrix:
backend: [llvm, cpu, opencl]
name: Linux (${{ matrix.backend }})
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: ${{ matrix.backend }}-minimal
deps: testing_minimal
opencl: ${{ matrix.backend == 'opencl' && 'true' }}
llvm: ${{ matrix.backend == 'llvm' && 'true' }}
- name: Set env
run: printf "${{ matrix.backend == 'llvm' && 'CPU=1\nCPU_LLVM=1' || matrix.backend == 'cpu' && 'CPU=1\nCPU_LLVM=0\nCPU_COUNT=2' || matrix.backend == 'opencl' && 'CL=1' }}" >> $GITHUB_ENV
- name: Check Device.DEFAULT and print some source
run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CPU','CL'], Device.DEFAULT"
DEBUG=5 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Run pytest (${{ matrix.backend }})
run: python -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --durations=20
- name: Run TRANSCENDENTAL math
run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
amdremote:
name: Linux (remote)
runs-on: ubuntu-22.04
timeout-minutes: 20
env:
REMOTE: 1
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: linux-remote
deps: testing_minimal
amd: 'true'
llvm: 'true'
opencl: 'true'
- name: Start remote server
run: |
start_server() {
systemd-run --user \
--unit="$1" \
--setenv=REMOTEDEV="$2" \
--setenv=MOCKGPU=1 \
--setenv=PYTHONPATH=. \
--setenv=PORT="$3" \
--working-directory="$(pwd)" \
python tinygrad/runtime/ops_remote.py
}
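# each server runs as a transient systemd user unit, so the "Show remote server logs"
# step at the end of this job can read its output back with journalctl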
start_server "remote-server-amd-1" "AMD" 6667
start_server "remote-server-amd-2" "AMD" 6668
start_server "remote-server-gpu" "CL" 7667
start_server "remote-server-cpu" "CPU" 8667
- name: Check Device.DEFAULT and print some source
env:
HOST: 127.0.0.1:6667*6,127.0.0.1:6668*6
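# assumption on the HOST syntax: the '*6' suffix appears to expose six logical devices per
# remote server, which is what the multitensor tests in the next step shard across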
run: |
python -c "from tinygrad import Device; assert Device.DEFAULT == 'REMOTE', Device.DEFAULT"
python -c "from tinygrad import Device; assert Device.default.properties.real_device == 'AMD', Device.default.properties.real_device"
DEBUG=4 python3 test/test_tiny.py TestTiny.test_plus
- name: Run REMOTE=1 Test (AMD)
env:
HOST: 127.0.0.1:6667*6,127.0.0.1:6668*6
run: |
python3 -m pytest test/test_tiny.py test/test_jit.py test/test_subbuffer.py test/test_graph.py test/test_multitensor.py test/test_remote.py test/test_tensor_variable.py --durations 20
- name: Run REMOTE=1 Test (CL)
env:
HOST: 127.0.0.1:7667*6
run: |
python3 -m pytest test/test_tiny.py test/test_image_dtype.py test/test_jit.py --durations 20
IMAGE=2 python3 -m pytest test/test_tiny.py test/test_image_dtype.py
- name: Run REMOTE=1 Test (CPU)
env:
HOST: 127.0.0.1:8667*6
run: |
python3 -m pytest test/test_tiny.py test/test_jit.py test/test_multitensor.py --durations 20
- name: Show remote server logs
if: always()
run: |
journalctl --user -u remote-server-amd-1 --no-pager
journalctl --user -u remote-server-amd-2 --no-pager
journalctl --user -u remote-server-gpu --no-pager
journalctl --user -u remote-server-cpu --no-pager
# ****** OSX Tests ******
testmetal:
name: MacOS (unit)
runs-on: macos-14
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: metal
deps: testing
python-version: '3.11'
amd: 'true'
cuda: 'true'
ocelot: 'true'
llvm: 'true'
- name: Run unit tests
run: METAL=1 python -m pytest -n=auto test/unit/ --durations=20
- name: Run ONNX
run: METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test tensor core ops (fake)
run: METAL=1 DEBUG=3 TC=2 python test/test_ops.py TestOps.test_gemm
- name: Test tensor core ops (real)
run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm
- name: Test Beam Search
run: METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
#- name: Fuzz Test linearizer
# run: METAL=1 DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py
- name: Run TRANSCENDENTAL math
run: METAL=1 TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
- name: Run pytest (amd)
env:
MOCKGPU: 1
AMD: 1
AMD_LLVM: 0
FORWARD_ONLY: 1
run: |
python3 -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py --durations=20
- name: Run pytest (amd with llvm backend)
env:
MOCKGPU: 1
AMD: 1
AMD_LLVM: 1
FORWARD_ONLY: 1
run: |
python -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py test/device/test_amd_llvm.py --durations=20
- name: Run pytest (ptx)
env:
MOCKGPU: 1
NV_PTX: 1
NV: 1
FORWARD_ONLY: 1
run: |
python3 -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
osxwebgpu:
name: MacOS (WebGPU)
runs-on: macos-14
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: osx-webgpu
deps: testing
webgpu: 'true'
- name: Test infinity math in WGSL
run: WEBGPU=1 python -m pytest -n=auto test/test_renderer_failures.py::TestWGSLFailures::test_multiply_infinity --durations=20
- name: Build WEBGPU Efficientnet
run: WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Metal" python3 -m examples.compile_efficientnet
- name: Clean npm cache
run: npm cache clean --force
- name: Install Puppeteer
run: npm install puppeteer
# this is flaky
#- name: Run WEBGPU Efficientnet
# run: node test/web/test_webgpu.js
# this is also flaky
#- name: Run VIZ tests as external package
# run: |
# mkdir $GITHUB_WORKSPACE/test_dir
# cd $GITHUB_WORKSPACE/test_dir
# python -m venv venv
# source venv/bin/activate
# pip install $GITHUB_WORKSPACE
# cp $GITHUB_WORKSPACE/test/web/test_viz.js .
# node test_viz.js
- name: Test ONNX Runner (WEBGPU)
run: WEBGPU=1 python3 test/external/external_test_onnx_runner.py
osxremote:
name: MacOS (remote metal)
runs-on: macos-15
timeout-minutes: 10
env:
REMOTE: 1
REMOTEDEV: METAL
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: macos-remote
deps: testing_minimal
- name: Check Device.DEFAULT and print some source
run: |
python -c "from tinygrad import Device; assert Device.DEFAULT == 'REMOTE', Device.DEFAULT"
python -c "from tinygrad import Device; assert Device.default.properties.real_device == 'METAL', Device.default.properties.real_device"
DEBUG=4 python3 test/test_tiny.py TestTiny.test_plus
- name: Run REMOTE=1 Test
run: |
python3 -m pytest test/test_tiny.py test/test_jit.py test/test_subbuffer.py test/test_graph.py test/test_multitensor.py test/test_tensor_variable.py
osxtests:
strategy:
fail-fast: false
matrix:
backend: [metal, llvm, cpu]
name: MacOS (${{ matrix.backend }})
runs-on: macos-15
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: macos-${{ matrix.backend }}-minimal
deps: testing_minimal
pydeps: "capstone"
llvm: ${{ matrix.backend == 'llvm' && 'true' }}
- name: Set env
run: printf "${{ matrix.backend == 'llvm' && 'CPU=1\nCPU_LLVM=1' || matrix.backend == 'cpu' && 'CPU=1\nCPU_LLVM=0\nCPU_COUNT=2' || matrix.backend == 'metal' && 'METAL=1'}}" >> $GITHUB_ENV
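# in the check below, {'LLVM':'CPU'}.get(x) maps the llvm matrix entry to the expected 'CPU'
# device (CPU_LLVM=1 still reports Device.DEFAULT == 'CPU'); metal and cpu map to themselves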
- name: Check Device.DEFAULT and print some source
run: |
python -c "from tinygrad import Device; assert Device.DEFAULT == {'LLVM':'CPU'}.get(x:='${{ matrix.backend }}'.upper(), x), Device.DEFAULT"
DEBUG=4 python3 test/test_tiny.py TestTiny.test_plus
- name: Run pytest (${{ matrix.backend }})
run: python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
- name: Run macOS-specific unit test
if: matrix.backend == 'cpu'
run: python3 -m pytest test/unit/test_disk_tensor.py::TestDiskTensor::test_copy_to_cpu_not_truncated
# ****** Windows Tests ******
wintests:
strategy:
fail-fast: false
matrix:
backend: [llvm, cpu, webgpu]
name: Windows (${{ matrix.backend }})
runs-on: windows-latest
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: windows-${{ matrix.backend }}-minimal
deps: testing_unit
pydeps: ${{ matrix.backend == 'webgpu' && 'dawn-python' || '' }}
- name: Set env
shell: bash
run: printf "${{ matrix.backend == 'llvm' && 'CPU=1\nCPU_LLVM=1' || matrix.backend == 'cpu' && 'CPU=1\nCPU_LLVM=0\nCPU_COUNT=2' || matrix.backend == 'webgpu' && 'WEBGPU=1'}}" >> $GITHUB_ENV
- name: Run unit tests
if: matrix.backend=='llvm'
# test_newton_schulz hits RecursionError
run: python -m pytest -n=auto test/unit/ --ignore=test/unit/test_disk_tensor.py --ignore=test/unit/test_elf.py --ignore=test/unit/test_tar.py --ignore=test/unit/test_linalg.py --durations=20
- name: Run pytest (${{ matrix.backend }})
shell: bash
run: |
python -c "from tinygrad import Device; assert Device.DEFAULT == {'LLVM':'CPU'}.get(x:='${{ matrix.backend }}'.upper(), x), Device.DEFAULT"
python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20