diff --git a/method_comparison/MetaMathQA/README.md b/method_comparison/MetaMathQA/README.md
index 20679f5ace..4c76240661 100644
--- a/method_comparison/MetaMathQA/README.md
+++ b/method_comparison/MetaMathQA/README.md
@@ -159,9 +159,9 @@ Results are stored in one of the result directories. An example output could loo
 }
 },
 "train_info": {
- "cuda_memory_reserved_avg": 14229219940,
- "cuda_memory_max": 24847056896,
- "cuda_memory_reserved_99th": 19115624366,
+ "accelerator_memory_reserved_avg": 14229219940,
+ "accelerator_memory_max": 24847056896,
+ "accelerator_memory_reserved_99th": 19115624366,
 "train_time": 2238.65277833899,
 "file_size": 1157064,
 "status": "success",
@@ -211,7 +211,7 @@ Results are stored in one of the result directories. An example output could loo
 "version": "#17~24.04.2-Ubuntu SMP PREEMPT_DYNAMIC Mon Jan 20 22:48:29 UTC 2",
 "machine": "x86_64",
 "processor": "x86_64",
- "gpu": "NVIDIA GeForce RTX 4090"
+ "accelerator": "NVIDIA GeForce RTX 4090"
 },
 "pytorch_info": "PyTorch built with: [...]"
 }
diff --git a/method_comparison/MetaMathQA/results/adalora--llama-3.2-3B-rank32.json b/method_comparison/MetaMathQA/results/adalora--llama-3.2-3B-rank32.json
index cfad82b317..8b38ea15e8 100644
--- a/method_comparison/MetaMathQA/results/adalora--llama-3.2-3B-rank32.json
+++ b/method_comparison/MetaMathQA/results/adalora--llama-3.2-3B-rank32.json
@@ -3773,9 +3773,9 @@
 "error_msg": ""
 },
 "train_info": {
- "cuda_memory_reserved_avg": 12361399900,
- "cuda_memory_max": 22793945088,
- "cuda_memory_reserved_99th": 18203426160,
+ "accelerator_memory_reserved_avg": 12361399900,
+ "accelerator_memory_max": 22793945088,
+ "accelerator_memory_reserved_99th": 18203426160,
 "train_time": 1986.3603882369862,
 "file_size": 35147440,
 "num_trainable_params": 18353664,
@@ -4064,8 +4064,8 @@
 "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025",
 "machine": "x86_64",
 "processor": "x86_64",
- "gpu": "NVIDIA L40S"
+ "accelerator": "NVIDIA L40S"
 },
 "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/adaptionprompt--llama-3.2-3B-lr_0.0005.json b/method_comparison/MetaMathQA/results/adaptionprompt--llama-3.2-3B-lr_0.0005.json index 87127c7ef3..c35ccc865b 100644 --- a/method_comparison/MetaMathQA/results/adaptionprompt--llama-3.2-3B-lr_0.0005.json +++ b/method_comparison/MetaMathQA/results/adaptionprompt--llama-3.2-3B-lr_0.0005.json @@ -43,9 +43,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 11893757234, - "cuda_memory_max": 22410166272, - "cuda_memory_reserved_99th": 17907664814, + "accelerator_memory_reserved_avg": 11893757234, + "accelerator_memory_max": 22410166272, + "accelerator_memory_reserved_99th": 17907664814, "train_time": 1989.2834085189897, "file_size": 17210384, "num_trainable_params": 8601628, @@ -334,8 +334,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/boft--llama-3.2-3B-default.json b/method_comparison/MetaMathQA/results/boft--llama-3.2-3B-default.json index 7b8de14f03..83ddbc7c63 100644 --- a/method_comparison/MetaMathQA/results/boft--llama-3.2-3B-default.json +++ b/method_comparison/MetaMathQA/results/boft--llama-3.2-3B-default.json @@ -56,9 +56,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 14814855089, - "cuda_memory_max": 24427626496, - "cuda_memory_reserved_99th": 20103445872, + "accelerator_memory_reserved_avg": 14814855089, + "accelerator_memory_max": 24427626496, + "accelerator_memory_reserved_99th": 20103445872, "train_time": 8291.859631775995, "file_size": 3225360, "num_trainable_params": 802816, @@ -347,8 +347,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/bone--llama-3.2-3B-bat.json b/method_comparison/MetaMathQA/results/bone--llama-3.2-3B-bat.json index 4c684dc573..069bbfe107 100644 --- a/method_comparison/MetaMathQA/results/bone--llama-3.2-3B-bat.json +++ b/method_comparison/MetaMathQA/results/bone--llama-3.2-3B-bat.json @@ -52,9 +52,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 14713983755, - "cuda_memory_max": 25251807232, - "cuda_memory_reserved_99th": 20472733368, + "accelerator_memory_reserved_avg": 14713983755, + "accelerator_memory_max": 25251807232, + "accelerator_memory_reserved_99th": 20472733368, "train_time": 2430.7548372539895, "file_size": 29367552, "num_trainable_params": 7340032, @@ -343,8 +343,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/bone--llama-3.2-3B-default.json b/method_comparison/MetaMathQA/results/bone--llama-3.2-3B-default.json index ec4da35505..a473c5827e 100644 --- a/method_comparison/MetaMathQA/results/bone--llama-3.2-3B-default.json +++ b/method_comparison/MetaMathQA/results/bone--llama-3.2-3B-default.json @@ -52,9 +52,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 11170837063, - "cuda_memory_max": 20248002560, - "cuda_memory_reserved_99th": 16303469363, + "accelerator_memory_reserved_avg": 11170837063, + "accelerator_memory_max": 20248002560, + "accelerator_memory_reserved_99th": 16303469363, "train_time": 1664.0814183089897, "file_size": 29367496, "num_trainable_params": 7340032, @@ -343,8 +343,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/fourierft--llama-3.2-3B-default.json b/method_comparison/MetaMathQA/results/fourierft--llama-3.2-3B-default.json index 0aa2edcfa9..f3bc26876c 100644 --- a/method_comparison/MetaMathQA/results/fourierft--llama-3.2-3B-default.json +++ b/method_comparison/MetaMathQA/results/fourierft--llama-3.2-3B-default.json @@ -56,9 +56,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 13104129350, - "cuda_memory_max": 23653777408, - "cuda_memory_reserved_99th": 19017267937, + "accelerator_memory_reserved_avg": 13104129350, + "accelerator_memory_max": 23653777408, + "accelerator_memory_reserved_99th": 19017267937, "train_time": 2424.3862988609762, "file_size": 231416, "num_trainable_params": 56000, @@ -347,8 +347,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/fourierft--llama-3.2-3B-n_frequency-5000.json b/method_comparison/MetaMathQA/results/fourierft--llama-3.2-3B-n_frequency-5000.json index 24096854af..3c7241b5f0 100644 --- a/method_comparison/MetaMathQA/results/fourierft--llama-3.2-3B-n_frequency-5000.json +++ b/method_comparison/MetaMathQA/results/fourierft--llama-3.2-3B-n_frequency-5000.json @@ -56,9 +56,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 13111221498, - "cuda_memory_max": 23681040384, - "cuda_memory_reserved_99th": 19054869872, + "accelerator_memory_reserved_avg": 13111221498, + "accelerator_memory_max": 23681040384, + "accelerator_memory_reserved_99th": 19054869872, "train_time": 2421.913372163006, "file_size": 1127472, "num_trainable_params": 280000, @@ -347,8 +347,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/full-finetuning--llama-3.2-3B-lr_0.00001.json b/method_comparison/MetaMathQA/results/full-finetuning--llama-3.2-3B-lr_0.00001.json index 7a121108cd..4f15dc9eb2 100644 --- a/method_comparison/MetaMathQA/results/full-finetuning--llama-3.2-3B-lr_0.00001.json +++ b/method_comparison/MetaMathQA/results/full-finetuning--llama-3.2-3B-lr_0.00001.json @@ -33,9 +33,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 33098872284, - "cuda_memory_max": 37241225216, - "cuda_memory_reserved_99th": 33573390254, + "accelerator_memory_reserved_avg": 33098872284, + "accelerator_memory_max": 37241225216, + "accelerator_memory_reserved_99th": 33573390254, "train_time": 3111.3685010060144, "file_size": 6425499648, "num_trainable_params": 3212749824, @@ -324,8 +324,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/ia3--llama-3.2-3B-default.json b/method_comparison/MetaMathQA/results/ia3--llama-3.2-3B-default.json index 22ae90406a..50d2efa1f9 100644 --- a/method_comparison/MetaMathQA/results/ia3--llama-3.2-3B-default.json +++ b/method_comparison/MetaMathQA/results/ia3--llama-3.2-3B-default.json @@ -53,9 +53,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 12023227429, - "cuda_memory_max": 23137878016, - "cuda_memory_reserved_99th": 18398566154, + "accelerator_memory_reserved_avg": 12023227429, + "accelerator_memory_max": 23137878016, + "accelerator_memory_reserved_99th": 18398566154, "train_time": 1782.9318781230104, "file_size": 1157064, "num_trainable_params": 286720, @@ -344,8 +344,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/ia3--llama-3.2-3B-lr_0.001.json b/method_comparison/MetaMathQA/results/ia3--llama-3.2-3B-lr_0.001.json index 51193628f7..f1982e8ef3 100644 --- a/method_comparison/MetaMathQA/results/ia3--llama-3.2-3B-lr_0.001.json +++ b/method_comparison/MetaMathQA/results/ia3--llama-3.2-3B-lr_0.001.json @@ -52,9 +52,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 12023331867, - "cuda_memory_max": 23135780864, - "cuda_memory_reserved_99th": 18398356439, + "accelerator_memory_reserved_avg": 12023331867, + "accelerator_memory_max": 23135780864, + "accelerator_memory_reserved_99th": 18398356439, "train_time": 1746.0246657649877, "file_size": 1157064, "num_trainable_params": 286720, @@ -343,8 +343,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/ln_tuning--llama-3.2-3B-default.json b/method_comparison/MetaMathQA/results/ln_tuning--llama-3.2-3B-default.json index 70ba76cf1a..2e8dabe44e 100644 --- a/method_comparison/MetaMathQA/results/ln_tuning--llama-3.2-3B-default.json +++ b/method_comparison/MetaMathQA/results/ln_tuning--llama-3.2-3B-default.json @@ -48,9 +48,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 11385589622, - "cuda_memory_max": 21177040896, - "cuda_memory_reserved_99th": 16903066091, + "accelerator_memory_reserved_avg": 11385589622, + "accelerator_memory_max": 21177040896, + "accelerator_memory_reserved_99th": 16903066091, "train_time": 1657.2412179829698, "file_size": 358288, "num_trainable_params": 175104, @@ -339,8 +339,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/loha--llama-3.2-3B-rank32.json b/method_comparison/MetaMathQA/results/loha--llama-3.2-3B-rank32.json index f9a3ad7896..7baa8eb00f 100644 --- a/method_comparison/MetaMathQA/results/loha--llama-3.2-3B-rank32.json +++ b/method_comparison/MetaMathQA/results/loha--llama-3.2-3B-rank32.json @@ -57,9 +57,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 13446820344, - "cuda_memory_max": 23886561280, - "cuda_memory_reserved_99th": 19247870771, + "accelerator_memory_reserved_avg": 13446820344, + "accelerator_memory_max": 23886561280, + "accelerator_memory_reserved_99th": 19247870771, "train_time": 2340.7451966560056, "file_size": 73429560, "num_trainable_params": 18350080, @@ -348,8 +348,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/lokr--llama-3.2-3B-rank32.json b/method_comparison/MetaMathQA/results/lokr--llama-3.2-3B-rank32.json index 7ab43febfd..07ae5b1b5a 100644 --- a/method_comparison/MetaMathQA/results/lokr--llama-3.2-3B-rank32.json +++ b/method_comparison/MetaMathQA/results/lokr--llama-3.2-3B-rank32.json @@ -60,9 +60,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 13173683073, - "cuda_memory_max": 23565697024, - "cuda_memory_reserved_99th": 18987698094, + "accelerator_memory_reserved_avg": 13173683073, + "accelerator_memory_max": 23565697024, + "accelerator_memory_reserved_99th": 18987698094, "train_time": 2152.0406475960117, "file_size": 1131984, "num_trainable_params": 279552, @@ -351,8 +351,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank32-dora.json b/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank32-dora.json index ab8251af3e..a2b645db9b 100644 --- a/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank32-dora.json +++ b/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank32-dora.json @@ -67,9 +67,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 12490471636, - "cuda_memory_max": 24553455616, - "cuda_memory_reserved_99th": 19189150515, + "accelerator_memory_reserved_avg": 12490471636, + "accelerator_memory_max": 24553455616, + "accelerator_memory_reserved_99th": 19189150515, "train_time": 2022.7454924520134, "file_size": 37181760, "num_trainable_params": 9289728, @@ -358,8 +358,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank32-lorafa.json b/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank32-lorafa.json index e95ab18d7e..35c7a9981d 100644 --- a/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank32-lorafa.json +++ b/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank32-lorafa.json @@ -69,9 +69,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 11106307276, - "cuda_memory_max": 20187185152, - "cuda_memory_reserved_99th": 16257394933, + "accelerator_memory_reserved_avg": 11106307276, + "accelerator_memory_max": 20187185152, + "accelerator_memory_reserved_99th": 16257394933, "train_time": 1821.1390361119993, "file_size": 36715216, "num_trainable_params": 3670016, @@ -360,8 +360,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank32.json b/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank32.json index b90d9048cb..f3d348c07e 100644 --- a/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank32.json +++ b/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank32.json @@ -67,9 +67,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 11868689976, - "cuda_memory_max": 22273851392, - "cuda_memory_reserved_99th": 17710763212, + "accelerator_memory_reserved_avg": 11868689976, + "accelerator_memory_max": 22273851392, + "accelerator_memory_reserved_99th": 17710763212, "train_time": 1796.1857790169925, "file_size": 36715216, "num_trainable_params": 9175040, @@ -358,8 +358,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank64-rslora.json b/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank64-rslora.json index fd499d5401..dbeb788d4e 100644 --- a/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank64-rslora.json +++ b/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank64-rslora.json @@ -67,9 +67,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 12128059444, - "cuda_memory_max": 22538092544, - "cuda_memory_reserved_99th": 17953927987, + "accelerator_memory_reserved_avg": 12128059444, + "accelerator_memory_max": 22538092544, + "accelerator_memory_reserved_99th": 17953927987, "train_time": 1871.457509397991, "file_size": 73415408, "num_trainable_params": 18350080, @@ -358,8 +358,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank64.json b/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank64.json index d2087db6a1..5dd3d50954 100644 --- a/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank64.json +++ b/method_comparison/MetaMathQA/results/lora--llama-3.2-3B-rank64.json @@ -67,9 +67,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 12128055669, - "cuda_memory_max": 22540189696, - "cuda_memory_reserved_99th": 17953927987, + "accelerator_memory_reserved_avg": 12128055669, + "accelerator_memory_max": 22540189696, + "accelerator_memory_reserved_99th": 17953927987, "train_time": 1853.4967184819961, "file_size": 73415408, "num_trainable_params": 18350080, @@ -358,8 +358,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/oft--llama-3.2-3B-rank32.json b/method_comparison/MetaMathQA/results/oft--llama-3.2-3B-rank32.json index 1f7f3d5c46..9e250b7a3d 100644 --- a/method_comparison/MetaMathQA/results/oft--llama-3.2-3B-rank32.json +++ b/method_comparison/MetaMathQA/results/oft--llama-3.2-3B-rank32.json @@ -58,9 +58,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 18387461314, - "cuda_memory_max": 28913434624, - "cuda_memory_reserved_99th": 24327110000, + "accelerator_memory_reserved_avg": 18387461314, + "accelerator_memory_max": 28913434624, + "accelerator_memory_reserved_99th": 24327110000, "train_time": 5771.733417916999, "file_size": 66533232, "num_trainable_params": 16629760, @@ -349,8 +349,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/prefixtuning--llama-3.2-3B-lr_0.001.json b/method_comparison/MetaMathQA/results/prefixtuning--llama-3.2-3B-lr_0.001.json index 61a5c49e6e..9c1717d39a 100644 --- a/method_comparison/MetaMathQA/results/prefixtuning--llama-3.2-3B-lr_0.001.json +++ b/method_comparison/MetaMathQA/results/prefixtuning--llama-3.2-3B-lr_0.001.json @@ -47,9 +47,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 11766684083, - "cuda_memory_max": 20912799744, - "cuda_memory_reserved_99th": 16945051074, + "accelerator_memory_reserved_avg": 11766684083, + "accelerator_memory_max": 20912799744, + "accelerator_memory_reserved_99th": 16945051074, "train_time": 1661.6597991429953, "file_size": 45875328, "num_trainable_params": 11468800, @@ -338,8 +338,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/prompt_tuning--llama-3.2-3B-default.json b/method_comparison/MetaMathQA/results/prompt_tuning--llama-3.2-3B-default.json index 643caf477b..013c9ebf41 100644 --- a/method_comparison/MetaMathQA/results/prompt_tuning--llama-3.2-3B-default.json +++ b/method_comparison/MetaMathQA/results/prompt_tuning--llama-3.2-3B-default.json @@ -50,9 +50,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 15297773830, - "cuda_memory_max": 24379392000, - "cuda_memory_reserved_99th": 20669781770, + "accelerator_memory_reserved_avg": 15297773830, + "accelerator_memory_max": 24379392000, + "accelerator_memory_reserved_99th": 20669781770, "train_time": 2379.557773831024, "file_size": 2457728, "num_trainable_params": 614400, @@ -341,8 +341,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/prompt_tuning--llama-3.2-3B-lr_0.001.json b/method_comparison/MetaMathQA/results/prompt_tuning--llama-3.2-3B-lr_0.001.json index 527c329ef8..2ce456649c 100644 --- a/method_comparison/MetaMathQA/results/prompt_tuning--llama-3.2-3B-lr_0.001.json +++ b/method_comparison/MetaMathQA/results/prompt_tuning--llama-3.2-3B-lr_0.001.json @@ -49,9 +49,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 15297364466, - "cuda_memory_max": 24408752128, - "cuda_memory_reserved_99th": 20650676715, + "accelerator_memory_reserved_avg": 15297364466, + "accelerator_memory_max": 24408752128, + "accelerator_memory_reserved_99th": 20650676715, "train_time": 2394.4007484640024, "file_size": 2457728, "num_trainable_params": 614400, @@ -340,8 +340,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/ptuning--llama-3.2-3B-default.json b/method_comparison/MetaMathQA/results/ptuning--llama-3.2-3B-default.json index e111eeafb4..5ad6db2181 100644 --- a/method_comparison/MetaMathQA/results/ptuning--llama-3.2-3B-default.json +++ b/method_comparison/MetaMathQA/results/ptuning--llama-3.2-3B-default.json @@ -50,9 +50,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 11867101593, - "cuda_memory_max": 20937965568, - "cuda_memory_reserved_99th": 17215688540, + "accelerator_memory_reserved_avg": 11867101593, + "accelerator_memory_max": 20937965568, + "accelerator_memory_reserved_99th": 17215688540, "train_time": 1707.340225783013, "file_size": 245880, "num_trainable_params": 28382208, @@ -341,8 +341,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/randlora--llama-3.2-3B-default.json b/method_comparison/MetaMathQA/results/randlora--llama-3.2-3B-default.json index 8c87884719..1025019a88 100644 --- a/method_comparison/MetaMathQA/results/randlora--llama-3.2-3B-default.json +++ b/method_comparison/MetaMathQA/results/randlora--llama-3.2-3B-default.json @@ -58,9 +58,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 12743670025, - "cuda_memory_max": 22798139392, - "cuda_memory_reserved_99th": 18436063232, + "accelerator_memory_reserved_avg": 12743670025, + "accelerator_memory_max": 22798139392, + "accelerator_memory_reserved_99th": 18436063232, "train_time": 2213.072415724004, "file_size": 2211281240, "num_trainable_params": 9289728, @@ -349,8 +349,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/vblora--llama-3.2-3B-default.json b/method_comparison/MetaMathQA/results/vblora--llama-3.2-3B-default.json index 818a4491f9..ccf041765e 100644 --- a/method_comparison/MetaMathQA/results/vblora--llama-3.2-3B-default.json +++ b/method_comparison/MetaMathQA/results/vblora--llama-3.2-3B-default.json @@ -59,9 +59,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 11735344663, - "cuda_memory_max": 22181576704, - "cuda_memory_reserved_99th": 17635223797, + "accelerator_memory_reserved_avg": 11735344663, + "accelerator_memory_max": 22181576704, + "accelerator_memory_reserved_99th": 17635223797, "train_time": 1961.761054087001, "file_size": 4864912, "num_trainable_params": 1212416, @@ -350,8 +350,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } -} \ No newline at end of file +} diff --git a/method_comparison/MetaMathQA/results/vera--llama-3.2-3B-default.json b/method_comparison/MetaMathQA/results/vera--llama-3.2-3B-default.json index cd834c83a3..690c350729 100644 --- a/method_comparison/MetaMathQA/results/vera--llama-3.2-3B-default.json +++ b/method_comparison/MetaMathQA/results/vera--llama-3.2-3B-default.json @@ -55,9 +55,9 @@ "error_msg": "" }, "train_info": { - "cuda_memory_reserved_avg": 11489715316, - "cuda_memory_max": 21596471296, - "cuda_memory_reserved_99th": 17291123097, + "accelerator_memory_reserved_avg": 11489715316, + "accelerator_memory_max": 21596471296, + "accelerator_memory_reserved_99th": 17291123097, "train_time": 1819.9693055349999, "file_size": 6821968, "num_trainable_params": 129024, @@ -346,8 +346,8 @@ "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", - "gpu": "NVIDIA L40S" + "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. 
OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n"
     }
-}
\ No newline at end of file
+}
diff --git a/method_comparison/MetaMathQA/run.py b/method_comparison/MetaMathQA/run.py
index d5261ede81..0d159220eb 100644
--- a/method_comparison/MetaMathQA/run.py
+++ b/method_comparison/MetaMathQA/run.py
@@ -48,14 +48,14 @@
     get_peft_branch,
     get_tokenizer,
     get_train_config,
-    init_cuda,
+    init_accelerator,
     log_results,
     validate_experiment_path,
 )
 from data import get_train_valid_test_datasets
 
 from peft import AdaLoraConfig, PeftConfig
-from peft.utils import CONFIG_NAME
+from peft.utils import infer_device, CONFIG_NAME
 
 #
 # suppress all warnings
@@ -119,7 +119,7 @@ def train(
     batch_size: int,
     batch_size_eval: int,
     tokenizer: Any,
-    cuda_memory_init: int,
+    accelerator_memory_init: int,
     eval_steps: int,
     generation_kwargs: dict[str, Any],
     grad_norm_clip: float,
@@ -130,17 +130,20 @@
     use_amp: bool,
     is_adalora: bool,
 ) -> TrainResult:
-    cuda_memory_allocated_log = []
-    cuda_memory_reserved_log = []
+    accelerator_memory_allocated_log = []
+    accelerator_memory_reserved_log = []
     losses = []
     durations = []
     metrics = []
     sample = 0  # keep count of the current sample
     total_samples = 0  # total number of samples over all epochs
     total_tokens = []  # total number of tokens over all epochs
+
+    device_type = infer_device()
+    torch_accelerator_module = getattr(torch, device_type, torch.cuda)
     if use_amp:
-        grad_scaler: GradScaler | DummyGradScaler = GradScaler(device="cuda")
-        autocast_ctx: Callable[[], AbstractContextManager[Any]] = partial(autocast, device_type="cuda")
+        grad_scaler: GradScaler | DummyGradScaler = GradScaler(device=device_type)
+        autocast_ctx: Callable[[], ContextManager[Any]] = partial(autocast, device_type=device_type)
     else:
         grad_scaler = DummyGradScaler()
         autocast_ctx = nullcontext
@@ -223,8 +226,12 @@
             losses.append(loss.item())
             pbar.set_postfix({"loss": loss.item()})
-            cuda_memory_allocated_log.append(torch.cuda.memory_allocated() - cuda_memory_init)
-            cuda_memory_reserved_log.append(torch.cuda.memory_reserved() - cuda_memory_init)
+            accelerator_memory_allocated_log.append(
+                torch_accelerator_module.memory_allocated() - accelerator_memory_init
+            )
+            accelerator_memory_reserved_log.append(
+                torch_accelerator_module.memory_reserved() - accelerator_memory_init
+            )
             toc = time.perf_counter()
             durations.append(toc - tic)
 
@@ -232,8 +239,8 @@
             if step % eval_steps == 0:
                 tic_eval = time.perf_counter()
                 loss_avg = sum(losses[-eval_steps:]) / eval_steps
-                memory_allocated_avg = sum(cuda_memory_allocated_log[-eval_steps:]) / eval_steps
-                memory_reserved_avg = sum(cuda_memory_reserved_log[-eval_steps:]) / eval_steps
+                memory_allocated_avg = sum(accelerator_memory_allocated_log[-eval_steps:]) / eval_steps
+                memory_reserved_avg = sum(accelerator_memory_reserved_log[-eval_steps:]) / eval_steps
                 token_sum = sum(total_tokens[-eval_steps:])
                 dur_train = sum(durations[-eval_steps:])
                 tokens_per_sec = token_sum / dur_train
@@ -292,7 +299,7 @@
                 print_verbose(json.dumps(log_dict))
 
     #
    # TODO is this needed?
-    torch.cuda.empty_cache()
+    torch_accelerator_module.empty_cache()
     gc.collect()
 
     print_verbose(f"Training finished after {max_steps} steps, evaluation on test set follows.")
@@ -340,7 +347,7 @@
     train_result = TrainResult(
         status=status,
         train_time=train_time,
-        cuda_memory_reserved_log=cuda_memory_reserved_log,
+        accelerator_memory_reserved_log=accelerator_memory_reserved_log,
         losses=losses,
         metrics=metrics,
         error_msg=error_msg,
@@ -373,7 +380,7 @@ def main(*, path_experiment: str, experiment_name: str, clean: bool) -> None:
     set_seed(train_config.seed)
 
     # initialize objects
-    cuda_memory_init = init_cuda()
+    accelerator_memory_init = init_accelerator()
     tokenizer = get_tokenizer(model_id=train_config.model_id, max_seq_length=train_config.max_seq_length)
 
     model_info = get_base_model_info(train_config.model_id)
@@ -396,7 +403,7 @@ def main(*, path_experiment: str, experiment_name: str, clean: bool) -> None:
         batch_size=train_config.batch_size,
         batch_size_eval=train_config.batch_size_eval,
         tokenizer=tokenizer,
-        cuda_memory_init=cuda_memory_init,
+        accelerator_memory_init=accelerator_memory_init,
         eval_steps=train_config.eval_steps,
         generation_kwargs=train_config.generation_kwargs,
         grad_norm_clip=train_config.grad_norm_clip,
@@ -424,7 +431,7 @@ def main(*, path_experiment: str, experiment_name: str, clean: bool) -> None:
     log_results(
         experiment_name=experiment_name,
         train_result=train_result,
-        cuda_memory_init=cuda_memory_init,
+        accelerator_memory_init=accelerator_memory_init,
         time_total=time_total,
         file_size=file_size,
         model_info=model_info,
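
The run.py changes above all follow one pattern: detect the accelerator once with `infer_device()`, go through the matching `torch.<device>` module instead of hard-coding `torch.cuda`, and pass the detected device type to the AMP helpers. Below is a minimal, self-contained sketch of that pattern; `infer_device` is the helper imported in the diff, while everything else (tensor shapes, the toy computation) is purely illustrative, and it assumes the detected backend exposes the `torch.cuda`-style memory API (true for the CUDA and XPU backends this patch targets).

```python
# Minimal sketch of the device-agnostic pattern used in run.py (illustrative only).
import torch
from torch.amp import GradScaler, autocast

from peft.utils import infer_device

device_type = infer_device()  # e.g. "cuda" on NVIDIA GPUs, "xpu" on Intel GPUs
# torch.cuda and torch.xpu expose the same memory/seeding API, so the module is
# looked up dynamically, with torch.cuda as the fallback.
torch_accelerator_module = getattr(torch, device_type, torch.cuda)

# AMP objects receive the device type explicitly instead of a hard-coded "cuda".
grad_scaler = GradScaler(device=device_type)
autocast_ctx = autocast(device_type=device_type)

# Memory bookkeeping is reported relative to a baseline taken before training.
torch_accelerator_module.reset_peak_memory_stats()
accelerator_memory_init = torch_accelerator_module.max_memory_reserved()

with autocast_ctx:
    x = torch.randn(256, 256, device=device_type)
    y = x @ x  # stand-in for a forward pass

reserved = torch_accelerator_module.memory_reserved() - accelerator_memory_init
print(f"reserved since init: {reserved // 2**20}MB")
```
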
diff --git a/method_comparison/MetaMathQA/utils.py b/method_comparison/MetaMathQA/utils.py
index 00b24c3d2a..d48a301b35 100644
--- a/method_comparison/MetaMathQA/utils.py
+++ b/method_comparison/MetaMathQA/utils.py
@@ -44,14 +44,14 @@
 import peft
 from peft import PeftConfig, get_peft_model, prepare_model_for_kbit_training
 from peft.optimizers import create_lorafa_optimizer, create_loraplus_optimizer
-from peft.utils import SAFETENSORS_WEIGHTS_NAME
+from peft.utils import infer_device, SAFETENSORS_WEIGHTS_NAME
 
+device = infer_device()
 
-if not torch.cuda.is_available():
-    raise RuntimeError("CUDA is not available, currently only CUDA is supported")
+if device not in ["cuda", "xpu"]:
+    raise RuntimeError("Neither CUDA nor XPU is available; currently only CUDA and XPU are supported")
 
-device = "cuda"
-CUDA_MEMORY_INIT_THRESHOLD = 500 * 2**20  # 500MB
+ACCELERATOR_MEMORY_INIT_THRESHOLD = 500 * 2**20  # 500MB
 FILE_NAME_DEFAULT_TRAIN_PARAMS = os.path.join(os.path.dirname(__file__), "default_training_params.json")
 FILE_NAME_TRAIN_PARAMS = "training_params.json"  # specific params for this experiment
 # main results
@@ -173,23 +173,24 @@ def get_train_config(path: str) -> TrainConfig:
     return TrainConfig(**config_kwargs)
 
 
-def init_cuda() -> int:
+def init_accelerator() -> int:
+    torch_accelerator_module = getattr(torch, device, torch.cuda)
     torch.manual_seed(0)
-    torch.cuda.reset_peak_memory_stats()
-    torch.cuda.manual_seed_all(0)
+    torch_accelerator_module.reset_peak_memory_stats()
+    torch_accelerator_module.manual_seed_all(0)
     # might not be necessary, but just to be sure
     nn.Linear(1, 1).to(device)
 
-    cuda_memory_init = torch.cuda.max_memory_reserved()
-    if cuda_memory_init > CUDA_MEMORY_INIT_THRESHOLD:
+    accelerator_memory_init = torch_accelerator_module.max_memory_reserved()
+    if accelerator_memory_init > ACCELERATOR_MEMORY_INIT_THRESHOLD:
         raise RuntimeError(
-            f"CUDA memory usage at start is too high: {cuda_memory_init // 2**20}MB, please ensure that no other "
+            f"{device} memory usage at start is too high: {accelerator_memory_init // 2**20}MB, please ensure that no other "
             f"processes are running on {device}."
         )
-    torch.cuda.reset_peak_memory_stats()
-    cuda_memory_init = torch.cuda.max_memory_reserved()
-    return cuda_memory_init
+    torch_accelerator_module.reset_peak_memory_stats()
+    accelerator_memory_init = torch_accelerator_module.max_memory_reserved()
+    return accelerator_memory_init
 
 
 def get_tokenizer(*, model_id: str, max_seq_length: int):
@@ -524,13 +525,15 @@ def get_package_info() -> dict[str, Optional[str]]:
 
 
 def get_system_info() -> dict[str, str]:
+    device = infer_device()
+    torch_accelerator_module = getattr(torch, device, torch.cuda)
     system_info = {
         "system": platform.system(),
         "release": platform.release(),
         "version": platform.version(),
         "machine": platform.machine(),
         "processor": platform.processor(),
-        "gpu": torch.cuda.get_device_name(0),
+        "accelerator": torch_accelerator_module.get_device_name(0),
     }
     return system_info
@@ -569,7 +572,7 @@ class TrainStatus(enum.Enum):
 class TrainResult:
     status: TrainStatus
     train_time: float
-    cuda_memory_reserved_log: list[int]
+    accelerator_memory_reserved_log: list[int]
     losses: list[float]
     metrics: list[Any]  # TODO
     error_msg: str
@@ -578,16 +581,16 @@
 
 
 def log_to_console(log_data: dict[str, Any], print_fn: Callable[..., None]) -> None:
-    cuda_memory_max = log_data["train_info"]["cuda_memory_max"]
-    cuda_memory_avg = log_data["train_info"]["cuda_memory_reserved_avg"]
-    cuda_memory_reserved_99th = log_data["train_info"]["cuda_memory_reserved_99th"]
+    accelerator_memory_max = log_data["train_info"]["accelerator_memory_max"]
+    accelerator_memory_avg = log_data["train_info"]["accelerator_memory_reserved_avg"]
+    accelerator_memory_reserved_99th = log_data["train_info"]["accelerator_memory_reserved_99th"]
     time_train = log_data["train_info"]["train_time"]
     time_total = log_data["run_info"]["total_time"]
     file_size = log_data["train_info"]["file_size"]
-    print_fn(f"cuda memory max: {cuda_memory_max // 2**20}MB")
-    print_fn(f"cuda memory reserved avg: {cuda_memory_avg // 2**20}MB")
-    print_fn(f"cuda memory reserved 99th percentile: {cuda_memory_reserved_99th // 2**20}MB")
+    print_fn(f"accelerator memory max: {accelerator_memory_max // 2**20}MB")
+    print_fn(f"accelerator memory reserved avg: {accelerator_memory_avg // 2**20}MB")
print_fn(f"accelerator memory reserved 99th percentile: {accelerator_memory_reserved_99th // 2**20}MB") print_fn(f"train time: {time_train}s") print_fn(f"total time: {time_total:.2f}s") print_fn(f"file size of checkpoint: {file_size / 2**20:.1f}MB") @@ -612,7 +615,7 @@ def log_results( *, experiment_name: str, train_result: TrainResult, - cuda_memory_init: int, + accelerator_memory_init: int, time_total: float, file_size: int, model_info: Optional[huggingface_hub.ModelInfo], @@ -623,9 +626,13 @@ def log_results( print_fn: Callable[..., None], ) -> None: # collect results - cuda_memory_final = torch.cuda.max_memory_reserved() - cuda_memory_avg = int(sum(train_result.cuda_memory_reserved_log) / len(train_result.cuda_memory_reserved_log)) - cuda_memory_reserved_99th = int(np.percentile(train_result.cuda_memory_reserved_log, 99)) + device = infer_device() + torch_accelerator_module = getattr(torch, device, torch.cuda) + accelerator_memory_final = torch_accelerator_module.max_memory_reserved() + accelerator_memory_avg = int( + sum(train_result.accelerator_memory_reserved_log) / len(train_result.accelerator_memory_reserved_log) + ) + accelerator_memory_reserved_99th = int(np.percentile(train_result.accelerator_memory_reserved_log, 99)) meta_info = get_meta_info() if model_info is not None: @@ -679,9 +686,9 @@ def log_results( "error_msg": train_result.error_msg, }, "train_info": { - "cuda_memory_reserved_avg": cuda_memory_avg, - "cuda_memory_max": (cuda_memory_final - cuda_memory_init), - "cuda_memory_reserved_99th": cuda_memory_reserved_99th, + "accelerator_memory_reserved_avg": accelerator_memory_avg, + "accelerator_memory_max": (accelerator_memory_final - accelerator_memory_init), + "accelerator_memory_reserved_99th": accelerator_memory_reserved_99th, "train_time": train_result.train_time, "file_size": file_size, "num_trainable_params": train_result.num_trainable_params, diff --git a/method_comparison/app.py b/method_comparison/app.py index 6f7d927fbf..ad47254228 100644 --- a/method_comparison/app.py +++ b/method_comparison/app.py @@ -25,9 +25,9 @@ metric_preferences = { - "cuda_memory_reserved_avg": "lower", - "cuda_memory_max": "lower", - "cuda_memory_reserved_99th": "lower", + "accelerator_memory_reserved_avg": "lower", + "accelerator_memory_max": "lower", + "accelerator_memory_reserved_99th": "lower", "total_time": "lower", "train_time": "lower", "file_size": "lower", @@ -222,7 +222,9 @@ def build_app(df): with gr.Row(): x_default = ( - "cuda_memory_max" if "cuda_memory_max" in metric_preferences else list(metric_preferences.keys())[0] + "accelerator_memory_max" + if "accelerator_memory_max" in metric_preferences + else list(metric_preferences.keys())[0] ) y_default = ( "test_accuracy" if "test_accuracy" in metric_preferences else list(metric_preferences.keys())[1] diff --git a/method_comparison/processing.py b/method_comparison/processing.py index f8a74f4c69..888f3f6855 100644 --- a/method_comparison/processing.py +++ b/method_comparison/processing.py @@ -45,9 +45,9 @@ def preprocess(rows, task_name: str, print_fn=print): "train_config": run_info["train_config"], "peft_type": peft_type, "peft_config": run_info["peft_config"], - "cuda_memory_reserved_avg": train_info["cuda_memory_reserved_avg"], - "cuda_memory_max": train_info["cuda_memory_max"], - "cuda_memory_reserved_99th": train_info["cuda_memory_reserved_99th"], + "accelerator_memory_reserved_avg": train_info["accelerator_memory_reserved_avg"], + "accelerator_memory_max": train_info["accelerator_memory_max"], + 
"accelerator_memory_reserved_99th": train_info["accelerator_memory_reserved_99th"], "total_time": run_info["total_time"], "train_time": train_info["train_time"], "file_size": train_info["file_size"], @@ -93,9 +93,9 @@ def load_df(path, task_name, print_fn=print): "train_config": "string", "peft_type": "string", "peft_config": "string", - "cuda_memory_reserved_avg": int, - "cuda_memory_max": int, - "cuda_memory_reserved_99th": int, + "accelerator_memory_reserved_avg": int, + "accelerator_memory_max": int, + "accelerator_memory_reserved_99th": int, "total_time": float, "train_time": float, "file_size": int, @@ -128,9 +128,9 @@ def load_df(path, task_name, print_fn=print): "train_time", "test_accuracy", "train_loss", - "cuda_memory_max", - "cuda_memory_reserved_99th", - "cuda_memory_reserved_avg", + "accelerator_memory_max", + "accelerator_memory_reserved_99th", + "accelerator_memory_reserved_avg", "file_size", "created_at", "task_name",