diff --git a/examples/conditional_generation/multitask_prompt_tuning.ipynb b/examples/conditional_generation/multitask_prompt_tuning.ipynb index 96fd2180dd..1eaec5b016 100644 --- a/examples/conditional_generation/multitask_prompt_tuning.ipynb +++ b/examples/conditional_generation/multitask_prompt_tuning.ipynb @@ -9,12 +9,13 @@ }, "outputs": [], "source": [ + "import torch\n", "from datasets import load_dataset\n", "from transformers import set_seed, AutoModelForSeq2SeqLM, AutoTokenizer\n", "from peft import get_peft_model, MultitaskPromptTuningConfig, TaskType, MultitaskPromptTuningInit\n", "\n", "set_seed(42)\n", - "\n", + "device = torch.accelerator.current_accelerator().type if hasattr(torch, \"accelerator\") else \"cuda\"\n", "model_name = \"google/flan-t5-base\"\n", "\n", "peft_config = MultitaskPromptTuningConfig(\n", @@ -31,18 +32,18 @@ "model = AutoModelForSeq2SeqLM.from_pretrained(model_name)\n", "model = get_peft_model(model, peft_config)\n", "\n", - "model = model.cuda()\n", + "model = model.to(device)\n", "\n", "\n", "def send_to_device(batch):\n", " for i in batch:\n", - " batch[i] = batch[i].cuda()\n", + " batch[i] = batch[i].to(device)\n", " return batch" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "eb112bc1-ffaf-49fa-a216-0d601ec304ee", "metadata": { "tags": [] @@ -86,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "e5a16ec4-8fef-4ba9-95b6-a661eb51e50c", "metadata": { "tags": [] @@ -159,7 +160,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "cceecc94-f43a-4f62-8d45-926f2f02f36d", "metadata": { "tags": [] @@ -293,7 +294,7 @@ " num_tasks=1,\n", " task_type=TaskType.SEQ_2_SEQ_LM,\n", " prompt_tuning_init=MultitaskPromptTuningInit.EXACT_SOURCE_TASK,\n", - " prompt_tuning_init_state_dict_path=\"checkpoints_source/50000/adapter_model.bin\",\n", + " prompt_tuning_init_state_dict_path=\"checkpoints_source/50000/adapter_model.safetensors\",\n", " num_virtual_tokens=50,\n", " num_transformer_submodules=1,\n", ")\n", @@ -302,7 +303,7 @@ "model = AutoModelForSeq2SeqLM.from_pretrained(model_name)\n", "model = get_peft_model(model, peft_config)\n", "\n", - "model = model.cuda()" + "model = model.to(device)" ] }, { @@ -360,8 +361,9 @@ "source": [ "# load last checkpoint for now\n", "from peft import set_peft_model_state_dict\n", + "from safetensors.torch import load_file\n", "\n", - "sd_6000 = torch.load(\"checkpoints_target/6000/adapter_model.bin\")\n", + "sd_6000 = load_file(\"checkpoints_target/6000/adapter_model.safetensors\")\n", "set_peft_model_state_dict(model, sd_6000)\n", "\n", "# evaluate val\n", @@ -382,6 +384,22 @@ "f1 = {f1}\"\"\"\n", ")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d18325c-9607-4cb5-a5b0-5b44dfee2a75", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43988e92-af42-45cb-8bca-f19c193ad04f", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -400,7 +418,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/examples/conditional_generation/peft_adalora_seq2seq.py b/examples/conditional_generation/peft_adalora_seq2seq.py index eac19f3d55..32f532226b 100644 --- a/examples/conditional_generation/peft_adalora_seq2seq.py +++ b/examples/conditional_generation/peft_adalora_seq2seq.py @@ -11,7 +11,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false" -device = "cuda" +device = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda" model_name_or_path = "facebook/bart-base" tokenizer_name_or_path = "facebook/bart-base" @@ -24,6 +24,20 @@ batch_size = 8 +# loading dataset +dataset = load_dataset("financial_phrasebank", "sentences_allagree") +dataset = dataset["train"].train_test_split(test_size=0.1) +dataset["validation"] = dataset["test"] +del dataset["test"] + +classes = dataset["train"].features["label"].names +dataset = dataset.map( + lambda x: {"text_label": [classes[label] for label in x["label"]]}, + batched=True, + num_proc=1, +) + + # creating model peft_config = AdaLoraConfig( init_r=12, @@ -37,6 +51,7 @@ lora_dropout=0.1, task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, + total_step=len(dataset["train"]) * num_epochs, ) model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path) @@ -44,20 +59,6 @@ model.print_trainable_parameters() -# loading dataset -dataset = load_dataset("financial_phrasebank", "sentences_allagree") -dataset = dataset["train"].train_test_split(test_size=0.1) -dataset["validation"] = dataset["test"] -del dataset["test"] - -classes = dataset["train"].features["label"].names -dataset = dataset.map( - lambda x: {"text_label": [classes[label] for label in x["label"]]}, - batched=True, - num_proc=1, -) - - # data preprocessing tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) @@ -159,7 +160,7 @@ def preprocess_function(examples): model.save_pretrained(peft_model_id) -ckpt = f"{peft_model_id}/adapter_model.bin" +ckpt = f"{peft_model_id}/adapter_model.safetensors" # get_ipython().system('du -h $ckpt') diff --git a/examples/conditional_generation/peft_ia3_seq2seq.ipynb b/examples/conditional_generation/peft_ia3_seq2seq.ipynb index 0bd57f89ba..155fab5530 100644 --- a/examples/conditional_generation/peft_ia3_seq2seq.ipynb +++ b/examples/conditional_generation/peft_ia3_seq2seq.ipynb @@ -2,7 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 12, + "execution_count": null, + "id": "0c152fc8", "metadata": { "id": "5f93b7d1" }, @@ -22,7 +23,7 @@ "from tqdm import tqdm\n", "from datasets import load_dataset\n", "\n", - "device = \"cuda\"\n", + "device = torch.accelerator.current_accelerator().type if hasattr(torch, \"accelerator\") else \"cuda\"\n", "model_name_or_path = \"bigscience/mt0-large\"\n", "tokenizer_name_or_path = \"bigscience/mt0-large\"\n", "\n", @@ -37,7 +38,8 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 2, + "id": "4e23624f", "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -49,10 +51,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 13, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -65,7 +67,8 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, + "id": "da74b569", "metadata": { "id": "8d0850ac" }, @@ -79,7 +82,8 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 4, + "id": "df33fce2", "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -233,7 +237,7 @@ ")" ] }, - "execution_count": 15, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -244,7 +248,8 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 5, + "id": "63d7bc2d", "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -257,7 +262,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "trainable params: 282,624 || all params: 1,229,863,936 || trainable%: 0.022980103060766553\n" + "trainable params: 282,624 || all params: 1,229,863,936 || trainable%: 0.0230\n" ] }, { @@ -276,11 +281,11 @@ " (SelfAttention): MT5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(\n", - " in_features=1024, out_features=1024, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 1024x1])\n", " )\n", " (v): Linear(\n", - " in_features=1024, out_features=1024, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 1024x1])\n", " )\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", @@ -293,7 +298,7 @@ " (DenseReluDense): MT5DenseGatedActDense(\n", " (wi_0): Linear(in_features=1024, out_features=2816, bias=False)\n", " (wi_1): Linear(\n", - " in_features=1024, out_features=2816, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=2816, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 2816x1])\n", " )\n", " (wo): Linear(in_features=2816, out_features=1024, bias=False)\n", @@ -311,11 +316,11 @@ " (SelfAttention): MT5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(\n", - " in_features=1024, out_features=1024, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 1024x1])\n", " )\n", " (v): Linear(\n", - " in_features=1024, out_features=1024, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 1024x1])\n", " )\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", @@ -327,7 +332,7 @@ " (DenseReluDense): MT5DenseGatedActDense(\n", " (wi_0): Linear(in_features=1024, out_features=2816, bias=False)\n", " (wi_1): Linear(\n", - " in_features=1024, out_features=2816, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=2816, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 2816x1])\n", " )\n", " (wo): Linear(in_features=2816, out_features=1024, bias=False)\n", @@ -352,11 +357,11 @@ " (SelfAttention): MT5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(\n", - " in_features=1024, out_features=1024, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 1024x1])\n", " )\n", " (v): Linear(\n", - " in_features=1024, out_features=1024, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 1024x1])\n", " )\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", @@ -369,11 +374,11 @@ " (EncDecAttention): MT5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(\n", - " in_features=1024, out_features=1024, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 1024x1])\n", " )\n", " (v): Linear(\n", - " in_features=1024, out_features=1024, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 1024x1])\n", " )\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", @@ -385,7 +390,7 @@ " (DenseReluDense): MT5DenseGatedActDense(\n", " (wi_0): Linear(in_features=1024, out_features=2816, bias=False)\n", " (wi_1): Linear(\n", - " in_features=1024, out_features=2816, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=2816, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 2816x1])\n", " )\n", " (wo): Linear(in_features=2816, out_features=1024, bias=False)\n", @@ -403,11 +408,11 @@ " (SelfAttention): MT5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(\n", - " in_features=1024, out_features=1024, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 1024x1])\n", " )\n", " (v): Linear(\n", - " in_features=1024, out_features=1024, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 1024x1])\n", " )\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", @@ -419,11 +424,11 @@ " (EncDecAttention): MT5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(\n", - " in_features=1024, out_features=1024, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 1024x1])\n", " )\n", " (v): Linear(\n", - " in_features=1024, out_features=1024, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 1024x1])\n", " )\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", @@ -435,7 +440,7 @@ " (DenseReluDense): MT5DenseGatedActDense(\n", " (wi_0): Linear(in_features=1024, out_features=2816, bias=False)\n", " (wi_1): Linear(\n", - " in_features=1024, out_features=2816, bias=False\n", + " (base_layer): Linear(in_features=1024, out_features=2816, bias=False)\n", " (ia3_l): ParameterDict( (default): Parameter containing: [torch.FloatTensor of size 2816x1])\n", " )\n", " (wo): Linear(in_features=2816, out_features=1024, bias=False)\n", @@ -457,7 +462,7 @@ ")" ] }, - "execution_count": 16, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -470,7 +475,8 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 6, + "id": "155b8728", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -519,27 +525,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:datasets.builder:Found cached dataset financial_phrasebank (/root/.cache/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141)\n" + "Using the latest cached version of the dataset since financial_phrasebank couldn't be found on the Hugging Face Hub\n", + "Found the latest cached dataset configuration 'sentences_allagree' at /root/.cache/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141 (last modified on Thu Jul 31 03:15:41 2025).\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bbfb7533b5ca459194e171df56b79566", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/1 [00:00 100:\n", - " break\n", - "test_preds" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.5 (v3.10.5:f377153967, Jun 6 2022, 12:36:10) [Clang 13.0.0 (clang-1300.0.29.30)]" - }, - "vscode": { - "interpreter": { - "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py index 4be3e1666b..50bd15a89e 100644 --- a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py +++ b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py @@ -54,10 +54,12 @@ def b2mb(x): # This context manager is used to track the peak memory usage of the process class TorchTracemalloc: def __enter__(self): + self.device_type = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda" + self.device_module = getattr(torch, self.device_type, torch.cuda) gc.collect() - torch.cuda.empty_cache() - torch.cuda.reset_max_memory_allocated() # reset the peak gauge to zero - self.begin = torch.cuda.memory_allocated() + self.device_module.empty_cache() + self.device_module.reset_peak_memory_stats() # reset the peak gauge to zero + self.begin = self.device_module.memory_allocated() self.process = psutil.Process() self.cpu_begin = self.cpu_mem_used() @@ -87,9 +89,9 @@ def __exit__(self, *exc): self.peak_monitoring = False gc.collect() - torch.cuda.empty_cache() - self.end = torch.cuda.memory_allocated() - self.peak = torch.cuda.max_memory_allocated() + self.device_module.empty_cache() + self.end = self.device_module.memory_allocated() + self.peak = self.device_module.max_memory_allocated() self.used = b2mb(self.end - self.begin) self.peaked = b2mb(self.peak - self.begin) @@ -199,12 +201,18 @@ def collate_fn(examples): optimizer.step() lr_scheduler.step() optimizer.zero_grad() - # Printing the GPU memory usage details such as allocated memory, peak memory, and total memory usage - accelerator.print(f"GPU Memory before entering the train : {b2mb(tracemalloc.begin)}") - accelerator.print(f"GPU Memory consumed at the end of the train (end-begin): {tracemalloc.used}") - accelerator.print(f"GPU Peak Memory consumed during the train (max-begin): {tracemalloc.peaked}") + # Printing the device memory usage details such as allocated memory, peak memory, and total memory usage accelerator.print( - f"GPU Total Peak Memory consumed during the train (max): {tracemalloc.peaked + b2mb(tracemalloc.begin)}" + f"{accelerator.device.type.upper()} Memory before entering the train : {b2mb(tracemalloc.begin)}" + ) + accelerator.print( + f"{accelerator.device.type.upper()} Memory consumed at the end of the train (end-begin): {tracemalloc.used}" + ) + accelerator.print( + f"{accelerator.device.type.upper()} Peak Memory consumed during the train (max-begin): {tracemalloc.peaked}" + ) + accelerator.print( + f"{accelerator.device.type.upper()} Total Peak Memory consumed during the train (max): {tracemalloc.peaked + b2mb(tracemalloc.begin)}" ) accelerator.print(f"CPU Memory before entering the train : {b2mb(tracemalloc.cpu_begin)}") @@ -230,12 +238,18 @@ def collate_fn(examples): preds = accelerator.gather_for_metrics(outputs).detach().cpu().numpy() eval_preds.extend(tokenizer.batch_decode(preds, skip_special_tokens=True)) - # Printing the GPU memory usage details such as allocated memory, peak memory, and total memory usage - accelerator.print(f"GPU Memory before entering the eval : {b2mb(tracemalloc.begin)}") - accelerator.print(f"GPU Memory consumed at the end of the eval (end-begin): {tracemalloc.used}") - accelerator.print(f"GPU Peak Memory consumed during the eval (max-begin): {tracemalloc.peaked}") + # Printing the device memory usage details such as allocated memory, peak memory, and total memory usage + accelerator.print( + f"{accelerator.device.type.upper()} Memory before entering the eval : {b2mb(tracemalloc.begin)}" + ) + accelerator.print( + f"{accelerator.device.type.upper()} Memory consumed at the end of the eval (end-begin): {tracemalloc.used}" + ) + accelerator.print( + f"{accelerator.device.type.upper()} Peak Memory consumed during the eval (max-begin): {tracemalloc.peaked}" + ) accelerator.print( - f"GPU Total Peak Memory consumed during the eval (max): {tracemalloc.peaked + b2mb(tracemalloc.begin)}" + f"{accelerator.device.type.upper()} Total Peak Memory consumed during the eval (max): {tracemalloc.peaked + b2mb(tracemalloc.begin)}" ) accelerator.print(f"CPU Memory before entering the eval : {b2mb(tracemalloc.cpu_begin)}") diff --git a/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb b/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb index aa85f9a743..259fc665dc 100644 --- a/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb +++ b/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb @@ -2,26 +2,10 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "5f93b7d1", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "===================================BUG REPORT===================================\n", - "Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n", - "For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link\n", - "================================================================================\n", - "CUDA SETUP: CUDA runtime path found: /home/sourab/miniconda3/envs/ml/lib/libcudart.so\n", - "CUDA SETUP: Highest compute capability among GPUs detected: 7.5\n", - "CUDA SETUP: Detected CUDA version 117\n", - "CUDA SETUP: Loading binary /home/sourab/miniconda3/envs/ml/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...\n" - ] - } - ], + "outputs": [], "source": [ "from transformers import AutoModelForSeq2SeqLM\n", "from peft import get_peft_config, get_peft_model, get_peft_model_state_dict, PrefixTuningConfig, TaskType\n", @@ -30,14 +14,13 @@ "import os\n", "\n", "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"3\"\n", "from transformers import AutoTokenizer\n", "from torch.utils.data import DataLoader\n", "from transformers import default_data_collator, get_linear_schedule_with_warmup\n", "from tqdm import tqdm\n", "from datasets import load_dataset\n", "\n", - "device = \"cuda\"\n", + "device = torch.accelerator.current_accelerator().type if hasattr(torch, \"accelerator\") else \"cuda\"\n", "model_name_or_path = \"t5-large\"\n", "tokenizer_name_or_path = \"t5-large\"\n", "\n", @@ -52,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "8d0850ac", "metadata": {}, "outputs": [], @@ -76,18 +59,19 @@ "name": "stderr", "output_type": "stream", "text": [ - "Found cached dataset financial_phrasebank (/home/sourab/.cache/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141)\n" + "Using the latest cached version of the dataset since financial_phrasebank couldn't be found on the Hugging Face Hub\n", + "Found the latest cached dataset configuration 'sentences_allagree' at /root/.cache/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141 (last modified on Thu Jul 31 06:23:15 2025).\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ec4be98991b84181bfa75f8846422b8b", + "model_id": "3b321971d6f942418bd5ef6105a1aa65", "version_major": 2, "version_minor": 0 }, "text/plain": [ - " 0%| | 0/1 [00:00 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer, GenerationConfig\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpeft\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n", - "File \u001b[0;32m:1055\u001b[0m, in \u001b[0;36m_handle_fromlist\u001b[0;34m(module, fromlist, import_, recursive)\u001b[0m\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/transformers/utils/import_utils.py:1076\u001b[0m, in \u001b[0;36m_LazyModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1074\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_module(name)\n\u001b[1;32m 1075\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_class_to_module\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[0;32m-> 1076\u001b[0m module \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_module\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_class_to_module\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1077\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(module, name)\n\u001b[1;32m 1078\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/transformers/utils/import_utils.py:1086\u001b[0m, in \u001b[0;36m_LazyModule._get_module\u001b[0;34m(self, module_name)\u001b[0m\n\u001b[1;32m 1084\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_get_module\u001b[39m(\u001b[38;5;28mself\u001b[39m, module_name: \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 1085\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1086\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mimportlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimport_module\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mmodule_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;18;43m__name__\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1087\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed to import \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodule_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m because of the following error (look up to see its\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1090\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m traceback):\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1091\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/importlib/__init__.py:127\u001b[0m, in \u001b[0;36mimport_module\u001b[0;34m(name, package)\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 126\u001b[0m level \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m--> 127\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_bootstrap\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_gcd_import\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpackage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/transformers/training_args_seq2seq.py:21\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Optional, Union\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneration\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfiguration_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m GenerationConfig\n\u001b[0;32m---> 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtraining_args\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TrainingArguments\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m add_start_docstrings\n\u001b[1;32m 25\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mgetLogger(\u001b[38;5;18m__name__\u001b[39m)\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/transformers/training_args.py:29\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Any, Dict, List, Optional, Union\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpackaging\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m version\n\u001b[0;32m---> 29\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdebug_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DebugOption\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtrainer_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 31\u001b[0m EvaluationStrategy,\n\u001b[1;32m 32\u001b[0m FSDPOption,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 36\u001b[0m ShardedDDPOption,\n\u001b[1;32m 37\u001b[0m )\n\u001b[1;32m 38\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 39\u001b[0m ExplicitEnum,\n\u001b[1;32m 40\u001b[0m cached_property,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 53\u001b[0m requires_backends,\n\u001b[1;32m 54\u001b[0m )\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/transformers/debug_utils.py:21\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ExplicitEnum, is_torch_available, logging\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_torch_available():\n\u001b[0;32m---> 21\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[1;32m 24\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mget_logger(\u001b[38;5;18m__name__\u001b[39m)\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mDebugUnderflowOverflow\u001b[39;00m:\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/__init__.py:1465\u001b[0m\n\u001b[1;32m 1463\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m library\n\u001b[1;32m 1464\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m TYPE_CHECKING:\n\u001b[0;32m-> 1465\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _meta_registrations\n\u001b[1;32m 1467\u001b[0m \u001b[38;5;66;03m# Enable CUDA Sanitizer\u001b[39;00m\n\u001b[1;32m 1468\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTORCH_CUDA_SANITIZER\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01min\u001b[39;00m os\u001b[38;5;241m.\u001b[39menviron:\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_meta_registrations.py:7\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims_common\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mutils\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Tensor\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_decomp\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _add_op_to_registry, global_decomposition_table, meta_table\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_ops\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OpOverload\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _elementwise_meta, ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_decomp/__init__.py:169\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m decompositions\n\u001b[1;32m 168\u001b[0m \u001b[38;5;66;03m# populate the table\u001b[39;00m\n\u001b[0;32m--> 169\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_decomp\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdecompositions\u001b[39;00m\n\u001b[1;32m 170\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_refs\u001b[39;00m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;66;03m# This list was copied from torch/_inductor/decomposition.py\u001b[39;00m\n\u001b[1;32m 173\u001b[0m \u001b[38;5;66;03m# excluding decompositions that results in prim ops\u001b[39;00m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# Resulting opset of decomposition is core aten ops\u001b[39;00m\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_decomp/decompositions.py:10\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Callable, cast, Iterable, List, Optional, Tuple, Union\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mprims\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims_common\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mutils\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mnn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctional\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mF\u001b[39;00m\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_prims/__init__.py:33\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims_common\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 18\u001b[0m check,\n\u001b[1;32m 19\u001b[0m Dim,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 30\u001b[0m type_to_dtype,\n\u001b[1;32m 31\u001b[0m )\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims_common\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mwrappers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m backwards_not_supported\n\u001b[0;32m---> 33\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_subclasses\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfake_tensor\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FakeTensor, FakeTensorMode\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01moverrides\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m handle_torch_function, has_torch_function\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_pytree\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tree_flatten, tree_map, tree_unflatten\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_subclasses/__init__.py:3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_subclasses\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfake_tensor\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 4\u001b[0m DynamicOutputShapeException,\n\u001b[1;32m 5\u001b[0m FakeTensor,\n\u001b[1;32m 6\u001b[0m FakeTensorMode,\n\u001b[1;32m 7\u001b[0m UnsupportedFakeTensorException,\n\u001b[1;32m 8\u001b[0m )\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_subclasses\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfake_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CrossRefFakeMode\n\u001b[1;32m 12\u001b[0m __all__ \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 13\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFakeTensor\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 14\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFakeTensorMode\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCrossRefFakeMode\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 18\u001b[0m ]\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_subclasses/fake_tensor.py:13\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mweakref\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ReferenceType\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[0;32m---> 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_guards\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Source\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_ops\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OpOverload\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims_common\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 16\u001b[0m elementwise_dtypes,\n\u001b[1;32m 17\u001b[0m ELEMENTWISE_TYPE_PROMOTION_KIND,\n\u001b[1;32m 18\u001b[0m is_float_dtype,\n\u001b[1;32m 19\u001b[0m is_integer_dtype,\n\u001b[1;32m 20\u001b[0m )\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_guards.py:14\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# TODO(voz): Stolen pattern, not sure why this is the case,\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# but mypy complains.\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m \u001b[38;5;66;03m# type: ignore[import]\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[1;32m 16\u001b[0m log\u001b[38;5;241m.\u001b[39mwarning(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo sympy found\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/__init__.py:74\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlogic\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (to_cnf, to_dnf, to_nnf, And, Or, Not, Xor, Nand, Nor,\n\u001b[1;32m 68\u001b[0m Implies, Equivalent, ITE, POSform, SOPform, simplify_logic, bool_map,\n\u001b[1;32m 69\u001b[0m true, false, satisfiable)\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01massumptions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (AppliedPredicate, Predicate, AssumptionsContext,\n\u001b[1;32m 72\u001b[0m assuming, Q, ask, register_handler, remove_handler, refine)\n\u001b[0;32m---> 74\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (Poly, PurePoly, poly_from_expr, parallel_poly_from_expr,\n\u001b[1;32m 75\u001b[0m degree, total_degree, degree_list, LC, LM, LT, pdiv, prem, pquo,\n\u001b[1;32m 76\u001b[0m pexquo, div, rem, quo, exquo, half_gcdex, gcdex, invert,\n\u001b[1;32m 77\u001b[0m subresultants, resultant, discriminant, cofactors, gcd_list, gcd,\n\u001b[1;32m 78\u001b[0m lcm_list, lcm, terms_gcd, trunc, monic, content, primitive, compose,\n\u001b[1;32m 79\u001b[0m decompose, sturm, gff_list, gff, sqf_norm, sqf_part, sqf_list, sqf,\n\u001b[1;32m 80\u001b[0m factor_list, factor, intervals, refine_root, count_roots, real_roots,\n\u001b[1;32m 81\u001b[0m nroots, ground_roots, nth_power_roots_poly, cancel, reduced, groebner,\n\u001b[1;32m 82\u001b[0m is_zero_dimensional, GroebnerBasis, poly, symmetrize, horner,\n\u001b[1;32m 83\u001b[0m interpolate, rational_interpolate, viete, together,\n\u001b[1;32m 84\u001b[0m BasePolynomialError, ExactQuotientFailed, PolynomialDivisionFailed,\n\u001b[1;32m 85\u001b[0m OperationNotSupported, HeuristicGCDFailed, HomomorphismFailed,\n\u001b[1;32m 86\u001b[0m IsomorphismFailed, ExtraneousFactors, EvaluationFailed,\n\u001b[1;32m 87\u001b[0m RefinementFailed, CoercionFailed, NotInvertible, NotReversible,\n\u001b[1;32m 88\u001b[0m NotAlgebraic, DomainError, PolynomialError, UnificationFailed,\n\u001b[1;32m 89\u001b[0m GeneratorsError, GeneratorsNeeded, ComputationFailed,\n\u001b[1;32m 90\u001b[0m UnivariatePolynomialError, MultivariatePolynomialError,\n\u001b[1;32m 91\u001b[0m PolificationFailed, OptionError, FlagError, minpoly,\n\u001b[1;32m 92\u001b[0m minimal_polynomial, primitive_element, field_isomorphism,\n\u001b[1;32m 93\u001b[0m to_number_field, isolate, round_two, prime_decomp, prime_valuation,\n\u001b[1;32m 94\u001b[0m galois_group, itermonomials, Monomial, lex, grlex,\n\u001b[1;32m 95\u001b[0m grevlex, ilex, igrlex, igrevlex, CRootOf, rootof, RootOf,\n\u001b[1;32m 96\u001b[0m ComplexRootOf, RootSum, roots, Domain, FiniteField, IntegerRing,\n\u001b[1;32m 97\u001b[0m RationalField, RealField, ComplexField, PythonFiniteField,\n\u001b[1;32m 98\u001b[0m GMPYFiniteField, PythonIntegerRing, GMPYIntegerRing, PythonRational,\n\u001b[1;32m 99\u001b[0m GMPYRationalField, AlgebraicField, PolynomialRing, FractionField,\n\u001b[1;32m 100\u001b[0m ExpressionDomain, FF_python, FF_gmpy, ZZ_python, ZZ_gmpy, QQ_python,\n\u001b[1;32m 101\u001b[0m QQ_gmpy, GF, FF, ZZ, QQ, ZZ_I, QQ_I, RR, CC, EX, EXRAW,\n\u001b[1;32m 102\u001b[0m construct_domain, swinnerton_dyer_poly, cyclotomic_poly,\n\u001b[1;32m 103\u001b[0m symmetric_poly, random_poly, interpolating_poly, jacobi_poly,\n\u001b[1;32m 104\u001b[0m chebyshevt_poly, chebyshevu_poly, hermite_poly, hermite_prob_poly,\n\u001b[1;32m 105\u001b[0m legendre_poly, laguerre_poly, apart, apart_list, assemble_partfrac_list,\n\u001b[1;32m 106\u001b[0m Options, ring, xring, vring, sring, field, xfield, vfield, sfield)\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mseries\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (Order, O, limit, Limit, gruntz, series, approximants,\n\u001b[1;32m 109\u001b[0m residue, EmptySequence, SeqPer, SeqFormula, sequence, SeqAdd, SeqMul,\n\u001b[1;32m 110\u001b[0m fourier_series, fps, difference_delta, limit_seq)\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (factorial, factorial2, rf, ff, binomial,\n\u001b[1;32m 113\u001b[0m RisingFactorial, FallingFactorial, subfactorial, carmichael,\n\u001b[1;32m 114\u001b[0m fibonacci, lucas, motzkin, tribonacci, harmonic, bernoulli, bell, euler,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 133\u001b[0m Znm, elliptic_k, elliptic_f, elliptic_e, elliptic_pi, beta, mathieus,\n\u001b[1;32m 134\u001b[0m mathieuc, mathieusprime, mathieucprime, riemann_xi, betainc, betainc_regularized)\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/polys/__init__.py:78\u001b[0m\n\u001b[1;32m 3\u001b[0m __all__ \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPoly\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPurePoly\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpoly_from_expr\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mparallel_poly_from_expr\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdegree\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtotal_degree\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdegree_list\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mLC\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mLM\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mLT\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpdiv\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mprem\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpquo\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfield\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mxfield\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mvfield\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msfield\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 66\u001b[0m ]\n\u001b[1;32m 68\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolytools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (Poly, PurePoly, poly_from_expr,\n\u001b[1;32m 69\u001b[0m parallel_poly_from_expr, degree, total_degree, degree_list, LC, LM,\n\u001b[1;32m 70\u001b[0m LT, pdiv, prem, pquo, pexquo, div, rem, quo, exquo, half_gcdex, gcdex,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 75\u001b[0m count_roots, real_roots, nroots, ground_roots, nth_power_roots_poly,\n\u001b[1;32m 76\u001b[0m cancel, reduced, groebner, is_zero_dimensional, GroebnerBasis, poly)\n\u001b[0;32m---> 78\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolyfuncs\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (symmetrize, horner, interpolate,\n\u001b[1;32m 79\u001b[0m rational_interpolate, viete)\n\u001b[1;32m 81\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrationaltools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m together\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolyerrors\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (BasePolynomialError, ExactQuotientFailed,\n\u001b[1;32m 84\u001b[0m PolynomialDivisionFailed, OperationNotSupported, HeuristicGCDFailed,\n\u001b[1;32m 85\u001b[0m HomomorphismFailed, IsomorphismFailed, ExtraneousFactors,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 90\u001b[0m MultivariatePolynomialError, PolificationFailed, OptionError,\n\u001b[1;32m 91\u001b[0m FlagError)\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/polys/polyfuncs.py:10\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolyoptions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m allowed_flags, build_options\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolytools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m poly_from_expr, Poly\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mspecialpolys\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 11\u001b[0m symmetric_poly, interpolating_poly)\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrings\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m sring\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutilities\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m numbered_symbols, take, public\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/polys/specialpolys.py:298\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m dmp_mul(f, h, n, K), dmp_mul(g, h, n, K), h\n\u001b[1;32m 296\u001b[0m \u001b[38;5;66;03m# A few useful polynomials from Wang's paper ('78).\u001b[39;00m\n\u001b[0;32m--> 298\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrings\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ring\n\u001b[1;32m 300\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_f_0\u001b[39m():\n\u001b[1;32m 301\u001b[0m R, x, y, z \u001b[38;5;241m=\u001b[39m ring(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mx,y,z\u001b[39m\u001b[38;5;124m\"\u001b[39m, ZZ)\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/polys/rings.py:30\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolyoptions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (Domain \u001b[38;5;28;01mas\u001b[39;00m DomainOpt,\n\u001b[1;32m 27\u001b[0m Order \u001b[38;5;28;01mas\u001b[39;00m OrderOpt, build_options)\n\u001b[1;32m 28\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolyutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (expr_from_dict, _dict_reorder,\n\u001b[1;32m 29\u001b[0m _parallel_dict_from_expr)\n\u001b[0;32m---> 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprinting\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdefaults\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DefaultPrinting\n\u001b[1;32m 31\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutilities\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m public, subsets\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutilities\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01miterables\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m is_sequence\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/printing/__init__.py:5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"Printing subsystem\"\"\"\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpretty\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pager_print, pretty, pretty_print, pprint, pprint_use_unicode, pprint_try_use_unicode\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlatex\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m latex, print_latex, multiline_latex\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmathml\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m mathml, print_mathml\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpython\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m python, print_python\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/printing/latex.py:18\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msympify\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SympifyError\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlogic\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mboolalg\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m true, BooleanTrue, BooleanFalse\n\u001b[0;32m---> 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtensor\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marray\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m NDimArray\n\u001b[1;32m 20\u001b[0m \u001b[38;5;66;03m# sympy.printing imports\u001b[39;00m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprinting\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprecedence\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m precedence_traditional\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/tensor/__init__.py:4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"A module to manipulate symbolic objects with indices including tensors\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mindexed\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m IndexedBase, Idx, Indexed\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mindex_methods\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_contraction_structure, get_indices\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m shape\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/tensor/indexed.py:114\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlogic\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m fuzzy_bool, fuzzy_not\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msympify\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _sympify\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mspecial\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtensor_functions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m KroneckerDelta\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmultipledispatch\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dispatch\n\u001b[1;32m 116\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutilities\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01miterables\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m is_sequence, NotIterable\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/functions/__init__.py:21\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01melementary\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtrigonometric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (sin, cos, tan,\n\u001b[1;32m 18\u001b[0m sec, csc, cot, sinc, asin, acos, atan, asec, acsc, acot, atan2)\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01melementary\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexponential\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (exp_polar, exp, log,\n\u001b[1;32m 20\u001b[0m LambertW)\n\u001b[0;32m---> 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01melementary\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhyperbolic\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (sinh, cosh, tanh, coth,\n\u001b[1;32m 22\u001b[0m sech, csch, asinh, acosh, atanh, acoth, asech, acsch)\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01melementary\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mintegers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m floor, ceiling, frac\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01melementary\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpiecewise\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (Piecewise, piecewise_fold,\n\u001b[1;32m 25\u001b[0m piecewise_exclusive)\n", - "File \u001b[0;32m:1007\u001b[0m, in \u001b[0;36m_find_and_load\u001b[0;34m(name, import_)\u001b[0m\n", - "File \u001b[0;32m:986\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[0;34m(name, import_)\u001b[0m\n", - "File \u001b[0;32m:680\u001b[0m, in \u001b[0;36m_load_unlocked\u001b[0;34m(spec)\u001b[0m\n", - "File \u001b[0;32m:846\u001b[0m, in \u001b[0;36mexec_module\u001b[0;34m(self, module)\u001b[0m\n", - "File \u001b[0;32m:978\u001b[0m, in \u001b[0;36mget_code\u001b[0;34m(self, fullname)\u001b[0m\n", - "File \u001b[0;32m:647\u001b[0m, in \u001b[0;36m_compile_bytecode\u001b[0;34m(data, name, bytecode_path, source_path)\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], + "outputs": [], "source": [ "import os\n", "\n", @@ -69,10 +26,9 @@ "from peft import get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType\n", "from datasets import load_dataset\n", "\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", "\n", - "device = \"cuda\"\n", + "device = torch.accelerator.current_accelerator().type if hasattr(torch, \"accelerator\") else \"cuda\"\n", "model_name_or_path = \"t5-large\"\n", "tokenizer_name_or_path = \"t5-large\"\n", "\n", @@ -100,7 +56,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "trainable params: 40960 || all params: 737709056 || trainable%: 0.005552324411210698\n" + "trainable params: 40,960 || all params: 737,709,056 || trainable%: 0.0056\n" ] }, { @@ -289,27 +245,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "Found cached dataset financial_phrasebank (/data/proxem/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141)\n" + "Using the latest cached version of the dataset since financial_phrasebank couldn't be found on the Hugging Face Hub\n", + "Found the latest cached dataset configuration 'sentences_allagree' at /root/.cache/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141 (last modified on Thu Jul 31 06:43:45 2025).\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d3a799c64a2c43258dc6166c90e2e49f", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/1 [00:00 Tensor(a!)\n", + " registered at /pytorch/build/aten/src/ATen/RegisterSchema.cpp:6\n", + " dispatch key: XPU\n", + " previous kernel: registered at /pytorch/aten/src/ATen/VmapModeRegistrations.cpp:37\n", + " new kernel: registered at /build/intel-pytorch-extension/build/Release/csrc/gpu/csrc/gpu/xpu/ATen/RegisterXPU_0.cpp:172 (function operator())\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2025-07-31 07:06:51,984] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to xpu (auto detect)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/bin/ld: cannot find -laio: No such file or directory\n", + "collect2: error: ld returned 1 exit status\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2025-07-31 07:06:52,955] [INFO] [logging.py:107:log_dist] [Rank -1] [TorchCheckpointEngine] Initialized with serialization = False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n" ] }, { @@ -471,7 +436,7 @@ "
\n", " \n", " \n", - " [1275/1275 02:52, Epoch 5/5]\n", + " [1275/1275 03:31, Epoch 5/5]\n", "
\n", " \n", " \n", @@ -485,33 +450,33 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", "
14.7848000.5769330.5594712.1699000.5071560.621145
20.6482000.4375750.5770930.5377000.4309960.651982
30.5362000.3978570.6255510.4822000.4267180.696035
40.4722000.3731600.6431720.4597000.4708940.682819
50.4525000.3702340.6563880.4360000.4096040.718062

" @@ -526,7 +491,7 @@ { "data": { "text/plain": [ - "TrainOutput(global_step=1275, training_loss=1.3787811279296875, metrics={'train_runtime': 173.3699, 'train_samples_per_second': 58.747, 'train_steps_per_second': 7.354, 'total_flos': 344546979840000.0, 'train_loss': 1.3787811279296875, 'epoch': 5.0})" + "TrainOutput(global_step=1275, training_loss=0.8170911183076747, metrics={'train_runtime': 213.5513, 'train_samples_per_second': 47.693, 'train_steps_per_second': 5.97, 'total_flos': 344546979840000.0, 'train_loss': 0.8170911183076747, 'epoch': 5.0})" ] }, "execution_count": 5, @@ -596,7 +561,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "bd20cd4c", "metadata": { "ExecuteTime": { @@ -604,17 +569,9 @@ "start_time": "2023-05-30T09:53:15.059304Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "164K\tt5-large_PROMPT_TUNING_SEQ_2_SEQ_LM/adapter_model.bin\r\n" - ] - } - ], + "outputs": [], "source": [ - "ckpt = f\"{peft_model_id}/adapter_model.bin\"\n", + "ckpt = f\"{peft_model_id}/adapter_model.safetensors\"\n", "!du -h $ckpt" ] }, @@ -654,13 +611,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "Aspocomp Group , headquartered in Helsinki , Finland , develops interconnection solutions for the electronics industry .\n", - "{'input_ids': tensor([[ 71, 7990, 7699, 1531, 3, 6, 3, 27630, 16, 29763,\n", - " 3, 6, 16458, 3, 6, 1344, 7, 1413, 28102, 1275,\n", - " 21, 8, 12800, 681, 3, 5, 1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", - " 1, 1, 1]])}\n", - "tensor([[ 0, 7163, 1]])\n", - "['neutral']\n" + "EPS grew to 0.04 eur from 0.02 eur .\n", + "{'input_ids': tensor([[ 3, 24935, 3, 4774, 12, 4097, 6348, 3, 1238, 45,\n", + " 4097, 4305, 3, 1238, 3, 5, 1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}\n", + "tensor([[ 0, 1465, 1]])\n", + "['positive']\n" ] } ], @@ -688,9 +643,9 @@ ], "metadata": { "kernelspec": { - "display_name": "peft", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "peft" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -702,7 +657,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.11.13" }, "toc": { "base_numbering": 1, diff --git a/examples/conditional_generation/requirements.txt b/examples/conditional_generation/requirements.txt index 8dcf534a99..9571ec3501 100644 --- a/examples/conditional_generation/requirements.txt +++ b/examples/conditional_generation/requirements.txt @@ -3,4 +3,6 @@ accelerate evaluate deepspeed tqdm -datasets \ No newline at end of file +datasets +safetensors +scikit-learn \ No newline at end of file