From 000f631b9a075df211afe026501c078af329b3ae Mon Sep 17 00:00:00 2001 From: "Liu, Kaixuan" Date: Thu, 31 Jul 2025 07:01:43 +0000 Subject: [PATCH 01/10] enable conditional_generation example for XPU Signed-off-by: Liu, Kaixuan --- .../multitask_prompt_tuning.ipynb | 1013 ++++++++++++++++- .../peft_adalora_seq2seq.py | 31 +- .../peft_ia3_seq2seq.ipynb | 216 ++-- .../peft_lora_seq2seq.ipynb | 408 ++++++- ...q2seq_accelerate_big_model_inference.ipynb | 47 +- ...ora_seq2seq_accelerate_ds_zero3_offload.py | 34 +- .../peft_prefix_tuning_seq2seq.ipynb | 339 ++++-- .../peft_prompt_tuning_seq2seq.ipynb | 130 +-- ..._prompt_tuning_seq2seq_with_generate.ipynb | 187 ++- .../conditional_generation/requirements.txt | 3 +- 10 files changed, 1953 insertions(+), 455 deletions(-) diff --git a/examples/conditional_generation/multitask_prompt_tuning.ipynb b/examples/conditional_generation/multitask_prompt_tuning.ipynb index 96fd2180dd..c4d602d283 100644 --- a/examples/conditional_generation/multitask_prompt_tuning.ipynb +++ b/examples/conditional_generation/multitask_prompt_tuning.ipynb @@ -2,19 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "58ff91ca-ce92-43d0-ae8b-4e9e89e193f6", "metadata": { "tags": [] }, "outputs": [], "source": [ + "import torch\n", "from datasets import load_dataset\n", "from transformers import set_seed, AutoModelForSeq2SeqLM, AutoTokenizer\n", "from peft import get_peft_model, MultitaskPromptTuningConfig, TaskType, MultitaskPromptTuningInit\n", "\n", "set_seed(42)\n", - "\n", + "device = \"xpu\" if torch.xpu.is_available() else \"cuda\"\n", "model_name = \"google/flan-t5-base\"\n", "\n", "peft_config = MultitaskPromptTuningConfig(\n", @@ -31,18 +32,18 @@ "model = AutoModelForSeq2SeqLM.from_pretrained(model_name)\n", "model = get_peft_model(model, peft_config)\n", "\n", - "model = model.cuda()\n", + "model = model.to(device)\n", "\n", "\n", "def send_to_device(batch):\n", " for i in batch:\n", - " batch[i] = batch[i].cuda()\n", + " batch[i] = batch[i].to(device)\n", " return batch" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "eb112bc1-ffaf-49fa-a216-0d601ec304ee", "metadata": { "tags": [] @@ -86,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "e5a16ec4-8fef-4ba9-95b6-a661eb51e50c", "metadata": { "tags": [] @@ -159,7 +160,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "cceecc94-f43a-4f62-8d45-926f2f02f36d", "metadata": { "tags": [] @@ -179,7 +180,979 @@ "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/57507 [00:00)]\n", + " 0%| | 0/109 [00:00)]\n", + " 0%| | 0/109 [00:00)]\n", + " 0%| | 0/109 [00:00)]\n", + " 0%| | 0/109 [00:00)]\n", + " 0%| | 0/109 [00:00)]\n", + " 0%| | 0/109 [00:00)]\n", + " 0%| | 0/109 [00:00)]\n", + " 0%| | 0/109 [00:00)]\n", + " 0%| | 0/109 [00:00)]\n", + " 0%| | 0/109 [00:00)]\n", + " 0%| | 0/109 [00:00)]\n", + " 0%| | 0/109 [00:00" + "" ] }, - "execution_count": 13, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -65,11 +67,41 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 3, + "id": "da74b569", "metadata": { "id": "8d0850ac" }, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "54bafcb49ba34fcb98a2f072a93a071f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "config.json: 0%| | 0.00/800 [00:00 \u001b[39m\u001b[32m409\u001b[39m \u001b[43mresponse\u001b[49m\u001b[43m.\u001b[49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 410\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/requests/models.py:1026\u001b[39m, in \u001b[36mResponse.raise_for_status\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 1025\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[32m-> \u001b[39m\u001b[32m1026\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response=\u001b[38;5;28mself\u001b[39m)\n", + "\u001b[31mHTTPError\u001b[39m: 404 Client Error: Not Found for url: https://huggingface.co/smangrul/twitter_complaints_bigscience_T0_3B_LORA_SEQ_2_SEQ_LM/resolve/main/adapter_config.json", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[31mRepositoryNotFoundError\u001b[39m Traceback (most recent call last)", + "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/peft/config.py:200\u001b[39m, in \u001b[36mPeftConfigMixin.from_pretrained\u001b[39m\u001b[34m(cls, pretrained_model_name_or_path, subfolder, **kwargs)\u001b[39m\n\u001b[32m 199\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m200\u001b[39m config_file = \u001b[43mhf_hub_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 201\u001b[39m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mCONFIG_NAME\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m=\u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mhf_hub_download_kwargs\u001b[49m\n\u001b[32m 202\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 203\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n", + "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_validators.py:114\u001b[39m, in \u001b[36mvalidate_hf_hub_args.._inner_fn\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 112\u001b[39m kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.\u001b[34m__name__\u001b[39m, has_token=has_token, kwargs=kwargs)\n\u001b[32m--> \u001b[39m\u001b[32m114\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:1010\u001b[39m, in \u001b[36mhf_hub_download\u001b[39m\u001b[34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, resume_download, force_filename, local_dir_use_symlinks)\u001b[39m\n\u001b[32m 1009\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1010\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_hf_hub_download_to_cache_dir\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1011\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Destination\u001b[39;49;00m\n\u001b[32m 1012\u001b[39m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1013\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# File info\u001b[39;49;00m\n\u001b[32m 1014\u001b[39m \u001b[43m \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1015\u001b[39m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1016\u001b[39m \u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1017\u001b[39m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1018\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# HTTP info\u001b[39;49;00m\n\u001b[32m 1019\u001b[39m \u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[43m=\u001b[49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1020\u001b[39m \u001b[43m \u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1021\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1022\u001b[39m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[43m=\u001b[49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1023\u001b[39m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1024\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Additional options\u001b[39;49;00m\n\u001b[32m 1025\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m=\u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1026\u001b[39m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[43m=\u001b[49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1027\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:1117\u001b[39m, in \u001b[36m_hf_hub_download_to_cache_dir\u001b[39m\u001b[34m(cache_dir, repo_id, filename, repo_type, revision, endpoint, etag_timeout, headers, proxies, token, local_files_only, force_download)\u001b[39m\n\u001b[32m 1116\u001b[39m \u001b[38;5;66;03m# Otherwise, raise appropriate error\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1117\u001b[39m \u001b[43m_raise_on_head_call_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhead_call_error\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1119\u001b[39m \u001b[38;5;66;03m# From now on, etag, commit_hash, url and size are not None.\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:1658\u001b[39m, in \u001b[36m_raise_on_head_call_error\u001b[39m\u001b[34m(head_call_error, force_download, local_files_only)\u001b[39m\n\u001b[32m 1653\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(head_call_error, (RepositoryNotFoundError, GatedRepoError)) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[32m 1654\u001b[39m \u001b[38;5;28misinstance\u001b[39m(head_call_error, HfHubHTTPError) \u001b[38;5;129;01mand\u001b[39;00m head_call_error.response.status_code == \u001b[32m401\u001b[39m\n\u001b[32m 1655\u001b[39m ):\n\u001b[32m 1656\u001b[39m \u001b[38;5;66;03m# Repo not found or gated => let's raise the actual error\u001b[39;00m\n\u001b[32m 1657\u001b[39m \u001b[38;5;66;03m# Unauthorized => likely a token issue => let's raise the actual error\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1658\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m head_call_error\n\u001b[32m 1659\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1660\u001b[39m \u001b[38;5;66;03m# Otherwise: most likely a connection issue or Hub downtime => let's warn the user\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:1546\u001b[39m, in \u001b[36m_get_metadata_or_catch_error\u001b[39m\u001b[34m(repo_id, filename, repo_type, revision, endpoint, proxies, etag_timeout, headers, token, local_files_only, relative_filename, storage_folder)\u001b[39m\n\u001b[32m 1545\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1546\u001b[39m metadata = \u001b[43mget_hf_file_metadata\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1547\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m=\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[43m=\u001b[49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[43m=\u001b[49m\u001b[43mendpoint\u001b[49m\n\u001b[32m 1548\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1549\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m http_error:\n", + "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_validators.py:114\u001b[39m, in \u001b[36mvalidate_hf_hub_args.._inner_fn\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 112\u001b[39m kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.\u001b[34m__name__\u001b[39m, has_token=has_token, kwargs=kwargs)\n\u001b[32m--> \u001b[39m\u001b[32m114\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:1463\u001b[39m, in \u001b[36mget_hf_file_metadata\u001b[39m\u001b[34m(url, token, proxies, timeout, library_name, library_version, user_agent, headers, endpoint)\u001b[39m\n\u001b[32m 1462\u001b[39m \u001b[38;5;66;03m# Retrieve metadata\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1463\u001b[39m r = \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1464\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mHEAD\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 1465\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m=\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1466\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1467\u001b[39m \u001b[43m \u001b[49m\u001b[43mallow_redirects\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 1468\u001b[39m \u001b[43m \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 1469\u001b[39m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[43m=\u001b[49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1470\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1471\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1472\u001b[39m hf_raise_for_status(r)\n", + "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:286\u001b[39m, in \u001b[36m_request_wrapper\u001b[39m\u001b[34m(method, url, follow_relative_redirects, **params)\u001b[39m\n\u001b[32m 285\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m follow_relative_redirects:\n\u001b[32m--> \u001b[39m\u001b[32m286\u001b[39m response = \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 287\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 288\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m=\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 289\u001b[39m \u001b[43m \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 290\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 291\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 293\u001b[39m \u001b[38;5;66;03m# If redirection, we redirect only relative paths.\u001b[39;00m\n\u001b[32m 294\u001b[39m \u001b[38;5;66;03m# This is useful in case of a renamed repository.\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:310\u001b[39m, in \u001b[36m_request_wrapper\u001b[39m\u001b[34m(method, url, follow_relative_redirects, **params)\u001b[39m\n\u001b[32m 309\u001b[39m response = http_backoff(method=method, url=url, **params, retry_on_exceptions=(), retry_on_status_codes=(\u001b[32m429\u001b[39m,))\n\u001b[32m--> \u001b[39m\u001b[32m310\u001b[39m \u001b[43mhf_raise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 311\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m response\n", + "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_http.py:459\u001b[39m, in \u001b[36mhf_raise_for_status\u001b[39m\u001b[34m(response, endpoint_name)\u001b[39m\n\u001b[32m 450\u001b[39m message = (\n\u001b[32m 451\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse.status_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m Client Error.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 452\u001b[39m + \u001b[33m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m (...)\u001b[39m\u001b[32m 457\u001b[39m \u001b[33m\"\u001b[39m\u001b[33m https://huggingface.co/docs/huggingface_hub/authentication\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 458\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m459\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m _format(RepositoryNotFoundError, message, response) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01me\u001b[39;00m\n\u001b[32m 461\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m response.status_code == \u001b[32m400\u001b[39m:\n", + "\u001b[31mRepositoryNotFoundError\u001b[39m: 404 Client Error. (Request ID: Root=1-688b07a7-1999e2383e23171c1be77586;01650cab-5274-427d-9fe6-d27257e6940b)\n\nRepository Not Found for url: https://huggingface.co/smangrul/twitter_complaints_bigscience_T0_3B_LORA_SEQ_2_SEQ_LM/resolve/main/adapter_config.json.\nPlease make sure you specified the correct `repo_id` and `repo_type`.\nIf you are trying to access a private or gated repo, make sure you are authenticated. For more details, see https://huggingface.co/docs/huggingface_hub/authentication", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[31mValueError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 18\u001b[39m\n\u001b[32m 15\u001b[39m batch_size = \u001b[32m8\u001b[39m\n\u001b[32m 17\u001b[39m peft_model_id = \u001b[33m\"\u001b[39m\u001b[33msmangrul/twitter_complaints_bigscience_T0_3B_LORA_SEQ_2_SEQ_LM\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m---> \u001b[39m\u001b[32m18\u001b[39m config = \u001b[43mPeftConfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpeft_model_id\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/peft/config.py:204\u001b[39m, in \u001b[36mPeftConfigMixin.from_pretrained\u001b[39m\u001b[34m(cls, pretrained_model_name_or_path, subfolder, **kwargs)\u001b[39m\n\u001b[32m 200\u001b[39m config_file = hf_hub_download(\n\u001b[32m 201\u001b[39m pretrained_model_name_or_path, CONFIG_NAME, subfolder=subfolder, **hf_hub_download_kwargs\n\u001b[32m 202\u001b[39m )\n\u001b[32m 203\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m--> \u001b[39m\u001b[32m204\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mCan\u001b[39m\u001b[33m'\u001b[39m\u001b[33mt find \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCONFIG_NAME\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m at \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mexc\u001b[39;00m\n\u001b[32m 206\u001b[39m loaded_attributes = \u001b[38;5;28mcls\u001b[39m.from_json_file(config_file)\n\u001b[32m 207\u001b[39m kwargs = {**class_kwargs, **loaded_attributes}\n", + "\u001b[31mValueError\u001b[39m: Can't find 'adapter_config.json' at 'smangrul/twitter_complaints_bigscience_T0_3B_LORA_SEQ_2_SEQ_LM'" + ] + } + ], "source": [ "from transformers import AutoModelForSeq2SeqLM\n", "from peft import PeftModel, PeftConfig\n", @@ -22,6 +54,7 @@ "text_column = \"Tweet text\"\n", "label_column = \"text_label\"\n", "batch_size = 8\n", + "device = \"xpu\" if torch.xpu.is_available() else \"cuda\"\n", "\n", "peft_model_id = \"smangrul/twitter_complaints_bigscience_T0_3B_LORA_SEQ_2_SEQ_LM\"\n", "config = PeftConfig.from_pretrained(peft_model_id)" @@ -29,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "cc55820a", "metadata": {}, "outputs": [], @@ -114,7 +147,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "b33be5e6", "metadata": {}, "outputs": [ @@ -142,14 +175,14 @@ "print(inputs)\n", "\n", "with torch.no_grad():\n", - " outputs = model.generate(input_ids=inputs[\"input_ids\"].to(\"cuda\"), max_new_tokens=10)\n", + " outputs = model.generate(input_ids=inputs[\"input_ids\"].to(device), max_new_tokens=10)\n", " print(outputs)\n", " print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "b6d6cd5b", "metadata": {}, "outputs": [ @@ -166,7 +199,7 @@ "model.eval()\n", "eval_preds = []\n", "for _, batch in enumerate(tqdm(eval_dataloader)):\n", - " batch = {k: v.to(\"cuda\") for k, v in batch.items() if k != \"labels\"}\n", + " batch = {k: v.to(device) for k, v in batch.items() if k != \"labels\"}\n", " with torch.no_grad():\n", " outputs = model.generate(**batch, max_new_tokens=10)\n", " preds = outputs.detach().cpu().numpy()\n", @@ -240,7 +273,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5 (v3.10.5:f377153967, Jun 6 2022, 12:36:10) [Clang 13.0.0 (clang-1300.0.29.30)]" + "version": "3.11.13" }, "vscode": { "interpreter": { diff --git a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py index 4be3e1666b..c2c5f39305 100644 --- a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py +++ b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py @@ -54,10 +54,12 @@ def b2mb(x): # This context manager is used to track the peak memory usage of the process class TorchTracemalloc: def __enter__(self): + self.device_type = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda" + self.device_module = getattr(torch, self.device_type, torch.cuda) gc.collect() - torch.cuda.empty_cache() - torch.cuda.reset_max_memory_allocated() # reset the peak gauge to zero - self.begin = torch.cuda.memory_allocated() + self.device_module.empty_cache() + self.device_module.reset_peak_memory_stats() # reset the peak gauge to zero + self.begin = self.device_module.memory_allocated() self.process = psutil.Process() self.cpu_begin = self.cpu_mem_used() @@ -87,9 +89,9 @@ def __exit__(self, *exc): self.peak_monitoring = False gc.collect() - torch.cuda.empty_cache() - self.end = torch.cuda.memory_allocated() - self.peak = torch.cuda.max_memory_allocated() + self.device_module.empty_cache() + self.end = self.device_module.memory_allocated() + self.peak = self.device_module.max_memory_allocated() self.used = b2mb(self.end - self.begin) self.peaked = b2mb(self.peak - self.begin) @@ -199,12 +201,12 @@ def collate_fn(examples): optimizer.step() lr_scheduler.step() optimizer.zero_grad() - # Printing the GPU memory usage details such as allocated memory, peak memory, and total memory usage - accelerator.print(f"GPU Memory before entering the train : {b2mb(tracemalloc.begin)}") - accelerator.print(f"GPU Memory consumed at the end of the train (end-begin): {tracemalloc.used}") - accelerator.print(f"GPU Peak Memory consumed during the train (max-begin): {tracemalloc.peaked}") + # Printing the device memory usage details such as allocated memory, peak memory, and total memory usage + accelerator.print(f"{accelerator.device.type.upper()} Memory before entering the train : {b2mb(tracemalloc.begin)}") + accelerator.print(f"{accelerator.device.type.upper()} Memory consumed at the end of the train (end-begin): {tracemalloc.used}") + accelerator.print(f"{accelerator.device.type.upper()} Peak Memory consumed during the train (max-begin): {tracemalloc.peaked}") accelerator.print( - f"GPU Total Peak Memory consumed during the train (max): {tracemalloc.peaked + b2mb(tracemalloc.begin)}" + f"{accelerator.device.type.upper()} Total Peak Memory consumed during the train (max): {tracemalloc.peaked + b2mb(tracemalloc.begin)}" ) accelerator.print(f"CPU Memory before entering the train : {b2mb(tracemalloc.cpu_begin)}") @@ -230,12 +232,12 @@ def collate_fn(examples): preds = accelerator.gather_for_metrics(outputs).detach().cpu().numpy() eval_preds.extend(tokenizer.batch_decode(preds, skip_special_tokens=True)) - # Printing the GPU memory usage details such as allocated memory, peak memory, and total memory usage - accelerator.print(f"GPU Memory before entering the eval : {b2mb(tracemalloc.begin)}") - accelerator.print(f"GPU Memory consumed at the end of the eval (end-begin): {tracemalloc.used}") - accelerator.print(f"GPU Peak Memory consumed during the eval (max-begin): {tracemalloc.peaked}") + # Printing the device memory usage details such as allocated memory, peak memory, and total memory usage + accelerator.print(f"{accelerator.device.type.upper()} Memory before entering the eval : {b2mb(tracemalloc.begin)}") + accelerator.print(f"{accelerator.device.type.upper()} Memory consumed at the end of the eval (end-begin): {tracemalloc.used}") + accelerator.print(f"{accelerator.device.type.upper()} Peak Memory consumed during the eval (max-begin): {tracemalloc.peaked}") accelerator.print( - f"GPU Total Peak Memory consumed during the eval (max): {tracemalloc.peaked + b2mb(tracemalloc.begin)}" + f"{accelerator.device.type.upper()} Total Peak Memory consumed during the eval (max): {tracemalloc.peaked + b2mb(tracemalloc.begin)}" ) accelerator.print(f"CPU Memory before entering the eval : {b2mb(tracemalloc.cpu_begin)}") diff --git a/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb b/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb index aa85f9a743..44eb6fadfc 100644 --- a/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb +++ b/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb @@ -5,23 +5,7 @@ "execution_count": 1, "id": "5f93b7d1", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "===================================BUG REPORT===================================\n", - "Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n", - "For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link\n", - "================================================================================\n", - "CUDA SETUP: CUDA runtime path found: /home/sourab/miniconda3/envs/ml/lib/libcudart.so\n", - "CUDA SETUP: Highest compute capability among GPUs detected: 7.5\n", - "CUDA SETUP: Detected CUDA version 117\n", - "CUDA SETUP: Loading binary /home/sourab/miniconda3/envs/ml/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...\n" - ] - } - ], + "outputs": [], "source": [ "from transformers import AutoModelForSeq2SeqLM\n", "from peft import get_peft_config, get_peft_model, get_peft_model_state_dict, PrefixTuningConfig, TaskType\n", @@ -30,14 +14,13 @@ "import os\n", "\n", "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"3\"\n", "from transformers import AutoTokenizer\n", "from torch.utils.data import DataLoader\n", "from transformers import default_data_collator, get_linear_schedule_with_warmup\n", "from tqdm import tqdm\n", "from datasets import load_dataset\n", "\n", - "device = \"cuda\"\n", + "device = \"xpu\" if torch.xpu.is_available() else \"cuda\"\n", "model_name_or_path = \"t5-large\"\n", "tokenizer_name_or_path = \"t5-large\"\n", "\n", @@ -52,10 +35,213 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "8d0850ac", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a53255e7261d484bbb102253280b3475", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "config.json: 0%| | 0.00/1.21k [00:00 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer, GenerationConfig\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpeft\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n", - "File \u001b[0;32m:1055\u001b[0m, in \u001b[0;36m_handle_fromlist\u001b[0;34m(module, fromlist, import_, recursive)\u001b[0m\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/transformers/utils/import_utils.py:1076\u001b[0m, in \u001b[0;36m_LazyModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1074\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_module(name)\n\u001b[1;32m 1075\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_class_to_module\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[0;32m-> 1076\u001b[0m module \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_module\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_class_to_module\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1077\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(module, name)\n\u001b[1;32m 1078\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/transformers/utils/import_utils.py:1086\u001b[0m, in \u001b[0;36m_LazyModule._get_module\u001b[0;34m(self, module_name)\u001b[0m\n\u001b[1;32m 1084\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_get_module\u001b[39m(\u001b[38;5;28mself\u001b[39m, module_name: \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 1085\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1086\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mimportlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimport_module\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mmodule_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;18;43m__name__\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1087\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed to import \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodule_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m because of the following error (look up to see its\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1090\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m traceback):\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1091\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/importlib/__init__.py:127\u001b[0m, in \u001b[0;36mimport_module\u001b[0;34m(name, package)\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 126\u001b[0m level \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m--> 127\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_bootstrap\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_gcd_import\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpackage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/transformers/training_args_seq2seq.py:21\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Optional, Union\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneration\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfiguration_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m GenerationConfig\n\u001b[0;32m---> 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtraining_args\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TrainingArguments\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m add_start_docstrings\n\u001b[1;32m 25\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mgetLogger(\u001b[38;5;18m__name__\u001b[39m)\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/transformers/training_args.py:29\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Any, Dict, List, Optional, Union\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpackaging\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m version\n\u001b[0;32m---> 29\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdebug_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DebugOption\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtrainer_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 31\u001b[0m EvaluationStrategy,\n\u001b[1;32m 32\u001b[0m FSDPOption,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 36\u001b[0m ShardedDDPOption,\n\u001b[1;32m 37\u001b[0m )\n\u001b[1;32m 38\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 39\u001b[0m ExplicitEnum,\n\u001b[1;32m 40\u001b[0m cached_property,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 53\u001b[0m requires_backends,\n\u001b[1;32m 54\u001b[0m )\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/transformers/debug_utils.py:21\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ExplicitEnum, is_torch_available, logging\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_torch_available():\n\u001b[0;32m---> 21\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[1;32m 24\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mget_logger(\u001b[38;5;18m__name__\u001b[39m)\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mDebugUnderflowOverflow\u001b[39;00m:\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/__init__.py:1465\u001b[0m\n\u001b[1;32m 1463\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m library\n\u001b[1;32m 1464\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m TYPE_CHECKING:\n\u001b[0;32m-> 1465\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _meta_registrations\n\u001b[1;32m 1467\u001b[0m \u001b[38;5;66;03m# Enable CUDA Sanitizer\u001b[39;00m\n\u001b[1;32m 1468\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTORCH_CUDA_SANITIZER\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01min\u001b[39;00m os\u001b[38;5;241m.\u001b[39menviron:\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_meta_registrations.py:7\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims_common\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mutils\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Tensor\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_decomp\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _add_op_to_registry, global_decomposition_table, meta_table\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_ops\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OpOverload\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _elementwise_meta, ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_decomp/__init__.py:169\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m decompositions\n\u001b[1;32m 168\u001b[0m \u001b[38;5;66;03m# populate the table\u001b[39;00m\n\u001b[0;32m--> 169\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_decomp\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdecompositions\u001b[39;00m\n\u001b[1;32m 170\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_refs\u001b[39;00m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;66;03m# This list was copied from torch/_inductor/decomposition.py\u001b[39;00m\n\u001b[1;32m 173\u001b[0m \u001b[38;5;66;03m# excluding decompositions that results in prim ops\u001b[39;00m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;66;03m# Resulting opset of decomposition is core aten ops\u001b[39;00m\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_decomp/decompositions.py:10\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Callable, cast, Iterable, List, Optional, Tuple, Union\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mprims\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims_common\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mutils\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mnn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctional\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mF\u001b[39;00m\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_prims/__init__.py:33\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims_common\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 18\u001b[0m check,\n\u001b[1;32m 19\u001b[0m Dim,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 30\u001b[0m type_to_dtype,\n\u001b[1;32m 31\u001b[0m )\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims_common\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mwrappers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m backwards_not_supported\n\u001b[0;32m---> 33\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_subclasses\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfake_tensor\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FakeTensor, FakeTensorMode\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01moverrides\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m handle_torch_function, has_torch_function\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_pytree\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tree_flatten, tree_map, tree_unflatten\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_subclasses/__init__.py:3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_subclasses\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfake_tensor\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 4\u001b[0m DynamicOutputShapeException,\n\u001b[1;32m 5\u001b[0m FakeTensor,\n\u001b[1;32m 6\u001b[0m FakeTensorMode,\n\u001b[1;32m 7\u001b[0m UnsupportedFakeTensorException,\n\u001b[1;32m 8\u001b[0m )\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_subclasses\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfake_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CrossRefFakeMode\n\u001b[1;32m 12\u001b[0m __all__ \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 13\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFakeTensor\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 14\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFakeTensorMode\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCrossRefFakeMode\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 18\u001b[0m ]\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_subclasses/fake_tensor.py:13\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mweakref\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ReferenceType\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[0;32m---> 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_guards\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Source\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_ops\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OpOverload\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_prims_common\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 16\u001b[0m elementwise_dtypes,\n\u001b[1;32m 17\u001b[0m ELEMENTWISE_TYPE_PROMOTION_KIND,\n\u001b[1;32m 18\u001b[0m is_float_dtype,\n\u001b[1;32m 19\u001b[0m is_integer_dtype,\n\u001b[1;32m 20\u001b[0m )\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/torch/_guards.py:14\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# TODO(voz): Stolen pattern, not sure why this is the case,\u001b[39;00m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# but mypy complains.\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m \u001b[38;5;66;03m# type: ignore[import]\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[1;32m 16\u001b[0m log\u001b[38;5;241m.\u001b[39mwarning(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo sympy found\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/__init__.py:74\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlogic\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (to_cnf, to_dnf, to_nnf, And, Or, Not, Xor, Nand, Nor,\n\u001b[1;32m 68\u001b[0m Implies, Equivalent, ITE, POSform, SOPform, simplify_logic, bool_map,\n\u001b[1;32m 69\u001b[0m true, false, satisfiable)\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01massumptions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (AppliedPredicate, Predicate, AssumptionsContext,\n\u001b[1;32m 72\u001b[0m assuming, Q, ask, register_handler, remove_handler, refine)\n\u001b[0;32m---> 74\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (Poly, PurePoly, poly_from_expr, parallel_poly_from_expr,\n\u001b[1;32m 75\u001b[0m degree, total_degree, degree_list, LC, LM, LT, pdiv, prem, pquo,\n\u001b[1;32m 76\u001b[0m pexquo, div, rem, quo, exquo, half_gcdex, gcdex, invert,\n\u001b[1;32m 77\u001b[0m subresultants, resultant, discriminant, cofactors, gcd_list, gcd,\n\u001b[1;32m 78\u001b[0m lcm_list, lcm, terms_gcd, trunc, monic, content, primitive, compose,\n\u001b[1;32m 79\u001b[0m decompose, sturm, gff_list, gff, sqf_norm, sqf_part, sqf_list, sqf,\n\u001b[1;32m 80\u001b[0m factor_list, factor, intervals, refine_root, count_roots, real_roots,\n\u001b[1;32m 81\u001b[0m nroots, ground_roots, nth_power_roots_poly, cancel, reduced, groebner,\n\u001b[1;32m 82\u001b[0m is_zero_dimensional, GroebnerBasis, poly, symmetrize, horner,\n\u001b[1;32m 83\u001b[0m interpolate, rational_interpolate, viete, together,\n\u001b[1;32m 84\u001b[0m BasePolynomialError, ExactQuotientFailed, PolynomialDivisionFailed,\n\u001b[1;32m 85\u001b[0m OperationNotSupported, HeuristicGCDFailed, HomomorphismFailed,\n\u001b[1;32m 86\u001b[0m IsomorphismFailed, ExtraneousFactors, EvaluationFailed,\n\u001b[1;32m 87\u001b[0m RefinementFailed, CoercionFailed, NotInvertible, NotReversible,\n\u001b[1;32m 88\u001b[0m NotAlgebraic, DomainError, PolynomialError, UnificationFailed,\n\u001b[1;32m 89\u001b[0m GeneratorsError, GeneratorsNeeded, ComputationFailed,\n\u001b[1;32m 90\u001b[0m UnivariatePolynomialError, MultivariatePolynomialError,\n\u001b[1;32m 91\u001b[0m PolificationFailed, OptionError, FlagError, minpoly,\n\u001b[1;32m 92\u001b[0m minimal_polynomial, primitive_element, field_isomorphism,\n\u001b[1;32m 93\u001b[0m to_number_field, isolate, round_two, prime_decomp, prime_valuation,\n\u001b[1;32m 94\u001b[0m galois_group, itermonomials, Monomial, lex, grlex,\n\u001b[1;32m 95\u001b[0m grevlex, ilex, igrlex, igrevlex, CRootOf, rootof, RootOf,\n\u001b[1;32m 96\u001b[0m ComplexRootOf, RootSum, roots, Domain, FiniteField, IntegerRing,\n\u001b[1;32m 97\u001b[0m RationalField, RealField, ComplexField, PythonFiniteField,\n\u001b[1;32m 98\u001b[0m GMPYFiniteField, PythonIntegerRing, GMPYIntegerRing, PythonRational,\n\u001b[1;32m 99\u001b[0m GMPYRationalField, AlgebraicField, PolynomialRing, FractionField,\n\u001b[1;32m 100\u001b[0m ExpressionDomain, FF_python, FF_gmpy, ZZ_python, ZZ_gmpy, QQ_python,\n\u001b[1;32m 101\u001b[0m QQ_gmpy, GF, FF, ZZ, QQ, ZZ_I, QQ_I, RR, CC, EX, EXRAW,\n\u001b[1;32m 102\u001b[0m construct_domain, swinnerton_dyer_poly, cyclotomic_poly,\n\u001b[1;32m 103\u001b[0m symmetric_poly, random_poly, interpolating_poly, jacobi_poly,\n\u001b[1;32m 104\u001b[0m chebyshevt_poly, chebyshevu_poly, hermite_poly, hermite_prob_poly,\n\u001b[1;32m 105\u001b[0m legendre_poly, laguerre_poly, apart, apart_list, assemble_partfrac_list,\n\u001b[1;32m 106\u001b[0m Options, ring, xring, vring, sring, field, xfield, vfield, sfield)\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mseries\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (Order, O, limit, Limit, gruntz, series, approximants,\n\u001b[1;32m 109\u001b[0m residue, EmptySequence, SeqPer, SeqFormula, sequence, SeqAdd, SeqMul,\n\u001b[1;32m 110\u001b[0m fourier_series, fps, difference_delta, limit_seq)\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (factorial, factorial2, rf, ff, binomial,\n\u001b[1;32m 113\u001b[0m RisingFactorial, FallingFactorial, subfactorial, carmichael,\n\u001b[1;32m 114\u001b[0m fibonacci, lucas, motzkin, tribonacci, harmonic, bernoulli, bell, euler,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 133\u001b[0m Znm, elliptic_k, elliptic_f, elliptic_e, elliptic_pi, beta, mathieus,\n\u001b[1;32m 134\u001b[0m mathieuc, mathieusprime, mathieucprime, riemann_xi, betainc, betainc_regularized)\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/polys/__init__.py:78\u001b[0m\n\u001b[1;32m 3\u001b[0m __all__ \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPoly\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPurePoly\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpoly_from_expr\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mparallel_poly_from_expr\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdegree\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtotal_degree\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdegree_list\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mLC\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mLM\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mLT\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpdiv\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mprem\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpquo\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfield\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mxfield\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mvfield\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msfield\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 66\u001b[0m ]\n\u001b[1;32m 68\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolytools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (Poly, PurePoly, poly_from_expr,\n\u001b[1;32m 69\u001b[0m parallel_poly_from_expr, degree, total_degree, degree_list, LC, LM,\n\u001b[1;32m 70\u001b[0m LT, pdiv, prem, pquo, pexquo, div, rem, quo, exquo, half_gcdex, gcdex,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 75\u001b[0m count_roots, real_roots, nroots, ground_roots, nth_power_roots_poly,\n\u001b[1;32m 76\u001b[0m cancel, reduced, groebner, is_zero_dimensional, GroebnerBasis, poly)\n\u001b[0;32m---> 78\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolyfuncs\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (symmetrize, horner, interpolate,\n\u001b[1;32m 79\u001b[0m rational_interpolate, viete)\n\u001b[1;32m 81\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrationaltools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m together\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolyerrors\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (BasePolynomialError, ExactQuotientFailed,\n\u001b[1;32m 84\u001b[0m PolynomialDivisionFailed, OperationNotSupported, HeuristicGCDFailed,\n\u001b[1;32m 85\u001b[0m HomomorphismFailed, IsomorphismFailed, ExtraneousFactors,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 90\u001b[0m MultivariatePolynomialError, PolificationFailed, OptionError,\n\u001b[1;32m 91\u001b[0m FlagError)\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/polys/polyfuncs.py:10\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolyoptions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m allowed_flags, build_options\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolytools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m poly_from_expr, Poly\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mspecialpolys\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 11\u001b[0m symmetric_poly, interpolating_poly)\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrings\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m sring\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutilities\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m numbered_symbols, take, public\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/polys/specialpolys.py:298\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m dmp_mul(f, h, n, K), dmp_mul(g, h, n, K), h\n\u001b[1;32m 296\u001b[0m \u001b[38;5;66;03m# A few useful polynomials from Wang's paper ('78).\u001b[39;00m\n\u001b[0;32m--> 298\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrings\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ring\n\u001b[1;32m 300\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_f_0\u001b[39m():\n\u001b[1;32m 301\u001b[0m R, x, y, z \u001b[38;5;241m=\u001b[39m ring(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mx,y,z\u001b[39m\u001b[38;5;124m\"\u001b[39m, ZZ)\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/polys/rings.py:30\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolyoptions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (Domain \u001b[38;5;28;01mas\u001b[39;00m DomainOpt,\n\u001b[1;32m 27\u001b[0m Order \u001b[38;5;28;01mas\u001b[39;00m OrderOpt, build_options)\n\u001b[1;32m 28\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolys\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpolyutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (expr_from_dict, _dict_reorder,\n\u001b[1;32m 29\u001b[0m _parallel_dict_from_expr)\n\u001b[0;32m---> 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprinting\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdefaults\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DefaultPrinting\n\u001b[1;32m 31\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutilities\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m public, subsets\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutilities\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01miterables\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m is_sequence\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/printing/__init__.py:5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"Printing subsystem\"\"\"\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpretty\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pager_print, pretty, pretty_print, pprint, pprint_use_unicode, pprint_try_use_unicode\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlatex\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m latex, print_latex, multiline_latex\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmathml\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m mathml, print_mathml\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpython\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m python, print_python\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/printing/latex.py:18\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msympify\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SympifyError\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlogic\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mboolalg\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m true, BooleanTrue, BooleanFalse\n\u001b[0;32m---> 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtensor\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marray\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m NDimArray\n\u001b[1;32m 20\u001b[0m \u001b[38;5;66;03m# sympy.printing imports\u001b[39;00m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprinting\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprecedence\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m precedence_traditional\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/tensor/__init__.py:4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"A module to manipulate symbolic objects with indices including tensors\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mindexed\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m IndexedBase, Idx, Indexed\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mindex_methods\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_contraction_structure, get_indices\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m shape\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/tensor/indexed.py:114\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlogic\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m fuzzy_bool, fuzzy_not\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msympify\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _sympify\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mspecial\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtensor_functions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m KroneckerDelta\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmultipledispatch\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dispatch\n\u001b[1;32m 116\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutilities\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01miterables\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m is_sequence, NotIterable\n", - "File \u001b[0;32m~/anaconda3/envs/peft/lib/python3.9/site-packages/sympy/functions/__init__.py:21\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01melementary\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtrigonometric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (sin, cos, tan,\n\u001b[1;32m 18\u001b[0m sec, csc, cot, sinc, asin, acos, atan, asec, acsc, acot, atan2)\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01melementary\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexponential\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (exp_polar, exp, log,\n\u001b[1;32m 20\u001b[0m LambertW)\n\u001b[0;32m---> 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01melementary\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhyperbolic\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (sinh, cosh, tanh, coth,\n\u001b[1;32m 22\u001b[0m sech, csch, asinh, acosh, atanh, acoth, asech, acsch)\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01melementary\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mintegers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m floor, ceiling, frac\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msympy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01melementary\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpiecewise\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (Piecewise, piecewise_fold,\n\u001b[1;32m 25\u001b[0m piecewise_exclusive)\n", - "File \u001b[0;32m:1007\u001b[0m, in \u001b[0;36m_find_and_load\u001b[0;34m(name, import_)\u001b[0m\n", - "File \u001b[0;32m:986\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[0;34m(name, import_)\u001b[0m\n", - "File \u001b[0;32m:680\u001b[0m, in \u001b[0;36m_load_unlocked\u001b[0;34m(spec)\u001b[0m\n", - "File \u001b[0;32m:846\u001b[0m, in \u001b[0;36mexec_module\u001b[0;34m(self, module)\u001b[0m\n", - "File \u001b[0;32m:978\u001b[0m, in \u001b[0;36mget_code\u001b[0;34m(self, fullname)\u001b[0m\n", - "File \u001b[0;32m:647\u001b[0m, in \u001b[0;36m_compile_bytecode\u001b[0;34m(data, name, bytecode_path, source_path)\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], + "outputs": [], "source": [ "import os\n", "\n", @@ -69,10 +26,9 @@ "from peft import get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType\n", "from datasets import load_dataset\n", "\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", "\n", - "device = \"cuda\"\n", + "device = \"xpu\" if torch.xpu.is_available() else \"cuda\"\n", "model_name_or_path = \"t5-large\"\n", "tokenizer_name_or_path = \"t5-large\"\n", "\n", @@ -100,7 +56,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "trainable params: 40960 || all params: 737709056 || trainable%: 0.005552324411210698\n" + "trainable params: 40,960 || all params: 737,709,056 || trainable%: 0.0056\n" ] }, { @@ -289,27 +245,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "Found cached dataset financial_phrasebank (/data/proxem/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141)\n" + "Using the latest cached version of the dataset since financial_phrasebank couldn't be found on the Hugging Face Hub\n", + "Found the latest cached dataset configuration 'sentences_allagree' at /root/.cache/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141 (last modified on Thu Jul 31 06:31:53 2025).\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d3a799c64a2c43258dc6166c90e2e49f", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/1 [00:00 Tensor(a!)\n", + " registered at /pytorch/build/aten/src/ATen/RegisterSchema.cpp:6\n", + " dispatch key: XPU\n", + " previous kernel: registered at /pytorch/aten/src/ATen/VmapModeRegistrations.cpp:37\n", + " new kernel: registered at /build/intel-pytorch-extension/build/Release/csrc/gpu/csrc/gpu/xpu/ATen/RegisterXPU_0.cpp:172 (function operator())\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2025-07-31 06:37:45,694] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to xpu (auto detect)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/bin/ld: cannot find -laio: No such file or directory\n", + "collect2: error: ld returned 1 exit status\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2025-07-31 06:37:46,651] [INFO] [logging.py:107:log_dist] [Rank -1] [TorchCheckpointEngine] Initialized with serialization = False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n" ] }, { @@ -470,8 +435,8 @@ "\n", "
\n", " \n", - " \n", - " [1275/1275 02:52, Epoch 5/5]\n", + " \n", + " [1275/1275 03:26, Epoch 5/5]\n", "
\n", " \n", " \n", @@ -485,36 +450,36 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - "
14.7848000.5769330.5594712.0364000.5444500.629956
20.6482000.4375750.5770930.4971000.4222110.638767
30.5362000.3978570.6255510.4609000.3849510.678414
40.4722000.3731600.643172
50.4525000.3702340.6563880.4289000.4053360.691630

" + "

\n", + "

\n", + " \n", + " \n", + " [20/29 00:02 < 00:01, 6.78 it/s]\n", + "
\n", + " " ], "text/plain": [ "" @@ -522,16 +487,6 @@ }, "metadata": {}, "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "TrainOutput(global_step=1275, training_loss=1.3787811279296875, metrics={'train_runtime': 173.3699, 'train_samples_per_second': 58.747, 'train_steps_per_second': 7.354, 'total_flos': 344546979840000.0, 'train_loss': 1.3787811279296875, 'epoch': 5.0})" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -688,9 +643,9 @@ ], "metadata": { "kernelspec": { - "display_name": "peft", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "peft" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -702,7 +657,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.11.13" }, "toc": { "base_numbering": 1, diff --git a/examples/conditional_generation/requirements.txt b/examples/conditional_generation/requirements.txt index 8dcf534a99..bac964ee58 100644 --- a/examples/conditional_generation/requirements.txt +++ b/examples/conditional_generation/requirements.txt @@ -3,4 +3,5 @@ accelerate evaluate deepspeed tqdm -datasets \ No newline at end of file +datasets +scikit-learn \ No newline at end of file From 381571f0db6ce66bf95397fad69ea843b10a8300 Mon Sep 17 00:00:00 2001 From: "Liu, Kaixuan" Date: Thu, 31 Jul 2025 07:11:31 +0000 Subject: [PATCH 02/10] update log Signed-off-by: Liu, Kaixuan --- ..._prompt_tuning_seq2seq_with_generate.ipynb | 60 +++++++++---------- 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/examples/conditional_generation/peft_prompt_tuning_seq2seq_with_generate.ipynb b/examples/conditional_generation/peft_prompt_tuning_seq2seq_with_generate.ipynb index e677331789..edcab01790 100644 --- a/examples/conditional_generation/peft_prompt_tuning_seq2seq_with_generate.ipynb +++ b/examples/conditional_generation/peft_prompt_tuning_seq2seq_with_generate.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "5f93b7d1", "metadata": { "ExecuteTime": { @@ -246,13 +246,13 @@ "output_type": "stream", "text": [ "Using the latest cached version of the dataset since financial_phrasebank couldn't be found on the Hugging Face Hub\n", - "Found the latest cached dataset configuration 'sentences_allagree' at /root/.cache/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141 (last modified on Thu Jul 31 06:31:53 2025).\n" + "Found the latest cached dataset configuration 'sentences_allagree' at /root/.cache/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141 (last modified on Thu Jul 31 06:43:45 2025).\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "62ef1d34d8994a22bd955c3d62e58061", + "model_id": "79ef90cbad2f4c2088f01102cadb8a3b", "version_major": 2, "version_minor": 0 }, @@ -266,7 +266,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6bd5d95630ef4ac292604dc4ddc82c15", + "model_id": "0f5b177b658646cfa90b3a2801138807", "version_major": 2, "version_minor": 0 }, @@ -280,9 +280,9 @@ { "data": { "text/plain": [ - "{'sentence': 'After the transaction , Tikkurila has no powder coatings related operations .',\n", - " 'label': 1,\n", - " 'text_label': 'neutral'}" + "{'sentence': 'This new partnership agreement represents a significant milestone for both parties .',\n", + " 'label': 2,\n", + " 'text_label': 'positive'}" ] }, "execution_count": 3, @@ -321,7 +321,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4322a4db22cb4a039b603129de1621f5", + "model_id": "0a5f7b5967704fab97f11bc07813625c", "version_major": 2, "version_minor": 0 }, @@ -335,7 +335,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b002043924e743bdad4f471611b7b583", + "model_id": "1ff9578c074e4736a8812f6ffc8138b5", "version_major": 2, "version_minor": 0 }, @@ -391,7 +391,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[W731 06:37:45.865043490 OperatorEntry.cpp:217] Warning: Warning only once for all operators, other operators may also be overridden.\n", + "[W731 07:06:51.135038656 OperatorEntry.cpp:217] Warning: Warning only once for all operators, other operators may also be overridden.\n", " Overriding a previously registered kernel for the same operator and the same dispatch key\n", " operator: aten::geometric_(Tensor(a!) self, float p, *, Generator? generator=None) -> Tensor(a!)\n", " registered at /pytorch/build/aten/src/ATen/RegisterSchema.cpp:6\n", @@ -404,7 +404,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[2025-07-31 06:37:45,694] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to xpu (auto detect)\n" + "[2025-07-31 07:06:51,984] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to xpu (auto detect)\n" ] }, { @@ -419,7 +419,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[2025-07-31 06:37:46,651] [INFO] [logging.py:107:log_dist] [Rank -1] [TorchCheckpointEngine] Initialized with serialization = False\n" + "[2025-07-31 07:06:52,955] [INFO] [logging.py:107:log_dist] [Rank -1] [TorchCheckpointEngine] Initialized with serialization = False\n" ] }, { @@ -435,8 +435,8 @@ "\n", "
\n", " \n", - " \n", - " [1275/1275 03:26, Epoch 5/5]\n", + " \n", + " [1201/1275 03:15 < 00:12, 6.12 it/s, Epoch 4.71/5]\n", "
\n", " \n", " \n", @@ -450,36 +450,30 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", - "
12.0364000.5444500.6299562.1699000.5071560.621145
20.4971000.4222110.6387670.5377000.4309960.651982
30.4609000.3849510.6784140.4822000.4267180.696035
40.4289000.4053360.6916300.4597000.4708940.682819

\n", - "

\n", - " \n", - " \n", - " [20/29 00:02 < 00:01, 6.78 it/s]\n", - "
\n", - " " + "

" ], "text/plain": [ "" From 006ab11f97e9c31ba47e157c42c70bfa2c1cfe70 Mon Sep 17 00:00:00 2001 From: "Liu, Kaixuan" Date: Thu, 31 Jul 2025 07:26:00 +0000 Subject: [PATCH 03/10] use orig param Signed-off-by: Liu, Kaixuan --- .../multitask_prompt_tuning.ipynb | 2 +- ..._prompt_tuning_seq2seq_with_generate.ipynb | 36 +++++++++++++------ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/examples/conditional_generation/multitask_prompt_tuning.ipynb b/examples/conditional_generation/multitask_prompt_tuning.ipynb index c4d602d283..bf3085b03d 100644 --- a/examples/conditional_generation/multitask_prompt_tuning.ipynb +++ b/examples/conditional_generation/multitask_prompt_tuning.ipynb @@ -1190,7 +1190,7 @@ "optimizer = AdamW(model.parameters(), lr=1e-4)\n", "scheduler = get_cosine_schedule_with_warmup(optimizer, 200, len(train))\n", "\n", - "n = 10\n", + "n = 1000\n", "step = 0\n", "train_ = tqdm(train)\n", "\n", diff --git a/examples/conditional_generation/peft_prompt_tuning_seq2seq_with_generate.ipynb b/examples/conditional_generation/peft_prompt_tuning_seq2seq_with_generate.ipynb index edcab01790..c112e8d8e0 100644 --- a/examples/conditional_generation/peft_prompt_tuning_seq2seq_with_generate.ipynb +++ b/examples/conditional_generation/peft_prompt_tuning_seq2seq_with_generate.ipynb @@ -378,7 +378,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "6b3a4090", "metadata": { "ExecuteTime": { @@ -435,8 +435,8 @@ "\n", "

\n", " \n", - " \n", - " [1201/1275 03:15 < 00:12, 6.12 it/s, Epoch 4.71/5]\n", + " \n", + " [1275/1275 03:31, Epoch 5/5]\n", "
\n", " \n", " \n", @@ -472,6 +472,12 @@ " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", "
0.4708940.682819
50.4360000.4096040.718062

" ], @@ -481,6 +487,16 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "TrainOutput(global_step=1275, training_loss=0.8170911183076747, metrics={'train_runtime': 213.5513, 'train_samples_per_second': 47.693, 'train_steps_per_second': 5.97, 'total_flos': 344546979840000.0, 'train_loss': 0.8170911183076747, 'epoch': 5.0})" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -558,7 +574,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "164K\tt5-large_PROMPT_TUNING_SEQ_2_SEQ_LM/adapter_model.bin\r\n" + "du: cannot access 't5-large_PeftType.PROMPT_TUNING_TaskType.SEQ_2_SEQ_LM/adapter_model.bin': No such file or directory\n" ] } ], @@ -603,13 +619,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "Aspocomp Group , headquartered in Helsinki , Finland , develops interconnection solutions for the electronics industry .\n", - "{'input_ids': tensor([[ 71, 7990, 7699, 1531, 3, 6, 3, 27630, 16, 29763,\n", - " 3, 6, 16458, 3, 6, 1344, 7, 1413, 28102, 1275,\n", - " 21, 8, 12800, 681, 3, 5, 1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", - " 1, 1, 1]])}\n", - "tensor([[ 0, 7163, 1]])\n", - "['neutral']\n" + "EPS grew to 0.04 eur from 0.02 eur .\n", + "{'input_ids': tensor([[ 3, 24935, 3, 4774, 12, 4097, 6348, 3, 1238, 45,\n", + " 4097, 4305, 3, 1238, 3, 5, 1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}\n", + "tensor([[ 0, 1465, 1]])\n", + "['positive']\n" ] } ], From 366ec6df3d401814990f1ef4c7b805c60b27482e Mon Sep 17 00:00:00 2001 From: "Liu, Kaixuan" Date: Thu, 31 Jul 2025 19:29:18 -0400 Subject: [PATCH 04/10] delete file and output from notebook Signed-off-by: Liu, Kaixuan --- .../multitask_prompt_tuning.ipynb | 976 +----------------- ...q2seq_accelerate_big_model_inference.ipynb | 286 ----- 2 files changed, 2 insertions(+), 1260 deletions(-) delete mode 100644 examples/conditional_generation/peft_lora_seq2seq_accelerate_big_model_inference.ipynb diff --git a/examples/conditional_generation/multitask_prompt_tuning.ipynb b/examples/conditional_generation/multitask_prompt_tuning.ipynb index bf3085b03d..920eb887f1 100644 --- a/examples/conditional_generation/multitask_prompt_tuning.ipynb +++ b/examples/conditional_generation/multitask_prompt_tuning.ipynb @@ -180,979 +180,7 @@ "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/57507 [00:00)]\n", - " 0%| | 0/109 [00:00)]\n", - " 0%| | 0/109 [00:00)]\n", - " 0%| | 0/109 [00:00)]\n", - " 0%| | 0/109 [00:00)]\n", - " 0%| | 0/109 [00:00)]\n", - " 0%| | 0/109 [00:00)]\n", - " 0%| | 0/109 [00:00)]\n", - " 0%| | 0/109 [00:00)]\n", - " 0%| | 0/109 [00:00)]\n", - " 0%| | 0/109 [00:00)]\n", - " 0%| | 0/109 [00:00)]\n", - " 0%| | 0/109 [00:00 \u001b[39m\u001b[32m409\u001b[39m \u001b[43mresponse\u001b[49m\u001b[43m.\u001b[49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 410\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n", - "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/requests/models.py:1026\u001b[39m, in \u001b[36mResponse.raise_for_status\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 1025\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[32m-> \u001b[39m\u001b[32m1026\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response=\u001b[38;5;28mself\u001b[39m)\n", - "\u001b[31mHTTPError\u001b[39m: 404 Client Error: Not Found for url: https://huggingface.co/smangrul/twitter_complaints_bigscience_T0_3B_LORA_SEQ_2_SEQ_LM/resolve/main/adapter_config.json", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[31mRepositoryNotFoundError\u001b[39m Traceback (most recent call last)", - "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/peft/config.py:200\u001b[39m, in \u001b[36mPeftConfigMixin.from_pretrained\u001b[39m\u001b[34m(cls, pretrained_model_name_or_path, subfolder, **kwargs)\u001b[39m\n\u001b[32m 199\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m200\u001b[39m config_file = \u001b[43mhf_hub_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 201\u001b[39m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mCONFIG_NAME\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m=\u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mhf_hub_download_kwargs\u001b[49m\n\u001b[32m 202\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 203\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n", - "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_validators.py:114\u001b[39m, in \u001b[36mvalidate_hf_hub_args.._inner_fn\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 112\u001b[39m kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.\u001b[34m__name__\u001b[39m, has_token=has_token, kwargs=kwargs)\n\u001b[32m--> \u001b[39m\u001b[32m114\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:1010\u001b[39m, in \u001b[36mhf_hub_download\u001b[39m\u001b[34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, resume_download, force_filename, local_dir_use_symlinks)\u001b[39m\n\u001b[32m 1009\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1010\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_hf_hub_download_to_cache_dir\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1011\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Destination\u001b[39;49;00m\n\u001b[32m 1012\u001b[39m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1013\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# File info\u001b[39;49;00m\n\u001b[32m 1014\u001b[39m \u001b[43m \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1015\u001b[39m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1016\u001b[39m \u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1017\u001b[39m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1018\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# HTTP info\u001b[39;49;00m\n\u001b[32m 1019\u001b[39m \u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[43m=\u001b[49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1020\u001b[39m \u001b[43m \u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1021\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1022\u001b[39m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[43m=\u001b[49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1023\u001b[39m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1024\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Additional options\u001b[39;49;00m\n\u001b[32m 1025\u001b[39m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m=\u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1026\u001b[39m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[43m=\u001b[49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1027\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:1117\u001b[39m, in \u001b[36m_hf_hub_download_to_cache_dir\u001b[39m\u001b[34m(cache_dir, repo_id, filename, repo_type, revision, endpoint, etag_timeout, headers, proxies, token, local_files_only, force_download)\u001b[39m\n\u001b[32m 1116\u001b[39m \u001b[38;5;66;03m# Otherwise, raise appropriate error\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1117\u001b[39m \u001b[43m_raise_on_head_call_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhead_call_error\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1119\u001b[39m \u001b[38;5;66;03m# From now on, etag, commit_hash, url and size are not None.\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:1658\u001b[39m, in \u001b[36m_raise_on_head_call_error\u001b[39m\u001b[34m(head_call_error, force_download, local_files_only)\u001b[39m\n\u001b[32m 1653\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(head_call_error, (RepositoryNotFoundError, GatedRepoError)) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[32m 1654\u001b[39m \u001b[38;5;28misinstance\u001b[39m(head_call_error, HfHubHTTPError) \u001b[38;5;129;01mand\u001b[39;00m head_call_error.response.status_code == \u001b[32m401\u001b[39m\n\u001b[32m 1655\u001b[39m ):\n\u001b[32m 1656\u001b[39m \u001b[38;5;66;03m# Repo not found or gated => let's raise the actual error\u001b[39;00m\n\u001b[32m 1657\u001b[39m \u001b[38;5;66;03m# Unauthorized => likely a token issue => let's raise the actual error\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1658\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m head_call_error\n\u001b[32m 1659\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1660\u001b[39m \u001b[38;5;66;03m# Otherwise: most likely a connection issue or Hub downtime => let's warn the user\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:1546\u001b[39m, in \u001b[36m_get_metadata_or_catch_error\u001b[39m\u001b[34m(repo_id, filename, repo_type, revision, endpoint, proxies, etag_timeout, headers, token, local_files_only, relative_filename, storage_folder)\u001b[39m\n\u001b[32m 1545\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1546\u001b[39m metadata = \u001b[43mget_hf_file_metadata\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1547\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m=\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[43m=\u001b[49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[43m=\u001b[49m\u001b[43mendpoint\u001b[49m\n\u001b[32m 1548\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1549\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m http_error:\n", - "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_validators.py:114\u001b[39m, in \u001b[36mvalidate_hf_hub_args.._inner_fn\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 112\u001b[39m kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.\u001b[34m__name__\u001b[39m, has_token=has_token, kwargs=kwargs)\n\u001b[32m--> \u001b[39m\u001b[32m114\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:1463\u001b[39m, in \u001b[36mget_hf_file_metadata\u001b[39m\u001b[34m(url, token, proxies, timeout, library_name, library_version, user_agent, headers, endpoint)\u001b[39m\n\u001b[32m 1462\u001b[39m \u001b[38;5;66;03m# Retrieve metadata\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1463\u001b[39m r = \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1464\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mHEAD\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 1465\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m=\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1466\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1467\u001b[39m \u001b[43m \u001b[49m\u001b[43mallow_redirects\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 1468\u001b[39m \u001b[43m \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 1469\u001b[39m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[43m=\u001b[49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1470\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1471\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1472\u001b[39m hf_raise_for_status(r)\n", - "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:286\u001b[39m, in \u001b[36m_request_wrapper\u001b[39m\u001b[34m(method, url, follow_relative_redirects, **params)\u001b[39m\n\u001b[32m 285\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m follow_relative_redirects:\n\u001b[32m--> \u001b[39m\u001b[32m286\u001b[39m response = \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 287\u001b[39m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 288\u001b[39m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m=\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 289\u001b[39m \u001b[43m \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 290\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 291\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 293\u001b[39m \u001b[38;5;66;03m# If redirection, we redirect only relative paths.\u001b[39;00m\n\u001b[32m 294\u001b[39m \u001b[38;5;66;03m# This is useful in case of a renamed repository.\u001b[39;00m\n", - "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/file_download.py:310\u001b[39m, in \u001b[36m_request_wrapper\u001b[39m\u001b[34m(method, url, follow_relative_redirects, **params)\u001b[39m\n\u001b[32m 309\u001b[39m response = http_backoff(method=method, url=url, **params, retry_on_exceptions=(), retry_on_status_codes=(\u001b[32m429\u001b[39m,))\n\u001b[32m--> \u001b[39m\u001b[32m310\u001b[39m \u001b[43mhf_raise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 311\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m response\n", - "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_http.py:459\u001b[39m, in \u001b[36mhf_raise_for_status\u001b[39m\u001b[34m(response, endpoint_name)\u001b[39m\n\u001b[32m 450\u001b[39m message = (\n\u001b[32m 451\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse.status_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m Client Error.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 452\u001b[39m + \u001b[33m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m (...)\u001b[39m\u001b[32m 457\u001b[39m \u001b[33m\"\u001b[39m\u001b[33m https://huggingface.co/docs/huggingface_hub/authentication\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 458\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m459\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m _format(RepositoryNotFoundError, message, response) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01me\u001b[39;00m\n\u001b[32m 461\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m response.status_code == \u001b[32m400\u001b[39m:\n", - "\u001b[31mRepositoryNotFoundError\u001b[39m: 404 Client Error. (Request ID: Root=1-688b07a7-1999e2383e23171c1be77586;01650cab-5274-427d-9fe6-d27257e6940b)\n\nRepository Not Found for url: https://huggingface.co/smangrul/twitter_complaints_bigscience_T0_3B_LORA_SEQ_2_SEQ_LM/resolve/main/adapter_config.json.\nPlease make sure you specified the correct `repo_id` and `repo_type`.\nIf you are trying to access a private or gated repo, make sure you are authenticated. For more details, see https://huggingface.co/docs/huggingface_hub/authentication", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[31mValueError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 18\u001b[39m\n\u001b[32m 15\u001b[39m batch_size = \u001b[32m8\u001b[39m\n\u001b[32m 17\u001b[39m peft_model_id = \u001b[33m\"\u001b[39m\u001b[33msmangrul/twitter_complaints_bigscience_T0_3B_LORA_SEQ_2_SEQ_LM\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m---> \u001b[39m\u001b[32m18\u001b[39m config = \u001b[43mPeftConfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpeft_model_id\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m/usr/local/lib/python3.11/dist-packages/peft/config.py:204\u001b[39m, in \u001b[36mPeftConfigMixin.from_pretrained\u001b[39m\u001b[34m(cls, pretrained_model_name_or_path, subfolder, **kwargs)\u001b[39m\n\u001b[32m 200\u001b[39m config_file = hf_hub_download(\n\u001b[32m 201\u001b[39m pretrained_model_name_or_path, CONFIG_NAME, subfolder=subfolder, **hf_hub_download_kwargs\n\u001b[32m 202\u001b[39m )\n\u001b[32m 203\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m--> \u001b[39m\u001b[32m204\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mCan\u001b[39m\u001b[33m'\u001b[39m\u001b[33mt find \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mCONFIG_NAME\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m at \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mexc\u001b[39;00m\n\u001b[32m 206\u001b[39m loaded_attributes = \u001b[38;5;28mcls\u001b[39m.from_json_file(config_file)\n\u001b[32m 207\u001b[39m kwargs = {**class_kwargs, **loaded_attributes}\n", - "\u001b[31mValueError\u001b[39m: Can't find 'adapter_config.json' at 'smangrul/twitter_complaints_bigscience_T0_3B_LORA_SEQ_2_SEQ_LM'" - ] - } - ], - "source": [ - "from transformers import AutoModelForSeq2SeqLM\n", - "from peft import PeftModel, PeftConfig\n", - "import torch\n", - "from datasets import load_dataset\n", - "import os\n", - "from transformers import AutoTokenizer\n", - "from torch.utils.data import DataLoader\n", - "from transformers import default_data_collator, get_linear_schedule_with_warmup\n", - "from tqdm import tqdm\n", - "from datasets import load_dataset\n", - "\n", - "dataset_name = \"twitter_complaints\"\n", - "text_column = \"Tweet text\"\n", - "label_column = \"text_label\"\n", - "batch_size = 8\n", - "device = \"xpu\" if torch.xpu.is_available() else \"cuda\"\n", - "\n", - "peft_model_id = \"smangrul/twitter_complaints_bigscience_T0_3B_LORA_SEQ_2_SEQ_LM\"\n", - "config = PeftConfig.from_pretrained(peft_model_id)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc55820a", - "metadata": {}, - "outputs": [], - "source": [ - "peft_model_id = \"smangrul/twitter_complaints_bigscience_T0_3B_LORA_SEQ_2_SEQ_LM\"\n", - "max_memory = {0: \"6GIB\", 1: \"0GIB\", 2: \"0GIB\", 3: \"0GIB\", 4: \"0GIB\", \"cpu\": \"30GB\"}\n", - "config = PeftConfig.from_pretrained(peft_model_id)\n", - "model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path, device_map=\"auto\", max_memory=max_memory)\n", - "model = PeftModel.from_pretrained(model, peft_model_id, device_map=\"auto\", max_memory=max_memory)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e1a3648b", - "metadata": {}, - "outputs": [], - "source": [ - "from datasets import load_dataset\n", - "\n", - "dataset = load_dataset(\"ought/raft\", dataset_name)\n", - "\n", - "classes = [k.replace(\"_\", \" \") for k in dataset[\"train\"].features[\"Label\"].names]\n", - "print(classes)\n", - "dataset = dataset.map(\n", - " lambda x: {\"text_label\": [classes[label] for label in x[\"Label\"]]},\n", - " batched=True,\n", - " num_proc=1,\n", - ")\n", - "print(dataset)\n", - "dataset[\"train\"][0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fe12d4d3", - "metadata": {}, - "outputs": [], - "source": [ - "tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n", - "target_max_length = max([len(tokenizer(class_label)[\"input_ids\"]) for class_label in classes])\n", - "\n", - "\n", - "def preprocess_function(examples):\n", - " inputs = examples[text_column]\n", - " targets = examples[label_column]\n", - " model_inputs = tokenizer(inputs, truncation=True)\n", - " labels = tokenizer(\n", - " targets, max_length=target_max_length, padding=\"max_length\", truncation=True, return_tensors=\"pt\"\n", - " )\n", - " labels = labels[\"input_ids\"]\n", - " labels[labels == tokenizer.pad_token_id] = -100\n", - " model_inputs[\"labels\"] = labels\n", - " return model_inputs\n", - "\n", - "\n", - "processed_datasets = dataset.map(\n", - " preprocess_function,\n", - " batched=True,\n", - " num_proc=1,\n", - " remove_columns=dataset[\"train\"].column_names,\n", - " load_from_cache_file=True,\n", - " desc=\"Running tokenizer on dataset\",\n", - ")\n", - "\n", - "train_dataset = processed_datasets[\"train\"]\n", - "eval_dataset = processed_datasets[\"train\"]\n", - "test_dataset = processed_datasets[\"test\"]\n", - "\n", - "\n", - "def collate_fn(examples):\n", - " return tokenizer.pad(examples, padding=\"longest\", return_tensors=\"pt\")\n", - "\n", - "\n", - "train_dataloader = DataLoader(\n", - " train_dataset, shuffle=True, collate_fn=collate_fn, batch_size=batch_size, pin_memory=True\n", - ")\n", - "eval_dataloader = DataLoader(eval_dataset, collate_fn=collate_fn, batch_size=batch_size, pin_memory=True)\n", - "test_dataloader = DataLoader(test_dataset, collate_fn=collate_fn, batch_size=batch_size, pin_memory=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b33be5e6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "@NYTsupport i have complained a dozen times & yet my papers are still thrown FAR from my door. Why is this so hard to resolve?\n", - "{'input_ids': tensor([[25335, 1499, 3, 10, 3320, 12056, 382, 20390, 3, 23,\n", - " 43, 25932, 3, 9, 9611, 648, 3, 184, 4624, 117,\n", - " 780, 82, 5778, 33, 341, 3, 12618, 377, 4280, 45,\n", - " 82, 1365, 5, 1615, 19, 48, 78, 614, 12, 7785,\n", - " 58, 16229, 3, 10, 3, 1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}\n", - "tensor([[ 0, 10394, 1]], device='cuda:0')\n", - "['complaint']\n" - ] - } - ], - "source": [ - "model.eval()\n", - "i = 15\n", - "inputs = tokenizer(f'{text_column} : {dataset[\"test\"][i][\"Tweet text\"]} Label : ', return_tensors=\"pt\")\n", - "print(dataset[\"test\"][i][\"Tweet text\"])\n", - "print(inputs)\n", - "\n", - "with torch.no_grad():\n", - " outputs = model.generate(input_ids=inputs[\"input_ids\"].to(device), max_new_tokens=10)\n", - " print(outputs)\n", - " print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b6d6cd5b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/7 [00:00 100:\n", - " break\n", - "test_preds" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.13" - }, - "vscode": { - "interpreter": { - "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From b005da2da5da4ef1bc565559ca75af463c50d6d2 Mon Sep 17 00:00:00 2001 From: "Liu, Kaixuan" Date: Thu, 31 Jul 2025 19:39:03 -0400 Subject: [PATCH 05/10] delete extra output Signed-off-by: Liu, Kaixuan --- .../peft_lora_seq2seq.ipynb | 331 +----------------- .../peft_prefix_tuning_seq2seq.ipynb | 205 +---------- 2 files changed, 2 insertions(+), 534 deletions(-) diff --git a/examples/conditional_generation/peft_lora_seq2seq.ipynb b/examples/conditional_generation/peft_lora_seq2seq.ipynb index 32243d253d..fa4b116161 100644 --- a/examples/conditional_generation/peft_lora_seq2seq.ipynb +++ b/examples/conditional_generation/peft_lora_seq2seq.ipynb @@ -38,336 +38,7 @@ "execution_count": 2, "id": "8d0850ac", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "trainable params: 2,359,296 || all params: 1,231,940,608 || trainable%: 0.1915\n" - ] - }, - { - "data": { - "text/plain": [ - "PeftModelForSeq2SeqLM(\n", - " (base_model): LoraModel(\n", - " (model): MT5ForConditionalGeneration(\n", - " (shared): Embedding(250112, 1024)\n", - " (encoder): MT5Stack(\n", - " (embed_tokens): Embedding(250112, 1024)\n", - " (block): ModuleList(\n", - " (0): MT5Block(\n", - " (layer): ModuleList(\n", - " (0): MT5LayerSelfAttention(\n", - " (SelfAttention): MT5Attention(\n", - " (q): lora.Linear(\n", - " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (lora_dropout): ModuleDict(\n", - " (default): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (lora_A): ModuleDict(\n", - " (default): Linear(in_features=1024, out_features=8, bias=False)\n", - " )\n", - " (lora_B): ModuleDict(\n", - " (default): Linear(in_features=8, out_features=1024, bias=False)\n", - " )\n", - " (lora_embedding_A): ParameterDict()\n", - " (lora_embedding_B): ParameterDict()\n", - " (lora_magnitude_vector): ModuleDict()\n", - " )\n", - " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (v): lora.Linear(\n", - " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (lora_dropout): ModuleDict(\n", - " (default): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (lora_A): ModuleDict(\n", - " (default): Linear(in_features=1024, out_features=8, bias=False)\n", - " )\n", - " (lora_B): ModuleDict(\n", - " (default): Linear(in_features=8, out_features=1024, bias=False)\n", - " )\n", - " (lora_embedding_A): ParameterDict()\n", - " (lora_embedding_B): ParameterDict()\n", - " (lora_magnitude_vector): ModuleDict()\n", - " )\n", - " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (relative_attention_bias): Embedding(32, 16)\n", - " )\n", - " (layer_norm): MT5LayerNorm()\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (1): MT5LayerFF(\n", - " (DenseReluDense): MT5DenseGatedActDense(\n", - " (wi_0): Linear(in_features=1024, out_features=2816, bias=False)\n", - " (wi_1): Linear(in_features=1024, out_features=2816, bias=False)\n", - " (wo): Linear(in_features=2816, out_features=1024, bias=False)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " (act): NewGELUActivation()\n", - " )\n", - " (layer_norm): MT5LayerNorm()\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " )\n", - " (1-23): 23 x MT5Block(\n", - " (layer): ModuleList(\n", - " (0): MT5LayerSelfAttention(\n", - " (SelfAttention): MT5Attention(\n", - " (q): lora.Linear(\n", - " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (lora_dropout): ModuleDict(\n", - " (default): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (lora_A): ModuleDict(\n", - " (default): Linear(in_features=1024, out_features=8, bias=False)\n", - " )\n", - " (lora_B): ModuleDict(\n", - " (default): Linear(in_features=8, out_features=1024, bias=False)\n", - " )\n", - " (lora_embedding_A): ParameterDict()\n", - " (lora_embedding_B): ParameterDict()\n", - " (lora_magnitude_vector): ModuleDict()\n", - " )\n", - " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (v): lora.Linear(\n", - " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (lora_dropout): ModuleDict(\n", - " (default): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (lora_A): ModuleDict(\n", - " (default): Linear(in_features=1024, out_features=8, bias=False)\n", - " )\n", - " (lora_B): ModuleDict(\n", - " (default): Linear(in_features=8, out_features=1024, bias=False)\n", - " )\n", - " (lora_embedding_A): ParameterDict()\n", - " (lora_embedding_B): ParameterDict()\n", - " (lora_magnitude_vector): ModuleDict()\n", - " )\n", - " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", - " )\n", - " (layer_norm): MT5LayerNorm()\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (1): MT5LayerFF(\n", - " (DenseReluDense): MT5DenseGatedActDense(\n", - " (wi_0): Linear(in_features=1024, out_features=2816, bias=False)\n", - " (wi_1): Linear(in_features=1024, out_features=2816, bias=False)\n", - " (wo): Linear(in_features=2816, out_features=1024, bias=False)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " (act): NewGELUActivation()\n", - " )\n", - " (layer_norm): MT5LayerNorm()\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " )\n", - " )\n", - " (final_layer_norm): MT5LayerNorm()\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (decoder): MT5Stack(\n", - " (embed_tokens): Embedding(250112, 1024)\n", - " (block): ModuleList(\n", - " (0): MT5Block(\n", - " (layer): ModuleList(\n", - " (0): MT5LayerSelfAttention(\n", - " (SelfAttention): MT5Attention(\n", - " (q): lora.Linear(\n", - " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (lora_dropout): ModuleDict(\n", - " (default): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (lora_A): ModuleDict(\n", - " (default): Linear(in_features=1024, out_features=8, bias=False)\n", - " )\n", - " (lora_B): ModuleDict(\n", - " (default): Linear(in_features=8, out_features=1024, bias=False)\n", - " )\n", - " (lora_embedding_A): ParameterDict()\n", - " (lora_embedding_B): ParameterDict()\n", - " (lora_magnitude_vector): ModuleDict()\n", - " )\n", - " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (v): lora.Linear(\n", - " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (lora_dropout): ModuleDict(\n", - " (default): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (lora_A): ModuleDict(\n", - " (default): Linear(in_features=1024, out_features=8, bias=False)\n", - " )\n", - " (lora_B): ModuleDict(\n", - " (default): Linear(in_features=8, out_features=1024, bias=False)\n", - " )\n", - " (lora_embedding_A): ParameterDict()\n", - " (lora_embedding_B): ParameterDict()\n", - " (lora_magnitude_vector): ModuleDict()\n", - " )\n", - " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (relative_attention_bias): Embedding(32, 16)\n", - " )\n", - " (layer_norm): MT5LayerNorm()\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (1): MT5LayerCrossAttention(\n", - " (EncDecAttention): MT5Attention(\n", - " (q): lora.Linear(\n", - " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (lora_dropout): ModuleDict(\n", - " (default): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (lora_A): ModuleDict(\n", - " (default): Linear(in_features=1024, out_features=8, bias=False)\n", - " )\n", - " (lora_B): ModuleDict(\n", - " (default): Linear(in_features=8, out_features=1024, bias=False)\n", - " )\n", - " (lora_embedding_A): ParameterDict()\n", - " (lora_embedding_B): ParameterDict()\n", - " (lora_magnitude_vector): ModuleDict()\n", - " )\n", - " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (v): lora.Linear(\n", - " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (lora_dropout): ModuleDict(\n", - " (default): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (lora_A): ModuleDict(\n", - " (default): Linear(in_features=1024, out_features=8, bias=False)\n", - " )\n", - " (lora_B): ModuleDict(\n", - " (default): Linear(in_features=8, out_features=1024, bias=False)\n", - " )\n", - " (lora_embedding_A): ParameterDict()\n", - " (lora_embedding_B): ParameterDict()\n", - " (lora_magnitude_vector): ModuleDict()\n", - " )\n", - " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", - " )\n", - " (layer_norm): MT5LayerNorm()\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (2): MT5LayerFF(\n", - " (DenseReluDense): MT5DenseGatedActDense(\n", - " (wi_0): Linear(in_features=1024, out_features=2816, bias=False)\n", - " (wi_1): Linear(in_features=1024, out_features=2816, bias=False)\n", - " (wo): Linear(in_features=2816, out_features=1024, bias=False)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " (act): NewGELUActivation()\n", - " )\n", - " (layer_norm): MT5LayerNorm()\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " )\n", - " (1-23): 23 x MT5Block(\n", - " (layer): ModuleList(\n", - " (0): MT5LayerSelfAttention(\n", - " (SelfAttention): MT5Attention(\n", - " (q): lora.Linear(\n", - " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (lora_dropout): ModuleDict(\n", - " (default): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (lora_A): ModuleDict(\n", - " (default): Linear(in_features=1024, out_features=8, bias=False)\n", - " )\n", - " (lora_B): ModuleDict(\n", - " (default): Linear(in_features=8, out_features=1024, bias=False)\n", - " )\n", - " (lora_embedding_A): ParameterDict()\n", - " (lora_embedding_B): ParameterDict()\n", - " (lora_magnitude_vector): ModuleDict()\n", - " )\n", - " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (v): lora.Linear(\n", - " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (lora_dropout): ModuleDict(\n", - " (default): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (lora_A): ModuleDict(\n", - " (default): Linear(in_features=1024, out_features=8, bias=False)\n", - " )\n", - " (lora_B): ModuleDict(\n", - " (default): Linear(in_features=8, out_features=1024, bias=False)\n", - " )\n", - " (lora_embedding_A): ParameterDict()\n", - " (lora_embedding_B): ParameterDict()\n", - " (lora_magnitude_vector): ModuleDict()\n", - " )\n", - " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", - " )\n", - " (layer_norm): MT5LayerNorm()\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (1): MT5LayerCrossAttention(\n", - " (EncDecAttention): MT5Attention(\n", - " (q): lora.Linear(\n", - " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (lora_dropout): ModuleDict(\n", - " (default): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (lora_A): ModuleDict(\n", - " (default): Linear(in_features=1024, out_features=8, bias=False)\n", - " )\n", - " (lora_B): ModuleDict(\n", - " (default): Linear(in_features=8, out_features=1024, bias=False)\n", - " )\n", - " (lora_embedding_A): ParameterDict()\n", - " (lora_embedding_B): ParameterDict()\n", - " (lora_magnitude_vector): ModuleDict()\n", - " )\n", - " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (v): lora.Linear(\n", - " (base_layer): Linear(in_features=1024, out_features=1024, bias=False)\n", - " (lora_dropout): ModuleDict(\n", - " (default): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (lora_A): ModuleDict(\n", - " (default): Linear(in_features=1024, out_features=8, bias=False)\n", - " )\n", - " (lora_B): ModuleDict(\n", - " (default): Linear(in_features=8, out_features=1024, bias=False)\n", - " )\n", - " (lora_embedding_A): ParameterDict()\n", - " (lora_embedding_B): ParameterDict()\n", - " (lora_magnitude_vector): ModuleDict()\n", - " )\n", - " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", - " )\n", - " (layer_norm): MT5LayerNorm()\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (2): MT5LayerFF(\n", - " (DenseReluDense): MT5DenseGatedActDense(\n", - " (wi_0): Linear(in_features=1024, out_features=2816, bias=False)\n", - " (wi_1): Linear(in_features=1024, out_features=2816, bias=False)\n", - " (wo): Linear(in_features=2816, out_features=1024, bias=False)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " (act): NewGELUActivation()\n", - " )\n", - " (layer_norm): MT5LayerNorm()\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " )\n", - " )\n", - " )\n", - " (final_layer_norm): MT5LayerNorm()\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (lm_head): Linear(in_features=1024, out_features=250112, bias=False)\n", - " )\n", - " )\n", - ")" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# creating model\n", "peft_config = LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1)\n", diff --git a/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb b/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb index 44eb6fadfc..9890a7c05d 100644 --- a/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb +++ b/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb @@ -38,210 +38,7 @@ "execution_count": 2, "id": "8d0850ac", "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a53255e7261d484bbb102253280b3475", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "config.json: 0%| | 0.00/1.21k [00:00 Date: Fri, 1 Aug 2025 10:13:52 -0400 Subject: [PATCH 06/10] update device setting Signed-off-by: Liu, Kaixuan --- .../multitask_prompt_tuning.ipynb | 4 +- .../peft_ia3_seq2seq.ipynb | 4 +- .../peft_lora_seq2seq.ipynb | 61 +----------- .../peft_prefix_tuning_seq2seq.ipynb | 92 +------------------ .../peft_prompt_tuning_seq2seq.ipynb | 91 +----------------- ..._prompt_tuning_seq2seq_with_generate.ipynb | 4 +- 6 files changed, 18 insertions(+), 238 deletions(-) diff --git a/examples/conditional_generation/multitask_prompt_tuning.ipynb b/examples/conditional_generation/multitask_prompt_tuning.ipynb index 920eb887f1..354e0dbc8c 100644 --- a/examples/conditional_generation/multitask_prompt_tuning.ipynb +++ b/examples/conditional_generation/multitask_prompt_tuning.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "58ff91ca-ce92-43d0-ae8b-4e9e89e193f6", "metadata": { "tags": [] @@ -15,7 +15,7 @@ "from peft import get_peft_model, MultitaskPromptTuningConfig, TaskType, MultitaskPromptTuningInit\n", "\n", "set_seed(42)\n", - "device = \"xpu\" if torch.xpu.is_available() else \"cuda\"\n", + "device = torch.accelerator.current_accelerator().type if hasattr(torch, \"accelerator\") else \"cuda\"\n", "model_name = \"google/flan-t5-base\"\n", "\n", "peft_config = MultitaskPromptTuningConfig(\n", diff --git a/examples/conditional_generation/peft_ia3_seq2seq.ipynb b/examples/conditional_generation/peft_ia3_seq2seq.ipynb index cae6f0b557..a05f8364fd 100644 --- a/examples/conditional_generation/peft_ia3_seq2seq.ipynb +++ b/examples/conditional_generation/peft_ia3_seq2seq.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "0c152fc8", "metadata": { "id": "5f93b7d1" @@ -23,7 +23,7 @@ "from tqdm import tqdm\n", "from datasets import load_dataset\n", "\n", - "device = \"xpu\" if torch.xpu.is_available() else \"cuda\"\n", + "device = torch.accelerator.current_accelerator().type if hasattr(torch, \"accelerator\") else \"cuda\"\n", "model_name_or_path = \"bigscience/mt0-large\"\n", "tokenizer_name_or_path = \"bigscience/mt0-large\"\n", "\n", diff --git a/examples/conditional_generation/peft_lora_seq2seq.ipynb b/examples/conditional_generation/peft_lora_seq2seq.ipynb index fa4b116161..3bf9bfe733 100644 --- a/examples/conditional_generation/peft_lora_seq2seq.ipynb +++ b/examples/conditional_generation/peft_lora_seq2seq.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "5f93b7d1", "metadata": {}, "outputs": [], @@ -20,7 +20,7 @@ "from tqdm import tqdm\n", "from datasets import load_dataset\n", "\n", - "device = \"xpu\" if torch.xpu.is_available() else \"cuda\"\n", + "device = torch.accelerator.current_accelerator().type if hasattr(torch, \"accelerator\") else \"cuda\"\n", "model_name_or_path = \"bigscience/mt0-large\"\n", "tokenizer_name_or_path = \"bigscience/mt0-large\"\n", "\n", @@ -208,63 +208,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "6b3a4090", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:03<00:00, 4.02it/s]\n", - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:02<00:00, 10.43it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "epoch=0: train_ppl=tensor(1.3389, device='xpu:0') train_epoch_loss=tensor(0.2918, device='xpu:0') eval_ppl=tensor(1.0656, device='xpu:0') eval_epoch_loss=tensor(0.0635, device='xpu:0')\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:00<00:00, 4.22it/s]\n", - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:02<00:00, 10.96it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "epoch=1: train_ppl=tensor(1.0657, device='xpu:0') train_epoch_loss=tensor(0.0636, device='xpu:0') eval_ppl=tensor(1.0616, device='xpu:0') eval_epoch_loss=tensor(0.0598, device='xpu:0')\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:00<00:00, 4.23it/s]\n", - "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:02<00:00, 10.98it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "epoch=2: train_ppl=tensor(1.0353, device='xpu:0') train_epoch_loss=tensor(0.0347, device='xpu:0') eval_ppl=tensor(1.0560, device='xpu:0') eval_epoch_loss=tensor(0.0545, device='xpu:0')\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "# training and evaluation\n", "model = model.to(device)\n", diff --git a/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb b/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb index 9890a7c05d..2fb2a4f1fa 100644 --- a/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb +++ b/examples/conditional_generation/peft_prefix_tuning_seq2seq.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "5f93b7d1", "metadata": {}, "outputs": [], @@ -20,7 +20,7 @@ "from tqdm import tqdm\n", "from datasets import load_dataset\n", "\n", - "device = \"xpu\" if torch.xpu.is_available() else \"cuda\"\n", + "device = torch.accelerator.current_accelerator().type if hasattr(torch, \"accelerator\") else \"cuda\"\n", "model_name_or_path = \"t5-large\"\n", "tokenizer_name_or_path = \"t5-large\"\n", "\n", @@ -236,94 +236,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "6b3a4090", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/255 [00:00 Date: Fri, 1 Aug 2025 13:06:57 -0400 Subject: [PATCH 07/10] replace model.bin with safetensors file Signed-off-by: Liu, Kaixuan --- .../multitask_prompt_tuning.ipynb | 5 ++- .../peft_adalora_seq2seq.py | 4 +- .../peft_ia3_seq2seq.ipynb | 39 +++---------------- .../peft_lora_seq2seq.ipynb | 6 +-- .../peft_prefix_tuning_seq2seq.ipynb | 6 +-- .../peft_prompt_tuning_seq2seq.ipynb | 6 +-- ..._prompt_tuning_seq2seq_with_generate.ipynb | 4 +- .../conditional_generation/requirements.txt | 1 + 8 files changed, 22 insertions(+), 49 deletions(-) diff --git a/examples/conditional_generation/multitask_prompt_tuning.ipynb b/examples/conditional_generation/multitask_prompt_tuning.ipynb index 354e0dbc8c..1eaec5b016 100644 --- a/examples/conditional_generation/multitask_prompt_tuning.ipynb +++ b/examples/conditional_generation/multitask_prompt_tuning.ipynb @@ -294,7 +294,7 @@ " num_tasks=1,\n", " task_type=TaskType.SEQ_2_SEQ_LM,\n", " prompt_tuning_init=MultitaskPromptTuningInit.EXACT_SOURCE_TASK,\n", - " prompt_tuning_init_state_dict_path=\"checkpoints_source/50000/adapter_model.bin\",\n", + " prompt_tuning_init_state_dict_path=\"checkpoints_source/50000/adapter_model.safetensors\",\n", " num_virtual_tokens=50,\n", " num_transformer_submodules=1,\n", ")\n", @@ -361,8 +361,9 @@ "source": [ "# load last checkpoint for now\n", "from peft import set_peft_model_state_dict\n", + "from safetensors.torch import load_file\n", "\n", - "sd_6000 = torch.load(\"checkpoints_target/6000/adapter_model.bin\")\n", + "sd_6000 = load_file(\"checkpoints_target/6000/adapter_model.safetensors\")\n", "set_peft_model_state_dict(model, sd_6000)\n", "\n", "# evaluate val\n", diff --git a/examples/conditional_generation/peft_adalora_seq2seq.py b/examples/conditional_generation/peft_adalora_seq2seq.py index 28b0ad6701..deaa81bd47 100644 --- a/examples/conditional_generation/peft_adalora_seq2seq.py +++ b/examples/conditional_generation/peft_adalora_seq2seq.py @@ -11,7 +11,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false" -device = "xpu" if torch.xpu.is_available() else "cuda" +device = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda" model_name_or_path = "facebook/bart-base" tokenizer_name_or_path = "facebook/bart-base" @@ -160,7 +160,7 @@ def preprocess_function(examples): model.save_pretrained(peft_model_id) -ckpt = f"{peft_model_id}/adapter_model.bin" +ckpt = f"{peft_model_id}/adapter_model.safetensors" # get_ipython().system('du -h $ckpt') diff --git a/examples/conditional_generation/peft_ia3_seq2seq.ipynb b/examples/conditional_generation/peft_ia3_seq2seq.ipynb index a05f8364fd..155fab5530 100644 --- a/examples/conditional_generation/peft_ia3_seq2seq.ipynb +++ b/examples/conditional_generation/peft_ia3_seq2seq.ipynb @@ -67,41 +67,12 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "da74b569", "metadata": { "id": "8d0850ac" }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "54bafcb49ba34fcb98a2f072a93a071f", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "config.json: 0%| | 0.00/800 [00:00 Date: Fri, 1 Aug 2025 13:17:09 -0400 Subject: [PATCH 08/10] adjust Signed-off-by: Liu, Kaixuan --- .../peft_prompt_tuning_seq2seq_with_generate.ipynb | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/examples/conditional_generation/peft_prompt_tuning_seq2seq_with_generate.ipynb b/examples/conditional_generation/peft_prompt_tuning_seq2seq_with_generate.ipynb index 4449e8b929..95be106162 100644 --- a/examples/conditional_generation/peft_prompt_tuning_seq2seq_with_generate.ipynb +++ b/examples/conditional_generation/peft_prompt_tuning_seq2seq_with_generate.ipynb @@ -569,15 +569,7 @@ "start_time": "2023-05-30T09:53:15.059304Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "du: cannot access 't5-large_PeftType.PROMPT_TUNING_TaskType.SEQ_2_SEQ_LM/adapter_model.bin': No such file or directory\n" - ] - } - ], + "outputs": [], "source": [ "ckpt = f\"{peft_model_id}/adapter_model.safetensors\"\n", "!du -h $ckpt" From c06c312c168b29a36a76cd86c6802e0df91c6e59 Mon Sep 17 00:00:00 2001 From: "Liu, Kaixuan" Date: Mon, 4 Aug 2025 18:00:58 -0400 Subject: [PATCH 09/10] run make style Signed-off-by: Liu, Kaixuan --- .../peft_adalora_seq2seq.py | 2 +- ...ora_seq2seq_accelerate_ds_zero3_offload.py | 30 +++++++++++++------ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/examples/conditional_generation/peft_adalora_seq2seq.py b/examples/conditional_generation/peft_adalora_seq2seq.py index deaa81bd47..32f532226b 100644 --- a/examples/conditional_generation/peft_adalora_seq2seq.py +++ b/examples/conditional_generation/peft_adalora_seq2seq.py @@ -51,7 +51,7 @@ lora_dropout=0.1, task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, - total_step=len(dataset['train']) * num_epochs, + total_step=len(dataset["train"]) * num_epochs, ) model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path) diff --git a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py index c2c5f39305..d0fb9ee241 100644 --- a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py +++ b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py @@ -202,9 +202,15 @@ def collate_fn(examples): lr_scheduler.step() optimizer.zero_grad() # Printing the device memory usage details such as allocated memory, peak memory, and total memory usage - accelerator.print(f"{accelerator.device.type.upper()} Memory before entering the train : {b2mb(tracemalloc.begin)}") - accelerator.print(f"{accelerator.device.type.upper()} Memory consumed at the end of the train (end-begin): {tracemalloc.used}") - accelerator.print(f"{accelerator.device.type.upper()} Peak Memory consumed during the train (max-begin): {tracemalloc.peaked}") + accelerator.print( + f"{accelerator.device.type.upper()} Memory before entering the train : {b2mb(tracemalloc.begin)}" + ) + accelerator.print( + f"{accelerator.device.type.upper()} Memory consumed at the end of the train (end-begin): {tracemalloc.used}" + ) + accelerator.print( + f"{accelerator.device.type.upper()} Peak Memory consumed during the train (max-begin): {tracemalloc.peaked}" + ) accelerator.print( f"{accelerator.device.type.upper()} Total Peak Memory consumed during the train (max): {tracemalloc.peaked + b2mb(tracemalloc.begin)}" ) @@ -233,9 +239,15 @@ def collate_fn(examples): eval_preds.extend(tokenizer.batch_decode(preds, skip_special_tokens=True)) # Printing the device memory usage details such as allocated memory, peak memory, and total memory usage - accelerator.print(f"{accelerator.device.type.upper()} Memory before entering the eval : {b2mb(tracemalloc.begin)}") - accelerator.print(f"{accelerator.device.type.upper()} Memory consumed at the end of the eval (end-begin): {tracemalloc.used}") - accelerator.print(f"{accelerator.device.type.upper()} Peak Memory consumed during the eval (max-begin): {tracemalloc.peaked}") + accelerator.print( + f"{accelerator.device.type.upper()} Memory before entering the eval : {b2mb(tracemalloc.begin)}" + ) + accelerator.print( + f"{accelerator.device.type.upper()} Memory consumed at the end of the eval (end-begin): {tracemalloc.used}" + ) + accelerator.print( + f"{accelerator.device.type.upper()} Peak Memory consumed during the eval (max-begin): {tracemalloc.peaked}" + ) accelerator.print( f"{accelerator.device.type.upper()} Total Peak Memory consumed during the eval (max): {tracemalloc.peaked + b2mb(tracemalloc.begin)}" ) @@ -249,9 +261,9 @@ def collate_fn(examples): correct = 0 total = 0 - assert len(eval_preds) == len(dataset["train"][label_column]), ( - f"{len(eval_preds)} != {len(dataset['train'][label_column])}" - ) + assert len(eval_preds) == len( + dataset["train"][label_column] + ), f"{len(eval_preds)} != {len(dataset['train'][label_column])}" for pred, true in zip(eval_preds, dataset["train"][label_column]): if pred.strip() == true.strip(): correct += 1 From ac81931e684c734a22929353eb801c998ecbbed4 Mon Sep 17 00:00:00 2001 From: "Liu, Kaixuan" Date: Tue, 5 Aug 2025 14:39:16 -0400 Subject: [PATCH 10/10] fix format issue by upgrading ruff version Signed-off-by: Liu, Kaixuan --- .../peft_lora_seq2seq_accelerate_ds_zero3_offload.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py index d0fb9ee241..50bd15a89e 100644 --- a/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py +++ b/examples/conditional_generation/peft_lora_seq2seq_accelerate_ds_zero3_offload.py @@ -261,9 +261,9 @@ def collate_fn(examples): correct = 0 total = 0 - assert len(eval_preds) == len( - dataset["train"][label_column] - ), f"{len(eval_preds)} != {len(dataset['train'][label_column])}" + assert len(eval_preds) == len(dataset["train"][label_column]), ( + f"{len(eval_preds)} != {len(dataset['train'][label_column])}" + ) for pred, true in zip(eval_preds, dataset["train"][label_column]): if pred.strip() == true.strip(): correct += 1