Refactored paths

This commit is contained in:
zin
2026-05-06 18:22:54 +00:00
parent 4e192b7bc4
commit dd22ee09a4
8 changed files with 61 additions and 0 deletions
File diff suppressed because one or more lines are too long
+140
View File
@@ -0,0 +1,140 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Загрузка датасета DEAM\n",
"\n",
"Этот ноутбук предназначен для автоматизации процесса скачивания и подготовки музыкального датасета **DEAM** (Database for Emotional Analysis in Music).\n",
"Данные будут помещены в папку `dataset/DEAM`."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting kagglehub\n",
" Downloading kagglehub-1.0.1-py3-none-any.whl.metadata (40 kB)\n",
"Collecting kagglesdk<1.0,>=0.1.22 (from kagglehub)\n",
" Downloading kagglesdk-0.1.23-py3-none-any.whl.metadata (13 kB)\n",
"Requirement already satisfied: packaging in /home/zin/projects/Thesis/.venv/lib/python3.11/site-packages (from kagglehub) (25.0)\n",
"Requirement already satisfied: pyyaml in /home/zin/projects/Thesis/.venv/lib/python3.11/site-packages (from kagglehub) (6.0.3)\n",
"Requirement already satisfied: requests in /home/zin/projects/Thesis/.venv/lib/python3.11/site-packages (from kagglehub) (2.32.5)\n",
"Requirement already satisfied: tqdm in /home/zin/projects/Thesis/.venv/lib/python3.11/site-packages (from kagglehub) (4.67.1)\n",
"Requirement already satisfied: protobuf in /home/zin/projects/Thesis/.venv/lib/python3.11/site-packages (from kagglesdk<1.0,>=0.1.22->kagglehub) (6.33.4)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in /home/zin/projects/Thesis/.venv/lib/python3.11/site-packages (from requests->kagglehub) (3.4.4)\n",
"Requirement already satisfied: idna<4,>=2.5 in /home/zin/projects/Thesis/.venv/lib/python3.11/site-packages (from requests->kagglehub) (3.11)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /home/zin/projects/Thesis/.venv/lib/python3.11/site-packages (from requests->kagglehub) (2.6.3)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /home/zin/projects/Thesis/.venv/lib/python3.11/site-packages (from requests->kagglehub) (2026.1.4)\n",
"Downloading kagglehub-1.0.1-py3-none-any.whl (70 kB)\n",
"Downloading kagglesdk-0.1.23-py3-none-any.whl (217 kB)\n",
"Installing collected packages: kagglesdk, kagglehub\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2/2\u001b[0m [kagglehub]\n",
"\u001b[1A\u001b[2KSuccessfully installed kagglehub-1.0.1 kagglesdk-0.1.23\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.3\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m26.1.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
]
}
],
"source": [
"!pip install kagglehub"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Скачиваем датасет DEAM...\n",
"Downloading to /home/zin/.cache/kagglehub/datasets/imsparsh/deam-mediaeval-dataset-emotional-analysis-in-music/1.archive...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1.83G/1.83G [01:09<00:00, 28.2MB/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Extracting files...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Датасет скачан во временную директорию: /home/zin/.cache/kagglehub/datasets/imsparsh/deam-mediaeval-dataset-emotional-analysis-in-music/versions/1\n",
"Переносим файлы в ../dataset/DEAM...\n",
"\n",
"[УСПЕХ] Датасет DEAM готов к работе!\n"
]
}
],
"source": [
"import os\n",
"import shutil\n",
"import kagglehub\n",
"from pathlib import Path\n",
"\n",
"# 1. Настройка путей\n",
"DATASET_ROOT = Path(\"../dataset\")\n",
"DEAM_ROOT = DATASET_ROOT / \"DEAM\"\n",
"DEAM_ROOT.mkdir(parents=True, exist_ok=True)\n",
"\n",
"# 2. Загрузка через kagglehub\n",
"print(\"Скачиваем датасет DEAM...\")\n",
"kaggle_cache_path = kagglehub.dataset_download(\"imsparsh/deam-mediaeval-dataset-emotional-analysis-in-music\")\n",
"print(f\"Датасет скачан во временную директорию: {kaggle_cache_path}\")\n",
"\n",
"# 3. Перемещение файлов в проект\n",
"print(f\"Переносим файлы в {DEAM_ROOT}...\")\n",
"shutil.copytree(kaggle_cache_path, DEAM_ROOT, dirs_exist_ok=True)\n",
"\n",
"print(\"\\n[УСПЕХ] Датасет DEAM готов к работе!\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (my-python-project)",
"language": "python",
"name": "my-python-project"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
File diff suppressed because one or more lines are too long
+146
View File
@@ -0,0 +1,146 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"id": "83693ad7",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from pathlib import Path"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "99850a99",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Читаем файл аннотаций: ../dataset/DEAM/DEAM_Annotations/annotations/annotations per each rater/song_level/static_annotations_songs_1_2000.csv\n"
]
}
],
"source": [
"# 1. Ищем файл (поднимаемся из src на уровень выше)\n",
"deam_root = Path(\"../dataset/DEAM\")\n",
"\n",
"# Ищем файл статичных аннотаций. Берем первый попавшийся.\n",
"csv_files = list(deam_root.rglob(\"*static_annotations*.csv\"))\n",
"if not csv_files:\n",
" # Если не нашел static, берем вообще любой csv с аннотациями\n",
" csv_files = list(deam_root.rglob(\"*.csv\"))\n",
"\n",
"if not csv_files:\n",
" # Если путь неверный или файлов нет, скрипт сразу скажет об этом и покажет полный путь\n",
" raise FileNotFoundError(f\"В папке {deam_root.resolve()} не найдено ни одного CSV файла! Проверьте пути.\")\n",
"\n",
"anno_path = csv_files[0]\n",
"print(f\"Читаем файл аннотаций: {anno_path}\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "5fbc493f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Оригинальные колонки в файле: ['workerID', ' SongId', ' Valence', ' Arousal']\n"
]
}
],
"source": [
"# 2. Загружаем и чистим колонки\n",
"df = pd.read_csv(anno_path)\n",
"print(\"Оригинальные колонки в файле:\", df.columns.tolist())\n",
"\n",
"# Сносим пробелы по краям и переводим в нижний регистр\n",
"df.columns = [str(c).strip().lower() for c in df.columns]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "1e28fece",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Успешно найдены колонки -> ID: 'workerid', Valence: 'valence', Arousal: 'arousal'\n"
]
}
],
"source": [
"# 3. Умный поиск колонок\n",
"# Ищем первую колонку, где есть 'id' или 'song'\n",
"song_col = next((c for c in df.columns if 'song' in c or 'id' in c), df.columns[0])\n",
"# Ищем valence (желательно mean, но сойдет любой)\n",
"v_col = next((c for c in df.columns if 'valence' in c and 'mean' in c), \n",
" next((c for c in df.columns if 'valence' in c), None))\n",
"# Ищем arousal\n",
"a_col = next((c for c in df.columns if 'arousal' in c and 'mean' in c), \n",
" next((c for c in df.columns if 'arousal' in c), None))\n",
"\n",
"if not v_col or not a_col:\n",
" raise ValueError(f\"Не смог найти Valence или Arousal! Доступные колонки: {df.columns.tolist()}\")\n",
"\n",
"print(f\"Успешно найдены колонки -> ID: '{song_col}', Valence: '{v_col}', Arousal: '{a_col}'\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "469f651c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Готово! Музыкальная база сохранена: ../dataset/DEAM/music_db.csv\n"
]
}
],
"source": [
"# 4. Сохраняем результат\n",
"clean_df = df[[song_col, v_col, a_col]].copy()\n",
"clean_df.columns = ['song_id', 'valence', 'arousal']\n",
"\n",
"output_path = deam_root / \"music_db.csv\"\n",
"clean_df.to_csv(output_path, index=False)\n",
"print(f\"Готово! Музыкальная база сохранена: {output_path}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (my-python-project)",
"language": "python",
"name": "my-python-project"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+114
View File
@@ -0,0 +1,114 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d70d8e32",
"metadata": {},
"outputs": [],
"source": [
"from concurrent.futures import ProcessPoolExecutor\n",
"import pandas as pd\n",
"from pathlib import Path\n",
"from PIL import Image\n",
"import torch\n",
"from torchvision import transforms\n",
"from tqdm import tqdm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "31b0fa82",
"metadata": {},
"outputs": [],
"source": [
"DATA_ROOT = Path(\"../dataset/EmoSet-118K\")\n",
"TRANSFORM = transforms.Compose([\n",
" transforms.Resize((224,224)),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1a17ecf5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/94481 [00:00<?, ?it/s]\n"
]
},
{
"ename": "PicklingError",
"evalue": "Can't pickle <class 'pandas.core.frame.Pandas'>: attribute lookup Pandas on pandas.core.frame failed",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31m_RemoteTraceback\u001b[39m Traceback (most recent call last)",
"\u001b[31m_RemoteTraceback\u001b[39m: \n\"\"\"\nTraceback (most recent call last):\n File \"/home/zin/.pyenv/versions/3.11.7/lib/python3.11/multiprocessing/queues.py\", line 244, in _feed\n obj = _ForkingPickler.dumps(obj)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/zin/.pyenv/versions/3.11.7/lib/python3.11/multiprocessing/reduction.py\", line 51, in dumps\n cls(buf, protocol).dump(obj)\n_pickle.PicklingError: Can't pickle <class 'pandas.core.frame.Pandas'>: attribute lookup Pandas on pandas.core.frame failed\n\"\"\"",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[31mPicklingError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 20\u001b[39m\n\u001b[32m 18\u001b[39m futures = [executor.submit(process_row, row, split_dir, tensor_dir) \u001b[38;5;28;01mfor\u001b[39;00m row \u001b[38;5;129;01min\u001b[39;00m df.itertuples()]\n\u001b[32m 19\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m f \u001b[38;5;129;01min\u001b[39;00m tqdm(futures):\n\u001b[32m---> \u001b[39m\u001b[32m20\u001b[39m results.append(\u001b[43mf\u001b[49m\u001b[43m.\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[32m 22\u001b[39m new_df = pd.DataFrame(results)\n\u001b[32m 23\u001b[39m new_df.to_csv(DATA_ROOT / split / \u001b[33m\"\u001b[39m\u001b[33mlabels_tensor.csv\u001b[39m\u001b[33m\"\u001b[39m, index=\u001b[38;5;28;01mFalse\u001b[39;00m)\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.11.7/lib/python3.11/concurrent/futures/_base.py:449\u001b[39m, in \u001b[36mFuture.result\u001b[39m\u001b[34m(self, timeout)\u001b[39m\n\u001b[32m 447\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m CancelledError()\n\u001b[32m 448\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._state == FINISHED:\n\u001b[32m--> \u001b[39m\u001b[32m449\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__get_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 451\u001b[39m \u001b[38;5;28mself\u001b[39m._condition.wait(timeout)\n\u001b[32m 453\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._state \u001b[38;5;129;01min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.11.7/lib/python3.11/concurrent/futures/_base.py:401\u001b[39m, in \u001b[36mFuture.__get_result\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 399\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._exception:\n\u001b[32m 400\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m401\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m._exception\n\u001b[32m 402\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 403\u001b[39m \u001b[38;5;66;03m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n\u001b[32m 404\u001b[39m \u001b[38;5;28mself\u001b[39m = \u001b[38;5;28;01mNone\u001b[39;00m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.11.7/lib/python3.11/multiprocessing/queues.py:244\u001b[39m, in \u001b[36mQueue._feed\u001b[39m\u001b[34m(buffer, notempty, send_bytes, writelock, reader_close, writer_close, ignore_epipe, onerror, queue_sem)\u001b[39m\n\u001b[32m 241\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[32m 243\u001b[39m \u001b[38;5;66;03m# serialize the data before acquiring the lock\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m244\u001b[39m obj = \u001b[43m_ForkingPickler\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdumps\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 245\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m wacquire \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 246\u001b[39m send_bytes(obj)\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/versions/3.11.7/lib/python3.11/multiprocessing/reduction.py:51\u001b[39m, in \u001b[36mForkingPickler.dumps\u001b[39m\u001b[34m(cls, obj, protocol)\u001b[39m\n\u001b[32m 48\u001b[39m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[32m 49\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mdumps\u001b[39m(\u001b[38;5;28mcls\u001b[39m, obj, protocol=\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m 50\u001b[39m buf = io.BytesIO()\n\u001b[32m---> \u001b[39m\u001b[32m51\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mbuf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprotocol\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdump\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 52\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m buf.getbuffer()\n",
"\u001b[31mPicklingError\u001b[39m: Can't pickle <class 'pandas.core.frame.Pandas'>: attribute lookup Pandas on pandas.core.frame failed"
]
}
],
"source": [
"def process_row(row, split_dir, tensor_dir):\n",
" img_path = split_dir / row.filename\n",
" img = Image.open(img_path).convert(\"RGB\")\n",
" tensor = TRANSFORM(img)\n",
" tensor_path = tensor_dir / f\"{row.filename}.pt\"\n",
" torch.save(tensor, tensor_path)\n",
" return {\"tensor_path\": str(tensor_path), \"label\": row.label}\n",
"\n",
"for split in [\"train\",\"val\",\"test\"]:\n",
" split_dir = DATA_ROOT / split / \"images\"\n",
" tensor_dir = DATA_ROOT / split / \"tensors\"\n",
" tensor_dir.mkdir(exist_ok=True, parents=True)\n",
"\n",
" df = pd.read_csv(DATA_ROOT / split / \"labels.csv\")\n",
"\n",
" results = []\n",
" with ProcessPoolExecutor(max_workers=12) as executor:\n",
" futures = [executor.submit(process_row, row, split_dir, tensor_dir) for row in df.itertuples()]\n",
" for f in tqdm(futures):\n",
" results.append(f.result())\n",
"\n",
" new_df = pd.DataFrame(results)\n",
" new_df.to_csv(DATA_ROOT / split / \"labels_tensor.csv\", index=False)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "thesis-py3.11",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+199
View File
@@ -0,0 +1,199 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "ca08df84",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using device: cuda\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Step 0/1000, Loss: 1.0013\n",
"Step 10/1000, Loss: 1.0088\n",
"Step 20/1000, Loss: 0.9956\n",
"Step 30/1000, Loss: 0.9781\n",
"Step 40/1000, Loss: 0.9613\n",
"Step 50/1000, Loss: 0.9313\n",
"Step 60/1000, Loss: 0.8927\n",
"Step 70/1000, Loss: 0.8503\n",
"Step 80/1000, Loss: 0.7537\n",
"Step 90/1000, Loss: 0.6689\n",
"Step 100/1000, Loss: 0.6063\n",
"Step 110/1000, Loss: 0.5172\n",
"Step 120/1000, Loss: 0.4592\n",
"Step 130/1000, Loss: 0.4044\n",
"Step 140/1000, Loss: 0.3610\n",
"Step 150/1000, Loss: 0.3175\n",
"Step 160/1000, Loss: 0.2825\n",
"Step 170/1000, Loss: 0.2560\n",
"Step 180/1000, Loss: 0.2360\n",
"Step 190/1000, Loss: 0.2203\n",
"Step 200/1000, Loss: 0.1930\n",
"Step 210/1000, Loss: 0.1854\n",
"Step 220/1000, Loss: 0.1723\n",
"Step 230/1000, Loss: 0.1546\n",
"Step 240/1000, Loss: 0.1386\n",
"Step 250/1000, Loss: 0.1271\n",
"Step 260/1000, Loss: 0.1109\n",
"Step 270/1000, Loss: 0.1032\n",
"Step 280/1000, Loss: 0.0899\n",
"Step 290/1000, Loss: 0.0807\n",
"Step 300/1000, Loss: 0.0750\n",
"Step 310/1000, Loss: 0.0813\n",
"Step 320/1000, Loss: 0.0612\n",
"Step 330/1000, Loss: 0.0544\n",
"Step 340/1000, Loss: 0.0552\n",
"Step 350/1000, Loss: 0.0446\n",
"Step 360/1000, Loss: 0.0403\n",
"Step 370/1000, Loss: 0.0350\n",
"Step 380/1000, Loss: 0.0612\n",
"Step 390/1000, Loss: 0.0364\n",
"Step 400/1000, Loss: 0.0322\n",
"Step 410/1000, Loss: 0.0302\n",
"Step 420/1000, Loss: 0.0519\n",
"Step 430/1000, Loss: 0.0319\n",
"Step 440/1000, Loss: 0.0260\n",
"Step 450/1000, Loss: 0.0208\n",
"Step 460/1000, Loss: 0.0409\n",
"Step 470/1000, Loss: 0.0291\n",
"Step 480/1000, Loss: 0.0234\n",
"Step 490/1000, Loss: 0.0194\n",
"Step 500/1000, Loss: 0.0274\n",
"Step 510/1000, Loss: 0.0231\n",
"Step 520/1000, Loss: 0.0199\n",
"Step 530/1000, Loss: 0.0154\n",
"Step 540/1000, Loss: 0.0278\n",
"Step 550/1000, Loss: 0.0185\n",
"Step 560/1000, Loss: 0.0180\n",
"Step 570/1000, Loss: 0.0152\n",
"Step 580/1000, Loss: 0.0132\n",
"Step 590/1000, Loss: 0.0111\n",
"Step 600/1000, Loss: 0.0396\n",
"Step 610/1000, Loss: 0.0179\n",
"Step 620/1000, Loss: 0.0148\n",
"Step 630/1000, Loss: 0.0123\n",
"Step 640/1000, Loss: 0.0265\n",
"Step 650/1000, Loss: 0.0133\n",
"Step 660/1000, Loss: 0.0128\n",
"Step 670/1000, Loss: 0.0107\n",
"Step 680/1000, Loss: 0.0142\n",
"Step 690/1000, Loss: 0.0202\n",
"Step 700/1000, Loss: 0.0125\n",
"Step 710/1000, Loss: 0.0107\n",
"Step 720/1000, Loss: 0.0140\n",
"Step 730/1000, Loss: 0.0195\n",
"Step 740/1000, Loss: 0.0148\n",
"Step 750/1000, Loss: 0.0109\n",
"Step 760/1000, Loss: 0.0094\n",
"Step 770/1000, Loss: 0.0121\n",
"Step 780/1000, Loss: 0.0233\n",
"Step 790/1000, Loss: 0.0151\n",
"Step 800/1000, Loss: 0.0134\n",
"Step 810/1000, Loss: 0.0117\n",
"Step 820/1000, Loss: 0.0124\n",
"Step 830/1000, Loss: 0.0221\n",
"Step 840/1000, Loss: 0.0161\n",
"Step 850/1000, Loss: 0.0136\n",
"Step 860/1000, Loss: 0.0161\n",
"Step 870/1000, Loss: 0.0194\n",
"Step 880/1000, Loss: 0.0145\n",
"Step 890/1000, Loss: 0.0149\n",
"Step 900/1000, Loss: 0.0232\n",
"Step 910/1000, Loss: 0.0166\n",
"Step 920/1000, Loss: 0.0156\n",
"Step 930/1000, Loss: 0.0276\n",
"Step 940/1000, Loss: 0.0176\n",
"Step 950/1000, Loss: 0.0152\n",
"Step 960/1000, Loss: 0.0162\n",
"Step 970/1000, Loss: 0.0143\n",
"Step 980/1000, Loss: 0.0136\n",
"Step 990/1000, Loss: 0.0117\n",
"Total time: 67.25 s\n"
]
}
],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"import time\n",
"\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"print(\"Using device:\", device)\n",
"\n",
"\n",
"# Огромные параметры\n",
"N, D_in, H1, H2, H3, D_out = 300_000, 4096, 2048, 1024, 512, 10\n",
"batch_size = 16_384 # большой батч\n",
"steps = 1000 # много итераций для длительной нагрузки\n",
"\n",
"# Случайные данные на GPU\n",
"x = torch.randn(N, D_in, device=device, dtype=torch.float32)\n",
"y = torch.randn(N, D_out, device=device, dtype=torch.float32)\n",
"\n",
"model = nn.Sequential(\n",
" nn.Linear(D_in, H1),\n",
" nn.ReLU(),\n",
" nn.Linear(H1, H2),\n",
" nn.ReLU(),\n",
" nn.Linear(H2, H3),\n",
" nn.ReLU(),\n",
" nn.Linear(H3, D_out)\n",
").to(device)\n",
"\n",
"loss_fn = nn.MSELoss()\n",
"optimizer = optim.Adam(model.parameters(), lr=1e-3)\n",
"\n",
"start = time.time()\n",
"for t in range(steps):\n",
" idx = torch.randint(0, N, (batch_size,), device=device)\n",
" x_batch = x[idx]\n",
" y_batch = y[idx]\n",
"\n",
" y_pred = model(x_batch)\n",
" loss = loss_fn(y_pred, y_batch)\n",
"\n",
" optimizer.zero_grad()\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" if t % 10 == 0:\n",
" # замедляем вывод, чтобы можно было наблюдать\n",
" print(f\"Step {t}/{steps}, Loss: {loss.item():.4f}\")\n",
"\n",
"end = time.time()\n",
"print(f\"Total time: {end-start:.2f} s\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+759
View File
@@ -0,0 +1,759 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9336560f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 1,
"id": "0c00b67b",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"from torch.utils.data import Dataset, DataLoader\n",
"import torchvision.transforms as T\n",
"\n",
"import pandas as pd\n",
"from pathlib import Path\n",
"from PIL import Image\n",
"from tqdm import tqdm\n",
"\n",
"import timm\n",
"import numpy as np\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "84c3657f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'cuda'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# === CONFIG ===\n",
"DATA_ROOT = Path(\"../dataset/EmoSet-118K\")\n",
"BATCH_SIZE = 64 # V100 спокойно тянет\n",
"EPOCHS = 15\n",
"LR = 3e-4\n",
"NUM_WORKERS = 24\n",
"\n",
"DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"DEVICE\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "9f749add",
"metadata": {},
"outputs": [],
"source": [
"class EmoSetDataset(Dataset):\n",
" def __init__(self, root, split):\n",
" self.root = Path(root) / split\n",
" self.df = pd.read_csv(self.root / \"labels.csv\")\n",
"\n",
" self.labels = sorted(self.df[\"label\"].unique())\n",
" self.label2idx = {l: i for i, l in enumerate(self.labels)}\n",
" self.idx2label = {i: l for l, i in self.label2idx.items()}\n",
"\n",
" self.transform = T.Compose([\n",
" T.Resize((224, 224)),\n",
" T.ToTensor(),\n",
" T.Normalize(\n",
" mean=[0.485, 0.456, 0.406],\n",
" std=[0.229, 0.224, 0.225]\n",
" )\n",
" ])\n",
"\n",
" def __len__(self):\n",
" return len(self.df)\n",
"\n",
" def __getitem__(self, idx):\n",
" row = self.df.iloc[idx]\n",
" img_path = self.root / \"images\" / row[\"filename\"]\n",
"\n",
" img = Image.open(img_path).convert(\"RGB\")\n",
" img = self.transform(img)\n",
"\n",
" label = self.label2idx[row[\"label\"]]\n",
" return img, label\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c8805341",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Classes: ['amusement', 'anger', 'awe', 'contentment', 'disgust', 'excitement', 'fear', 'sadness']\n"
]
}
],
"source": [
"train_ds = EmoSetDataset(DATA_ROOT, \"train\")\n",
"val_ds = EmoSetDataset(DATA_ROOT, \"val\")\n",
"\n",
"train_loader = DataLoader(\n",
" train_ds,\n",
" batch_size=BATCH_SIZE,\n",
" shuffle=True,\n",
" num_workers=NUM_WORKERS,\n",
" pin_memory=True\n",
")\n",
"\n",
"val_loader = DataLoader(\n",
" val_ds,\n",
" batch_size=BATCH_SIZE,\n",
" shuffle=False,\n",
" num_workers=NUM_WORKERS,\n",
" pin_memory=True\n",
")\n",
"\n",
"print(\"Classes:\", train_ds.labels)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "dffce582",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ResNet(\n",
" (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n",
" (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
" (layer1): Sequential(\n",
" (0): Bottleneck(\n",
" (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" (downsample): Sequential(\n",
" (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (1): Bottleneck(\n",
" (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (2): Bottleneck(\n",
" (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" )\n",
" (layer2): Sequential(\n",
" (0): Bottleneck(\n",
" (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" (downsample): Sequential(\n",
" (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
" (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (1): Bottleneck(\n",
" (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (2): Bottleneck(\n",
" (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (3): Bottleneck(\n",
" (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" )\n",
" (layer3): Sequential(\n",
" (0): Bottleneck(\n",
" (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" (downsample): Sequential(\n",
" (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
" (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (1): Bottleneck(\n",
" (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (2): Bottleneck(\n",
" (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (3): Bottleneck(\n",
" (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (4): Bottleneck(\n",
" (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (5): Bottleneck(\n",
" (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" )\n",
" (layer4): Sequential(\n",
" (0): Bottleneck(\n",
" (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" (downsample): Sequential(\n",
" (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
" (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (1): Bottleneck(\n",
" (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (2): Bottleneck(\n",
" (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" )\n",
" (global_pool): SelectAdaptivePool2d(pool_type=avg, flatten=Flatten(start_dim=1, end_dim=-1))\n",
" (fc): Linear(in_features=2048, out_features=8, bias=True)\n",
")"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = timm.create_model(\n",
" \"resnet50\",\n",
" pretrained=True,\n",
" num_classes=len(train_ds.labels)\n",
")\n",
"\n",
"model.to(DEVICE)\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "81a457ef",
"metadata": {},
"outputs": [],
"source": [
"criterion = nn.CrossEntropyLoss()\n",
"\n",
"optimizer = torch.optim.AdamW(\n",
" model.parameters(),\n",
" lr=LR,\n",
" weight_decay=1e-4\n",
")\n",
"\n",
"scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(\n",
" optimizer,\n",
" T_max=EPOCHS\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "951aa9e3",
"metadata": {},
"outputs": [],
"source": [
"def train_epoch(model, loader):\n",
" model.train()\n",
" total_loss = 0\n",
" correct = 0\n",
" total = 0\n",
"\n",
" for imgs, labels in tqdm(loader, leave=False):\n",
" imgs = imgs.to(DEVICE)\n",
" labels = labels.to(DEVICE)\n",
"\n",
" optimizer.zero_grad()\n",
" logits = model(imgs)\n",
" loss = criterion(logits, labels)\n",
"\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" total_loss += loss.item() * imgs.size(0)\n",
" preds = logits.argmax(dim=1)\n",
" correct += (preds == labels).sum().item()\n",
" total += labels.size(0)\n",
"\n",
" return total_loss / total, correct / total\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "fb7e9398",
"metadata": {},
"outputs": [],
"source": [
"@torch.no_grad()\n",
"def val_epoch(model, loader):\n",
" model.eval()\n",
" total_loss = 0\n",
" correct = 0\n",
" total = 0\n",
"\n",
" for imgs, labels in loader:\n",
" imgs = imgs.to(DEVICE)\n",
" labels = labels.to(DEVICE)\n",
"\n",
" logits = model(imgs)\n",
" loss = criterion(logits, labels)\n",
"\n",
" total_loss += loss.item() * imgs.size(0)\n",
" preds = logits.argmax(dim=1)\n",
" correct += (preds == labels).sum().item()\n",
" total += labels.size(0)\n",
"\n",
" return total_loss / total, correct / total\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "9e870e5d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/1477 [00:00<?, ?it/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 01 | Train loss: 0.8383, acc: 0.6954 | Val loss: 0.6694, acc: 0.7563\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 02 | Train loss: 0.5462, acc: 0.7972 | Val loss: 0.6592, acc: 0.7594\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 03 | Train loss: 0.3654, acc: 0.8632 | Val loss: 0.7263, acc: 0.7600\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 04 | Train loss: 0.2111, acc: 0.9230 | Val loss: 0.8572, acc: 0.7472\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 05 | Train loss: 0.1187, acc: 0.9585 | Val loss: 1.0372, acc: 0.7453\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 06 | Train loss: 0.0690, acc: 0.9768 | Val loss: 1.1982, acc: 0.7529\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 07 | Train loss: 0.0466, acc: 0.9843 | Val loss: 1.3178, acc: 0.7492\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 08 | Train loss: 0.0295, acc: 0.9905 | Val loss: 1.3926, acc: 0.7551\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 09 | Train loss: 0.0204, acc: 0.9938 | Val loss: 1.4682, acc: 0.7497\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 10 | Train loss: 0.0146, acc: 0.9955 | Val loss: 1.4784, acc: 0.7604\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 11 | Train loss: 0.0087, acc: 0.9975 | Val loss: 1.5263, acc: 0.7580\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 12 | Train loss: 0.0057, acc: 0.9987 | Val loss: 1.5689, acc: 0.7558\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 13 | Train loss: 0.0044, acc: 0.9990 | Val loss: 1.5952, acc: 0.7566\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 14 | Train loss: 0.0030, acc: 0.9993 | Val loss: 1.6130, acc: 0.7600\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 15 | Train loss: 0.0025, acc: 0.9995 | Val loss: 1.5921, acc: 0.7627\n"
]
}
],
"source": [
"best_val_acc = 0.0\n",
"\n",
"for epoch in range(1, EPOCHS + 1):\n",
" train_loss, train_acc = train_epoch(model, train_loader)\n",
" val_loss, val_acc = val_epoch(model, val_loader)\n",
"\n",
" scheduler.step()\n",
"\n",
" print(\n",
" f\"Epoch {epoch:02d} | \"\n",
" f\"Train loss: {train_loss:.4f}, acc: {train_acc:.4f} | \"\n",
" f\"Val loss: {val_loss:.4f}, acc: {val_acc:.4f}\"\n",
" )\n",
"\n",
" if val_acc > best_val_acc:\n",
" best_val_acc = val_acc\n",
" torch.save(model.state_dict(), \"emoset_resnet50_best.pth\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7796ef11",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "thesis-py3.11",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+61
View File
@@ -0,0 +1,61 @@
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.linear_model import Ridge
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib
# Train a Ridge regressor that maps EmoSet image embeddings to continuous
# (valence, arousal) coordinates and save it for the music engine.

# 1. Reference mapping: EmoSet class index -> (valence, arousal), 1-9 scale.
# The keys (0-7) must match the class indices stored in the labels .npy file.
# NOTE(review): the order below is assumed to be the standard EmoSet class
# order - confirm against the label index used when the labels were exported.
EMO_VA_MAP = {
    0: (7.5, 6.5),  # amusement   - positive, moderately active
    1: (6.5, 5.0),  # awe         - positive, calm
    2: (7.0, 3.0),  # contentment - positive, very calm
    3: (8.0, 8.0),  # excitement  - very positive, very active
    4: (2.0, 8.0),  # anger       - negative, very active
    5: (3.0, 6.0),  # disgust     - negative, moderately active
    6: (2.5, 7.5),  # fear        - negative, very active
    7: (2.0, 2.0),  # sadness     - negative, passive
}

# 2. Load data
# Paths to the precomputed embeddings and their class labels. They are relative,
# so they are resolved against the current working directory.
EMBEDDINGS_PATH = Path("../emoset_test_embeddings.npy")
LABELS_PATH = Path("../emoset_test_labels.npy")
print("Загрузка данных...")
X = np.load(EMBEDDINGS_PATH)
y_labels = np.load(LABELS_PATH)

# Fail early, with a readable message, if the two arrays do not describe the
# same samples (messages are in Russian to match the rest of the script output).
if len(X) != len(y_labels):
    raise ValueError(
        f"Количество эмбеддингов ({len(X)}) не совпадает "
        f"с количеством меток ({len(y_labels)})."
    )

# Every label must have a V-A target; otherwise the lookup below would stop
# with a bare KeyError that does not say which classes are missing.
unknown_labels = sorted(set(np.unique(y_labels).tolist()) - set(EMO_VA_MAP))
if unknown_labels:
    raise ValueError(
        f"В метках найдены классы, которых нет в EMO_VA_MAP: {unknown_labels}"
    )

# Convert the discrete class labels into target V-A coordinates.
print("Формирование целевых координат (Valence, Arousal)...")
y_va = np.array([EMO_VA_MAP[label] for label in y_labels])

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_va, test_size=0.2, random_state=42
)

# 3. Train the model
print("Обучение Ridge регрессора...")
# Ridge (L2-regularised linear regression) is used on the high-dimensional
# embeddings to limit overfitting; MultiOutputRegressor fits one Ridge per
# target column (valence, arousal).
base_estimator = Ridge(alpha=1.0)
model = MultiOutputRegressor(base_estimator)
model.fit(X_train, y_train)

# 4. Evaluate on the held-out part
# Both metrics are called with their defaults, i.e. a single score aggregated
# over the two target columns.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Обучение завершено!")
print(f"MSE (Среднеквадратичная ошибка): {mse:.4f}")
print(f"R^2 Score (Коэффициент детерминации): {r2:.4f}")

# 5. Save the fitted model, creating the target directory if it is missing.
output_model_path = Path("../src/music_engine/va_regressor.pkl")
output_model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, output_model_path)
print(f"Модель сохранена в: {output_model_path}")