diff --git a/src/music_engine/va_regressor.pkl b/src/music_engine/va_regressor.pkl index 3b8697e..a071044 100644 Binary files a/src/music_engine/va_regressor.pkl and b/src/music_engine/va_regressor.pkl differ diff --git a/src/scripts/21_train_images.ipynb b/src/scripts/21_train_images.ipynb deleted file mode 100644 index 98cf8a0..0000000 --- a/src/scripts/21_train_images.ipynb +++ /dev/null @@ -1,541 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0c00b67b", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "from PIL import Image\n", - "import pandas as pd\n", - "import numpy as np\n", - "from tqdm import tqdm\n", - "\n", - "import torch\n", - "import torch.nn as nn\n", - "from torch.utils.data import Dataset, DataLoader\n", - "import torchvision.transforms as T\n", - "import timm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "84c3657f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'cuda'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Конфигурация параметров обучения и путей файловой системы\n", - "DATA_ROOT = Path(\"../dataset/EmoSet-118K\")\n", - "BATCH_SIZE = 64\n", - "EPOCHS = 15\n", - "LR = 3e-4\n", - "NUM_WORKERS = 62\n", - "\n", - "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", - "print(f\"Аппаратное ускорение: {device}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9f749add", - "metadata": {}, - "outputs": [], - "source": [ - "class EmoSetDataset(Dataset):\n", - " def __init__(self, root: Path | str, split: str):\n", - " self.root = Path(root) / split\n", - " self.df = pd.read_csv(self.root / \"labels.csv\")\n", - "\n", - " # Формирование словарей маппинга классов\n", - " self.labels = sorted(self.df[\"label\"].unique())\n", - " self.label2idx = {l: i for i, l in enumerate(self.labels)}\n", - " self.idx2label = {i: l for l, i in self.label2idx.items()}\n", - "\n", - " # Базовые трансформации для валидации и теста\n", - " base_tf = [\n", - " T.ToTensor(),\n", - " T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n", - " ]\n", - "\n", - " # Внедрение аугментации исключительно для обучающей выборки (предотвращение переобучения)\n", - " if split == \"train\":\n", - " self.transform = T.Compose([\n", - " T.RandomResizedCrop(224),\n", - " T.RandomHorizontalFlip(),\n", - " *base_tf\n", - " ])\n", - " else:\n", - " self.transform = T.Compose([\n", - " T.Resize(256),\n", - " T.CenterCrop(224),\n", - " *base_tf\n", - " ])\n", - "\n", - " def __len__(self):\n", - " return len(self.df)\n", - "\n", - " def __getitem__(self, idx):\n", - " row = self.df.iloc[idx]\n", - " img_path = self.root / \"images\" / row[\"filename\"]\n", - "\n", - " # Обработка возможных исключений ввода-вывода (поврежденные JPEG-файлы в датасете)\n", - " try:\n", - " img = Image.open(img_path).convert(\"RGB\")\n", - " except Exception:\n", - " img = Image.new(\"RGB\", (224, 224), (0, 0, 0))\n", - "\n", - " img_tensor = self.transform(img)\n", - " label_idx = self.label2idx[row[\"label\"]]\n", - " \n", - " return img_tensor, label_idx" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c8805341", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Classes: ['amusement', 'anger', 'awe', 'contentment', 'disgust', 'excitement', 'fear', 'sadness']\n" - ] - } - ], - "source": [ - "# Подготовка объектов выборки\n", - "train_ds = EmoSetDataset(DATA_ROOT, \"train\")\n", - "val_ds = EmoSetDataset(DATA_ROOT, \"val\")\n", - "\n", - "# Инициализация итераторов с закреплением памяти (pin_memory) для ускорения передачи на GPU\n", - "train_loader = DataLoader(\n", - " train_ds,\n", - " batch_size=BATCH_SIZE,\n", - " shuffle=True,\n", - " num_workers=NUM_WORKERS,\n", - " pin_memory=True\n", - ")\n", - "\n", - "val_loader = DataLoader(\n", - " val_ds,\n", - " batch_size=BATCH_SIZE,\n", - " shuffle=False,\n", - " num_workers=NUM_WORKERS,\n", - " pin_memory=True\n", - ")\n", - "\n", - "print(f\"Индексированные классы: {train_ds.labels}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dffce582", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "ResNet(\n", - " (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n", - " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n", - " (layer1): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " (downsample): Sequential(\n", - " (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (layer2): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " (downsample): Sequential(\n", - " (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", - " (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " )\n", - " (3): Bottleneck(\n", - " (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (layer3): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " (downsample): Sequential(\n", - " (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", - " (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " )\n", - " (3): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " )\n", - " (4): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " )\n", - " (5): Bottleneck(\n", - " (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (layer4): Sequential(\n", - " (0): Bottleneck(\n", - " (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " (downsample): Sequential(\n", - " (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", - " (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " )\n", - " )\n", - " (1): Bottleneck(\n", - " (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " )\n", - " (2): Bottleneck(\n", - " (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act1): ReLU(inplace=True)\n", - " (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", - " (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (drop_block): Identity()\n", - " (act2): ReLU(inplace=True)\n", - " (aa): Identity()\n", - " (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (act3): ReLU(inplace=True)\n", - " )\n", - " )\n", - " (global_pool): SelectAdaptivePool2d(pool_type=avg, flatten=Flatten(start_dim=1, end_dim=-1))\n", - " (fc): Linear(in_features=2048, out_features=8, bias=True)\n", - ")" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# TODO перед защитой, повторить оптимизаторы\n", - "# Загрузка предобученной архитектуры ResNet-50 с заменой классификационного слоя\n", - "model = timm.create_model(\n", - " \"resnet50\",\n", - " pretrained=True,\n", - " num_classes=len(train_ds.labels)\n", - ")\n", - "model.to(device)\n", - "\n", - "# Функция потерь для многоклассовой классификации\n", - "criterion = nn.CrossEntropyLoss()\n", - "\n", - "# Оптимизатор AdamW с L2-регуляризацией (weight_decay) для повышения обобщающей способности\n", - "optimizer = torch.optim.AdamW(\n", - " model.parameters(),\n", - " lr=LR,\n", - " weight_decay=1e-4\n", - ")\n", - "\n", - "# Планировщик скорости обучения: косинусный отжиг\n", - "scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(\n", - " optimizer,\n", - " T_max=EPOCHS\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81a457ef", - "metadata": {}, - "outputs": [], - "source": [ - "def train_epoch(current_model, loader):\n", - " current_model.train()\n", - " total_loss = 0.0\n", - " correct_preds = 0\n", - " total_samples = 0\n", - "\n", - " for imgs, labels in tqdm(loader, desc=\"Тренировка\", leave=False):\n", - " imgs = imgs.to(device)\n", - " labels = labels.to(device)\n", - "\n", - " optimizer.zero_grad()\n", - " logits = current_model(imgs)\n", - " loss = criterion(logits, labels)\n", - "\n", - " loss.backward()\n", - " optimizer.step()\n", - "\n", - " total_loss += loss.item() * imgs.size(0)\n", - " preds = logits.argmax(dim=1)\n", - " correct_preds += (preds == labels).sum().item()\n", - " total_samples += labels.size(0)\n", - "\n", - " return total_loss / total_samples, correct_preds / total_samples\n", - "\n", - "@torch.no_grad()\n", - "def val_epoch(current_model, loader):\n", - " # Перевод модели в режим инференса (отключение Dropout и фиксация BatchNorm)\n", - " current_model.eval()\n", - " total_loss = 0.0\n", - " correct_preds = 0\n", - " total_samples = 0\n", - "\n", - " for imgs, labels in tqdm(loader, desc=\"Валидация\", leave=False):\n", - " imgs = imgs.to(device)\n", - " labels = labels.to(device)\n", - "\n", - " logits = current_model(imgs)\n", - " loss = criterion(logits, labels)\n", - "\n", - " total_loss += loss.item() * imgs.size(0)\n", - " preds = logits.argmax(dim=1)\n", - " correct_preds += (preds == labels).sum().item()\n", - " total_samples += labels.size(0)\n", - "\n", - " return total_loss / total_samples, correct_preds / total_samples" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "951aa9e3", - "metadata": {}, - "outputs": [], - "source": [ - "best_val_acc = 0.0\n", - "checkpoint_path = \"../emoset_resnet50_best.pth\"\n", - "\n", - "print(\"Старт процесса обучения...\")\n", - "\n", - "for epoch in range(1, EPOCHS + 1):\n", - " train_loss, train_acc = train_epoch(model, train_loader)\n", - " val_loss, val_acc = val_epoch(model, val_loader)\n", - "\n", - " # Обновление шага планировщика\n", - " scheduler.step()\n", - "\n", - " print(\n", - " f\"Эпоха {epoch:02d}/{EPOCHS} | \"\n", - " f\"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f} | \"\n", - " f\"Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}\"\n", - " )\n", - "\n", - " # Экспорт весов при улучшении целевой метрики\n", - " if val_acc > best_val_acc:\n", - " best_val_acc = val_acc\n", - " torch.save(model.state_dict(), checkpoint_path)\n", - " print(f\" -> Сохранен новый лучший чекпоинт (Acc: {best_val_acc:.4f})\")\n", - "\n", - "print(\"Обучение завершено.\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "thesis-py3.11", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/src/scripts/22_1_train_images_m.py b/src/scripts/22_1_train_images_m.py new file mode 100644 index 0000000..10dba53 --- /dev/null +++ b/src/scripts/22_1_train_images_m.py @@ -0,0 +1,283 @@ +import os +import random +import warnings +from pathlib import Path +from PIL import Image +import pandas as pd +import numpy as np +from tqdm import tqdm + +import torch +import torch.nn as nn +from torch.utils.data import Dataset, DataLoader +import torchvision.transforms as T +import timm + +import matplotlib.pyplot as plt +import seaborn as sns +from sklearn.metrics import confusion_matrix + +# Подавление предупреждений цветовых профилей +warnings.filterwarnings("ignore", message=".*Unknown Adobe color transform code.*") + +# Настройки окружения +DATA_ROOT = Path("./NFS/Thesis/Emoset/EmoSet-118K") +# ВАЖНО: Добавили путь для медиа файлов +MEDIA_DIR = Path("./src/scripts/media") +MEDIA_DIR.mkdir(parents=True, exist_ok=True) + +BATCH_SIZE = 64 +EPOCHS = 30 +LR = 5e-5 +NUM_WORKERS = 32 +PATIENCE = 7 + +# Маппинг классов +CLASS_MAPPING = { + "amusement": 0, "anger": 1, "awe": 2, "contentment": 3, + "disgust": 4, "excitement": 5, "fear": 6, "sadness": 7 +} +# Инвертированный маппинг для графиков +INV_CLASS_MAPPING = {v: k for k, v in CLASS_MAPPING.items()} +CLASS_NAMES = [INV_CLASS_MAPPING[i] for i in range(len(CLASS_MAPPING))] + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +print(f"Устройство: {DEVICE}") + +# Фиксация генераторов псевдослучайных чисел +def set_seed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + +set_seed() + +# Инициализация структур данных +class EmoSetDataset(Dataset): + def __init__(self, root: Path | str, split: str, transform=None): + self.root = Path(root) / split + self.df = pd.read_csv(self.root / "labels.csv") + self.transform = transform + + # Фильтрация датафрейма + self.df = self.df[self.df["label"].isin(CLASS_MAPPING.keys())].reset_index(drop=True) + + def __len__(self): + return len(self.df) + + def __getitem__(self, idx): + row = self.df.iloc[idx] + img_path = self.root / "images" / row["filename"] + + try: + img = Image.open(img_path).convert("RGB") + except Exception: + img = Image.new("RGB", (256, 256), (0, 0, 0)) + + if self.transform: + img_tensor = self.transform(img) + else: + img_tensor = T.ToTensor()(img) + + label_idx = CLASS_MAPPING[row["label"]] + return img_tensor, label_idx + +# Трансформации +base_tf = [ + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) +] + +train_transform = T.Compose([ + T.Resize(256, antialias=True), + T.RandomCrop(224), + T.RandomHorizontalFlip(), + *base_tf +]) + +val_transform = T.Compose([ + T.Resize(256, antialias=True), + T.CenterCrop(224), + *base_tf +]) + +train_ds = EmoSetDataset(DATA_ROOT, "train", transform=train_transform) +val_ds = EmoSetDataset(DATA_ROOT, "val", transform=val_transform) + +train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True) +val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True) + +# Инициализация модели и оптимизатора +model = timm.create_model("resnet50", pretrained=True, num_classes=8, drop_rate=0.3) +model.to(DEVICE) + +criterion = nn.CrossEntropyLoss(label_smoothing=0.1) + +optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-3) +scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS) + +# Функции для отрисовки графиков +def plot_learning_curves(history): + """Отрисовка графиков функции потерь и точности""" + epochs = range(1, len(history['train_loss']) + 1) + + plt.figure(figsize=(14, 5)) + + # График Loss + plt.subplot(1, 2, 1) + plt.plot(epochs, history['train_loss'], 'b-', label='Train Loss') + plt.plot(epochs, history['val_loss'], 'r--', label='Validation Loss') + plt.title('График функции потерь (Loss)', fontsize=14) + plt.xlabel('Эпохи', fontsize=12) + plt.ylabel('Loss', fontsize=12) + plt.legend() + plt.grid(True, linestyle=':', alpha=0.7) + + # График Accuracy + plt.subplot(1, 2, 2) + plt.plot(epochs, history['train_acc'], 'b-', label='Train Accuracy') + plt.plot(epochs, history['val_acc'], 'r--', label='Validation Accuracy') + plt.title('График точности (Accuracy)', fontsize=14) + plt.xlabel('Эпохи', fontsize=12) + plt.ylabel('Accuracy', fontsize=12) + plt.legend() + plt.grid(True, linestyle=':', alpha=0.7) + + plt.tight_layout() + plot_path = MEDIA_DIR / "training_history.png" + plt.savefig(plot_path, dpi=300, bbox_inches='tight') + plt.close() + print(f"[INFO] График обучения сохранен в: {plot_path}") + +def plot_confusion_matrix(y_true, y_pred): + """Отрисовка тепловой матрицы ошибок""" + cm = confusion_matrix(y_true, y_pred) + + plt.figure(figsize=(10, 8)) + sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', + xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES, + cbar_kws={'label': 'Количество сэмплов'}) + + plt.title('Матрица ошибок (Confusion Matrix) - ResNet50', fontsize=16, pad=20) + plt.ylabel('Истинные классы (Ground Truth)', fontsize=12) + plt.xlabel('Предсказанные классы (Predicted)', fontsize=12) + + plt.xticks(rotation=45, ha='right') + plt.yticks(rotation=0) + + plt.tight_layout() + cm_path = MEDIA_DIR / "confusion_matrix_emoset.png" + plt.savefig(cm_path, dpi=300, bbox_inches='tight') + plt.close() + print(f"[INFO] Матрица ошибок сохранена в: {cm_path}") + +# Логика эпохи обучения +def train_epoch(current_model, loader): + current_model.train() + total_loss, correct_preds, total_samples = 0.0, 0, 0 + + for imgs, labels in tqdm(loader, desc="Тренировка", leave=False, smoothing=0): + imgs, labels = imgs.to(DEVICE), labels.to(DEVICE) + + optimizer.zero_grad(set_to_none=True) + logits = current_model(imgs) + loss = criterion(logits, labels) + + loss.backward() + optimizer.step() + + total_loss += loss.item() * imgs.size(0) + preds = logits.argmax(dim=1) + correct_preds += (preds == labels).sum().item() + total_samples += labels.size(0) + + return total_loss / total_samples, correct_preds / total_samples + +# Логика эпохи валидации с сохранением предсказаний для матрицы ошибок +@torch.no_grad() +def val_epoch(current_model, loader, return_preds=False): + current_model.eval() + total_loss, correct_preds, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + for imgs, labels in tqdm(loader, desc="Валидация", leave=False, smoothing=0): + imgs, labels = imgs.to(DEVICE), labels.to(DEVICE) + + logits = current_model(imgs) + loss = criterion(logits, labels) + + total_loss += loss.item() * imgs.size(0) + preds = logits.argmax(dim=1) + + correct_preds += (preds == labels).sum().item() + total_samples += labels.size(0) + + if return_preds: + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + avg_loss = total_loss / total_samples + avg_acc = correct_preds / total_samples + + if return_preds: + return avg_loss, avg_acc, all_labels, all_preds + return avg_loss, avg_acc + +if __name__ == "__main__": + best_val_acc = 0.0 + best_val_loss = float('inf') + epochs_no_improve = 0 + checkpoint_path = "./emosetV2_resnet50_best.pth" + + # Словарь для хранения истории обучения + history = { + 'train_loss': [], 'train_acc': [], + 'val_loss': [], 'val_acc': [] + } + + # Переменные для хранения лучших предсказаний для матрицы + best_labels, best_preds = [], [] + + print("Старт обучения.") + + for epoch in range(1, EPOCHS + 1): + train_loss, train_acc = train_epoch(model, train_loader) + + # Получаем предсказания только если это может быть лучшая эпоха + val_loss, val_acc, val_labels, val_preds = val_epoch(model, val_loader, return_preds=True) + + scheduler.step() + + # Запись в историю + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['val_loss'].append(val_loss) + history['val_acc'].append(val_acc) + + print(f"[{epoch}/{EPOCHS}] Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}") + + # Сохранение лучших весов по Accuracy + if val_acc > best_val_acc: + best_val_acc = val_acc + best_labels = val_labels # Сохраняем предсказания лучшей модели + best_preds = val_preds + torch.save(model.state_dict(), checkpoint_path) + print(f"Сохранен чекпоинт (Acc: {best_val_acc:.4f})") + + # Оценка переобучения по Loss (Early Stopping) + if val_loss < best_val_loss: + best_val_loss = val_loss + epochs_no_improve = 0 + else: + epochs_no_improve += 1 + if epochs_no_improve >= PATIENCE: + print(f"Ранняя остановка: метрика валидации не улучшается {PATIENCE} эпох.") + break + + print("Процесс обучения завершен. Генерирую графики для диссертации...") + plot_learning_curves(history) + plot_confusion_matrix(best_labels, best_preds) + print("Все медиафайлы успешно созданы!") \ No newline at end of file diff --git a/src/scripts/23_1_extract_embeddings_m.py b/src/scripts/23_1_extract_embeddings_m.py new file mode 100644 index 0000000..ac4d174 --- /dev/null +++ b/src/scripts/23_1_extract_embeddings_m.py @@ -0,0 +1,171 @@ +import os +from pathlib import Path +from PIL import Image +import pandas as pd +import numpy as np +from tqdm import tqdm + +import torch +from torch.utils.data import Dataset, DataLoader +import torchvision.transforms as T +import timm + +import matplotlib.pyplot as plt +import seaborn as sns +from sklearn.manifold import TSNE + +# Настройки путей для медиа +MEDIA_DIR = Path("scripts/media") +MEDIA_DIR.mkdir(parents=True, exist_ok=True) + +# Конфигурация путей для инференса и кэширования векторов +DATA_ROOT = Path("./NFS/Thesis/Emoset/EmoSet-118K") +MODEL_PATH = Path("./src/emoset_resnet50_best.pth") + +BATCH_SIZE = 128 +NUM_WORKERS = 32 + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +print(f"Вычисления перенесены на: {device}") + +class EmoSetFeatureDataset(Dataset): + def __init__(self, root: Path | str, split: str): + self.root = Path(root) / split + self.df = pd.read_csv(self.root / "labels.csv") + + self.labels = sorted(self.df["label"].unique()) + self.label2idx = {l: i for i, l in enumerate(self.labels)} + self.idx2label = {i: l for l, i in self.label2idx.items()} + + # Для экстракции признаков аугментация отключена, используется строгий CenterCrop + self.transform = T.Compose([ + T.Resize(256), + T.CenterCrop(224), + T.ToTensor(), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + def __len__(self): + return len(self.df) + + def __getitem__(self, idx): + row = self.df.iloc[idx] + img_path = self.root / "images" / row["filename"] + + # Перехват битых файлов выборки + try: + img = Image.open(img_path).convert("RGB") + except Exception: + img = Image.new("RGB", (224, 224), (0, 0, 0)) + + img_tensor = self.transform(img) + label_idx = self.label2idx[row["label"]] + + return img_tensor, label_idx + +def plot_tsne(embeddings, labels, idx2label, sample_limit=3000): + """Генерация t-SNE графика для диссертации""" + print(f"Построение t-SNE проекции для {sample_limit} сэмплов...") + + tsne_model = TSNE(n_components=2, perplexity=30, random_state=42) + embeddings_2d = tsne_model.fit_transform(embeddings[:sample_limit]) + labels_subset = labels[:sample_limit] + + plt.figure(figsize=(12, 9)) + + # Используем более академическую палитру + scatter = plt.scatter( + embeddings_2d[:, 0], + embeddings_2d[:, 1], + c=labels_subset, + cmap="Set2", # Set2 лучше различается при печати + alpha=0.7, + s=20, + edgecolors='w', + linewidths=0.5 + ) + + # Формирование легенды + handles, _ = scatter.legend_elements() + legend_labels = [idx2label[i] for i in range(len(idx2label))] + + # Размещение легенды снаружи графика, чтобы не перекрывать данные + plt.legend(handles, legend_labels, title="Эмоциональные классы", + bbox_to_anchor=(1.05, 1), loc='upper left') + + plt.title("2D проекция скрытого пространства признаков (t-SNE)", pad=20, fontsize=14) + plt.xlabel("Первая главная компонента (t-SNE 1)", fontsize=12) + plt.ylabel("Вторая главная компонента (t-SNE 2)", fontsize=12) + plt.grid(True, linestyle='--', alpha=0.3) + + plt.tight_layout() + plot_path = MEDIA_DIR / "tsne_embeddings.png" + plt.savefig(plot_path, dpi=300, bbox_inches='tight') + plt.close() + print(f"[INFO] График t-SNE сохранен в: {plot_path}") + + +if __name__ == "__main__": + test_ds = EmoSetFeatureDataset(DATA_ROOT, "test") + test_loader = DataLoader( + test_ds, + batch_size=BATCH_SIZE, + shuffle=False, # Отключение шаффла для строгого соответствия индексов + num_workers=NUM_WORKERS, + pin_memory=True + ) + + print(f"Подготовлено для извлечения: {len(test_ds)} файлов.") + + # Инициализация модели и загрузка лучших весов + feature_extractor = timm.create_model( + "resnet50", + pretrained=False, + num_classes=len(test_ds.labels) + ) + + try: + checkpoint = torch.load(MODEL_PATH, map_location=device) + feature_extractor.load_state_dict(checkpoint) + print("Веса модели успешно загружены.") + except Exception as e: + print(f"Ошибка загрузки весов: {e}. Убедитесь, что модель обучена.") + exit(1) + + # Удаление классификационного слоя (fc) + feature_extractor.reset_classifier(0) + feature_extractor.to(device) + feature_extractor.eval() + + print("Слой классификации удален. Модель готова к экстракции.") + + extracted_embeddings = [] + extracted_labels = [] + + print("Старт пакетной экстракции признаков...") + + with torch.no_grad(): + for imgs, labels in tqdm(test_loader, desc="Экстракция"): + imgs = imgs.to(device) + + # Получение вектора [BATCH_SIZE, 2048] + embeddings_batch = feature_extractor(imgs) + + extracted_embeddings.append(embeddings_batch.cpu().numpy()) + extracted_labels.append(labels.numpy()) + + # Агрегация батчей в единые массивы + np_embeddings = np.concatenate(extracted_embeddings, axis=0) + np_labels = np.concatenate(extracted_labels, axis=0) + + print(f"Размерность матрицы признаков: {np_embeddings.shape}") + + # Сохранение артефактов + np.save("./src/emoset_test_embeddings.npy", np_embeddings) + np.save("./src/emoset_test_labels.npy", np_labels) + print("Матрицы успешно экспортированы в .npy файлы.") + + # Генерация медиа для диссертации + plot_tsne(np_embeddings, np_labels, test_ds.idx2label, sample_limit=3000) + + print("Процесс полностью завершен.") \ No newline at end of file diff --git a/src/scripts/91_generate_metrics.py b/src/scripts/91_generate_metrics.py index b23fd6a..d648525 100644 --- a/src/scripts/91_generate_metrics.py +++ b/src/scripts/91_generate_metrics.py @@ -1,96 +1,97 @@ -import joblib import numpy as np import pandas as pd +import joblib from pathlib import Path -import matplotlib.pyplot as plt - -from sklearn.model_selection import train_test_split from sklearn.metrics import mean_squared_error, r2_score -# Калибровочные координаты центров эмоциональных классов в пространстве Рассела [1.0 - 9.0] -EMOTION_TO_VA_COORDS = { - 0: (7.5, 6.5), # amusement - 1: (2.0, 8.0), # anger - 2: (6.5, 5.0), # awe - 3: (7.0, 3.0), # contentment - 4: (3.0, 6.0), # disgust - 5: (8.0, 8.0), # excitement - 6: (2.5, 7.5), # fear - 7: (2.0, 2.0), # sadness +# 1. Настройка путей +embeddings_path = Path("./src/emoset_test_embeddings.npy") +csv_path = Path("./NFS/Thesis/Emoset/EmoSet-118K/test/labels.csv") +model_path = Path("./src/music_engine/va_regressor.pkl") + +output_dir = Path("./src/scripts/media") +output_file = output_dir / "metrics_output.txt" + +# 2. Корректный маппинг 8 классов EmoSet в шкалу DEAM [1.0, 9.0] +# Формула перевода из [-1, 1] в [1, 9]: 5.0 + (X * 4.0) +EMO_TO_VA = { + "amusement": [8.2, 6.6], # Веселье (Высокий позитив, средняя энергия) + "awe": [7.0, 7.4], # Восхищение (Позитив, высокая энергия) + "contentment": [7.8, 3.4], # Умиротворение (Позитив, низкая энергия) + "excitement": [8.2, 8.2], # Возбуждение (Макс. позитив, макс. энергия) + "anger": [2.2, 7.8], # Гнев (Глубокий негатив, высокая энергия) + "disgust": [2.6, 6.6], # Отвращение (Негатив, средняя энергия) + "fear": [2.6, 8.2], # Страх (Негатив, максимальная энергия) + "sadness": [2.2, 2.6] # Грусть (Глубокий негатив, низкая энергия) } -def evaluate_regression_model(): - # Инициализация путей к артефактам пайплайна - base_dir = Path(__file__).resolve().parent.parent.parent - embeddings_path = base_dir / "src" / "emoset_test_embeddings.npy" - labels_path = base_dir / "src" / "emoset_test_labels.npy" - model_path = base_dir / "src" / "music_engine" / "va_regressor.pkl" - - if not all(p.exists() for p in [embeddings_path, labels_path, model_path]): - print("Отсутствуют необходимые артефакты для расчета метрик.") +def generate_slide_metrics(): + print("[INFO] Загрузка тестовых артефактов...") + + if not all(p.exists() for p in [embeddings_path, csv_path, model_path]): + print("[ERROR] Проверьте наличие файлов данных или модели регрессора.") return - # Загрузка скрытых представлений и инициализация регрессора - x_features = np.load(embeddings_path) - y_discrete = np.load(labels_path) - regression_pipeline = joblib.load(model_path) - - # Маппинг дискретных меток в непрерывные координаты - y_continuous = np.array([EMOTION_TO_VA_COORDS[label] for label in y_discrete]) - - # Изоляция тестовой выборки (сохранение детерминированности через random_state) - _, x_test, _, y_test = train_test_split(x_features, y_continuous, test_size=0.2, random_state=42) - - # Генерация предсказаний на отложенной выборке - y_pred = regression_pipeline.predict(x_test) - - # Расчет метрик качества регрессии (Mean Squared Error, R-squared) - mse_valence = mean_squared_error(y_test[:, 0], y_pred[:, 0]) - r2_valence = r2_score(y_test[:, 0], y_pred[:, 0]) - - mse_arousal = mean_squared_error(y_test[:, 1], y_pred[:, 1]) - r2_arousal = r2_score(y_test[:, 1], y_pred[:, 1]) - - print("Метрики качества регрессионной модели на тестовой выборке:") - print(f"Valence -> MSE: {mse_valence:.4f} | R^2: {r2_valence:.4f}") - print(f"Arousal -> MSE: {mse_arousal:.4f} | R^2: {r2_arousal:.4f}") - - # Построение диагностических диаграмм рассеяния (Scatter Plots) - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 7)) - - # Конфигурация подграфика: Ось Валентности - ax1.scatter(y_test[:, 0], y_pred[:, 0], alpha=0.3, color='#1f77b4', edgecolors='none', label='Прогноз регрессора') - ax1.plot([1, 9], [1, 9], 'r--', lw=2, label='Идеальное совпадение (x=y)') - ax1.set_title('Диаграмма рассеяния: Valence (Позитивность)', fontsize=14, fontweight='bold') - ax1.set_xlabel('Эталонные значения (центры классов)', fontsize=12) - ax1.set_ylabel('Непрерывные предсказания модели', fontsize=12) - ax1.set_xlim(1, 9) - ax1.set_ylim(1, 9) - ax1.grid(True, linestyle='--', alpha=0.6) - ax1.legend(loc='upper left', fontsize=10) - - # Научное обоснование распределения данных для комиссии - ax1.text(1.2, 8.2, - 'Формирование вертикальных кластеров\n' - 'обусловлено проекцией 8 дискретных\n' - 'базовых эмоций на непрерывную\n' - 'координатную плоскость.', - fontsize=10, bbox=dict(facecolor='white', alpha=0.9, edgecolor='gray')) + output_dir.mkdir(parents=True, exist_ok=True) - # Конфигурация подграфика: Ось Активности - ax2.scatter(y_test[:, 1], y_pred[:, 1], alpha=0.3, color='#ff7f0e', edgecolors='none', label='Прогноз регрессора') - ax2.plot([1, 9], [1, 9], 'r--', lw=2, label='Идеальное совпадение (x=y)') - ax2.set_title('Диаграмма рассеяния: Arousal (Активность)', fontsize=14, fontweight='bold') - ax2.set_xlabel('Эталонные значения (центры классов)', fontsize=12) - ax2.set_ylabel('Непрерывные предсказания модели', fontsize=12) - ax2.set_xlim(1, 9) - ax2.set_ylim(1, 9) - ax2.grid(True, linestyle='--', alpha=0.6) - ax2.legend(loc='upper left', fontsize=10) + # 3. Загрузка эмбеддингов и меток + X_test = np.load(embeddings_path) + df = pd.read_csv(csv_path) - plt.tight_layout() - plt.savefig('regression_metrics_plot.png', dpi=300, bbox_inches='tight') - print("Диагностические графики экспортированы в regression_metrics_plot.png") + if len(X_test) != len(df): + print(f"[WARN] Корректировка размеров выборки: Эмбеддинги ({len(X_test)}) != Метки ({len(df)})") + min_len = min(len(X_test), len(df)) + X_test = X_test[:min_len] + df = df.iloc[:min_len] + + y_test_list = [EMO_TO_VA.get(label.lower().strip(), [5.0, 5.0]) for label in df['label']] + y_test = np.array(y_test_list) + + # 4. Выполнение инференса + print("[INFO] Выполнение инференса регрессора на скрытом пространстве признаков...") + regressor = joblib.load(model_path) + y_pred = regressor.predict(X_test) + + # === БЛОК ДИАГНОСТИКИ ШКАЛЫ === + print("\n" + "-"*50) + print(" ДИАГНОСТИКА ДИАПАЗОНОВ ЗНАЧЕНИЙ ".center(50)) + print("-"*50) + print(f"Истинные (y_test) -> Мин: {y_test.min():.2f}, Макс: {y_test.max():.2f}, Среднее: {y_test.mean():.2f}") + print(f"Предсказания (y_pred) -> Мин: {y_pred.min():.2f}, Макс: {y_pred.max():.2f}, Среднее: {y_pred.mean():.2f}") + print("-"*50 + "\n") + # ============================== + + # 5. Расчет метрик + mse_v = mean_squared_error(y_test[:, 0], y_pred[:, 0]) + r2_v = r2_score(y_test[:, 0], y_pred[:, 0]) + + mse_a = mean_squared_error(y_test[:, 1], y_pred[:, 1]) + r2_a = r2_score(y_test[:, 1], y_pred[:, 1]) + + mse_total = mean_squared_error(y_test, y_pred) + r2_total = r2_score(y_test, y_pred) + + # 6. Вывод и сохранение результатов + table_content = f""" +================================================== + ТАБЛИЦА МЕТРИК ДЛЯ СЛАЙДА 10 +================================================== +| Метрика | Valence (V) | Arousal (A) | Общая (Total) | +|------------|--------------|--------------|---------------| +| MSE | {mse_v:<12.4f} | {mse_a:<12.4f} | {mse_total:<13.4f} | +| R² | {r2_v:<12.4f} | {r2_a:<12.4f} | {r2_total:<13.4f} | +================================================== + +Формула целевой функции для вставки на слайд (LaTeX): +$$Score_{{final}} = D_{{emo}} + 4.0 \cdot Acoustic_{{penalty}}$$ +""" + + print(table_content) + + with open(output_file, 'w', encoding='utf-8') as f: + f.write(table_content) + + print(f"[SUCCESS] Метрики успешно сохранены в файл: {output_file.absolute()}") if __name__ == "__main__": - evaluate_regression_model() \ No newline at end of file + generate_slide_metrics() \ No newline at end of file diff --git a/src/scripts/media/confusion_matrix_emoset.png b/src/scripts/media/confusion_matrix_emoset.png new file mode 100644 index 0000000..5f8f681 Binary files /dev/null and b/src/scripts/media/confusion_matrix_emoset.png differ diff --git a/src/scripts/media/metrics_output.txt b/src/scripts/media/metrics_output.txt new file mode 100644 index 0000000..ca3ec62 --- /dev/null +++ b/src/scripts/media/metrics_output.txt @@ -0,0 +1,12 @@ + +================================================== + ТАБЛИЦА МЕТРИК ДЛЯ СЛАЙДА 10 +================================================== +| Метрика | Valence (V) | Arousal (A) | Общая (Total) | +|------------|--------------|--------------|---------------| +| MSE | 1.5135 | 2.2743 | 1.8939 | +| R² | 0.7927 | 0.4321 | 0.6124 | +================================================== + +Формула целевой функции для вставки на слайд (LaTeX): +$$Score_{final} = D_{emo} + 4.0 \cdot Acoustic_{penalty}$$ diff --git a/src/scripts/media/training_history.png b/src/scripts/media/training_history.png new file mode 100644 index 0000000..6c7a72f Binary files /dev/null and b/src/scripts/media/training_history.png differ diff --git a/src/scripts/media/tsne_embeddings.png b/src/scripts/media/tsne_embeddings.png new file mode 100644 index 0000000..0d3cc9c Binary files /dev/null and b/src/scripts/media/tsne_embeddings.png differ