feat: add metrics

This commit is contained in:
zin
2026-06-16 04:59:51 +00:00
parent 14968dd4d4
commit 934a4cbff4
9 changed files with 550 additions and 624 deletions
Binary file not shown.
-541
View File
@@ -1,541 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "0c00b67b",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path\n",
"from PIL import Image\n",
"import pandas as pd\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"\n",
"import torch\n",
"import torch.nn as nn\n",
"from torch.utils.data import Dataset, DataLoader\n",
"import torchvision.transforms as T\n",
"import timm"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "84c3657f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'cuda'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Конфигурация параметров обучения и путей файловой системы\n",
"DATA_ROOT = Path(\"../dataset/EmoSet-118K\")\n",
"BATCH_SIZE = 64\n",
"EPOCHS = 15\n",
"LR = 3e-4\n",
"NUM_WORKERS = 62\n",
"\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"print(f\"Аппаратное ускорение: {device}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f749add",
"metadata": {},
"outputs": [],
"source": [
"class EmoSetDataset(Dataset):\n",
" def __init__(self, root: Path | str, split: str):\n",
" self.root = Path(root) / split\n",
" self.df = pd.read_csv(self.root / \"labels.csv\")\n",
"\n",
" # Формирование словарей маппинга классов\n",
" self.labels = sorted(self.df[\"label\"].unique())\n",
" self.label2idx = {l: i for i, l in enumerate(self.labels)}\n",
" self.idx2label = {i: l for l, i in self.label2idx.items()}\n",
"\n",
" # Базовые трансформации для валидации и теста\n",
" base_tf = [\n",
" T.ToTensor(),\n",
" T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n",
" ]\n",
"\n",
" # Внедрение аугментации исключительно для обучающей выборки (предотвращение переобучения)\n",
" if split == \"train\":\n",
" self.transform = T.Compose([\n",
" T.RandomResizedCrop(224),\n",
" T.RandomHorizontalFlip(),\n",
" *base_tf\n",
" ])\n",
" else:\n",
" self.transform = T.Compose([\n",
" T.Resize(256),\n",
" T.CenterCrop(224),\n",
" *base_tf\n",
" ])\n",
"\n",
" def __len__(self):\n",
" return len(self.df)\n",
"\n",
" def __getitem__(self, idx):\n",
" row = self.df.iloc[idx]\n",
" img_path = self.root / \"images\" / row[\"filename\"]\n",
"\n",
" # Обработка возможных исключений ввода-вывода (поврежденные JPEG-файлы в датасете)\n",
" try:\n",
" img = Image.open(img_path).convert(\"RGB\")\n",
" except Exception:\n",
" img = Image.new(\"RGB\", (224, 224), (0, 0, 0))\n",
"\n",
" img_tensor = self.transform(img)\n",
" label_idx = self.label2idx[row[\"label\"]]\n",
" \n",
" return img_tensor, label_idx"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c8805341",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Classes: ['amusement', 'anger', 'awe', 'contentment', 'disgust', 'excitement', 'fear', 'sadness']\n"
]
}
],
"source": [
"# Подготовка объектов выборки\n",
"train_ds = EmoSetDataset(DATA_ROOT, \"train\")\n",
"val_ds = EmoSetDataset(DATA_ROOT, \"val\")\n",
"\n",
"# Инициализация итераторов с закреплением памяти (pin_memory) для ускорения передачи на GPU\n",
"train_loader = DataLoader(\n",
" train_ds,\n",
" batch_size=BATCH_SIZE,\n",
" shuffle=True,\n",
" num_workers=NUM_WORKERS,\n",
" pin_memory=True\n",
")\n",
"\n",
"val_loader = DataLoader(\n",
" val_ds,\n",
" batch_size=BATCH_SIZE,\n",
" shuffle=False,\n",
" num_workers=NUM_WORKERS,\n",
" pin_memory=True\n",
")\n",
"\n",
"print(f\"Индексированные классы: {train_ds.labels}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dffce582",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ResNet(\n",
" (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n",
" (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
" (layer1): Sequential(\n",
" (0): Bottleneck(\n",
" (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" (downsample): Sequential(\n",
" (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (1): Bottleneck(\n",
" (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (2): Bottleneck(\n",
" (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" )\n",
" (layer2): Sequential(\n",
" (0): Bottleneck(\n",
" (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" (downsample): Sequential(\n",
" (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
" (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (1): Bottleneck(\n",
" (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (2): Bottleneck(\n",
" (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (3): Bottleneck(\n",
" (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" )\n",
" (layer3): Sequential(\n",
" (0): Bottleneck(\n",
" (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" (downsample): Sequential(\n",
" (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
" (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (1): Bottleneck(\n",
" (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (2): Bottleneck(\n",
" (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (3): Bottleneck(\n",
" (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (4): Bottleneck(\n",
" (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (5): Bottleneck(\n",
" (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" )\n",
" (layer4): Sequential(\n",
" (0): Bottleneck(\n",
" (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" (downsample): Sequential(\n",
" (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
" (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" )\n",
" )\n",
" (1): Bottleneck(\n",
" (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" (2): Bottleneck(\n",
" (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act1): ReLU(inplace=True)\n",
" (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
" (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (drop_block): Identity()\n",
" (act2): ReLU(inplace=True)\n",
" (aa): Identity()\n",
" (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
" (act3): ReLU(inplace=True)\n",
" )\n",
" )\n",
" (global_pool): SelectAdaptivePool2d(pool_type=avg, flatten=Flatten(start_dim=1, end_dim=-1))\n",
" (fc): Linear(in_features=2048, out_features=8, bias=True)\n",
")"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# TODO перед защитой, повторить оптимизаторы\n",
"# Загрузка предобученной архитектуры ResNet-50 с заменой классификационного слоя\n",
"model = timm.create_model(\n",
" \"resnet50\",\n",
" pretrained=True,\n",
" num_classes=len(train_ds.labels)\n",
")\n",
"model.to(device)\n",
"\n",
"# Функция потерь для многоклассовой классификации\n",
"criterion = nn.CrossEntropyLoss()\n",
"\n",
"# Оптимизатор AdamW с L2-регуляризацией (weight_decay) для повышения обобщающей способности\n",
"optimizer = torch.optim.AdamW(\n",
" model.parameters(),\n",
" lr=LR,\n",
" weight_decay=1e-4\n",
")\n",
"\n",
"# Планировщик скорости обучения: косинусный отжиг\n",
"scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(\n",
" optimizer,\n",
" T_max=EPOCHS\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "81a457ef",
"metadata": {},
"outputs": [],
"source": [
"def train_epoch(current_model, loader):\n",
" current_model.train()\n",
" total_loss = 0.0\n",
" correct_preds = 0\n",
" total_samples = 0\n",
"\n",
" for imgs, labels in tqdm(loader, desc=\"Тренировка\", leave=False):\n",
" imgs = imgs.to(device)\n",
" labels = labels.to(device)\n",
"\n",
" optimizer.zero_grad()\n",
" logits = current_model(imgs)\n",
" loss = criterion(logits, labels)\n",
"\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" total_loss += loss.item() * imgs.size(0)\n",
" preds = logits.argmax(dim=1)\n",
" correct_preds += (preds == labels).sum().item()\n",
" total_samples += labels.size(0)\n",
"\n",
" return total_loss / total_samples, correct_preds / total_samples\n",
"\n",
"@torch.no_grad()\n",
"def val_epoch(current_model, loader):\n",
" # Перевод модели в режим инференса (отключение Dropout и фиксация BatchNorm)\n",
" current_model.eval()\n",
" total_loss = 0.0\n",
" correct_preds = 0\n",
" total_samples = 0\n",
"\n",
" for imgs, labels in tqdm(loader, desc=\"Валидация\", leave=False):\n",
" imgs = imgs.to(device)\n",
" labels = labels.to(device)\n",
"\n",
" logits = current_model(imgs)\n",
" loss = criterion(logits, labels)\n",
"\n",
" total_loss += loss.item() * imgs.size(0)\n",
" preds = logits.argmax(dim=1)\n",
" correct_preds += (preds == labels).sum().item()\n",
" total_samples += labels.size(0)\n",
"\n",
" return total_loss / total_samples, correct_preds / total_samples"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "951aa9e3",
"metadata": {},
"outputs": [],
"source": [
"best_val_acc = 0.0\n",
"checkpoint_path = \"../emoset_resnet50_best.pth\"\n",
"\n",
"print(\"Старт процесса обучения...\")\n",
"\n",
"for epoch in range(1, EPOCHS + 1):\n",
" train_loss, train_acc = train_epoch(model, train_loader)\n",
" val_loss, val_acc = val_epoch(model, val_loader)\n",
"\n",
" # Обновление шага планировщика\n",
" scheduler.step()\n",
"\n",
" print(\n",
" f\"Эпоха {epoch:02d}/{EPOCHS} | \"\n",
" f\"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f} | \"\n",
" f\"Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}\"\n",
" )\n",
"\n",
" # Экспорт весов при улучшении целевой метрики\n",
" if val_acc > best_val_acc:\n",
" best_val_acc = val_acc\n",
" torch.save(model.state_dict(), checkpoint_path)\n",
" print(f\" -> Сохранен новый лучший чекпоинт (Acc: {best_val_acc:.4f})\")\n",
"\n",
"print(\"Обучение завершено.\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "thesis-py3.11",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+283
View File
@@ -0,0 +1,283 @@
import os
import random
import warnings
from pathlib import Path
from PIL import Image
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import timm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
# Ignore warnings whose message mentions an unknown Adobe color transform code
# (colour-profile noise raised while decoding some images).
warnings.filterwarnings("ignore", message=".*Unknown Adobe color transform code.*")

# --- Filesystem layout -------------------------------------------------------
DATA_ROOT = Path("./NFS/Thesis/Emoset/EmoSet-118K")
# Output directory for generated figures; created eagerly at import time.
MEDIA_DIR = Path("./src/scripts/media")
MEDIA_DIR.mkdir(parents=True, exist_ok=True)

# --- Training hyper-parameters -----------------------------------------------
BATCH_SIZE, EPOCHS, LR = 64, 30, 5e-5
NUM_WORKERS, PATIENCE = 32, 7  # PATIENCE: early-stopping budget in epochs

# --- Class label <-> index tables --------------------------------------------
CLASS_MAPPING = {
    "amusement": 0,
    "anger": 1,
    "awe": 2,
    "contentment": 3,
    "disgust": 4,
    "excitement": 5,
    "fear": 6,
    "sadness": 7,
}
# Reverse lookup (index -> name), used when labelling plots.
INV_CLASS_MAPPING = dict(zip(CLASS_MAPPING.values(), CLASS_MAPPING.keys()))
# Class names ordered by their integer index.
CLASS_NAMES = sorted(CLASS_MAPPING, key=CLASS_MAPPING.get)

# --- Compute device ----------------------------------------------------------
_backend = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(_backend)
print(f"Устройство: {DEVICE}")
# Pin the pseudo-random number generators for reproducibility
def set_seed(seed=42):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and, if present, CUDA).

    Args:
        seed: Integer seed applied to every generator.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


# Seed once at import time with the default seed.
set_seed()
# Инициализация структур данных
class EmoSetDataset(Dataset):
def __init__(self, root: Path | str, split: str, transform=None):
self.root = Path(root) / split
self.df = pd.read_csv(self.root / "labels.csv")
self.transform = transform
# Фильтрация датафрейма
self.df = self.df[self.df["label"].isin(CLASS_MAPPING.keys())].reset_index(drop=True)
def __len__(self):
return len(self.df)
def __getitem__(self, idx):
row = self.df.iloc[idx]
img_path = self.root / "images" / row["filename"]
try:
img = Image.open(img_path).convert("RGB")
except Exception:
img = Image.new("RGB", (256, 256), (0, 0, 0))
if self.transform:
img_tensor = self.transform(img)
else:
img_tensor = T.ToTensor()(img)
label_idx = CLASS_MAPPING[row["label"]]
return img_tensor, label_idx
# Transforms
# Tensor conversion + per-channel normalisation shared by both splits
# (mean/std are the values commonly used for ImageNet-pretrained backbones).
base_tf = [
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
# Training: resize, then random crop + horizontal flip as augmentation.
train_transform = T.Compose([
    T.Resize(256, antialias=True),
    T.RandomCrop(224),
    T.RandomHorizontalFlip(),
    *base_tf,
])
# Validation: deterministic resize + centre crop, no augmentation.
val_transform = T.Compose([
    T.Resize(256, antialias=True),
    T.CenterCrop(224),
    *base_tf,
])

train_ds = EmoSetDataset(DATA_ROOT, "train", transform=train_transform)
val_ds = EmoSetDataset(DATA_ROOT, "val", transform=val_transform)

train_loader = DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)
val_loader = DataLoader(
    val_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

# Model and optimiser initialisation
# The classifier head is sized from CLASS_MAPPING rather than a literal 8 so
# that the model cannot drift out of sync with the label table.
model = timm.create_model(
    "resnet50",
    pretrained=True,
    num_classes=len(CLASS_MAPPING),
    drop_rate=0.3,
)
model.to(DEVICE)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-3)
# Cosine learning-rate schedule spanning the full epoch budget; stepped once
# per epoch by the training loop.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
# Plotting helpers
def plot_learning_curves(history):
    """Save a two-panel figure with loss and accuracy curves.

    Args:
        history: Mapping with the keys ``'train_loss'``, ``'val_loss'``,
            ``'train_acc'`` and ``'val_acc'``, each holding one value per
            completed epoch.

    The figure is written to ``MEDIA_DIR / "training_history.png"``.
    """
    epochs = range(1, len(history['train_loss']) + 1)

    # (train key, val key, train legend, val legend, panel title, y-axis label)
    panels = (
        ('train_loss', 'val_loss', 'Train Loss', 'Validation Loss',
         'График функции потерь (Loss)', 'Loss'),
        ('train_acc', 'val_acc', 'Train Accuracy', 'Validation Accuracy',
         'График точности (Accuracy)', 'Accuracy'),
    )

    plt.figure(figsize=(14, 5))
    for slot, (train_key, val_key, train_lbl, val_lbl, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        plt.plot(epochs, history[train_key], 'b-', label=train_lbl)
        plt.plot(epochs, history[val_key], 'r--', label=val_lbl)
        plt.title(title, fontsize=14)
        plt.xlabel('Эпохи', fontsize=12)
        plt.ylabel(y_label, fontsize=12)
        plt.legend()
        plt.grid(True, linestyle=':', alpha=0.7)

    plt.tight_layout()
    plot_path = MEDIA_DIR / "training_history.png"
    plt.savefig(plot_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"[INFO] График обучения сохранен в: {plot_path}")
def plot_confusion_matrix(y_true, y_pred):
    """Save a confusion-matrix heat map for the given predictions.

    Args:
        y_true: Sequence of ground-truth class indices.
        y_pred: Sequence of predicted class indices, same length as ``y_true``.

    The figure is written to ``MEDIA_DIR / "confusion_matrix_emoset.png"``.
    """
    # Pass the full index range explicitly: without ``labels`` scikit-learn
    # builds the matrix only from classes that occur in y_true/y_pred, so a
    # class that never appears would shrink the matrix and misalign it with
    # the CLASS_NAMES tick labels below.
    class_ids = list(range(len(CLASS_NAMES)))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)

    plt.figure(figsize=(10, 8))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=CLASS_NAMES,
        yticklabels=CLASS_NAMES,
        cbar_kws={'label': 'Количество сэмплов'},
    )
    plt.title('Матрица ошибок (Confusion Matrix) - ResNet50', fontsize=16, pad=20)
    plt.ylabel('Истинные классы (Ground Truth)', fontsize=12)
    plt.xlabel('Предсказанные классы (Predicted)', fontsize=12)
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()

    cm_path = MEDIA_DIR / "confusion_matrix_emoset.png"
    plt.savefig(cm_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"[INFO] Матрица ошибок сохранена в: {cm_path}")
# One training epoch
def train_epoch(current_model, loader):
    """Run one optimisation pass over ``loader``.

    Uses the module-level ``optimizer``, ``criterion`` and ``DEVICE``.

    Args:
        current_model: Model to train; switched to train mode here.
        loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over all samples seen.
    """
    current_model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    progress = tqdm(loader, desc="Тренировка", leave=False, smoothing=0)
    for imgs, labels in progress:
        imgs = imgs.to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad(set_to_none=True)
        logits = current_model(imgs)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        # criterion yields a batch mean; re-weight by the batch size so the
        # epoch average stays exact when the last batch is smaller.
        loss_sum += loss.item() * imgs.size(0)
        n_correct += (logits.argmax(dim=1) == labels).sum().item()
        n_seen += labels.size(0)

    return loss_sum / n_seen, n_correct / n_seen
# One validation epoch, optionally keeping predictions for the confusion matrix
@torch.no_grad()
def val_epoch(current_model, loader, return_preds=False):
    """Evaluate ``current_model`` on ``loader`` without gradient tracking.

    Uses the module-level ``criterion`` and ``DEVICE``.

    Args:
        current_model: Model to evaluate; switched to eval mode here.
        loader: Iterable yielding ``(images, labels)`` batches.
        return_preds: When true, also collect per-sample labels and predictions.

    Returns:
        ``(mean_loss, accuracy)``, or
        ``(mean_loss, accuracy, all_labels, all_preds)`` when ``return_preds``
        is true, the last two being flat lists in loader order.
    """
    current_model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    collected_preds = []
    collected_labels = []

    progress = tqdm(loader, desc="Валидация", leave=False, smoothing=0)
    for imgs, labels in progress:
        imgs = imgs.to(DEVICE)
        labels = labels.to(DEVICE)

        logits = current_model(imgs)
        loss_sum += criterion(logits, labels).item() * imgs.size(0)

        preds = logits.argmax(dim=1)
        n_correct += (preds == labels).sum().item()
        n_seen += labels.size(0)

        if not return_preds:
            continue
        collected_preds.extend(preds.cpu().numpy())
        collected_labels.extend(labels.cpu().numpy())

    metrics = (loss_sum / n_seen, n_correct / n_seen)
    if return_preds:
        return (*metrics, collected_labels, collected_preds)
    return metrics
if __name__ == "__main__":
    checkpoint_path = "./emosetV2_resnet50_best.pth"

    # Best-so-far trackers: accuracy drives checkpointing, loss drives early stopping.
    best_val_acc, best_val_loss = 0.0, float('inf')
    epochs_no_improve = 0

    # Per-epoch metric history used for the learning-curve figure.
    history = {key: [] for key in ('train_loss', 'train_acc', 'val_loss', 'val_acc')}

    # Labels/predictions of the best-accuracy epoch, kept for the confusion matrix.
    best_labels, best_preds = [], []

    print("Старт обучения.")
    for epoch in range(1, EPOCHS + 1):
        train_loss, train_acc = train_epoch(model, train_loader)
        # Predictions are collected every epoch; they are only kept below
        # when the epoch turns out to be the best one so far.
        val_loss, val_acc, val_labels, val_preds = val_epoch(
            model, val_loader, return_preds=True
        )
        scheduler.step()

        # Record this epoch's metrics (history keys are in the same order).
        for key, value in zip(history, (train_loss, train_acc, val_loss, val_acc)):
            history[key].append(value)

        print(f"[{epoch}/{EPOCHS}] Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

        # Checkpoint whenever validation accuracy improves.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_labels, best_preds = val_labels, val_preds
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Сохранен чекпоинт (Acc: {best_val_acc:.4f})")

        # Early stopping: count consecutive epochs without a new best val loss.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
            continue
        epochs_no_improve += 1
        if epochs_no_improve >= PATIENCE:
            print(f"Ранняя остановка: метрика валидации не улучшается {PATIENCE} эпох.")
            break

    print("Процесс обучения завершен. Генерирую графики для диссертации...")
    plot_learning_curves(history)
    plot_confusion_matrix(best_labels, best_preds)
    print("Все медиафайлы успешно созданы!")
+171
View File
@@ -0,0 +1,171 @@
import os
from pathlib import Path
from PIL import Image
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import timm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.manifold import TSNE
# Inference settings: input data, checkpoint, and loader sizing
DATA_ROOT = Path("./NFS/Thesis/Emoset/EmoSet-118K")
# NOTE(review): the training script visible in this commit saves its checkpoint
# under a different file name/location — confirm this is the intended weights file.
MODEL_PATH = Path("./src/emoset_resnet50_best.pth")
BATCH_SIZE = 128
NUM_WORKERS = 32

# Output directory for generated figures; created eagerly at import time.
# NOTE(review): relative to the working directory, and not the same relative
# path the training script uses for its media folder — confirm.
MEDIA_DIR = Path("scripts/media")
MEDIA_DIR.mkdir(parents=True, exist_ok=True)

# Prefer the GPU when one is available.
_backend = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_backend)
print(f"Вычисления перенесены на: {device}")
class EmoSetFeatureDataset(Dataset):
def __init__(self, root: Path | str, split: str):
self.root = Path(root) / split
self.df = pd.read_csv(self.root / "labels.csv")
self.labels = sorted(self.df["label"].unique())
self.label2idx = {l: i for i, l in enumerate(self.labels)}
self.idx2label = {i: l for l, i in self.label2idx.items()}
# Для экстракции признаков аугментация отключена, используется строгий CenterCrop
self.transform = T.Compose([
T.Resize(256),
T.CenterCrop(224),
T.ToTensor(),
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
def __len__(self):
return len(self.df)
def __getitem__(self, idx):
row = self.df.iloc[idx]
img_path = self.root / "images" / row["filename"]
# Перехват битых файлов выборки
try:
img = Image.open(img_path).convert("RGB")
except Exception:
img = Image.new("RGB", (224, 224), (0, 0, 0))
img_tensor = self.transform(img)
label_idx = self.label2idx[row["label"]]
return img_tensor, label_idx
def plot_tsne(embeddings, labels, idx2label, sample_limit=3000):
    """Project embeddings to 2-D with t-SNE and save a class-coloured scatter plot.

    Args:
        embeddings: Array-like of feature vectors, one row per sample.
        labels: Array-like of integer class indices, aligned with ``embeddings``.
        idx2label: Mapping from class index to class name (used for the legend).
        sample_limit: Maximum number of samples to project; ``None`` uses all.

    Raises:
        ValueError: If ``embeddings`` and ``labels`` differ in length, or if
            fewer than two samples are available (t-SNE cannot be fitted).

    The figure is written to ``MEDIA_DIR / "tsne_embeddings.png"``.
    """
    embeddings = np.asarray(embeddings)
    labels = np.asarray(labels)
    total = len(embeddings)
    if len(labels) != total:
        raise ValueError(
            f"embeddings ({total}) and labels ({len(labels)}) differ in length"
        )
    n_samples = total if sample_limit is None else min(sample_limit, total)
    if n_samples < 2:
        raise ValueError("t-SNE needs at least two samples to build a projection")

    # Draw a seeded random subset instead of the first rows: the caller feeds
    # an unshuffled loader, so a label file grouped by class would otherwise
    # put only the first class(es) on the plot.
    if n_samples < total:
        rng = np.random.default_rng(42)
        picked = np.sort(rng.choice(total, size=n_samples, replace=False))
    else:
        picked = np.arange(total)
    labels_subset = labels[picked]

    print(f"Построение t-SNE проекции для {n_samples} сэмплов...")

    # scikit-learn requires perplexity < n_samples; clamp for small inputs.
    tsne_model = TSNE(
        n_components=2,
        perplexity=min(30, n_samples - 1),
        random_state=42,
    )
    embeddings_2d = tsne_model.fit_transform(embeddings[picked])

    plt.figure(figsize=(12, 9))

    # Set2 stays distinguishable in print. The colour range is pinned to the
    # full class-index range so a class keeps its colour even when some other
    # class is missing from the subset.
    scatter = plt.scatter(
        embeddings_2d[:, 0],
        embeddings_2d[:, 1],
        c=labels_subset,
        cmap="Set2",
        vmin=0,
        vmax=max(len(idx2label) - 1, 1),
        alpha=0.7,
        s=20,
        edgecolors='w',
        linewidths=0.5
    )

    # num=None yields one handle per unique value actually plotted (in sorted
    # order), so the names are derived from the same unique values to keep
    # handles and names aligned.
    handles, _ = scatter.legend_elements(num=None)
    legend_labels = [idx2label[int(i)] for i in np.unique(labels_subset)]

    # The legend sits outside the axes so it does not cover data points.
    plt.legend(handles, legend_labels, title="Эмоциональные классы",
               bbox_to_anchor=(1.05, 1), loc='upper left')

    plt.title("2D проекция скрытого пространства признаков (t-SNE)", pad=20, fontsize=14)
    # NOTE(review): the axis captions call the axes "principal components",
    # which is PCA terminology; t-SNE axes are not principal components.
    # Consider rewording the captions before publication.
    plt.xlabel("Первая главная компонента (t-SNE 1)", fontsize=12)
    plt.ylabel("Вторая главная компонента (t-SNE 2)", fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.3)

    plt.tight_layout()
    plot_path = MEDIA_DIR / "tsne_embeddings.png"
    plt.savefig(plot_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"[INFO] График t-SNE сохранен в: {plot_path}")
if __name__ == "__main__":
    # Script entry point: extract penultimate-layer embeddings for the test
    # split, cache them as .npy files and render the t-SNE figure.
    test_ds = EmoSetFeatureDataset(DATA_ROOT, "test")
    test_loader = DataLoader(
        test_ds,
        batch_size=BATCH_SIZE,
        shuffle=False,  # keep row order so embeddings stay aligned with labels.csv
        num_workers=NUM_WORKERS,
        pin_memory=True
    )

    print(f"Подготовлено для извлечения: {len(test_ds)} файлов.")

    # Build the network with the classifier size of the trained model so the
    # checkpoint's state dict can be loaded as-is.
    feature_extractor = timm.create_model(
        "resnet50",
        pretrained=False,
        num_classes=len(test_ds.labels)
    )

    try:
        checkpoint = torch.load(MODEL_PATH, map_location=device)
        feature_extractor.load_state_dict(checkpoint)
    except Exception as e:
        print(f"Ошибка загрузки весов: {e}. Убедитесь, что модель обучена.")
        # SystemExit instead of exit(): the exit() helper is injected by the
        # `site` module for interactive use and is not guaranteed in programs.
        raise SystemExit(1)
    else:
        print("Веса модели успешно загружены.")

    # Drop the classification layer (fc) so the forward pass returns features.
    feature_extractor.reset_classifier(0)
    feature_extractor.to(device)
    feature_extractor.eval()

    print("Слой классификации удален. Модель готова к экстракции.")

    extracted_embeddings = []
    extracted_labels = []

    print("Старт пакетной экстракции признаков...")
    with torch.no_grad():
        for imgs, labels in tqdm(test_loader, desc="Экстракция"):
            imgs = imgs.to(device)

            # One feature vector per image (the original comment gives the
            # batch shape as [BATCH_SIZE, 2048]).
            embeddings_batch = feature_extractor(imgs)

            extracted_embeddings.append(embeddings_batch.cpu().numpy())
            extracted_labels.append(labels.numpy())

    # Merge the per-batch arrays into single matrices.
    np_embeddings = np.concatenate(extracted_embeddings, axis=0)
    np_labels = np.concatenate(extracted_labels, axis=0)

    print(f"Размерность матрицы признаков: {np_embeddings.shape}")

    # Persist the artefacts for the downstream scripts.
    np.save("./src/emoset_test_embeddings.npy", np_embeddings)
    np.save("./src/emoset_test_labels.npy", np_labels)
    print("Матрицы успешно экспортированы в .npy файлы.")

    # Render the thesis figure.
    plot_tsne(np_embeddings, np_labels, test_ds.idx2label, sample_limit=3000)
    print("Процесс полностью завершен.")
+74 -73
View File
@@ -1,96 +1,97 @@
import joblib
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import joblib
from pathlib import Path from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score from sklearn.metrics import mean_squared_error, r2_score
# Калибровочные координаты центров эмоциональных классов в пространстве Рассела [1.0 - 9.0] # 1. Настройка путей
EMOTION_TO_VA_COORDS = { embeddings_path = Path("./src/emoset_test_embeddings.npy")
0: (7.5, 6.5), # amusement csv_path = Path("./NFS/Thesis/Emoset/EmoSet-118K/test/labels.csv")
1: (2.0, 8.0), # anger model_path = Path("./src/music_engine/va_regressor.pkl")
2: (6.5, 5.0), # awe
3: (7.0, 3.0), # contentment output_dir = Path("./src/scripts/media")
4: (3.0, 6.0), # disgust output_file = output_dir / "metrics_output.txt"
5: (8.0, 8.0), # excitement
6: (2.5, 7.5), # fear # 2. Корректный маппинг 8 классов EmoSet в шкалу DEAM [1.0, 9.0]
7: (2.0, 2.0), # sadness # Формула перевода из [-1, 1] в [1, 9]: 5.0 + (X * 4.0)
EMO_TO_VA = {
"amusement": [8.2, 6.6], # Веселье (Высокий позитив, средняя энергия)
"awe": [7.0, 7.4], # Восхищение (Позитив, высокая энергия)
"contentment": [7.8, 3.4], # Умиротворение (Позитив, низкая энергия)
"excitement": [8.2, 8.2], # Возбуждение (Макс. позитив, макс. энергия)
"anger": [2.2, 7.8], # Гнев (Глубокий негатив, высокая энергия)
"disgust": [2.6, 6.6], # Отвращение (Негатив, средняя энергия)
"fear": [2.6, 8.2], # Страх (Негатив, максимальная энергия)
"sadness": [2.2, 2.6] # Грусть (Глубокий негатив, низкая энергия)
} }
def evaluate_regression_model(): def generate_slide_metrics():
# Инициализация путей к артефактам пайплайна print("[INFO] Загрузка тестовых артефактов...")
base_dir = Path(__file__).resolve().parent.parent.parent
embeddings_path = base_dir / "src" / "emoset_test_embeddings.npy"
labels_path = base_dir / "src" / "emoset_test_labels.npy"
model_path = base_dir / "src" / "music_engine" / "va_regressor.pkl"
if not all(p.exists() for p in [embeddings_path, labels_path, model_path]): if not all(p.exists() for p in [embeddings_path, csv_path, model_path]):
print("Отсутствуют необходимые артефакты для расчета метрик.") print("[ERROR] Проверьте наличие файлов данных или модели регрессора.")
return return
# Загрузка скрытых представлений и инициализация регрессора output_dir.mkdir(parents=True, exist_ok=True)
x_features = np.load(embeddings_path)
y_discrete = np.load(labels_path)
regression_pipeline = joblib.load(model_path)
# Маппинг дискретных меток в непрерывные координаты # 3. Загрузка эмбеддингов и меток
y_continuous = np.array([EMOTION_TO_VA_COORDS[label] for label in y_discrete]) X_test = np.load(embeddings_path)
df = pd.read_csv(csv_path)
# Изоляция тестовой выборки (сохранение детерминированности через random_state) if len(X_test) != len(df):
_, x_test, _, y_test = train_test_split(x_features, y_continuous, test_size=0.2, random_state=42) print(f"[WARN] Корректировка размеров выборки: Эмбеддинги ({len(X_test)}) != Метки ({len(df)})")
min_len = min(len(X_test), len(df))
X_test = X_test[:min_len]
df = df.iloc[:min_len]
# Генерация предсказаний на отложенной выборке y_test_list = [EMO_TO_VA.get(label.lower().strip(), [5.0, 5.0]) for label in df['label']]
y_pred = regression_pipeline.predict(x_test) y_test = np.array(y_test_list)
# Расчет метрик качества регрессии (Mean Squared Error, R-squared) # 4. Выполнение инференса
mse_valence = mean_squared_error(y_test[:, 0], y_pred[:, 0]) print("[INFO] Выполнение инференса регрессора на скрытом пространстве признаков...")
r2_valence = r2_score(y_test[:, 0], y_pred[:, 0]) regressor = joblib.load(model_path)
y_pred = regressor.predict(X_test)
mse_arousal = mean_squared_error(y_test[:, 1], y_pred[:, 1]) # === БЛОК ДИАГНОСТИКИ ШКАЛЫ ===
r2_arousal = r2_score(y_test[:, 1], y_pred[:, 1]) print("\n" + "-"*50)
print(" ДИАГНОСТИКА ДИАПАЗОНОВ ЗНАЧЕНИЙ ".center(50))
print("-"*50)
print(f"Истинные (y_test) -> Мин: {y_test.min():.2f}, Макс: {y_test.max():.2f}, Среднее: {y_test.mean():.2f}")
print(f"Предсказания (y_pred) -> Мин: {y_pred.min():.2f}, Макс: {y_pred.max():.2f}, Среднее: {y_pred.mean():.2f}")
print("-"*50 + "\n")
# ==============================
print("Метрики качества регрессионной модели на тестовой выборке:") # 5. Расчет метрик
print(f"Valence -> MSE: {mse_valence:.4f} | R^2: {r2_valence:.4f}") mse_v = mean_squared_error(y_test[:, 0], y_pred[:, 0])
print(f"Arousal -> MSE: {mse_arousal:.4f} | R^2: {r2_arousal:.4f}") r2_v = r2_score(y_test[:, 0], y_pred[:, 0])
# Построение диагностических диаграмм рассеяния (Scatter Plots) mse_a = mean_squared_error(y_test[:, 1], y_pred[:, 1])
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 7)) r2_a = r2_score(y_test[:, 1], y_pred[:, 1])
# Конфигурация подграфика: Ось Валентности mse_total = mean_squared_error(y_test, y_pred)
ax1.scatter(y_test[:, 0], y_pred[:, 0], alpha=0.3, color='#1f77b4', edgecolors='none', label='Прогноз регрессора') r2_total = r2_score(y_test, y_pred)
ax1.plot([1, 9], [1, 9], 'r--', lw=2, label='Идеальное совпадение (x=y)')
ax1.set_title('Диаграмма рассеяния: Valence (Позитивность)', fontsize=14, fontweight='bold')
ax1.set_xlabel('Эталонные значения (центры классов)', fontsize=12)
ax1.set_ylabel('Непрерывные предсказания модели', fontsize=12)
ax1.set_xlim(1, 9)
ax1.set_ylim(1, 9)
ax1.grid(True, linestyle='--', alpha=0.6)
ax1.legend(loc='upper left', fontsize=10)
# Научное обоснование распределения данных для комиссии # 6. Вывод и сохранение результатов
ax1.text(1.2, 8.2, table_content = f"""
'Формирование вертикальных кластеров\n' ==================================================
'обусловлено проекцией 8 дискретных\n' ТАБЛИЦА МЕТРИК ДЛЯ СЛАЙДА 10
'базовых эмоций на непрерывную\n' ==================================================
'координатную плоскость.', | Метрика | Valence (V) | Arousal (A) | Общая (Total) |
fontsize=10, bbox=dict(facecolor='white', alpha=0.9, edgecolor='gray')) |------------|--------------|--------------|---------------|
| MSE | {mse_v:<12.4f} | {mse_a:<12.4f} | {mse_total:<13.4f} |
| R² | {r2_v:<12.4f} | {r2_a:<12.4f} | {r2_total:<13.4f} |
==================================================
# Конфигурация подграфика: Ось Активности Формула целевой функции для вставки на слайд (LaTeX):
ax2.scatter(y_test[:, 1], y_pred[:, 1], alpha=0.3, color='#ff7f0e', edgecolors='none', label='Прогноз регрессора') $$Score_{{final}} = D_{{emo}} + 4.0 \cdot Acoustic_{{penalty}}$$
ax2.plot([1, 9], [1, 9], 'r--', lw=2, label='Идеальное совпадение (x=y)') """
ax2.set_title('Диаграмма рассеяния: Arousal (Активность)', fontsize=14, fontweight='bold')
ax2.set_xlabel('Эталонные значения (центры классов)', fontsize=12)
ax2.set_ylabel('Непрерывные предсказания модели', fontsize=12)
ax2.set_xlim(1, 9)
ax2.set_ylim(1, 9)
ax2.grid(True, linestyle='--', alpha=0.6)
ax2.legend(loc='upper left', fontsize=10)
plt.tight_layout() print(table_content)
plt.savefig('regression_metrics_plot.png', dpi=300, bbox_inches='tight')
print("Диагностические графики экспортированы в regression_metrics_plot.png") with open(output_file, 'w', encoding='utf-8') as f:
f.write(table_content)
print(f"[SUCCESS] Метрики успешно сохранены в файл: {output_file.absolute()}")
if __name__ == "__main__": if __name__ == "__main__":
evaluate_regression_model() generate_slide_metrics()
Binary file not shown.

After

Width:  |  Height:  |  Size: 313 KiB

+12
View File
@@ -0,0 +1,12 @@
==================================================
ТАБЛИЦА МЕТРИК ДЛЯ СЛАЙДА 10
==================================================
| Метрика | Valence (V) | Arousal (A) | Общая (Total) |
|------------|--------------|--------------|---------------|
| MSE | 1.5135 | 2.2743 | 1.8939 |
| R² | 0.7927 | 0.4321 | 0.6124 |
==================================================
Формула целевой функции для вставки на слайд (LaTeX):
$$Score_{final} = D_{emo} + 4.0 \cdot Acoustic_{penalty}$$
Binary file not shown.

After

Width:  |  Height:  |  Size: 243 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 MiB