Files
Thesis/src/scripts/train_regressor.py
T
2026-05-28 17:15:33 +00:00

58 lines
2.0 KiB
Python

import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.linear_model import RidgeCV
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib
# Anchor point for every emotion class id, stored as (valence, arousal).
# Class labels are looked up here to build the two-column regression target.
EMO_VA_MAP = {
    0: (7.5, 6.5),  # amusement
    1: (2.0, 8.0),  # anger
    2: (6.5, 5.0),  # awe
    3: (7.0, 3.0),  # contentment
    4: (3.0, 6.0),  # disgust
    5: (8.0, 8.0),  # excitement
    6: (2.5, 7.5),  # fear
    7: (2.0, 2.0),  # sadness
}
# ---------------------------------------------------------------------------
# Training script: fit a valence/arousal regressor on stored embeddings.
#
# Steps: load embeddings and class labels from .npy files, turn every label
# into its (valence, arousal) target via EMO_VA_MAP, fit a standardised
# multi-output ridge regressor on 80% of the samples, report metrics on the
# remaining 20%, and persist the fitted pipeline with joblib.
# ---------------------------------------------------------------------------

# Input and output locations are resolved against the parent of the directory
# that contains this script, so the current working directory does not matter.
BASE_DIR = Path(__file__).resolve().parent.parent
EMBEDDINGS_PATH = BASE_DIR / "emoset_test_embeddings.npy"
LABELS_PATH = BASE_DIR / "emoset_test_labels.npy"

print("Загрузка данных...")
X = np.load(EMBEDDINGS_PATH)
y_labels = np.load(LABELS_PATH)

# Fail early with a readable message: without this check a label that is not
# a key of EMO_VA_MAP only surfaces as a bare ``KeyError`` in the lookup below.
unknown_labels = [
    label for label in np.unique(y_labels).tolist() if label not in EMO_VA_MAP
]
if unknown_labels:
    raise ValueError(
        f"Неизвестные метки классов в {LABELS_PATH}: {unknown_labels}; "
        f"допустимые метки: {sorted(EMO_VA_MAP)}"
    )

# Regression target: one (valence, arousal) row per sample.
y_va = np.array([EMO_VA_MAP[label] for label in y_labels])

# Fixed seed keeps the 80/20 split, and therefore the metrics, reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_va, test_size=0.2, random_state=42
)

print("Обучение масштабатора и RidgeCV регрессора...")
# MultiOutputRegressor fits a separate RidgeCV per target column, so valence
# and arousal each select their own regularisation strength from ``alphas``.
model = Pipeline([
    ("scaler", StandardScaler()),
    (
        "regressor",
        MultiOutputRegressor(RidgeCV(alphas=[0.1, 1.0, 10.0, 100.0, 1000.0])),
    ),
])
model.fit(X_train, y_train)

# Hold-out evaluation; both metrics use scikit-learn's default aggregation
# across the two output columns.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\nУспех! Обучение завершено!")
print(f"MSE: {mse:.4f}")
print(f"R^2 Score: {r2:.4f}")

# Spread diagnostics: a predicted range much narrower than the reference range
# suggests the regressor is collapsing towards the mean target.
print("\n--- ДИАГНОСТИКА РАЗБРОСА ПРЕДСКАЗАНИЙ ---")
print(f"Valence: от {y_pred[:, 0].min():.2f} до {y_pred[:, 0].max():.2f} (Эталон: 2.0 - 8.0)")
print(f"Arousal: от {y_pred[:, 1].min():.2f} до {y_pred[:, 1].max():.2f} (Эталон: 2.0 - 8.0)")

# Persist the whole pipeline (scaler + regressor) so that inference applies
# the same standardisation; the target directory is created when missing.
output_model_path = BASE_DIR / "music_engine" / "va_regressor.pkl"
output_model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, output_model_path)
print(f"\nМодель сохранена в: {output_model_path}")