feat: finale

2026-06-03 09:16:12 +00:00
parent 3850b15053
commit a57addcbb1
9 changed files with 807 additions and 176 deletions
@@ -17,6 +17,9 @@ services:
      - emom_mesh
    env_file:
      - .env
    volumes:
      - ./src:/app/src
      - ${DATA_DEAM_DIR}:/app/dataset/DEAM:ro
    depends_on:
      - emom_inference
@@ -31,9 +34,10 @@ services:
    env_file:
      - .env
    volumes:
-      - ${HOST_ARTIFACTS_DIR}/emoset_resnet50_best.pth:/app/models/resnet50.pth:ro
+      - ${HOST_ARTIFACTS_DIR}/emoset_resnet50_best.pth:/app/src/emoset_resnet50_best.pth:ro
-      - ${HOST_ARTIFACTS_DIR}/music_engine/va_regressor.pkl:/app/models/regressor.pkl:ro
+      - ${HOST_ARTIFACTS_DIR}/music_engine/va_regressor.pkl:/app/src/music_engine/va_regressor.pkl:ro
      - ${DATA_DEAM_DIR}:/app/dataset/DEAM:ro
      - ~/.cache/huggingface:/root/.cache/huggingface
    deploy:
      resources:
        reservations:
@@ -3,18 +3,22 @@ FROM pytorch/pytorch:2.2.1-cuda12.1-cudnn8-runtime
 ENV PYTHONDONTWRITEBYTECODE=1
 ENV PYTHONUNBUFFERED=1
-WORKDIR /app
+# 1. Системные зависимости
 RUN apt-get update && apt-get install -y \
    libglib2.0-0 libsm6 libxext6 libxrender-dev \
    && rm -rf /var/lib/apt/lists/*
-# Устанавливаем зависимости для ML и API напрямую, чтобы не плодить requirements.txt
+# 2. Python пакеты
-RUN pip install --no-cache-dir fastapi uvicorn timm scikit-learn pandas joblib python-multipart
+RUN pip install --no-cache-dir fastapi uvicorn timm scikit-learn pandas joblib python-multipart transformers==4.38.2 tokenizers==0.15.2 accelerate
 # 3. Копируем код в контейнер
 WORKDIR /app
 COPY src/ /app/src/
 # 4. Switch the working directory to /app/src so uvicorn can import the app as "api:app" (no "src." package prefix)
 WORKDIR /app/src
 EXPOSE 8000
-# Запускаем FastAPI сервер
+# 5. Запускаем локально (без префикса src.)
-CMD ["uvicorn", "src.api:app", "--host", "0.0.0.0", "--port", "8000"]
+CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]
@@ -4,12 +4,14 @@ ENV PYTHONDONTWRITEBYTECODE=1
 ENV PYTHONUNBUFFERED=1
 WORKDIR /app
 # Только легкие зависимости для отображения интерфейса
 RUN pip install --no-cache-dir streamlit==1.32.0 requests pandas pillow
 COPY src/ /app/src/
 # Switch the working directory to /app/src so Streamlit is launched with the relative path main.py
 WORKDIR /app/src
 EXPOSE 8080
-CMD ["streamlit", "run", "src/main.py", "--server.port", "8080", "--server.address", "0.0.0.0"]
+# Запускаем локально
 CMD ["streamlit", "run", "main.py", "--server.port", "8080", "--server.address", "0.0.0.0"]
@@ -1,18 +1,20 @@
 import io
-import os
+import traceback
 import numpy as np
 from typing import List
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.responses import JSONResponse
 from PIL import Image
 # Импортируем твои существующие загрузчики (они теперь работают только на бэкенде)
 from data_loader import load_music_engine, load_image_processor
 from music_engine.llm_bridge import LLMAcousticBridge
 app = FastAPI(title="EmoM Inference API", version="1.0.0")
 # Глобальный кэш для удержания моделей в памяти
 ml_context = {
    "image_processor": None,
-    "music_matcher": None
+    "music_matcher": None,
    "llm_bridge": None
 }
@app.on_event("startup")
@@ -20,34 +22,64 @@ async def startup_event():
    print("Инициализация нейросетевого ядра EmoM...")
    ml_context["image_processor"] = load_image_processor()
    ml_context["music_matcher"] = load_music_engine()
-    
+    ml_context["llm_bridge"] = LLMAcousticBridge()
    if not ml_context["image_processor"] or not ml_context["music_matcher"]:
        raise RuntimeError("Отказ системы: Артефакты моделей не найдены.")
    print("Вычислительный конвейер готов к работе.")
@app.post("/analyze")
-async def analyze_image_endpoint(file: UploadFile = File(...)):
+async def analyze_event_endpoint(files: List[UploadFile] = File(...)):
    """
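    Accepts one or more uploaded images of a single event, averages the
    valence/arousal predicted for each image, requests an acoustic profile
    from the LLM bridge, and returns the nearest matching tracks.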
    """
    try:
-        # 1. Чтение бинарных данных из запроса
+        # 1. Читаем все загруженные картинки
-        image_bytes = await file.read()
+        images = []
-        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+        for file in files:
            image_bytes = await file.read()
            img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
            images.append(img)
        print(f"Начата обработка события из {len(images)} фотографий...")
-        # 2. Инференс (ВНИМАНИЕ: здесь используй реальные названия методов из своих классов!)
+        img_processor = ml_context["image_processor"]
-        # Предположим, твой процессор выдает координаты V/A
+        matcher = ml_context["music_matcher"]
-        v_a_coords = ml_context["image_processor"].extract_va(image) 
+        llm = ml_context["llm_bridge"]
        all_v, all_a = [], []
        all_objects = []
        # 2. Прогоняем каждую картинку через нейросети
        for img in images:
            embedding = img_processor.extract_embedding(img)
            v, a = matcher.predict_va(embedding)
            all_v.append(v)
            all_a.append(a)
            caption = img_processor.describe_scene(img)
            all_objects.append(caption)
        # 3. Усредняем эмоции события
        target_v = float(np.mean(all_v))
        target_a = float(np.mean(all_a))
        unique_semantics = list(set(all_objects))
        # 4. Запрашиваем акустический профиль у Ollama
        print(f"Запрос к Ollama. V={target_v:.2f}, A={target_a:.2f}")
        llm_profile = llm.get_acoustic_profile(target_v, target_a, unique_semantics)
        # 5. Ищем треки в базе
        print("Поиск подходящих композиций...")
        playlist_df = matcher.find_nearest_tracks(target_v, target_a, llm_profile=llm_profile, top_k=15)
-        # 3. Поиск треков в базе
+        # Переводим таблицу в JSON-формат
-        matched_tracks = ml_context["music_matcher"].find_tracks(v_a_coords)
+        tracks_list = playlist_df.to_dict(orient="records")
-        
+
        # 4. Формирование ответа
        return JSONResponse(content={
            "status": "success",
-            "valence_arousal": v_a_coords,
+            "images_processed": len(images),
-            "tracks": matched_tracks
+            "target_v": target_v,
            "target_a": target_a,
            "llm_profile": llm_profile,
            "semantics": unique_semantics,
            "tracks": tracks_list
        })
    except Exception as e:
        print(traceback.format_exc()) 
        raise HTTPException(status_code=500, detail=f"Ошибка инференса: {str(e)}")
@@ -1,55 +1,49 @@
 import os
 from pathlib import Path
 import pandas as pd
 import numpy as np
 import streamlit as st
 # Импорты твоих движков
 from music_engine.matcher import MusicMatcher
 from music_engine.image_processor import ImageProcessor
 # Базовая директория (папка src)
 BASE_DIR = Path(__file__).resolve().parent
@st.cache_resource
 def load_music_engine():
-    # Инициализация базы данных и регрессора для музыкального мэтчинга
+    """Загрузка базы данных и модели регрессора для бэкенда."""
    # Пути соответствуют тем, что мы примонтировали в Docker
    db_path = BASE_DIR.parent / "dataset" / "DEAM" / "music_db.csv"
    model_path = BASE_DIR / "music_engine" / "va_regressor.pkl"
    if not db_path.exists():
        print(f"Музыкальная БД не найдена: {db_path}")
        return None
    return MusicMatcher(db_path=db_path, model_path=model_path)
@st.cache_resource
 def load_image_processor():
-    # Модуль обработки визуальных признаков
+    """Инициализация нейросетевого экстрактора (ResNet-50)."""
-    model_path = BASE_DIR / "emoset_resnet50_best.pth"
+    weights_path = BASE_DIR / "emoset_resnet50_best.pth"
-    
+    return ImageProcessor(weights_path)
    # Обработка пути при вызове из корневой директории
    if not model_path.exists():
        model_path = BASE_DIR.parent / "emoset_resnet50_best.pth"
    return ImageProcessor(model_path=model_path)
@st.cache_data
 def load_emoset_data():
-    # Выборка данных датасета для вкладки отладки
+    """
-    dataset_root = BASE_DIR.parent / "dataset" / "EmoSet-118K" / "test"
+    Загрузка эталонного датасета EmoSet. 
-    
+    (Оставлено для обратной совместимости, если понадобится локальная отладка)
-    csv_path = dataset_root / "labels.csv"
+    """
-    img_dir = dataset_root / "images"
+    try:
-    emb_path = BASE_DIR / "emoset_test_embeddings.npy"
+        images_path = BASE_DIR.parent / "dataset" / "EmoSet-118K" / "test" / "images"
-    lbl_path = BASE_DIR / "emoset_test_labels.npy"
+        labels_path = BASE_DIR / "emoset_test_labels.npy"
-
+        embeddings_path = BASE_DIR / "emoset_test_embeddings.npy"
-    if not all([csv_path.exists(), emb_path.exists(), lbl_path.exists()]):
+        
-        print("Тестовые файлы датасета не найдены, вкладка отладки может работать некорректно")
+        # Если файлов нет (например, на проде), возвращаем None
-        return None, None, None, None
+        if not all(p.exists() for p in [labels_path, embeddings_path]):
-
+            return None, None, None, None
-    labels_df = pd.read_csv(csv_path)
+            
-    
+        labels = np.load(labels_path)
-    test_filenames = labels_df['filename'].tolist()
+        embeddings = np.load(embeddings_path)
-    test_embeddings = np.load(emb_path)
+        
-    test_labels = np.load(lbl_path)
+        # Читаем CSV с метками
-    
+        df = pd.read_csv(BASE_DIR.parent / "dataset" / "EmoSet-118K" / "test" / "labels.csv")
-    return test_filenames, test_embeddings, test_labels, img_dir
+        image_files = df['filename'].tolist()
        return image_files, embeddings, labels, images_path
    except Exception as e:
        print(f"Предупреждение: Тестовые артефакты EmoSet не найдены ({e})")
        return None, None, None, None
@@ -1,62 +1,190 @@
 import os
 import requests
 import streamlit as st
 import streamlit.components.v1 as components
 from PIL import Image
 import base64
 from io import BytesIO
-# Конфигурация UI
+st.set_page_config(page_title="EmoM Playlist Generator", layout="wide", initial_sidebar_state="collapsed")
 st.set_page_config(
    page_title="EmoM | EmotionMusic",
    layout="wide",
    initial_sidebar_state="collapsed"
 )
-st.markdown(
+API_URL = os.getenv("BACKEND_API_URL", "http://emom_inference:8000") + "/analyze"
-    """
+DEAM_AUDIO_DIR = "/app/dataset/DEAM/DEAM_audio/MEMD_audio"
    <style>
    img { max-width: 100%; height: auto; object-fit: contain; border-radius: 4px; }
    [data-testid="stMetricValue"] { font-size: 1.8rem; font-weight: 600; }
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    </style>
    """,
    unsafe_allow_html=True
 )
-# Маршрутизация к нашему новому микросервису (берется из .env, либо локалхост)
+def get_thumbnail_html(images, max_display=12):
-API_URL = os.getenv("BACKEND_API_URL", "http://localhost:8000") + "/analyze"
+    html_images = ""
    for file in images[:max_display]:
        img = Image.open(file)
        img.thumbnail((100, 100))
        if img.mode != "RGB": 
            img = img.convert("RGB")
        buffered = BytesIO()
        img.save(buffered, format="JPEG")
        b64_str = base64.b64encode(buffered.getvalue()).decode()
        html_images += f'<img src="data:image/jpeg;base64,{b64_str}" style="width: 60px; height: 60px; object-fit: cover; border-radius: 8px; margin-right: 8px; margin-bottom: 8px; border: 1px solid rgba(255, 255, 255, 0.2);">'
    if len(images) > max_display:
        html_images += f'<span style="display: inline-block; width: 60px; height: 60px; line-height: 60px; text-align: center; background: rgba(150, 150, 150, 0.2); border-radius: 8px; vertical-align: top; font-size: 14px;">+{len(images) - max_display}</span>'
    return f'<div style="display: flex; flex-wrap: wrap;">{html_images}</div>'
 def main():
-    st.title("Система генерации саундтреков (EmoM)")
+    if "live_state" not in st.session_state: 
-    st.caption("Микросервисная архитектура: Frontend (Streamlit) -> REST API -> PyTorch/DEAM")
+        st.session_state.live_state = "upload"
-
+    if "result_data" not in st.session_state: 
-    uploaded_file = st.file_uploader("Загрузите изображение для анализа", type=["jpg", "jpeg", "png"])
+        st.session_state.result_data = None
    if uploaded_file is not None:
        st.image(uploaded_file, caption="Входной визуальный контент")
-        if st.button("Анализировать"):
+    viewport = st.query_params.get("viewport", "desktop")
-            with st.spinner("Отправка данных в вычислительный кластер..."):
+
-                try:
+    st.markdown("""
-                    # Отправляем POST-запрос в наш FastAPI микросервис
+    <style>
-                    files = {"file": (uploaded_file.name, uploaded_file.getvalue(), uploaded_file.type)}
+    [data-testid="stFileUploadDropzone"] { min-height: 250px !important; display: flex; align-items: center; justify-content: center; border-radius: 16px; background-color: rgba(255, 75, 75, 0.03); }
-                    response = requests.post(API_URL, files=files, timeout=30)
+    .spinner-container { display: flex; flex-direction: column; align-items: center; justify-content: center; min-height: 40vh; margin-top: 10vh; }
-                    
+    .big-spinner { width: 120px; height: 120px; border: 10px solid rgba(255, 75, 75, 0.1); border-top: 10px solid #ff4b4b; border-radius: 50%; animation: spin 1s linear infinite; margin-bottom: 2rem; }
-                    if response.status_code == 200:
+    @keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }
-                        data = response.json()
+    #MainMenu {visibility: hidden;} footer {visibility: hidden;}
-                        st.success("Анализ успешно завершен!")
+    </style>
-                        
+    """, unsafe_allow_html=True)
-                        # Вывод результатов
+
-                        st.subheader("Результаты анализа")
+    if st.session_state.live_state == "upload":
-                        st.write(f"Координаты Valence/Arousal: {data.get('valence_arousal')}")
+        upload_placeholder = st.empty()
-                        st.write("Подобранные треки:")
+        with upload_placeholder.container():
-                        st.json(data.get('tracks'))
+            st.write("Загрузите изображения для визуально-семантического анализа.")
-                        
+            if viewport == "mobile": 
-                        # Здесь в будущем можно добавить обращение к Ollama для генерации красивого описания
+                st.markdown("<br>", unsafe_allow_html=True)
-                        
+            
-                    else:
+            uploaded_files = st.file_uploader(
-                        st.error(f"Ошибка сервера: {response.text}")
+                "Загрузка файлов", 
-                        
+                type=['png', 'jpg', 'jpeg'], 
-                except requests.exceptions.ConnectionError:
+                accept_multiple_files=True,
-                    st.error("Ошибка сети: Микросервис инференса недоступен. Проверьте статус Docker-контейнера emom_inference.")
+                label_visibility="collapsed" if viewport == "mobile" else "visible"
            )
            if uploaded_files:
                st.markdown("<br>", unsafe_allow_html=True)
                if st.button("Выполнить анализ", type="primary", use_container_width=True):
                    st.session_state.uploaded_images = uploaded_files
                    st.session_state.live_state = "processing"
                    upload_placeholder.empty()
                    st.rerun()
                st.markdown("<br>", unsafe_allow_html=True)
                st.caption("Выбранные файлы:")
                st.markdown(get_thumbnail_html(uploaded_files), unsafe_allow_html=True)
    elif st.session_state.live_state == "processing":
        components.html("<script>window.parent.scrollTo(0, 0);</script>", height=0, width=0)
        files = st.session_state.get("uploaded_images", [])
        st.markdown('<div class="spinner-container"><div class="big-spinner"></div><h3 style="text-align: center; font-weight: 400;">Обработка данных...</h3></div>', unsafe_allow_html=True)
        try:
            upload_data = [('files', (f.name, f.getvalue(), f.type)) for f in files]
            response = requests.post(API_URL, files=upload_data, timeout=300) 
            if response.status_code == 200:
                st.session_state.result_data = response.json()
                st.session_state.live_state = "result"
                st.rerun()
            else:
                st.error(f"Ошибка сервера: {response.status_code}")
                if st.button("Назад"):
                    st.session_state.live_state = "upload"
                    st.rerun()
        except Exception as e:
            st.error(f"Ошибка соединения: {str(e)}")
            if st.button("Назад"):
                st.session_state.live_state = "upload"
                st.rerun()
    elif st.session_state.live_state == "result":
        components.html("<script>window.parent.scrollTo(0, 0);</script>", height=0, width=0)
        data = st.session_state.result_data
        st.header(f"Сгенерированный плейлист (обработано файлов: {data['images_processed']})")
        for row in data.get("tracks", []):
            with st.container(border=True):
                song_id = int(row['song_id'])
                score = row['final_score']
                audio_path = f"{DEAM_AUDIO_DIR}/{song_id}.mp3"
                if not os.path.exists(audio_path): 
                    audio_path = audio_path.replace('.mp3', '.wav')
                if viewport == "desktop":
                    c1, c2 = st.columns([1, 3])
                    with c1:
                        st.write(f"**Track ID:** {song_id}")
                        st.caption(f"Score: {score:.4f}")
                    with c2:
                        if os.path.exists(audio_path): 
                            st.audio(audio_path)
                        else: 
                            st.caption("Аудиофайл не найден")
                else:
                    st.write(f"**Track ID:** {song_id} (Score: {score:.4f})")
                    if os.path.exists(audio_path): 
                        st.audio(audio_path)
                    else: 
                        st.caption("Аудиофайл не найден")
        st.markdown("<br>", unsafe_allow_html=True)
        with st.expander("Отладочная информация (Метрики)"):
            st.subheader("Координаты V/A")
            c_v, c_a = st.columns(2)
            c_v.metric("Valence", f"{data['target_v']:.2f}")
            c_a.metric("Arousal", f"{data['target_a']:.2f}")
            st.markdown("---")
            st.subheader("Акустические признаки (LLM)")
            feature_titles = {
                "energy": "RMS Energy",
                "flux": "Spectral Flux",
                "centroid": "Spectral Centroid",
                "pitch": "F0 (Pitch)",
                "hnr": "HNR",
                "zcr": "ZCR"
            }
            # Развернутые описания для комиссии (передаются в аргумент help)
            feature_helps = {
                "energy": "Среднеквадратичная амплитуда (громкость). Бывает высокой в плотных, интенсивных композициях, отражает общую акустическую энергию сцены.",
                "flux": "Спектральный поток. Измеряет резкость изменений в спектре. Высок при четком, агрессивном ритме и частой смене нот.",
                "centroid": "Спектральный центроид («яркость» звука). Высокие значения указывают на преобладание высоких частот (звонкие инструменты, открытые пространства).",
                "pitch": "Основная частота звука. Высокий pitch характерен для позитивных, легких или, напротив, напряженных мелодий.",
                "hnr": "Отношение гармоник к шуму. Высокий HNR — чистая мелодия и вокал. Низкий HNR — присутствие дисторшна, шумов или перкуссии.",
                "zcr": "Частота пересечения нуля. Отражает шумовую составляющую сигнала. Высок в треках с выраженными ударными (hi-hats) или атмосферным шумом."
            }
            llm_profile = data.get("llm_profile")
            if llm_profile and isinstance(llm_profile, dict) and len(llm_profile) > 0:
                cols_per_row = 2 if viewport == "mobile" else 3
                llm_items = list(llm_profile.items())
                for i in range(0, len(llm_items), cols_per_row):
                    cols = st.columns(cols_per_row)
                    for j in range(cols_per_row):
                        if i + j < len(llm_items):
                            k, v = llm_items[i + j]
                            label = feature_titles.get(k, k)
                            tooltip = feature_helps.get(k, "")
                            # Форматируем до 2 знаков после запятой (например, 0.64)
                            cols[j].metric(label, f"{v:.2f}", help=tooltip)
            else:
                st.caption("Акустический профиль недоступен. Применен fallback-алгоритм.")
            st.markdown("---")
            st.write("**Извлеченные теги (BLIP-2):**")
            st.write(", ".join([str(c).capitalize() for c in data.get("semantics", [])]))
        st.markdown("<br>", unsafe_allow_html=True)
        if st.button("Новый запрос", use_container_width=True):
            st.session_state.live_state = "upload"
            st.session_state.result_data = None
            st.session_state.pop("uploaded_images", None)
            st.rerun()
 if __name__ == "__main__":
    main()
@@ -32,7 +32,11 @@ class ImageProcessor:
        # Модуль семантического описания сцены
        print("Инициализация BLIP-2...")
-        self.blip_processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
+        # Обход бага конфигурации Hugging Face (ручная сборка процессора)
        from transformers import BlipImageProcessor, AutoTokenizer
        img_proc = BlipImageProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
        tok = AutoTokenizer.from_pretrained("Salesforce/blip2-opt-2.7b", use_fast=False)
        self.blip_processor = Blip2Processor(image_processor=img_proc, tokenizer=tok)
        self.blip_model = Blip2ForConditionalGeneration.from_pretrained(
            "Salesforce/blip2-opt-2.7b", 
            torch_dtype=torch.float16 
@@ -1,65 +1,67 @@
-import re
+import os
 import json
 import re
 import requests
 class LLMAcousticBridge:
-    def __init__(self, target_model="dolphin-llama3:8b"):
+    def __init__(self, model_name="dolphin-llama3:8b"):
-        self.api_url = "http://localhost:11434/api/generate"
+        self.model_name = model_name
-        self.model = target_model
+        # Динамический выбор URL (внутри Docker используется emom_ollama)
        base_url = os.getenv("OLLAMA_API_URL", "http://emom_ollama:11434")
        self.api_url = f"{base_url}/api/generate"
-    def _extract_json(self, raw_text: str):
+    def get_acoustic_profile(self, valence, arousal, semantics):
-        # Проверка на ИИдиота, LLM иногда игнорирует format="json" и оборачивает ответ в маркдаун
+        context_str = ", ".join(semantics) if semantics else "abstract scene"
-        try:
+        
-            match = re.search(r'\{.*\}', raw_text, re.DOTALL)
+        # Строгий промпт с примером вывода
-            if match:
+        prompt = f"""
-                return json.loads(match.group(0))
+        Analyze the visual context and emotions to determine the ideal background music properties. 
-            return json.loads(raw_text)
+        Emotions: Valence {valence:.1f}/9.0 (Positivity), Arousal {arousal:.1f}/9.0 (Energy). 
-        except json.JSONDecodeError:
+        Visual Context: {context_str}. 
-            # Если ИИдиот
+        Map this scene to exactly 6 acoustic features. Values MUST be floats between 0.0 and 1.0.
-            return None
+        
-
+        1. "energy": (Loudness/Density)
-    def get_acoustic_profile(self, v_score: float, a_score: float, scene_context: list) -> dict | None:
+        2. "flux": (Rhythmic sharpness/Beat)
-        # Агрегация контекста для обработки серии снимков (события)
+        3. "centroid": (Brightness)
-        context_merged = " | ".join(scene_context) if scene_context else "abstract scene"
+        4. "pitch": (Fundamental frequency)
        5. "hnr": (Harmonics-to-Noise)
        6. "zcr": (Percussiveness)
        Return ONLY a valid JSON object. No explanations, no markdown blocks.
        Example: {{"energy": 0.8, "flux": 0.5, "centroid": 0.6, "pitch": 0.4, "hnr": 0.9, "zcr": 0.3}}
        """
        system_prompt = f"""You are an expert music producer and acoustic engineer. 
 Analyze the visual context and emotions to determine the ideal background music properties.
 Emotions: Valence {v_score:.1f}/9.0 (Positivity), Arousal {a_score:.1f}/9.0 (Energy).
 Visual Context: {context_merged}.
 Map this scene to exactly 6 acoustic features. Values MUST be floats between 0.0 and 1.0.
 1. "energy": (Loudness/Density. High for massive/busy scenes, Low for calm)
 2. "flux": (Rhythmic sharpness/Beat. High for action/people/cars, Low for static nature)
 3. "centroid": (Brightness: 0=Dark/Bass/Massive, 1=Bright/Treble/Light)
 4. "pitch": (Fundamental frequency: 0=Low pitch/Huge objects, 1=High pitch/Small objects)
 5. "hnr": (Harmonics-to-Noise: 0=Noisy/Distorted textures, 1=Clear/Melodic/Smooth textures)
 6. "zcr": (Percussiveness. High for detailed noise like leaves/rain, Low for solid blocks)
 Return ONLY a valid JSON object. Do not add any text or explanation.
 Example: {{"energy": 0.5, "flux": 0.2, "centroid": 0.4, "pitch": 0.3, "hnr": 0.8, "zcr": 0.1}}"""
        try:
-            # Отправка промпта локальной Ollama
+            payload = {
-            response = requests.post(self.api_url, json={
+                "model": self.model_name,
-                "model": self.model,
+                "prompt": prompt,
                "prompt": system_prompt,
                "stream": False,
-                "format": "json"
+                "format": "json"  # Принудительный JSON-режим Ollama
-            }, timeout=45)
+            }
            response.raise_for_status()
-            raw_response = response.json().get("response", "")
+            print(f"Запрос акустического профиля к Ollama...")
-            profile_data = self._extract_json(raw_response)
+            response = requests.post(self.api_url, json=payload, timeout=120)
-            # Валидация структуры ответа
+            if response.status_code == 200:
-            expected_features = {'energy', 'flux', 'centroid', 'pitch', 'hnr', 'zcr'}
+                data = response.json()
-            
+                response_text = data.get("response", "")
            if profile_data and expected_features.issubset(profile_data.keys()):
                return profile_data
-            print("LLM вернула неполный или некорректный набор акустических признаков")
+                try:
-            return None
+                    # 1. Попытка прямой десериализации
-            
+                    profile = json.loads(response_text)
-        except requests.exceptions.RequestException as req_err:
+                    return profile
-            print(f"Не удалось подключиться к Ollama: {req_err}")
+                except json.JSONDecodeError:
-            return None
+                    # 2. Аварийное извлечение JSON из текста с помощью регулярного выражения
                    match = re.search(r'\{.*\}', response_text, re.DOTALL)
                    if match:
                        return json.loads(match.group(0))
                    print(f"Ошибка парсинга LLM ответа: {response_text}")
                    return {}
            else:
                print(f"Ollama вернула ошибку HTTP: {response.status_code}")
                return {}
        except Exception as e:
            print(f"Ошибка соединения с Ollama: {str(e)}")
            return {}
@@ -0,0 +1,461 @@
 .
 ├── bin
 │   ├── activate
 │   ├── activate.csh
 │   ├── activate.fish
 │   ├── activate.nu
 │   ├── activate.ps1
 │   ├── activate_this.py
 │   ├── debugpy
 │   ├── debugpy-adapter
 │   ├── f2py
 │   ├── fonttools
 │   ├── httpx
 │   ├── ipython
 │   ├── ipython3
 │   ├── isympy
 │   ├── jlpm
 │   ├── jsonpointer
 │   ├── jsonschema
 │   ├── jupyter
 │   ├── jupyter-dejavu
 │   ├── jupyter-events
 │   ├── jupyter-execute
 │   ├── jupyter-kernel
 │   ├── jupyter-kernelspec
 │   ├── jupyter-lab
 │   ├── jupyter-labextension
 │   ├── jupyter-labhub
 │   ├── jupyter-migrate
 │   ├── jupyter-nbconvert
 │   ├── jupyter-run
 │   ├── jupyter-server
 │   ├── jupyter-troubleshoot
 │   ├── jupyter-trust
 │   ├── normalizer
 │   ├── numpy-config
 │   ├── pip
 │   ├── pip3
 │   ├── pip3.12
 │   ├── proton
 │   ├── proton-viewer
 │   ├── pybabel
 │   ├── pyftmerge
 │   ├── pyftsubset
 │   ├── pygmentize
 │   ├── pyjson5
 │   ├── python -> /usr/bin/python3
 │   ├── python3 -> python
 │   ├── python3.12 -> python
 │   ├── send2trash
 │   ├── streamlit
 │   ├── streamlit.cmd
 │   ├── torchfrtrace
 │   ├── torchrun
 │   ├── tqdm
 │   ├── ttx
 │   ├── watchmedo
 │   └── wsdump
 ├── CACHEDIR.TAG
 ├── docker
 │   ├── Dockerfile.api
 │   └── Dockerfile.ui
 ├── docker-compose.yml
 ├── Dockerfile
 ├── .dockerignore
 ├── .env
 ├── etc
 │   └── jupyter
 │       ├── jupyter_notebook_config.d
 │       │   └── jupyterlab.json
 │       ├── jupyter_server_config.d
 │       │   ├── jupyterlab.json
 │       │   ├── jupyter-lsp-jupyter-server.json
 │       │   ├── jupyter_server_terminals.json
 │       │   └── notebook_shim.json
 │       └── nbconfig
 │           └── notebook.d
 ├── .gitignore
 ├── .idea
 │   ├── .gitignore
 │   ├── inspectionProfiles
 │   │   └── profiles_settings.xml
 │   ├── misc.xml
 │   ├── modules.xml
 │   ├── Thesis.iml
 │   ├── vcs.xml
 │   └── workspace.xml
 ├── lib
 │   └── python3.12
 │       └── site-packages
 │           ├── altair
 │           ├── altair-6.0.0.dist-info
 │           ├── anyio
 │           ├── anyio-4.12.1.dist-info
 │           ├── argon2
 │           ├── argon2_cffi-25.1.0.dist-info
 │           ├── _argon2_cffi_bindings
 │           ├── argon2_cffi_bindings-25.1.0.dist-info
 │           ├── arrow
 │           ├── arrow-1.4.0.dist-info
 │           ├── asttokens
 │           ├── asttokens-3.0.1.dist-info
 │           ├── async_lru
 │           ├── async_lru-2.0.5.dist-info
 │           ├── attr
 │           ├── attrs
 │           ├── attrs-25.4.0.dist-info
 │           ├── babel
 │           ├── babel-2.17.0.dist-info
 │           ├── beautifulsoup4-4.14.3.dist-info
 │           ├── bleach
 │           ├── bleach-6.3.0.dist-info
 │           ├── blinker
 │           ├── blinker-1.9.0.dist-info
 │           ├── bs4
 │           ├── cachetools
 │           ├── cachetools-6.2.4.dist-info
 │           ├── certifi
 │           ├── certifi-2026.1.4.dist-info
 │           ├── cffi
 │           ├── cffi-2.0.0.dist-info
 │           ├── _cffi_backend.cpython-312-x86_64-linux-gnu.so
 │           ├── charset_normalizer
 │           ├── charset_normalizer-3.4.4.dist-info
 │           ├── click
 │           ├── click-8.3.1.dist-info
 │           ├── comm
 │           ├── comm-0.2.3.dist-info
 │           ├── contourpy
 │           ├── contourpy-1.3.3.dist-info
 │           ├── cycler
 │           ├── cycler-0.12.1.dist-info
 │           ├── dateutil
 │           ├── debugpy
 │           ├── debugpy-1.8.19.dist-info
 │           ├── decorator-5.2.1.dist-info
 │           ├── decorator.py
 │           ├── defusedxml
 │           ├── defusedxml-0.7.1.dist-info
 │           ├── _distutils_hack
 │           ├── distutils-precedence.pth
 │           ├── .DS_Store
 │           ├── executing
 │           ├── executing-2.2.1.dist-info
 │           ├── fastjsonschema
 │           ├── fastjsonschema-2.21.2.dist-info
 │           ├── filelock
 │           ├── filelock-3.20.3.dist-info
 │           ├── fontTools
 │           ├── fonttools-4.61.1.dist-info
 │           ├── fqdn
 │           ├── fqdn-1.5.1.dist-info
 │           ├── fsspec
 │           ├── fsspec-2026.1.0.dist-info
 │           ├── functorch
 │           ├── git
 │           ├── gitdb
 │           ├── gitdb-4.0.12.dist-info
 │           ├── gitpython-3.1.46.dist-info
 │           ├── google
 │           ├── h11
 │           ├── h11-0.16.0.dist-info
 │           ├── httpcore
 │           ├── httpcore-1.0.9.dist-info
 │           ├── httpx
 │           ├── httpx-0.28.1.dist-info
 │           ├── idna
 │           ├── idna-3.11.dist-info
 │           ├── ipykernel
 │           ├── ipykernel-7.1.0.dist-info
 │           ├── ipykernel_launcher.py
 │           ├── IPython
 │           ├── ipython-9.9.0.dist-info
 │           ├── ipython_pygments_lexers-1.1.1.dist-info
 │           ├── ipython_pygments_lexers.py
 │           ├── isoduration
 │           ├── isoduration-20.11.0.dist-info
 │           ├── isympy.py
 │           ├── jedi
 │           ├── jedi-0.19.2.dist-info
 │           ├── jinja2
 │           ├── jinja2-3.1.6.dist-info
 │           ├── joblib
 │           ├── joblib-1.5.3.dist-info
 │           ├── json5
 │           ├── json5-0.13.0.dist-info
 │           ├── jsonpointer-3.0.0.dist-info
 │           ├── jsonpointer.py
 │           ├── jsonschema
 │           ├── jsonschema-4.26.0.dist-info
 │           ├── jsonschema_specifications
 │           ├── jsonschema_specifications-2025.9.1.dist-info
 │           ├── jupyter_client
 │           ├── jupyter_client-8.8.0.dist-info
 │           ├── jupyter_core
 │           ├── jupyter_core-5.9.1.dist-info
 │           ├── jupyter_events
 │           ├── jupyter_events-0.12.0.dist-info
 │           ├── jupyterlab
 │           ├── jupyterlab-4.5.1.dist-info
 │           ├── jupyterlab_pygments
 │           ├── jupyterlab_pygments-0.3.0.dist-info
 │           ├── jupyterlab_server
 │           ├── jupyterlab_server-2.28.0.dist-info
 │           ├── jupyter_lsp
 │           ├── jupyter_lsp-2.3.0.dist-info
 │           ├── jupyter.py
 │           ├── jupyter_server
 │           ├── jupyter_server-2.17.0.dist-info
 │           ├── jupyter_server_terminals
 │           ├── jupyter_server_terminals-0.5.3.dist-info
 │           ├── kiwisolver
 │           ├── kiwisolver-1.4.9.dist-info
 │           ├── lark
 │           ├── lark-1.3.1.dist-info
 │           ├── markupsafe
 │           ├── markupsafe-3.0.3.dist-info
 │           ├── matplotlib
 │           ├── matplotlib-3.10.8.dist-info
 │           ├── matplotlib_inline
 │           ├── matplotlib_inline-0.2.1.dist-info
 │           ├── mistune
 │           ├── mistune-3.2.0.dist-info
 │           ├── mpl_toolkits
 │           ├── mpmath
 │           ├── mpmath-1.3.0.dist-info
 │           ├── narwhals
 │           ├── narwhals-2.15.0.dist-info
 │           ├── nbclient
 │           ├── nbclient-0.10.4.dist-info
 │           ├── nbconvert
 │           ├── nbconvert-7.16.6.dist-info
 │           ├── nbformat
 │           ├── nbformat-5.10.4.dist-info
 │           ├── nest_asyncio-1.6.0.dist-info
 │           ├── nest_asyncio.py
 │           ├── networkx
 │           ├── networkx-3.6.1.dist-info
 │           ├── notebook_shim
 │           ├── notebook_shim-0.2.4.dist-info
 │           ├── numpy
 │           ├── numpy-2.4.1.dist-info
 │           ├── numpy.libs
 │           ├── nvidia
 │           ├── nvidia_cublas_cu12-12.8.4.1.dist-info
 │           ├── nvidia_cuda_cupti_cu12-12.8.90.dist-info
 │           ├── nvidia_cuda_nvrtc_cu12-12.8.93.dist-info
 │           ├── nvidia_cuda_runtime_cu12-12.8.90.dist-info
 │           ├── nvidia_cudnn_cu12-9.10.2.21.dist-info
 │           ├── nvidia_cufft_cu12-11.3.3.83.dist-info
 │           ├── nvidia_cufile_cu12-1.13.1.3.dist-info
 │           ├── nvidia_curand_cu12-10.3.9.90.dist-info
 │           ├── nvidia_cusolver_cu12-11.7.3.90.dist-info
 │           ├── nvidia_cusparse_cu12-12.5.8.93.dist-info
 │           ├── nvidia_cusparselt_cu12-0.7.1.dist-info
 │           ├── nvidia_nccl_cu12-2.27.5.dist-info
 │           ├── nvidia_nvjitlink_cu12-12.8.93.dist-info
 │           ├── nvidia_nvshmem_cu12-3.3.20.dist-info
 │           ├── nvidia_nvtx_cu12-12.8.90.dist-info
 │           ├── packaging
 │           ├── packaging-25.0.dist-info
 │           ├── pandas
 │           ├── pandas-2.3.3.dist-info
 │           ├── pandocfilters-1.5.1.dist-info
 │           ├── pandocfilters.py
 │           ├── parso
 │           ├── parso-0.8.5.dist-info
 │           ├── pexpect
 │           ├── pexpect-4.9.0.dist-info
 │           ├── PIL
 │           ├── pillow-12.1.0.dist-info
 │           ├── pillow.libs
 │           ├── pip
 │           ├── pip-25.3.dist-info
 │           ├── pkg_resources
 │           ├── platformdirs
 │           ├── platformdirs-4.5.1.dist-info
 │           ├── prometheus_client
 │           ├── prometheus_client-0.23.1.dist-info
 │           ├── prompt_toolkit
 │           ├── prompt_toolkit-3.0.52.dist-info
 │           ├── protobuf-6.33.4.dist-info
 │           ├── psutil
 │           ├── psutil-7.2.1.dist-info
 │           ├── ptyprocess
 │           ├── ptyprocess-0.7.0.dist-info
 │           ├── pure_eval
 │           ├── pure_eval-0.2.3.dist-info
 │           ├── pyarrow
 │           ├── pyarrow-22.0.0.dist-info
 │           ├── pycparser
 │           ├── pycparser-2.23.dist-info
 │           ├── pydeck
 │           ├── pydeck-0.9.1.dist-info
 │           ├── pygments
 │           ├── pygments-2.19.2.dist-info
 │           ├── pylab.py
 │           ├── pyparsing
 │           ├── pyparsing-3.3.1.dist-info
 │           ├── python_dateutil-2.9.0.post0.dist-info
 │           ├── pythonjsonlogger
 │           ├── python_json_logger-4.0.0.dist-info
 │           ├── pytz
 │           ├── pytz-2025.2.dist-info
 │           ├── pyyaml-6.0.3.dist-info
 │           ├── pyzmq-27.1.0.dist-info
 │           ├── pyzmq.libs
 │           ├── referencing
 │           ├── referencing-0.37.0.dist-info
 │           ├── requests
 │           ├── requests-2.32.5.dist-info
 │           ├── rfc3339_validator-0.1.4.dist-info
 │           ├── rfc3339_validator.py
 │           ├── rfc3986_validator-0.1.1.dist-info
 │           ├── rfc3986_validator.py
 │           ├── rfc3987_syntax
 │           ├── rfc3987_syntax-1.1.0.dist-info
 │           ├── rpds
 │           ├── rpds_py-0.30.0.dist-info
 │           ├── scikit_learn-1.8.0.dist-info
 │           ├── scikit_learn.libs
 │           ├── scipy
 │           ├── scipy-1.17.0.dist-info
 │           ├── scipy.libs
 │           ├── send2trash
 │           ├── send2trash-2.0.0.dist-info
 │           ├── setuptools
 │           ├── setuptools-80.9.0.dist-info
 │           ├── six-1.17.0.dist-info
 │           ├── six.py
 │           ├── sklearn
 │           ├── smmap
 │           ├── smmap-5.0.2.dist-info
 │           ├── soupsieve
 │           ├── soupsieve-2.8.1.dist-info
 │           ├── stack_data
 │           ├── stack_data-0.6.3.dist-info
 │           ├── streamlit
 │           ├── streamlit-1.53.0.dist-info
 │           ├── sympy
 │           ├── sympy-1.14.0.dist-info
 │           ├── tenacity
 │           ├── tenacity-9.1.2.dist-info
 │           ├── terminado
 │           ├── terminado-0.18.1.dist-info
 │           ├── threadpoolctl-3.6.0.dist-info
 │           ├── threadpoolctl.py
 │           ├── tinycss2
 │           ├── tinycss2-1.4.0.dist-info
 │           ├── toml
 │           ├── toml-0.10.2.dist-info
 │           ├── torch
 │           ├── torch-2.9.1.dist-info
 │           ├── torchaudio
 │           ├── torchaudio-2.9.1.dist-info
 │           ├── torchgen
 │           ├── torchvision
 │           ├── torchvision-0.24.1.dist-info
 │           ├── torchvision.libs
 │           ├── tornado
 │           ├── tornado-6.5.4.dist-info
 │           ├── tqdm
 │           ├── tqdm-4.67.1.dist-info
 │           ├── traitlets
 │           ├── traitlets-5.14.3.dist-info
 │           ├── triton
 │           ├── triton-3.5.1.dist-info
 │           ├── typing_extensions-4.15.0.dist-info
 │           ├── typing_extensions.py
 │           ├── tzdata
 │           ├── tzdata-2025.3.dist-info
 │           ├── uri_template
 │           ├── uri_template-1.3.0.dist-info
 │           ├── urllib3
 │           ├── urllib3-2.6.3.dist-info
 │           ├── _virtualenv.pth
 │           ├── _virtualenv.py
 │           ├── watchdog
 │           ├── watchdog-6.0.0.dist-info
 │           ├── wcwidth
 │           ├── wcwidth-0.2.14.dist-info
 │           ├── webcolors
 │           ├── webcolors-25.10.0.dist-info
 │           ├── webencodings
 │           ├── webencodings-0.5.1.dist-info
 │           ├── websocket
 │           ├── websocket_client-1.9.0.dist-info
 │           ├── _yaml
 │           ├── yaml
 │           └── zmq
 ├── Makefile
 ├── NFS
 ├── poetry.lock
 ├── pyproject.toml
 ├── pyvenv.cfg
 ├── README.md
 ├── requirements.txt
 ├── runs
 ├── share
 │   ├── applications
 │   │   └── jupyterlab.desktop
 │   ├── icons
 │   │   └── hicolor
 │   │       └── scalable
 │   ├── jupyter
 │   │   ├── kernels
 │   │   │   └── python3
 │   │   ├── lab
 │   │   │   ├── schemas
 │   │   │   ├── static
 │   │   │   └── themes
 │   │   ├── labextensions
 │   │   │   └── jupyterlab_pygments
 │   │   ├── nbconvert
 │   │   │   └── templates
 │   │   └── nbextensions
 │   │       └── pydeck
 │   └── man
 │       └── man1
 │           ├── ipython.1
 │           ├── isympy.1
 │           └── ttx.1
 ├── src
 │   ├── 5_epoch_emoset_resnet50_finetuned_2.41M.pth
 │   ├── api.py
 │   ├── data_loader.py
 │   ├── dataset_paths_cache.pkl
 │   ├── emoset_resnet50_best.pth
 │   ├── emoset_resnet50_finetuned_2_41M_best.pth
 │   ├── emoset_resnet50_resume.pth
 │   ├── emoset_test_embeddings.npy
 │   ├── emoset_test_labels.npy
 │   ├── main.py
 │   ├── music_engine
 │   │   ├── image_processor.py
 │   │   ├── __init__.py
 │   │   ├── llm_bridge.py
 │   │   ├── matcher.py
 │   │   └── va_regressor.pkl
 │   ├── scripts
 │   │   ├── 00_setup_env.sh
 │   │   ├── 01_download_DEAM.py
 │   │   ├── 02_download_EmoSet.py
 │   │   ├── 11_prerp_DEAM.py
 │   │   ├── 20_bench_GPU.py
 │   │   ├── 21_train_images.ipynb
 │   │   ├── 22_extract_embeddings.ipynb
 │   │   ├── 23_aggregate_DEAM_timeline.py
 │   │   ├── 24_train_regressor.py
 │   │   ├── 31_finetune_2.41M.py
 │   │   ├── 90_acc_images_model.ipynb
 │   │   └── 91_generate_metrics.py
 │   └── tabs
 │       ├── tab_dataset.py
 │       └── tab_live.py
 ├── tree.txt
 └── .vscode
    ├── launch.json
    └── tasks.json
 322 directories, 137 files