diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..02e2e99
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,18 @@
+bin/
+lib/
+share/
+etc/
+include/
+pyvenv.cfg
+.idea/
+.vscode/
+__pycache__/
+*.pyc
+.git/
+runs/
+dataset/
+NFS/
+*.pth
+*.pkl
+*.npy
+.env
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..e73e388
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,78 @@
+version: '3.8'
+
+# EmoM stack: Streamlit UI -> FastAPI inference API -> Ollama.
+# Services reach each other by service name on the emom_mesh bridge network;
+# only the UI publishes a port on the host.
+networks:
+  emom_mesh:
+    driver: bridge
+
+services:
+  # Streamlit front end, built from docker/Dockerfile.ui.
+  emom_ui:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile.ui
+    container_name: emom_web_ui
+    restart: unless-stopped
+    ports:
+      - "8080:8080"
+    networks:
+      - emom_mesh
+    env_file:
+      - .env
+    volumes:
+      # This bind mount shadows the src/ copy that the Dockerfile bakes into the image.
+      - ./src:/app/src
+      # ":?" makes Compose abort with the message when the variable is unset or
+      # empty, instead of building a volume spec with an empty host path.
+      - "${DATA_DEAM_DIR:?set it in .env}:/app/dataset/DEAM:ro"
+    depends_on:
+      - emom_inference
+
+  # FastAPI inference service, built from docker/Dockerfile.api.
+  emom_inference:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile.api
+    container_name: emom_pytorch_api
+    restart: unless-stopped
+    networks:
+      - emom_mesh
+    env_file:
+      - .env
+    volumes:
+      # Model artifacts are mounted read-only; .dockerignore keeps *.pth and
+      # *.pkl out of the build context, so they are not part of the image.
+      - "${HOST_ARTIFACTS_DIR:?set it in .env}/emoset_resnet50_best.pth:/app/src/emoset_resnet50_best.pth:ro"
+      - "${HOST_ARTIFACTS_DIR:?set it in .env}/music_engine/va_regressor.pkl:/app/src/music_engine/va_regressor.pkl:ro"
+      - "${DATA_DEAM_DIR:?set it in .env}:/app/dataset/DEAM:ro"
+      - ~/.cache/huggingface:/root/.cache/huggingface
+    # The LLM bridge targets http://emom_ollama:11434 by default, so Ollama must
+    # be started whenever this service is.
+    depends_on:
+      - emom_ollama
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+
+  # Ollama LLM backend; no host port is published.
+  emom_ollama:
+    image: ollama/ollama:latest
+    container_name: emom_ollama_engine
+    restart: unless-stopped
+    networks:
+      - emom_mesh
+    volumes:
+      - ~/.ollama:/root/.ollama
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
\ No newline at end of file
diff --git a/docker/Dockerfile.api b/docker/Dockerfile.api
new file mode 100644
index 0000000..f93d3f2
--- /dev/null
+++ b/docker/Dockerfile.api
@@ -0,0 +1,24 @@
+FROM pytorch/pytorch:2.2.1-cuda12.1-cudnn8-runtime
+
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+# 1. System libraries; --no-install-recommends keeps optional packages out of the image
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libglib2.0-0 libsm6 libxext6 libxrender-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# 2. Python packages (requests is listed because music_engine/llm_bridge.py imports it directly)
+RUN pip install --no-cache-dir fastapi uvicorn requests timm scikit-learn pandas joblib python-multipart transformers==4.38.2 tokenizers==0.15.2 accelerate
+
+# 3. Copy the application code into the image
+WORKDIR /app
+COPY src/ /app/src/
+
+# 4. Run from inside src/ so the modules import without a "src." package prefix
+WORKDIR /app/src
+
+EXPOSE 8000
+
+# 5. Serve the FastAPI "app" object defined in api.py
+CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
diff --git a/docker/Dockerfile.ui b/docker/Dockerfile.ui
new file mode 100644
index 0000000..1bbfc38
--- /dev/null
+++ b/docker/Dockerfile.ui
@@ -0,0 +1,17 @@
+FROM python:3.12-slim
+
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+WORKDIR /app
+RUN pip install --no-cache-dir streamlit==1.32.0 requests pandas pillow
+
+COPY src/ /app/src/
+
+# Switch into src/ so main.py is started from its own directory
+WORKDIR /app/src
+
+EXPOSE 8080
+
+# Start the Streamlit UI on port 8080, listening on all interfaces
+CMD ["streamlit", "run", "main.py", "--server.port", "8080", "--server.address", "0.0.0.0"]
\ No newline at end of file
diff --git a/doecker-compose.yml b/doecker-compose.yml
deleted file mode 100644
index 2365e05..0000000
--- a/doecker-compose.yml
+++ /dev/null
@@ -1,64 +0,0 @@
-version: '3.8'
-
-# Определение общих сетей для изоляции трафика
-networks:
- ai_mesh:
- driver: bridge
-
-services:
- # ----------------------------------------------------
- # SERVICE 1: Frontend (Пользовательский интерфейс)
- # Не требует GPU, может быть вынесен на отдельный сервер
- # ----------------------------------------------------
- web_ui:
- build:
- context: .
- dockerfile: Dockerfile
- container_name: emom_frontend
- restart: always
- ports:
- - "8080:8080"
- networks:
- - ai_mesh
- environment:
- - STREAMLIT_RUN=1
- # Указываем UI, где искать LLM-бэкенд (внутри Docker-сети)
- - OLLAMA_HOST=http://llm_backend:11434
- volumes:
- - ./src:/app/src
- # Модели пока остаются здесь, так как код монолитный,
- # но архитектурно сервис уже изолирован
- - /home/zin/projects/Thesis/src/emoset_resnet50_best.pth:/app/emoset_resnet50_best.pth:ro
- - /home/zin/projects/Thesis/src/music_engine/va_regressor.pkl:/app/src/music_engine/va_regressor.pkl:ro
- - /home/zin/projects/Thesis/dataset/DEAM:/app/dataset/DEAM:ro
- # Временно оставляем GPU для PyTorch (пока он не вынесен в API)
- deploy:
- resources:
- reservations:
- devices:
- - driver: nvidia
- count: 1
- capabilities: [gpu]
-
- # ----------------------------------------------------
- # SERVICE 2: LLM Inference Backend (Ollama)
- # Изолированный сервис для языковой модели на GPU
- # ----------------------------------------------------
- llm_backend:
- image: ollama/ollama:latest
- container_name: ollama_gpu_inference
- restart: always
- networks:
- - ai_mesh
- ports:
- - "11434:11434"
- volumes:
- # Проброс локальных моделей Ollama, чтобы не качать их заново внутри докера
- - ~/.ollama:/root/.ollama
- deploy:
- resources:
- reservations:
- devices:
- - driver: nvidia
- count: 1
- capabilities: [gpu]
\ No newline at end of file
diff --git a/src/api.py b/src/api.py
new file mode 100644
index 0000000..2dd0b04
--- /dev/null
+++ b/src/api.py
@@ -0,0 +1,85 @@
+import io
+import traceback
+import numpy as np
+from typing import List
+from fastapi import FastAPI, UploadFile, File, HTTPException
+from fastapi.responses import JSONResponse
+from PIL import Image
+
+from data_loader import load_music_engine, load_image_processor
+from music_engine.llm_bridge import LLMAcousticBridge
+
+app = FastAPI(title="EmoM Inference API", version="1.0.0")
+
+# Process-wide registry of the heavy components. Every slot starts as None and
+# is filled by startup_event() below.
+ml_context = dict.fromkeys(("image_processor", "music_matcher", "llm_bridge"))
+
+
+@app.on_event("startup")
+async def startup_event():
+    """Build the image processor, the music matcher and the LLM bridge at application startup."""
+    print("Инициализация нейросетевого ядра EmoM...")
+    ml_context["image_processor"] = load_image_processor()
+    ml_context["music_matcher"] = load_music_engine()
+    ml_context["llm_bridge"] = LLMAcousticBridge()
+    print("Вычислительный конвейер готов к работе.")
+
+@app.post("/analyze")
+async def analyze_event_endpoint(files: List[UploadFile] = File(...)):
+    """Turn one event (a batch of uploaded photos) into a ranked playlist.
+
+    Args:
+        files: Uploaded images; each must be decodable by Pillow.
+
+    Returns:
+        JSONResponse with target V/A, LLM profile, scene captions and tracks.
+
+    Raises:
+        HTTPException: 400 for an undecodable upload, 500 for any later failure.
+    """
+    # NOTE(review): model inference and the Ollama call block the event loop while they run.
+    try:
+        images = []
+        for file in files:
+            image_bytes = await file.read()
+            try:
+                images.append(Image.open(io.BytesIO(image_bytes)).convert("RGB"))
+            except OSError as exc:
+                # Pillow reports undecodable data as OSError: a client error, not a 500.
+                detail = f"Файл не является изображением: {file.filename}"
+                raise HTTPException(status_code=400, detail=detail) from exc
+        print(f"Начата обработка события из {len(images)} фотографий...")
+        img_processor = ml_context["image_processor"]
+        matcher = ml_context["music_matcher"]
+        llm = ml_context["llm_bridge"]
+        all_v, all_a, all_objects = [], [], []
+        for img in images:
+            v, a = matcher.predict_va(img_processor.extract_embedding(img))
+            all_v.append(v)
+            all_a.append(a)
+            all_objects.append(img_processor.describe_scene(img))
+
+        # Average the per-image emotions; dict.fromkeys de-duplicates captions
+        # in upload order, whereas set() order for strings changes between processes.
+        target_v = float(np.mean(all_v))
+        target_a = float(np.mean(all_a))
+        unique_semantics = list(dict.fromkeys(all_objects))
+        print(f"Запрос к Ollama. V={target_v:.2f}, A={target_a:.2f}")
+        llm_profile = llm.get_acoustic_profile(target_v, target_a, unique_semantics)
+        print("Поиск подходящих композиций...")
+        playlist_df = matcher.find_nearest_tracks(target_v, target_a, llm_profile=llm_profile, top_k=15)
+        return JSONResponse(content={
+            "status": "success",
+            "images_processed": len(images),
+            "target_v": target_v,
+            "target_a": target_a,
+            "llm_profile": llm_profile,
+            "semantics": unique_semantics,
+            "tracks": playlist_df.to_dict(orient="records")
+        })
+    except HTTPException:
+        raise  # keep the 400 raised above instead of re-wrapping it as a 500
+    except Exception as e:
+        print(traceback.format_exc())
+        raise HTTPException(status_code=500, detail=f"Ошибка инференса: {str(e)}")
\ No newline at end of file
diff --git a/src/data_loader.py b/src/data_loader.py
index caa7ae5..64dfd9f 100644
--- a/src/data_loader.py
+++ b/src/data_loader.py
@@ -1,55 +1,49 @@
-import os
from pathlib import Path
import pandas as pd
import numpy as np
-import streamlit as st
+# Импорты твоих движков
from music_engine.matcher import MusicMatcher
from music_engine.image_processor import ImageProcessor
+# Базовая директория (папка src)
BASE_DIR = Path(__file__).resolve().parent
-@st.cache_resource
def load_music_engine():
- # Инициализация базы данных и регрессора для музыкального мэтчинга
+    """Load the track database and the regressor model for the backend."""
+    # The paths match what is mounted into the container by Docker
db_path = BASE_DIR.parent / "dataset" / "DEAM" / "music_db.csv"
model_path = BASE_DIR / "music_engine" / "va_regressor.pkl"
- if not db_path.exists():
- print(f"Музыкальная БД не найдена: {db_path}")
- return None
-
return MusicMatcher(db_path=db_path, model_path=model_path)
-@st.cache_resource
def load_image_processor():
- # Модуль обработки визуальных признаков
- model_path = BASE_DIR / "emoset_resnet50_best.pth"
-
- # Обработка пути при вызове из корневой директории
- if not model_path.exists():
- model_path = BASE_DIR.parent / "emoset_resnet50_best.pth"
-
- return ImageProcessor(model_path=model_path)
+    """Initialise the neural feature extractor (ResNet-50)."""
+    weights_path = BASE_DIR / "emoset_resnet50_best.pth"
+    return ImageProcessor(weights_path)
-@st.cache_data
def load_emoset_data():
- # Выборка данных датасета для вкладки отладки
- dataset_root = BASE_DIR.parent / "dataset" / "EmoSet-118K" / "test"
-
- csv_path = dataset_root / "labels.csv"
- img_dir = dataset_root / "images"
- emb_path = BASE_DIR / "emoset_test_embeddings.npy"
- lbl_path = BASE_DIR / "emoset_test_labels.npy"
-
- if not all([csv_path.exists(), emb_path.exists(), lbl_path.exists()]):
- print("Тестовые файлы датасета не найдены, вкладка отладки может работать некорректно")
- return None, None, None, None
-
- labels_df = pd.read_csv(csv_path)
-
- test_filenames = labels_df['filename'].tolist()
- test_embeddings = np.load(emb_path)
- test_labels = np.load(lbl_path)
-
- return test_filenames, test_embeddings, test_labels, img_dir
\ No newline at end of file
+    """
+    Load the EmoSet reference test split (file names, embeddings, labels, image directory).
+    (Kept for backward compatibility, in case local debugging is needed.)
+    """
+    try:
+        images_path = BASE_DIR.parent / "dataset" / "EmoSet-118K" / "test" / "images"
+        labels_path = BASE_DIR / "emoset_test_labels.npy"
+        embeddings_path = BASE_DIR / "emoset_test_embeddings.npy"
+
+        # If the files are missing (e.g. in production), return None for every value
+        if not all(p.exists() for p in [labels_path, embeddings_path]):
+            return None, None, None, None
+
+        labels = np.load(labels_path)
+        embeddings = np.load(embeddings_path)
+
+        # Read the CSV with the labels
+        df = pd.read_csv(BASE_DIR.parent / "dataset" / "EmoSet-118K" / "test" / "labels.csv")
+        image_files = df['filename'].tolist()
+
+        return image_files, embeddings, labels, images_path
+    except Exception as e:
+        print(f"Предупреждение: Тестовые артефакты EmoSet не найдены ({e})")
+        return None, None, None, None
\ No newline at end of file
diff --git a/src/main.py b/src/main.py
index b0da338..f771a47 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,73 +1,190 @@
-import sys
import os
-import subprocess
-
+import requests
import streamlit as st
import streamlit.components.v1 as components
+from PIL import Image
+import base64
+from io import BytesIO
-from data_loader import load_music_engine, load_emoset_data, load_image_processor
-from tabs.tab_dataset import render_dataset_tab
-from tabs.tab_live import render_live_tab
+st.set_page_config(page_title="EmoM Playlist Generator", layout="wide", initial_sidebar_state="collapsed")
-# Костыль для прямого запуска
-if __name__ == "__main__":
- if "STREAMLIT_RUN" not in os.environ:
- os.environ["STREAMLIT_RUN"] = "1"
- cmd = [sys.executable, "-m", "streamlit", "run", __file__, "--server.port", "8080", "--server.address", "0.0.0.0"]
- subprocess.run(cmd)
- sys.exit()
+API_URL = os.getenv("BACKEND_API_URL", "http://emom_inference:8000") + "/analyze"
+DEAM_AUDIO_DIR = "/app/dataset/DEAM/DEAM_audio/MEMD_audio"
-viewport_mode = st.query_params.get("viewport", "desktop")
-page_layout = "centered" if viewport_mode == "mobile" else "wide"
-
-st.set_page_config(page_title="Thesis Demo", layout=page_layout)
-
-# Определения ширины экрана и смены верстки
-components.html(
- """
-
- """,
- height=0,
- width=0,
-)
+ if len(images) > max_display:
+ html_images += f'+{len(images) - max_display}'
+ return f'
{html_images}
'
-st.markdown(
- """
+def main():
+ if "live_state" not in st.session_state:
+ st.session_state.live_state = "upload"
+ if "result_data" not in st.session_state:
+ st.session_state.result_data = None
+
+ viewport = st.query_params.get("viewport", "desktop")
+
+ st.markdown("""
- """,
- unsafe_allow_html=True
-)
+ """, unsafe_allow_html=True)
-# Подгрузка ML-моделей и датасета
-music_matcher = load_music_engine()
-img_processor = load_image_processor()
-emoset_files, emoset_embeddings, emoset_labels, emoset_path = load_emoset_data()
+ if st.session_state.live_state == "upload":
+ upload_placeholder = st.empty()
+ with upload_placeholder.container():
+ st.write("Загрузите изображения для визуально-семантического анализа.")
+ if viewport == "mobile":
+ st.markdown("
", unsafe_allow_html=True)
+
+ uploaded_files = st.file_uploader(
+ "Загрузка файлов",
+ type=['png', 'jpg', 'jpeg'],
+ accept_multiple_files=True,
+ label_visibility="collapsed" if viewport == "mobile" else "visible"
+ )
+
+ if uploaded_files:
+ st.markdown("
", unsafe_allow_html=True)
+ if st.button("Выполнить анализ", type="primary", use_container_width=True):
+ st.session_state.uploaded_images = uploaded_files
+ st.session_state.live_state = "processing"
+ upload_placeholder.empty()
+ st.rerun()
+
+ st.markdown("
", unsafe_allow_html=True)
+ st.caption("Выбранные файлы:")
+ st.markdown(get_thumbnail_html(uploaded_files), unsafe_allow_html=True)
-st.title("Генератор саундтреков (Research Demo)")
+ elif st.session_state.live_state == "processing":
+ components.html("", height=0, width=0)
+ files = st.session_state.get("uploaded_images", [])
+ st.markdown('', unsafe_allow_html=True)
+
+        # "Назад" resets the state in an on_click callback: Streamlit runs callbacks
+        # before the rerun, so a click no longer re-sends the upload to the API.
+        def back_to_upload():
+            st.session_state.live_state = "upload"
+
+        try:
+            upload_data = [('files', (f.name, f.getvalue(), f.type)) for f in files]
+            response = requests.post(API_URL, files=upload_data, timeout=300)
+            if response.status_code == 200:
+                st.session_state.result_data = response.json()
+                st.session_state.live_state = "result"
+                st.rerun()
+            else:
+                st.error(f"Ошибка сервера: {response.status_code}")
+                st.button("Назад", on_click=back_to_upload)
+        except Exception as e:
+            st.error(f"Ошибка соединения: {str(e)}")
+            st.button("Назад", on_click=back_to_upload)
+
+ elif st.session_state.live_state == "result":
+ components.html("", height=0, width=0)
+ data = st.session_state.result_data
+
+ st.header(f"Сгенерированный плейлист (обработано файлов: {data['images_processed']})")
+
+ for row in data.get("tracks", []):
+ with st.container(border=True):
+ song_id = int(row['song_id'])
+ score = row['final_score']
+
+ audio_path = f"{DEAM_AUDIO_DIR}/{song_id}.mp3"
+ if not os.path.exists(audio_path):
+ audio_path = audio_path.replace('.mp3', '.wav')
+
+ if viewport == "desktop":
+ c1, c2 = st.columns([1, 3])
+ with c1:
+ st.write(f"**Track ID:** {song_id}")
+ st.caption(f"Score: {score:.4f}")
+ with c2:
+ if os.path.exists(audio_path):
+ st.audio(audio_path)
+ else:
+ st.caption("Аудиофайл не найден")
+ else:
+ st.write(f"**Track ID:** {song_id} (Score: {score:.4f})")
+ if os.path.exists(audio_path):
+ st.audio(audio_path)
+ else:
+ st.caption("Аудиофайл не найден")
-tab_live, tab_debug = st.tabs(["Анализ событий (Свои фото)", "Отладка (Датасет EmoSet)"])
+ st.markdown("
", unsafe_allow_html=True)
+
+ with st.expander("Отладочная информация (Метрики)"):
+ st.subheader("Координаты V/A")
+ c_v, c_a = st.columns(2)
+ c_v.metric("Valence", f"{data['target_v']:.2f}")
+ c_a.metric("Arousal", f"{data['target_a']:.2f}")
+
+ st.markdown("---")
+ st.subheader("Акустические признаки (LLM)")
+
+ feature_titles = {
+ "energy": "RMS Energy",
+ "flux": "Spectral Flux",
+ "centroid": "Spectral Centroid",
+ "pitch": "F0 (Pitch)",
+ "hnr": "HNR",
+ "zcr": "ZCR"
+ }
+
+ # Развернутые описания для комиссии (передаются в аргумент help)
+ feature_helps = {
+ "energy": "Среднеквадратичная амплитуда (громкость). Бывает высокой в плотных, интенсивных композициях, отражает общую акустическую энергию сцены.",
+ "flux": "Спектральный поток. Измеряет резкость изменений в спектре. Высок при четком, агрессивном ритме и частой смене нот.",
+ "centroid": "Спектральный центроид («яркость» звука). Высокие значения указывают на преобладание высоких частот (звонкие инструменты, открытые пространства).",
+ "pitch": "Основная частота звука. Высокий pitch характерен для позитивных, легких или, напротив, напряженных мелодий.",
+ "hnr": "Отношение гармоник к шуму. Высокий HNR — чистая мелодия и вокал. Низкий HNR — присутствие дисторшна, шумов или перкуссии.",
+ "zcr": "Частота пересечения нуля. Отражает шумовую составляющую сигнала. Высок в треках с выраженными ударными (hi-hats) или атмосферным шумом."
+ }
+
+ llm_profile = data.get("llm_profile")
+ if llm_profile and isinstance(llm_profile, dict) and len(llm_profile) > 0:
+ cols_per_row = 2 if viewport == "mobile" else 3
+ llm_items = list(llm_profile.items())
+
+ for i in range(0, len(llm_items), cols_per_row):
+ cols = st.columns(cols_per_row)
+ for j in range(cols_per_row):
+ if i + j < len(llm_items):
+ k, v = llm_items[i + j]
+ label = feature_titles.get(k, k)
+ tooltip = feature_helps.get(k, "")
+ # Форматируем до 2 знаков после запятой (например, 0.64)
+ cols[j].metric(label, f"{v:.2f}", help=tooltip)
+ else:
+ st.caption("Акустический профиль недоступен. Применен fallback-алгоритм.")
+
+ st.markdown("---")
+ st.write("**Извлеченные теги (BLIP-2):**")
+ st.write(", ".join([str(c).capitalize() for c in data.get("semantics", [])]))
-with tab_live:
- if img_processor:
- render_live_tab(music_matcher, img_processor)
- else:
- st.error("Ошибка загрузки: не найдены веса ResNet для image_processor.")
+ st.markdown("
", unsafe_allow_html=True)
+
+ if st.button("Новый запрос", use_container_width=True):
+ st.session_state.live_state = "upload"
+ st.session_state.result_data = None
+ st.session_state.pop("uploaded_images", None)
+ st.rerun()
-with tab_debug:
- render_dataset_tab(music_matcher, emoset_files, emoset_embeddings, emoset_labels, emoset_path)
\ No newline at end of file
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/src/music_engine/image_processor.py b/src/music_engine/image_processor.py
index fb654c2..01caacd 100644
--- a/src/music_engine/image_processor.py
+++ b/src/music_engine/image_processor.py
@@ -32,7 +32,11 @@ class ImageProcessor:
# Модуль семантического описания сцены
print("Инициализация BLIP-2...")
- self.blip_processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
+ # Обход бага конфигурации Hugging Face (ручная сборка процессора)
+ from transformers import BlipImageProcessor, AutoTokenizer
+ img_proc = BlipImageProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
+ tok = AutoTokenizer.from_pretrained("Salesforce/blip2-opt-2.7b", use_fast=False)
+ self.blip_processor = Blip2Processor(image_processor=img_proc, tokenizer=tok)
self.blip_model = Blip2ForConditionalGeneration.from_pretrained(
"Salesforce/blip2-opt-2.7b",
torch_dtype=torch.float16
diff --git a/src/music_engine/llm_bridge.py b/src/music_engine/llm_bridge.py
index 9e698ad..ef1cd60 100644
--- a/src/music_engine/llm_bridge.py
+++ b/src/music_engine/llm_bridge.py
@@ -1,65 +1,67 @@
-import re
+import os
import json
+import re
import requests
class LLMAcousticBridge:
- def __init__(self, target_model="dolphin-llama3:8b"):
- self.api_url = "http://localhost:11434/api/generate"
- self.model = target_model
+ def __init__(self, model_name="dolphin-llama3:8b"):
+ self.model_name = model_name
+ # Динамический выбор URL (внутри Docker используется emom_ollama)
+ base_url = os.getenv("OLLAMA_API_URL", "http://emom_ollama:11434")
+ self.api_url = f"{base_url}/api/generate"
- def _extract_json(self, raw_text: str):
- # Проверка на ИИдиота, LLM иногда игнорирует format="json" и оборачивает ответ в маркдаун
- try:
- match = re.search(r'\{.*\}', raw_text, re.DOTALL)
- if match:
- return json.loads(match.group(0))
- return json.loads(raw_text)
- except json.JSONDecodeError:
- # Если ИИдиот
- return None
-
- def get_acoustic_profile(self, v_score: float, a_score: float, scene_context: list) -> dict | None:
- # Агрегация контекста для обработки серии снимков (события)
- context_merged = " | ".join(scene_context) if scene_context else "abstract scene"
+ def get_acoustic_profile(self, valence, arousal, semantics):
+ context_str = ", ".join(semantics) if semantics else "abstract scene"
+
+ # Строгий промпт с примером вывода
+ prompt = f"""
+ Analyze the visual context and emotions to determine the ideal background music properties.
+ Emotions: Valence {valence:.1f}/9.0 (Positivity), Arousal {arousal:.1f}/9.0 (Energy).
+ Visual Context: {context_str}.
+ Map this scene to exactly 6 acoustic features. Values MUST be floats between 0.0 and 1.0.
+
+ 1. "energy": (Loudness/Density)
+ 2. "flux": (Rhythmic sharpness/Beat)
+ 3. "centroid": (Brightness)
+ 4. "pitch": (Fundamental frequency)
+ 5. "hnr": (Harmonics-to-Noise)
+ 6. "zcr": (Percussiveness)
+
+ Return ONLY a valid JSON object. No explanations, no markdown blocks.
+ Example: {{"energy": 0.8, "flux": 0.5, "centroid": 0.6, "pitch": 0.4, "hnr": 0.9, "zcr": 0.3}}
+ """
- system_prompt = f"""You are an expert music producer and acoustic engineer.
-Analyze the visual context and emotions to determine the ideal background music properties.
-Emotions: Valence {v_score:.1f}/9.0 (Positivity), Arousal {a_score:.1f}/9.0 (Energy).
-Visual Context: {context_merged}.
-
-Map this scene to exactly 6 acoustic features. Values MUST be floats between 0.0 and 1.0.
-1. "energy": (Loudness/Density. High for massive/busy scenes, Low for calm)
-2. "flux": (Rhythmic sharpness/Beat. High for action/people/cars, Low for static nature)
-3. "centroid": (Brightness: 0=Dark/Bass/Massive, 1=Bright/Treble/Light)
-4. "pitch": (Fundamental frequency: 0=Low pitch/Huge objects, 1=High pitch/Small objects)
-5. "hnr": (Harmonics-to-Noise: 0=Noisy/Distorted textures, 1=Clear/Melodic/Smooth textures)
-6. "zcr": (Percussiveness. High for detailed noise like leaves/rain, Low for solid blocks)
-
-Return ONLY a valid JSON object. Do not add any text or explanation.
-Example: {{"energy": 0.5, "flux": 0.2, "centroid": 0.4, "pitch": 0.3, "hnr": 0.8, "zcr": 0.1}}"""
-
try:
- # Отправка промпта локальной Ollama
- response = requests.post(self.api_url, json={
- "model": self.model,
- "prompt": system_prompt,
+ payload = {
+ "model": self.model_name,
+ "prompt": prompt,
"stream": False,
- "format": "json"
- }, timeout=45)
- response.raise_for_status()
+ "format": "json" # Принудительный JSON-режим Ollama
+ }
- raw_response = response.json().get("response", "")
- profile_data = self._extract_json(raw_response)
+ print(f"Запрос акустического профиля к Ollama...")
+ response = requests.post(self.api_url, json=payload, timeout=120)
- # Валидация структуры ответа
- expected_features = {'energy', 'flux', 'centroid', 'pitch', 'hnr', 'zcr'}
-
- if profile_data and expected_features.issubset(profile_data.keys()):
- return profile_data
+ if response.status_code == 200:
+ data = response.json()
+ response_text = data.get("response", "")
- print("LLM вернула неполный или некорректный набор акустических признаков")
- return None
-
- except requests.exceptions.RequestException as req_err:
- print(f"Не удалось подключиться к Ollama: {req_err}")
- return None
\ No newline at end of file
+                # Accept bare JSON or JSON wrapped in prose/markdown: when the text
+                # contains a {...} span, only that span is parsed.
+                match = re.search(r'\{.*\}', response_text, re.DOTALL)
+                try:
+                    profile = json.loads(match.group(0) if match else response_text)
+                    # The UI renders every value with ":.2f", so insist on all six
+                    # features being numeric; anything else returns {} like other failures.
+                    return {k: float(profile[k]) for k in
+                            ("energy", "flux", "centroid", "pitch", "hnr", "zcr")}
+                except (ValueError, TypeError, KeyError):
+                    print(f"Ошибка парсинга LLM ответа: {response_text}")
+                    return {}
+            else:
+                print(f"Ollama вернула ошибку HTTP: {response.status_code}")
+                return {}
+
+        except Exception as e:
+            print(f"Ошибка соединения с Ollama: {str(e)}")
+            return {}
\ No newline at end of file
diff --git a/tree.txt b/tree.txt
new file mode 100644
index 0000000..a54bd94
--- /dev/null
+++ b/tree.txt
@@ -0,0 +1,461 @@
+.
+├── bin
+│ ├── activate
+│ ├── activate.csh
+│ ├── activate.fish
+│ ├── activate.nu
+│ ├── activate.ps1
+│ ├── activate_this.py
+│ ├── debugpy
+│ ├── debugpy-adapter
+│ ├── f2py
+│ ├── fonttools
+│ ├── httpx
+│ ├── ipython
+│ ├── ipython3
+│ ├── isympy
+│ ├── jlpm
+│ ├── jsonpointer
+│ ├── jsonschema
+│ ├── jupyter
+│ ├── jupyter-dejavu
+│ ├── jupyter-events
+│ ├── jupyter-execute
+│ ├── jupyter-kernel
+│ ├── jupyter-kernelspec
+│ ├── jupyter-lab
+│ ├── jupyter-labextension
+│ ├── jupyter-labhub
+│ ├── jupyter-migrate
+│ ├── jupyter-nbconvert
+│ ├── jupyter-run
+│ ├── jupyter-server
+│ ├── jupyter-troubleshoot
+│ ├── jupyter-trust
+│ ├── normalizer
+│ ├── numpy-config
+│ ├── pip
+│ ├── pip3
+│ ├── pip3.12
+│ ├── proton
+│ ├── proton-viewer
+│ ├── pybabel
+│ ├── pyftmerge
+│ ├── pyftsubset
+│ ├── pygmentize
+│ ├── pyjson5
+│ ├── python -> /usr/bin/python3
+│ ├── python3 -> python
+│ ├── python3.12 -> python
+│ ├── send2trash
+│ ├── streamlit
+│ ├── streamlit.cmd
+│ ├── torchfrtrace
+│ ├── torchrun
+│ ├── tqdm
+│ ├── ttx
+│ ├── watchmedo
+│ └── wsdump
+├── CACHEDIR.TAG
+├── docker
+│ ├── Dockerfile.api
+│ └── Dockerfile.ui
+├── docker-compose.yml
+├── Dockerfile
+├── .dockerignore
+├── .env
+├── etc
+│ └── jupyter
+│ ├── jupyter_notebook_config.d
+│ │ └── jupyterlab.json
+│ ├── jupyter_server_config.d
+│ │ ├── jupyterlab.json
+│ │ ├── jupyter-lsp-jupyter-server.json
+│ │ ├── jupyter_server_terminals.json
+│ │ └── notebook_shim.json
+│ └── nbconfig
+│ └── notebook.d
+├── .gitignore
+├── .idea
+│ ├── .gitignore
+│ ├── inspectionProfiles
+│ │ └── profiles_settings.xml
+│ ├── misc.xml
+│ ├── modules.xml
+│ ├── Thesis.iml
+│ ├── vcs.xml
+│ └── workspace.xml
+├── lib
+│ └── python3.12
+│ └── site-packages
+│ ├── altair
+│ ├── altair-6.0.0.dist-info
+│ ├── anyio
+│ ├── anyio-4.12.1.dist-info
+│ ├── argon2
+│ ├── argon2_cffi-25.1.0.dist-info
+│ ├── _argon2_cffi_bindings
+│ ├── argon2_cffi_bindings-25.1.0.dist-info
+│ ├── arrow
+│ ├── arrow-1.4.0.dist-info
+│ ├── asttokens
+│ ├── asttokens-3.0.1.dist-info
+│ ├── async_lru
+│ ├── async_lru-2.0.5.dist-info
+│ ├── attr
+│ ├── attrs
+│ ├── attrs-25.4.0.dist-info
+│ ├── babel
+│ ├── babel-2.17.0.dist-info
+│ ├── beautifulsoup4-4.14.3.dist-info
+│ ├── bleach
+│ ├── bleach-6.3.0.dist-info
+│ ├── blinker
+│ ├── blinker-1.9.0.dist-info
+│ ├── bs4
+│ ├── cachetools
+│ ├── cachetools-6.2.4.dist-info
+│ ├── certifi
+│ ├── certifi-2026.1.4.dist-info
+│ ├── cffi
+│ ├── cffi-2.0.0.dist-info
+│ ├── _cffi_backend.cpython-312-x86_64-linux-gnu.so
+│ ├── charset_normalizer
+│ ├── charset_normalizer-3.4.4.dist-info
+│ ├── click
+│ ├── click-8.3.1.dist-info
+│ ├── comm
+│ ├── comm-0.2.3.dist-info
+│ ├── contourpy
+│ ├── contourpy-1.3.3.dist-info
+│ ├── cycler
+│ ├── cycler-0.12.1.dist-info
+│ ├── dateutil
+│ ├── debugpy
+│ ├── debugpy-1.8.19.dist-info
+│ ├── decorator-5.2.1.dist-info
+│ ├── decorator.py
+│ ├── defusedxml
+│ ├── defusedxml-0.7.1.dist-info
+│ ├── _distutils_hack
+│ ├── distutils-precedence.pth
+│ ├── .DS_Store
+│ ├── executing
+│ ├── executing-2.2.1.dist-info
+│ ├── fastjsonschema
+│ ├── fastjsonschema-2.21.2.dist-info
+│ ├── filelock
+│ ├── filelock-3.20.3.dist-info
+│ ├── fontTools
+│ ├── fonttools-4.61.1.dist-info
+│ ├── fqdn
+│ ├── fqdn-1.5.1.dist-info
+│ ├── fsspec
+│ ├── fsspec-2026.1.0.dist-info
+│ ├── functorch
+│ ├── git
+│ ├── gitdb
+│ ├── gitdb-4.0.12.dist-info
+│ ├── gitpython-3.1.46.dist-info
+│ ├── google
+│ ├── h11
+│ ├── h11-0.16.0.dist-info
+│ ├── httpcore
+│ ├── httpcore-1.0.9.dist-info
+│ ├── httpx
+│ ├── httpx-0.28.1.dist-info
+│ ├── idna
+│ ├── idna-3.11.dist-info
+│ ├── ipykernel
+│ ├── ipykernel-7.1.0.dist-info
+│ ├── ipykernel_launcher.py
+│ ├── IPython
+│ ├── ipython-9.9.0.dist-info
+│ ├── ipython_pygments_lexers-1.1.1.dist-info
+│ ├── ipython_pygments_lexers.py
+│ ├── isoduration
+│ ├── isoduration-20.11.0.dist-info
+│ ├── isympy.py
+│ ├── jedi
+│ ├── jedi-0.19.2.dist-info
+│ ├── jinja2
+│ ├── jinja2-3.1.6.dist-info
+│ ├── joblib
+│ ├── joblib-1.5.3.dist-info
+│ ├── json5
+│ ├── json5-0.13.0.dist-info
+│ ├── jsonpointer-3.0.0.dist-info
+│ ├── jsonpointer.py
+│ ├── jsonschema
+│ ├── jsonschema-4.26.0.dist-info
+│ ├── jsonschema_specifications
+│ ├── jsonschema_specifications-2025.9.1.dist-info
+│ ├── jupyter_client
+│ ├── jupyter_client-8.8.0.dist-info
+│ ├── jupyter_core
+│ ├── jupyter_core-5.9.1.dist-info
+│ ├── jupyter_events
+│ ├── jupyter_events-0.12.0.dist-info
+│ ├── jupyterlab
+│ ├── jupyterlab-4.5.1.dist-info
+│ ├── jupyterlab_pygments
+│ ├── jupyterlab_pygments-0.3.0.dist-info
+│ ├── jupyterlab_server
+│ ├── jupyterlab_server-2.28.0.dist-info
+│ ├── jupyter_lsp
+│ ├── jupyter_lsp-2.3.0.dist-info
+│ ├── jupyter.py
+│ ├── jupyter_server
+│ ├── jupyter_server-2.17.0.dist-info
+│ ├── jupyter_server_terminals
+│ ├── jupyter_server_terminals-0.5.3.dist-info
+│ ├── kiwisolver
+│ ├── kiwisolver-1.4.9.dist-info
+│ ├── lark
+│ ├── lark-1.3.1.dist-info
+│ ├── markupsafe
+│ ├── markupsafe-3.0.3.dist-info
+│ ├── matplotlib
+│ ├── matplotlib-3.10.8.dist-info
+│ ├── matplotlib_inline
+│ ├── matplotlib_inline-0.2.1.dist-info
+│ ├── mistune
+│ ├── mistune-3.2.0.dist-info
+│ ├── mpl_toolkits
+│ ├── mpmath
+│ ├── mpmath-1.3.0.dist-info
+│ ├── narwhals
+│ ├── narwhals-2.15.0.dist-info
+│ ├── nbclient
+│ ├── nbclient-0.10.4.dist-info
+│ ├── nbconvert
+│ ├── nbconvert-7.16.6.dist-info
+│ ├── nbformat
+│ ├── nbformat-5.10.4.dist-info
+│ ├── nest_asyncio-1.6.0.dist-info
+│ ├── nest_asyncio.py
+│ ├── networkx
+│ ├── networkx-3.6.1.dist-info
+│ ├── notebook_shim
+│ ├── notebook_shim-0.2.4.dist-info
+│ ├── numpy
+│ ├── numpy-2.4.1.dist-info
+│ ├── numpy.libs
+│ ├── nvidia
+│ ├── nvidia_cublas_cu12-12.8.4.1.dist-info
+│ ├── nvidia_cuda_cupti_cu12-12.8.90.dist-info
+│ ├── nvidia_cuda_nvrtc_cu12-12.8.93.dist-info
+│ ├── nvidia_cuda_runtime_cu12-12.8.90.dist-info
+│ ├── nvidia_cudnn_cu12-9.10.2.21.dist-info
+│ ├── nvidia_cufft_cu12-11.3.3.83.dist-info
+│ ├── nvidia_cufile_cu12-1.13.1.3.dist-info
+│ ├── nvidia_curand_cu12-10.3.9.90.dist-info
+│ ├── nvidia_cusolver_cu12-11.7.3.90.dist-info
+│ ├── nvidia_cusparse_cu12-12.5.8.93.dist-info
+│ ├── nvidia_cusparselt_cu12-0.7.1.dist-info
+│ ├── nvidia_nccl_cu12-2.27.5.dist-info
+│ ├── nvidia_nvjitlink_cu12-12.8.93.dist-info
+│ ├── nvidia_nvshmem_cu12-3.3.20.dist-info
+│ ├── nvidia_nvtx_cu12-12.8.90.dist-info
+│ ├── packaging
+│ ├── packaging-25.0.dist-info
+│ ├── pandas
+│ ├── pandas-2.3.3.dist-info
+│ ├── pandocfilters-1.5.1.dist-info
+│ ├── pandocfilters.py
+│ ├── parso
+│ ├── parso-0.8.5.dist-info
+│ ├── pexpect
+│ ├── pexpect-4.9.0.dist-info
+│ ├── PIL
+│ ├── pillow-12.1.0.dist-info
+│ ├── pillow.libs
+│ ├── pip
+│ ├── pip-25.3.dist-info
+│ ├── pkg_resources
+│ ├── platformdirs
+│ ├── platformdirs-4.5.1.dist-info
+│ ├── prometheus_client
+│ ├── prometheus_client-0.23.1.dist-info
+│ ├── prompt_toolkit
+│ ├── prompt_toolkit-3.0.52.dist-info
+│ ├── protobuf-6.33.4.dist-info
+│ ├── psutil
+│ ├── psutil-7.2.1.dist-info
+│ ├── ptyprocess
+│ ├── ptyprocess-0.7.0.dist-info
+│ ├── pure_eval
+│ ├── pure_eval-0.2.3.dist-info
+│ ├── pyarrow
+│ ├── pyarrow-22.0.0.dist-info
+│ ├── pycparser
+│ ├── pycparser-2.23.dist-info
+│ ├── pydeck
+│ ├── pydeck-0.9.1.dist-info
+│ ├── pygments
+│ ├── pygments-2.19.2.dist-info
+│ ├── pylab.py
+│ ├── pyparsing
+│ ├── pyparsing-3.3.1.dist-info
+│ ├── python_dateutil-2.9.0.post0.dist-info
+│ ├── pythonjsonlogger
+│ ├── python_json_logger-4.0.0.dist-info
+│ ├── pytz
+│ ├── pytz-2025.2.dist-info
+│ ├── pyyaml-6.0.3.dist-info
+│ ├── pyzmq-27.1.0.dist-info
+│ ├── pyzmq.libs
+│ ├── referencing
+│ ├── referencing-0.37.0.dist-info
+│ ├── requests
+│ ├── requests-2.32.5.dist-info
+│ ├── rfc3339_validator-0.1.4.dist-info
+│ ├── rfc3339_validator.py
+│ ├── rfc3986_validator-0.1.1.dist-info
+│ ├── rfc3986_validator.py
+│ ├── rfc3987_syntax
+│ ├── rfc3987_syntax-1.1.0.dist-info
+│ ├── rpds
+│ ├── rpds_py-0.30.0.dist-info
+│ ├── scikit_learn-1.8.0.dist-info
+│ ├── scikit_learn.libs
+│ ├── scipy
+│ ├── scipy-1.17.0.dist-info
+│ ├── scipy.libs
+│ ├── send2trash
+│ ├── send2trash-2.0.0.dist-info
+│ ├── setuptools
+│ ├── setuptools-80.9.0.dist-info
+│ ├── six-1.17.0.dist-info
+│ ├── six.py
+│ ├── sklearn
+│ ├── smmap
+│ ├── smmap-5.0.2.dist-info
+│ ├── soupsieve
+│ ├── soupsieve-2.8.1.dist-info
+│ ├── stack_data
+│ ├── stack_data-0.6.3.dist-info
+│ ├── streamlit
+│ ├── streamlit-1.53.0.dist-info
+│ ├── sympy
+│ ├── sympy-1.14.0.dist-info
+│ ├── tenacity
+│ ├── tenacity-9.1.2.dist-info
+│ ├── terminado
+│ ├── terminado-0.18.1.dist-info
+│ ├── threadpoolctl-3.6.0.dist-info
+│ ├── threadpoolctl.py
+│ ├── tinycss2
+│ ├── tinycss2-1.4.0.dist-info
+│ ├── toml
+│ ├── toml-0.10.2.dist-info
+│ ├── torch
+│ ├── torch-2.9.1.dist-info
+│ ├── torchaudio
+│ ├── torchaudio-2.9.1.dist-info
+│ ├── torchgen
+│ ├── torchvision
+│ ├── torchvision-0.24.1.dist-info
+│ ├── torchvision.libs
+│ ├── tornado
+│ ├── tornado-6.5.4.dist-info
+│ ├── tqdm
+│ ├── tqdm-4.67.1.dist-info
+│ ├── traitlets
+│ ├── traitlets-5.14.3.dist-info
+│ ├── triton
+│ ├── triton-3.5.1.dist-info
+│ ├── typing_extensions-4.15.0.dist-info
+│ ├── typing_extensions.py
+│ ├── tzdata
+│ ├── tzdata-2025.3.dist-info
+│ ├── uri_template
+│ ├── uri_template-1.3.0.dist-info
+│ ├── urllib3
+│ ├── urllib3-2.6.3.dist-info
+│ ├── _virtualenv.pth
+│ ├── _virtualenv.py
+│ ├── watchdog
+│ ├── watchdog-6.0.0.dist-info
+│ ├── wcwidth
+│ ├── wcwidth-0.2.14.dist-info
+│ ├── webcolors
+│ ├── webcolors-25.10.0.dist-info
+│ ├── webencodings
+│ ├── webencodings-0.5.1.dist-info
+│ ├── websocket
+│ ├── websocket_client-1.9.0.dist-info
+│ ├── _yaml
+│ ├── yaml
+│ └── zmq
+├── Makefile
+├── NFS
+├── poetry.lock
+├── pyproject.toml
+├── pyvenv.cfg
+├── README.md
+├── requirements.txt
+├── runs
+├── share
+│ ├── applications
+│ │ └── jupyterlab.desktop
+│ ├── icons
+│ │ └── hicolor
+│ │ └── scalable
+│ ├── jupyter
+│ │ ├── kernels
+│ │ │ └── python3
+│ │ ├── lab
+│ │ │ ├── schemas
+│ │ │ ├── static
+│ │ │ └── themes
+│ │ ├── labextensions
+│ │ │ └── jupyterlab_pygments
+│ │ ├── nbconvert
+│ │ │ └── templates
+│ │ └── nbextensions
+│ │ └── pydeck
+│ └── man
+│ └── man1
+│ ├── ipython.1
+│ ├── isympy.1
+│ └── ttx.1
+├── src
+│ ├── 5_epoch_emoset_resnet50_finetuned_2.41M.pth
+│ ├── api.py
+│ ├── data_loader.py
+│ ├── dataset_paths_cache.pkl
+│ ├── emoset_resnet50_best.pth
+│ ├── emoset_resnet50_finetuned_2_41M_best.pth
+│ ├── emoset_resnet50_resume.pth
+│ ├── emoset_test_embeddings.npy
+│ ├── emoset_test_labels.npy
+│ ├── main.py
+│ ├── music_engine
+│ │ ├── image_processor.py
+│ │ ├── __init__.py
+│ │ ├── llm_bridge.py
+│ │ ├── matcher.py
+│ │ └── va_regressor.pkl
+│ ├── scripts
+│ │ ├── 00_setup_env.sh
+│ │ ├── 01_download_DEAM.py
+│ │ ├── 02_download_EmoSet.py
+│ │ ├── 11_prerp_DEAM.py
+│ │ ├── 20_bench_GPU.py
+│ │ ├── 21_train_images.ipynb
+│ │ ├── 22_extract_embeddings.ipynb
+│ │ ├── 23_aggregate_DEAM_timeline.py
+│ │ ├── 24_train_regressor.py
+│ │ ├── 31_finetune_2.41M.py
+│ │ ├── 90_acc_images_model.ipynb
+│ │ └── 91_generate_metrics.py
+│ └── tabs
+│ ├── tab_dataset.py
+│ └── tab_live.py
+├── tree.txt
+└── .vscode
+ ├── launch.json
+ └── tasks.json
+
+322 directories, 137 files