Docker integration into project #1

Merged
zin merged 2 commits from Dockered into main 2026-06-03 12:18:10 +03:00
11 changed files with 935 additions and 214 deletions
+18
View File
@@ -0,0 +1,18 @@
# Virtual-environment directories and marker file located at the project root
bin/
lib/
share/
etc/
include/
pyvenv.cfg
# Editor / IDE settings
.idea/
.vscode/
# Python bytecode caches
__pycache__/
*.pyc
# Version-control metadata
.git/
# Data directories kept out of the build context
# (dataset/ is bind-mounted at runtime by docker-compose; runs/ and NFS/ are
# presumably local-only outputs/mounts — TODO confirm)
runs/
dataset/
NFS/
# Binary artifacts (model weights, pickles, NumPy arrays); the weights and the
# regressor pickle are bind-mounted by docker-compose instead of being baked in
*.pth
*.pkl
*.npy
# Local environment file (read by docker-compose via env_file); never ship it in an image
.env
+63
View File
@@ -0,0 +1,63 @@
# Compose stack for EmoM: Streamlit UI -> FastAPI/PyTorch inference API -> Ollama LLM.
# Only the UI publishes a host port; the other services are reachable solely by
# service name on the emom_mesh bridge network.
#
# Required variables (taken from .env next to this file):
#   DATA_DEAM_DIR       host path of the DEAM dataset directory
#   HOST_ARTIFACTS_DIR  host path holding the trained model artifacts
# The `${VAR:?message}` form makes `docker compose` abort with a clear message
# instead of silently expanding to an empty (malformed) volume source.
version: '3.8'

networks:
  emom_mesh:
    driver: bridge

services:
  # Web front end (no GPU). Talks to emom_inference over the internal network.
  emom_ui:
    build:
      context: .
      dockerfile: docker/Dockerfile.ui
    container_name: emom_web_ui
    restart: unless-stopped
    ports:
      - "8080:8080"
    networks:
      - emom_mesh
    env_file:
      - .env
    volumes:
      # Live source mount: shadows the copy of src/ baked into the image.
      - ./src:/app/src
      # The UI reads audio files straight from the dataset directory.
      - "${DATA_DEAM_DIR:?set DATA_DEAM_DIR in .env}:/app/dataset/DEAM:ro"
    depends_on:
      - emom_inference

  # FastAPI + PyTorch inference service (GPU).
  emom_inference:
    build:
      context: .
      dockerfile: docker/Dockerfile.api
    container_name: emom_pytorch_api
    restart: unless-stopped
    networks:
      - emom_mesh
    env_file:
      - .env
    volumes:
      # Model artifacts are excluded from the build context (.dockerignore) and
      # injected read-only at the paths the code expects.
      - "${HOST_ARTIFACTS_DIR:?set HOST_ARTIFACTS_DIR in .env}/emoset_resnet50_best.pth:/app/src/emoset_resnet50_best.pth:ro"
      - "${HOST_ARTIFACTS_DIR:?set HOST_ARTIFACTS_DIR in .env}/music_engine/va_regressor.pkl:/app/src/music_engine/va_regressor.pkl:ro"
      - "${DATA_DEAM_DIR:?set DATA_DEAM_DIR in .env}:/app/dataset/DEAM:ro"
      # Reuse the host Hugging Face cache so models are not re-downloaded.
      - ~/.cache/huggingface:/root/.cache/huggingface
    # The API sends requests to Ollama by service name, so start Ollama first.
    # (Start order only; this does not wait for Ollama to be ready.)
    depends_on:
      - emom_ollama
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # Ollama LLM server (GPU). No host port is published.
  emom_ollama:
    # NOTE(review): `latest` is a moving tag; pin a tested version for reproducible deployments.
    image: ollama/ollama:latest
    container_name: emom_ollama_engine
    restart: unless-stopped
    networks:
      - emom_mesh
    volumes:
      # Reuse the models already pulled on the host.
      - ~/.ollama:/root/.ollama
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
+24
View File
@@ -0,0 +1,24 @@
FROM pytorch/pytorch:2.2.1-cuda12.1-cudnn8-runtime

# Do not write .pyc files and keep stdout/stderr unbuffered so logs show up immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# 1. System dependencies (shared libraries needed by the imaging stack).
#    --no-install-recommends keeps optional packages out of the image; the apt
#    lists are removed in the same layer so they never reach the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
    && rm -rf /var/lib/apt/lists/*

# 2. Python packages, one per line and sorted for readable diffs.
#    "numpy<2" stops pip from pulling NumPy 2.x transitively: the torch 2.2.x
#    build in the base image targets the NumPy 1.x ABI.
#    NOTE(review): most packages are unpinned. In particular scikit-learn/joblib
#    should match the versions used to train va_regressor.pkl — confirm and pin.
RUN pip install --no-cache-dir \
        accelerate \
        fastapi \
        joblib \
        "numpy<2" \
        pandas \
        python-multipart \
        scikit-learn \
        timm \
        tokenizers==0.15.2 \
        transformers==4.38.2 \
        uvicorn

# 3. Copy the application code into the image.
WORKDIR /app
COPY src/ /app/src/

# 4. Run from inside src/ so modules are imported without the `src.` prefix.
WORKDIR /app/src

EXPOSE 8000

# 5. Start the API; `api:app` resolves relative to the working directory above.
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]
+17
View File
@@ -0,0 +1,17 @@
FROM python:3.12-slim

# Do not write .pyc files and keep stdout/stderr unbuffered so logs show up immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

WORKDIR /app

# UI-only dependencies; installed before the source copy so this layer stays cached
# when only application code changes.
RUN pip install --no-cache-dir \
        pandas \
        pillow \
        requests \
        streamlit==1.32.0

COPY src/ /app/src/

# Run from inside src/ so main.py and its sibling modules resolve locally.
WORKDIR /app/src

EXPOSE 8080

# Start Streamlit on all interfaces on the exposed port.
CMD ["streamlit", "run", "main.py", "--server.port", "8080", "--server.address", "0.0.0.0"]
-64
View File
@@ -1,64 +0,0 @@
version: '3.8'
# Shared networks used to isolate traffic
networks:
ai_mesh:
driver: bridge
services:
# ----------------------------------------------------
# SERVICE 1: Frontend (user interface)
# Does not require a GPU; can be moved to a separate server
# ----------------------------------------------------
web_ui:
build:
context: .
dockerfile: Dockerfile
container_name: emom_frontend
restart: always
ports:
- "8080:8080"
networks:
- ai_mesh
environment:
- STREAMLIT_RUN=1
# Tell the UI where to find the LLM backend (inside the Docker network)
- OLLAMA_HOST=http://llm_backend:11434
volumes:
- ./src:/app/src
# The models still live here because the code is monolithic,
# but architecturally the service is already isolated
- /home/zin/projects/Thesis/src/emoset_resnet50_best.pth:/app/emoset_resnet50_best.pth:ro
- /home/zin/projects/Thesis/src/music_engine/va_regressor.pkl:/app/src/music_engine/va_regressor.pkl:ro
- /home/zin/projects/Thesis/dataset/DEAM:/app/dataset/DEAM:ro
# Temporarily keep the GPU for PyTorch (until it is moved out into an API)
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
# ----------------------------------------------------
# SERVICE 2: LLM Inference Backend (Ollama)
# Isolated service for the language model on the GPU
# ----------------------------------------------------
llm_backend:
image: ollama/ollama:latest
container_name: ollama_gpu_inference
restart: always
networks:
- ai_mesh
ports:
- "11434:11434"
volumes:
# Pass through the local Ollama models so they are not downloaded again inside Docker
- ~/.ollama:/root/.ollama
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
+85
View File
@@ -0,0 +1,85 @@
import io
import traceback
import numpy as np
from typing import List
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse
from PIL import Image
from data_loader import load_music_engine, load_image_processor
from music_engine.llm_bridge import LLMAcousticBridge
# ASGI application object; the startup hook and the /analyze route below attach to it.
app = FastAPI(title="EmoM Inference API", version="1.0.0")

# Module-level registry of the ML components. Every slot starts as None and is
# filled in by the startup hook; request handlers read the components from here.
ml_context = dict.fromkeys(("image_processor", "music_matcher", "llm_bridge"))
@app.on_event("startup")
async def startup_event():
    """Build the ML components once at application start and store them in ``ml_context``."""
    print("Инициализация нейросетевого ядра EmoM...")
    # (slot name, zero-argument factory) pairs, created in the listed order.
    component_factories = (
        ("image_processor", load_image_processor),
        ("music_matcher", load_music_engine),
        ("llm_bridge", LLMAcousticBridge),
    )
    for slot, factory in component_factories:
        ml_context[slot] = factory()
    print("Вычислительный конвейер готов к работе.")
@app.post("/analyze")
async def analyze_event_endpoint(files: List[UploadFile] = File(...)):
    """Analyze a batch of uploaded photos and return a matching playlist.

    Each image is run through the image processor (embedding + scene caption)
    and the matcher's valence/arousal prediction. The per-image values are
    averaged, an acoustic profile is requested from the LLM bridge, and the
    nearest tracks are looked up.

    Args:
        files: Uploaded image files (multipart form field ``files``).

    Returns:
        JSONResponse with the keys ``status``, ``images_processed``,
        ``target_v``, ``target_a``, ``llm_profile``, ``semantics`` and ``tracks``.

    Raises:
        HTTPException: 400 when no files are sent or a file cannot be decoded
            as an image; 503 when the ML components are not loaded; 500 for
            any other failure during inference.
    """
    try:
        # An empty batch would make np.mean() below produce NaN.
        if not files:
            raise HTTPException(status_code=400, detail="Не передано ни одного изображения")

        # 1. Read and decode every uploaded image.
        images = []
        for file in files:
            image_bytes = await file.read()
            try:
                img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
            except (OSError, ValueError) as img_err:
                # Undecodable upload is a client error, not a server failure.
                raise HTTPException(
                    status_code=400,
                    detail=f"Файл '{file.filename}' не является корректным изображением",
                ) from img_err
            images.append(img)

        print(f"Начата обработка события из {len(images)} фотографий...")

        img_processor = ml_context["image_processor"]
        matcher = ml_context["music_matcher"]
        llm = ml_context["llm_bridge"]
        # Slots stay None until startup finishes (or if a loader returned None).
        if img_processor is None or matcher is None or llm is None:
            raise HTTPException(status_code=503, detail="Модели ещё не загружены")

        all_v, all_a = [], []
        all_objects = []

        # 2. Run every image through the models.
        for img in images:
            embedding = img_processor.extract_embedding(img)
            v, a = matcher.predict_va(embedding)
            all_v.append(v)
            all_a.append(a)
            caption = img_processor.describe_scene(img)
            all_objects.append(caption)

        # 3. Average the emotions over the whole event.
        target_v = float(np.mean(all_v))
        target_a = float(np.mean(all_a))
        # De-duplicate captions while keeping first-seen order, so the LLM
        # prompt and the response are deterministic (set() order is not).
        unique_semantics = list(dict.fromkeys(all_objects))

        # 4. Ask Ollama for the acoustic profile.
        print(f"Запрос к Ollama. V={target_v:.2f}, A={target_a:.2f}")
        llm_profile = llm.get_acoustic_profile(target_v, target_a, unique_semantics)

        # 5. Look up tracks in the database.
        print("Поиск подходящих композиций...")
        playlist_df = matcher.find_nearest_tracks(target_v, target_a, llm_profile=llm_profile, top_k=15)

        # Convert the table to a JSON-friendly list of records.
        tracks_list = playlist_df.to_dict(orient="records")

        return JSONResponse(content={
            "status": "success",
            "images_processed": len(images),
            "target_v": target_v,
            "target_a": target_a,
            "llm_profile": llm_profile,
            "semantics": unique_semantics,
            "tracks": tracks_list
        })
    except HTTPException:
        # Deliberate 4xx/5xx responses raised above must not be rewrapped as 500.
        raise
    except Exception as e:
        print(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Ошибка инференса: {str(e)}") from e
+26 -32
View File
@@ -1,55 +1,49 @@
import os
from pathlib import Path from pathlib import Path
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import streamlit as st
# Импорты твоих движков
from music_engine.matcher import MusicMatcher from music_engine.matcher import MusicMatcher
from music_engine.image_processor import ImageProcessor from music_engine.image_processor import ImageProcessor
# Базовая директория (папка src)
BASE_DIR = Path(__file__).resolve().parent BASE_DIR = Path(__file__).resolve().parent
@st.cache_resource
def load_music_engine(): def load_music_engine():
# Инициализация базы данных и регрессора для музыкального мэтчинга """Загрузка базы данных и модели регрессора для бэкенда."""
# Пути соответствуют тем, что мы примонтировали в Docker
db_path = BASE_DIR.parent / "dataset" / "DEAM" / "music_db.csv" db_path = BASE_DIR.parent / "dataset" / "DEAM" / "music_db.csv"
model_path = BASE_DIR / "music_engine" / "va_regressor.pkl" model_path = BASE_DIR / "music_engine" / "va_regressor.pkl"
if not db_path.exists():
print(f"Музыкальная БД не найдена: {db_path}")
return None
return MusicMatcher(db_path=db_path, model_path=model_path) return MusicMatcher(db_path=db_path, model_path=model_path)
@st.cache_resource
def load_image_processor(): def load_image_processor():
# Модуль обработки визуальных признаков """Инициализация нейросетевого экстрактора (ResNet-50)."""
model_path = BASE_DIR / "emoset_resnet50_best.pth" weights_path = BASE_DIR / "emoset_resnet50_best.pth"
return ImageProcessor(weights_path)
# Обработка пути при вызове из корневой директории
if not model_path.exists():
model_path = BASE_DIR.parent / "emoset_resnet50_best.pth"
return ImageProcessor(model_path=model_path)
@st.cache_data
def load_emoset_data(): def load_emoset_data():
# Выборка данных датасета для вкладки отладки """
dataset_root = BASE_DIR.parent / "dataset" / "EmoSet-118K" / "test" Загрузка эталонного датасета EmoSet.
(Оставлено для обратной совместимости, если понадобится локальная отладка)
"""
try:
images_path = BASE_DIR.parent / "dataset" / "EmoSet-118K" / "test" / "images"
labels_path = BASE_DIR / "emoset_test_labels.npy"
embeddings_path = BASE_DIR / "emoset_test_embeddings.npy"
csv_path = dataset_root / "labels.csv" # Если файлов нет (например, на проде), возвращаем None
img_dir = dataset_root / "images" if not all(p.exists() for p in [labels_path, embeddings_path]):
emb_path = BASE_DIR / "emoset_test_embeddings.npy"
lbl_path = BASE_DIR / "emoset_test_labels.npy"
if not all([csv_path.exists(), emb_path.exists(), lbl_path.exists()]):
print("Тестовые файлы датасета не найдены, вкладка отладки может работать некорректно")
return None, None, None, None return None, None, None, None
labels_df = pd.read_csv(csv_path) labels = np.load(labels_path)
embeddings = np.load(embeddings_path)
test_filenames = labels_df['filename'].tolist() # Читаем CSV с метками
test_embeddings = np.load(emb_path) df = pd.read_csv(BASE_DIR.parent / "dataset" / "EmoSet-118K" / "test" / "labels.csv")
test_labels = np.load(lbl_path) image_files = df['filename'].tolist()
return test_filenames, test_embeddings, test_labels, img_dir return image_files, embeddings, labels, images_path
except Exception as e:
print(f"Предупреждение: Тестовые артефакты EmoSet не найдены ({e})")
return None, None, None, None
+171 -54
View File
@@ -1,73 +1,190 @@
import sys
import os import os
import subprocess import requests
import streamlit as st import streamlit as st
import streamlit.components.v1 as components import streamlit.components.v1 as components
from PIL import Image
import base64
from io import BytesIO
from data_loader import load_music_engine, load_emoset_data, load_image_processor st.set_page_config(page_title="EmoM Playlist Generator", layout="wide", initial_sidebar_state="collapsed")
from tabs.tab_dataset import render_dataset_tab
from tabs.tab_live import render_live_tab
# Костыль для прямого запуска API_URL = os.getenv("BACKEND_API_URL", "http://emom_inference:8000") + "/analyze"
if __name__ == "__main__": DEAM_AUDIO_DIR = "/app/dataset/DEAM/DEAM_audio/MEMD_audio"
if "STREAMLIT_RUN" not in os.environ:
os.environ["STREAMLIT_RUN"] = "1"
cmd = [sys.executable, "-m", "streamlit", "run", __file__, "--server.port", "8080", "--server.address", "0.0.0.0"]
subprocess.run(cmd)
sys.exit()
viewport_mode = st.query_params.get("viewport", "desktop") def get_thumbnail_html(images, max_display=12):
page_layout = "centered" if viewport_mode == "mobile" else "wide" html_images = ""
for file in images[:max_display]:
img = Image.open(file)
img.thumbnail((100, 100))
if img.mode != "RGB":
img = img.convert("RGB")
buffered = BytesIO()
img.save(buffered, format="JPEG")
b64_str = base64.b64encode(buffered.getvalue()).decode()
html_images += f'<img src="data:image/jpeg;base64,{b64_str}" style="width: 60px; height: 60px; object-fit: cover; border-radius: 8px; margin-right: 8px; margin-bottom: 8px; border: 1px solid rgba(255, 255, 255, 0.2);">'
st.set_page_config(page_title="Thesis Demo", layout=page_layout) if len(images) > max_display:
html_images += f'<span style="display: inline-block; width: 60px; height: 60px; line-height: 60px; text-align: center; background: rgba(150, 150, 150, 0.2); border-radius: 8px; vertical-align: top; font-size: 14px;">+{len(images) - max_display}</span>'
return f'<div style="display: flex; flex-wrap: wrap;">{html_images}</div>'
# Определения ширины экрана и смены верстки def main():
components.html( if "live_state" not in st.session_state:
""" st.session_state.live_state = "upload"
<script> if "result_data" not in st.session_state:
const w = window.parent.innerWidth; st.session_state.result_data = None
const h = window.parent.innerHeight;
const url = new URL(window.parent.location.href);
// Считаем мобилкой, если ушли в портретный режим или экран уже 768px viewport = st.query_params.get("viewport", "desktop")
const isMobile = (h > w) || (w < 768);
const target = isMobile ? "mobile" : "desktop";
if (url.searchParams.get("viewport") !== target) { st.markdown("""
url.searchParams.set("viewport", target);
window.parent.location.href = url.href;
}
</script>
""",
height=0,
width=0,
)
st.markdown(
"""
<style> <style>
img { max-width: 100%; height: auto; object-fit: contain; } [data-testid="stFileUploadDropzone"] { min-height: 250px !important; display: flex; align-items: center; justify-content: center; border-radius: 16px; background-color: rgba(255, 75, 75, 0.03); }
[data-testid="stMetricValue"] { font-size: 1.8rem; } .spinner-container { display: flex; flex-direction: column; align-items: center; justify-content: center; min-height: 40vh; margin-top: 10vh; }
.big-spinner { width: 120px; height: 120px; border: 10px solid rgba(255, 75, 75, 0.1); border-top: 10px solid #ff4b4b; border-radius: 50%; animation: spin 1s linear infinite; margin-bottom: 2rem; }
@keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }
#MainMenu {visibility: hidden;} footer {visibility: hidden;}
</style> </style>
""", """, unsafe_allow_html=True)
unsafe_allow_html=True
if st.session_state.live_state == "upload":
upload_placeholder = st.empty()
with upload_placeholder.container():
st.write("Загрузите изображения для визуально-семантического анализа.")
if viewport == "mobile":
st.markdown("<br>", unsafe_allow_html=True)
uploaded_files = st.file_uploader(
"Загрузка файлов",
type=['png', 'jpg', 'jpeg'],
accept_multiple_files=True,
label_visibility="collapsed" if viewport == "mobile" else "visible"
) )
# Подгрузка ML-моделей и датасета if uploaded_files:
music_matcher = load_music_engine() st.markdown("<br>", unsafe_allow_html=True)
img_processor = load_image_processor() if st.button("Выполнить анализ", type="primary", use_container_width=True):
emoset_files, emoset_embeddings, emoset_labels, emoset_path = load_emoset_data() st.session_state.uploaded_images = uploaded_files
st.session_state.live_state = "processing"
upload_placeholder.empty()
st.rerun()
st.title("Генератор саундтреков (Research Demo)") st.markdown("<br>", unsafe_allow_html=True)
st.caption("Выбранные файлы:")
st.markdown(get_thumbnail_html(uploaded_files), unsafe_allow_html=True)
tab_live, tab_debug = st.tabs(["Анализ событий (Свои фото)", "Отладка (Датасет EmoSet)"]) elif st.session_state.live_state == "processing":
components.html("<script>window.parent.scrollTo(0, 0);</script>", height=0, width=0)
files = st.session_state.get("uploaded_images", [])
st.markdown('<div class="spinner-container"><div class="big-spinner"></div><h3 style="text-align: center; font-weight: 400;">Обработка данных...</h3></div>', unsafe_allow_html=True)
with tab_live: try:
if img_processor: upload_data = [('files', (f.name, f.getvalue(), f.type)) for f in files]
render_live_tab(music_matcher, img_processor) response = requests.post(API_URL, files=upload_data, timeout=300)
if response.status_code == 200:
st.session_state.result_data = response.json()
st.session_state.live_state = "result"
st.rerun()
else: else:
st.error("Ошибка загрузки: не найдены веса ResNet для image_processor.") st.error(f"Ошибка сервера: {response.status_code}")
if st.button("Назад"):
st.session_state.live_state = "upload"
st.rerun()
except Exception as e:
st.error(f"Ошибка соединения: {str(e)}")
if st.button("Назад"):
st.session_state.live_state = "upload"
st.rerun()
with tab_debug: elif st.session_state.live_state == "result":
render_dataset_tab(music_matcher, emoset_files, emoset_embeddings, emoset_labels, emoset_path) components.html("<script>window.parent.scrollTo(0, 0);</script>", height=0, width=0)
data = st.session_state.result_data
st.header(f"Сгенерированный плейлист (обработано файлов: {data['images_processed']})")
for row in data.get("tracks", []):
with st.container(border=True):
song_id = int(row['song_id'])
score = row['final_score']
audio_path = f"{DEAM_AUDIO_DIR}/{song_id}.mp3"
if not os.path.exists(audio_path):
audio_path = audio_path.replace('.mp3', '.wav')
if viewport == "desktop":
c1, c2 = st.columns([1, 3])
with c1:
st.write(f"**Track ID:** {song_id}")
st.caption(f"Score: {score:.4f}")
with c2:
if os.path.exists(audio_path):
st.audio(audio_path)
else:
st.caption("Аудиофайл не найден")
else:
st.write(f"**Track ID:** {song_id} (Score: {score:.4f})")
if os.path.exists(audio_path):
st.audio(audio_path)
else:
st.caption("Аудиофайл не найден")
st.markdown("<br>", unsafe_allow_html=True)
with st.expander("Отладочная информация (Метрики)"):
st.subheader("Координаты V/A")
c_v, c_a = st.columns(2)
c_v.metric("Valence", f"{data['target_v']:.2f}")
c_a.metric("Arousal", f"{data['target_a']:.2f}")
st.markdown("---")
st.subheader("Акустические признаки (LLM)")
feature_titles = {
"energy": "RMS Energy",
"flux": "Spectral Flux",
"centroid": "Spectral Centroid",
"pitch": "F0 (Pitch)",
"hnr": "HNR",
"zcr": "ZCR"
}
# Развернутые описания для комиссии (передаются в аргумент help)
feature_helps = {
"energy": "Среднеквадратичная амплитуда (громкость). Бывает высокой в плотных, интенсивных композициях, отражает общую акустическую энергию сцены.",
"flux": "Спектральный поток. Измеряет резкость изменений в спектре. Высок при четком, агрессивном ритме и частой смене нот.",
"centroid": "Спектральный центроид («яркость» звука). Высокие значения указывают на преобладание высоких частот (звонкие инструменты, открытые пространства).",
"pitch": "Основная частота звука. Высокий pitch характерен для позитивных, легких или, напротив, напряженных мелодий.",
"hnr": "Отношение гармоник к шуму. Высокий HNR — чистая мелодия и вокал. Низкий HNR — присутствие дисторшна, шумов или перкуссии.",
"zcr": "Частота пересечения нуля. Отражает шумовую составляющую сигнала. Высок в треках с выраженными ударными (hi-hats) или атмосферным шумом."
}
llm_profile = data.get("llm_profile")
if llm_profile and isinstance(llm_profile, dict) and len(llm_profile) > 0:
cols_per_row = 2 if viewport == "mobile" else 3
llm_items = list(llm_profile.items())
for i in range(0, len(llm_items), cols_per_row):
cols = st.columns(cols_per_row)
for j in range(cols_per_row):
if i + j < len(llm_items):
k, v = llm_items[i + j]
label = feature_titles.get(k, k)
tooltip = feature_helps.get(k, "")
# Форматируем до 2 знаков после запятой (например, 0.64)
cols[j].metric(label, f"{v:.2f}", help=tooltip)
else:
st.caption("Акустический профиль недоступен. Применен fallback-алгоритм.")
st.markdown("---")
st.write("**Извлеченные теги (BLIP-2):**")
st.write(", ".join([str(c).capitalize() for c in data.get("semantics", [])]))
st.markdown("<br>", unsafe_allow_html=True)
if st.button("Новый запрос", use_container_width=True):
st.session_state.live_state = "upload"
st.session_state.result_data = None
st.session_state.pop("uploaded_images", None)
st.rerun()
if __name__ == "__main__":
main()
+5 -1
View File
@@ -32,7 +32,11 @@ class ImageProcessor:
# Модуль семантического описания сцены # Модуль семантического описания сцены
print("Инициализация BLIP-2...") print("Инициализация BLIP-2...")
self.blip_processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b") # Обход бага конфигурации Hugging Face (ручная сборка процессора)
from transformers import BlipImageProcessor, AutoTokenizer
img_proc = BlipImageProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
tok = AutoTokenizer.from_pretrained("Salesforce/blip2-opt-2.7b", use_fast=False)
self.blip_processor = Blip2Processor(image_processor=img_proc, tokenizer=tok)
self.blip_model = Blip2ForConditionalGeneration.from_pretrained( self.blip_model = Blip2ForConditionalGeneration.from_pretrained(
"Salesforce/blip2-opt-2.7b", "Salesforce/blip2-opt-2.7b",
torch_dtype=torch.float16 torch_dtype=torch.float16
+57 -55
View File
@@ -1,65 +1,67 @@
import re import os
import json import json
import re
import requests import requests
class LLMAcousticBridge: class LLMAcousticBridge:
def __init__(self, target_model="dolphin-llama3:8b"): def __init__(self, model_name="dolphin-llama3:8b"):
self.api_url = "http://localhost:11434/api/generate" self.model_name = model_name
self.model = target_model # Динамический выбор URL (внутри Docker используется emom_ollama)
base_url = os.getenv("OLLAMA_API_URL", "http://emom_ollama:11434")
self.api_url = f"{base_url}/api/generate"
def get_acoustic_profile(self, valence, arousal, semantics):
context_str = ", ".join(semantics) if semantics else "abstract scene"
# Строгий промпт с примером вывода
prompt = f"""
Analyze the visual context and emotions to determine the ideal background music properties.
Emotions: Valence {valence:.1f}/9.0 (Positivity), Arousal {arousal:.1f}/9.0 (Energy).
Visual Context: {context_str}.
Map this scene to exactly 6 acoustic features. Values MUST be floats between 0.0 and 1.0.
1. "energy": (Loudness/Density)
2. "flux": (Rhythmic sharpness/Beat)
3. "centroid": (Brightness)
4. "pitch": (Fundamental frequency)
5. "hnr": (Harmonics-to-Noise)
6. "zcr": (Percussiveness)
Return ONLY a valid JSON object. No explanations, no markdown blocks.
Example: {{"energy": 0.8, "flux": 0.5, "centroid": 0.6, "pitch": 0.4, "hnr": 0.9, "zcr": 0.3}}
"""
def _extract_json(self, raw_text: str):
# Проверка на ИИдиота, LLM иногда игнорирует format="json" и оборачивает ответ в маркдаун
try: try:
match = re.search(r'\{.*\}', raw_text, re.DOTALL) payload = {
"model": self.model_name,
"prompt": prompt,
"stream": False,
"format": "json" # Принудительный JSON-режим Ollama
}
print(f"Запрос акустического профиля к Ollama...")
response = requests.post(self.api_url, json=payload, timeout=120)
if response.status_code == 200:
data = response.json()
response_text = data.get("response", "")
try:
# 1. Попытка прямой десериализации
profile = json.loads(response_text)
return profile
except json.JSONDecodeError:
# 2. Аварийное извлечение JSON из текста с помощью регулярного выражения
match = re.search(r'\{.*\}', response_text, re.DOTALL)
if match: if match:
return json.loads(match.group(0)) return json.loads(match.group(0))
return json.loads(raw_text)
except json.JSONDecodeError:
# Если ИИдиот
return None
def get_acoustic_profile(self, v_score: float, a_score: float, scene_context: list) -> dict | None: print(f"Ошибка парсинга LLM ответа: {response_text}")
# Агрегация контекста для обработки серии снимков (события) return {}
context_merged = " | ".join(scene_context) if scene_context else "abstract scene" else:
print(f"Ollama вернула ошибку HTTP: {response.status_code}")
return {}
system_prompt = f"""You are an expert music producer and acoustic engineer. except Exception as e:
Analyze the visual context and emotions to determine the ideal background music properties. print(f"Ошибка соединения с Ollama: {str(e)}")
Emotions: Valence {v_score:.1f}/9.0 (Positivity), Arousal {a_score:.1f}/9.0 (Energy). return {}
Visual Context: {context_merged}.
Map this scene to exactly 6 acoustic features. Values MUST be floats between 0.0 and 1.0.
1. "energy": (Loudness/Density. High for massive/busy scenes, Low for calm)
2. "flux": (Rhythmic sharpness/Beat. High for action/people/cars, Low for static nature)
3. "centroid": (Brightness: 0=Dark/Bass/Massive, 1=Bright/Treble/Light)
4. "pitch": (Fundamental frequency: 0=Low pitch/Huge objects, 1=High pitch/Small objects)
5. "hnr": (Harmonics-to-Noise: 0=Noisy/Distorted textures, 1=Clear/Melodic/Smooth textures)
6. "zcr": (Percussiveness. High for detailed noise like leaves/rain, Low for solid blocks)
Return ONLY a valid JSON object. Do not add any text or explanation.
Example: {{"energy": 0.5, "flux": 0.2, "centroid": 0.4, "pitch": 0.3, "hnr": 0.8, "zcr": 0.1}}"""
try:
# Отправка промпта локальной Ollama
response = requests.post(self.api_url, json={
"model": self.model,
"prompt": system_prompt,
"stream": False,
"format": "json"
}, timeout=45)
response.raise_for_status()
raw_response = response.json().get("response", "")
profile_data = self._extract_json(raw_response)
# Валидация структуры ответа
expected_features = {'energy', 'flux', 'centroid', 'pitch', 'hnr', 'zcr'}
if profile_data and expected_features.issubset(profile_data.keys()):
return profile_data
print("LLM вернула неполный или некорректный набор акустических признаков")
return None
except requests.exceptions.RequestException as req_err:
print(f"Не удалось подключиться к Ollama: {req_err}")
return None
+461
View File
@@ -0,0 +1,461 @@
.
├── bin
│   ├── activate
│   ├── activate.csh
│   ├── activate.fish
│   ├── activate.nu
│   ├── activate.ps1
│   ├── activate_this.py
│   ├── debugpy
│   ├── debugpy-adapter
│   ├── f2py
│   ├── fonttools
│   ├── httpx
│   ├── ipython
│   ├── ipython3
│   ├── isympy
│   ├── jlpm
│   ├── jsonpointer
│   ├── jsonschema
│   ├── jupyter
│   ├── jupyter-dejavu
│   ├── jupyter-events
│   ├── jupyter-execute
│   ├── jupyter-kernel
│   ├── jupyter-kernelspec
│   ├── jupyter-lab
│   ├── jupyter-labextension
│   ├── jupyter-labhub
│   ├── jupyter-migrate
│   ├── jupyter-nbconvert
│   ├── jupyter-run
│   ├── jupyter-server
│   ├── jupyter-troubleshoot
│   ├── jupyter-trust
│   ├── normalizer
│   ├── numpy-config
│   ├── pip
│   ├── pip3
│   ├── pip3.12
│   ├── proton
│   ├── proton-viewer
│   ├── pybabel
│   ├── pyftmerge
│   ├── pyftsubset
│   ├── pygmentize
│   ├── pyjson5
│   ├── python -> /usr/bin/python3
│   ├── python3 -> python
│   ├── python3.12 -> python
│   ├── send2trash
│   ├── streamlit
│   ├── streamlit.cmd
│   ├── torchfrtrace
│   ├── torchrun
│   ├── tqdm
│   ├── ttx
│   ├── watchmedo
│   └── wsdump
├── CACHEDIR.TAG
├── docker
│   ├── Dockerfile.api
│   └── Dockerfile.ui
├── docker-compose.yml
├── Dockerfile
├── .dockerignore
├── .env
├── etc
│   └── jupyter
│   ├── jupyter_notebook_config.d
│   │   └── jupyterlab.json
│   ├── jupyter_server_config.d
│   │   ├── jupyterlab.json
│   │   ├── jupyter-lsp-jupyter-server.json
│   │   ├── jupyter_server_terminals.json
│   │   └── notebook_shim.json
│   └── nbconfig
│   └── notebook.d
├── .gitignore
├── .idea
│   ├── .gitignore
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── Thesis.iml
│   ├── vcs.xml
│   └── workspace.xml
├── lib
│   └── python3.12
│   └── site-packages
│   ├── altair
│   ├── altair-6.0.0.dist-info
│   ├── anyio
│   ├── anyio-4.12.1.dist-info
│   ├── argon2
│   ├── argon2_cffi-25.1.0.dist-info
│   ├── _argon2_cffi_bindings
│   ├── argon2_cffi_bindings-25.1.0.dist-info
│   ├── arrow
│   ├── arrow-1.4.0.dist-info
│   ├── asttokens
│   ├── asttokens-3.0.1.dist-info
│   ├── async_lru
│   ├── async_lru-2.0.5.dist-info
│   ├── attr
│   ├── attrs
│   ├── attrs-25.4.0.dist-info
│   ├── babel
│   ├── babel-2.17.0.dist-info
│   ├── beautifulsoup4-4.14.3.dist-info
│   ├── bleach
│   ├── bleach-6.3.0.dist-info
│   ├── blinker
│   ├── blinker-1.9.0.dist-info
│   ├── bs4
│   ├── cachetools
│   ├── cachetools-6.2.4.dist-info
│   ├── certifi
│   ├── certifi-2026.1.4.dist-info
│   ├── cffi
│   ├── cffi-2.0.0.dist-info
│   ├── _cffi_backend.cpython-312-x86_64-linux-gnu.so
│   ├── charset_normalizer
│   ├── charset_normalizer-3.4.4.dist-info
│   ├── click
│   ├── click-8.3.1.dist-info
│   ├── comm
│   ├── comm-0.2.3.dist-info
│   ├── contourpy
│   ├── contourpy-1.3.3.dist-info
│   ├── cycler
│   ├── cycler-0.12.1.dist-info
│   ├── dateutil
│   ├── debugpy
│   ├── debugpy-1.8.19.dist-info
│   ├── decorator-5.2.1.dist-info
│   ├── decorator.py
│   ├── defusedxml
│   ├── defusedxml-0.7.1.dist-info
│   ├── _distutils_hack
│   ├── distutils-precedence.pth
│   ├── .DS_Store
│   ├── executing
│   ├── executing-2.2.1.dist-info
│   ├── fastjsonschema
│   ├── fastjsonschema-2.21.2.dist-info
│   ├── filelock
│   ├── filelock-3.20.3.dist-info
│   ├── fontTools
│   ├── fonttools-4.61.1.dist-info
│   ├── fqdn
│   ├── fqdn-1.5.1.dist-info
│   ├── fsspec
│   ├── fsspec-2026.1.0.dist-info
│   ├── functorch
│   ├── git
│   ├── gitdb
│   ├── gitdb-4.0.12.dist-info
│   ├── gitpython-3.1.46.dist-info
│   ├── google
│   ├── h11
│   ├── h11-0.16.0.dist-info
│   ├── httpcore
│   ├── httpcore-1.0.9.dist-info
│   ├── httpx
│   ├── httpx-0.28.1.dist-info
│   ├── idna
│   ├── idna-3.11.dist-info
│   ├── ipykernel
│   ├── ipykernel-7.1.0.dist-info
│   ├── ipykernel_launcher.py
│   ├── IPython
│   ├── ipython-9.9.0.dist-info
│   ├── ipython_pygments_lexers-1.1.1.dist-info
│   ├── ipython_pygments_lexers.py
│   ├── isoduration
│   ├── isoduration-20.11.0.dist-info
│   ├── isympy.py
│   ├── jedi
│   ├── jedi-0.19.2.dist-info
│   ├── jinja2
│   ├── jinja2-3.1.6.dist-info
│   ├── joblib
│   ├── joblib-1.5.3.dist-info
│   ├── json5
│   ├── json5-0.13.0.dist-info
│   ├── jsonpointer-3.0.0.dist-info
│   ├── jsonpointer.py
│   ├── jsonschema
│   ├── jsonschema-4.26.0.dist-info
│   ├── jsonschema_specifications
│   ├── jsonschema_specifications-2025.9.1.dist-info
│   ├── jupyter_client
│   ├── jupyter_client-8.8.0.dist-info
│   ├── jupyter_core
│   ├── jupyter_core-5.9.1.dist-info
│   ├── jupyter_events
│   ├── jupyter_events-0.12.0.dist-info
│   ├── jupyterlab
│   ├── jupyterlab-4.5.1.dist-info
│   ├── jupyterlab_pygments
│   ├── jupyterlab_pygments-0.3.0.dist-info
│   ├── jupyterlab_server
│   ├── jupyterlab_server-2.28.0.dist-info
│   ├── jupyter_lsp
│   ├── jupyter_lsp-2.3.0.dist-info
│   ├── jupyter.py
│   ├── jupyter_server
│   ├── jupyter_server-2.17.0.dist-info
│   ├── jupyter_server_terminals
│   ├── jupyter_server_terminals-0.5.3.dist-info
│   ├── kiwisolver
│   ├── kiwisolver-1.4.9.dist-info
│   ├── lark
│   ├── lark-1.3.1.dist-info
│   ├── markupsafe
│   ├── markupsafe-3.0.3.dist-info
│   ├── matplotlib
│   ├── matplotlib-3.10.8.dist-info
│   ├── matplotlib_inline
│   ├── matplotlib_inline-0.2.1.dist-info
│   ├── mistune
│   ├── mistune-3.2.0.dist-info
│   ├── mpl_toolkits
│   ├── mpmath
│   ├── mpmath-1.3.0.dist-info
│   ├── narwhals
│   ├── narwhals-2.15.0.dist-info
│   ├── nbclient
│   ├── nbclient-0.10.4.dist-info
│   ├── nbconvert
│   ├── nbconvert-7.16.6.dist-info
│   ├── nbformat
│   ├── nbformat-5.10.4.dist-info
│   ├── nest_asyncio-1.6.0.dist-info
│   ├── nest_asyncio.py
│   ├── networkx
│   ├── networkx-3.6.1.dist-info
│   ├── notebook_shim
│   ├── notebook_shim-0.2.4.dist-info
│   ├── numpy
│   ├── numpy-2.4.1.dist-info
│   ├── numpy.libs
│   ├── nvidia
│   ├── nvidia_cublas_cu12-12.8.4.1.dist-info
│   ├── nvidia_cuda_cupti_cu12-12.8.90.dist-info
│   ├── nvidia_cuda_nvrtc_cu12-12.8.93.dist-info
│   ├── nvidia_cuda_runtime_cu12-12.8.90.dist-info
│   ├── nvidia_cudnn_cu12-9.10.2.21.dist-info
│   ├── nvidia_cufft_cu12-11.3.3.83.dist-info
│   ├── nvidia_cufile_cu12-1.13.1.3.dist-info
│   ├── nvidia_curand_cu12-10.3.9.90.dist-info
│   ├── nvidia_cusolver_cu12-11.7.3.90.dist-info
│   ├── nvidia_cusparse_cu12-12.5.8.93.dist-info
│   ├── nvidia_cusparselt_cu12-0.7.1.dist-info
│   ├── nvidia_nccl_cu12-2.27.5.dist-info
│   ├── nvidia_nvjitlink_cu12-12.8.93.dist-info
│   ├── nvidia_nvshmem_cu12-3.3.20.dist-info
│   ├── nvidia_nvtx_cu12-12.8.90.dist-info
│   ├── packaging
│   ├── packaging-25.0.dist-info
│   ├── pandas
│   ├── pandas-2.3.3.dist-info
│   ├── pandocfilters-1.5.1.dist-info
│   ├── pandocfilters.py
│   ├── parso
│   ├── parso-0.8.5.dist-info
│   ├── pexpect
│   ├── pexpect-4.9.0.dist-info
│   ├── PIL
│   ├── pillow-12.1.0.dist-info
│   ├── pillow.libs
│   ├── pip
│   ├── pip-25.3.dist-info
│   ├── pkg_resources
│   ├── platformdirs
│   ├── platformdirs-4.5.1.dist-info
│   ├── prometheus_client
│   ├── prometheus_client-0.23.1.dist-info
│   ├── prompt_toolkit
│   ├── prompt_toolkit-3.0.52.dist-info
│   ├── protobuf-6.33.4.dist-info
│   ├── psutil
│   ├── psutil-7.2.1.dist-info
│   ├── ptyprocess
│   ├── ptyprocess-0.7.0.dist-info
│   ├── pure_eval
│   ├── pure_eval-0.2.3.dist-info
│   ├── pyarrow
│   ├── pyarrow-22.0.0.dist-info
│   ├── pycparser
│   ├── pycparser-2.23.dist-info
│   ├── pydeck
│   ├── pydeck-0.9.1.dist-info
│   ├── pygments
│   ├── pygments-2.19.2.dist-info
│   ├── pylab.py
│   ├── pyparsing
│   ├── pyparsing-3.3.1.dist-info
│   ├── python_dateutil-2.9.0.post0.dist-info
│   ├── pythonjsonlogger
│   ├── python_json_logger-4.0.0.dist-info
│   ├── pytz
│   ├── pytz-2025.2.dist-info
│   ├── pyyaml-6.0.3.dist-info
│   ├── pyzmq-27.1.0.dist-info
│   ├── pyzmq.libs
│   ├── referencing
│   ├── referencing-0.37.0.dist-info
│   ├── requests
│   ├── requests-2.32.5.dist-info
│   ├── rfc3339_validator-0.1.4.dist-info
│   ├── rfc3339_validator.py
│   ├── rfc3986_validator-0.1.1.dist-info
│   ├── rfc3986_validator.py
│   ├── rfc3987_syntax
│   ├── rfc3987_syntax-1.1.0.dist-info
│   ├── rpds
│   ├── rpds_py-0.30.0.dist-info
│   ├── scikit_learn-1.8.0.dist-info
│   ├── scikit_learn.libs
│   ├── scipy
│   ├── scipy-1.17.0.dist-info
│   ├── scipy.libs
│   ├── send2trash
│   ├── send2trash-2.0.0.dist-info
│   ├── setuptools
│   ├── setuptools-80.9.0.dist-info
│   ├── six-1.17.0.dist-info
│   ├── six.py
│   ├── sklearn
│   ├── smmap
│   ├── smmap-5.0.2.dist-info
│   ├── soupsieve
│   ├── soupsieve-2.8.1.dist-info
│   ├── stack_data
│   ├── stack_data-0.6.3.dist-info
│   ├── streamlit
│   ├── streamlit-1.53.0.dist-info
│   ├── sympy
│   ├── sympy-1.14.0.dist-info
│   ├── tenacity
│   ├── tenacity-9.1.2.dist-info
│   ├── terminado
│   ├── terminado-0.18.1.dist-info
│   ├── threadpoolctl-3.6.0.dist-info
│   ├── threadpoolctl.py
│   ├── tinycss2
│   ├── tinycss2-1.4.0.dist-info
│   ├── toml
│   ├── toml-0.10.2.dist-info
│   ├── torch
│   ├── torch-2.9.1.dist-info
│   ├── torchaudio
│   ├── torchaudio-2.9.1.dist-info
│   ├── torchgen
│   ├── torchvision
│   ├── torchvision-0.24.1.dist-info
│   ├── torchvision.libs
│   ├── tornado
│   ├── tornado-6.5.4.dist-info
│   ├── tqdm
│   ├── tqdm-4.67.1.dist-info
│   ├── traitlets
│   ├── traitlets-5.14.3.dist-info
│   ├── triton
│   ├── triton-3.5.1.dist-info
│   ├── typing_extensions-4.15.0.dist-info
│   ├── typing_extensions.py
│   ├── tzdata
│   ├── tzdata-2025.3.dist-info
│   ├── uri_template
│   ├── uri_template-1.3.0.dist-info
│   ├── urllib3
│   ├── urllib3-2.6.3.dist-info
│   ├── _virtualenv.pth
│   ├── _virtualenv.py
│   ├── watchdog
│   ├── watchdog-6.0.0.dist-info
│   ├── wcwidth
│   ├── wcwidth-0.2.14.dist-info
│   ├── webcolors
│   ├── webcolors-25.10.0.dist-info
│   ├── webencodings
│   ├── webencodings-0.5.1.dist-info
│   ├── websocket
│   ├── websocket_client-1.9.0.dist-info
│   ├── _yaml
│   ├── yaml
│   └── zmq
├── Makefile
├── NFS
├── poetry.lock
├── pyproject.toml
├── pyvenv.cfg
├── README.md
├── requirements.txt
├── runs
├── share
│   ├── applications
│   │   └── jupyterlab.desktop
│   ├── icons
│   │   └── hicolor
│   │       └── scalable
│   ├── jupyter
│   │   ├── kernels
│   │   │   └── python3
│   │   ├── lab
│   │   │   ├── schemas
│   │   │   ├── static
│   │   │   └── themes
│   │   ├── labextensions
│   │   │   └── jupyterlab_pygments
│   │   ├── nbconvert
│   │   │   └── templates
│   │   └── nbextensions
│   │       └── pydeck
│   └── man
│       └── man1
│           ├── ipython.1
│           ├── isympy.1
│           └── ttx.1
├── src
│   ├── 5_epoch_emoset_resnet50_finetuned_2.41M.pth
│   ├── api.py
│   ├── data_loader.py
│   ├── dataset_paths_cache.pkl
│   ├── emoset_resnet50_best.pth
│   ├── emoset_resnet50_finetuned_2_41M_best.pth
│   ├── emoset_resnet50_resume.pth
│   ├── emoset_test_embeddings.npy
│   ├── emoset_test_labels.npy
│   ├── main.py
│   ├── music_engine
│   │   ├── image_processor.py
│   │   ├── __init__.py
│   │   ├── llm_bridge.py
│   │   ├── matcher.py
│   │   └── va_regressor.pkl
│   ├── scripts
│   │   ├── 00_setup_env.sh
│   │   ├── 01_download_DEAM.py
│   │   ├── 02_download_EmoSet.py
│   │   ├── 11_prerp_DEAM.py
│   │   ├── 20_bench_GPU.py
│   │   ├── 21_train_images.ipynb
│   │   ├── 22_extract_embeddings.ipynb
│   │   ├── 23_aggregate_DEAM_timeline.py
│   │   ├── 24_train_regressor.py
│   │   ├── 31_finetune_2.41M.py
│   │   ├── 90_acc_images_model.ipynb
│   │   └── 91_generate_metrics.py
│   └── tabs
│       ├── tab_dataset.py
│       └── tab_live.py
├── tree.txt
└── .vscode
    ├── launch.json
    └── tasks.json
322 directories, 137 files