feat: finale

This commit is contained in:
zin
2026-06-03 09:16:12 +00:00
parent 3850b15053
commit a57addcbb1
9 changed files with 807 additions and 176 deletions
+5 -1
View File
@@ -32,7 +32,11 @@ class ImageProcessor:
# Модуль семантического описания сцены
print("Инициализация BLIP-2...")
self.blip_processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
# Обход бага конфигурации Hugging Face (ручная сборка процессора)
from transformers import BlipImageProcessor, AutoTokenizer
img_proc = BlipImageProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
tok = AutoTokenizer.from_pretrained("Salesforce/blip2-opt-2.7b", use_fast=False)
self.blip_processor = Blip2Processor(image_processor=img_proc, tokenizer=tok)
self.blip_model = Blip2ForConditionalGeneration.from_pretrained(
"Salesforce/blip2-opt-2.7b",
torch_dtype=torch.float16
+56 -54
View File
@@ -1,65 +1,67 @@
import re
import os
import json
import re
import requests
class LLMAcousticBridge:
def __init__(self, target_model="dolphin-llama3:8b",
             api_url="http://localhost:11434/api/generate"):
    """Store the model name and the generate endpoint used for requests.

    Args:
        target_model: Name of the model sent in the request payload.
        api_url: Full URL of the generate endpoint. Defaults to the
            previously hard-coded local address, so existing callers
            are unaffected.
    """
    self.api_url = api_url
    self.model = target_model
def __init__(self, model_name="dolphin-llama3:8b"):
    """Store the model name and build the generate endpoint URL.

    The base URL is read from the ``OLLAMA_API_URL`` environment variable
    and falls back to ``http://emom_ollama:11434`` (the address used
    inside Docker, per the original comment).

    Args:
        model_name: Name of the model sent in the request payload.
    """
    self.model_name = model_name
    # Treat an unset, empty or whitespace-only variable as "not configured",
    # and drop trailing slashes so the joined path never becomes
    # "//api/generate" or a schemeless "/api/generate".
    base_url = (os.getenv("OLLAMA_API_URL") or "").strip().rstrip("/")
    if not base_url:
        base_url = "http://emom_ollama:11434"
    self.api_url = f"{base_url}/api/generate"
def _extract_json(self, raw_text: str):
    """Best-effort extraction of a JSON value from an LLM response.

    The model sometimes ignores the requested JSON format and wraps the
    object in markdown or surrounding prose, so the text is scanned for
    the first position at which a complete JSON object can be decoded.

    Args:
        raw_text: Raw response text returned by the model.

    Returns:
        The first JSON object that decodes successfully starting at a
        ``{`` in the text; otherwise the result of parsing the whole text
        as JSON; ``None`` when nothing can be parsed or the input is not
        a string.
    """
    if not isinstance(raw_text, str):
        return None

    decoder = json.JSONDecoder()
    # raw_decode stops at the end of the first complete value, so trailing
    # prose, code fences or stray braces after the object are ignored
    # (a greedy "{.*}" match would swallow them and fail to parse).
    start = raw_text.find("{")
    while start != -1:
        try:
            parsed, _end = decoder.raw_decode(raw_text[start:])
        except json.JSONDecodeError:
            start = raw_text.find("{", start + 1)
        else:
            return parsed

    # No embedded object decoded: fall back to parsing the text as a whole.
    try:
        return json.loads(raw_text)
    except json.JSONDecodeError:
        return None
# NOTE(review): this span appears to interleave the removed and the added
# version of get_acoustic_profile from a diff view (two `def` lines, two
# prompts, two request/parse paths) with indentation lost — confirm against
# the real file before relying on statement order.
def get_acoustic_profile(self, v_score: float, a_score: float, scene_context: list) -> dict | None:
# Aggregate the context so a series of shots (an event) can be processed
context_merged = " | ".join(scene_context) if scene_context else "abstract scene"
def get_acoustic_profile(self, valence, arousal, semantics):
context_str = ", ".join(semantics) if semantics else "abstract scene"
# Strict prompt with an example of the expected output
prompt = f"""
Analyze the visual context and emotions to determine the ideal background music properties.
Emotions: Valence {valence:.1f}/9.0 (Positivity), Arousal {arousal:.1f}/9.0 (Energy).
Visual Context: {context_str}.
Map this scene to exactly 6 acoustic features. Values MUST be floats between 0.0 and 1.0.
1. "energy": (Loudness/Density)
2. "flux": (Rhythmic sharpness/Beat)
3. "centroid": (Brightness)
4. "pitch": (Fundamental frequency)
5. "hnr": (Harmonics-to-Noise)
6. "zcr": (Percussiveness)
Return ONLY a valid JSON object. No explanations, no markdown blocks.
Example: {{"energy": 0.8, "flux": 0.5, "centroid": 0.6, "pitch": 0.4, "hnr": 0.9, "zcr": 0.3}}
"""
system_prompt = f"""You are an expert music producer and acoustic engineer.
Analyze the visual context and emotions to determine the ideal background music properties.
Emotions: Valence {v_score:.1f}/9.0 (Positivity), Arousal {a_score:.1f}/9.0 (Energy).
Visual Context: {context_merged}.
Map this scene to exactly 6 acoustic features. Values MUST be floats between 0.0 and 1.0.
1. "energy": (Loudness/Density. High for massive/busy scenes, Low for calm)
2. "flux": (Rhythmic sharpness/Beat. High for action/people/cars, Low for static nature)
3. "centroid": (Brightness: 0=Dark/Bass/Massive, 1=Bright/Treble/Light)
4. "pitch": (Fundamental frequency: 0=Low pitch/Huge objects, 1=High pitch/Small objects)
5. "hnr": (Harmonics-to-Noise: 0=Noisy/Distorted textures, 1=Clear/Melodic/Smooth textures)
6. "zcr": (Percussiveness. High for detailed noise like leaves/rain, Low for solid blocks)
Return ONLY a valid JSON object. Do not add any text or explanation.
Example: {{"energy": 0.5, "flux": 0.2, "centroid": 0.4, "pitch": 0.3, "hnr": 0.8, "zcr": 0.1}}"""
try:
# Send the prompt to the local Ollama instance
response = requests.post(self.api_url, json={
"model": self.model,
"prompt": system_prompt,
payload = {
"model": self.model_name,
"prompt": prompt,
"stream": False,
"format": "json"
}, timeout=45)
response.raise_for_status()
"format": "json" # Force Ollama's JSON output mode
}
raw_response = response.json().get("response", "")
profile_data = self._extract_json(raw_response)
print(f"Запрос акустического профиля к Ollama...")
response = requests.post(self.api_url, json=payload, timeout=120)
# Validate the structure of the response
expected_features = {'energy', 'flux', 'centroid', 'pitch', 'hnr', 'zcr'}
if profile_data and expected_features.issubset(profile_data.keys()):
return profile_data
if response.status_code == 200:
data = response.json()
response_text = data.get("response", "")
print("LLM вернула неполный или некорректный набор акустических признаков")
return None
except requests.exceptions.RequestException as req_err:
print(f"Не удалось подключиться к Ollama: {req_err}")
return None
try:
# 1. Attempt direct deserialization
profile = json.loads(response_text)
return profile
except json.JSONDecodeError:
# 2. Fallback: extract the JSON from the text with a regular expression
match = re.search(r'\{.*\}', response_text, re.DOTALL)
if match:
return json.loads(match.group(0))
print(f"Ошибка парсинга LLM ответа: {response_text}")
return {}
else:
print(f"Ollama вернула ошибку HTTP: {response.status_code}")
return {}
except Exception as e:
print(f"Ошибка соединения с Ollama: {str(e)}")
return {}