feat: finale
This commit is contained in:
@@ -32,7 +32,11 @@ class ImageProcessor:
|
||||
|
||||
# Модуль семантического описания сцены
|
||||
print("Инициализация BLIP-2...")
|
||||
self.blip_processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
|
||||
# Обход бага конфигурации Hugging Face (ручная сборка процессора)
|
||||
from transformers import BlipImageProcessor, AutoTokenizer
|
||||
img_proc = BlipImageProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
|
||||
tok = AutoTokenizer.from_pretrained("Salesforce/blip2-opt-2.7b", use_fast=False)
|
||||
self.blip_processor = Blip2Processor(image_processor=img_proc, tokenizer=tok)
|
||||
self.blip_model = Blip2ForConditionalGeneration.from_pretrained(
|
||||
"Salesforce/blip2-opt-2.7b",
|
||||
torch_dtype=torch.float16
|
||||
|
||||
@@ -1,65 +1,67 @@
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
import requests
|
||||
|
||||
class LLMAcousticBridge:
    """Ask an Ollama-served LLM to map a scene onto six acoustic features.

    The bridge sends a prompt built from valence/arousal scores and scene
    descriptions to the Ollama ``/api/generate`` endpoint and parses the
    JSON object found in the model's reply.
    """

    def __init__(self, model_name="dolphin-llama3:8b"):
        """Store the model name and resolve the Ollama endpoint URL.

        Args:
            model_name: Name of the Ollama model to query.
        """
        self.model_name = model_name
        # Alias for code that still reads the attribute name used by the
        # earlier revision of this class.
        self.model = model_name

        # The base URL is taken from OLLAMA_API_URL; an unset or empty
        # variable falls back to the default host name used in the source.
        base_url = os.getenv("OLLAMA_API_URL") or "http://emom_ollama:11434"
        # Strip trailing slashes so the path is not joined as "//api/generate".
        self.api_url = f"{base_url.rstrip('/')}/api/generate"

    def _extract_json(self, raw_text: str):
        """Return the first JSON object found in ``raw_text``, or ``None``.

        The model sometimes ignores ``format="json"`` and wraps its answer in
        markdown or extra prose, so the text is first parsed as a whole and,
        failing that, scanned for the first decodable ``{...}`` object.

        Args:
            raw_text: Text returned by the model.

        Returns:
            The decoded ``dict``, or ``None`` when ``raw_text`` is not a
            string or contains no decodable JSON object.
        """
        if not isinstance(raw_text, str):
            return None

        try:
            parsed = json.loads(raw_text)
        except json.JSONDecodeError:
            parsed = None
        # Only a JSON object is a usable profile; lists/numbers are rejected.
        if isinstance(parsed, dict):
            return parsed

        # Try each "{" as a possible start of an object. Unlike a greedy
        # regex, this stops at the end of the first well-formed object, so
        # trailing text containing braces does not break the parse.
        decoder = json.JSONDecoder()
        start = raw_text.find("{")
        while start != -1:
            try:
                candidate, _ = decoder.raw_decode(raw_text, start)
            except json.JSONDecodeError:
                pass
            else:
                if isinstance(candidate, dict):
                    return candidate
            start = raw_text.find("{", start + 1)
        return None

    def _build_prompt(self, valence, arousal, semantics):
        """Build the prompt text sent to the model."""
        if isinstance(semantics, str):
            # A bare string would otherwise be joined character by character.
            semantics = [semantics]
        if semantics:
            context_str = ", ".join(str(item) for item in semantics)
        else:
            context_str = "abstract scene"

        # Strict prompt with an example of the expected output.
        lines = (
            "",
            "Analyze the visual context and emotions to determine the ideal background music properties.",
            f"Emotions: Valence {valence:.1f}/9.0 (Positivity), Arousal {arousal:.1f}/9.0 (Energy).",
            f"Visual Context: {context_str}.",
            "Map this scene to exactly 6 acoustic features. Values MUST be floats between 0.0 and 1.0.",
            "",
            '1. "energy": (Loudness/Density)',
            '2. "flux": (Rhythmic sharpness/Beat)',
            '3. "centroid": (Brightness)',
            '4. "pitch": (Fundamental frequency)',
            '5. "hnr": (Harmonics-to-Noise)',
            '6. "zcr": (Percussiveness)',
            "",
            "Return ONLY a valid JSON object. No explanations, no markdown blocks.",
            'Example: {"energy": 0.8, "flux": 0.5, "centroid": 0.6, "pitch": 0.4, "hnr": 0.9, "zcr": 0.3}',
            "",
        )
        return "\n".join(lines)

    def get_acoustic_profile(self, valence, arousal, semantics):
        """Query Ollama for an acoustic profile of the described scene.

        Args:
            valence: Valence score, formatted into the prompt on a /9.0 scale.
            arousal: Arousal score, formatted into the prompt on a /9.0 scale.
            semantics: Scene descriptions (a list of strings, or one string);
                an empty value is described as "abstract scene".

        Returns:
            The JSON object parsed from the model reply, or an empty dict
            when the request fails, Ollama answers with a non-200 status, or
            the reply contains no JSON object.
        """
        payload = {
            "model": self.model_name,
            "prompt": self._build_prompt(valence, arousal, semantics),
            "stream": False,
            "format": "json",  # force Ollama's JSON mode
        }

        print("Запрос акустического профиля к Ollama...")
        try:
            response = requests.post(self.api_url, json=payload, timeout=120)
        except requests.exceptions.RequestException as e:
            print(f"Ошибка соединения с Ollama: {str(e)}")
            return {}

        if response.status_code != 200:
            print(f"Ollama вернула ошибку HTTP: {response.status_code}")
            return {}

        try:
            data = response.json()
        except ValueError:
            # The HTTP body itself was not JSON; report it as a parse error
            # rather than as a connection failure.
            print(f"Ошибка парсинга LLM ответа: {response.text}")
            return {}

        response_text = data.get("response", "") if isinstance(data, dict) else ""
        profile = self._extract_json(response_text)
        if profile is None:
            print(f"Ошибка парсинга LLM ответа: {response_text}")
            return {}
        return profile
|
||||
Reference in New Issue
Block a user