feat: finale

2026-06-03 09:16:12 +00:00
parent 3850b15053
commit a57addcbb1
9 changed files with 807 additions and 176 deletions
@@ -32,7 +32,11 @@ class ImageProcessor:

        # Модуль семантического описания сцены
        print("Инициализация BLIP-2...")
-        self.blip_processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
+        # Обход бага конфигурации Hugging Face (ручная сборка процессора)
+        from transformers import BlipImageProcessor, AutoTokenizer
+        img_proc = BlipImageProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
+        tok = AutoTokenizer.from_pretrained("Salesforce/blip2-opt-2.7b", use_fast=False)
+        self.blip_processor = Blip2Processor(image_processor=img_proc, tokenizer=tok)
        self.blip_model = Blip2ForConditionalGeneration.from_pretrained(
            "Salesforce/blip2-opt-2.7b", 
            torch_dtype=torch.float16 
@@ -1,65 +1,95 @@
-import re
+import os
 import json
+import re
 import requests

 class LLMAcousticBridge:
-    def __init__(self, target_model="dolphin-llama3:8b"):
-        self.api_url = "http://localhost:11434/api/generate"
-        self.model = target_model
+    """Ask an Ollama-hosted LLM to map scene emotions and context to acoustic features."""
+
+    def __init__(self, model_name="dolphin-llama3:8b"):
+        """Store the model name and build the Ollama generate-endpoint URL.
+
+        Args:
+            model_name: Name of the Ollama model sent in each request payload.
+        """
+        self.model_name = model_name
+        # Dynamic URL selection: OLLAMA_API_URL overrides the default
+        # emom_ollama host, which is the one used inside Docker.
+        base_url = os.getenv("OLLAMA_API_URL", "http://emom_ollama:11434")
+        # Tolerate a trailing slash so the endpoint path has no "//".
+        self.api_url = f"{base_url.rstrip('/')}/api/generate"

-    def _extract_json(self, raw_text: str):
-        # Проверка на ИИдиота, LLM иногда игнорирует format="json" и оборачивает ответ в маркдаун
-        try:
-            match = re.search(r'\{.*\}', raw_text, re.DOTALL)
-            if match:
-                return json.loads(match.group(0))
-            return json.loads(raw_text)
-        except json.JSONDecodeError:
-            # Если ИИдиот
-            return None
-
-    def get_acoustic_profile(self, v_score: float, a_score: float, scene_context: list) -> dict | None:
-        # Агрегация контекста для обработки серии снимков (события)
-        context_merged = " | ".join(scene_context) if scene_context else "abstract scene"
+    def get_acoustic_profile(self, valence, arousal, semantics):
+        """Query the LLM for an acoustic profile matching the scene.
+
+        Args:
+            valence: Positivity score, rendered in the prompt on a /9.0 scale.
+            arousal: Energy score, rendered in the prompt on a /9.0 scale.
+            semantics: Strings describing the scene, joined with ", ";
+                when empty or None the context is "abstract scene".
+
+        Returns:
+            The JSON object decoded from the model reply, or an empty dict on
+            an HTTP error, an unparseable or non-object reply, or any
+            exception raised while talking to Ollama.
+        """
+        context_str = ", ".join(semantics) if semantics else "abstract scene"
+
+        # Strict prompt with an output example
+        prompt = f"""
+        Analyze the visual context and emotions to determine the ideal background music properties. 
+        Emotions: Valence {valence:.1f}/9.0 (Positivity), Arousal {arousal:.1f}/9.0 (Energy). 
+        Visual Context: {context_str}. 
+        Map this scene to exactly 6 acoustic features. Values MUST be floats between 0.0 and 1.0.
+        
+        1. "energy": (Loudness/Density)
+        2. "flux": (Rhythmic sharpness/Beat)
+        3. "centroid": (Brightness)
+        4. "pitch": (Fundamental frequency)
+        5. "hnr": (Harmonics-to-Noise)
+        6. "zcr": (Percussiveness)
+        
+        Return ONLY a valid JSON object. No explanations, no markdown blocks.
+        Example: {{"energy": 0.8, "flux": 0.5, "centroid": 0.6, "pitch": 0.4, "hnr": 0.9, "zcr": 0.3}}
+        """
         
-        system_prompt = f"""You are an expert music producer and acoustic engineer. 
-Analyze the visual context and emotions to determine the ideal background music properties.
-Emotions: Valence {v_score:.1f}/9.0 (Positivity), Arousal {a_score:.1f}/9.0 (Energy).
-Visual Context: {context_merged}.
-
-Map this scene to exactly 6 acoustic features. Values MUST be floats between 0.0 and 1.0.
-1. "energy": (Loudness/Density. High for massive/busy scenes, Low for calm)
-2. "flux": (Rhythmic sharpness/Beat. High for action/people/cars, Low for static nature)
-3. "centroid": (Brightness: 0=Dark/Bass/Massive, 1=Bright/Treble/Light)
-4. "pitch": (Fundamental frequency: 0=Low pitch/Huge objects, 1=High pitch/Small objects)
-5. "hnr": (Harmonics-to-Noise: 0=Noisy/Distorted textures, 1=Clear/Melodic/Smooth textures)
-6. "zcr": (Percussiveness. High for detailed noise like leaves/rain, Low for solid blocks)
-
-Return ONLY a valid JSON object. Do not add any text or explanation.
-Example: {{"energy": 0.5, "flux": 0.2, "centroid": 0.4, "pitch": 0.3, "hnr": 0.8, "zcr": 0.1}}"""
-
         try:
-            # Отправка промпта локальной Ollama
-            response = requests.post(self.api_url, json={
-                "model": self.model,
-                "prompt": system_prompt,
+            payload = {
+                "model": self.model_name,
+                "prompt": prompt,
                 "stream": False,
-                "format": "json"
-            }, timeout=45)
-            response.raise_for_status()
+                "format": "json"  # Force Ollama's JSON mode
+            }
             
-            raw_response = response.json().get("response", "")
-            profile_data = self._extract_json(raw_response)
+            print("Запрос акустического профиля к Ollama...")
+            response = requests.post(self.api_url, json=payload, timeout=120)
             
-            # Валидация структуры ответа
-            expected_features = {'energy', 'flux', 'centroid', 'pitch', 'hnr', 'zcr'}
-            
-            if profile_data and expected_features.issubset(profile_data.keys()):
-                return profile_data
+            if response.status_code != 200:
+                print(f"Ollama вернула ошибку HTTP: {response.status_code}")
+                return {}
                 
-            print("LLM вернула неполный или некорректный набор акустических признаков")
-            return None
-            
-        except requests.exceptions.RequestException as req_err:
-            print(f"Не удалось подключиться к Ollama: {req_err}")
-            return None
+            response_text = response.json().get("response", "")
+
+            # Try the raw reply first; if the model wrapped the JSON in prose
+            # or markdown, fall back to the outermost {...} span.
+            candidates = [response_text]
+            match = re.search(r'\{.*\}', response_text, re.DOTALL)
+            if match:
+                candidates.append(match.group(0))
+
+            for candidate in candidates:
+                try:
+                    profile = json.loads(candidate)
+                except json.JSONDecodeError:
+                    continue
+                # Every other path returns a dict, so reject lists/scalars/null.
+                if isinstance(profile, dict):
+                    return profile
+
+            print(f"Ошибка парсинга LLM ответа: {response_text}")
+            return {}
+
+        # Best-effort boundary: any failure while talking to Ollama yields {}.
+        except Exception as e:
+            print(f"Ошибка соединения с Ollama: {str(e)}")
+            return {}