feat: add mobile UI

2026-05-28 21:57:41 +00:00
parent fde8dbf2e7
commit c631c5649a
2 changed files with 252 additions and 75 deletions
@@ -1,4 +1,5 @@
 import streamlit as st
 import streamlit.components.v1 as components
 import sys
 import os
 import subprocess
@@ -8,7 +9,7 @@ from tabs.tab_dataset import render_dataset_tab
 from tabs.tab_live import render_live_tab
 # ----------------------------
-# 1️⃣ Запуск приложения
+# Запуск приложения
 # ----------------------------
 if __name__ == "__main__":
    if "STREAMLIT_RUN" not in os.environ:
@@ -17,27 +18,77 @@ if __name__ == "__main__":
        subprocess.run(cmd)
        sys.exit()
-st.set_page_config(page_title="Thesis Demo", layout="wide")
+# Автоматическое определение типа устройства через URL query parameters
 # Считывание происходит до set_page_config, что позволяет динамически менять layout
 viewport = st.query_params.get("viewport", "desktop")
 layout_mode = "centered" if viewport == "mobile" else "wide"
 st.set_page_config(page_title="Thesis Demo", layout=layout_mode)
 # Внедрение легковесного JavaScript-детектора для определения ширины экрана
 # Перезагружает контекст Streamlit один раз при инициализации сессии, исключая циклическую перезагрузку
 components.html(
    """
    <script>
    const width = window.parent.innerWidth;
    const height = window.parent.innerHeight;
    const currentUrl = new URL(window.parent.location.href);
    // Интерфейс признается мобильным, если экран находится в портретном режиме 
    // (высота больше ширины, что актуально для вертикальных 2.5K мониторов)
    // либо если абсолютная ширина физически мала.
    const isPortrait = height > width;
    const isSmallWidth = width < 768;
    const targetViewport = (isPortrait || isSmallWidth) ? "mobile" : "desktop";
    if (currentUrl.searchParams.get("viewport") !== targetViewport) {
        currentUrl.searchParams.set("viewport", targetViewport);
        window.parent.location.href = currentUrl.href;
    }
    </script>
    """,
    height=0,
    width=0,
 )
 # Глобальная инъекция базовых CSS-стилей для адаптации медиаконтента
 st.markdown(
    """
    <style>
    img {
        max-width: 100%;
        height: auto;
        object-fit: contain;
    }
    [data-testid="stMetricValue"] {
        font-size: 1.8rem;
    }
    </style>
    """,
    unsafe_allow_html=True
 )
 # ----------------------------
-# 2️⃣ Инициализация движка и данных
+# Инициализация движка и данных
 # ----------------------------
 matcher = load_music_engine()
 image_processor = load_image_processor()
 image_files, embeddings, labels_array, images_path = load_emoset_data()
 # ----------------------------
-# 3️⃣ Интерфейс и Вкладки
+# Интерфейс и Вкладки
 # ----------------------------
-st.title("🖼️ Генератор саундтреков (Research Demo)")
+st.title("Генератор саундтреков (Research Demo)")
-tab1, tab2 = st.tabs(["📊 Отладка (Датасет EmoSet)", "📸 Анализ событий (Свои фото)"])
+# Изменен порядок: Анализ событий стал первой активной вкладкой
 tab1, tab2 = st.tabs(["Анализ событий (Свои фото)", "Отладка (Датасет EmoSet)"])
 with tab1:
    render_dataset_tab(matcher, image_files, embeddings, labels_array, images_path)
 with tab2:
    if image_processor:
        render_live_tab(matcher, image_processor)
    else:
        st.error("Система обработки изображений недоступна (не найдены веса ResNet).")
 with tab2:
    render_dataset_tab(matcher, image_files, embeddings, labels_array, images_path)
@@ -1,82 +1,208 @@
 import streamlit as st
 import streamlit.components.v1 as components
 import numpy as np
 from PIL import Image
-import matplotlib.pyplot as plt
+import base64
-from music_engine.llm_bridge import LLMAcousticBridge # ИМПОРТИРУЕМ МОСТ
+from io import BytesIO
 from music_engine.llm_bridge import LLMAcousticBridge
 def get_thumbnail_html(images, max_display=12):
    """Build an HTML strip of small square previews for the given images.

    Each of the first ``max_display`` entries is opened with PIL, shrunk to
    fit within 100x100 px, re-encoded as JPEG and inlined as a base64
    ``data:`` URI, so the returned markup needs no external files.

    Args:
        images: Sequence of items accepted by ``Image.open`` (file paths or
            binary file-like objects).
        max_display: Maximum number of thumbnails to render; any remaining
            images are summarised by a single ``+N`` badge.

    Returns:
        An HTML string: one flex-wrap ``<div>`` holding the ``<img>`` tags
        and, when images were left out, the ``+N`` badge.
    """
    fragments = []
    for file in images[:max_display]:
        # Close the image once it has been encoded so that handles PIL opened
        # from a path are not leaked.
        # NOTE(review): Pillow is expected to leave caller-supplied streams
        # open here -- confirm, since the same uploads are re-opened later
        # during analysis.
        with Image.open(file) as img:
            img.thumbnail((100, 100))  # shrink in place, keeps aspect ratio
            # Normalise to RGB first: modes such as RGBA or P cannot be
            # written as JPEG directly.
            rgb = img if img.mode == "RGB" else img.convert("RGB")
            buffered = BytesIO()
            rgb.save(buffered, format="JPEG")
        b64_str = base64.b64encode(buffered.getvalue()).decode()
        # Strict inline styles keep every thumbnail a 60x60 square.
        fragments.append(
            f'<img src="data:image/jpeg;base64,{b64_str}" style="width: 60px; height: 60px; object-fit: cover; border-radius: 8px; margin-right: 8px; margin-bottom: 8px; border: 1px solid rgba(255, 255, 255, 0.2);">'
        )
    # Badge with the number of images that did not fit into the preview.
    remaining = len(images) - max_display
    if remaining > 0:
        fragments.append(
            f'<span style="display: inline-block; width: 60px; height: 60px; line-height: 60px; text-align: center; background: rgba(150, 150, 150, 0.2); border-radius: 8px; vertical-align: top; font-size: 14px;">+{remaining}</span>'
        )
    html_images = "".join(fragments)
    return f'<div style="display: flex; flex-wrap: wrap;">{html_images}</div>'
 def render_live_tab(matcher, image_processor):
-    st.write("Загрузите фотографии с вашего устройства. Система проанализирует эмоции и семантику кадра.")
+    if "live_state" not in st.session_state:
        st.session_state.live_state = "upload"
    if "result_data" not in st.session_state:
        st.session_state.result_data = None
-    uploaded_files = st.file_uploader(
+    viewport = st.query_params.get("viewport", "desktop")
        "Перетащите изображения сюда", 
        type=['png', 'jpg', 'jpeg'], 
        accept_multiple_files=True
    )
-    if uploaded_files:
+    # ==========================================
-        st.subheader("Анализ визуальных признаков:")
+    # CSS ИНЪЕКЦИИ
    # ==========================================
    st.markdown("""
    <style>
    [data-testid="stFileUploadDropzone"] {
        min-height: 250px !important;
        display: flex;
        align-items: center;
        justify-content: center;
        border-radius: 16px;
        background-color: rgba(255, 75, 75, 0.03);
    }
    .spinner-container {
        display: flex; flex-direction: column; align-items: center;
        justify-content: center; min-height: 40vh; margin-top: 10vh;
    }
    .big-spinner {
        width: 120px; height: 120px; border: 10px solid rgba(255, 75, 75, 0.1);
        border-top: 10px solid #ff4b4b; border-radius: 50%;
        animation: spin 1s linear infinite; margin-bottom: 2rem;
    }
    @keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }
    </style>
    """, unsafe_allow_html=True)
    # ==========================================
    # ЭКРАН 1: ЗАГРУЗКА
    # ==========================================
    if st.session_state.live_state == "upload":
        upload_placeholder = st.empty()
        with upload_placeholder.container():
            st.write("Загрузите фотографии с вашего устройства. Система проанализирует эмоции и семантику кадра.")
            if viewport == "mobile":
                st.markdown("<br>", unsafe_allow_html=True)
            uploaded_files = st.file_uploader(
                "Перетащите изображения сюда", 
                type=['png', 'jpg', 'jpeg'], 
                accept_multiple_files=True,
                label_visibility="collapsed" if viewport == "mobile" else "visible"
            )
            if uploaded_files:
                # 1. КНОПКА СРАЗУ ПОСЛЕ ЗАГРУЗКИ (Не нужно скроллить вниз)
                st.markdown("<br>", unsafe_allow_html=True)
                if st.button("Сгенерировать саундтрек", type="primary", use_container_width=True):
                    st.session_state.uploaded_images = uploaded_files
                    st.session_state.live_state = "processing"
                    upload_placeholder.empty()
                    st.rerun()
                # 2. МИНИАТЮРЫ ПОД КНОПКОЙ
                st.markdown("<br>", unsafe_allow_html=True)
                st.caption("Выбранные кадры:")
                # Генерируем компактный блок миниатюр
                st.markdown(get_thumbnail_html(uploaded_files), unsafe_allow_html=True)
    # ==========================================
    # ЭКРАН 2: АНАЛИЗ (СПИННЕР)
    # ==========================================
    elif st.session_state.live_state == "processing":
        components.html("<script>window.parent.scrollTo(0, 0);</script>", height=0, width=0)
        files = st.session_state.get("uploaded_images", [])
        st.markdown('<div class="spinner-container"><div class="big-spinner"></div></div>', unsafe_allow_html=True)
        status_text = st.empty()
        cols = st.columns(min(len(uploaded_files), 5))
        images = []
        all_objects = []
        all_v, all_a = [], []
        for i, file in enumerate(files):
            status_text.markdown(f"<h3 style='text-align: center; font-weight: 400;'>Анализ кадра {i + 1} из {len(files)}...</h3>", unsafe_allow_html=True)
        for i, file in enumerate(uploaded_files):
            img = Image.open(file)
            images.append(img)
            with cols[i % 5]:
                st.image(img, use_container_width=True)
                with st.spinner("VLM Анализ..."):
                    caption = image_processor.describe_scene(img)
                    st.caption(f"*{caption.capitalize()}*")
                    all_objects.append(caption)
-        if st.button("Сгенерировать саундтрек", type="primary", use_container_width=True):
+            embedding = image_processor.extract_embedding(img)
            v, a = matcher.predict_va(embedding)
            all_v.append(v)
            all_a.append(a)
-            # 1. Извлекаем эмоции
+            caption = image_processor.describe_scene(img)
-            all_v, all_a = [], []
+            all_objects.append(caption)
            for img in images:
                embedding = image_processor.extract_embedding(img)
                v, a = matcher.predict_va(embedding)
                all_v.append(v)
                all_a.append(a)
-            target_v, target_a = np.mean(all_v), np.mean(all_a)
+        target_v, target_a = np.mean(all_v), np.mean(all_a)
-            # 2. Переводим Объекты -> Акустику через LLM
+        status_text.markdown("<h3 style='text-align: center; font-weight: 400;'>Трансляция семантики в аудиопрофиль...</h3>", unsafe_allow_html=True)
-            with st.spinner("Phi-3 генерирует акустический профиль..."):
+        llm = LLMAcousticBridge()
-                llm = LLMAcousticBridge()
+        llm_profile = llm.get_acoustic_profile(target_v, target_a, list(set(all_objects)))
                llm_profile = llm.get_acoustic_profile(target_v, target_a, list(set(all_objects)))
-            # 3. Ищем треки
+        status_text.markdown("<h3 style='text-align: center; font-weight: 400;'>Поиск идеальных композиций...</h3>", unsafe_allow_html=True)
-            with st.spinner("Поиск треков в базе DEAM..."):
+        playlist = matcher.find_nearest_tracks(target_v, target_a, llm_profile=llm_profile, top_k=15)
                playlist = matcher.find_nearest_tracks(target_v, target_a, llm_profile=llm_profile, top_k=5)
-            st.success("Кросс-модальный анализ завершен!")
+        st.session_state.result_data = {
            "target_v": target_v,
            "target_a": target_a,
            "llm_profile": llm_profile,
            "playlist": playlist,
            "semantics": list(set(all_objects))
        }
        st.session_state.live_state = "result"
        st.rerun()
-            # ВЫВОД РЕЗУЛЬТАТОВ
+    # ==========================================
-            col_left, col_right = st.columns([1, 2])
+    # ЭКРАН 3: РЕЗУЛЬТАТЫ
    # ==========================================
    elif st.session_state.live_state == "result":
-            with col_left:
+        components.html("<script>window.parent.scrollTo(0, 0);</script>", height=0, width=0)
                st.header("Профиль")
                st.metric("Valence (Настроение)", f"{target_v:.2f}")
                st.metric("Arousal (Энергия)", f"{target_a:.2f}")
-                if llm_profile:
+        data = st.session_state.result_data
-                    st.write("**Требования LLM к звуку:**")
+        st.header("Рекомендованный плейлист")
                    for k, v in llm_profile.items():
                        st.caption(f"- {k}: {v:.2f}")
-            with col_right:
+        for _, row in data["playlist"].iterrows():
-                st.header("Плейлист")
+            with st.container(border=True):
-                for _, row in playlist.iterrows():
+                if viewport == "desktop":
-                    with st.container(border=True):
+                    c1, c2 = st.columns([1, 3])
-                        c1, c2 = st.columns([1, 3])
+                    with c1:
-                        with c1:
+                        st.write(f"**Track:** {int(row['song_id'])}")
-                            st.write(f"**Track:** {int(row['song_id'])}")
+                        st.caption(f"Score: {row['final_score']:.2f}")
-                            st.caption(f"Score: {row['final_score']:.2f}")
+                    with c2:
-                        with c2:
+                        audio_path = matcher.get_audio_path(row['song_id'])
-                            audio_path = matcher.get_audio_path(row['song_id'])
+                        if audio_path:
-                            if audio_path:
+                            st.audio(str(audio_path))
-                                st.audio(str(audio_path))
+                        else:
-                            else:
+                            st.warning("Файл не найден")
-                                st.warning("Файл не найден")
+                else:
                    st.write(f"**Track:** {int(row['song_id'])} (Score: {row['final_score']:.2f})")
                    audio_path = matcher.get_audio_path(row['song_id'])
                    if audio_path:
                        st.audio(str(audio_path))
                    else:
                        st.warning("Файл не найден")
        st.markdown("<br>", unsafe_allow_html=True)
        with st.expander("Технические параметры анализа"):
            c_v, c_a = st.columns(2)
            c_v.metric("Valence (Настроение)", f"{data['target_v']:.2f}")
            c_a.metric("Arousal (Энергия)", f"{data['target_a']:.2f}")
            st.markdown("---")
            st.write("**Акустические таргеты (LLM):**")
            if data["llm_profile"]:
                cols_per_row = 2 if viewport == "mobile" else 3
                llm_items = list(data["llm_profile"].items())
                for i in range(0, len(llm_items), cols_per_row):
                    cols = st.columns(cols_per_row)
                    for j in range(cols_per_row):
                        if i + j < len(llm_items):
                            k, v = llm_items[i + j]
                            cols[j].metric(k, f"{v:.2f}")
            st.markdown("---")
            st.write("**Обнаруженная семантика:**")
            st.write(", ".join([str(c).capitalize() for c in data["semantics"]]))
        st.markdown("<br>", unsafe_allow_html=True)
        if st.button("Новый анализ", use_container_width=True):
            st.session_state.live_state = "upload"
            st.session_state.result_data = None
            st.session_state.pop("uploaded_images", None)
            st.rerun()