matsuolab · KamikiKei · Apr 29, 2025 · May 18, 2025 · May 18, 2025 · May 18, 2025
diff --git a/.ci/baseline_metrics.json b/.ci/baseline_metrics.json
@@ -0,0 +1,4 @@
+{
+  "accuracy": 0.90,
+  "latency_ms": 12.0
+}
diff --git a/.ci/benchmark.py b/.ci/benchmark.py
@@ -0,0 +1,43 @@
+import json
+import time
+from pathlib import Path
+from sklearn.metrics import accuracy_score
+import joblib
+import pandas as pd
+
+# Configuration: model artifact, evaluation CSV, output file, and timing loop count
+MODEL_PATH = Path("day5/演習3/models/titanic_model.pkl")
+TEST_DATA_PATH = Path("day5/演習3/data/Titanic.csv")
+OUTPUT = Path("current_metrics.json")
+REPEATS = 100
+
+# 1) Load the data: "Survived" is the label column, every other column is a feature
+df = pd.read_csv(TEST_DATA_PATH)
+X = df.drop("Survived", axis=1)
+y = df["Survived"]
+
+# 2) Load the model
+model = joblib.load(MODEL_PATH)
+
+# 3) Compute accuracy over the whole CSV
+preds = model.predict(X)
+acc = accuracy_score(y, preds)
+
+# 4) Measure latency:
+#    call predict on a fixed 10-row sample REPEATS times,
+#    time the whole loop and report the average per call in ms
+samples = X.sample(n=10, random_state=0)
+t0 = time.perf_counter()
+for _ in range(REPEATS):
+    _ = model.predict(samples)
+t1 = time.perf_counter()
+avg_latency_ms = (t1 - t0) / REPEATS * 1000
+
+# 5) Write the results (same keys as the baseline metrics JSON)
+metrics = {
+    "accuracy": acc,
+    "latency_ms": avg_latency_ms
+}
+with open(OUTPUT, "w") as f:
+    json.dump(metrics, f, indent=2)
+print("Current metrics:", metrics)
diff --git a/.ci/compare.py b/.ci/compare.py
@@ -0,0 +1,28 @@
+import json
+import sys
+
+THRESH_ACC_DROP = float(sys.argv[3])  # e.g. 0.01 (tolerate an absolute accuracy drop of up to 0.01)
+THRESH_LAT_UP = float(sys.argv[4])    # e.g. 1.2  (tolerate latency up to 1.2x the baseline, i.e. +20%)
+
+baseline_f, current_f = sys.argv[1], sys.argv[2]
+with open(baseline_f) as f:
+    base = json.load(f)
+with open(current_f) as f:
+    cur = json.load(f)
+
+ok = True
+# Accuracy check: fail when the current value plus the tolerance is still below the baseline
+if cur["accuracy"] + THRESH_ACC_DROP < base["accuracy"]:
+    print(f"ERROR: accuracy dropped from {base['accuracy']} to {cur['accuracy']}")
+    ok = False
+else:
+    print(f"OK: accuracy {cur['accuracy']} >= {base['accuracy'] - THRESH_ACC_DROP}")
+
+# Latency check: fail when the current value exceeds baseline * allowed multiplier
+if cur["latency_ms"] > base["latency_ms"] * THRESH_LAT_UP:
+    print(f"ERROR: latency increased from {base['latency_ms']} to {cur['latency_ms']}")
+    ok = False
+else:
+    print(f"OK: latency {cur['latency_ms']} <= {base['latency_ms']} * {THRESH_LAT_UP}")
+
+sys.exit(0 if ok else 1)  # exit code 1 when either check failed
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -39,3 +39,22 @@ jobs:
     - name: Run model tests
       run: |
         pytest day5/演習3/tests/test_model.py -v
+
+  performance:
+    runs-on: ubuntu-latest
+    needs: test
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pandas scikit-learn joblib
+      - name: Run benchmark
+        run: python .ci/benchmark.py
+      - name: Compare with baseline
+        # arg 3: allowed accuracy drop (0.01 = 0.01 absolute, i.e. 1 point); arg 4: latency multiplier (1.2 = up to 20% slower is OK)
+        run: python .ci/compare.py .ci/baseline_metrics.json current_metrics.json 0.01 1.2
diff --git a/day1/02_streamlit_app/app.py b/day1/02_streamlit_app/app.py
@@ -1,5 +1,9 @@
 # app.py
 import streamlit as st
+
+# --- アプリケーション設定 ---
+st.set_page_config(page_title="Gemma Chatbot", layout="wide")
+
 import ui                   # UIモジュール
 import llm                  # LLMモジュール
 import database             # データベースモジュール
@@ -10,9 +14,6 @@
 from config import MODEL_NAME
 from huggingface_hub import HfFolder
 
-# --- アプリケーション設定 ---
-st.set_page_config(page_title="Gemma Chatbot", layout="wide")
-
 # --- 初期化処理 ---
 # NLTKデータのダウンロード（初回起動時など）
 metrics.initialize_nltk()
@@ -23,31 +24,11 @@
 # データベースが空ならサンプルデータを投入
 data.ensure_initial_data()
 
-# LLMモデルのロード（キャッシュを利用）
-# モデルをキャッシュして再利用
-@st.cache_resource
-def load_model():
-    """LLMモデルをロードする"""
-    try:
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        st.info(f"Using device: {device}") # 使用デバイスを表示
-        pipe = pipeline(
-            "text-generation",
-            model=MODEL_NAME,
-            model_kwargs={"torch_dtype": torch.bfloat16},
-            device=device
-        )
-        st.success(f"モデル '{MODEL_NAME}' の読み込みに成功しました。")
-        return pipe
-    except Exception as e:
-        st.error(f"モデル '{MODEL_NAME}' の読み込みに失敗しました: {e}")
-        st.error("GPUメモリ不足の可能性があります。不要なプロセスを終了するか、より小さいモデルの使用を検討してください。")
-        return None
 pipe = llm.load_model()
 
 # --- Streamlit アプリケーション ---
-st.title("🤖 Gemma 2 Chatbot with Feedback")
-st.write("Gemmaモデルを使用したチャットボットです。回答に対してフィードバックを行えます。")
+st.title("💭 Gemma 2 Chatbot with Feedback")
+st.write("Gemmaモデルを使用したチャットボットです。初学者向けに平易な回答をします！回答に対してフィードバックを行えます。")
 st.markdown("---")
 
 # --- サイドバー ---
@@ -78,4 +59,4 @@ def load_model():
 
 # --- フッターなど（任意） ---
 st.sidebar.markdown("---")
-st.sidebar.info("開発者: [Your Name]")
+st.sidebar.info("開発者: Kei")
diff --git a/day1/02_streamlit_app/llm.py b/day1/02_streamlit_app/llm.py
@@ -1,87 +1,73 @@
 # llm.py
+import streamlit as st
 import os
+import time
 import torch
-from transformers import pipeline
 import streamlit as st
-import time
-from config import MODEL_NAME
+from transformers import pipeline
 from huggingface_hub import login
+from config import MODEL_NAME
 
-# モデルをキャッシュして再利用
+# ── ここに「必ず中学生にもわかるように例えを入れて解説する」システムプロンプトを定義
+SYSTEM_PROMPT = (
+    "【システム】これから中学生にもわかるように、"
+    "例えを交えて丁寧に説明してください。\n"
+)
 @st.cache_resource
 def load_model():
-    """LLMモデルをロードする"""
-    try:
+    """
+    Load the LLM and cache the resulting pipeline.
+    The Hugging Face token login is also performed here.
+    """
+    # Read the HF token from the HUGGINGFACE_TOKEN environment variable and log in (original note: configure it in .streamlit/secrets.toml)
+    hf_token = os.environ["HUGGINGFACE_TOKEN"]
+    login(token=hf_token)
 
-        # アクセストークンを保存
-        hf_token = st.secrets["huggingface"]["token"]
-
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        st.info(f"Using device: {device}") # 使用デバイスを表示
-        pipe = pipeline(
-            "text-generation",
-            model=MODEL_NAME,
-            model_kwargs={"torch_dtype": torch.bfloat16},
-            device=device
-        )
-        st.success(f"モデル '{MODEL_NAME}' の読み込みに成功しました。")
-        return pipe
-    except Exception as e:
-        st.error(f"モデル '{MODEL_NAME}' の読み込みに失敗しました: {e}")
-        st.error("GPUメモリ不足の可能性があります。不要なプロセスを終了するか、より小さいモデルの使用を検討してください。")
-        return None
+    # Device selection: CUDA when available, otherwise CPU
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    st.info(f"Using device: {device}")
 
-def generate_response(pipe, user_question):
-    """LLMを使用して質問に対する回答を生成する"""
-    if pipe is None:
-        return "モデルがロードされていないため、回答を生成できません。", 0
+    # Build the text-generation pipeline
+    pipe = pipeline(
+        # Pipeline task name (author's note: this choice was not yet finalized)
+        "text-generation",
+        model=MODEL_NAME,
+        model_kwargs={"torch_dtype": torch.bfloat16},
+        device=device,)
+    st.success(f"モデル '{MODEL_NAME}' の読み込みに成功しました。")
+    return pipe
 
-    try:
-        start_time = time.time()
-        messages = [
-            {"role": "user", "content": user_question},
-        ]
-        # max_new_tokensを調整可能にする（例）
-        outputs = pipe(messages, max_new_tokens=512, do_sample=True, temperature=0.7, top_p=0.9)
+pipe = load_model()
 
-        # Gemmaの出力形式に合わせて調整が必要な場合がある
-        # 最後のassistantのメッセージを取得
-        assistant_response = ""
-        if outputs and isinstance(outputs, list) and outputs[0].get("generated_text"):
-           if isinstance(outputs[0]["generated_text"], list) and len(outputs[0]["generated_text"]) > 0:
-               # messages形式の場合
-               last_message = outputs[0]["generated_text"][-1]
-               if last_message.get("role") == "assistant":
-                   assistant_response = last_message.get("content", "").strip()
-           elif isinstance(outputs[0]["generated_text"], str):
-               # 単純な文字列の場合（古いtransformers？） - プロンプト部分を除く処理が必要かも
-               # この部分はモデルやtransformersのバージョンによって調整が必要
-               full_text = outputs[0]["generated_text"]
-               # 簡単な方法：ユーザーの質問以降の部分を取得
-               prompt_end = user_question
-               response_start_index = full_text.find(prompt_end) + len(prompt_end)
-               # 応答部分のみを抽出（より堅牢な方法が必要な場合あり）
-               possible_response = full_text[response_start_index:].strip()
-               # 特定の開始トークンを探すなど、モデルに合わせた調整
-               if "<start_of_turn>model" in possible_response:
-                    assistant_response = possible_response.split("<start_of_turn>model\n")[-1].strip()
-               else:
-                    assistant_response = possible_response # フォールバック
+def generate_response(
+    user_question: str,
+    max_new_tokens: int = 256,  # cap on newly generated tokens, forwarded to the pipeline
+    temperature: float = 0.7,  # sampling temperature, forwarded to the pipeline
+    top_p: float = 0.9  # nucleus-sampling threshold, forwarded to the pipeline
+) -> tuple[str, float]:
+    """
+    Prepend the system prompt to the user's question, run the module-level
+    `pipe`, and return only the newly generated text with the elapsed time.
 
-        if not assistant_response:
-             # 上記で見つからない場合のフォールバックやデバッグ
-             print("Warning: Could not extract assistant response. Full output:", outputs)
-             assistant_response = "回答の抽出に失敗しました。"
+    Returns:
+      (answer text, response time in seconds)
+    """
+    # 1) Build the prompt: system prompt + user question
+    prompt = SYSTEM_PROMPT + user_question
 
+    # 2) Call the model with the caller's sampling settings (previously hard-coded) and time it
+    start_time = time.time()
+    outputs = pipe(
+        prompt,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        temperature=temperature,
+        top_p=top_p
+    )
+    elapsed = time.time() - start_time
 
-        end_time = time.time()
-        response_time = end_time - start_time
-        print(f"Generated response in {response_time:.2f}s") # デバッグ用
-        return assistant_response, response_time
+    # 3) Strip the whole echoed prompt (system prompt AND question) so only the answer remains
+    full_text = outputs[0]["generated_text"]
+    answer = full_text.removeprefix(prompt).strip()
 
-    except Exception as e:
-        st.error(f"回答生成中にエラーが発生しました: {e}")
-        # エラーの詳細をログに出力
-        import traceback
-        traceback.print_exc()
-        return f"エラーが発生しました: {str(e)}", 0
+    return answer, elapsed
diff --git a/day1/02_streamlit_app/ui.py b/day1/02_streamlit_app/ui.py
@@ -10,7 +10,7 @@
 # --- チャットページのUI ---
 def display_chat_page(pipe):
     """チャットページのUIを表示する"""
-    st.subheader("質問を入力してください")
+    st.subheader("なんでも聞いてみよう！")
     user_question = st.text_area("質問", key="question_input", height=100, value=st.session_state.get("current_question", ""))
     submit_button = st.button("質問を送信")
 
@@ -31,7 +31,7 @@ def display_chat_page(pipe):
         st.session_state.feedback_given = False # フィードバック状態もリセット
 
         with st.spinner("モデルが回答を生成中..."):
-            answer, response_time = generate_response(pipe, user_question)
+            answer, response_time = generate_response(user_question)
             st.session_state.current_answer = answer
             st.session_state.response_time = response_time
             # ここでrerunすると回答とフィードバックが一度に表示される

diff --git a/day5/requirements.txt b/day5/requirements.txt
@@ -4,4 +4,3 @@ mlflow
 pandas
 pytest
 great_expectations
-black
diff --git a/day5/演習1/models/titanic_model.pkl b/day5/演習1/models/titanic_model.pkl
diff --git a/day5/演習2/black_check.py b/day5/演習2/black_check.py
@@ -1,7 +1,20 @@
+def say_hello(name):
+    print("Hello," + name + "!")  # greet
+
+
+def say_hello(name):
+    print("Hello," + name + "!")  # greet
+
+
+def add(a, b):
+    return a + b
+
 
-def say_hello(name):print("Hello,"+name+"!")   # greet
-def say_hello(name):print("Hello,"     +    name   +"!")   # greet
-def add( a,b):return a+b
-def add( a ,     b    ):return a+b
 def add(a, b):
-    return a+b
+    return a + b
+
+
+def add(a, b):
+    return a + b
+
+#整理される
diff --git a/day5/演習2/main.py b/day5/演習2/main.py
@@ -11,6 +11,7 @@
 import time
 import great_expectations as gx
 
+
 class DataLoader:
     """データロードを行うクラス"""
 

diff --git a/day5/演習2/models/titanic_model.pkl b/day5/演習2/models/titanic_model.pkl
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,4 +4,3 @@ mlflow @@
     pandas
     pytest
     great_expectations
-    black