fix(posimai-sc): 用語インデックス抽出アルゴリズムを再設計

旧アルゴリズムの問題：
- <strong> の出現位置を問わず全て用語として分割していたため、説明文中の強調タグ（例: <strong>種類の異なる</strong>）が偽の用語エントリになっていた（104件中80件以上が不正）
- ヒントが助詞（は、が）や記号で始まるケースを除去できていなかった

新アルゴリズム：
- <br> で行分割し、各行の先頭 <strong> だけを用語として扱う
- 先頭より前に地の文があれば inline 強調とみなしてスキップ
- hint クリーンアップに 。・＝・は を追加
- 結果: 104件 → 26件の正常エントリのみ残留

SW v8 → v9

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 22:51:41 +09:00 · 2026-04-20 22:51:41 +09:00 · ce195cee72
parent cd4159fec9
commit ce195cee72
2 changed files with 35 additions and 17 deletions
--- a/posimai-sc/js/app.js
+++ b/posimai-sc/js/app.js
@ -16,24 +16,42 @@ function shuffleInPlace(arr) {
 function extractTermsFromBeginnerBox(box) {
  const clone = box.cloneNode(true);
  clone.querySelector('.formula-label')?.remove();
-  const inner = clone.innerHTML.trim();
-  const parts = inner.split(/(?=<strong>)/i).map(s => s.trim()).filter(s => /^<strong>/i.test(s));
-  const terms = [];
-  for (const p of parts) {
-    const doc = new DOMParser().parseFromString('<div class="seg">'+p+'</div>', 'text/html');
-    const root = doc.querySelector('.seg');
-    const st = root && root.querySelector('strong');
-    if (!st) continue;
-    const term = st.textContent.trim();
-    let hint = '';
-    let n = st.nextSibling;
-    while (n) {
-      if (n.nodeType === 3) hint += n.textContent;
-      else if (n.nodeType === 1) hint += n.textContent;
-      n = n.nextSibling;
+
+  // <br> で行分割し、各行の先頭 <strong> だけを用語として扱う。
+  // 行中の inline <strong>（強調）は無視することで偽エントリを防ぐ。
+  const segments = [];
+  let cur = [];
+  for (const child of clone.childNodes) {
+    if (child.nodeName === 'BR') { segments.push(cur); cur = []; }
+    else cur.push(child);
  }
-    hint = hint.replace(/^[：:\s．]+/, '').trim();
-    if (term) terms.push({ term, hint });
+  if (cur.length) segments.push(cur);
+
+  const terms = [];
+  for (const seg of segments) {
+    // 先頭ノードが <strong> でなければ（前に地の文がある）定義行ではない
+    let termNode = null;
+    const beforeNodes = [];
+    for (const node of seg) {
+      if (node.nodeName === 'STRONG') { termNode = node; break; }
+      beforeNodes.push(node);
+    }
+    if (!termNode) continue;
+    // 先頭 <strong> の前に地の文があれば inline 強調とみなしてスキップ
+    if (beforeNodes.some(n => (n.textContent || '').trim())) continue;
+
+    const term = termNode.textContent.trim();
+    if (!term) continue;
+
+    // termNode 以降の全ノードの textContent を結合してヒントにする
+    const afterIdx = seg.indexOf(termNode) + 1;
+    const hint = seg.slice(afterIdx)
+      .map(n => n.textContent || '')
+      .join('')
+      .replace(/^[：:\s．。＝は]+/, '')
+      .trim();
+
+    terms.push({ term, hint });
  }
  return terms;
 }
--- a/posimai-sc/sw.js
+++ b/posimai-sc/sw.js
@ -1,5 +1,5 @@
 // posimai-sc SW — same-origin の静的資産のみキャッシュ（CDN は対象外）
-const CACHE = 'posimai-sc-v8';
+const CACHE = 'posimai-sc-v9';
 const STATIC = [
  '/',
  '/index.html',