Compare commits

..

No commits in common. "82a094f2f2d79842e5d847c8f532c80db4db384c" and "e4bd0a19015dfd1ed29d0b19c6934c18b108814d" have entirely different histories.

1 changed files with 79 additions and 133 deletions

212
server.js
View File

@ -83,11 +83,10 @@ function escapeHtml(str) {
} }
// ── Auth: JWT config ──────────────────────────────────────────────── // ── Auth: JWT config ────────────────────────────────────────────────
const JWT_SECRET = process.env.JWT_SECRET || 'dev-secret-CHANGE-IN-PRODUCTION';
if (!process.env.JWT_SECRET) { if (!process.env.JWT_SECRET) {
console.error('[SECURITY] JWT_SECRET env var is not set. Refusing to start.'); console.error('[SECURITY] JWT_SECRET is not set. Using insecure default. Set JWT_SECRET env var in production!');
process.exit(1);
} }
const JWT_SECRET = process.env.JWT_SECRET;
const JWT_TTL_SECONDS = 30 * 24 * 60 * 60; // 30 days const JWT_TTL_SECONDS = 30 * 24 * 60 * 60; // 30 days
// WebAuthn relying party config (from env) // WebAuthn relying party config (from env)
@ -178,13 +177,7 @@ const pool = new Pool({
user: process.env.DB_USER || 'gitea', user: process.env.DB_USER || 'gitea',
password: process.env.DB_PASSWORD || '', password: process.env.DB_PASSWORD || '',
database: 'posimai_brain', database: 'posimai_brain',
max: 15, max: 5
idleTimeoutMillis: 30000,
connectionTimeoutMillis: 5000,
});
// プールレベルの接続エラーをキャッチ(未処理のままにしない)
pool.on('error', (err) => {
console.error('[DB] Unexpected pool error:', err.message);
}); });
// ── Gemini ──────────────────────────────── // ── Gemini ────────────────────────────────
@ -289,37 +282,15 @@ function normalizeCharset(raw) {
return 'utf-8'; return 'utf-8';
} }
// ── SSRF guard (shared by fetchMeta / fetchFullTextViaJina) ──────────────
// Blocks loopback, RFC 1918 private ranges, link-local addresses and cloud
// metadata hosts. The pattern is applied to a NORMALISED hostname (lower-cased,
// IPv6 brackets removed, trailing dots removed, IPv4-mapped IPv6 converted to
// dotted-quad) — see isSsrfSafe().
//   IPv4 : 127/8, 0/8, 169.254/16, 10/8, 172.16/12, 192.168/16, 100.100.100.100
//   Names: localhost, *.localhost, metadata.google.internal
//   IPv6 : :: and ::1, fc00::/7 (unique local), fe80::/10 (link-local)
const SSRF_BLOCKED = /^(127\.|0\.|169\.254\.|10\.|172\.(1[6-9]|2[0-9]|3[01])\.|192\.168\.|100\.100\.100\.100|localhost$|.*\.localhost$|metadata\.google\.internal|::1?$|f[cd][0-9a-f]{2}:|fe[89ab][0-9a-f]:)/i;

/**
 * Decide whether a URL may be fetched by the server without reaching an
 * internal or otherwise sensitive host.
 *
 * Only the URL text is inspected; no DNS lookup is performed, so a public
 * hostname that resolves to a private address is NOT detected here.
 *
 * @param {string} rawUrl - URL supplied by the client.
 * @returns {boolean} true when the URL is http/https and its host is not on
 *   the block list; false for unparsable URLs, other schemes, or blocked hosts.
 */
function isSsrfSafe(rawUrl) {
    let parsed;
    try { parsed = new URL(rawUrl); } catch { return false; }
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return false;

    // URL.hostname keeps the [] around IPv6 literals and any trailing dot of
    // an FQDN; both would let "[::1]" or "localhost." slip past an anchored regex.
    let host = parsed.hostname
        .toLowerCase()
        .replace(/^\[|\]$/g, '')
        .replace(/\.+$/, '');
    if (host === '') return false;

    // The URL parser serialises ::ffff:127.0.0.1 as ::ffff:7f00:1. Convert the
    // two trailing hex groups back to dotted-quad so the IPv4 rules apply.
    const mapped = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(host);
    if (mapped) {
        const high = Number.parseInt(mapped[1], 16);
        const low = Number.parseInt(mapped[2], 16);
        host = `${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`;
    }

    return !SSRF_BLOCKED.test(host);
}
// ── OGP フェッチ ─────────────────────────── // ── OGP フェッチ ───────────────────────────
const FETCH_META_MAX_BYTES = 2 * 1024 * 1024; // 2 MB 上限
async function fetchMeta(url) { async function fetchMeta(url) {
if (!isSsrfSafe(url)) {
return { title: url.slice(0, 300), desc: '', ogImage: '', favicon: '' };
}
try { try {
const res = await fetch(url, { const res = await fetch(url, {
headers: { 'User-Agent': 'Mozilla/5.0 (compatible; PosimaiBot/1.0)' }, headers: { 'User-Agent': 'Mozilla/5.0 (compatible; PosimaiBot/1.0)' },
signal: AbortSignal.timeout(6000) signal: AbortSignal.timeout(6000)
}); });
if (!res.ok) throw new Error(`HTTP ${res.status}`); if (!res.ok) throw new Error(`HTTP ${res.status}`);
const buffer = await res.arrayBuffer();
// レスポンスサイズを 2MB に制限OGP取得にそれ以上は不要
const contentLength = parseInt(res.headers.get('content-length') || '0', 10);
if (contentLength > FETCH_META_MAX_BYTES) throw new Error('Response too large');
const rawBuffer = await res.arrayBuffer();
const buffer = rawBuffer.byteLength > FETCH_META_MAX_BYTES
? rawBuffer.slice(0, FETCH_META_MAX_BYTES)
: rawBuffer;
// 文字コード判定: 1) Content-Typeヘッダー優先 2) HTMLメタタグ確認 // 文字コード判定: 1) Content-Typeヘッダー優先 2) HTMLメタタグ確認
// iso-8859-1はバイト値0-255をロスレスでデコードするためcharset検出に最適 // iso-8859-1はバイト値0-255をロスレスでデコードするためcharset検出に最適
@ -370,7 +341,6 @@ async function fetchMeta(url) {
// ── Jina Reader API フェッチ(新規追加)─── // ── Jina Reader API フェッチ(新規追加)───
async function fetchFullTextViaJina(url) { async function fetchFullTextViaJina(url) {
if (!isSsrfSafe(url)) return null;
try { try {
console.log(`[Brain API] Fetching full text via Jina Reader for: ${url}`); console.log(`[Brain API] Fetching full text via Jina Reader for: ${url}`);
@ -386,11 +356,7 @@ async function fetchFullTextViaJina(url) {
return null; return null;
} }
// レスポンスサイズを 1MB に制限AI 分析に必要な本文量の上限)
const jinaContentLength = parseInt(jinaResponse.headers.get('content-length') || '0', 10);
if (jinaContentLength > 1024 * 1024) return null;
let markdown = await jinaResponse.text(); let markdown = await jinaResponse.text();
if (markdown.length > 1024 * 1024) markdown = markdown.slice(0, 1024 * 1024);
// Markdown Content: マーカーの後ろを抽出 // Markdown Content: マーカーの後ろを抽出
const contentMarker = 'Markdown Content:'; const contentMarker = 'Markdown Content:';
@ -1397,7 +1363,7 @@ function buildRouter() {
} }
}); });
// ========== 記事保存(即時保存 + バックグラウンドメタ取得========== // ========== 記事保存(Jina Reader自動取得対応==========
r.post('/save', authMiddleware, async (req, res) => { r.post('/save', authMiddleware, async (req, res) => {
const { url, title: clientTitle, content, source: clientSource } = req.body || {}; const { url, title: clientTitle, content, source: clientSource } = req.body || {};
if (!url) return res.status(400).json({ error: 'url is required' }); if (!url) return res.status(400).json({ error: 'url is required' });
@ -1407,59 +1373,56 @@ function buildRouter() {
if (!['http:', 'https:'].includes(parsedUrl.protocol)) if (!['http:', 'https:'].includes(parsedUrl.protocol))
return res.status(400).json({ error: 'Only http/https' }); return res.status(400).json({ error: 'Only http/https' });
const source = clientSource || extractSource(url);
const domain = parsedUrl.hostname;
try { try {
// 1. URLだけ即座にDBへ保存してフロントに返すメタ取得・AIはバックグラウンド const meta = await fetchMeta(url);
const articleQuery = await pool.query(` let fullText = content || null;
INSERT INTO articles (user_id, url, title, full_text, summary, topics, source, reading_time, favicon, og_image) const source = clientSource || extractSource(url);
VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
ON CONFLICT (user_id, url) DO UPDATE // 重要: contentが空の場合、Jina Reader APIで本文を自動取得
SET source=EXCLUDED.source, summary='⏳ 再分析中...' if (!fullText || fullText.trim().length === 0) {
RETURNING * console.log(`[Brain API] No content provided for ${url}, attempting Jina Reader fetch...`);
`, [req.userId, url, clientTitle || domain, content || null, '⏳ AI分析中...', ['その他'], source, 3,
`https://www.google.com/s2/favicons?domain=${domain}&sz=32`, '']); const jinaText = await fetchFullTextViaJina(url);
if (jinaText && jinaText.length > 0) {
fullText = jinaText;
console.log(`[Brain API] ✓ Using Jina Reader full text (${fullText.length} chars)`);
} else {
// Jina Reader失敗時はOGP descriptionをフォールバック
console.log(`[Brain API] ⚠ Jina Reader failed, falling back to OGP description`);
fullText = meta.desc || '';
}
} else {
console.log(`[Brain API] Using provided content (${fullText.length} chars)`);
}
// 即座に保存してフロントに返すAIはバックグラウンド
let articleQuery = await pool.query(`
INSERT INTO articles (user_id, url, title, full_text, summary, topics, source, reading_time, favicon, og_image)
VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
ON CONFLICT (user_id, url) DO UPDATE
SET title=EXCLUDED.title, full_text=EXCLUDED.full_text, source=EXCLUDED.source, summary='⏳ 再分析中...'
RETURNING *
`, [req.userId, url, clientTitle || meta.title, fullText, '⏳ AI分析中...', ['その他'], source, 3, meta.favicon, meta.ogImage]);
let article = articleQuery.rows[0];
const article = articleQuery.rows[0];
res.json({ ok: true, article, aiStatus: 'pending' }); res.json({ ok: true, article, aiStatus: 'pending' });
// 2. バックグラウンドでメタ情報取得 → DB更新 → AI分析 // バックグラウンドでAI処理ユーザーごとに 50記事/時間 まで)
const savedUserId = req.userId; if (checkRateLimit('gemini_analyze', req.userId, 50, 60 * 60 * 1000)) {
setImmediate(async () => { analyzeWithGemini(clientTitle || meta.title, fullText || meta.desc, url).then(async (ai) => {
try {
const meta = await fetchMeta(url);
let fullText = content || null;
if (!fullText || fullText.trim().length === 0) {
const jinaText = await fetchFullTextViaJina(url);
fullText = jinaText || meta.desc || '';
}
const finalTitle = clientTitle || meta.title;
await pool.query(` await pool.query(`
UPDATE articles SET title=$1, full_text=$2, favicon=$3, og_image=$4 UPDATE articles SET summary=$1, topics=$2, reading_time=$3
WHERE user_id=$5 AND url=$6 WHERE user_id=$4 AND url=$5
`, [finalTitle, fullText, meta.favicon, meta.ogImage, savedUserId, url]); `, [ai.summary, ai.topics, ai.readingTime, req.userId, url]);
console.log(`[Brain API] ✓ AI analysis completed for ${url}`);
if (checkRateLimit('gemini_analyze', savedUserId, 50, 60 * 60 * 1000)) { }).catch(e => console.error('[Background AI Error]:', e));
analyzeWithGemini(finalTitle, fullText || meta.desc, url).then(async (ai) => { }
await pool.query(`
UPDATE articles SET summary=$1, topics=$2, reading_time=$3
WHERE user_id=$4 AND url=$5
`, [ai.summary, ai.topics, ai.readingTime, savedUserId, url]);
console.log(`[Brain API] ✓ AI analysis completed for ${url}`);
}).catch(e => console.error('[Background AI Error]:', e));
}
} catch (e) {
console.error('[Background Meta Error]:', e.message);
}
});
} catch (e) { } catch (e) {
if (e.code === '23505') return res.status(409).json({ error: 'すでに保存済みです' }); if (e.code === '23505') return res.status(409).json({ error: 'すでに保存済みです' });
console.error(e); console.error(e); res.status(500).json({ error: 'DB error' });
if (!res.headersSent) res.status(500).json({ error: 'DB error' });
} }
}); });
@ -1496,67 +1459,51 @@ function buildRouter() {
} catch (e) { res.status(500).json({ error: 'DB error' }); } } catch (e) { res.status(500).json({ error: 'DB error' }); }
}); });
// クイック保存 (Bookmarklet等からのGET) — 即時保存 + バックグラウンドメタ取得 // クイック保存 (Bookmarklet等からのGET) — Jina Reader対応
r.get('/quick-save', authMiddleware, async (req, res) => { r.get('/quick-save', authMiddleware, async (req, res) => {
const url = req.query.url; const url = req.query.url;
if (!url) return res.status(400).send('<h1>URL not provided</h1>'); if (!url) return res.status(400).send('<h1>URL not provided</h1>');
let parsedUrl;
try { parsedUrl = new URL(url); } catch { return res.status(400).send('<h1>Invalid URL</h1>'); }
const domain = parsedUrl.hostname;
const source = extractSource(url);
try { try {
// 1. URLだけ即座に保存 const meta = await fetchMeta(url);
const source = extractSource(url);
// Jina Readerで本文取得を試みる
let fullText = await fetchFullTextViaJina(url);
if (!fullText || fullText.length === 0) {
fullText = meta.desc || '';
}
await pool.query(` await pool.query(`
INSERT INTO articles (user_id, url, title, full_text, summary, topics, source, reading_time, favicon, og_image) INSERT INTO articles (user_id, url, title, full_text, summary, topics, source, reading_time, favicon, og_image)
VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
ON CONFLICT (user_id, url) DO UPDATE ON CONFLICT (user_id, url) DO UPDATE
SET source=EXCLUDED.source, summary='⏳ 再分析中...' SET title=EXCLUDED.title, full_text=EXCLUDED.full_text, source=EXCLUDED.source, summary='⏳ 再分析中...'
`, [req.userId, url, domain, null, '⏳ AI分析中...', ['その他'], source, 3, `, [req.userId, url, meta.title, fullText, '⏳ AI分析中...', ['その他'], source, 3, meta.favicon, meta.ogImage]);
`https://www.google.com/s2/favicons?domain=${domain}&sz=32`, '']);
// 2. HTMLレスポンスを即座に返す // バックグラウンドAIユーザーごとに 50記事/時間 まで)
if (checkRateLimit('gemini_analyze', req.userId, 50, 60 * 60 * 1000)) {
analyzeWithGemini(meta.title, fullText, url).then(async (ai) => {
await pool.query(`
UPDATE articles SET summary=$1, topics=$2, reading_time=$3
WHERE user_id=$4 AND url=$5
`, [ai.summary, ai.topics, ai.readingTime, req.userId, url]);
}).catch(e => console.error('[Background AI Error]:', e));
}
// HTMLレスポンス自動で閉じる
res.send(` res.send(`
<!DOCTYPE html> <!DOCTYPE html>
<html><head><meta charset="utf-8"><title>保存完了</title></head> <html><head><meta charset="utf-8"><title>保存完了</title></head>
<body style="font-family:sans-serif;padding:40px;text-align:center;background:#0a0a0a;color:#e2e2e2"> <body style="font-family:sans-serif;padding:40px;text-align:center;background:#0a0a0a;color:#e2e2e2">
<h1 style="color:#6EE7B7"> 保存しました</h1> <h1 style="color:#818CF8"> 保存しました</h1>
<p style="color:#888">${escapeHtml(domain)}</p> <p>${escapeHtml(meta.title)}</p>
<p style="color:#888">タイトルAI分析をバックグラウンドで取得中...</p> <p style="color:#888">AI分析をバックグラウンドで開始しました</p>
<script>setTimeout(() => window.close(), 1200)</script> <script>setTimeout(() => window.close(), 1500)</script>
</body></html> </body></html>
`); `);
// 3. バックグラウンドでメタ情報取得 → DB更新 → AI分析
const savedUserId = req.userId;
setImmediate(async () => {
try {
const meta = await fetchMeta(url);
const jinaText = await fetchFullTextViaJina(url);
const fullText = jinaText || meta.desc || '';
await pool.query(`
UPDATE articles SET title=$1, full_text=$2, favicon=$3, og_image=$4
WHERE user_id=$5 AND url=$6
`, [meta.title, fullText, meta.favicon, meta.ogImage, savedUserId, url]);
if (checkRateLimit('gemini_analyze', savedUserId, 50, 60 * 60 * 1000)) {
analyzeWithGemini(meta.title, fullText, url).then(async (ai) => {
await pool.query(`
UPDATE articles SET summary=$1, topics=$2, reading_time=$3
WHERE user_id=$4 AND url=$5
`, [ai.summary, ai.topics, ai.readingTime, savedUserId, url]);
}).catch(e => console.error('[Background AI Error]:', e));
}
} catch (e) {
console.error('[Background Meta Error]:', e.message);
}
});
} catch (e) { } catch (e) {
if (!res.headersSent) res.status(500).send(`<h1>保存失敗: ${escapeHtml(e.message)}</h1>`); res.status(500).send(`<h1>保存失敗: ${escapeHtml(e.message)}</h1>`);
} }
}); });
@ -2327,7 +2274,7 @@ ${excerpt}
// fire-and-forget アーカイブ: Jina Reader → Gemini 要約(直列) // fire-and-forget アーカイブ: Jina Reader → Gemini 要約(直列)
async function archiveShare(shareId, url) { async function archiveShare(shareId, url) {
if (!url || !isSsrfSafe(url)) { if (!url) {
await pool.query(`UPDATE together_shares SET archive_status='failed' WHERE id=$1`, [shareId]); await pool.query(`UPDATE together_shares SET archive_status='failed' WHERE id=$1`, [shareId]);
return; return;
} }
@ -2337,9 +2284,7 @@ ${excerpt}
signal: AbortSignal.timeout(30000), signal: AbortSignal.timeout(30000),
}); });
if (!jinaRes.ok) throw new Error(`Jina ${jinaRes.status}`); if (!jinaRes.ok) throw new Error(`Jina ${jinaRes.status}`);
let fullContent = await jinaRes.text(); const fullContent = await jinaRes.text();
// レスポンスサイズを 1MB に制限DB の full_content カラムおよびGemini入力量の上限
if (fullContent.length > 1024 * 1024) fullContent = fullContent.slice(0, 1024 * 1024);
// Jina Reader のレスポンス先頭から "Title: ..." を抽出 // Jina Reader のレスポンス先頭から "Title: ..." を抽出
const titleMatch = fullContent.match(/^Title:\s*(.+)/m); const titleMatch = fullContent.match(/^Title:\s*(.+)/m);
@ -2459,7 +2404,8 @@ ${excerpt}
const { group_id, shared_by, url = null, title = null, message = '', tags = [] } = req.body || {}; const { group_id, shared_by, url = null, title = null, message = '', tags = [] } = req.body || {};
if (!group_id || !shared_by) return res.status(400).json({ error: 'group_id と shared_by は必須です' }); if (!group_id || !shared_by) return res.status(400).json({ error: 'group_id と shared_by は必須です' });
if (url) { if (url) {
if (!isSsrfSafe(url)) return res.status(400).json({ error: 'url は http/https のみ有効です' }); try { const p = new URL(url); if (!['http:', 'https:'].includes(p.protocol)) throw new Error(); }
catch { return res.status(400).json({ error: 'url は http/https のみ有効です' }); }
} }
try { try {
const grpCheck = await pool.query('SELECT id FROM together_groups WHERE id=$1', [group_id]); const grpCheck = await pool.query('SELECT id FROM together_groups WHERE id=$1', [group_id]);