Compare commits

...

4 Commits

1 changed files with 133 additions and 79 deletions

190
server.js
View File

@ -83,10 +83,11 @@ function escapeHtml(str) {
}
// ── Auth: JWT config ────────────────────────────────────────────────
const JWT_SECRET = process.env.JWT_SECRET || 'dev-secret-CHANGE-IN-PRODUCTION';
if (!process.env.JWT_SECRET) {
console.error('[SECURITY] JWT_SECRET is not set. Using insecure default. Set JWT_SECRET env var in production!');
console.error('[SECURITY] JWT_SECRET env var is not set. Refusing to start.');
process.exit(1);
}
const JWT_SECRET = process.env.JWT_SECRET;
const JWT_TTL_SECONDS = 30 * 24 * 60 * 60; // 30 days
// WebAuthn relying party config (from env)
@ -177,7 +178,13 @@ const pool = new Pool({
user: process.env.DB_USER || 'gitea',
password: process.env.DB_PASSWORD || '',
database: 'posimai_brain',
max: 5
max: 15,
idleTimeoutMillis: 30000,
connectionTimeoutMillis: 5000,
});
// プールレベルの接続エラーをキャッチ(未処理のままにしない)
pool.on('error', (err) => {
console.error('[DB] Unexpected pool error:', err.message);
});
// ── Gemini ────────────────────────────────
@ -282,15 +289,37 @@ function normalizeCharset(raw) {
return 'utf-8';
}
// ── SSRF ガード(fetchMeta / fetchFullTextViaJina 共用)──────────────
// RFC 1918 プライベート帯域・ループバック・クラウドメタデータ IP をブロック
// Hostname prefixes / exact names that must never be fetched server-side:
// loopback (127.0.0.0/8, localhost, ::1), the unspecified address, link-local
// 169.254.0.0/16 (which contains the common cloud metadata IP), the RFC 1918
// private ranges, and two provider-specific metadata endpoints.
const SSRF_BLOCKED = /^(127\.|localhost$|::1$|0\.0\.0\.0$|169\.254\.|10\.|172\.(1[6-9]|2[0-9]|3[01])\.|192\.168\.|100\.100\.100\.100|metadata\.google\.internal)/i;

// IPv6 literals (already stripped of brackets) that point at local resources:
// "::" (unspecified), "::1" (loopback), fc00::/7 (unique local), fe80::/10 (link-local).
const SSRF_BLOCKED_IPV6 = /^(::1?$|f[cd][0-9a-f]{2}:|fe[89ab][0-9a-f]:)/i;

// IPv4-mapped IPv6 as serialized by the WHATWG URL parser, e.g. "::ffff:7f00:1".
const SSRF_IPV4_MAPPED = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i;

/**
 * Decide whether a user-supplied URL may be fetched by the server.
 *
 * This is a purely syntactic check on the URL's scheme and hostname. It does
 * not resolve DNS and does not follow redirects, so a public hostname that
 * resolves (or redirects) to an internal address is NOT detected here.
 *
 * @param {string} rawUrl - URL string received from the client.
 * @returns {boolean} true when the URL parses, uses http/https, and its
 *   hostname is not a known loopback / private / link-local / metadata target.
 */
function isSsrfSafe(rawUrl) {
  let parsed;
  try {
    parsed = new URL(rawUrl);
  } catch {
    return false;
  }
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return false;

  let host = parsed.hostname.toLowerCase();

  // URL.hostname keeps the square brackets around IPv6 literals ("[::1]"),
  // so they have to be removed before any address comparison can match.
  if (host.startsWith('[') && host.endsWith(']')) {
    const addr = host.slice(1, -1);
    if (SSRF_BLOCKED_IPV6.test(addr)) return false;

    // "::ffff:a.b.c.d" reaches the IPv4 address a.b.c.d; rebuild the dotted
    // quad from the two trailing hextets and apply the IPv4 rules to it.
    const mapped = SSRF_IPV4_MAPPED.exec(addr);
    if (mapped) {
      const hi = Number.parseInt(mapped[1], 16);
      const lo = Number.parseInt(mapped[2], 16);
      const dotted = `${hi >> 8}.${hi & 0xff}.${lo >> 8}.${lo & 0xff}`;
      if (SSRF_BLOCKED.test(dotted)) return false;
    }
    return true;
  }

  // A fully-qualified name may carry a trailing root dot ("localhost."),
  // which would otherwise slip past the `$`-anchored alternatives.
  while (host.endsWith('.')) host = host.slice(0, -1);

  // Sub-domains of "localhost" are reserved for loopback use (RFC 6761).
  if (host.endsWith('.localhost')) return false;

  return !SSRF_BLOCKED.test(host);
}
// ── OGP フェッチ ───────────────────────────
const FETCH_META_MAX_BYTES = 2 * 1024 * 1024; // 2 MB 上限
async function fetchMeta(url) {
if (!isSsrfSafe(url)) {
return { title: url.slice(0, 300), desc: '', ogImage: '', favicon: '' };
}
try {
const res = await fetch(url, {
headers: { 'User-Agent': 'Mozilla/5.0 (compatible; PosimaiBot/1.0)' },
signal: AbortSignal.timeout(6000)
});
if (!res.ok) throw new Error(`HTTP ${res.status}`);
const buffer = await res.arrayBuffer();
// レスポンスサイズを 2MB に制限OGP取得にそれ以上は不要
const contentLength = parseInt(res.headers.get('content-length') || '0', 10);
if (contentLength > FETCH_META_MAX_BYTES) throw new Error('Response too large');
const rawBuffer = await res.arrayBuffer();
const buffer = rawBuffer.byteLength > FETCH_META_MAX_BYTES
? rawBuffer.slice(0, FETCH_META_MAX_BYTES)
: rawBuffer;
// 文字コード判定: 1) Content-Typeヘッダー優先 2) HTMLメタタグ確認
// iso-8859-1はバイト値0-255をロスレスでデコードするためcharset検出に最適
@ -341,6 +370,7 @@ async function fetchMeta(url) {
// ── Jina Reader API フェッチ(新規追加)───
async function fetchFullTextViaJina(url) {
if (!isSsrfSafe(url)) return null;
try {
console.log(`[Brain API] Fetching full text via Jina Reader for: ${url}`);
@ -356,7 +386,11 @@ async function fetchFullTextViaJina(url) {
return null;
}
// レスポンスサイズを 1MB に制限AI 分析に必要な本文量の上限)
const jinaContentLength = parseInt(jinaResponse.headers.get('content-length') || '0', 10);
if (jinaContentLength > 1024 * 1024) return null;
let markdown = await jinaResponse.text();
if (markdown.length > 1024 * 1024) markdown = markdown.slice(0, 1024 * 1024);
// Markdown Content: マーカーの後ろを抽出
const contentMarker = 'Markdown Content:';
@ -1363,7 +1397,7 @@ function buildRouter() {
}
});
// ========== 記事保存(Jina Reader自動取得対応==========
// ========== 記事保存(即時保存 + バックグラウンドメタ取得==========
r.post('/save', authMiddleware, async (req, res) => {
const { url, title: clientTitle, content, source: clientSource } = req.body || {};
if (!url) return res.status(400).json({ error: 'url is required' });
@ -1373,56 +1407,59 @@ function buildRouter() {
if (!['http:', 'https:'].includes(parsedUrl.protocol))
return res.status(400).json({ error: 'Only http/https' });
try {
const meta = await fetchMeta(url);
let fullText = content || null;
const source = clientSource || extractSource(url);
const domain = parsedUrl.hostname;
// 重要: contentが空の場合、Jina Reader APIで本文を自動取得
if (!fullText || fullText.trim().length === 0) {
console.log(`[Brain API] No content provided for ${url}, attempting Jina Reader fetch...`);
const jinaText = await fetchFullTextViaJina(url);
if (jinaText && jinaText.length > 0) {
fullText = jinaText;
console.log(`[Brain API] ✓ Using Jina Reader full text (${fullText.length} chars)`);
} else {
// Jina Reader失敗時はOGP descriptionをフォールバック
console.log(`[Brain API] ⚠ Jina Reader failed, falling back to OGP description`);
fullText = meta.desc || '';
}
} else {
console.log(`[Brain API] Using provided content (${fullText.length} chars)`);
}
// 即座に保存してフロントに返すAIはバックグラウンド
let articleQuery = await pool.query(`
try {
// 1. URLだけ即座にDBへ保存してフロントに返すメタ取得・AIはバックグラウンド
const articleQuery = await pool.query(`
INSERT INTO articles (user_id, url, title, full_text, summary, topics, source, reading_time, favicon, og_image)
VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
ON CONFLICT (user_id, url) DO UPDATE
SET title=EXCLUDED.title, full_text=EXCLUDED.full_text, source=EXCLUDED.source, summary='⏳ 再分析中...'
SET source=EXCLUDED.source, summary='⏳ 再分析中...'
RETURNING *
`, [req.userId, url, clientTitle || meta.title, fullText, '⏳ AI分析中...', ['その他'], source, 3, meta.favicon, meta.ogImage]);
let article = articleQuery.rows[0];
`, [req.userId, url, clientTitle || domain, content || null, '⏳ AI分析中...', ['その他'], source, 3,
`https://www.google.com/s2/favicons?domain=${domain}&sz=32`, '']);
const article = articleQuery.rows[0];
res.json({ ok: true, article, aiStatus: 'pending' });
// バックグラウンドでAI処理ユーザーごとに 50記事/時間 まで)
if (checkRateLimit('gemini_analyze', req.userId, 50, 60 * 60 * 1000)) {
analyzeWithGemini(clientTitle || meta.title, fullText || meta.desc, url).then(async (ai) => {
// 2. バックグラウンドでメタ情報取得 → DB更新 → AI分析
const savedUserId = req.userId;
setImmediate(async () => {
try {
const meta = await fetchMeta(url);
let fullText = content || null;
if (!fullText || fullText.trim().length === 0) {
const jinaText = await fetchFullTextViaJina(url);
fullText = jinaText || meta.desc || '';
}
const finalTitle = clientTitle || meta.title;
await pool.query(`
UPDATE articles SET title=$1, full_text=$2, favicon=$3, og_image=$4
WHERE user_id=$5 AND url=$6
`, [finalTitle, fullText, meta.favicon, meta.ogImage, savedUserId, url]);
if (checkRateLimit('gemini_analyze', savedUserId, 50, 60 * 60 * 1000)) {
analyzeWithGemini(finalTitle, fullText || meta.desc, url).then(async (ai) => {
await pool.query(`
UPDATE articles SET summary=$1, topics=$2, reading_time=$3
WHERE user_id=$4 AND url=$5
`, [ai.summary, ai.topics, ai.readingTime, req.userId, url]);
`, [ai.summary, ai.topics, ai.readingTime, savedUserId, url]);
console.log(`[Brain API] ✓ AI analysis completed for ${url}`);
}).catch(e => console.error('[Background AI Error]:', e));
}
} catch (e) {
console.error('[Background Meta Error]:', e.message);
}
});
} catch (e) {
if (e.code === '23505') return res.status(409).json({ error: 'すでに保存済みです' });
console.error(e); res.status(500).json({ error: 'DB error' });
console.error(e);
if (!res.headersSent) res.status(500).json({ error: 'DB error' });
}
});
@ -1459,51 +1496,67 @@ function buildRouter() {
} catch (e) { res.status(500).json({ error: 'DB error' }); }
});
// クイック保存 (Bookmarklet等からのGET) — Jina Reader対応
// クイック保存 (Bookmarklet等からのGET) — 即時保存 + バックグラウンドメタ取得
r.get('/quick-save', authMiddleware, async (req, res) => {
const url = req.query.url;
if (!url) return res.status(400).send('<h1>URL not provided</h1>');
try {
const meta = await fetchMeta(url);
let parsedUrl;
try { parsedUrl = new URL(url); } catch { return res.status(400).send('<h1>Invalid URL</h1>'); }
const domain = parsedUrl.hostname;
const source = extractSource(url);
// Jina Readerで本文取得を試みる
let fullText = await fetchFullTextViaJina(url);
if (!fullText || fullText.length === 0) {
fullText = meta.desc || '';
}
try {
// 1. URLだけ即座に保存
await pool.query(`
INSERT INTO articles (user_id, url, title, full_text, summary, topics, source, reading_time, favicon, og_image)
VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
ON CONFLICT (user_id, url) DO UPDATE
SET title=EXCLUDED.title, full_text=EXCLUDED.full_text, source=EXCLUDED.source, summary='⏳ 再分析中...'
`, [req.userId, url, meta.title, fullText, '⏳ AI分析中...', ['その他'], source, 3, meta.favicon, meta.ogImage]);
SET source=EXCLUDED.source, summary='⏳ 再分析中...'
`, [req.userId, url, domain, null, '⏳ AI分析中...', ['その他'], source, 3,
`https://www.google.com/s2/favicons?domain=${domain}&sz=32`, '']);
// バックグラウンドAIユーザーごとに 50記事/時間 まで)
if (checkRateLimit('gemini_analyze', req.userId, 50, 60 * 60 * 1000)) {
analyzeWithGemini(meta.title, fullText, url).then(async (ai) => {
await pool.query(`
UPDATE articles SET summary=$1, topics=$2, reading_time=$3
WHERE user_id=$4 AND url=$5
`, [ai.summary, ai.topics, ai.readingTime, req.userId, url]);
}).catch(e => console.error('[Background AI Error]:', e));
}
// HTMLレスポンス自動で閉じる
// 2. HTMLレスポンスを即座に返す
res.send(`
<!DOCTYPE html>
<html><head><meta charset="utf-8"><title>保存完了</title></head>
<body style="font-family:sans-serif;padding:40px;text-align:center;background:#0a0a0a;color:#e2e2e2">
<h1 style="color:#818CF8"> 保存しました</h1>
<p>${escapeHtml(meta.title)}</p>
<p style="color:#888">AI分析をバックグラウンドで開始しました</p>
<script>setTimeout(() => window.close(), 1500)</script>
<h1 style="color:#6EE7B7"> 保存しました</h1>
<p style="color:#888">${escapeHtml(domain)}</p>
<p style="color:#888">タイトルAI分析をバックグラウンドで取得中...</p>
<script>setTimeout(() => window.close(), 1200)</script>
</body></html>
`);
// 3. バックグラウンドでメタ情報取得 → DB更新 → AI分析
const savedUserId = req.userId;
setImmediate(async () => {
try {
const meta = await fetchMeta(url);
const jinaText = await fetchFullTextViaJina(url);
const fullText = jinaText || meta.desc || '';
await pool.query(`
UPDATE articles SET title=$1, full_text=$2, favicon=$3, og_image=$4
WHERE user_id=$5 AND url=$6
`, [meta.title, fullText, meta.favicon, meta.ogImage, savedUserId, url]);
if (checkRateLimit('gemini_analyze', savedUserId, 50, 60 * 60 * 1000)) {
analyzeWithGemini(meta.title, fullText, url).then(async (ai) => {
await pool.query(`
UPDATE articles SET summary=$1, topics=$2, reading_time=$3
WHERE user_id=$4 AND url=$5
`, [ai.summary, ai.topics, ai.readingTime, savedUserId, url]);
}).catch(e => console.error('[Background AI Error]:', e));
}
} catch (e) {
res.status(500).send(`<h1>保存失敗: ${escapeHtml(e.message)}</h1>`);
console.error('[Background Meta Error]:', e.message);
}
});
} catch (e) {
if (!res.headersSent) res.status(500).send(`<h1>保存失敗: ${escapeHtml(e.message)}</h1>`);
}
});
@ -2274,7 +2327,7 @@ ${excerpt}
// fire-and-forget アーカイブ: Jina Reader → Gemini 要約(直列)
async function archiveShare(shareId, url) {
if (!url) {
if (!url || !isSsrfSafe(url)) {
await pool.query(`UPDATE together_shares SET archive_status='failed' WHERE id=$1`, [shareId]);
return;
}
@ -2284,7 +2337,9 @@ ${excerpt}
signal: AbortSignal.timeout(30000),
});
if (!jinaRes.ok) throw new Error(`Jina ${jinaRes.status}`);
const fullContent = await jinaRes.text();
let fullContent = await jinaRes.text();
// レスポンスサイズを 1MB に制限DB の full_content カラムおよびGemini入力量の上限
if (fullContent.length > 1024 * 1024) fullContent = fullContent.slice(0, 1024 * 1024);
// Jina Reader のレスポンス先頭から "Title: ..." を抽出
const titleMatch = fullContent.match(/^Title:\s*(.+)/m);
@ -2404,8 +2459,7 @@ ${excerpt}
const { group_id, shared_by, url = null, title = null, message = '', tags = [] } = req.body || {};
if (!group_id || !shared_by) return res.status(400).json({ error: 'group_id と shared_by は必須です' });
if (url) {
try { const p = new URL(url); if (!['http:', 'https:'].includes(p.protocol)) throw new Error(); }
catch { return res.status(400).json({ error: 'url は http/https のみ有効です' }); }
if (!isSsrfSafe(url)) return res.status(400).json({ error: 'url は http/https のみ有効です' });
}
try {
const grpCheck = await pool.query('SELECT id FROM together_groups WHERE id=$1', [group_id]);