From 18c620345cccd896df96a922ed4e02592080970b Mon Sep 17 00:00:00 2001 From: Mohamed Bassem Date: Sat, 9 Nov 2024 15:33:12 +0000 Subject: fix: Improve the robustness of the feed worker --- apps/workers/feedWorker.ts | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'apps/workers/feedWorker.ts') diff --git a/apps/workers/feedWorker.ts b/apps/workers/feedWorker.ts index 2f2c80f5..f4220ed3 100644 --- a/apps/workers/feedWorker.ts +++ b/apps/workers/feedWorker.ts @@ -89,14 +89,26 @@ async function run(req: DequeuedJob) { `[feed][${jobId}] Feed with id ${req.data.feedId} not found`, ); } + logger.info( + `[feed][${jobId}] Starting fetching feed "${feed.name}" (${feed.id}) ...`, + ); const response = await fetch(feed.url, { signal: AbortSignal.timeout(5000), + headers: { + UserAgent: "Hoarder / rss-parser", + Accept: "application/rss+xml", + }, }); + if (response.status !== 200) { + throw new Error( + `[feed][${jobId}] Feed "${feed.name}" (${feed.id}) returned a non-success status: ${response.status}.`, + ); + } const contentType = response.headers.get("content-type"); - if (!contentType || !contentType.includes("application/xml")) { + if (!contentType || !contentType.includes("xml")) { throw new Error( - `[feed][${jobId}] Feed with id ${req.data.feedId} is not a valid RSS feed`, + `[feed][${jobId}] Feed "${feed.name}" (${feed.id}) is not a valid RSS feed`, ); } const xmlData = await response.text(); @@ -105,7 +117,11 @@ async function run(req: DequeuedJob) { `[feed][${jobId}] Successfully fetched feed "${feed.name}" (${feed.id}) ...`, ); - const parser = new Parser(); + const parser = new Parser({ + customFields: { + item: ["id"], + }, + }); const feedData = await parser.parseString(xmlData); logger.info( @@ -117,6 +133,11 @@ async function run(req: DequeuedJob) { return; } + // For feeds that don't have guids, use the link as the id + feedData.items.forEach((item) => { + item.guid = item.guid ?? `${item.id}` ?? item.link; + }); + const exitingEntries = await db.query.rssFeedImportsTable.findMany({ where: and( eq(rssFeedImportsTable.rssFeedId, feed.id), @@ -131,7 +152,9 @@ async function run(req: DequeuedJob) { const newEntries = feedData.items.filter( (item) => - !exitingEntries.some((entry) => entry.entryId === item.guid) && item.link, + !exitingEntries.some((entry) => entry.entryId === item.guid) && + item.link && + item.guid, ); if (newEntries.length === 0) { -- cgit v1.2.3-70-g09d2