about | summary | refs | log | tree | commit | diff | stats
path: root/apps
diff options
context:
space:
mode:
author Mohamed Bassem <me@mbassem.com> 2025-03-02 12:45:52 +0000
committer Mohamed Bassem <me@mbassem.com> 2025-03-02 12:45:52 +0000
commit 947ea29efdf3891e804d2aa346907088dd363f7d (patch)
tree 81af75157032646e6d59828f25eacf1973b68021 /apps
parent 2399dcb6fc7df3fd1753448c658e1cc9ce379687 (diff)
download karakeep-947ea29efdf3891e804d2aa346907088dd363f7d.tar.zst
fix: Validate the output of the feed xml parser. Fixes #1085
Diffstat (limited to 'apps')
-rw-r--r-- apps/workers/feedWorker.ts 27
1 files changed, 19 insertions, 8 deletions
diff --git a/apps/workers/feedWorker.ts b/apps/workers/feedWorker.ts
index 67ee95f2..9d5c110b 100644
--- a/apps/workers/feedWorker.ts
+++ b/apps/workers/feedWorker.ts
@@ -3,6 +3,7 @@ import { DequeuedJob, Runner } from "liteque";
import cron from "node-cron";
import Parser from "rss-parser";
import { buildImpersonatingTRPCClient } from "trpc";
+import { z } from "zod";
import type { ZFeedRequestSchema } from "@hoarder/shared/queues";
import { db } from "@hoarder/db";
@@ -123,19 +124,31 @@ async function run(req: DequeuedJob<ZFeedRequestSchema>) {
item: ["id"],
},
});
- const feedData = await parser.parseString(xmlData);
+ const unparseFeedData = await parser.parseString(xmlData);
+
+ // Apparently, we can't trust the output of the xml parser. So let's do our own type
+ // validation.
+ const feedItemsSchema = z.object({
+ id: z.coerce.string(),
+ link: z.string().optional(),
+ guid: z.string().optional(),
+ });
+
+ const feedItems = unparseFeedData.items
+ .map((i) => feedItemsSchema.safeParse(i))
+ .flatMap((i) => (i.success ? [i.data] : []));
logger.info(
- `[feed][${jobId}] Found ${feedData.items.length} entries in feed "${feed.name}" (${feed.id}) ...`,
+ `[feed][${jobId}] Found ${feedItems.length} entries in feed "${feed.name}" (${feed.id}) ...`,
);
- if (feedData.items.length === 0) {
+ if (feedItems.length === 0) {
logger.info(`[feed][${jobId}] No entries found.`);
return;
}
// For feeds that don't have guids, use the link as the id
- feedData.items.forEach((item) => {
+ feedItems.forEach((item) => {
item.guid = item.guid ?? `${item.id}` ?? item.link;
});
@@ -144,14 +157,12 @@ async function run(req: DequeuedJob<ZFeedRequestSchema>) {
eq(rssFeedImportsTable.rssFeedId, feed.id),
inArray(
rssFeedImportsTable.entryId,
- feedData.items
- .map((item) => item.guid)
- .filter((id): id is string => !!id),
+ feedItems.map((item) => item.guid).filter((id): id is string => !!id),
),
),
});
- const newEntries = feedData.items.filter(
+ const newEntries = feedItems.filter(
(item) =>
!exitingEntries.some((entry) => entry.entryId === item.guid) &&
item.link &&