// Reconstructed from a cgit patch view that had been collapsed onto two lines.
// Commit:  d732acd469ff02373dba4093cc53198e97f51868
// Author:  Mohamed Bassem
// Date:    Wed, 16 Apr 2025 23:49:44 +0000
// Subject: tests: Add some simple crawler tests
// Path:    packages/e2e_tests/tests/workers/crawler.test.ts (new file, index df276cae)

import { assert, beforeEach, describe, expect, inject, it } from "vitest";

import { createKarakeepClient } from "@karakeep/sdk";

import { createTestUser } from "../../utils/api";
import { waitUntil } from "../../utils/general";

/**
 * End-to-end checks for the crawler: each test creates a link bookmark through
 * the HTTP API and polls until the bookmark reflects the expected result.
 */
describe("Crawler Tests", () => {
  const port = inject("hoarderPort");

  if (!port) {
    throw new Error("Missing required environment variables");
  }

  // Typed from the factory itself so the declaration always follows the SDK.
  let client: ReturnType<typeof createKarakeepClient>;
  let apiKey: string;

  /**
   * Fetches a single bookmark with `includeContent: true`.
   *
   * @param bookmarkId - Id of the bookmark to fetch.
   * @returns The `data` field of the response; callers assert on it before use.
   */
  async function getBookmark(bookmarkId: string) {
    const { data } = await client.GET(`/bookmarks/{bookmarkId}`, {
      params: {
        path: {
          bookmarkId,
        },
        query: {
          includeContent: true,
        },
      },
    });
    return data;
  }

  // Every test gets a freshly created user and a client bound to its API key.
  beforeEach(async () => {
    apiKey = await createTestUser();
    client = createKarakeepClient({
      baseUrl: `http://localhost:${port}/api/v1/`,
      headers: {
        "Content-Type": "application/json",
        authorization: `Bearer ${apiKey}`,
      },
    });
  });

  it("should crawl a website", async () => {
    const { data: created } = await client.POST("/bookmarks", {
      body: {
        type: "link",
        url: "http://nginx:80/hello.html",
      },
    });
    assert(created);
    // Capture the id in a const so the polling closure needs no `!` assertion.
    const bookmarkId = created.id;

    await waitUntil(
      async () => {
        const data = await getBookmark(bookmarkId);
        assert(data);
        assert(data.content.type === "link");
        return data.content.crawledAt !== null;
      },
      "Bookmark is crawled",
      10000,
    );

    // Re-fetch so the assertions run against the post-crawl state.
    const bookmark = await getBookmark(bookmarkId);
    assert(bookmark && bookmark.content.type === "link");
    expect(bookmark.content.crawledAt).toBeDefined();
    expect(bookmark.content.htmlContent).toContain("Hello World");
    expect(bookmark.content.title).toContain("My test title");
    expect(bookmark.content.url).toBe("http://nginx:80/hello.html");
    expect(
      bookmark.assets.find((a) => a.assetType === "screenshot"),
    ).toBeDefined();
  });

  // NOTE(review): the title below looks like a typo for something like
  // "image links should be converted into images" — left unchanged so existing
  // test-name filters keep matching; confirm before renaming.
  it("image lings jobs be converted into images", async () => {
    const { data: created } = await client.POST("/bookmarks", {
      body: {
        type: "link",
        url: "http://nginx:80/image.png",
      },
    });
    assert(created);
    // Capture the id in a const so the polling closure needs no `!` assertion.
    const bookmarkId = created.id;

    await waitUntil(
      async () => {
        const data = await getBookmark(bookmarkId);
        assert(data);
        return data.content.type === "asset";
      },
      "Bookmark is crawled and converted to an image",
      10000,
    );

    // Re-fetch so the assertions run against the converted bookmark.
    const bookmark = await getBookmark(bookmarkId);
    assert(bookmark && bookmark.content.type === "asset");
    expect(bookmark.content.assetType).toBe("image");
    expect(bookmark.content.assetId).toBeDefined();
    expect(bookmark.content.fileName).toBe("image.png");
    expect(bookmark.content.sourceUrl).toBe("http://nginx:80/image.png");
  });
});