import crypto from "node:crypto";
import fs from "node:fs";
import path from "node:path";
import { setTimeout as sleep } from "node:timers/promises";
import { fileURLToPath } from "node:url";

/**
 * Generates the Basic Auth header for CouchDB using environment variables.
 * The variables are read when the header is built, so they may be populated
 * after this module has been imported.
 * @returns {string} The Basic Auth header string.
 * @throws {Error} If CouchDB credentials are not set.
 */
function getAuthHeader() {
	const username = process.env.COUCHDB_USERNAME;
	const password = process.env.COUCHDB_PASSWORD;
	if (!username || !password) {
		throw new Error(
			"CouchDB credentials COUCHDB_USERNAME COUCHDB_PASSWORD not set in environment variables",
		);
	}
	const auth = Buffer.from(`${username}:${password}`).toString("base64");
	return `Basic ${auth}`;
}

const baseUrl = "https://kartta.hel.fi/ws/geoserver/avoindata/wfs";
const layers = [
	"Aluesarjat_avainluvut_2024",
	"Piirijako_peruspiiri",
	"Piirijako_pienalue",
	"RaideJokeri_pysakit",
	"RaideJokeri_ratalinja",
	"Seutukartta_aluejako_kuntarajat",
	"Seutukartta_aluejako_pienalue",
	"Seutukartta_liikenne_juna_asema",
	"Seutukartta_liikenne_juna_rata",
	"Seutukartta_liikenne_metro_rata",
	"Seutukartta_liikenne_metroasemat",
	"Seutukartta_liikenne_paatiet",
	"Seutukartta_maankaytto_jarvet",
	"Seutukartta_maankaytto_joet",
	"Seutukartta_meren_rantaviiva",
	"Toimipisterekisteri_palvelut",
];
const outputDir = path.join(process.cwd(), "app", "data");
const couchUrl = "https://couch.tammi.cc";
const dbName = "helsinki_wfs";

// Ensure output dir
if (!fs.existsSync(outputDir)) {
	fs.mkdirSync(outputDir, { recursive: true });
}

/**
 * Creates headers for CouchDB requests, including authorization.
 * @returns {Headers} The Headers object for fetch requests.
 * @throws {Error} If CouchDB credentials are not set.
 */
function getHeaders() {
	return new Headers({
		// biome-ignore lint/style/useNamingConvention: database
		Authorization: getAuthHeader(),
		"Content-Type": "application/json",
	});
}

/**
 * Builds the URL of a regular document in the target database.
 * The id is percent-encoded so that characters such as "/", "?" or "#" in an
 * id taken from upstream data cannot change the request path.
 * @param {string} docId - The document id.
 * @returns {string} The absolute document URL.
 */
function getDocUrl(docId) {
	return `${couchUrl}/${dbName}/${encodeURIComponent(docId)}`;
}

/**
 * Fetches the currently stored version of a document.
 * @param {string} url - The document URL.
 * @returns {Promise<object | null>} The stored document, or null when the server answers 404.
 * @throws {Error} If the server answers with any other non-success status.
 */
async function fetchExistingDoc(url) {
	const res = await fetch(url, { headers: getHeaders() });
	if (res.status === 404) {
		return null;
	}
	if (!res.ok) {
		throw new Error(`GET ${url} failed: ${res.status} ${await res.text()}`);
	}
	return res.json();
}

/**
 * Writes a document with PUT and fails loudly when the server rejects it.
 * @param {string} url - The document URL.
 * @param {object} doc - The document body (must carry `_rev` when updating).
 * @returns {Promise<void>}
 * @throws {Error} If the server answers with a non-success status.
 */
async function putDoc(url, doc) {
	const res = await fetch(url, {
		body: JSON.stringify(doc),
		headers: getHeaders(),
		method: "PUT",
	});
	if (!res.ok) {
		throw new Error(`PUT ${url} failed: ${res.status} ${await res.text()}`);
	}
}

/**
 * Creates the CouchDB database if it doesn't exist.
 * @returns {Promise<void>}
 * @throws {Error} If database creation fails (other than already exists).
 */
async function createDatabase() {
	const res = await fetch(`${couchUrl}/${dbName}`, {
		headers: getHeaders(),
		method: "PUT",
	});
	// 412 is treated as "database already exists", which is fine for this script.
	if (!res.ok && res.status !== 412) {
		throw new Error(
			`Creating database ${dbName} failed: ${res.status} ${await res.text()}`,
		);
	}
	console.log(`Database ${dbName} ready.`);
}

/**
 * Ensures the design documents (views) exist in the database, creating or updating as needed.
 * An unexpected status on the initial lookup is only logged; a rejected write throws.
 * @returns {Promise<void>}
 * @throws {Error} If writing the design document fails.
 */
async function ensureDesignDocs() {
	const designDoc = {
		_id: "_design/layers",
		views: {
			// biome-ignore lint/style/useNamingConvention: database
			by_layer: {
				map: `function(doc) { if (doc.type === 'feature' && doc.layer) { emit(doc.layer, null); } }`,
			},
		},
	};
	// The slash of a design document id is part of the path, so this URL is
	// built literally instead of going through getDocUrl().
	const url = `${couchUrl}/${dbName}/_design/layers`;
	const res = await fetch(url, { headers: getHeaders() });

	if (res.status === 404) {
		await putDoc(url, designDoc);
		console.log("Created design document: layers/by_layer");
		return;
	}
	if (res.ok) {
		const existing = await res.json();
		await putDoc(url, { ...designDoc, _rev: existing._rev });
		console.log("Updated design document");
		return;
	}
	// Neither found nor missing: keep going, but make the odd status visible.
	console.warn(`Unexpected status when ensuring design docs: ${res.status}`);
}

/**
 * Downloads a GeoJSON layer from the WFS service.
 * @param {string} layer - The name of the layer to download.
 * @returns {Promise<object>} The parsed GeoJSON object.
 * @throws {Error} If the fetch fails.
 */
async function downloadLayer(layer) {
	const url = new URL(baseUrl);
	// URLSearchParams takes care of escaping the layer name.
	url.search = new URLSearchParams({
		service: "WFS",
		version: "2.0.0",
		request: "GetFeature",
		typeName: `avoindata:${layer}`,
		outputFormat: "json",
		srsname: "EPSG:4326",
	}).toString();

	const res = await fetch(url);
	if (!res.ok) {
		throw new Error(
			`Download of ${layer} failed: ${res.status} ${res.statusText}`,
		);
	}
	return res.json();
}

/**
 * Saves GeoJSON data to a local file.
 * Note: This function is defined but not currently used in the script.
 * It could be called in processLayer if local saving is desired.
 * @param {string} layer - The layer name for the file.
 * @param {object} data - The GeoJSON data to save.
 * @returns {void}
 */
function saveToFile(layer, data) {
	const filePath = path.join(outputDir, `${layer}.geojson`);
	fs.writeFileSync(filePath, JSON.stringify(data, null, "\t"));
	console.log(`Saved: ${layer}.geojson`);
}

/**
 * Uploads or updates metadata for a layer in CouchDB.
 * @param {string} layer - The layer name.
 * @param {number} featureCount - The number of features in the layer.
 * @returns {Promise<void>}
 * @throws {Error} If the revision lookup or the upload fails.
 */
async function uploadLayerMetadata(layer, featureCount) {
	const docId = `layer_metadata:${layer}`;
	const doc = {
		_id: docId,
		// biome-ignore lint/style/useNamingConvention: database
		feature_count: featureCount,
		// biome-ignore lint/style/useNamingConvention: database
		last_updated: new Date().toISOString(),
		name: layer,
		projection: "EPSG:4326",
		type: "layer_metadata",
	};
	const url = getDocUrl(docId);

	const existing = await fetchExistingDoc(url);
	if (existing) {
		// Updating an existing document requires its current revision.
		doc._rev = existing._rev;
	}
	await putDoc(url, doc);
	console.log(`Metadata updated: ${layer} (${featureCount} features)`);
}

/**
 * Tells whether a freshly built feature document carries the same geometry
 * and properties as the stored one (compared by their JSON serialization).
 * @param {object} doc - The new feature document.
 * @param {object} existing - The document currently stored in CouchDB.
 * @returns {boolean} True when nothing relevant changed.
 */
function hasSameContent(doc, existing) {
	return (
		JSON.stringify(doc.geometry) === JSON.stringify(existing.geometry) &&
		JSON.stringify(doc.properties) === JSON.stringify(existing.properties)
	);
}

/**
 * Uploads a single feature document to CouchDB, with deduplication check.
 * The passed document is not modified; the revision is added to a copy.
 * @param {object} doc - The feature document to upload.
 * @returns {Promise<boolean>} True if uploaded/updated, false if skipped (no changes).
 * @throws {Error} If the revision lookup or the upload fails.
 */
async function uploadFeature(doc) {
	const url = getDocUrl(doc._id);
	const existing = await fetchExistingDoc(url);

	if (existing && hasSameContent(doc, existing)) {
		return false; // skipped
	}
	const payload = existing ? { ...doc, _rev: existing._rev } : doc;
	await putDoc(url, payload);
	return true; // uploaded or updated
}

/**
 * Derives an id for a feature that stays the same across runs.
 * Preference order: feature.id, then the `id`, `tunnus` and `objectid`
 * properties. A value of 0 is a valid id; null, undefined and "" are not.
 * When the feature carries no usable id, a SHA-256 hash of its geometry and
 * properties is used, so an unchanged feature maps to the same document on
 * every run instead of being duplicated.
 * @param {object} feature - A GeoJSON feature.
 * @returns {string} The id to use in the document key.
 */
function resolveFeatureId(feature) {
	const candidates = [
		feature.id,
		feature.properties?.id,
		feature.properties?.tunnus,
		feature.properties?.objectid,
	];
	const found = candidates.find(
		(value) => value !== undefined && value !== null && value !== "",
	);
	if (found !== undefined) {
		return String(found);
	}
	const content = JSON.stringify([
		feature.geometry ?? null,
		feature.properties ?? null,
	]);
	return crypto.createHash("sha256").update(content).digest("hex");
}

/**
 * Processes a single layer: downloads GeoJSON, uploads features with dedup, and updates metadata.
 * @param {string} layer - The layer to process.
 * @returns {Promise<{uploaded: number, skipped: number}>} Counts of uploaded and skipped features.
 * @throws {Error} If download or uploads fail.
 */
async function processLayer(layer) {
	const geojson = await downloadLayer(layer);
	if (!Array.isArray(geojson?.features)) {
		throw new Error(`No features in ${layer}: ${JSON.stringify(geojson)}`);
	}

	let uploaded = 0;
	let skipped = 0;
	for (const feature of geojson.features) {
		const doc = {
			_id: `feature:${layer}:${resolveFeatureId(feature)}`,
			// biome-ignore lint/style/useNamingConvention: database
			downloaded_at: new Date().toISOString(),
			geometry: feature.geometry,
			layer,
			properties: feature.properties || {},
			type: "feature",
		};
		if (await uploadFeature(doc)) {
			uploaded++;
		} else {
			skipped++;
		}
	}

	await uploadLayerMetadata(layer, geojson.features.length);
	console.log(`Done: ${layer} | Uploaded: ${uploaded} | Skipped: ${skipped}`);
	return { skipped, uploaded };
}

/**
 * Main entry point: sets up database, processes all layers.
 * Layers are processed one after another; the first failure aborts the run.
 * @returns {Promise<void>}
 */
async function main() {
	await createDatabase();
	await ensureDesignDocs();
	for (const [index, layer] of layers.entries()) {
		if (index > 0) {
			// Rate limiting: pause between layers (not after the last one).
			await sleep(500);
		}
		await processLayer(layer);
	}
	console.log("All layers processed.");
}

// Run only when executed directly. fileURLToPath() yields a real file system
// path (also on Windows and for paths with escaped characters), unlike the
// raw URL pathname.
if (process.argv[1] === fileURLToPath(import.meta.url)) {
	main().catch((error) => {
		console.error(error);
		// Signal the failure to the shell / scheduler instead of exiting with 0.
		process.exitCode = 1;
	});
}