diff options
| author | Petri Hienonen <petri.hienonen@gmail.com> | 2025-11-04 17:07:24 +0200 |
|---|---|---|
| committer | Petri Hienonen <petri.hienonen@gmail.com> | 2025-11-09 22:48:55 +0200 |
| commit | be7ec90b500ac68e053f2b58feb085247ef95817 (patch) | |
| tree | aef7732ce0bbe505c6bc8486e1d0da2c06990e6a /download.js | |
| parent | a4ed99a370930b1a0c0f065906ed99c15a015fd4 (diff) | |
| download | housing-be7ec90b500ac68e053f2b58feb085247ef95817.tar.zst | |
Refactor application to use couchbase
Diffstat (limited to 'download.js')
| -rw-r--r-- | download.js | 225 |
1 files changed, 225 insertions, 0 deletions
import crypto from "crypto";
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";
import { isDeepStrictEqual } from "util";

const couchUsername = process.env.COUCHDB_USERNAME;
const couchPassword = process.env.COUCHDB_PASSWORD;

/**
 * Build the HTTP Basic `Authorization` header value from the
 * COUCHDB_USERNAME / COUCHDB_PASSWORD environment variables.
 *
 * @returns {string} Header value of the form `Basic <base64(user:pass)>`.
 * @throws {Error} When either environment variable is missing or empty.
 */
function getAuthHeader() {
  if (!couchUsername || !couchPassword) {
    throw new Error("CouchDB credentials not set in environment variables");
  }
  const auth = Buffer.from(`${couchUsername}:${couchPassword}`).toString("base64");
  return `Basic ${auth}`;
}

// === CONFIG ===
const baseUrl = "https://kartta.hel.fi/ws/geoserver/avoindata/wfs";
const layers = [
  "Aluesarjat_avainluvut_2024",
  "Piirijako_pienalue",
  "Piirijako_peruspiiri",
  "Seutukartta_liikenne_paatiet",
  "Seutukartta_liikenne_metroasemat",
  "Seutukartta_liikenne_metro_rata",
  "Seutukartta_liikenne_juna_rata",
  "Seutukartta_liikenne_juna_asema",
  "Seutukartta_aluejako_pienalue",
  "Seutukartta_aluejako_kuntarajat",
  "Seutukartta_maankaytto_jarvet",
  "Seutukartta_maankaytto_joet",
  "Seutukartta_meren_rantaviiva",
  "Toimipisterekisteri_palvelut",
];

const outputDir = path.join(process.cwd(), "app", "data");
const couchUrl = "https://couch.tammi.cc";
const dbName = "helsinki_wfs";

// Ensure output dir
if (!fs.existsSync(outputDir)) {
  fs.mkdirSync(outputDir, { recursive: true });
}

/**
 * Build the request headers sent with every CouchDB call.
 *
 * @returns {Headers} Headers carrying Basic auth and a JSON content type.
 * @throws {Error} When the CouchDB credentials are not configured.
 */
function getHeaders() {
  return new Headers({
    // biome-ignore lint/style/useNamingConvention: database
    Authorization: getAuthHeader(),
    "Content-Type": "application/json",
  });
}

// === COUCHDB HELPERS ===

/**
 * Build the URL of a document inside the configured database.
 *
 * The document ID is percent-encoded so that characters such as `/`, `?` or
 * spaces cannot alter the request path. Design documents keep their literal
 * `_design/` prefix; only the part after it is encoded.
 *
 * @param {string} docId - CouchDB document ID.
 * @returns {string} Absolute document URL.
 */
function docUrl(docId) {
  const designPrefix = "_design/";
  if (docId.startsWith(designPrefix)) {
    const designName = encodeURIComponent(docId.slice(designPrefix.length));
    return `${couchUrl}/${dbName}/${designPrefix}${designName}`;
  }
  return `${couchUrl}/${dbName}/${encodeURIComponent(docId)}`;
}

/**
 * Fetch a document by ID.
 *
 * @param {string} docId - CouchDB document ID.
 * @returns {Promise<object|null>} The stored document, or `null` on HTTP 404.
 * @throws {Error} On any other non-2xx response.
 */
async function readDocument(docId) {
  const res = await fetch(docUrl(docId), { headers: getHeaders() });
  if (res.status === 404) {
    return null;
  }
  if (!res.ok) {
    throw new Error(`GET ${docId} failed: HTTP ${res.status} ${await res.text()}`);
  }
  return res.json();
}

/**
 * Store a document with PUT, optionally as an update of revision `rev`.
 * The caller's `doc` object is not mutated.
 *
 * @param {object} doc - Document to store; must carry `_id`.
 * @param {string} [rev] - Revision being replaced, when updating.
 * @returns {Promise<void>}
 * @throws {Error} On any non-2xx response.
 */
async function writeDocument(doc, rev) {
  const payload = rev ? { ...doc, _rev: rev } : doc;
  const res = await fetch(docUrl(doc._id), {
    body: JSON.stringify(payload),
    headers: getHeaders(),
    method: "PUT",
  });
  if (!res.ok) {
    throw new Error(`PUT ${doc._id} failed: HTTP ${res.status} ${await res.text()}`);
  }
}

/**
 * Create the target database. HTTP 412 is treated as success (the original
 * logic accepts it as "database ready").
 *
 * @returns {Promise<void>}
 * @throws {Error} With the response body on any other failure status.
 */
async function createDatabase() {
  const url = `${couchUrl}/${dbName}`;
  const res = await fetch(url, {
    headers: getHeaders(),
    method: "PUT",
  });
  if (res.ok || res.status === 412) {
    console.log(`Database ${dbName} ready.`);
  } else {
    throw new Error(await res.text());
  }
}

/**
 * Create or update the `_design/layers` design document, which defines the
 * `by_layer` view keyed on each feature document's `layer` field.
 *
 * @returns {Promise<void>}
 * @throws {Error} When reading or writing the design document fails.
 */
async function ensureDesignDocs() {
  const designDoc = {
    _id: "_design/layers",
    views: {
      // biome-ignore lint/style/useNamingConvention: database
      by_layer: {
        map: `function(doc) {
          if (doc.type === 'feature' && doc.layer) {
            emit(doc.layer, null);
          }
        }`,
      },
    },
  };

  const existing = await readDocument(designDoc._id);
  await writeDocument(designDoc, existing?._rev);
  if (existing) {
    console.log("Updated design document");
  } else {
    console.log("Created design document: layers/by_layer");
  }
}

// === DOWNLOAD ===

/**
 * Build the WFS GetFeature URL for one layer, requesting JSON output in
 * EPSG:4326.
 *
 * @param {string} layer - Layer name without the `avoindata:` prefix.
 * @returns {URL} Request URL with properly encoded query parameters.
 */
function buildLayerUrl(layer) {
  const url = new URL(baseUrl);
  url.search = new URLSearchParams({
    service: "WFS",
    version: "2.0.0",
    request: "GetFeature",
    typeName: `avoindata:${layer}`,
    outputFormat: "json",
    srsname: "EPSG:4326",
  }).toString();
  return url;
}

/**
 * Download one layer from the WFS endpoint.
 *
 * @param {string} layer - Layer name without the `avoindata:` prefix.
 * @returns {Promise<object>} Parsed JSON response body.
 * @throws {Error} On a non-2xx response.
 */
async function downloadLayer(layer) {
  const res = await fetch(buildLayerUrl(layer));
  if (!res.ok) {
    // statusText may be empty, so always include the layer and numeric status.
    throw new Error(`Failed to download layer ${layer}: HTTP ${res.status} ${res.statusText}`);
  }
  return res.json();
}

/**
 * Write a layer to `<outputDir>/<layer>.geojson`, tab-indented.
 *
 * @param {string} layer - Layer name, used as the file base name.
 * @param {object} data - Data to serialize.
 * @returns {void}
 */
function saveToFile(layer, data) {
  const filePath = path.join(outputDir, `${layer}.geojson`);
  fs.writeFileSync(filePath, JSON.stringify(data, null, "\t"));
  console.log(`Saved: ${layer}.geojson`);
}

// === UPLOAD METADATA ===

/**
 * Create or update the `layer_metadata:<layer>` document.
 *
 * @param {string} layer - Layer name.
 * @param {number} featureCount - Number of features in the downloaded layer.
 * @returns {Promise<void>}
 * @throws {Error} When reading or writing the metadata document fails.
 */
async function uploadLayerMetadata(layer, featureCount) {
  const doc = {
    _id: `layer_metadata:${layer}`,
    // biome-ignore lint/style/useNamingConvention: database
    feature_count: featureCount,
    // biome-ignore lint/style/useNamingConvention: database
    last_updated: new Date().toISOString(),
    name: layer,
    projection: "EPSG:4326",
    type: "layer_metadata",
  };

  const existing = await readDocument(doc._id);
  await writeDocument(doc, existing?._rev);
  console.log(`Metadata updated: ${layer} (${featureCount} features)`);
}

// === UPLOAD SINGLE FEATURE (with deduplication) ===

/**
 * Upload one feature document unless an identical one is already stored.
 *
 * "Identical" means both `geometry` and `properties` are deeply equal to the
 * stored document's; the comparison does not depend on object key order.
 *
 * @param {object} doc - Feature document carrying `_id`, `geometry`, `properties`.
 * @returns {Promise<boolean>} `true` when written, `false` when skipped as unchanged.
 * @throws {Error} When the read or the write fails.
 */
async function uploadFeature(doc) {
  const existing = await readDocument(doc._id);
  if (
    existing &&
    isDeepStrictEqual(doc.geometry, existing.geometry) &&
    isDeepStrictEqual(doc.properties, existing.properties)
  ) {
    return false; // skipped
  }

  await writeDocument(doc, existing?._rev);
  return true;
}

// === PROCESS LAYER ===

/**
 * Derive an identifier for a feature that is stable across runs.
 *
 * Preference order: `feature.id`, then `properties.id`, `properties.tunnus`,
 * `properties.objectid`. Only null, undefined and empty-string candidates are
 * skipped, so a numeric ID of 0 is kept. When no candidate exists, a SHA-256
 * hash of the feature's geometry and properties is used instead of a random
 * value, so re-running the import maps the same feature to the same document.
 *
 * @param {object} feature - GeoJSON feature.
 * @returns {string} Identifier to embed in the document ID.
 */
function resolveFeatureId(feature) {
  const candidates = [
    feature.id,
    feature.properties?.id,
    feature.properties?.tunnus,
    feature.properties?.objectid,
  ];
  const explicit = candidates.find((value) => value != null && value !== "");
  if (explicit !== undefined) {
    return String(explicit);
  }

  const fingerprint = JSON.stringify({
    geometry: feature.geometry ?? null,
    properties: feature.properties ?? {},
  });
  const digest = crypto.createHash("sha256").update(fingerprint).digest("hex");
  return `sha256-${digest.slice(0, 32)}`;
}

/**
 * Download one layer and upload each feature plus the layer metadata.
 *
 * A failure on a single feature is logged and counted; it does not abort the
 * rest of the layer.
 *
 * @param {string} layer - Layer name without the `avoindata:` prefix.
 * @returns {Promise<void>}
 * @throws {Error} When the download fails, the response has no `features`
 *   array, or the metadata upload fails.
 */
async function processLayer(layer) {
  const geojson = await downloadLayer(layer);
  if (!Array.isArray(geojson?.features)) {
    throw new Error(`No features in ${layer}: ${JSON.stringify(geojson)?.slice(0, 200)}`);
  }

  let uploaded = 0;
  let skipped = 0;
  let failed = 0;

  for (const feature of geojson.features) {
    const doc = {
      _id: `feature:${layer}:${resolveFeatureId(feature)}`,
      // biome-ignore lint/style/useNamingConvention: database
      downloaded_at: new Date().toISOString(),
      geometry: feature.geometry,
      layer: layer,
      properties: feature.properties || {},
      type: "feature",
    };

    try {
      if (await uploadFeature(doc)) {
        uploaded++;
      } else {
        skipped++;
      }
    } catch (err) {
      // Report real failures instead of folding them into the "skipped" count.
      failed++;
      console.error(`Failed to upload ${doc._id}:`, err);
    }
  }

  await uploadLayerMetadata(layer, geojson.features.length);
  console.log(
    `Done: ${layer} | Uploaded: ${uploaded} | Skipped: ${skipped} | Failed: ${failed}`,
  );
}

// === MAIN ===

/**
 * Prepare the database and design document, then process every configured
 * layer sequentially with a 500 ms pause after each one.
 *
 * @returns {Promise<void>}
 */
async function main() {
  await createDatabase();
  await ensureDesignDocs();

  for (const layer of layers) {
    await processLayer(layer);
    // Optional: rate limiting
    await new Promise((r) => setTimeout(r, 500));
  }

  console.log("All layers processed.");
}

// Run only when executed directly. fileURLToPath decodes percent-escapes and
// yields a platform-native path, unlike URL.pathname.
const isEntryPoint =
  process.argv[1] !== undefined &&
  path.resolve(process.argv[1]) === fileURLToPath(import.meta.url);

if (isEntryPoint) {
  main().catch((err) => {
    console.error(err);
    // Signal failure to the caller (cron, CI) instead of exiting with 0.
    process.exitCode = 1;
  });
}
