diff options
| author | Petri Hienonen <petri.hienonen@gmail.com> | 2025-11-09 22:17:04 +0200 |
|---|---|---|
| committer | Petri Hienonen <petri.hienonen@gmail.com> | 2025-11-09 22:17:04 +0200 |
| commit | 5eba467a7eb84409aa43df83de78ecb843a79d7b (patch) | |
| tree | d737d224b4b20ae4a29bad91125072018fdba80d /download.js | |
| parent | d9f3f48b634917c182fd8e0e764ef0575b1ce218 (diff) | |
| download | housing-5eba467a7eb84409aa43df83de78ecb843a79d7b.tar.zst | |
Update
Diffstat (limited to 'download.js')
| -rw-r--r-- | download.js | 264 |
1 files changed, 224 insertions, 40 deletions
diff --git a/download.js b/download.js index d5c4f62..ad149d7 100644 --- a/download.js +++ b/download.js @@ -1,57 +1,241 @@ -const fs = require('fs'); -const path = require('path'); +import crypto from "crypto"; +import fs from "fs"; +import path from "path"; -// Base URL for the WFS service -const baseUrl = 'https://kartta.hel.fi/ws/geoserver/avoindata/wfs'; +const couchUsername = process.env.COUCHDB_USERNAME; +const couchPassword = process.env.COUCHDB_PASSWORD; -// List of layers to download (extendable by adding more items to this array) +function getAuthHeader() { + if (!couchUsername || !couchPassword) { + throw new Error("CouchDB credentials not set in environment variables"); + } + const auth = Buffer.from(`${couchUsername}:${couchPassword}`).toString("base64"); + return `Basic ${auth}`; +} + +// === CONFIG === +const baseUrl = "https://kartta.hel.fi/ws/geoserver/avoindata/wfs"; const layers = [ - 'Aluesarjat_avainluvut_2024', - 'Seutukartta_liikenne_paatiet', - 'Seutukartta_liikenne_metroasemat', - 'Seutukartta_liikenne_metro_rata', - 'Seutukartta_liikenne_juna_rata', - 'Seutukartta_liikenne_juna_asema', - 'Seutukartta_aluejako_pienalue', - 'Seutukartta_aluejako_kuntarajat', - 'Seutukartta_maankaytto_jarvet', - 'Seutukartta_maankaytto_joet', - 'Seutukartta_meren_rantaviiva', - 'Toimipisterekisteri_palvelut' + "Aluesarjat_avainluvut_2024", + "Piirijako_pienalue", + "Seutukartta_liikenne_paatiet", + "Seutukartta_liikenne_metroasemat", + "Seutukartta_liikenne_metro_rata", + "Seutukartta_liikenne_juna_rata", + "Seutukartta_liikenne_juna_asema", + "Seutukartta_aluejako_pienalue", + "Seutukartta_aluejako_kuntarajat", + "Seutukartta_maankaytto_jarvet", + "Seutukartta_maankaytto_joet", + "Seutukartta_meren_rantaviiva", + "Toimipisterekisteri_palvelut", ]; -// Output directory -const outputDir = path.join(__dirname, 'app', 'data'); +const outputDir = path.join(process.cwd(), "app", "data"); +const couchUrl = "https://couch.tammi.cc"; +const dbName = "helsinki_wfs"; -// Create output directory if it doesn't exist +// Ensure output dir if (!fs.existsSync(outputDir)) { - fs.mkdirSync(outputDir, { recursive: true }); + fs.mkdirSync(outputDir, { recursive: true }); +} + +function getHeaders() { + return new Headers({ + Authorization: getAuthHeader(), + "Content-Type": "application/json", + }); +} + +// === COUCHDB HELPERS === +async function createDatabase() { + const url = `${couchUrl}/${dbName}`; + try { + const res = await fetch(url, { + headers: getHeaders(), + method: "PUT", + }); + if (res.ok || res.status === 412) { + console.log(`Database ${dbName} ready.`); + } else { + throw new Error(await res.text()); + } + } catch (e) { + console.error("DB create error:", e.message); + } +} + +async function ensureDesignDocs() { + const designDoc = { + _id: "_design/layers", + views: { + by_layer: { + map: `function(doc) { + if (doc.type === 'feature' && doc.layer) { + emit(doc.layer, null); + } + }`, + }, + }, + }; + + const url = `${couchUrl}/${dbName}/_design/layers`; + try { + const res = await fetch(url, { headers: getHeaders() }); + if (res.status === 404) { + await fetch(url, { + body: JSON.stringify(designDoc), + headers: getHeaders(), + method: "PUT", + }); + console.log("Created design document: layers/by_layer"); + } else if (res.ok) { + const existing = await res.json(); + designDoc._rev = existing._rev; + await fetch(url, { + body: JSON.stringify(designDoc), + headers: getHeaders(), + method: "PUT", + }); + console.log("Updated design document"); + } + } catch (e) { + console.error("Design doc error:", e.message); + process.exit(1); + } } -// Function to download and save layer data +// === DOWNLOAD === async function downloadLayer(layer) { - const url = `${baseUrl}?service=WFS&version=2.0.0&request=GetFeature&typeName=avoindata:${layer}&outputFormat=application/json&srsname=EPSG:4326`; + const url = `${baseUrl}?service=WFS&version=2.0.0&request=GetFeature&typeName=avoindata:${layer}&outputFormat=json&srsname=EPSG:4326`; + try { + const res = await fetch(url); + if (!res.ok) throw new Error(res.statusText); + const response = await res.json(); + return response; + } catch (e) { + console.error(`Download: \n${url}\nfailed [${layer}] ${e.toString()}`); + return null; + } +} - try { - const response = await fetch(url); - if (!response.ok) { - throw new Error(`Failed to fetch ${layer}: ${response.statusText}`); - } - const data = await response.json(); +function saveToFile(layer, data) { + const filePath = path.join(outputDir, `${layer}.geojson`); + fs.writeFileSync(filePath, JSON.stringify(data, null, "\t")); + console.log(`Saved: ${layer}.geojson`); +} - const filePath = path.join(outputDir, `${layer}.json`); - fs.writeFileSync(filePath, JSON.stringify(data, null, '\t')); - console.log(`Downloaded and saved: ${layer}.json`); - } catch (error) { - console.error(`Error downloading ${layer}: ${error.message}`); - } +// === UPLOAD METADATA === +async function uploadLayerMetadata(layer, featureCount) { + const docId = `layer_metadata:${layer}`; + const doc = { + _id: docId, + feature_count: featureCount, + last_updated: new Date().toISOString(), + name: layer, + projection: "EPSG:4326", + type: "layer_metadata", + }; + + const url = `${couchUrl}/${dbName}/${docId}`; + try { + const getRes = await fetch(url, { headers: getHeaders() }); + if (getRes.ok) { + const existing = await getRes.json(); + doc._rev = existing._rev; + } + const putRes = await fetch(url, { + body: JSON.stringify(doc), + headers: getHeaders(), + method: "PUT", + }); + if (!putRes.ok) throw new Error(await putRes.text()); + console.log(`Metadata updated: ${layer} (${featureCount} features)`); + } catch (e) { + console.error(`Metadata error [${layer}]:`, e.message); + } } -// Download all layers sequentially +// === UPLOAD SINGLE FEATURE (with deduplication) === +async function uploadFeature(doc) { + const url = `${couchUrl}/${dbName}/${doc._id}`; + try { + const getRes = await fetch(url, { headers: getHeaders() }); + if (getRes.ok) { + const existing = await getRes.json(); + doc._rev = existing._rev; + + const geomEqual = JSON.stringify(doc.geometry) === JSON.stringify(existing.geometry); + const propEqual = JSON.stringify(doc.properties) === JSON.stringify(existing.properties); + if (geomEqual && propEqual) { + return false; // skipped + } + } + + const putRes = await fetch(url, { + body: JSON.stringify(doc), + headers: getHeaders(), + method: "PUT", + }); + + return putRes.ok; + } catch (e) { + console.warn(`Upload failed [${doc._id}]:`, e.message); + return false; + } +} + +// === PROCESS LAYER === +async function processLayer(layer) { + const geojson = await downloadLayer(layer); + if (!geojson || !geojson.features) { + console.warn(`No features in ${layer} ${geojson}`); + process.exit(1); + } + + let uploaded = 0; + let skipped = 0; + + for (const feature of geojson.features) { + // Stable ID: use feature.id, or property, or UUID + const propId = + feature.id || + feature.properties?.id || + feature.properties?.tunnus || + feature.properties?.objectid || + crypto.randomUUID(); + + const doc = { + _id: `feature:${layer}:${propId}`, + downloaded_at: new Date().toISOString(), + geometry: feature.geometry, + layer: layer, + properties: feature.properties || {}, + type: "feature", + }; + + const success = await uploadFeature(doc); + success ? uploaded++ : skipped++; + } + + await uploadLayerMetadata(layer, geojson.features.length); + console.log(`Done: ${layer} | Uploaded: ${uploaded} | Skipped: ${skipped}`); +} + +// === MAIN === async function main() { - for (const layer of layers) { - await downloadLayer(layer); - } + await createDatabase(); + await ensureDesignDocs(); + + for (const layer of layers) { + await processLayer(layer); + // Optional: rate limiting + await new Promise((r) => setTimeout(r, 500)); + } + + console.log("All layers processed."); } -main(); +if (process.argv[1] === new URL(import.meta.url).pathname) { + main().catch(console.error); +} |
