about | summary | refs | log | tree | commit | diff | stats
path: root/scrape/main.go
diff options
context:
space:
mode:
author: Petri Hienonen <petri.hienonen@gmail.com>, 2025-12-28 13:51:36 +0200
committer: Petri Hienonen <petri.hienonen@gmail.com>, 2025-12-28 13:51:36 +0200
commit: 341b9d2f8be11d6149994a89ddbde505a0b7977e (patch)
tree: f13fce913488593298c85822e16a8cfdcae90f1c /scrape/main.go
parent: 0bdadb8f490dbc982954cda7f2c70eec365e05b8 (diff)
download: housing-341b9d2f8be11d6149994a89ddbde505a0b7977e.tar.zst
Update golang dependencies
Diffstat (limited to '')
-rw-r--r-- scrape/main.go 52
1 file changed, 26 insertions, 26 deletions
diff --git a/scrape/main.go b/scrape/main.go
index fed0397..13297bb 100644
--- a/scrape/main.go
+++ b/scrape/main.go
@@ -76,26 +76,26 @@ func (s *S3Client) UploadFromURL(imgURL, key string) (string, error) {
return "", fmt.Errorf("HTTP GET failed: %w", err)
}
defer resp.Body.Close()
-
+
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("image download failed with status %d", resp.StatusCode)
}
-
+
data, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("reading response body failed: %w", err)
}
-
+
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
-
+
_, err = s.client.PutObject(ctx, s.bucket, key, bytes.NewReader(data), int64(len(data)), minio.PutObjectOptions{
ContentType: "image/webp",
})
if err != nil {
return "", fmt.Errorf("S3 upload failed: %w", err)
}
-
+
publicURL := fmt.Sprintf("https://%s/%s/%s", s.client.EndpointURL().Host, s.bucket, key)
log.Printf("Successfully uploaded image: %s", publicURL)
return publicURL, nil
@@ -133,13 +133,13 @@ func (c *CouchClient) Upsert(h *House) error {
if err != nil {
return fmt.Errorf("JSON marshal failed: %w", err)
}
-
+
reqURL := fmt.Sprintf("%s/%s/%s", c.baseURL, c.database, url.PathEscape(h.ID))
req, err := http.NewRequest("PUT", reqURL, bytes.NewReader(body))
if err != nil {
return fmt.Errorf("request creation failed: %w", err)
}
-
+
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Accept", "application/json")
@@ -148,12 +148,12 @@ func (c *CouchClient) Upsert(h *House) error {
return fmt.Errorf("HTTP request failed: %w", err)
}
defer resp.Body.Close()
-
+
if resp.StatusCode != http.StatusCreated && resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
return fmt.Errorf("couchDB responded with status %d: %s", resp.StatusCode, string(body))
}
-
+
var rev struct {
Rev string `json:"rev"`
}
@@ -193,7 +193,7 @@ func (osi *OikotieScraper) loadTokens() error {
if osi.otaToken == "" || osi.otaCuid == "" || osi.otaLoaded == "" || osi.phpSessID == "" {
log.Println("Missing one or more tokens – please enter them now:")
r := bufio.NewReader(os.Stdin)
-
+
if osi.otaToken == "" {
fmt.Print("OTA-token: ")
token, err := r.ReadString('\n')
@@ -202,7 +202,7 @@ func (osi *OikotieScraper) loadTokens() error {
}
osi.otaToken = strings.TrimSpace(token)
}
-
+
if osi.otaCuid == "" {
fmt.Print("OTA-cuid: ")
cuid, err := r.ReadString('\n')
@@ -211,7 +211,7 @@ func (osi *OikotieScraper) loadTokens() error {
}
osi.otaCuid = strings.TrimSpace(cuid)
}
-
+
if osi.otaLoaded == "" {
fmt.Print("OTA-loaded: ")
loaded, err := r.ReadString('\n')
@@ -220,7 +220,7 @@ func (osi *OikotieScraper) loadTokens() error {
}
osi.otaLoaded = strings.TrimSpace(loaded)
}
-
+
if osi.phpSessID == "" {
fmt.Print("PHPSESSID: ")
sessID, err := r.ReadString('\n')
@@ -264,7 +264,7 @@ func (os *OikotieScraper) ScrapeAll(ctx context.Context, couch *CouchClient) err
}
continue
}
-
+
log.Printf("Fetch error (offset %d): %v", offset, err)
retryCount++
if retryCount > maxRetries {
@@ -273,7 +273,7 @@ func (os *OikotieScraper) ScrapeAll(ctx context.Context, couch *CouchClient) err
time.Sleep(5 * time.Second)
continue
}
-
+
retryCount = 0 // Reset retry count on successful fetch
if len(cards) == 0 {
@@ -297,7 +297,7 @@ func (os *OikotieScraper) ScrapeAll(ctx context.Context, couch *CouchClient) err
}
}
- log.Printf("Batch %d-%d: %d/%d cards saved (total: %d, found: %d)",
+ log.Printf("Batch %d-%d: %d/%d cards saved (total: %d, found: %d)",
offset, offset+len(cards)-1, savedInBatch, len(cards), totalSaved, found)
if offset+len(cards) >= found {
@@ -311,11 +311,11 @@ func (os *OikotieScraper) ScrapeAll(ctx context.Context, couch *CouchClient) err
// Fixed: cardId can be number or string, so use json.Number
type apiCard struct {
- ID json.Number `json:"cardId"`
- Type int `json:"cardType"`
- SubType int `json:"cardSubType"`
- URL string `json:"url"`
- Status int `json:"status"`
+ ID json.Number `json:"cardId"`
+ Type int `json:"cardType"`
+ SubType int `json:"cardSubType"`
+ URL string `json:"url"`
+ Status int `json:"status"`
Data json.RawMessage `json:"data"`
Location json.RawMessage `json:"location"`
Company json.RawMessage `json:"company"`
@@ -367,13 +367,13 @@ func (os *OikotieScraper) fetchPage(offset, limit int) ([]apiCard, int, error) {
Found int `json:"found"`
Cards []apiCard `json:"cards"`
}
-
+
// Read the body first for better error reporting
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, 0, fmt.Errorf("reading response body failed: %w", err)
}
-
+
if err := json.Unmarshal(body, &payload); err != nil {
log.Printf("Raw response: %s", string(body))
return nil, 0, fmt.Errorf("JSON unmarshal failed: %w", err)
@@ -384,7 +384,7 @@ func (os *OikotieScraper) fetchPage(offset, limit int) ([]apiCard, int, error) {
func (os *OikotieScraper) convertCard(c apiCard) (*House, error) {
// Convert json.Number to string for the ID
cardID := c.ID.String()
-
+
h := &House{
ID: "oikotie_" + cardID,
Source: "oikotie",
@@ -413,7 +413,7 @@ func (os *OikotieScraper) convertCard(c apiCard) (*House, error) {
}
h.Images = append(h.Images, publicURL)
}
-
+
log.Printf("Successfully converted card %s with %d images", cardID, len(h.Images))
return h, nil
}
@@ -458,4 +458,4 @@ func main() {
log.Fatalf("Scrape failed: %v", err)
}
log.Println("Scraping completed successfully!")
-} \ No newline at end of file
+}