diff options
| author | Petri Hienonen <petri.hienonen@gmail.com> | 2025-12-28 13:51:36 +0200 |
|---|---|---|
| committer | Petri Hienonen <petri.hienonen@gmail.com> | 2025-12-28 13:51:36 +0200 |
| commit | 341b9d2f8be11d6149994a89ddbde505a0b7977e (patch) | |
| tree | f13fce913488593298c85822e16a8cfdcae90f1c /scrape/main.go | |
| parent | 0bdadb8f490dbc982954cda7f2c70eec365e05b8 (diff) | |
| download | housing-341b9d2f8be11d6149994a89ddbde505a0b7977e.tar.zst | |
Update golang dependencies
Diffstat (limited to '')
| -rw-r--r-- | scrape/main.go | 52 |
1 file changed, 26 insertions, 26 deletions
diff --git a/scrape/main.go b/scrape/main.go index fed0397..13297bb 100644 --- a/scrape/main.go +++ b/scrape/main.go @@ -76,26 +76,26 @@ func (s *S3Client) UploadFromURL(imgURL, key string) (string, error) { return "", fmt.Errorf("HTTP GET failed: %w", err) } defer resp.Body.Close() - + if resp.StatusCode != http.StatusOK { return "", fmt.Errorf("image download failed with status %d", resp.StatusCode) } - + data, err := io.ReadAll(resp.Body) if err != nil { return "", fmt.Errorf("reading response body failed: %w", err) } - + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - + _, err = s.client.PutObject(ctx, s.bucket, key, bytes.NewReader(data), int64(len(data)), minio.PutObjectOptions{ ContentType: "image/webp", }) if err != nil { return "", fmt.Errorf("S3 upload failed: %w", err) } - + publicURL := fmt.Sprintf("https://%s/%s/%s", s.client.EndpointURL().Host, s.bucket, key) log.Printf("Successfully uploaded image: %s", publicURL) return publicURL, nil @@ -133,13 +133,13 @@ func (c *CouchClient) Upsert(h *House) error { if err != nil { return fmt.Errorf("JSON marshal failed: %w", err) } - + reqURL := fmt.Sprintf("%s/%s/%s", c.baseURL, c.database, url.PathEscape(h.ID)) req, err := http.NewRequest("PUT", reqURL, bytes.NewReader(body)) if err != nil { return fmt.Errorf("request creation failed: %w", err) } - + req.Header.Set("Content-Type", "application/json") req.Header.Set("Accept", "application/json") @@ -148,12 +148,12 @@ func (c *CouchClient) Upsert(h *House) error { return fmt.Errorf("HTTP request failed: %w", err) } defer resp.Body.Close() - + if resp.StatusCode != http.StatusCreated && resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return fmt.Errorf("couchDB responded with status %d: %s", resp.StatusCode, string(body)) } - + var rev struct { Rev string `json:"rev"` } @@ -193,7 +193,7 @@ func (osi *OikotieScraper) loadTokens() error { if osi.otaToken == "" || osi.otaCuid == "" || osi.otaLoaded == "" 
|| osi.phpSessID == "" { log.Println("Missing one or more tokens – please enter them now:") r := bufio.NewReader(os.Stdin) - + if osi.otaToken == "" { fmt.Print("OTA-token: ") token, err := r.ReadString('\n') @@ -202,7 +202,7 @@ func (osi *OikotieScraper) loadTokens() error { } osi.otaToken = strings.TrimSpace(token) } - + if osi.otaCuid == "" { fmt.Print("OTA-cuid: ") cuid, err := r.ReadString('\n') @@ -211,7 +211,7 @@ func (osi *OikotieScraper) loadTokens() error { } osi.otaCuid = strings.TrimSpace(cuid) } - + if osi.otaLoaded == "" { fmt.Print("OTA-loaded: ") loaded, err := r.ReadString('\n') @@ -220,7 +220,7 @@ func (osi *OikotieScraper) loadTokens() error { } osi.otaLoaded = strings.TrimSpace(loaded) } - + if osi.phpSessID == "" { fmt.Print("PHPSESSID: ") sessID, err := r.ReadString('\n') @@ -264,7 +264,7 @@ func (os *OikotieScraper) ScrapeAll(ctx context.Context, couch *CouchClient) err } continue } - + log.Printf("Fetch error (offset %d): %v", offset, err) retryCount++ if retryCount > maxRetries { @@ -273,7 +273,7 @@ func (os *OikotieScraper) ScrapeAll(ctx context.Context, couch *CouchClient) err time.Sleep(5 * time.Second) continue } - + retryCount = 0 // Reset retry count on successful fetch if len(cards) == 0 { @@ -297,7 +297,7 @@ func (os *OikotieScraper) ScrapeAll(ctx context.Context, couch *CouchClient) err } } - log.Printf("Batch %d-%d: %d/%d cards saved (total: %d, found: %d)", + log.Printf("Batch %d-%d: %d/%d cards saved (total: %d, found: %d)", offset, offset+len(cards)-1, savedInBatch, len(cards), totalSaved, found) if offset+len(cards) >= found { @@ -311,11 +311,11 @@ func (os *OikotieScraper) ScrapeAll(ctx context.Context, couch *CouchClient) err // Fixed: cardId can be number or string, so use json.Number type apiCard struct { - ID json.Number `json:"cardId"` - Type int `json:"cardType"` - SubType int `json:"cardSubType"` - URL string `json:"url"` - Status int `json:"status"` + ID json.Number `json:"cardId"` + Type int `json:"cardType"` + 
SubType int `json:"cardSubType"` + URL string `json:"url"` + Status int `json:"status"` Data json.RawMessage `json:"data"` Location json.RawMessage `json:"location"` Company json.RawMessage `json:"company"` @@ -367,13 +367,13 @@ func (os *OikotieScraper) fetchPage(offset, limit int) ([]apiCard, int, error) { Found int `json:"found"` Cards []apiCard `json:"cards"` } - + // Read the body first for better error reporting body, err := io.ReadAll(resp.Body) if err != nil { return nil, 0, fmt.Errorf("reading response body failed: %w", err) } - + if err := json.Unmarshal(body, &payload); err != nil { log.Printf("Raw response: %s", string(body)) return nil, 0, fmt.Errorf("JSON unmarshal failed: %w", err) @@ -384,7 +384,7 @@ func (os *OikotieScraper) fetchPage(offset, limit int) ([]apiCard, int, error) { func (os *OikotieScraper) convertCard(c apiCard) (*House, error) { // Convert json.Number to string for the ID cardID := c.ID.String() - + h := &House{ ID: "oikotie_" + cardID, Source: "oikotie", @@ -413,7 +413,7 @@ func (os *OikotieScraper) convertCard(c apiCard) (*House, error) { } h.Images = append(h.Images, publicURL) } - + log.Printf("Successfully converted card %s with %d images", cardID, len(h.Images)) return h, nil } @@ -458,4 +458,4 @@ func main() { log.Fatalf("Scrape failed: %v", err) } log.Println("Scraping completed successfully!") -}
\ No newline at end of file +} |
