blob: ea9bf2e90bbff5a760411c426785b25d52215245 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
import type { Rules } from "metascraper";
/**
* Improved Amazon metascraper plugin that fixes image extraction.
*
* The default metascraper-amazon package uses `.a-dynamic-image` selector
* which matches the FIRST element with that class. On amazon.com pages,
* this is often the Prime logo instead of the product image.
*
* This plugin uses more specific selectors to target the actual product
* image:
* - #landingImage: The main product image ID
* - #imgTagWrapperId img: Fallback container for product images
* - #imageBlock img: Additional fallback for newer Amazon layouts
*
* By placing this plugin BEFORE metascraperAmazon() in the plugin chain,
* we ensure the correct image is extracted while keeping all other Amazon
* metadata (title, brand, description) from the original plugin.
*/
/**
 * Matches http(s) URLs whose *host* is an Amazon storefront or short link:
 * - `amazon.<tld>` / `amazon.<sld>.<tld>` (amazon.com, amazon.co.uk, amazon.com.au, ...)
 * - `amzn.<tld>` (amzn.to, amzn.eu, amzn.asia, ...)
 * - `a.co`
 * Any number of subdomains (www., smile., ...) may precede the brand label.
 *
 * The pattern is anchored to the scheme and host on purpose. An unanchored
 * `.*a\.co\/` style match also accepts unrelated hosts that merely end in
 * "a.co" (e.g. media.co) and URLs that only mention "amazon." in their path
 * or query string.
 *
 * Known limitation: without a public-suffix list, a host such as
 * `amazon.example.com` is still accepted because `example.com` is
 * indistinguishable from a two-label suffix like `co.uk`.
 */
const REGEX_AMAZON_URL =
  /^https?:\/\/(?:[^/?#@]+\.)?(?:amazon\.[a-z]{2,}(?:\.[a-z]{2,})?|amzn\.[a-z]{2,}|a\.co)(?::\d+)?(?:[/?#]|$)/i;

/**
 * Plugin activation predicate.
 *
 * @param url - URL of the page being scraped.
 * @returns `true` when the URL's host looks like an Amazon domain.
 */
const test = ({ url }: { url: string }): boolean => REGEX_AMAZON_URL.test(url);
/**
 * Builds the rule set for the improved Amazon plugin.
 *
 * The returned rules carry the package name, the URL predicate `test`, and a
 * single `image` rule. The image rule walks a fixed list of product-image
 * locations from most to least specific and, for each one, prefers the
 * high-resolution `data-old-hires` attribute over the plain `src`.
 *
 * @returns The rules object to register with metascraper.
 */
const metascraperAmazonImproved = () => {
  // Attribute preference for every candidate element: hi-res first, then src.
  const attributeOrder = ["data-old-hires", "src"];

  const rules: Rules = {
    pkgName: "metascraper-amazon-improved",
    test,
    image: ({ htmlDom }) => {
      // Candidate locators in priority order. Each is re-evaluated per
      // attribute so the sequence of DOM queries stays the same as a
      // hand-unrolled chain of lookups.
      const locators = [
        // Main product image ID (most reliable).
        () => htmlDom("#landingImage"),
        // Fallback: image block container.
        () => htmlDom("#imgTagWrapperId img"),
        // Additional fallback for newer Amazon layouts.
        () => htmlDom("#imageBlock img").first(),
      ];

      for (const locate of locators) {
        for (const attribute of attributeOrder) {
          const candidate = locate().attr(attribute);
          // Skip missing and empty attribute values alike.
          if (candidate) {
            return candidate;
          }
        }
      }

      // Nothing found: return undefined to allow the next plugin to try.
      return undefined;
    },
  };

  return rules;
};
export default metascraperAmazonImproved;
|