import type { Rules } from "metascraper";

/**
 * Matches URLs that contain an `amazon.`, `amzn.` or `a.co/` segment after
 * the scheme. The plugin's rules are only applied to pages whose URL passes.
 */
const REGEX_AMAZON_URL =
  /https?:\/\/(.*amazon\..*\/.*|.*amzn\..*\/.*|.*a\.co\/.*)/i;

/**
 * Plugin guard: reports whether the scraped page's URL looks like an Amazon
 * URL according to `REGEX_AMAZON_URL`.
 *
 * @param context - Object carrying the page `url`.
 * @returns `true` when the URL matches the Amazon pattern.
 */
function test({ url }: { url: string }): boolean {
  return REGEX_AMAZON_URL.test(url);
}

/**
 * Amazon metascraper plugin with a more precise product-image rule.
 *
 * The stock metascraper-amazon package selects `.a-dynamic-image`, i.e. the
 * FIRST element carrying that class, which on amazon.com pages is often the
 * Prime logo rather than the product picture. This plugin instead probes the
 * containers that hold the actual product image, in order:
 *
 * 1. `#landingImage` - the main product image ID
 * 2. `#imgTagWrapperId img` - fallback container for product images
 * 3. `#imageBlock img` (first match) - fallback for newer Amazon layouts
 *
 * Register it BEFORE `metascraperAmazon()` in the plugin chain so the correct
 * image wins while title, brand and description still come from the original
 * plugin.
 *
 * @returns The rule set (`pkgName`, `test` guard and `image` rule).
 */
function metascraperAmazonImproved(): Rules {
  // For every candidate element the high-resolution attribute is preferred
  // over the plain `src`.
  const attributesByPreference = ["data-old-hires", "src"];

  return {
    pkgName: "metascraper-amazon-improved",
    test,
    image: ({ htmlDom }) => {
      // Each thunk re-runs its query on demand, most reliable location first.
      const candidates = [
        () => htmlDom("#landingImage"),
        () => htmlDom("#imgTagWrapperId img"),
        () => htmlDom("#imageBlock img").first(),
      ];

      for (const select of candidates) {
        for (const attribute of attributesByPreference) {
          const value = select().attr(attribute);
          if (value) {
            return value;
          }
        }
      }

      // Nothing usable found: yield undefined so the next plugin can try.
      return undefined;
    },
  };
}

export default metascraperAmazonImproved;