From f09b58fb63c0859d755f70ce756d9b990bf5a61e Mon Sep 17 00:00:00 2001 From: Norbert Maciaszek Date: Thu, 20 Nov 2025 20:33:04 +0100 Subject: [PATCH] Integrate Playwright for improved web scraping by launching a headless browser, enhancing product data retrieval with better error handling and dynamic content loading. Update logging for price saving confirmation. --- index.js | 67 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/index.js b/index.js index 58ba2a0..7d0f8e1 100644 --- a/index.js +++ b/index.js @@ -2,6 +2,7 @@ const cheerio = require("cheerio"); const cron = require("node-cron"); const axios = require("axios"); const fs = require("fs"); +const { chromium } = require("playwright"); const discordWebhook = "https://discord.com/api/webhooks/1439286509390921749/t2Hb8XloF6zhDRYD1yh_QlkHHa9eHUyXvd9TxZRHwqR_b_OxxbnwDgsm4em8TwA9NQIa"; @@ -49,7 +50,6 @@ async function compareAndSave(productsPrice) { for (const product of productsPrice) { sendMessage(`Początkowa cena **${product.name}**: ${product.price}`); } - console.log("First run completed"); isFirstRun = false; } @@ -70,36 +70,63 @@ async function getProducts() { } async function init() { + const browser = await chromium.launch({ headless: true }); + const context = await browser.newContext({ + userAgent: + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + }); + const page = await context.newPage(); + const productsIds = await getProducts(); const products = []; for (const productId of productsIds) { - const response = await axios.get(`https://www.ceneo.pl/${productId}`); - const $ = cheerio.load(response.data); + try { + await page.goto(`https://www.ceneo.pl/${productId}`, { + waitUntil: "networkidle", + timeout: 30000, + }); - const items = $(".product-offer__container").first(); - for (const item of items) { - let name = $(item).data("productname"); - let price = $(item).data("price"); - let link = `https://www.ceneo.pl/${$(item).data("click-url")}`; - const shop = $(item).data("shopurl") || "ceneo.pl"; + // Wait for bot detection to pass and content to load + await page + .waitForSelector(".product-offer__container, .product-top", { + timeout: 10000, + }) + .catch(() => { + console.log(`Timeout waiting for content on ${productId}`); + }); - if (!name || !price) { - name = $(item).find(".short-name__txt").text(); - price = $(item).find(".price").text(); - link = `https://www.ceneo.pl/${productId}`; + const html = await page.content(); + const $ = cheerio.load(html); + + const items = $(".product-offer__container").first(); + for (const item of items) { + let name = $(item).data("productname"); + let price = $(item).data("price"); + let link = `https://www.ceneo.pl/${$(item).data("click-url")}`; + const shop = $(item).data("shopurl") || "ceneo.pl"; + + if (!name || !price) { + name = $(item).find(".short-name__txt").text(); + price = $(item).find(".price").text(); + link = `https://www.ceneo.pl/${productId}`; + } + + if (!name || !price || !link) { + continue; + } + + products.push({ name, price, link, shop }); } - - if (!name || !price || !link) { - continue; - } - - products.push({ name, price, link, shop }); + } catch (error) { + console.error(`Error fetching product ${productId}:`, error.message); } } + await browser.close(); + await compareAndSave(products); - console.log("Sprawdzone! Aktualne ceny zapisane w productsPrice.json"); + console.log("Aktualne ceny zapisane w productsPrice.json"); } sendMessage("Startuję monitoring cen");