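Integrate Playwright for more reliable web scraping: product pages are now loaded in a headless browser so that dynamically rendered content is available before parsing, and each product fetch is wrapped in error handling so one failure does not abort the run. Also update the log message that confirms prices were saved.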

This commit is contained in:
Norbert Maciaszek
2025-11-20 20:33:04 +01:00
parent a25ab727b9
commit f09b58fb63

View File

@@ -2,6 +2,7 @@ const cheerio = require("cheerio");
const cron = require("node-cron");
const axios = require("axios");
const fs = require("fs");
const { chromium } = require("playwright");
// Discord webhook URL that receives the price notifications.
// The DISCORD_WEBHOOK_URL environment variable takes precedence so the
// credential can live outside the repository. `||` (not `??`) is deliberate:
// an empty string is not a usable URL and must also fall back.
// SECURITY: the literal below is a live secret committed to source control.
// It is kept only for backward compatibility; rotate the webhook and remove
// the fallback once the environment variable is configured everywhere.
const discordWebhook =
  process.env.DISCORD_WEBHOOK_URL ||
  "https://discord.com/api/webhooks/1439286509390921749/t2Hb8XloF6zhDRYD1yh_QlkHHa9eHUyXvd9TxZRHwqR_b_OxxbnwDgsm4em8TwA9NQIa";
@@ -49,7 +50,6 @@ async function compareAndSave(productsPrice) {
for (const product of productsPrice) {
sendMessage(`Początkowa cena **${product.name}**: ${product.price}`);
}
console.log("First run completed");
isFirstRun = false;
}
@@ -70,12 +70,34 @@ async function getProducts() {
}
async function init() {
const browser = await chromium.launch({ headless: true });
const context = await browser.newContext({
userAgent:
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
});
const page = await context.newPage();
const productsIds = await getProducts();
const products = [];
for (const productId of productsIds) {
const response = await axios.get(`https://www.ceneo.pl/${productId}`);
const $ = cheerio.load(response.data);
try {
await page.goto(`https://www.ceneo.pl/${productId}`, {
waitUntil: "networkidle",
timeout: 30000,
});
// Wait for bot detection to pass and content to load
await page
.waitForSelector(".product-offer__container, .product-top", {
timeout: 10000,
})
.catch(() => {
console.log(`Timeout waiting for content on ${productId}`);
});
const html = await page.content();
const $ = cheerio.load(html);
const items = $(".product-offer__container").first();
for (const item of items) {
@@ -96,10 +118,15 @@ async function init() {
products.push({ name, price, link, shop });
}
} catch (error) {
console.error(`Error fetching product ${productId}:`, error.message);
}
}
await browser.close();
await compareAndSave(products);
console.log("Sprawdzone! Aktualne ceny zapisane w productsPrice.json");
console.log("Aktualne ceny zapisane w productsPrice.json");
}
// Top-level call executed when the script is loaded: hands the startup notice
// (Polish for "Starting price monitoring") to sendMessage.
// NOTE(review): sendMessage is defined outside this view; presumably it posts
// to the Discord webhook — confirm. Its result is not awaited here.
sendMessage("Startuję monitoring cen");