Integrate Playwright for improved web scraping by launching a headless browser, enhancing product data retrieval with better error handling and dynamic content loading. Update logging for price saving confirmation.

This commit is contained in:
Norbert Maciaszek
2025-11-20 20:33:04 +01:00
parent a25ab727b9
commit f09b58fb63

View File

@@ -2,6 +2,7 @@ const cheerio = require("cheerio");
const cron = require("node-cron"); const cron = require("node-cron");
const axios = require("axios"); const axios = require("axios");
const fs = require("fs"); const fs = require("fs");
const { chromium } = require("playwright");
const discordWebhook = const discordWebhook =
"https://discord.com/api/webhooks/1439286509390921749/t2Hb8XloF6zhDRYD1yh_QlkHHa9eHUyXvd9TxZRHwqR_b_OxxbnwDgsm4em8TwA9NQIa"; "https://discord.com/api/webhooks/1439286509390921749/t2Hb8XloF6zhDRYD1yh_QlkHHa9eHUyXvd9TxZRHwqR_b_OxxbnwDgsm4em8TwA9NQIa";
@@ -49,7 +50,6 @@ async function compareAndSave(productsPrice) {
for (const product of productsPrice) { for (const product of productsPrice) {
sendMessage(`Początkowa cena **${product.name}**: ${product.price}`); sendMessage(`Początkowa cena **${product.name}**: ${product.price}`);
} }
console.log("First run completed");
isFirstRun = false; isFirstRun = false;
} }
@@ -70,36 +70,63 @@ async function getProducts() {
} }
/**
 * Scrapes the current first offer for every tracked product and hands the
 * results to `compareAndSave`.
 *
 * A headless Chromium instance (Playwright) renders each product page; the
 * resulting HTML is parsed with cheerio. A failure on a single product is
 * logged and does not stop the remaining products from being processed.
 *
 * @returns {Promise<void>} Resolves after `compareAndSave` has finished.
 */
async function init() {
  const products = [];
  const browser = await chromium.launch({ headless: true });

  try {
    const context = await browser.newContext({
      userAgent:
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    });
    const page = await context.newPage();
    const productsIds = await getProducts();

    for (const productId of productsIds) {
      const productPageUrl = `https://www.ceneo.pl/${productId}`;

      try {
        await page.goto(productPageUrl, {
          waitUntil: "networkidle",
          timeout: 30000,
        });

        // Wait for bot detection to pass and content to load. A timeout here
        // is only logged: the page is still parsed with whatever has rendered.
        await page
          .waitForSelector(".product-offer__container, .product-top", {
            timeout: 10000,
          })
          .catch(() => {
            console.log(`Timeout waiting for content on ${productId}`);
          });

        const html = await page.content();
        const $ = cheerio.load(html);

        // Only the first offer container on the page is considered.
        const offer = $(".product-offer__container").first();
        if (offer.length === 0) {
          continue;
        }

        let name = offer.data("productname");
        let price = offer.data("price");
        const clickUrl = offer.data("click-url");
        // Fall back to the product page when the offer carries no click-url,
        // instead of producing "https://www.ceneo.pl/undefined".
        let link = clickUrl ? `https://www.ceneo.pl/${clickUrl}` : productPageUrl;
        const shop = offer.data("shopurl") || "ceneo.pl";

        // Data attributes missing: read name and price from the offer markup.
        if (!name || !price) {
          name = offer.find(".short-name__txt").text();
          price = offer.find(".price").text();
          link = productPageUrl;
        }

        if (!name || !price) {
          continue;
        }

        products.push({ name, price, link, shop });
      } catch (error) {
        console.error(`Error fetching product ${productId}:`, error.message);
      }
    }
  } finally {
    // Always release the browser process, even when context/page creation or
    // getProducts() rejects; otherwise each failed run leaks a Chromium instance.
    await browser.close();
  }

  await compareAndSave(products);
  console.log("Aktualne ceny zapisane w productsPrice.json");
}
sendMessage("Startuję monitoring cen"); sendMessage("Startuję monitoring cen");