Integrate Playwright for improved web scraping by launching a headless browser, enhancing product data retrieval with better error handling and dynamic content loading. Update logging for price saving confirmation.
This commit is contained in:
67
index.js
67
index.js
@@ -2,6 +2,7 @@ const cheerio = require("cheerio");
 const cron = require("node-cron");
 const axios = require("axios");
 const fs = require("fs");
+const { chromium } = require("playwright");
 
 const discordWebhook =
   "https://discord.com/api/webhooks/1439286509390921749/t2Hb8XloF6zhDRYD1yh_QlkHHa9eHUyXvd9TxZRHwqR_b_OxxbnwDgsm4em8TwA9NQIa";
@@ -49,7 +50,6 @@ async function compareAndSave(productsPrice) {
     for (const product of productsPrice) {
       sendMessage(`Początkowa cena **${product.name}**: ${product.price}`);
     }
-    console.log("First run completed");
     isFirstRun = false;
   }
 
@@ -70,36 +70,63 @@ async function getProducts() {
 }
 
 async function init() {
+  const browser = await chromium.launch({ headless: true });
+  const context = await browser.newContext({
+    userAgent:
+      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+  });
+  const page = await context.newPage();
+
   const productsIds = await getProducts();
   const products = [];
 
   for (const productId of productsIds) {
-    const response = await axios.get(`https://www.ceneo.pl/${productId}`);
-    const $ = cheerio.load(response.data);
+    try {
+      await page.goto(`https://www.ceneo.pl/${productId}`, {
+        waitUntil: "networkidle",
+        timeout: 30000,
+      });
 
-    const items = $(".product-offer__container").first();
-    for (const item of items) {
-      let name = $(item).data("productname");
-      let price = $(item).data("price");
-      let link = `https://www.ceneo.pl/${$(item).data("click-url")}`;
-      const shop = $(item).data("shopurl") || "ceneo.pl";
+      // Wait for bot detection to pass and content to load
+      await page
+        .waitForSelector(".product-offer__container, .product-top", {
+          timeout: 10000,
+        })
+        .catch(() => {
+          console.log(`Timeout waiting for content on ${productId}`);
+        });
 
-      if (!name || !price) {
-        name = $(item).find(".short-name__txt").text();
-        price = $(item).find(".price").text();
-        link = `https://www.ceneo.pl/${productId}`;
+      const html = await page.content();
+      const $ = cheerio.load(html);
+
+      const items = $(".product-offer__container").first();
+      for (const item of items) {
+        let name = $(item).data("productname");
+        let price = $(item).data("price");
+        let link = `https://www.ceneo.pl/${$(item).data("click-url")}`;
+        const shop = $(item).data("shopurl") || "ceneo.pl";
+
+        if (!name || !price) {
+          name = $(item).find(".short-name__txt").text();
+          price = $(item).find(".price").text();
+          link = `https://www.ceneo.pl/${productId}`;
+        }
+
+        if (!name || !price || !link) {
+          continue;
+        }
+
+        products.push({ name, price, link, shop });
       }
-
-      if (!name || !price || !link) {
-        continue;
-      }
-
-      products.push({ name, price, link, shop });
+    } catch (error) {
+      console.error(`Error fetching product ${productId}:`, error.message);
     }
   }
 
+  await browser.close();
+
   await compareAndSave(products);
-  console.log("Sprawdzone! Aktualne ceny zapisane w productsPrice.json");
+  console.log("Aktualne ceny zapisane w productsPrice.json");
 }
 
 sendMessage("Startuję monitoring cen");
Reference in New Issue
Block a user