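Integrate Playwright for web scraping: launch a headless Chromium browser and parse the rendered page instead of the raw axios response, so dynamically loaded product content is available before extraction. Wrap each product fetch in its own error handling so that one failing product no longer aborts the whole run. Update the log message that confirms the prices were saved.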
This commit is contained in:
67
index.js
67
index.js
@@ -2,6 +2,7 @@ const cheerio = require("cheerio");
|
|||||||
const cron = require("node-cron");
|
const cron = require("node-cron");
|
||||||
const axios = require("axios");
|
const axios = require("axios");
|
||||||
const fs = require("fs");
|
const fs = require("fs");
|
||||||
|
const { chromium } = require("playwright");
|
||||||
|
|
||||||
const discordWebhook =
|
const discordWebhook =
|
||||||
"https://discord.com/api/webhooks/1439286509390921749/t2Hb8XloF6zhDRYD1yh_QlkHHa9eHUyXvd9TxZRHwqR_b_OxxbnwDgsm4em8TwA9NQIa";
|
"https://discord.com/api/webhooks/1439286509390921749/t2Hb8XloF6zhDRYD1yh_QlkHHa9eHUyXvd9TxZRHwqR_b_OxxbnwDgsm4em8TwA9NQIa";
|
||||||
@@ -49,7 +50,6 @@ async function compareAndSave(productsPrice) {
|
|||||||
for (const product of productsPrice) {
|
for (const product of productsPrice) {
|
||||||
sendMessage(`Początkowa cena **${product.name}**: ${product.price}`);
|
sendMessage(`Początkowa cena **${product.name}**: ${product.price}`);
|
||||||
}
|
}
|
||||||
console.log("First run completed");
|
|
||||||
isFirstRun = false;
|
isFirstRun = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -70,36 +70,63 @@ async function getProducts() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function init() {
|
async function init() {
|
||||||
|
const browser = await chromium.launch({ headless: true });
|
||||||
|
const context = await browser.newContext({
|
||||||
|
userAgent:
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||||
|
});
|
||||||
|
const page = await context.newPage();
|
||||||
|
|
||||||
const productsIds = await getProducts();
|
const productsIds = await getProducts();
|
||||||
const products = [];
|
const products = [];
|
||||||
|
|
||||||
for (const productId of productsIds) {
|
for (const productId of productsIds) {
|
||||||
const response = await axios.get(`https://www.ceneo.pl/${productId}`);
|
try {
|
||||||
const $ = cheerio.load(response.data);
|
await page.goto(`https://www.ceneo.pl/${productId}`, {
|
||||||
|
waitUntil: "networkidle",
|
||||||
|
timeout: 30000,
|
||||||
|
});
|
||||||
|
|
||||||
const items = $(".product-offer__container").first();
|
// Wait for bot detection to pass and content to load
|
||||||
for (const item of items) {
|
await page
|
||||||
let name = $(item).data("productname");
|
.waitForSelector(".product-offer__container, .product-top", {
|
||||||
let price = $(item).data("price");
|
timeout: 10000,
|
||||||
let link = `https://www.ceneo.pl/${$(item).data("click-url")}`;
|
})
|
||||||
const shop = $(item).data("shopurl") || "ceneo.pl";
|
.catch(() => {
|
||||||
|
console.log(`Timeout waiting for content on ${productId}`);
|
||||||
|
});
|
||||||
|
|
||||||
if (!name || !price) {
|
const html = await page.content();
|
||||||
name = $(item).find(".short-name__txt").text();
|
const $ = cheerio.load(html);
|
||||||
price = $(item).find(".price").text();
|
|
||||||
link = `https://www.ceneo.pl/${productId}`;
|
const items = $(".product-offer__container").first();
|
||||||
|
for (const item of items) {
|
||||||
|
let name = $(item).data("productname");
|
||||||
|
let price = $(item).data("price");
|
||||||
|
let link = `https://www.ceneo.pl/${$(item).data("click-url")}`;
|
||||||
|
const shop = $(item).data("shopurl") || "ceneo.pl";
|
||||||
|
|
||||||
|
if (!name || !price) {
|
||||||
|
name = $(item).find(".short-name__txt").text();
|
||||||
|
price = $(item).find(".price").text();
|
||||||
|
link = `https://www.ceneo.pl/${productId}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!name || !price || !link) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
products.push({ name, price, link, shop });
|
||||||
}
|
}
|
||||||
|
} catch (error) {
|
||||||
if (!name || !price || !link) {
|
console.error(`Error fetching product ${productId}:`, error.message);
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
products.push({ name, price, link, shop });
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
await browser.close();
|
||||||
|
|
||||||
await compareAndSave(products);
|
await compareAndSave(products);
|
||||||
console.log("Sprawdzone! Aktualne ceny zapisane w productsPrice.json");
|
console.log("Aktualne ceny zapisane w productsPrice.json");
|
||||||
}
|
}
|
||||||
|
|
||||||
sendMessage("Startuję monitoring cen");
|
sendMessage("Startuję monitoring cen");
|
||||||
|
|||||||
Reference in New Issue
Block a user