Compare commits

...

10 Commits

Author SHA1 Message Date
Norbert Maciaszek
f09b58fb63 Integrate Playwright for improved web scraping by launching a headless browser, enhancing product data retrieval with better error handling and dynamic content loading. Update logging for price saving confirmation. 2025-11-20 20:33:04 +01:00
Norbert Maciaszek
a25ab727b9 Add console log for first run completion in price comparison function to enhance debugging and monitoring. 2025-11-20 20:02:18 +01:00
Norbert Maciaszek
bf60325fac Add initial price notifications for products and adjust cron schedule to run every 15 minutes. Implement first run logic to send initial prices. 2025-11-20 19:54:29 +01:00
Norbert Maciaszek
63bae4f805 Refactor product ID retrieval to dynamically fetch from API instead of hardcoding values, enhancing flexibility for future updates. 2025-11-16 19:22:16 +01:00
Norbert Maciaszek
72ba9aae20 Refactor price monitoring to work exclusively with ceneo.pl, removing unused selectors and browser automation. Update product comparison logic to match by name and include shop information in notifications. 2025-11-16 18:40:36 +01:00
Norbert Maciaszek
1f41be62f1 Update cron schedule to run at the top of the hour and set timezone to Europe/Warsaw 2025-11-15 22:16:27 +01:00
Norbert Maciaszek
a64edaf22f Initialize productsPrice.json if it doesn't exist and update log message to Polish 2025-11-15 20:36:05 +01:00
Norbert Maciaszek
1559e4ccc9 Add logging for price checking and execute scheduled task immediately 2025-11-15 20:13:38 +01:00
Norbert Maciaszek
b62389085f Remove headless option from Chromium launch in init function 2025-11-15 20:10:16 +01:00
Norbert Maciaszek
beb1cf7bdc Add sendMessage function for Discord notifications 2025-11-15 20:06:29 +01:00
2 changed files with 236 additions and 83 deletions

166
index.js
View File

@@ -1,33 +1,32 @@
const cheerio = require("cheerio");
const cron = require("node-cron");
const axios = require("axios");
const { chromium } = require("playwright");
const fs = require("fs");
const { chromium } = require("playwright");
const discordWebhook =
"https://discord.com/api/webhooks/1439286509390921749/t2Hb8XloF6zhDRYD1yh_QlkHHa9eHUyXvd9TxZRHwqR_b_OxxbnwDgsm4em8TwA9NQIa";
const priceSelectors = {
miodowamydlarnia: ".projector_prices__price",
greentouch: ".main-price",
amazon: "#corePrice_feature_div .a-price .a-offscreen",
soxo: "#projector_price_value span",
empik: '[data-ta-section="priceMainContainer"] [data-ta="price"]',
notino: "#pd-price",
};
const excludePage = ["allegro", "homla.com.pl", "home-you.com"];
function sendMessage(message) {
axios.post(discordWebhook, {
content: message,
});
}
let isFirstRun = true;
async function compareAndSave(productsPrice) {
const productsPriceJson =
fs.readFileSync("productsPrice.json", "utf8") || "[]";
if (!fs.existsSync("productsPrice.json")) {
fs.writeFileSync("productsPrice.json", "[]");
}
const productsPriceJson = fs.readFileSync("productsPrice.json", "utf8");
const oldProductsPrice = JSON.parse(productsPriceJson);
const diffProducts = [];
for (const product of productsPrice) {
const oldProduct = oldProductsPrice.find(
(oldProduct) => oldProduct.link === product.link
(oldProduct) => oldProduct.name === product.name
);
if (oldProduct && oldProduct.price !== product.price) {
@@ -36,20 +35,22 @@ async function compareAndSave(productsPrice) {
newPrice: product.price,
oldPrice: oldProduct.price,
link: product.link,
shop: product.shop,
});
}
}
for (const product of diffProducts) {
await axios.post(discordWebhook, {
content: `Zmiana ceny **${product.name}**:\nCena: ${product.oldPrice} -> ${product.newPrice}\nLink: ${product.link}`,
});
sendMessage(
`Zmiana ceny **${product.name}**: ${product.oldPrice} -> ${product.newPrice}\nLink: ${product.link}`
);
}
if (diffProducts.length === 0) {
await axios.post(discordWebhook, {
content: "Brak zmian w cenach",
});
if (isFirstRun) {
for (const product of productsPrice) {
sendMessage(`Początkowa cena **${product.name}**: ${product.price}`);
}
isFirstRun = false;
}
fs.writeFileSync(
@@ -61,82 +62,81 @@ async function compareAndSave(productsPrice) {
async function getProducts() {
const products = await axios
.get(
"https://db.maciaszek.ovh/api/collections/gifts_items/records?fields=title,link"
"https://db.maciaszek.ovh/api/collections/gifts_items/records?fields=ceneo_id&filter=ceneo_id!=''"
)
.then((response) => response.data.items);
.then((response) => response.data.items.map((item) => item.ceneo_id));
return products
.filter((product) => product.link !== "")
.filter(
(product) => !excludePage.some((page) => product.link.includes(page))
);
return products;
}
async function init() {
const productsWithLinks = await getProducts();
const productsWithBrowser = [];
const productsPrice = [];
const selectors = Object.keys(priceSelectors);
const browser = await chromium.launch({ headless: true });
const context = await browser.newContext({
userAgent:
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
});
const page = await context.newPage();
for (const product of productsWithLinks) {
if (product.link === "") continue;
const productsIds = await getProducts();
const products = [];
for (const productId of productsIds) {
try {
const { data } = await axios.get(product.link, {
headers: {
"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
},
await page.goto(`https://www.ceneo.pl/${productId}`, {
waitUntil: "networkidle",
timeout: 30000,
});
const $ = cheerio.load(data);
const selector = selectors.find((selector) =>
product.link.includes(selector)
);
const price = $(priceSelectors[selector]).text();
productsPrice.push({
name: product.title,
price: price,
link: product.link,
});
} catch {
productsWithBrowser.push(product);
// Wait for bot detection to pass and content to load
await page
.waitForSelector(".product-offer__container, .product-top", {
timeout: 10000,
})
.catch(() => {
console.log(`Timeout waiting for content on ${productId}`);
});
const html = await page.content();
const $ = cheerio.load(html);
const items = $(".product-offer__container").first();
for (const item of items) {
let name = $(item).data("productname");
let price = $(item).data("price");
let link = `https://www.ceneo.pl/${$(item).data("click-url")}`;
const shop = $(item).data("shopurl") || "ceneo.pl";
if (!name || !price) {
name = $(item).find(".short-name__txt").text();
price = $(item).find(".price").text();
link = `https://www.ceneo.pl/${productId}`;
}
if (!name || !price || !link) {
continue;
}
products.push({ name, price, link, shop });
}
} catch (error) {
console.error(`Error fetching product ${productId}:`, error.message);
}
}
if (productsWithBrowser.length > 0) {
const browser = await chromium.launch({ headless: false });
const context = await browser.newContext({
userAgent:
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
});
const page = await context.newPage();
await browser.close();
for (const product of productsWithBrowser) {
await page.goto(product.link);
const selector = selectors.find((selector) =>
product.link.includes(selector)
);
const element = await page.$(priceSelectors[selector]);
if (!element) continue;
const price = await element.textContent();
productsPrice.push({
name: product.title,
price: price,
link: product.link,
});
}
await browser.close();
}
await compareAndSave(productsPrice);
console.log("Done! Check productsPrice.json");
await compareAndSave(products);
console.log("Aktualne ceny zapisane w productsPrice.json");
}
cron.schedule("* */3 * * *", async () => {
await init();
sendMessage("Startuję monitoring cen");
const task = cron.schedule("*/15 7-23 * * *", init, {
timezone: "Europe/Warsaw",
});
cron.schedule("0 7 * * *", () => {
const date = new Date().toLocaleDateString("pl-PL");
sendMessage(`Zaczynamy monitoring ${date}`);
});
task.execute();

153
index.js.old Normal file
View File

@@ -0,0 +1,153 @@
const cheerio = require("cheerio");
const cron = require("node-cron");
const axios = require("axios");
const { chromium } = require("playwright");
const fs = require("fs");
// Discord webhook endpoint that sendMessage() POSTs notifications to.
// NOTE(review): this URL embeds the webhook credential and is committed in
// source; consider rotating it and loading it from configuration instead.
const discordWebhook =
"https://discord.com/api/webhooks/1439286509390921749/t2Hb8XloF6zhDRYD1yh_QlkHHa9eHUyXvd9TxZRHwqR_b_OxxbnwDgsm4em8TwA9NQIa";
// Maps a shop identifier to the CSS selector of that shop's price element.
// init() picks the first key that occurs as a substring of the product link
// and passes the matching selector to cheerio and to Playwright's page.$().
const priceSelectors = {
miodowamydlarnia: ".projector_prices__price",
greentouch: ".main-price",
amazon: "#corePrice_feature_div .a-price .a-offscreen",
soxo: "#projector_price_value span",
empik: '[data-ta-section="priceMainContainer"] [data-ta="price"]',
notino: "#pd-price",
};
// Link substrings of shops that are not monitored: getProducts() drops every
// product whose link contains one of these.
const excludePage = ["allegro", "homla.com.pl", "home-you.com"];
/**
 * Posts a text message to the Discord webhook.
 *
 * Delivery is best-effort: a failed request is logged and the returned promise
 * still resolves, so a Discord outage cannot surface as an unhandled rejection
 * in callers that fire and forget.
 *
 * @param {string} message - Text sent as the `content` field of the webhook payload.
 * @returns {Promise<void>} Resolves once the request has completed or failed.
 */
function sendMessage(message) {
  return axios
    .post(discordWebhook, { content: message })
    .then(() => undefined)
    .catch((error) => {
      // Notifications must never take the monitor down; report and move on.
      console.error("Discord notification failed:", error.message);
    });
}
/**
 * Loads the previously stored price snapshot.
 *
 * A missing, empty, unreadable or malformed file is treated as "no previous
 * prices" so that a damaged snapshot cannot abort a monitoring run.
 *
 * @param {string} filePath - Path of the JSON snapshot file.
 * @returns {Array<object>} The stored product entries, or an empty array.
 */
function readStoredPrices(filePath) {
  if (!fs.existsSync(filePath)) {
    return [];
  }
  try {
    const parsed = JSON.parse(fs.readFileSync(filePath, "utf8"));
    return Array.isArray(parsed) ? parsed : [];
  } catch (error) {
    console.error(
      `Could not read ${filePath}, starting from an empty snapshot:`,
      error.message
    );
    return [];
  }
}

/**
 * Compares freshly collected prices with the snapshot in productsPrice.json,
 * reports every changed price through sendMessage(), and then overwrites the
 * snapshot with the new list.
 *
 * Products are matched by their `link`. When nothing changed a single
 * "no changes" message is sent instead.
 *
 * @param {Array<{name: string, price: string, link: string}>} productsPrice -
 *   Current prices; written to the snapshot file as-is.
 * @returns {Promise<void>} Resolves after the notifications were handed to
 *   sendMessage() and the snapshot was written.
 */
async function compareAndSave(productsPrice) {
  const snapshotPath = "productsPrice.json";

  // Index the old snapshot once; keep the first entry per link, which is the
  // one a linear search would have returned.
  const previousByLink = new Map();
  for (const oldProduct of readStoredPrices(snapshotPath)) {
    if (!previousByLink.has(oldProduct.link)) {
      previousByLink.set(oldProduct.link, oldProduct);
    }
  }

  const diffProducts = [];
  for (const product of productsPrice) {
    const oldProduct = previousByLink.get(product.link);
    if (oldProduct && oldProduct.price !== product.price) {
      diffProducts.push({
        name: product.name,
        newPrice: product.price,
        oldPrice: oldProduct.price,
        link: product.link,
      });
    }
  }

  // Awaiting keeps messages in order and lets the caller know when the run is
  // really finished; it is harmless if sendMessage() returns nothing.
  for (const product of diffProducts) {
    await sendMessage(
      `Zmiana ceny **${product.name}**:\nCena: ${product.oldPrice} -> ${product.newPrice}\nLink: ${product.link}`
    );
  }

  if (diffProducts.length === 0) {
    await sendMessage("Brak zmian w cenach");
  }

  fs.writeFileSync(snapshotPath, JSON.stringify(productsPrice, null, 2));
}
/**
 * Fetches the gift items (title and link) from the records API and keeps only
 * those that can be monitored: items with a non-empty link that does not
 * contain any of the `excludePage` substrings.
 *
 * NOTE(review): only the `items` array of a single response is read; if the
 * API paginates, later pages are not fetched — confirm.
 *
 * @returns {Promise<Array<{title: string, link: string}>>} Items to monitor.
 */
async function getProducts() {
  const response = await axios.get(
    "https://db.maciaszek.ovh/api/collections/gifts_items/records?fields=title,link"
  );

  const isMonitored = ({ link }) =>
    link !== "" && excludePage.every((page) => !link.includes(page));

  return response.data.items.filter(isMonitored);
}
/**
 * Runs one price-check pass.
 *
 * 1. Loads the monitored products via getProducts().
 * 2. Tries to read each price from the page HTML fetched with axios and parsed
 *    with cheerio.
 * 3. Retries every product whose HTTP attempt threw in a Playwright Chromium
 *    browser.
 * 4. Hands the collected prices to compareAndSave().
 *
 * A failure on a single product is logged and does not stop the pass, and the
 * browser is always closed, even when scraping throws.
 *
 * @returns {Promise<void>} Resolves when the pass is finished.
 */
async function init() {
  const userAgent =
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36";
  const productsWithLinks = await getProducts();
  const productsWithBrowser = [];
  const productsPrice = [];
  const selectors = Object.keys(priceSelectors);

  console.log("Zaczynam sprawdzać ceny");

  for (const product of productsWithLinks) {
    if (product.link === "") continue;

    try {
      const { data } = await axios.get(product.link, {
        headers: {
          "User-Agent": userAgent,
        },
      });

      const $ = cheerio.load(data);
      const selector = selectors.find((selector) =>
        product.link.includes(selector)
      );
      const price = $(priceSelectors[selector]).text();

      productsPrice.push({
        name: product.title,
        price: price,
        link: product.link,
      });
    } catch (error) {
      // Keep the reason visible, then fall back to a real browser.
      console.error(
        `HTTP fetch failed for ${product.link}, retrying in browser:`,
        error.message
      );
      productsWithBrowser.push(product);
    }
  }

  if (productsWithBrowser.length > 0) {
    const browser = await chromium.launch();
    try {
      const context = await browser.newContext({
        userAgent: userAgent,
      });
      const page = await context.newPage();

      for (const product of productsWithBrowser) {
        try {
          await page.goto(product.link);
          const selector = selectors.find((selector) =>
            product.link.includes(selector)
          );
          const element = await page.$(priceSelectors[selector]);
          if (!element) continue;
          const price = await element.textContent();
          productsPrice.push({
            name: product.title,
            price: price,
            link: product.link,
          });
        } catch (error) {
          // One unreachable page must not discard the prices already collected.
          console.error(
            `Browser fetch failed for ${product.link}:`,
            error.message
          );
        }
      }
    } finally {
      // Always release the Chromium process, including on unexpected errors.
      await browser.close();
    }
  }

  await compareAndSave(productsPrice);
  console.log("Sprawdzone! Aktualne ceny zapisane w productsPrice.json");
}
// Start-up: announce the monitor on Discord, register the recurring price
// check (06:00, 09:00, ..., 21:00 in the Europe/Warsaw timezone) and run one
// check immediately instead of waiting for the first scheduled slot.
const priceCheckSchedule = "0 6,9,12,15,18,21 * * *";
const priceCheckOptions = { timezone: "Europe/Warsaw" };

sendMessage("Zaczynam monitoring cen");

const task = cron.schedule(priceCheckSchedule, init, priceCheckOptions);

task.execute();