From 1c548d482427a7984e5437aa8f4ad81a2183c175 Mon Sep 17 00:00:00 2001
From: makefu
Date: Sat, 24 Sep 2022 00:30:36 +0200
Subject: [PATCH] ma rss: init sofa

---
Review notes (text between the "---" marker and the first "diff --git" line
is not part of the commit message):

- The patch is laid out one record per line again so that git, Nix and YAML
  can parse it; the indentation of the added files is reconstructed.
- sofa.yml: the Lua patterns now use "%." for a literal dot and a plain "/"
  instead of the backslash sequences "\." and "\/", which are not Lua string
  escapes (and an unescaped "." in a Lua pattern matches any character).
  The blob hash on the sofa.yml "index" line predates this change.
- sofa.nix copies sofa.yml into the ratt state directory before every run
  because ratt expects its config in the working directory; the unit is
  scheduled with the calendar expression "00/3:30" (every 3 hours).

 makefu/2configs/deployment/rss/sofa-urls |  1 +
 makefu/2configs/deployment/rss/sofa.nix  | 26 +++++++++++
 makefu/2configs/deployment/rss/sofa.yml  | 59 ++++++++++++++++++++++++
 3 files changed, 86 insertions(+)
 create mode 100644 makefu/2configs/deployment/rss/sofa-urls
 create mode 100644 makefu/2configs/deployment/rss/sofa.nix
 create mode 100644 makefu/2configs/deployment/rss/sofa.yml

diff --git a/makefu/2configs/deployment/rss/sofa-urls b/makefu/2configs/deployment/rss/sofa-urls
new file mode 100644
index 000000000..70a6c321d
--- /dev/null
+++ b/makefu/2configs/deployment/rss/sofa-urls
@@ -0,0 +1 @@
+https://www.ebay-kleinanzeigen.de/s-ditzingen/preis::50/sofa/k0l8863r10
diff --git a/makefu/2configs/deployment/rss/sofa.nix b/makefu/2configs/deployment/rss/sofa.nix
new file mode 100644
index 000000000..b9180fd45
--- /dev/null
+++ b/makefu/2configs/deployment/rss/sofa.nix
@@ -0,0 +1,26 @@
+{ pkgs, lib, config, ... }:
+let
+  fqdn = "rss.euer.krebsco.de";
+  ratt-path = "/var/lib/ratt/";
+  out-path = "${ratt-path}/sofa.xml";
+in {
+  systemd.tmpfiles.rules = ["d ${ratt-path} 0750 nginx nginx - -" ];
+  systemd.services.run-ratt-sofa = {
+    enable = true;
+    path = with pkgs; [ ratt xmlstarlet ];
+    script = builtins.readFile ./ratt-hourly.sh;
+    scriptArgs = "${./sofa-urls} ${out-path}";
+
+    preStart = "install -v -m750 ${./sofa.yml} ${ratt-path}/sofa.yml"; # ratt requires the config file in the cwd
+    serviceConfig.User = "nginx";
+    serviceConfig.WorkingDirectory = ratt-path;
+    startAt = "00/3:30"; # every 3 hours, fetch latest
+  };
+
+  services.nginx.virtualHosts."${fqdn}" = {
+    locations."=/ratt/sofa.xml" = {
+      alias = out-path;
+    };
+  };
+}
+
diff --git a/makefu/2configs/deployment/rss/sofa.yml b/makefu/2configs/deployment/rss/sofa.yml
new file mode 100644
index 000000000..3248f5c4e
--- /dev/null
+++ b/makefu/2configs/deployment/rss/sofa.yml
@@ -0,0 +1,59 @@
+regex: https://www.ebay\-kleinanzeigen.de/s\-.*
+selectors:
+  httpsettings:
+    cookie: {}
+    header: {}
+    useragent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)
+      Chrome/90.0.4430.72 Safari/537.36
+    insecure: false
+  feed:
+    title: title
+    authorname: ""
+    authoremail: ""
+  item:
+    container: ul[id='srchrslt-adtable'] li[class='ad-listitem lazyload-item ']
+    title: |
+      title = sel:find("h2.text-module-begin"):first():text():gsub("^%s*(.-)%s*$", "%1")
+      print(title)
+    link: |
+      link = sel:find("a"):first():attr("href")
+      print("https://www.ebay-kleinanzeigen.de" .. link)
+    created: |-
+      created = ""
+      sel:find("div.aditem-main--top--right"):each(function(i, s)
+        created = s:text():gsub("^%s*(.-)%s*$", "%1")
+      end)
+      if created:match("Heute") then
+        time = created:gsub("^.*,", "")
+        print(os.date("%d.%m.%Y") .. time .. " CET")
+        return
+      end
+      if created:match("Gestern") then
+        time = created:gsub("^.*,", "")
+        print(os.date("%d.%m.%Y", os.time()-24*60*60) .. time .. " CET")
+        return
+      end
+      if created:match("%.") then
+        print(created .. " 00:00 CET")
+        return
+      end
+    createdformat: 02.01.2006 15:04 MST
+    description: |-
+      description = sel:find(".aditem-main--middle"):html()
+      place = sel:find(".aditem-main--top--left"):html()
+      print(description .. place)
+    content: ""
+    image: |
+      img = sel:find("div.imagebox"):first():attr("data-imgsrc")
+      if img ~= "" then
+        -- prepend host if needed
+        if not(img:match("https*://.*")) then
+          img = "https://www.ebay-kleinanzeigen.de" .. img
+        end
+        print(img)
+      end
+  nextpage: |
+    nextpage = sel:find("link[rel=next]"):attr("href")
+    print("https://www.ebay-kleinanzeigen.de" .. nextpage)
+  nextpagecount: 5
+  sort: ""