Reaktor/plugins: limit url-title length

This commit is contained in:
lassulus 2018-04-20 23:25:36 +02:00
parent 5b8c4d24e2
commit cc0dfeda39

View File

@ -120,11 +120,24 @@ rec {
# url-title: Reaktor plugin that reacts to a URL in a message and prints the
# <title> of the page behind it. The first URL-looking substring is captured
# by the `args` group of `pattern` and handed to the script as its first
# argument. Titles longer than 512 characters or spanning more than 5 lines
# are not printed; a short notice is printed instead. Any fetch or parse
# failure results in no output at all (best effort).
url-title = (buildSimpleReaktorPlugin "url-title" {
  pattern = "^.*(?P<args>http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+).*$$";
  # NOTE(review): curl and perl look unused by the Python script below;
  # kept as-is, confirm nothing else relies on them before dropping.
  path = with pkgs; [ curl perl ];
  script = pkgs.writePython3 [ "beautifulsoup4" "lxml" ] "url-title" ''
    import sys
    import urllib.request
    from bs4 import BeautifulSoup

    try:
        # Bound the fetch to 5 seconds (same limit as the former
        # curl --max-time 5) so a stalled server cannot hang the plugin,
        # and close the response once it has been parsed.
        with urllib.request.urlopen(sys.argv[1], timeout=5) as page:
            soup = BeautifulSoup(page, "lxml")
        tag = soup.find("title")
        # find() returns None when the page has no <title>; .string is
        # None when the tag is empty or contains nested markup.
        title = tag.string.strip() if tag and tag.string else None
        if title:
            if len(title) > 512:
                print("message too long, skipped")
            elif len(title.split("\n")) > 5:
                print("too many lines, skipped")
            else:
                print(title)
    except Exception:
        # Best effort: a missing argument, network error or parse error
        # must not produce any output.
        pass
  '';
});