Reaktor/plugins: limit url-title length
This commit is contained in:
parent
5b8c4d24e2
commit
cc0dfeda39
@ -120,11 +120,24 @@ rec {
|
|||||||
url-title = (buildSimpleReaktorPlugin "url-title" {
|
url-title = (buildSimpleReaktorPlugin "url-title" {
|
||||||
pattern = "^.*(?P<args>http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+).*$$";
|
pattern = "^.*(?P<args>http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+).*$$";
|
||||||
path = with pkgs; [ curl perl ];
|
path = with pkgs; [ curl perl ];
|
||||||
script = pkgs.writeDash "lambda-pl" ''
|
script = pkgs.writePython3 [ "beautifulsoup4" "lxml" ] "url-title" ''
|
||||||
if [ "$#" -gt 0 ]; then
|
import sys
|
||||||
curl -SsL --max-time 5 "$1" |
|
import urllib.request
|
||||||
perl -l -0777 -ne 'print $1 if /<title.*?>\s*(.*?)\s*<\/title/si'
|
from bs4 import BeautifulSoup
|
||||||
fi
|
|
||||||
|
# Fetch the page named by the first command-line argument and print its
# <title>, unless the title is too long or spans too many lines.
try:
    # Bound the request so a stalled server cannot hang the plugin; the
    # `with` block closes the HTTP response once parsing is done.
    with urllib.request.urlopen(sys.argv[1], timeout=5) as response:
        soup = BeautifulSoup(response, "lxml")

    # find() returns None when the document has no <title> element.
    title_tag = soup.find('title')
    title = title_tag.string if title_tag is not None else None

    if title:
        if len(title) > 512:
            print('message too long, skipped')
        elif len(title.split('\n')) > 5:
            print('too many lines, skipped')
        else:
            print(title)
except Exception:
    # Deliberately best effort: on any fetch or parse failure (or a
    # missing argument) the script prints nothing.
    pass
|
||||||
'';
|
'';
|
||||||
});
|
});
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user