lib.uri: add {native,posix-extended}-regex

This commit is contained in:
tv 2021-01-07 21:00:04 +01:00
parent cbb28f34ee
commit 120eadb80a
2 changed files with 78 additions and 0 deletions

View File

@ -12,6 +12,7 @@ let
encodeName = replaceChars ["/"] ["\\x2f"]; encodeName = replaceChars ["/"] ["\\x2f"];
}; };
types = nixpkgs-lib.types // import ./types.nix { inherit lib; }; types = nixpkgs-lib.types // import ./types.nix { inherit lib; };
uri = import ./uri.nix { inherit lib; };
xml = import ./xml.nix { inherit lib; }; xml = import ./xml.nix { inherit lib; };
eq = x: y: x == y; eq = x: y: x == y;

77
lib/uri.nix Normal file
View File

@ -0,0 +1,77 @@
{ lib }:
with lib;
with builtins;
rec {
# Regular expression matching a URI as specified by RFC 3986.
#
# The pattern is written in free-spacing style: it is spread over several
# lines, contains layout whitespace and `#` comments, and uses `(?:...)`
# non-capturing groups.  A literal `#` is written as `\#`.
#
# From: http://jmrware.com/articles/2009/uri_regexp/URI_regex.html#uri-40
native-regex = ''
# RFC-3986 URI component: URI
[A-Za-z][A-Za-z0-9+\-.]* : # scheme ":"
(?: // # hier-part
(?: (?:[A-Za-z0-9\-._~!$&'()*+,;=:]|%[0-9A-Fa-f]{2})* @)?
(?:
\[
(?:
(?:
(?: (?:[0-9A-Fa-f]{1,4}:){6}
| :: (?:[0-9A-Fa-f]{1,4}:){5}
| (?: [0-9A-Fa-f]{1,4})? :: (?:[0-9A-Fa-f]{1,4}:){4}
| (?: (?:[0-9A-Fa-f]{1,4}:){0,1} [0-9A-Fa-f]{1,4})? :: (?:[0-9A-Fa-f]{1,4}:){3}
| (?: (?:[0-9A-Fa-f]{1,4}:){0,2} [0-9A-Fa-f]{1,4})? :: (?:[0-9A-Fa-f]{1,4}:){2}
| (?: (?:[0-9A-Fa-f]{1,4}:){0,3} [0-9A-Fa-f]{1,4})? :: [0-9A-Fa-f]{1,4}:
| (?: (?:[0-9A-Fa-f]{1,4}:){0,4} [0-9A-Fa-f]{1,4})? ::
) (?:
[0-9A-Fa-f]{1,4} : [0-9A-Fa-f]{1,4}
| (?: (?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?) \.){3}
(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
)
| (?: (?:[0-9A-Fa-f]{1,4}:){0,5} [0-9A-Fa-f]{1,4})? :: [0-9A-Fa-f]{1,4}
| (?: (?:[0-9A-Fa-f]{1,4}:){0,6} [0-9A-Fa-f]{1,4})? ::
)
| [Vv][0-9A-Fa-f]+\.[A-Za-z0-9\-._~!$&'()*+,;=:]+
)
\]
| (?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}
(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
| (?:[A-Za-z0-9\-._~!$&'()*+,;=]|%[0-9A-Fa-f]{2})*
)
(?: : [0-9]* )?
(?:/ (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})* )*
| /
(?: (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+
(?:/ (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})* )*
)?
| (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+
(?:/ (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})* )*
|
)
(?:\? (?:[A-Za-z0-9\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})* )? # [ "?" query ]
(?:\# (?:[A-Za-z0-9\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})* )? # [ "#" fragment ]
'';
# POSIX extended regular expression (ERE) derived from `native-regex`.
#
# `native-regex` is split into lines and every line is passed, in order,
# through these rewrites:
#   1. the trailing `# ...` comment is removed,
#   2. each `\-` inside a bracket expression is moved to the end of that
#      expression as a plain `-`,
#   3. each non-capturing group opener `(?:` is replaced by `(`,
#   4. layout whitespace (spaces and tabs) is removed.
# The rewritten lines are then concatenated without any separator.
posix-extended-regex =
  let
    # Keep the part of the line that precedes the first unescaped `#`.
    # `\#` denotes a literal `#` and does not start a comment.
    removeComment = s:
      elemAt (match "^((\\\\#|[^#])*)(#.*)?$" s) 0;

    # Whitespace in `native-regex` is layout only.  Tabs are stripped in
    # addition to spaces so that tab indentation cannot leak into the
    # resulting expression.
    removeWhitespace =
      replaceStrings [" " "\t"] ["" ""];

    # Rewrite `[..\-..]` as `[....-]`, one `\-` per recursion step, until
    # no `\-` followed by a closing `]` is left.  Inside a POSIX bracket
    # expression a backslash is an ordinary character, so the dash has to
    # be placed last in the expression instead of being escaped.
    moveDashToEndOfCharacterClass = s:
      let
        # Groups: text before `\-`, remaining class members, `]` and the
        # rest of the line.  `[^]]*` (not `+`) also handles a `\-` that is
        # already the last member of its bracket expression.
        result = match "(.*)\\\\-([^]]*)(].*)" s;
        s' = elemAt result 0 + elemAt result 1 + "-" + elemAt result 2;
      in
        if result != null then
          moveDashToEndOfCharacterClass s'
        else
          s;
  in
    concatStrings
      # Apply each rewrite to all lines before moving on to the next one.
      (foldl' (a: f: map f a) (splitString "\n" native-regex) [
        removeComment
        moveDashToEndOfCharacterClass
        (replaceStrings ["(?:"] ["("])
        removeWhitespace
      ]);
}