# Nix — 358 lines, 13 KiB
{ config, lib, pkgs, ... }: let
|
|
cfg = config.krebs.sync-containers3;
|
|
slib = import ../../lib/pure.nix { inherit lib; };
|
|
in {
|
|
options.krebs.sync-containers3 = {
|
|
# Settings read when this module is enabled via `inContainer.enable`.
inContainer = let
  inherit (lib) mkEnableOption mkOption types;
in {
  enable = mkEnableOption "container config for syncing";
  # Public key placed into the container_sync user's authorized keys.
  pubkey = mkOption {
    type = types.str; # TODO ssh key
  };
};
|
|
# Containers handled by this module; every attribute describes one container.
containers = let
  inherit (lib) mkOption types;
  inherit (slib.types) absolute-pathname;
  # Shorthand for a plain boolean option with the given default.
  flag = default: mkOption {
    type = types.bool;
    inherit default;
  };
in mkOption {
  type = types.attrsOf (types.submodule ({ config, ... }: {
    options = {
      # Container name; falls back to the attribute name.
      name = mkOption {
        default = config._module.args.name;
        type = types.str;
      };
      # Key file handed to the syncer service as its `ssh_key` credential.
      sshKey = mkOption {
        type = absolute-pathname;
      };
      # Key file used for the LUKS disk image; defaults to sshKey.
      luksKey = mkOption {
        default = config.sshKey;
        type = absolute-pathname;
      };
      # Forwarded to the container's `ephemeral` setting.
      ephemeral = flag false;
      # When false, no container, watcher, scheduler or container@ override
      # is generated for this entry; the syncer is still set up.
      runContainer = flag true;
      # Script executed by the `autoswitch` service inside the container.
      startCommand = mkOption {
        type = types.str;
        default = ''
          set -efu
          mkdir -p /var/state/var_src
          ln -Tfrs /var/state/var_src /var/src
          if test -e /var/src/nixos-config; then
            /run/current-system/sw/bin/nixos-rebuild -I /var/src switch || :
          fi
        '';
      };
    };
  }));
  default = {};
};
|
|
};
|
|
config = lib.mkMerge [
|
|
(lib.mkIf (cfg.containers != {}) {
|
|
|
|
# One declarative NixOS container for every entry that has runContainer set.
containers = let
  # Host directory that holds a container's disk image and mounted state.
  hostDir = ctr: "/var/lib/sync-containers3/${ctr.name}";
  # Writable bind mount of the given host path.
  writable = hostPath: {
    inherit hostPath;
    isReadOnly = false;
  };
  toContainer = _: ctr: lib.nameValuePair ctr.name {
    # Never started at boot; the scheduler service starts it explicitly.
    autoStart = false;
    enableTun = true;
    inherit (ctr) ephemeral;
    privateNetwork = true;
    hostBridge = "ctr0";
    bindMounts = {
      "/var/lib/self/disk" = writable "${hostDir ctr}/disk";
      "/var/state" = writable "${hostDir ctr}/state";
    };
    config = {
      environment.systemPackages = [ pkgs.dhcpcd pkgs.git pkgs.jq ];
      networking.useDHCP = lib.mkForce true;
      # Runs the configured startCommand once the container reaches multi-user.target.
      systemd.services.autoswitch = {
        wantedBy = [ "multi-user.target" ];
        environment.NIX_REMOTE = "daemon";
        unitConfig.X-StopOnRemoval = false;
        serviceConfig.ExecStart = pkgs.writers.writeDash "autoswitch" ctr.startCommand;
      };
    };
  };
  runnable = lib.filterAttrs (_: ctr: ctr.runContainer) cfg.containers;
in lib.mapAttrs' toContainer runnable;
|
|
|
|
systemd.services = lib.foldr lib.recursiveUpdate {} (lib.flatten (map (ctr: [
|
|
# Timer-driven job that copies the disk image from the running container
# (container_sync@<name>.r:disk) into the local per-container home directory.
(let
  unit = "${ctr.name}_syncer";
  # ExecCondition: only proceed while the container is NOT active on this host.
  notRunningHere = pkgs.writers.writeDash "${ctr.name}_checker" ''
    set -efu
    ! systemctl is-active --quiet container@${ctr.name}.service
  '';
  # Executed while holding the consul lock `sync_<name>`.
  pull = pkgs.writers.writeDash "${ctr.name}-sync" ''
    set -efux
    if ping -c 1 ${ctr.name}.r; then
      nice --adjustment=30 rsync -a -e "ssh -i $CREDENTIALS_DIRECTORY/ssh_key" --timeout=30 --inplace --sparse container_sync@${ctr.name}.r:disk "$HOME"/disk.rsync
      touch "$HOME"/incomplete
      nice --adjustment=30 rsync --inplace "$HOME"/disk.rsync "$HOME"/disk
      rm -f "$HOME"/incomplete
    fi
  '';
in {
  ${unit} = {
    startAt = "*:0/1";
    path = [
      pkgs.coreutils
      pkgs.inetutils
      pkgs.consul
      pkgs.rsync
      pkgs.openssh
      pkgs.systemd
    ];
    serviceConfig = {
      User = "${ctr.name}_container";
      LoadCredential = [ "ssh_key:${ctr.sshKey}" ];
      ExecCondition = notRunningHere;
      ExecStart = pkgs.writers.writeDash unit ''
        set -efux
        consul lock sync_${ctr.name} ${pull}
      '';
    };
  };
})
|
|
# Watchdog started by the scheduler after the container comes up.
# It polls the consul KV entry `containers/<name>` every 5 seconds and leaves
# the loop when this host should no longer run the container (entry missing,
# KV error, another host recorded as owner, or the container unreachable).
# After the loop it stops the container, unmounts its state and closes the
# LUKS mapping; each of those teardown steps is best-effort (`|| :`).
{ "${ctr.name}_watcher" = lib.mkIf ctr.runContainer {
  path = with pkgs; [
    coreutils
    inetutils
    consul
    cryptsetup
    curl
    mount
    util-linux
    jq
    retry
  ];
  serviceConfig = {
    ExecStart = pkgs.writers.writeDash "${ctr.name}_watcher" ''
      set -efux
      while sleep 5; do
        # probe the KV entry first; only the HTTP status code is inspected here
        case $(curl -s -o /dev/null --retry 10 --retry-delay 10 -w '%{http_code}' http://127.0.0.1:8500/v1/kv/containers/${ctr.name}) in
          404)
            echo 'got 404 from kv, should kill the container'
            break
            ;;
          500)
            echo 'got 500 from kv, will kill container'
            break
            ;;
          200)
            # echo 'got 200 from kv, will check payload'
            payload=$(consul kv get containers/${ctr.name}) || continue
            export payload
            if [ "$(jq -rn 'env.payload | fromjson.host')" = '${config.networking.hostName}' ]; then
              # echo 'we are the host, trying to reach container'
              # test the exit status of retry directly instead of executing its output
              if retry -t 10 -d 10 -- ping -q -c 1 ${ctr.name}.r > /dev/null; then
                # echo 'container is reachable, continuing'
                continue
              else
                echo 'container seems dead, killing'
                break
              fi
            else
              echo 'we are not host, killing container'
              break
            fi
            ;;
          *)
            echo 'unknown state, continuing'
            continue
            ;;
        esac
      done
      /run/current-system/sw/bin/nixos-container stop ${ctr.name} || :
      umount /var/lib/sync-containers3/${ctr.name}/state || :
      cryptsetup luksClose ${ctr.name} || :
    '';
  };
}; }
|
|
# Decides whether this host should run the container and, if so, runs it.
# The unit restarts every 30s (Restart=always), so `exit 0` below means
# "not our turn, try again later".
# Flow: inspect the consul KV entry `containers/<name>`; continue when it is
# missing, names this host, or its timestamp is older than 100s. Then record
# this host as owner, take the consul lock `container_<name>`, open and mount
# the LUKS disk, start the container and the watcher, and keep refreshing the
# KV timestamp while the container is active and answers ping.
{ "${ctr.name}_scheduler" = lib.mkIf ctr.runContainer {
  wantedBy = [ "multi-user.target" ];
  path = with pkgs; [
    coreutils
    inetutils
    consul
    cryptsetup
    mount
    util-linux
    curl
    systemd
    jq
    retry
    bc
  ];
  serviceConfig = {
    Restart = "always";
    RestartSec = "30s";
    ExecStart = pkgs.writers.writeDash "${ctr.name}_scheduler" ''
      set -efux
      # probe the KV entry first; only the HTTP status code is inspected here
      case $(curl -s -o /dev/null --retry 10 -w '%{http_code}' http://127.0.0.1:8500/v1/kv/containers/${ctr.name}) in
        404)
          # echo 'got 404 from kv, will create container'
          ;;
        500)
          # echo 'got 500 from kv, retrying again'
          exit 0
          ;;
        200)
          # echo 'got 200 from kv, will check payload'
          # assign before exporting: a combined export would hide a failing
          # consul call from set -e and subject the value to word splitting
          payload=$(consul kv get containers/${ctr.name}) || exit 0
          export payload
          if [ "$(jq -rn 'env.payload | fromjson.host')" = '${config.networking.hostName}' ]; then
            echo 'we are the host, starting container'
          else
            # echo 'we are not host, checking timestamp'
            # if [ $(echo "$(date +%s) - $(jq -rn 'env.payload | fromjson.time') > 100" | bc) -eq 1 ]; then
            if [ "$(jq -rn 'env.payload | fromjson.time | now - tonumber > 100')" = 'true' ]; then
              echo 'last beacon is more than 100s ago, taking over'
            else
              # echo 'last beacon was recent. trying again'
              exit 0
            fi
          fi
          ;;
        *)
          echo 'unknown state, bailing out'
          exit 0
          ;;
      esac
      consul kv put containers/${ctr.name} "$(jq -cn '{host: "${config.networking.hostName}", time: now}')" >/dev/null
      consul lock -verbose -monitor-retry=100 -timeout 30s -name container_${ctr.name} container_${ctr.name} ${pkgs.writers.writeBash "${ctr.name}-start" ''
        set -efu
        cryptsetup luksOpen --key-file ${ctr.luksKey} /var/lib/sync-containers3/${ctr.name}/disk ${ctr.name} || :
        mkdir -p /var/lib/sync-containers3/${ctr.name}/state
        mountpoint /var/lib/sync-containers3/${ctr.name}/state || mount /dev/mapper/${ctr.name} /var/lib/sync-containers3/${ctr.name}/state
        /run/current-system/sw/bin/nixos-container start ${ctr.name}
        systemctl start ${ctr.name}_watcher.service
        # wait for system to become reachable for the first time
        retry -t 10 -d 10 -- ping -q -c 1 ${ctr.name}.r > /dev/null
        # heartbeat: refresh the KV timestamp while the container is up and reachable
        while systemctl is-active container@${ctr.name}.service >/dev/null && ping -q -c 3 ${ctr.name}.r >/dev/null; do
          consul kv put containers/${ctr.name} "$(jq -cn '{host: "${config.networking.hostName}", time: now}')" >/dev/null
          sleep 10
        done
      ''}
    '';
  };
}; }
|
|
# Additions to the container@<name> unit: attach the vb-* interface to the
# ctr0 bridge after start, and delete that interface on stop.
(let
  ip = "${pkgs.iproute2}/bin/ip";
in {
  "container@${ctr.name}" = lib.mkIf ctr.runContainer {
    serviceConfig.ExecStartPost = [
      (pkgs.writers.writeDash "bind-to-bridge" ''
        ${ip} link set "vb-$INSTANCE" master ctr0
      '')
    ];
    serviceConfig.ExecStop = pkgs.writers.writeDash "remove_interface" ''
      ${ip} link del vb-${ctr.name}
    '';
  };
})
|
|
]) (lib.attrValues cfg.containers)));
|
|
|
|
# Give every <name>_syncer timer a randomized delay of up to 100 seconds.
systemd.timers = lib.listToAttrs (map (ctr: {
  name = "${ctr.name}_syncer";
  value.timerConfig.RandomizedDelaySec = 100;
}) (lib.attrValues cfg.containers));
|
|
|
|
# One (empty) group definition named <name>_container per configured container.
users.groups = lib.listToAttrs (map (ctr:
  lib.nameValuePair "${ctr.name}_container" {}
) (lib.attrValues cfg.containers));
|
|
# One account named <name>_container per configured container; its home is
# the container's directory under /var/lib/sync-containers3.
users.users = let
  mkAccount = ctr: {
    name = "${ctr.name}_container";
    value = {
      isNormalUser = true;
      # uid derived from the container name via slib.genid_uint31
      uid = slib.genid_uint31 "container_${ctr.name}";
      group = "${ctr.name}_container";
      home = "/var/lib/sync-containers3/${ctr.name}";
      createHome = true;
      homeMode = "705";
    };
  };
in lib.mapAttrs' (_: mkAccount) cfg.containers;
|
|
|
|
# Installs one `<name>_init` command per configured container. The command
# creates a 5G disk image, formats it as LUKS with the container's luksKey,
# puts an XFS filesystem on it, mounts it as the container's state directory,
# starts the container and creates /var/state inside it.
# The inner helper uses pkgs.writers.writeDash, matching every other writer
# call in this module.
environment.systemPackages = lib.mapAttrsToList (_: ctr: (pkgs.writers.writeDashBin "${ctr.name}_init" ''
  set -efux
  export PATH=${lib.makeBinPath [
    pkgs.coreutils
    pkgs.cryptsetup
    pkgs.libxfs.bin
  ]}:$PATH
  truncate -s 5G /var/lib/sync-containers3/${ctr.name}/disk
  cryptsetup luksFormat /var/lib/sync-containers3/${ctr.name}/disk ${ctr.luksKey}
  cryptsetup luksOpen --key-file ${ctr.luksKey} /var/lib/sync-containers3/${ctr.name}/disk ${ctr.name}
  mkfs.xfs /dev/mapper/${ctr.name}
  mkdir -p /var/lib/sync-containers3/${ctr.name}/state
  mountpoint /var/lib/sync-containers3/${ctr.name}/state || mount /dev/mapper/${ctr.name} /var/lib/sync-containers3/${ctr.name}/state
  /run/current-system/sw/bin/nixos-container start ${ctr.name}
  /run/current-system/sw/bin/nixos-container run ${ctr.name} -- ${pkgs.writers.writeDash "init" ''
    mkdir -p /var/state
  ''}
'')) cfg.containers;
|
|
})
|
|
# Host-side networking, active as soon as at least one container is configured:
# a `ctr0` bridge with a DHCP server on 10.233.0.1/24, IPv4 forwarding, and
# firewall rules that accept and masquerade the containers' traffic.
(lib.mkIf (cfg.containers != {}) (let
  bridge = "ctr0";
  # Unconditional ACCEPT for packets matching the given iptables predicate.
  accept = predicate: { inherit predicate; target = "ACCEPT"; };
in {
  # needed because otherwise we lose local dns
  environment.etc."resolv.conf".source = lib.mkForce "/run/systemd/resolve/resolv.conf";

  boot.kernel.sysctl."net.ipv4.ip_forward" = lib.mkForce 1;

  systemd.network = {
    netdevs.${bridge}.netdevConfig = {
      Kind = "bridge";
      Name = bridge;
    };
    networks.${bridge} = {
      name = bridge;
      address = [ "10.233.0.1/24" ];
      networkConfig = {
        # IPForward = "yes";
        # IPMasquerade = "both";
        ConfigureWithoutCarrier = true;
        DHCPServer = "yes";
      };
    };
  };

  networking.networkmanager.unmanaged = [ bridge ];

  krebs.iptables.tables = {
    filter = {
      INPUT.rules = [
        (accept "-i ${bridge}")
      ];
      FORWARD.rules = [
        (accept "-i ${bridge}")
        (accept "-o ${bridge}")
      ];
    };
    nat.POSTROUTING.rules = [
      { v6 = false; predicate = "-s 10.233.0.0/24"; target = "MASQUERADE"; }
    ];
  };
}))
|
|
# Configuration applied when inContainer.enable is set: a `container_sync`
# account (home /var/lib/self) that accepts the configured public key, and
# systemd-networkd doing DHCP on eth0 instead of using the host's resolv.conf.
(lib.mkIf cfg.inContainer.enable (let
  syncUser = "container_sync";
in {
  users.groups.${syncUser} = {};
  users.users.${syncUser} = {
    isNormalUser = true;
    uid = slib.genid_uint31 "container_sync";
    group = syncUser;
    home = "/var/lib/self";
    createHome = true;
    openssh.authorizedKeys.keys = [ cfg.inContainer.pubkey ];
  };

  networking = {
    useHostResolvConf = false;
    useNetworkd = true;
  };

  systemd.network = {
    enable = true;
    networks.eth0 = {
      matchConfig.Name = "eth0";
      DHCP = "yes";
      dhcpV4Config.UseDNS = true;
    };
  };
}))
|
|
];
|
|
}
|