Skip to content

Commit 3d6b1b7

Browse files
authored
Merge pull request #181 from input-output-hk/docker-registry-repair
Docker registry repair
2 parents cf6b88f + bfc3697 commit 3d6b1b7

File tree

5 files changed

+318
-12
lines changed

5 files changed

+318
-12
lines changed

modules/docker-registry.nix

Lines changed: 113 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
etcEncrypted,
66
...
77
}: let
8+
inherit (lib) boolToString last makeBinPath mkDefault mkEnableOption mkIf mkOption;
9+
inherit (lib.types) bool listOf package str;
10+
inherit (lib.types.ints) unsigned;
11+
812
deployType = config.currentCoreNode.deployType or config.currentAwsAutoScalingGroup.deployType;
913
domain =
1014
config
@@ -15,18 +19,20 @@
1519
}
1620
.domain;
1721
isSops = deployType == "aws";
18-
relEncryptedFolder = lib.last (builtins.split "-" (toString config.secrets.encryptedRoot));
22+
relEncryptedFolder = last (builtins.split "-" (toString config.secrets.encryptedRoot));
1923
cfg = config.services.docker-registry;
2024
in {
2125
options.services.docker-registry = {
22-
enable = lib.mkEnableOption "Docker registry";
23-
registryFqdn = lib.mkOption {
24-
type = lib.types.str;
26+
enable = mkEnableOption "Docker registry";
27+
28+
registryFqdn = mkOption {
29+
type = str;
2530
default = "registry.${domain}";
2631
description = "The default host fqdn for the traefik routed registry service.";
2732
};
28-
traefikTags = lib.mkOption {
29-
type = with lib.types; listOf str;
33+
34+
traefikTags = mkOption {
35+
type = listOf str;
3036
default = [
3137
"ingress"
3238
"traefik.enable=true"
@@ -45,16 +51,79 @@ in {
4551
a basic-auth file for registry authentication.
4652
'';
4753
};
54+
55+
enableRepair = mkOption {
56+
type = bool;
57+
default = true;
58+
description = "Enables the docker registry repair service.";
59+
};
60+
61+
repairDeleteTag = mkOption {
62+
type = bool;
63+
default = false;
64+
description = "Also delete all tag references during repair.";
65+
};
66+
67+
repairDryRun = mkOption {
68+
type = bool;
69+
default = false;
70+
description = "Avoid deleting anything during repair.";
71+
};
72+
73+
repairPkg = mkOption {
74+
type = package;
75+
default = pkgs.docker-registry-repair;
76+
description = ''
77+
The registry repair package to utilize.
78+
Assumes a bin file of ''${cfg.repairPkg}/bin/docker-registry-repair.
79+
'';
80+
};
81+
82+
repairRegistryPath = mkOption {
83+
type = str;
84+
default = "/var/lib/docker-registry/docker/registry/v2";
85+
description = "The registry path.";
86+
};
87+
88+
repairTailDelay = mkOption {
89+
type = unsigned;
90+
default = 5;
91+
description = "The time delay in seconds between repair spawn jobs.";
92+
};
93+
94+
repairTailLookback = mkOption {
95+
type = str;
96+
default = "-1h";
97+
description = ''
98+
The lookback period for journal history.
99+
This needs to be a valid journalctl -S parameter formatted string.
100+
'';
101+
};
102+
103+
repairTailPkg = mkOption {
104+
type = package;
105+
default = pkgs.docker-registry-tail;
106+
description = ''
107+
The registry repair tail package to utilize.
108+
Assumes a bin file of ''${cfg.repairTailPkg}/bin/docker-registry-tail.
109+
'';
110+
};
111+
112+
repairTailService = mkOption {
113+
type = str;
114+
default = "docker-registry.service";
115+
description = "The systemd service to tail.";
116+
};
48117
};
49118

50-
config = lib.mkIf cfg.enable {
119+
config = mkIf cfg.enable {
51120
networking.firewall.allowedTCPPorts = [
52121
config.services.dockerRegistry.port
53122
];
54123

55124
services = {
56125
dockerRegistry = {
57-
enable = lib.mkDefault true;
126+
enable = mkDefault true;
58127
enableDelete = true;
59128
enableGarbageCollect = true;
60129
enableRedisCache = true;
@@ -97,8 +166,40 @@ in {
97166
})
98167
.systemdService;
99168

100-
secrets.generate.redis-password = lib.mkIf isSops ''
101-
export PATH="${lib.makeBinPath (with pkgs; [coreutils sops xkcdpass])}"
169+
environment.systemPackages = with pkgs; [
170+
docker-registry-repair
171+
docker-registry-tail
172+
];
173+
174+
systemd.services.docker-registry-repair = mkIf cfg.enableRepair {
175+
wantedBy = ["multi-user.target"];
176+
177+
startLimitIntervalSec = 0;
178+
startLimitBurst = 0;
179+
180+
serviceConfig = {
181+
Restart = "always";
182+
RestartSec = 5;
183+
184+
ExecStart = let
185+
script = pkgs.writeShellApplication {
186+
name = "docker-registry-repair-tail";
187+
text = ''
188+
exec ${cfg.repairTailPkg}/bin/docker-registry-tail \
189+
--since ${cfg.repairTailLookback} \
190+
--service ${cfg.repairTailService} \
191+
--repair-path ${cfg.repairPkg}/bin/docker-registry-repair \
192+
--delay ${toString cfg.repairTailDelay} \
193+
--delete-tag ${boolToString cfg.repairDeleteTag} \
194+
--dry-run ${boolToString cfg.repairDryRun}
195+
'';
196+
};
197+
in "${script}/bin/docker-registry-repair-tail";
198+
};
199+
};
200+
201+
secrets.generate.redis-password = mkIf isSops ''
202+
export PATH="${makeBinPath (with pkgs; [coreutils sops xkcdpass])}"
102203
103204
if [ ! -s ${relEncryptedFolder}/redis-password.json ]; then
104205
xkcdpass \
@@ -107,7 +208,7 @@ in {
107208
fi
108209
'';
109210

110-
secrets.install.redis-password = lib.mkIf isSops {
211+
secrets.install.redis-password = mkIf isSops {
111212
source = "${etcEncrypted}/redis-password.json";
112213
target = /run/keys/redis-password;
113214
inputType = "binary";
@@ -117,7 +218,7 @@ in {
117218
# For the prem case, hydrate-secrets handles the push to vault instead of sops
118219
# TODO: add proper docker password generation creds in the Rakefile
119220
# TODO: add more unified handling between aws and prem secrets
120-
age.secrets = lib.mkIf (!isSops) {
221+
age.secrets = mkIf (!isSops) {
121222
redis-password = {
122223
file = config.age.encryptedRoot + "/redis/password.age";
123224
path = "/run/keys/redis-password";

overlay.nix

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ in
4242
cue = prev.callPackage ./pkgs/cue.nix {};
4343
devShell = final.callPackage ./pkgs/dev-shell.nix {};
4444
docker-distribution = prev.callPackage ./pkgs/docker-distribution.nix {};
45+
docker-registry-repair = prev.callPackage ./pkgs/docker-registry/default.nix {name = "docker-registry-repair";};
46+
docker-registry-tail = prev.callPackage ./pkgs/docker-registry/default.nix {name = "docker-registry-tail";};
4547

4648
# Pin docker and containerd to avoid unexpected cluster wide docker daemon restarts
4749
# during metal deploy resulting in OCI jobs being killed or behaving unexpectedly

pkgs/docker-registry/default.nix

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Packages one of the Ruby helper scripts that sit next to this file
# (./<name>.rb) so that it can be started as $out/bin/<name>.
#
# The script is installed as the hidden file $out/bin/.<name>-wrapped. A small
# shell wrapper placed beside it resolves its own real location and runs that
# hidden script with ruby, forwarding every argument.
{
  name,
  coreutils,
  lib,
  ruby,
  stdenv,
  writeShellApplication,
}: let
  # Shell entry point. dirname and readlink are used in the text below, which
  # is presumably why coreutils is listed as a runtime input.
  wrapperApp = writeShellApplication {
    inherit name;
    runtimeInputs = [coreutils ruby];
    text = ''exec ruby "$(dirname "$(readlink -f "$0")")/.${name}-wrapped" "$@"'';
  };
in
  stdenv.mkDerivation {
    inherit name;
    src = ./${name}.rb;

    # The source is a single script file, so every phase before install is
    # switched off.
    dontUnpack = true;
    dontPatch = true;
    dontConfigure = true;
    dontBuild = true;

    installPhase = ''
      install -Dm555 "$src" "$out/bin/.${name}-wrapped"
      cp ${wrapperApp}/bin/${name} "$out/bin"
    '';
  }
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
require 'digest/sha2'
2+
require 'json'
3+
require 'fileutils'
4+
require 'optparse'
5+
6+
# Settings for this run, pre-filled with their defaults. The option callbacks
# below overwrite individual entries, so the hash is deliberately not frozen.
OPTIONS = {
  delete_tag: false,
  dry_run: false,
  registry: '/var/lib/docker-registry/docker/registry/v2',
  repo: nil,
  tag: nil,
}

# Command-line interface. --dry-run and --delete-tag take an optional boolean
# FLAG; when the switch is given without a value the callback receives nil,
# which is mapped to true.
op = OptionParser.new do |parser|
  parser.banner = 'Usage: docker-registry-repair [options]'

  parser.on '-d', '--dry-run [FLAG]', TrueClass, 'avoid deleting anything' do |v|
    OPTIONS[:dry_run] = v.nil? ? true : v
  end

  parser.on '-r', '--repo REPO', 'repository part of the image name, like `cardano-public-documentation`' do |v|
    OPTIONS[:repo] = v
  end

  parser.on '-t', '--tag TAG', 'tag of the image, the part after the `:`' do |v|
    OPTIONS[:tag] = v
  end

  parser.on '--delete-tag [FLAG]', TrueClass, 'also delete all tag references' do |v|
    OPTIONS[:delete_tag] = v.nil? ? true : v
  end

  parser.on '--registry-path PATH', "the registry path, defaults to: #{OPTIONS[:registry]}" do |v|
    OPTIONS[:registry] = v
  end
end

begin
  op.parse!
rescue OptionParser::ParseError => e
  # An unknown switch or a missing/invalid argument is a usage error: print
  # the reason and the option summary instead of dying on an uncaught
  # exception. The exit status stays non-zero.
  abort "#{e.message}\n\n#{op}"
end
39+
40+
# Read-only views onto the parsed command-line OPTIONS.

# True when nothing should actually be deleted.
def dry_run?
  OPTIONS[:dry_run]
end

# True when the tag references should be deleted as well.
def delete_tag?
  OPTIONS[:delete_tag]
end

# Repository part of the image name.
def repo
  OPTIONS[:repo]
end

# Tag part of the image name.
def tag
  OPTIONS[:tag]
end

# Root directory of the registry storage.
def registry
  OPTIONS[:registry]
end

# First two characters following "sha256:" in the digest, or nil when the
# digest does not contain that marker.
def prefix(hash)
  hash[/sha256:(..)/, 1]
end

# Everything following "sha256:" in the digest, or nil when the digest does
# not contain that marker.
def suffix(hash)
  hash[/sha256:(.*)/, 1]
end

# Path of the data file that stores the blob with the given digest.
def blob(hash)
  shard = prefix(hash)
  hex = suffix(hash)
  "#{registry}/blobs/sha256/#{shard}/#{hex}/data"
end
48+
49+
# Announces the removal of +path+ on stdout and then deletes it recursively.
# In dry-run mode only the announcement happens.
def rm(path)
  puts "removing #{path}"
  return if dry_run?

  FileUtils.rm_rf(path)
end
53+
54+
# Purges every reference to a digest from the Redis cache and from the
# repository link trees on disk.
#
# hash - digest string as found in a manifest, e.g. "sha256:<hex>".
#
# Each deletion is announced on stdout; nothing is deleted when dry_run? is
# true.
def remove_layers(hash)
  # The link directories live below a literal "sha256" directory and are named
  # after the bare hex digest, so the algorithm prefix must not be repeated in
  # the path.
  hex = hash.to_s.sub(/\Asha256:/, '')
  # An empty digest would turn the patterns below into match-everything.
  return if hex.empty?

  `redis-cli --raw keys '*#{hash}*'`.each_line do |key|
    key.strip!
    puts "removing #{key} from redis"
    system('redis-cli', 'del', key) unless dry_run?
  end

  link_patterns = [
    "#{registry}/repositories/*/_layers/sha256/#{hex}",
    "#{registry}/repositories/*/_manifests/revisions/sha256/#{hex}",
    "#{registry}/repositories/*/_manifests/tags/*/index/sha256/#{hex}",
  ]

  link_patterns.each do |pattern|
    Dir.glob(pattern) { |path| rm(path) }
  end
end
73+
74+
# Verifies one blob referenced by a manifest and purges it when it is unusable.
#
# desired - expected digest in "sha256:<hex>" form.
#
# * data file missing: the references to the digest are removed.
# * content hashes to a different digest: the references, the cache entry and
#   the data file are removed.
# * content matches: nothing is touched.
def repair(desired)
  blob_path = blob(desired)

  unless File.file?(blob_path)
    puts "missing file for #{blob_path}"
    remove_layers(desired)
    return
  end

  actual = Digest::SHA256.file(blob_path).hexdigest
  return if desired == "sha256:#{actual}"

  remove_layers(desired)
  # `desired` already carries the "sha256:" prefix, so it is appended to the
  # key namespace unchanged. Like every other deletion this respects dry-run.
  system('redis-cli', 'del', "blobs::#{desired}") unless dry_run?
  # rm announces the path itself, so no separate message is printed here.
  rm(blob_path)
end
91+
92+
# Main flow: check every blob referenced by the manifest of repo:tag and,
# when --delete-tag is set, drop the tag itself afterwards.

# Without both values the link path below would point at a bogus location.
if repo.to_s.empty? || tag.to_s.empty?
  abort 'both --repo and --tag are required'
end

puts "verifying #{repo}:#{tag}"
puts "--dry-run is enabled, will not actually delete anything" if dry_run?

link = "#{registry}/repositories/#{repo}/_manifests/tags/#{tag}/current/link"
unless File.file?(link)
  puts "#{link} is missing, cannot read manifest"
  # Nothing can be verified without the link; stop here rather than failing
  # on the read below.
  exit 1
end

link_hash = File.read(link).strip
manifest_path = blob(link_hash)
unless File.file?(manifest_path)
  puts "#{manifest_path} is missing, cannot read manifest"
  exit 1
end

manifest = JSON.parse(File.read(manifest_path))

# Not every manifest carries a config object or a layer list; verify only
# what is actually present.
config_digest = manifest.dig('config', 'digest')
repair config_digest if config_digest

(manifest['layers'] || []).each do |layer|
  repair layer['digest']
end

# The following code may actually be needed in more serious cases
exit unless delete_tag?

remove_layers(link_hash)

puts "removing #{link}"
rm("#{registry}/repositories/#{repo}/_manifests/tags/#{tag}")
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
require 'open3'
2+
require 'set'
3+
require 'optparse'
4+
5+
# Settings for this run, pre-filled with their defaults. The option callbacks
# below overwrite individual entries, so the hash is deliberately not frozen.
OPTIONS = {
  delay: 5,
  delete_tag: false,
  dry_run: false,
  script: 'docker-registry-repair',
  service: 'docker-registry.service',
  since: '-1h',
}

# Command-line interface. --dry-run and --delete-tag take an optional boolean
# FLAG; when the switch is given without a value the callback receives nil,
# which is mapped to true.
op = OptionParser.new do |parser|
  parser.banner = 'Usage: docker-registry-tail [options]'

  parser.on '--repair-path PATH', "the repair script path to utilize; defaults to: #{OPTIONS[:script]}" do |v|
    OPTIONS[:script] = v
  end

  parser.on '-s', '--since LOOKBACK', "the lookback period for journal history; defaults to: #{OPTIONS[:since]}" do |v|
    OPTIONS[:since] = v
  end

  parser.on '-u', '--service SERVICE', "the systemd service to tail; defaults to: #{OPTIONS[:service]}" do |v|
    OPTIONS[:service] = v
  end

  parser.on '-t', '--delay SEC', Integer, "the time delay in seconds between repair spawn jobs; defaults to: #{OPTIONS[:delay]}" do |v|
    OPTIONS[:delay] = v
  end

  parser.on '-d', '--dry-run [FLAG]', TrueClass, 'avoid deleting anything' do |v|
    OPTIONS[:dry_run] = v.nil? ? true : v
  end

  parser.on '--delete-tag [FLAG]', TrueClass, 'also delete all tag references' do |v|
    OPTIONS[:delete_tag] = v.nil? ? true : v
  end
end

begin
  op.parse!
rescue OptionParser::ParseError => e
  # An unknown switch or a missing/invalid argument is a usage error: print
  # the reason and the option summary instead of dying on an uncaught
  # exception. The exit status stays non-zero.
  abort "#{e.message}\n\n#{op}"
end
43+
44+
# Read-only views onto the parsed command-line OPTIONS.

# True when the repair runs should not delete anything.
def dry_run?
  OPTIONS[:dry_run]
end

# True when the repair runs should also delete tag references.
def delete_tag?
  OPTIONS[:delete_tag]
end

# Seconds to wait after each repair run.
def delay
  OPTIONS[:delay]
end

# Path (or name) of the repair script to spawn.
def script
  OPTIONS[:script]
end

# systemd unit whose journal is followed.
def service
  OPTIONS[:service]
end

# Lookback period handed to journalctl.
def since
  OPTIONS[:since]
end
50+
51+
# Check back in systemd log history, and follow all newly pushed images
52+
Open3.popen2e('journalctl', '-S', since, '-f', '-u', service, '-g', '"PUT /v2/.+/manifests') do |_, out|
53+
out.each_line do |line|
54+
%r!/v2/(?<repo>[^/\s]+)/manifests/(?<tag>[^/\s]+)! =~ line
55+
next unless repo && tag
56+
system(script, '--repo', repo, '--tag', tag, '--dry-run', dry_run? ? "true" : "false", '--delete-tag', delete_tag? ? "true" : "false")
57+
sleep delay
58+
end
59+
end

0 commit comments

Comments
 (0)