From c17c6d0a3a850ff3f9e7e62b5a638269c052f2a3 Mon Sep 17 00:00:00 2001 From: benjaminjb Date: Thu, 25 Sep 2025 12:10:18 -0500 Subject: [PATCH 1/6] Update GitHub builds * Adjust chmod for licenses, queries * Adjust license aggregation Issues: [PGO-2695] --- .dockerignore | 1 + Dockerfile | 7 +- hack/extract-licenses.go | 227 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 233 insertions(+), 2 deletions(-) create mode 100644 hack/extract-licenses.go diff --git a/.dockerignore b/.dockerignore index 6ff2842b87..32fab58f69 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,4 +3,5 @@ /.git /bin /hack +!/hack/extract-licenses.go !/hack/tools/queries diff --git a/Dockerfile b/Dockerfile index a218dfe492..6fed212c29 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,12 +10,15 @@ COPY hack/tools/queries /opt/crunchy/conf WORKDIR /usr/src/app COPY . . ENV GOCACHE=/var/cache/go + +# Build the operator and assemble the licenses RUN --mount=type=cache,target=/var/cache/go go build ./cmd/postgres-operator +RUN go run ./hack/extract-licenses.go licenses postgres-operator FROM docker.io/library/debian:bookworm -COPY --from=build /licenses /licenses -COPY --from=build /opt/crunchy/conf /opt/crunchy/conf +COPY --from=build --chmod=0444 /usr/src/app/licenses /licenses +COPY --from=build --chmod=0444 /opt/crunchy/conf /opt/crunchy/conf COPY --from=build /usr/src/app/postgres-operator /usr/local/bin USER 2 diff --git a/hack/extract-licenses.go b/hack/extract-licenses.go new file mode 100644 index 0000000000..c318b00729 --- /dev/null +++ b/hack/extract-licenses.go @@ -0,0 +1,227 @@ +//go:build go1.21 + +package main + +import ( + "bytes" + "context" + "encoding/csv" + "encoding/json" + "errors" + "flag" + "fmt" + "io" + "io/fs" + "os" + "os/exec" + "os/signal" + "path/filepath" + "slices" + "strings" + "syscall" +) + +func main() { + flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError) + flags.Usage = func() { + fmt.Fprintln(flags.Output(), strings.TrimSpace(` +Usage: 
`+flags.Name()+` {directory} {executables...} + +This program downloads and extracts the licenses of Go modules used to build +Go executables. + +The first argument is a directory that will receive license files. It will be +created if it does not exist. This program will overwrite existing files but +not delete them. Remaining arguments must be Go executables. + +Go modules are downloaded to the Go module cache which can be changed via +the environment: https://go.dev/ref/mod#module-cache`, + )) + } + if _ = flags.Parse(os.Args[1:]); flags.NArg() < 2 || slices.ContainsFunc( + os.Args, func(arg string) bool { return arg == "-help" || arg == "--help" }, + ) { + flags.Usage() + os.Exit(2) + } + + ctx, cancel := context.WithCancel(context.Background()) + signals := make(chan os.Signal, 1) + signal.Notify(signals, os.Interrupt, syscall.SIGTERM) + go func() { <-signals; cancel() }() + + // Create the target directory. + if err := os.MkdirAll(flags.Arg(0), 0o755); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + // Extract module information from remaining arguments. + modules := identifyModules(ctx, flags.Args()[1:]...) + + // Ignore packages from Crunchy Data. Most are not available in any [proxy], + // and we handle their licenses elsewhere. + // + // This is also a quick fix to avoid the [replace] directive in our projects. + // The logic below cannot handle them. Showing xxhash versus a replace: + // + // dep github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= + // dep github.com/crunchydata/postgres-operator v0.0.0-00010101000000-000000000000 + // => ./postgres-operator (devel) + // + // [proxy]: https://go.dev/ref/mod#module-proxy + // [replace]: https://go.dev/ref/mod#go-mod-file-replace + modules = slices.DeleteFunc(modules, func(s string) bool { + return strings.HasPrefix(s, "git.crunchydata.com/") || + strings.HasPrefix(s, "github.com/crunchydata/") + }) + + // Download modules to the Go module cache. 
+ directories := downloadModules(ctx, modules...) + + // Gather license files from every module into the target directory. + for module, directory := range directories { + for _, license := range findLicenses(ctx, directory) { + relative := module + strings.TrimPrefix(license, directory) + destination := filepath.Join(flags.Arg(0), relative) + + var data []byte + err := ctx.Err() + + if err == nil { + err = os.MkdirAll(filepath.Dir(destination), 0o755) + } + if err == nil { + data, err = os.ReadFile(license) + } + if err == nil { + err = os.WriteFile(destination, data, 0o644) + } + if err == nil { + fmt.Println(license, "=>", destination) + } + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + } + } +} + +func downloadModules(ctx context.Context, modules ...string) map[string]string { + var stdout bytes.Buffer + + // Download modules and read their details into a series of JSON objects. + // - https://go.dev/ref/mod#go-mod-download + cmd := exec.CommandContext(ctx, os.Getenv("GO"), append([]string{"mod", "download", "-json"}, modules...)...) + if cmd.Path == "" { + cmd.Path, cmd.Err = exec.LookPath("go") + } + cmd.Stderr = os.Stderr + cmd.Stdout = &stdout + if err := cmd.Run(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(cmd.ProcessState.ExitCode()) + } + + decoder := json.NewDecoder(&stdout) + results := make(map[string]string, len(modules)) + + // NOTE: The directory in the cache is a normalized spelling of the module path; + // ask Go for the directory; do not try to spell it yourself. 
+ // - https://go.dev/ref/mod#module-cache + // - https://go.dev/ref/mod#module-path + for { + var module struct{ Path, Version, Dir string } + err := decoder.Decode(&module) + + if err == nil { + results[module.Path+"@"+module.Version] = module.Dir + continue + } + if errors.Is(err, io.EOF) { + break + } + + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + return results +} + +func findLicenses(ctx context.Context, directory string) []string { + var results []string + + // Syft maintains a list of license filenames that began as a list maintained by + // Go. We gather a similar list by matching on "copying" and "license" filenames. + // - https://pkg.go.dev/github.com/anchore/syft@v1.3.0/internal/licenses#FileNames + // + // Ignore Go files and anything in the special "testdata" directory. + // - https://go.dev/cmd/go + err := filepath.WalkDir(directory, func(path string, d fs.DirEntry, err error) error { + if d.IsDir() && d.Name() == "testdata" { + return fs.SkipDir + } + if d.IsDir() || strings.HasSuffix(path, ".go") { + return err + } + + lower := strings.ToLower(d.Name()) + if strings.Contains(lower, "copying") || strings.Contains(lower, "license") { + results = append(results, path) + } + + return err + }) + + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + return results +} + +func identifyModules(ctx context.Context, executables ...string) []string { + var stdout bytes.Buffer + + // Use `go version -m` to read the embedded module information as a text table. + // - https://go.dev/ref/mod#go-version-m + cmd := exec.CommandContext(ctx, os.Getenv("GO"), append([]string{"version", "-m"}, executables...)...) + if cmd.Path == "" { + cmd.Path, cmd.Err = exec.LookPath("go") + } + cmd.Stderr = os.Stderr + cmd.Stdout = &stdout + if err := cmd.Run(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(cmd.ProcessState.ExitCode()) + } + + // Parse the tab-separated table without checking row lengths. 
+ reader := csv.NewReader(&stdout) + reader.Comma = '\t' + reader.FieldsPerRecord = -1 + + lines, _ := reader.ReadAll() + result := make([]string, 0, len(lines)) + + for _, fields := range lines { + if len(fields) > 3 && fields[1] == "dep" { + result = append(result, fields[2]+"@"+fields[3]) + } + if len(fields) > 4 && fields[1] == "mod" && fields[4] != "" { + result = append(result, fields[2]+"@"+fields[3]) + } + } + + // The `go version -m` command returns no information for empty files, and it + // is possible for a Go executable to have no main module and no dependencies. + if len(result) == 0 { + fmt.Fprintf(os.Stderr, "no Go modules in %v\n", executables) + os.Exit(0) + } + + return result +} From 4c7a633a7d02d7b03e68f1a45fa709e6fc7fcdc6 Mon Sep 17 00:00:00 2001 From: benjaminjb Date: Mon, 29 Sep 2025 14:54:19 -0500 Subject: [PATCH 2/6] Clean up golangci lint errors Linter was noting a lot of issues with the hack/extract-licenses.go, but we are pretty certain we want this to be this way. --- hack/extract-licenses.go | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/hack/extract-licenses.go b/hack/extract-licenses.go index c318b00729..d78ee0b2d5 100644 --- a/hack/extract-licenses.go +++ b/hack/extract-licenses.go @@ -72,8 +72,7 @@ the environment: https://go.dev/ref/mod#module-cache`, // [proxy]: https://go.dev/ref/mod#module-proxy // [replace]: https://go.dev/ref/mod#go-mod-file-replace modules = slices.DeleteFunc(modules, func(s string) bool { - return strings.HasPrefix(s, "git.crunchydata.com/") || - strings.HasPrefix(s, "github.com/crunchydata/") + return strings.HasPrefix(s, "github.com/crunchydata/") }) // Download modules to the Go module cache. @@ -81,7 +80,7 @@ the environment: https://go.dev/ref/mod#module-cache`, // Gather license files from every module into the target directory. 
for module, directory := range directories { - for _, license := range findLicenses(ctx, directory) { + for _, license := range findLicenses(directory) { relative := module + strings.TrimPrefix(license, directory) destination := filepath.Join(flags.Arg(0), relative) @@ -95,9 +94,12 @@ the environment: https://go.dev/ref/mod#module-cache`, data, err = os.ReadFile(license) } if err == nil { + //nolint:gosec // gosec warns on permissions more open than 600 + // but we need these licenses to be readable by all err = os.WriteFile(destination, data, 0o644) } if err == nil { + //nolint:forbidigo // This is an intentional print to console to inform the user fmt.Println(license, "=>", destination) } if err != nil { @@ -113,6 +115,7 @@ func downloadModules(ctx context.Context, modules ...string) map[string]string { // Download modules and read their details into a series of JSON objects. // - https://go.dev/ref/mod#go-mod-download + //nolint:gosec // Suppressing unnecessary warning re: potentially tainted inputs (G204) cmd := exec.CommandContext(ctx, os.Getenv("GO"), append([]string{"mod", "download", "-json"}, modules...)...) 
if cmd.Path == "" { cmd.Path, cmd.Err = exec.LookPath("go") @@ -132,7 +135,11 @@ func downloadModules(ctx context.Context, modules ...string) map[string]string { // - https://go.dev/ref/mod#module-cache // - https://go.dev/ref/mod#module-path for { - var module struct{ Path, Version, Dir string } + var module struct { + Path string `json:"path,omitempty"` + Version string `json:"version,omitempty"` + Dir string `json:"dir,omitempty"` + } err := decoder.Decode(&module) if err == nil { @@ -150,7 +157,7 @@ func downloadModules(ctx context.Context, modules ...string) map[string]string { return results } -func findLicenses(ctx context.Context, directory string) []string { +func findLicenses(directory string) []string { var results []string // Syft maintains a list of license filenames that began as a list maintained by @@ -188,6 +195,7 @@ func identifyModules(ctx context.Context, executables ...string) []string { // Use `go version -m` to read the embedded module information as a text table. // - https://go.dev/ref/mod#go-version-m + //nolint:gosec // Suppressing unnecessary warning re: potentially tainted inputs (G204) cmd := exec.CommandContext(ctx, os.Getenv("GO"), append([]string{"version", "-m"}, executables...)...) 
if cmd.Path == "" { cmd.Path, cmd.Err = exec.LookPath("go") From 58bdb34225c8acffad78a0e417c400b838cc63ce Mon Sep 17 00:00:00 2001 From: Benjamin Blattberg Date: Mon, 29 Sep 2025 16:17:13 -0500 Subject: [PATCH 3/6] Apply suggestions from code review Co-authored-by: Chris Bandy --- hack/extract-licenses.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hack/extract-licenses.go b/hack/extract-licenses.go index d78ee0b2d5..1715a21e84 100644 --- a/hack/extract-licenses.go +++ b/hack/extract-licenses.go @@ -99,8 +99,7 @@ the environment: https://go.dev/ref/mod#module-cache`, err = os.WriteFile(destination, data, 0o644) } if err == nil { - //nolint:forbidigo // This is an intentional print to console to inform the user - fmt.Println(license, "=>", destination) + fmt.Fprintln(os.Stdout, license, "=>", destination) } if err != nil { fmt.Fprintln(os.Stderr, err) @@ -115,7 +114,7 @@ func downloadModules(ctx context.Context, modules ...string) map[string]string { // Download modules and read their details into a series of JSON objects. // - https://go.dev/ref/mod#go-mod-download - //nolint:gosec // Suppressing unnecessary warning re: potentially tainted inputs (G204) + //gosec:disable G204 -- Use this environment variable to switch Go versions without touching PATH cmd := exec.CommandContext(ctx, os.Getenv("GO"), append([]string{"mod", "download", "-json"}, modules...)...) if cmd.Path == "" { cmd.Path, cmd.Err = exec.LookPath("go") From 855c9fb5c3ce3ca5964d56fc31741a9a1aa12ac9 Mon Sep 17 00:00:00 2001 From: benjaminjb Date: Mon, 29 Sep 2025 16:24:14 -0500 Subject: [PATCH 4/6] PR feedback --- hack/extract-licenses.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/hack/extract-licenses.go b/hack/extract-licenses.go index 1715a21e84..086d2feac6 100644 --- a/hack/extract-licenses.go +++ b/hack/extract-licenses.go @@ -1,3 +1,7 @@ +// Copyright 2024 - 2025 Crunchy Data Solutions, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + //go:build go1.21 package main @@ -94,9 +98,9 @@ the environment: https://go.dev/ref/mod#module-cache`, data, err = os.ReadFile(license) } if err == nil { - //nolint:gosec // gosec warns on permissions more open than 600 - // but we need these licenses to be readable by all - err = os.WriteFile(destination, data, 0o644) + // When we copy the licenses in the Dockerfiles, make sure + // to `--chmod` them to appropriate permissions, e.g., 0o444 + err = os.WriteFile(destination, data, 0o600) } if err == nil { fmt.Fprintln(os.Stdout, license, "=>", destination) @@ -194,7 +198,7 @@ func identifyModules(ctx context.Context, executables ...string) []string { // Use `go version -m` to read the embedded module information as a text table. // - https://go.dev/ref/mod#go-version-m - //nolint:gosec // Suppressing unnecessary warning re: potentially tainted inputs (G204) + //gosec:disable G204 -- Use this environment variable to switch Go versions without touching PATH cmd := exec.CommandContext(ctx, os.Getenv("GO"), append([]string{"version", "-m"}, executables...)...) 
if cmd.Path == "" { cmd.Path, cmd.Err = exec.LookPath("go") From 7469b1ee140467c7b41c4f900f71ac8f902fa041 Mon Sep 17 00:00:00 2001 From: benjaminjb Date: Tue, 30 Sep 2025 11:27:21 -0500 Subject: [PATCH 5/6] Remove unnecessary build constraint --- hack/extract-licenses.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/hack/extract-licenses.go b/hack/extract-licenses.go index 086d2feac6..5bc0ab923b 100644 --- a/hack/extract-licenses.go +++ b/hack/extract-licenses.go @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: Apache-2.0 -//go:build go1.21 - package main import ( From 65272e18d804afabdf7a484b788e45000fe0d71c Mon Sep 17 00:00:00 2001 From: benjaminjb Date: Tue, 30 Sep 2025 12:34:33 -0500 Subject: [PATCH 6/6] rm licenses/.gitignore This seems to be a leftover from when we aggregated licenses in the licenses dir --- licenses/.gitignore | 1 - 1 file changed, 1 deletion(-) delete mode 100644 licenses/.gitignore diff --git a/licenses/.gitignore b/licenses/.gitignore deleted file mode 100644 index 72e8ffc0db..0000000000 --- a/licenses/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*