From b315464e98ed7e6d8f6182a3f4031115ae180a08 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Sat, 5 Oct 2024 11:46:00 +0200 Subject: [PATCH 01/25] chore(deps): update Noto Color Emoji to Unicode 16.0 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a3918c1bb..d391354d4 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ DOCKER_REPOSITORY=gotenberg GOTENBERG_VERSION=snapshot GOTENBERG_USER_GID=1001 GOTENBERG_USER_UID=1001 -NOTO_COLOR_EMOJI_VERSION=v2.042 # See https://github.com/googlefonts/noto-emoji/releases. +NOTO_COLOR_EMOJI_VERSION=v2.047 # See https://github.com/googlefonts/noto-emoji/releases. PDFTK_VERSION=v3.3.3 # See https://gitlab.com/pdftk-java/pdftk/-/releases - Binary package. GOLANGCI_LINT_VERSION=v1.60.3 # See https://github.com/golangci/golangci-lint/releases. From 7df786c5c6ac0d967895c0b75c810a341b035abe Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Sat, 5 Oct 2024 11:45:10 +0200 Subject: [PATCH 02/25] fix(webhook): retrieve values from echo.Context before it gets recycled --- pkg/modules/webhook/middleware.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pkg/modules/webhook/middleware.go b/pkg/modules/webhook/middleware.go index 39169375e..63150cce9 100644 --- a/pkg/modules/webhook/middleware.go +++ b/pkg/modules/webhook/middleware.go @@ -113,13 +113,19 @@ func webhookMiddleware(w *Webhook) api.Middleware { } } + // Retrieve values from echo.Context before it gets recycled. + // See https://github.com/gotenberg/gotenberg/issues/1000. 
+ startTime := c.Get("startTime").(time.Time) + traceHeader := c.Get("traceHeader").(string) + trace := c.Get("trace").(string) + client := &client{ url: webhookUrl, method: webhookMethod, errorUrl: webhookErrorUrl, errorMethod: webhookErrorMethod, extraHttpHeaders: extraHttpHeaders, - startTime: c.Get("startTime").(time.Time), + startTime: startTime, client: &retryablehttp.Client{ HTTPClient: &http.Client{ @@ -157,8 +163,8 @@ func webhookMiddleware(w *Webhook) api.Middleware { } headers := map[string]string{ - echo.HeaderContentType: echo.MIMEApplicationJSON, - c.Get("traceHeader").(string): c.Get("trace").(string), + echo.HeaderContentType: echo.MIMEApplicationJSON, + traceHeader: trace, } err = client.send(bytes.NewReader(b), headers, true) @@ -236,7 +242,7 @@ func webhookMiddleware(w *Webhook) api.Middleware { echo.HeaderContentDisposition: fmt.Sprintf("attachement; filename=%q", ctx.OutputFilename(outputPath)), echo.HeaderContentType: http.DetectContentType(fileHeader), echo.HeaderContentLength: strconv.FormatInt(fileStat.Size(), 10), - c.Get("traceHeader").(string): c.Get("trace").(string), + traceHeader: trace, } // Send the output file to the webhook. 
From aa57b17254342e2aa494647b65538ef75a6e67f0 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Sun, 6 Oct 2024 09:52:48 +0200 Subject: [PATCH 03/25] chore(deps): update Go dependencies --- go.mod | 18 +++++++++--------- go.sum | 36 ++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/go.mod b/go.mod index 1de4a7607..5a76a53af 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/alexliesenfeld/health v0.8.0 github.com/andybalholm/brotli v1.1.0 // indirect github.com/barasher/go-exiftool v1.10.0 - github.com/chromedp/cdproto v0.0.0-20240919203636-12af5e8a671f + github.com/chromedp/cdproto v0.0.0-20241003230502-a4a8f7c660df github.com/chromedp/chromedp v0.10.0 github.com/golang/snappy v0.0.4 // indirect github.com/google/uuid v1.6.0 @@ -28,13 +28,13 @@ require ( github.com/ulikunitz/xz v0.5.12 // indirect go.uber.org/multierr v1.11.0 go.uber.org/zap v1.27.0 - golang.org/x/crypto v0.27.0 // indirect - golang.org/x/image v0.20.0 // indirect - golang.org/x/net v0.29.0 + golang.org/x/crypto v0.28.0 // indirect + golang.org/x/image v0.21.0 // indirect + golang.org/x/net v0.30.0 golang.org/x/sync v0.8.0 - golang.org/x/sys v0.25.0 // indirect - golang.org/x/term v0.24.0 - golang.org/x/text v0.18.0 + golang.org/x/sys v0.26.0 // indirect + golang.org/x/term v0.25.0 + golang.org/x/text v0.19.0 ) require github.com/dlclark/regexp2 v1.11.4 @@ -59,13 +59,13 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.59.1 // indirect + github.com/prometheus/common v0.60.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasttemplate v1.2.2 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect - golang.org/x/time 
v0.6.0 // indirect + golang.org/x/time v0.7.0 // indirect google.golang.org/protobuf v1.34.2 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect ) diff --git a/go.sum b/go.sum index 5e7dea865..84cf82716 100644 --- a/go.sum +++ b/go.sum @@ -12,8 +12,8 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chromedp/cdproto v0.0.0-20240801214329-3f85d328b335/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= -github.com/chromedp/cdproto v0.0.0-20240919203636-12af5e8a671f h1:dEjjp+iN34En5Pl9XIi978DmR2/CMwuOxoPWtiHixKQ= -github.com/chromedp/cdproto v0.0.0-20240919203636-12af5e8a671f/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= +github.com/chromedp/cdproto v0.0.0-20241003230502-a4a8f7c660df h1:cbtSn19AtqQha1cxmP2Qvgd3fFMz51AeAEKLJMyEUhc= +github.com/chromedp/cdproto v0.0.0-20241003230502-a4a8f7c660df/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= github.com/chromedp/chromedp v0.10.0 h1:bRclRYVpMm/UVD76+1HcRW9eV3l58rFfy7AdBvKab1E= github.com/chromedp/chromedp v0.10.0/go.mod h1:ei/1ncZIqXX1YnAYDkxhD4gzBgavMEUu7JCKvztdomE= github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3Ic= @@ -111,8 +111,8 @@ github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zI github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.59.1 h1:LXb1quJHWm1P6wq/U824uxYi4Sg0oGvNeUm1z5dJoX0= -github.com/prometheus/common v0.59.1/go.mod h1:GpWM7dewqmVYcd7SmRaiWVe9SSqjf0UrwnYnpEZNuT0= +github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= 
+github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= @@ -146,25 +146,25 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= -golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= -golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= -golang.org/x/image v0.20.0 h1:7cVCUjQwfL18gyBJOmYvptfSHS8Fb3YUDtfLIZ7Nbpw= -golang.org/x/image v0.20.0/go.mod h1:0a88To4CYVBAHp5FXJm8o7QbUl37Vd85ply1vyD8auM= -golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= -golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= +golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= +golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/image v0.21.0 h1:c5qV36ajHpdj4Qi0GnE0jUc/yuo33OLFaa0d+crTD5s= +golang.org/x/image v0.21.0/go.mod h1:vUbsLavqK/W303ZroQQVKQ+Af3Yl6Uz1Ppu5J/cLz78= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys 
v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= -golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= -golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= -golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= -golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= -golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= -golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= +golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= +golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= From bf205c579d33e4c2348e991fe766c9acf7706c0e Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Sun, 6 Oct 2024 18:32:09 +0200 Subject: [PATCH 04/25] fix(pdfcpu): switch to CLI --- Makefile | 3 ++ build/Dockerfile | 35 ++++++++++++++++++++-- go.mod | 8 ----- go.sum | 26 ---------------- pkg/modules/pdfcpu/doc.go | 2 +- pkg/modules/pdfcpu/pdfcpu.go | 42 +++++++++++++++++++------- 
pkg/modules/pdfcpu/pdfcpu_test.go | 50 ++++++++++++++++++++++++++++++- pkg/modules/pdftk/pdftk.go | 2 +- pkg/modules/pdftk/pdftk_test.go | 2 ++ pkg/modules/qpdf/qpdf_test.go | 2 ++ scripts/release.sh | 8 +++-- 11 files changed, 127 insertions(+), 53 deletions(-) diff --git a/Makefile b/Makefile index d391354d4..2b3c23b3b 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ GOTENBERG_USER_GID=1001 GOTENBERG_USER_UID=1001 NOTO_COLOR_EMOJI_VERSION=v2.047 # See https://github.com/googlefonts/noto-emoji/releases. PDFTK_VERSION=v3.3.3 # See https://gitlab.com/pdftk-java/pdftk/-/releases - Binary package. +PDFCPU_VERSION=v0.8.1 # See https://github.com/pdfcpu/pdfcpu/releases. GOLANGCI_LINT_VERSION=v1.60.3 # See https://github.com/golangci/golangci-lint/releases. .PHONY: build @@ -24,6 +25,7 @@ build: ## Build the Gotenberg's Docker image --build-arg GOTENBERG_USER_UID=$(GOTENBERG_USER_UID) \ --build-arg NOTO_COLOR_EMOJI_VERSION=$(NOTO_COLOR_EMOJI_VERSION) \ --build-arg PDFTK_VERSION=$(PDFTK_VERSION) \ + --build-arg PDFCPU_VERSION=$(PDFCPU_VERSION) \ -t $(DOCKER_REGISTRY)/$(DOCKER_REPOSITORY):$(GOTENBERG_VERSION) \ -f build/Dockerfile . @@ -197,6 +199,7 @@ release: ## Build the Gotenberg's Docker image and push it to a Docker repositor $(GOTENBERG_USER_UID) \ $(NOTO_COLOR_EMOJI_VERSION) \ $(PDFTK_VERSION) \ + $(PDFCPU_VERSION) \ $(DOCKER_REGISTRY) \ $(DOCKER_REPOSITORY) \ $(LINUX_AMD64_RELEASE) diff --git a/build/Dockerfile b/build/Dockerfile index 133d6f8e9..38771906b 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -3,10 +3,35 @@ # stage that uses them. ARG GOLANG_VERSION +# ---------------------------------------------- +# pdfcpu binary build stage +# ---------------------------------------------- +# Note: this stage is required as pdfcpu does not release an armhf variant by +# default. + +FROM golang:$GOLANG_VERSION AS pdfcpu-binary-stage + +ARG PDFCPU_VERSION +ENV CGO_ENABLED=0 + +# Define the working directory outside of $GOPATH (we're using go modules). 
+WORKDIR /home + +RUN curl -Ls "https://github.com/pdfcpu/pdfcpu/archive/refs/tags/$PDFCPU_VERSION.tar.gz" -o pdfcpu.tar.gz &&\ + tar --strip-components=1 -xvzf pdfcpu.tar.gz + +# Install module dependencies. +RUN go mod download &&\ + go mod verify + +RUN go build -o pdfcpu -ldflags "-s -w -X 'main.version=$PDFCPU_VERSION' -X 'github.com/pdfcpu/pdfcpu/pkg/pdfcpu.VersionStr=$PDFCPU_VERSION' -X main.builtBy=gotenberg" ./cmd/pdfcpu &&\ + # Verify installation. + ./pdfcpu version + # ---------------------------------------------- # Gotenberg binary build stage # ---------------------------------------------- -FROM golang:$GOLANG_VERSION AS binary-stage +FROM golang:$GOLANG_VERSION AS gotenberg-binary-stage ARG GOTENBERG_VERSION ENV CGO_ENABLED=0 @@ -187,8 +212,11 @@ RUN \ # https://github.com/arachnys/athenapdf/commit/ba25a8d80a25d08d58865519c4cd8756dc9a336d. COPY build/fonts.conf /etc/fonts/conf.d/100-gotenberg.conf -# Copy the Gotenberg binary from the binary stage. -COPY --from=binary-stage /home/gotenberg /usr/bin/ +# Copy the pdfcpu binary from the pdfcpu-binary-stage. +COPY --from=pdfcpu-binary-stage /home/pdfcpu /usr/bin/ + +# Copy the Gotenberg binary from the gotenberg-binary-stage. +COPY --from=gotenberg-binary-stage /home/gotenberg /usr/bin/ # Environment variables required by modules or else. 
ENV CHROMIUM_BIN_PATH=/usr/bin/chromium @@ -197,6 +225,7 @@ ENV UNOCONVERTER_BIN_PATH=/usr/bin/unoconverter ENV PDFTK_BIN_PATH=/usr/bin/pdftk ENV QPDF_BIN_PATH=/usr/bin/qpdf ENV EXIFTOOL_BIN_PATH=/usr/bin/exiftool +ENV PDFCPU_BIN_PATH=/usr/bin/pdfcpu USER gotenberg WORKDIR /home/gotenberg diff --git a/go.mod b/go.mod index 5a76a53af..cc3688e99 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,6 @@ require ( github.com/mholt/archiver/v3 v3.5.1 github.com/microcosm-cc/bluemonday v1.0.27 github.com/nwaples/rardecode v1.1.3 // indirect - github.com/pdfcpu/pdfcpu v0.8.1 github.com/pierrec/lz4/v4 v4.1.21 // indirect github.com/prometheus/client_golang v1.20.4 github.com/russross/blackfriday/v2 v2.1.0 @@ -29,7 +28,6 @@ require ( go.uber.org/multierr v1.11.0 go.uber.org/zap v1.27.0 golang.org/x/crypto v0.28.0 // indirect - golang.org/x/image v0.21.0 // indirect golang.org/x/net v0.30.0 golang.org/x/sync v0.8.0 golang.org/x/sys v0.26.0 // indirect @@ -50,22 +48,16 @@ require ( github.com/gobwas/ws v1.4.0 // indirect github.com/golang-jwt/jwt v3.2.2+incompatible // indirect github.com/gorilla/css v1.0.1 // indirect - github.com/hhrutter/lzw v1.0.0 // indirect - github.com/hhrutter/tiff v1.0.1 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-runewidth v0.0.16 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.60.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect - github.com/rivo/uniseg v0.4.7 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasttemplate v1.2.2 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect golang.org/x/time v0.7.0 // indirect google.golang.org/protobuf v1.34.2 // indirect - gopkg.in/yaml.v2 v2.4.0 
// indirect ) diff --git a/go.sum b/go.sum index 84cf82716..0e058597e 100644 --- a/go.sum +++ b/go.sum @@ -52,10 +52,6 @@ github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB1 github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= -github.com/hhrutter/lzw v1.0.0 h1:laL89Llp86W3rRs83LvKbwYRx6INE8gDn0XNb1oXtm0= -github.com/hhrutter/lzw v1.0.0/go.mod h1:2HC6DJSn/n6iAZfgM3Pg+cP1KxeWc3ezG8bBqW5+WEo= -github.com/hhrutter/tiff v1.0.1 h1:MIus8caHU5U6823gx7C6jrfoEvfSTGtEFRiM8/LOzC0= -github.com/hhrutter/tiff v1.0.1/go.mod h1:zU/dNgDm0cMIa8y8YwcYBeuEEveI4B0owqHyiPpJPHc= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= @@ -66,10 +62,6 @@ github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgo github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= -github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= -github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/labstack/echo/v4 v4.12.0 
h1:IKpw49IMryVB2p1a4dzwlhP1O2Tf2E0Ir/450lH+kI0= @@ -85,8 +77,6 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= -github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo= github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4= github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk= @@ -98,13 +88,9 @@ github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9l github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw= github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0= -github.com/pdfcpu/pdfcpu v0.8.1 h1:AiWUb8uXlrXqJ73OmiYXBjDF0Qxt4OuM281eAfkAOMA= -github.com/pdfcpu/pdfcpu v0.8.1/go.mod h1:M5SFotxdaw0fedxthpjbA/PADytAo6wJnGH0SSBWJ7s= github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib 
v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= @@ -115,11 +101,6 @@ github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJN github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= -github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= -github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= -github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= -github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= @@ -148,8 +129,6 @@ go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= -golang.org/x/image v0.21.0 h1:c5qV36ajHpdj4Qi0GnE0jUc/yuo33OLFaa0d+crTD5s= -golang.org/x/image v0.21.0/go.mod h1:vUbsLavqK/W303ZroQQVKQ+Af3Yl6Uz1Ppu5J/cLz78= golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= @@ -168,10 +147,5 @@ golang.org/x/time v0.7.0/go.mod 
h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= -gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/modules/pdfcpu/doc.go b/pkg/modules/pdfcpu/doc.go index f98fdd1c2..e68e2a61f 100644 --- a/pkg/modules/pdfcpu/doc.go +++ b/pkg/modules/pdfcpu/doc.go @@ -1,5 +1,5 @@ // Package pdfcpu provides an implementation of the gotenberg.PdfEngine -// interface using the pdfcpu library. This package allows for: +// interface using the pdfcpu command-line tool. This package allows for: // // 1. The merging of PDF files. 
// diff --git a/pkg/modules/pdfcpu/pdfcpu.go b/pkg/modules/pdfcpu/pdfcpu.go index 18ad79ae5..ac2d53589 100644 --- a/pkg/modules/pdfcpu/pdfcpu.go +++ b/pkg/modules/pdfcpu/pdfcpu.go @@ -2,11 +2,10 @@ package pdfcpu import ( "context" + "errors" "fmt" + "os" - pdfcpuAPI "github.com/pdfcpu/pdfcpu/pkg/api" - pdfcpuLog "github.com/pdfcpu/pdfcpu/pkg/log" - pdfcpuConfig "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model" "go.uber.org/zap" "github.com/gotenberg/gotenberg/v8/pkg/gotenberg" @@ -16,10 +15,10 @@ func init() { gotenberg.MustRegisterModule(new(PdfCpu)) } -// PdfCpu abstracts the pdfcpu library and implements the [gotenberg.PdfEngine] -// interface. +// PdfCpu abstracts the CLI tool pdfcpu and implements the +// [gotenberg.PdfEngine] interface. type PdfCpu struct { - conf *pdfcpuConfig.Configuration + binPath string } // Descriptor returns a [PdfCpu]'s module descriptor. @@ -32,16 +31,38 @@ func (engine *PdfCpu) Descriptor() gotenberg.ModuleDescriptor { // Provision sets the engine properties. func (engine *PdfCpu) Provision(ctx *gotenberg.Context) error { - pdfcpuConfig.ConfigPath = "disable" - pdfcpuLog.DisableLoggers() - engine.conf = pdfcpuConfig.NewDefaultConfiguration() + binPath, ok := os.LookupEnv("PDFCPU_BIN_PATH") + if !ok { + return errors.New("PDFCPU_BIN_PATH environment variable is not set") + } + + engine.binPath = binPath + + return nil +} + +// Validate validates the module properties. +func (engine *PdfCpu) Validate() error { + _, err := os.Stat(engine.binPath) + if os.IsNotExist(err) { + return fmt.Errorf("pdfcpu binary path does not exist: %w", err) + } return nil } // Merge combines multiple PDFs into a single PDF. func (engine *PdfCpu) Merge(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { - err := pdfcpuAPI.MergeCreateFile(inputPaths, outputPath, false, engine.conf) + var args []string + args = append(args, "merge", outputPath) + args = append(args, inputPaths...) 
+ + cmd, err := gotenberg.CommandContext(ctx, logger, engine.binPath, args...) + if err != nil { + return fmt.Errorf("create command: %w", err) + } + + _, err = cmd.Exec() if err == nil { return nil } @@ -68,5 +89,6 @@ func (engine *PdfCpu) WriteMetadata(ctx context.Context, logger *zap.Logger, met var ( _ gotenberg.Module = (*PdfCpu)(nil) _ gotenberg.Provisioner = (*PdfCpu)(nil) + _ gotenberg.Validator = (*PdfCpu)(nil) _ gotenberg.PdfEngine = (*PdfCpu)(nil) ) diff --git a/pkg/modules/pdfcpu/pdfcpu_test.go b/pkg/modules/pdfcpu/pdfcpu_test.go index 6172fe2fd..f009218a2 100644 --- a/pkg/modules/pdfcpu/pdfcpu_test.go +++ b/pkg/modules/pdfcpu/pdfcpu_test.go @@ -33,14 +33,59 @@ func TestPdfCpu_Provision(t *testing.T) { } } +func TestPdfCpu_Validate(t *testing.T) { + for _, tc := range []struct { + scenario string + binPath string + expectError bool + }{ + { + scenario: "empty bin path", + binPath: "", + expectError: true, + }, + { + scenario: "bin path does not exist", + binPath: "/foo", + expectError: true, + }, + { + scenario: "validate success", + binPath: os.Getenv("PDFCPU_BIN_PATH"), + expectError: false, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + engine := new(PdfCpu) + engine.binPath = tc.binPath + err := engine.Validate() + + if !tc.expectError && err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + if tc.expectError && err == nil { + t.Fatal("expected error but got none") + } + }) + } +} + func TestPdfCpu_Merge(t *testing.T) { for _, tc := range []struct { scenario string + ctx context.Context inputPaths []string expectError bool }{ + { + scenario: "invalid context", + ctx: nil, + expectError: true, + }, { scenario: "invalid input path", + ctx: context.TODO(), inputPaths: []string{ "foo", }, @@ -48,6 +93,7 @@ func TestPdfCpu_Merge(t *testing.T) { }, { scenario: "single file success", + ctx: context.TODO(), inputPaths: []string{ "/tests/test/testdata/pdfengines/sample1.pdf", }, @@ -55,10 +101,12 @@ func TestPdfCpu_Merge(t *testing.T) 
{ }, { scenario: "many files success", + ctx: context.TODO(), inputPaths: []string{ "/tests/test/testdata/pdfengines/sample1.pdf", "/tests/test/testdata/pdfengines/sample2.pdf", }, + expectError: false, }, } { t.Run(tc.scenario, func(t *testing.T) { @@ -81,7 +129,7 @@ func TestPdfCpu_Merge(t *testing.T) { } }() - err = engine.Merge(nil, nil, tc.inputPaths, outputDir+"/foo.pdf") + err = engine.Merge(tc.ctx, zap.NewNop(), tc.inputPaths, outputDir+"/foo.pdf") if !tc.expectError && err != nil { t.Fatalf("expected no error but got: %v", err) diff --git a/pkg/modules/pdftk/pdftk.go b/pkg/modules/pdftk/pdftk.go index 571b8016b..9846ee9df 100644 --- a/pkg/modules/pdftk/pdftk.go +++ b/pkg/modules/pdftk/pdftk.go @@ -45,7 +45,7 @@ func (engine *PdfTk) Provision(ctx *gotenberg.Context) error { func (engine *PdfTk) Validate() error { _, err := os.Stat(engine.binPath) if os.IsNotExist(err) { - return fmt.Errorf("PdfTk binary path does not exist: %w", err) + return fmt.Errorf("PDFtk binary path does not exist: %w", err) } return nil diff --git a/pkg/modules/pdftk/pdftk_test.go b/pkg/modules/pdftk/pdftk_test.go index bdc0a508e..c7b864eca 100644 --- a/pkg/modules/pdftk/pdftk_test.go +++ b/pkg/modules/pdftk/pdftk_test.go @@ -97,6 +97,7 @@ func TestPdfTk_Merge(t *testing.T) { inputPaths: []string{ "/tests/test/testdata/pdfengines/sample1.pdf", }, + expectError: false, }, { scenario: "many files success", @@ -105,6 +106,7 @@ func TestPdfTk_Merge(t *testing.T) { "/tests/test/testdata/pdfengines/sample1.pdf", "/tests/test/testdata/pdfengines/sample2.pdf", }, + expectError: false, }, } { t.Run(tc.scenario, func(t *testing.T) { diff --git a/pkg/modules/qpdf/qpdf_test.go b/pkg/modules/qpdf/qpdf_test.go index a17bafb5a..a966928d0 100644 --- a/pkg/modules/qpdf/qpdf_test.go +++ b/pkg/modules/qpdf/qpdf_test.go @@ -97,6 +97,7 @@ func TestQPdf_Merge(t *testing.T) { inputPaths: []string{ "/tests/test/testdata/pdfengines/sample1.pdf", }, + expectError: false, }, { scenario: "many files success", 
@@ -105,6 +106,7 @@ func TestQPdf_Merge(t *testing.T) { "/tests/test/testdata/pdfengines/sample1.pdf", "/tests/test/testdata/pdfengines/sample2.pdf", }, + expectError: false, }, } { t.Run(tc.scenario, func(t *testing.T) { diff --git a/scripts/release.sh b/scripts/release.sh index 1b56ce2f5..e6303df7d 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -9,9 +9,10 @@ GOTENBERG_USER_GID="$3" GOTENBERG_USER_UID="$4" NOTO_COLOR_EMOJI_VERSION="$5" PDFTK_VERSION="$6" -DOCKER_REGISTRY="$7" -DOCKER_REPOSITORY="$8" -LINUX_AMD64_RELEASE="$9" +PDFCPU_VERSION="$7" +DOCKER_REGISTRY="$8" +DOCKER_REPOSITORY="$9" +LINUX_AMD64_RELEASE="${10}" # Find out if given version is "semver". GOTENBERG_VERSION="${GOTENBERG_VERSION//v}" @@ -65,6 +66,7 @@ docker buildx build \ --build-arg GOTENBERG_USER_UID="$GOTENBERG_USER_UID" \ --build-arg NOTO_COLOR_EMOJI_VERSION="$NOTO_COLOR_EMOJI_VERSION" \ --build-arg PDFTK_VERSION="$PDFTK_VERSION" \ + --build-arg PDFCPU_VERSION="$PDFCPU_VERSION" \ $PLATFORM_FLAG \ "${TAGS[@]}" \ --push \ From 119bba4f045cc48cbaaf3301dd8b705091f015c0 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Tue, 8 Oct 2024 16:23:12 +0200 Subject: [PATCH 05/25] feat(api): add flag --api-bind-ip --- Makefile | 8 ++-- pkg/modules/api/api.go | 18 ++++++-- pkg/modules/api/api_test.go | 88 ++++++++++++++++++++++++++++--------- 3 files changed, 88 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index 2b3c23b3b..67529de68 100644 --- a/Makefile +++ b/Makefile @@ -32,6 +32,7 @@ build: ## Build the Gotenberg's Docker image GOTENBERG_GRACEFUL_SHUTDOWN_DURATION=30s API_PORT=3000 API_PORT_FROM_ENV= +API_BIND_IP= API_START_TIMEOUT=30s API_TIMEOUT=30s API_BODY_LIMIT= @@ -97,6 +98,7 @@ run: ## Start a Gotenberg container --gotenberg-graceful-shutdown-duration=$(GOTENBERG_GRACEFUL_SHUTDOWN_DURATION) \ --api-port=$(API_PORT) \ --api-port-from-env=$(API_PORT_FROM_ENV) \ + --api-bind-ip=$(API_BIND_IP) \ --api-start-timeout=$(API_START_TIMEOUT) \ --api-timeout=$(API_TIMEOUT) \ 
--api-body-limit="$(API_BODY_LIMIT)" \ @@ -104,9 +106,9 @@ run: ## Start a Gotenberg container --api-trace-header=$(API_TRACE_HEADER) \ --api-enable-basic-auth=$(API_ENABLE_BASIC_AUTH) \ --api-download-from-allow-list=$(API-DOWNLOAD-FROM-ALLOW-LIST) \ - --api-download-from-deny-list=$(API-DOWNLOAD-FROM-DENY-LIST) \ - --api-download-from-max-retry=$(API-DOWNLOAD-FROM-FROM-MAX-RETRY) \ - --api-disable-download-from=$(API-DISABLE-DOWNLOAD-FROM) \ + --api-download-from-deny-list=$(API-DOWNLOAD-FROM-DENY-LIST) \ + --api-download-from-max-retry=$(API-DOWNLOAD-FROM-FROM-MAX-RETRY) \ + --api-disable-download-from=$(API-DISABLE-DOWNLOAD-FROM) \ --api-disable-health-check-logging=$(API_DISABLE_HEALTH_CHECK_LOGGING) \ --chromium-restart-after=$(CHROMIUM_RESTART_AFTER) \ --chromium-auto-start=$(CHROMIUM_AUTO_START) \ diff --git a/pkg/modules/api/api.go b/pkg/modules/api/api.go index 227e7eafc..e352ea563 100644 --- a/pkg/modules/api/api.go +++ b/pkg/modules/api/api.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "net" "net/http" "sort" "strings" @@ -29,6 +30,7 @@ func init() { // middlewares or health checks. 
type Api struct { port int + bindIp string tlsCertFile string tlsKeyFile string startTimeout time.Duration @@ -171,6 +173,7 @@ func (a *Api) Descriptor() gotenberg.ModuleDescriptor { fs := flag.NewFlagSet("api", flag.ExitOnError) fs.Int("api-port", 3000, "Set the port on which the API should listen") fs.String("api-port-from-env", "", "Set the environment variable with the port on which the API should listen - override the default port") + fs.String("api-bind-ip", "", "Set the IP address the API should bind to for incoming connections") fs.String("api-tls-cert-file", "", "Path to the TLS/SSL certificate file - for HTTPS support") fs.String("api-tls-key-file", "", "Path to the TLS/SSL key file - for HTTPS support") fs.Duration("api-start-timeout", time.Duration(30)*time.Second, "Set the time limit for the API to start") @@ -194,6 +197,7 @@ func (a *Api) Descriptor() gotenberg.ModuleDescriptor { func (a *Api) Provision(ctx *gotenberg.Context) error { flags := ctx.ParsedFlags() a.port = flags.MustInt("api-port") + a.bindIp = flags.MustString("api-bind-ip") a.tlsCertFile = flags.MustString("api-tls-cert-file") a.tlsKeyFile = flags.MustString("api-tls-key-file") a.startTimeout = flags.MustDuration("api-start-timeout") @@ -329,6 +333,10 @@ func (a *Api) Validate() error { ) } + if a.bindIp != "" && net.ParseIP(a.bindIp) == nil { + err = multierr.Append(err, errors.New("IP must be a valid IP address")) + } + if (a.tlsCertFile != "" && a.tlsKeyFile == "") || (a.tlsCertFile == "" && a.tlsKeyFile != "") { err = multierr.Append(err, errors.New("both TLS certificate and key files must be set"), @@ -522,11 +530,11 @@ func (a *Api) Start() error { var err error if a.tlsCertFile != "" && a.tlsKeyFile != "" { // Start an HTTPS server (supports HTTP/2). - err = a.srv.StartTLS(fmt.Sprintf(":%d", a.port), a.tlsCertFile, a.tlsKeyFile) + err = a.srv.StartTLS(fmt.Sprintf("%s:%d", a.bindIp, a.port), a.tlsCertFile, a.tlsKeyFile) } else { // Start an HTTP/2 Cleartext (non-HTTPS) server. 
server := &http2.Server{} - err = a.srv.StartH2CServer(fmt.Sprintf(":%d", a.port), server) + err = a.srv.StartH2CServer(fmt.Sprintf("%s:%d", a.bindIp, a.port), server) } if !errors.Is(err, http.ErrServerClosed) { a.logger.Fatal(err.Error()) @@ -538,7 +546,11 @@ func (a *Api) Start() error { // StartupMessage returns a custom startup message. func (a *Api) StartupMessage() string { - return fmt.Sprintf("server listening on port %d", a.port) + ip := a.bindIp + if a.bindIp == "" { + ip = "[::]" + } + return fmt.Sprintf("server started on %s:%d", ip, a.port) } // Stop stops the HTTP server. diff --git a/pkg/modules/api/api_test.go b/pkg/modules/api/api_test.go index 8e8a46156..885076af4 100644 --- a/pkg/modules/api/api_test.go +++ b/pkg/modules/api/api_test.go @@ -58,10 +58,10 @@ func TestApi_Provision(t *testing.T) { expectError: true, }, { - scenario: "basic auth: non-existing GOTENBERG_API_BASIC_AUTH_USERNAME environment variable", + scenario: "port from env: invalid environment variable value", ctx: func() *gotenberg.Context { fs := new(Api).Descriptor().FlagSet - err := fs.Parse([]string{"--api-enable-basic-auth=true"}) + err := fs.Parse([]string{"--api-port-from-env=PORT"}) if err != nil { t.Fatalf("expected no error but got: %v", err) } @@ -73,10 +73,16 @@ func TestApi_Provision(t *testing.T) { nil, ) }(), + setEnv: func() { + err := os.Setenv("PORT", "foo") + if err != nil { + t.Fatalf("expected no error but got: %v", err) + } + }, expectError: true, }, { - scenario: "basic auth: non-existing GOTENBERG_API_BASIC_AUTH_PASSWORD environment variable", + scenario: "basic auth: non-existing GOTENBERG_API_BASIC_AUTH_USERNAME environment variable", ctx: func() *gotenberg.Context { fs := new(Api).Descriptor().FlagSet err := fs.Parse([]string{"--api-enable-basic-auth=true"}) @@ -91,19 +97,13 @@ func TestApi_Provision(t *testing.T) { nil, ) }(), - setEnv: func() { - err := os.Setenv("GOTENBERG_API_BASIC_AUTH_USERNAME", "foo") - if err != nil { - t.Fatalf("expected no 
error but got: %v", err) - } - }, expectError: true, }, { - scenario: "port from env: invalid environment variable value", + scenario: "basic auth: non-existing GOTENBERG_API_BASIC_AUTH_PASSWORD environment variable", ctx: func() *gotenberg.Context { fs := new(Api).Descriptor().FlagSet - err := fs.Parse([]string{"--api-port-from-env=PORT"}) + err := fs.Parse([]string{"--api-enable-basic-auth=true"}) if err != nil { t.Fatalf("expected no error but got: %v", err) } @@ -116,7 +116,7 @@ func TestApi_Provision(t *testing.T) { ) }(), setEnv: func() { - err := os.Setenv("PORT", "foo") + err := os.Setenv("GOTENBERG_API_BASIC_AUTH_USERNAME", "foo") if err != nil { t.Fatalf("expected no error but got: %v", err) } @@ -462,6 +462,7 @@ func TestApi_Validate(t *testing.T) { for _, tc := range []struct { scenario string port int + bindIp string tlsCertFile string tlsKeyFile string rootPath string @@ -473,6 +474,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid port (< 1)", port: 0, + bindIp: "127.0.0.1", rootPath: "/foo/", traceHeader: "foo", routes: nil, @@ -482,6 +484,17 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid port (> 65535)", port: 65536, + bindIp: "127.0.0.1", + rootPath: "/foo/", + traceHeader: "foo", + routes: nil, + middlewares: nil, + expectError: true, + }, + { + scenario: "invalid IP", + port: 10, + bindIp: "foo", rootPath: "/foo/", traceHeader: "foo", routes: nil, @@ -491,6 +504,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid TLS files: only cert file provided", port: 10, + bindIp: "127.0.0.1", tlsCertFile: "cert.pem", rootPath: "/foo/", traceHeader: "foo", @@ -501,6 +515,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid TLS files: only key file provided", port: 10, + bindIp: "127.0.0.1", tlsKeyFile: "key.pem", rootPath: "/foo/", traceHeader: "foo", @@ -511,6 +526,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid root path: missing / prefix", port: 10, + bindIp: "127.0.0.1", rootPath: 
"foo/", traceHeader: "foo", routes: nil, @@ -520,6 +536,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid root path: missing / suffix", port: 10, + bindIp: "127.0.0.1", rootPath: "/foo", traceHeader: "foo", routes: nil, @@ -529,6 +546,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid trace header", port: 10, + bindIp: "127.0.0.1", rootPath: "/foo/", traceHeader: "", routes: nil, @@ -538,6 +556,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid route: empty path", port: 10, + bindIp: "127.0.0.1", rootPath: "/foo/", traceHeader: "foo", routes: []Route{ @@ -551,6 +570,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid route: missing / prefix in path", port: 10, + bindIp: "127.0.0.1", rootPath: "/foo/", traceHeader: "foo", routes: []Route{ @@ -564,6 +584,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid multipart route: no /forms prefix in path", port: 10, + bindIp: "127.0.0.1", rootPath: "/foo/", traceHeader: "foo", routes: []Route{ @@ -578,6 +599,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid route: no method", port: 10, + bindIp: "127.0.0.1", rootPath: "/foo/", traceHeader: "foo", routes: []Route{ @@ -592,6 +614,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid route: nil handler", port: 10, + bindIp: "127.0.0.1", rootPath: "/foo/", traceHeader: "foo", routes: []Route{ @@ -607,6 +630,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid route: path already existing", port: 10, + bindIp: "127.0.0.1", rootPath: "/foo/", traceHeader: "foo", routes: []Route{ @@ -627,6 +651,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "invalid middleware: nil handler", port: 10, + bindIp: "127.0.0.1", rootPath: "/foo/", traceHeader: "foo", routes: nil, @@ -641,6 +666,7 @@ func TestApi_Validate(t *testing.T) { { scenario: "success", port: 10, + bindIp: "127.0.0.1", rootPath: "/foo/", traceHeader: "foo", routes: []Route{ @@ -694,6 +720,7 @@ func TestApi_Validate(t 
*testing.T) { t.Run(tc.scenario, func(t *testing.T) { mod := Api{ port: tc.port, + bindIp: tc.bindIp, tlsCertFile: tc.tlsCertFile, tlsKeyFile: tc.tlsKeyFile, rootPath: tc.rootPath, @@ -918,15 +945,36 @@ func TestApi_Start(t *testing.T) { } func TestApi_StartupMessage(t *testing.T) { - mod := Api{ - port: 3000, - } - - actual := mod.StartupMessage() - expect := "server listening on port 3000" + for _, tc := range []struct { + scenario string + port int + bindIp string + expectMessage string + }{ + { + scenario: "no custom IP", + port: 3000, + bindIp: "", + expectMessage: "server started on [::]:3000", + }, + { + scenario: "custom IP", + port: 3000, + bindIp: "127.0.0.1", + expectMessage: "server started on 127.0.0.1:3000", + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + mod := Api{ + port: tc.port, + bindIp: tc.bindIp, + } - if actual != expect { - t.Errorf("expected '%s' but got '%s'", expect, actual) + actual := mod.StartupMessage() + if actual != tc.expectMessage { + t.Errorf("expected '%s' but got '%s'", tc.expectMessage, actual) + } + }) } } From 9daecc127ac0b8fafa4f8420527b5ec2d8fd3432 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Tue, 8 Oct 2024 16:24:08 +0200 Subject: [PATCH 06/25] chore(deps): update Go dependencies --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index cc3688e99..837d542ca 100644 --- a/go.mod +++ b/go.mod @@ -59,5 +59,5 @@ require ( github.com/valyala/fasttemplate v1.2.2 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect golang.org/x/time v0.7.0 // indirect - google.golang.org/protobuf v1.34.2 // indirect + google.golang.org/protobuf v1.35.1 // indirect ) diff --git a/go.sum b/go.sum index 0e058597e..6412b9793 100644 --- a/go.sum +++ b/go.sum @@ -145,7 +145,7 @@ golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= golang.org/x/time 
v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 8a9f0a245e67c73f81dd21521d285ede360b10c0 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Tue, 8 Oct 2024 16:30:00 +0200 Subject: [PATCH 07/25] chore(api): improve --api-body-limit flag description to include examples --- pkg/modules/api/api.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/modules/api/api.go b/pkg/modules/api/api.go index e352ea563..3d6cb4598 100644 --- a/pkg/modules/api/api.go +++ b/pkg/modules/api/api.go @@ -178,7 +178,7 @@ func (a *Api) Descriptor() gotenberg.ModuleDescriptor { fs.String("api-tls-key-file", "", "Path to the TLS/SSL key file - for HTTPS support") fs.Duration("api-start-timeout", time.Duration(30)*time.Second, "Set the time limit for the API to start") fs.Duration("api-timeout", time.Duration(30)*time.Second, "Set the time limit for requests") - fs.String("api-body-limit", "", "Set the body limit for multipart/form-data requests") + fs.String("api-body-limit", "", "Set the body limit for multipart/form-data requests - it accepts values like 5MB, 1GB, etc") fs.String("api-root-path", "/", "Set the root path of the API - for service discovery via URL paths") fs.String("api-trace-header", "Gotenberg-Trace", "Set the header name to use for identifying requests") fs.Bool("api-enable-basic-auth", false, "Enable basic authentication - will look 
for the GOTENBERG_API_BASIC_AUTH_USERNAME and GOTENBERG_API_BASIC_AUTH_PASSWORD environment variables") From 99c328c30226813df14a668de1bf35cac85bfc1b Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Thu, 10 Oct 2024 13:15:56 +0200 Subject: [PATCH 08/25] fix(ci): LINUX_AMD64_RELEASE is no longer ignored thanks to curly braces --- scripts/release.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/release.sh b/scripts/release.sh index e6303df7d..f9c014ff9 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -12,7 +12,7 @@ PDFTK_VERSION="$6" PDFCPU_VERSION="$7" DOCKER_REGISTRY="$8" DOCKER_REPOSITORY="$9" -LINUX_AMD64_RELEASE="$10" +LINUX_AMD64_RELEASE="${10}" # Find out if given version is "semver". GOTENBERG_VERSION="${GOTENBERG_VERSION//v}" From 8ff9d3bcf113a938fcfb16eb3838e107360c1014 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Fri, 11 Oct 2024 09:32:01 +0200 Subject: [PATCH 09/25] feat(chromium): add scope to extraHttpHeaders --- pkg/modules/chromium/browser.go | 12 +- pkg/modules/chromium/browser_test.go | 77 +++++++- pkg/modules/chromium/chromium.go | 19 +- pkg/modules/chromium/events.go | 87 ++++++++- pkg/modules/chromium/routes.go | 54 +++++- pkg/modules/chromium/routes_test.go | 261 ++++++++++++++++++++++----- pkg/modules/chromium/tasks.go | 49 ++--- 7 files changed, 471 insertions(+), 88 deletions(-) diff --git a/pkg/modules/chromium/browser.go b/pkg/modules/chromium/browser.go index c7b105aee..4e895203a 100644 --- a/pkg/modules/chromium/browser.go +++ b/pkg/modules/chromium/browser.go @@ -228,7 +228,6 @@ func (b *chromiumBrowser) pdf(ctx context.Context, logger *zap.Logger, url, outp disableJavaScriptActionFunc(logger, b.arguments.disableJavaScript), setCookiesActionFunc(logger, options.Cookies), userAgentOverride(logger, options.UserAgent), - extraHttpHeadersActionFunc(logger, options.ExtraHttpHeaders), navigateActionFunc(logger, url, options.SkipNetworkIdleEvent), hideDefaultWhiteBackgroundActionFunc(logger,
options.OmitBackground, options.PrintBackground), forceExactColorsActionFunc(), @@ -252,7 +251,6 @@ func (b *chromiumBrowser) screenshot(ctx context.Context, logger *zap.Logger, ur disableJavaScriptActionFunc(logger, b.arguments.disableJavaScript), setCookiesActionFunc(logger, options.Cookies), userAgentOverride(logger, options.UserAgent), - extraHttpHeadersActionFunc(logger, options.ExtraHttpHeaders), navigateActionFunc(logger, url, options.SkipNetworkIdleEvent), hideDefaultWhiteBackgroundActionFunc(logger, options.OmitBackground, true), forceExactColorsActionFunc(), @@ -291,8 +289,14 @@ func (b *chromiumBrowser) do(ctx context.Context, logger *zap.Logger, url string defer taskCancel() // We validate all others requests against our allow / deny lists. - // If a request does not pass the validation, we make it fail. - listenForEventRequestPaused(taskCtx, logger, b.arguments.allowList, b.arguments.denyList) + // If a request does not pass the validation, we make it fail. It also sets + // the extra HTTP headers, if any. + // See https://github.com/gotenberg/gotenberg/issues/1011.
+ listenForEventRequestPaused(taskCtx, logger, eventRequestPausedOptions{ + allowList: b.arguments.allowList, + denyList: b.arguments.denyList, + extraHttpHeaders: options.ExtraHttpHeaders, + }) var ( invalidHttpStatusCode error diff --git a/pkg/modules/chromium/browser_test.go b/pkg/modules/chromium/browser_test.go index dfee69ab2..9cc4bcb02 100644 --- a/pkg/modules/chromium/browser_test.go +++ b/pkg/modules/chromium/browser_test.go @@ -702,8 +702,21 @@ func TestChromiumBrowser_pdf(t *testing.T) { return fs }(), options: PdfOptions{ - Options: Options{ExtraHttpHeaders: map[string]string{ - "X-Foo": "Bar", + Options: Options{ExtraHttpHeaders: []ExtraHttpHeader{ + { + Name: "X-Foo", + Value: "foo", + }, + { + Name: "X-Bar", + Value: "bar", + Scope: regexp2.MustCompile(`.*index\.html.*`, 0), + }, + { + Name: "X-Baz", + Value: "baz", + Scope: regexp2.MustCompile(`.*another\.html.*`, 0), + }, }}, }, noDeadline: false, @@ -711,6 +724,10 @@ func TestChromiumBrowser_pdf(t *testing.T) { expectError: false, expectedLogEntries: []string{ "extra HTTP headers:", + "extra HTTP header 'X-Foo' will be set for request URL", + "extra HTTP header 'X-Bar' (scoped) will be set for request URL", + "extra HTTP header 'X-Baz' (scoped) will not be set for request URL", + "setting extra HTTP headers for request URL", }, }, { @@ -1716,6 +1733,41 @@ func TestChromiumBrowser_screenshot(t *testing.T) { "set cookie", }, }, + { + scenario: "user agent override", + browser: newChromiumBrowser( + browserArguments{ + binPath: os.Getenv("CHROMIUM_BIN_PATH"), + wsUrlReadTimeout: 5 * time.Second, + allowList: regexp2.MustCompile("", 0), + denyList: regexp2.MustCompile("", 0), + }, + ), + fs: func() *gotenberg.FileSystem { + fs := gotenberg.NewFileSystem() + + err := os.MkdirAll(fs.WorkingDirPath(), 0o755) + if err != nil { + t.Fatalf(fmt.Sprintf("expected no error but got: %v", err)) + } + + err = os.WriteFile(fmt.Sprintf("%s/index.html", fs.WorkingDirPath()), []byte("

User-Agent override

"), 0o755) + if err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + return fs + }(), + options: ScreenshotOptions{ + Options: Options{UserAgent: "foo"}, + }, + noDeadline: false, + start: true, + expectError: false, + expectedLogEntries: []string{ + fmt.Sprintf("user agent override: foo"), + }, + }, { scenario: "extra HTTP headers", browser: newChromiumBrowser( @@ -1742,8 +1794,21 @@ func TestChromiumBrowser_screenshot(t *testing.T) { return fs }(), options: ScreenshotOptions{ - Options: Options{ExtraHttpHeaders: map[string]string{ - "X-Foo": "Bar", + Options: Options{ExtraHttpHeaders: []ExtraHttpHeader{ + { + Name: "X-Foo", + Value: "foo", + }, + { + Name: "X-Bar", + Value: "bar", + Scope: regexp2.MustCompile(`.*index\.html.*`, 0), + }, + { + Name: "X-Baz", + Value: "baz", + Scope: regexp2.MustCompile(`.*another\.html.*`, 0), + }, }}, }, noDeadline: false, @@ -1751,6 +1816,10 @@ func TestChromiumBrowser_screenshot(t *testing.T) { expectError: false, expectedLogEntries: []string{ "extra HTTP headers:", + "extra HTTP header 'X-Foo' will be set for request URL", + "extra HTTP header 'X-Bar' (scoped) will be set for request URL", + "extra HTTP header 'X-Baz' (scoped) will not be set for request URL", + "setting extra HTTP headers for request URL", }, }, { diff --git a/pkg/modules/chromium/chromium.go b/pkg/modules/chromium/chromium.go index 735b2700e..0a8a8a2e3 100644 --- a/pkg/modules/chromium/chromium.go +++ b/pkg/modules/chromium/chromium.go @@ -9,6 +9,7 @@ import ( "github.com/alexliesenfeld/health" "github.com/chromedp/cdproto/network" + "github.com/dlclark/regexp2" flag "github.com/spf13/pflag" "go.uber.org/zap" @@ -109,7 +110,7 @@ type Options struct { // ExtraHttpHeaders are extra HTTP headers to send by Chromium while // loading he HTML document. - ExtraHttpHeaders map[string]string + ExtraHttpHeaders []ExtraHttpHeader // EmulatedMediaType is the media type to emulate, either "screen" or // "print". 
@@ -289,6 +290,22 @@ type Cookie struct { SameSite network.CookieSameSite `json:"sameSite,omitempty"` } +// ExtraHttpHeader is an extra HTTP header sent by Chromium. +type ExtraHttpHeader struct { + // Name is the header name. + // Required. + Name string + + // Value is the header value. + // Required. + Value string + + // Scope is the header scope. If nil, the header will be applied to ALL + // requests from the page. + // Optional. + Scope *regexp2.Regexp +} + // Api helps to interact with Chromium for converting HTML documents to PDF. type Api interface { Pdf(ctx context.Context, logger *zap.Logger, url, outputPath string, options PdfOptions) error diff --git a/pkg/modules/chromium/events.go b/pkg/modules/chromium/events.go index 8a8ca1dd4..85d714560 100644 --- a/pkg/modules/chromium/events.go +++ b/pkg/modules/chromium/events.go @@ -20,10 +20,22 @@ import ( "github.com/gotenberg/gotenberg/v8/pkg/gotenberg" ) +type eventRequestPausedOptions struct { + allowList, denyList *regexp2.Regexp + extraHttpHeaders []ExtraHttpHeader +} + // listenForEventRequestPaused listens for requests to check if they are -// allowed or not.network.SetBlockedURLS() +// allowed or not. It also sets the extra HTTP headers, if any. +// See https://github.com/gotenberg/gotenberg/issues/1011. // TODO: https://chromedevtools.github.io/devtools-protocol/tot/Network/#method-setBlockedURLs (experimental for now).
-func listenForEventRequestPaused(ctx context.Context, logger *zap.Logger, allowList *regexp2.Regexp, denyList *regexp2.Regexp) { +func listenForEventRequestPaused(ctx context.Context, logger *zap.Logger, options eventRequestPausedOptions) { + if len(options.extraHttpHeaders) == 0 { + logger.Debug("no extra HTTP headers") + } else { + logger.Debug(fmt.Sprintf("extra HTTP headers: %+v", options.extraHttpHeaders)) + } + chromedp.ListenTarget(ctx, func(ev interface{}) { switch e := ev.(type) { case *fetch.EventRequestPaused: @@ -37,7 +49,7 @@ func listenForEventRequestPaused(ctx context.Context, logger *zap.Logger, allowL return } - err := gotenberg.FilterDeadline(allowList, denyList, e.Request.URL, deadline) + err := gotenberg.FilterDeadline(options.allowList, options.denyList, e.Request.URL, deadline) if err != nil { logger.Warn(err.Error()) allow = false @@ -46,19 +58,78 @@ func listenForEventRequestPaused(ctx context.Context, logger *zap.Logger, allowL cctx := chromedp.FromContext(ctx) executorCtx := cdp.WithExecutor(ctx, cctx.Target) - if allow { - req := fetch.ContinueRequest(e.RequestID) + if !allow { + req := fetch.FailRequest(e.RequestID, network.ErrorReasonAccessDenied) err = req.Do(executorCtx) if err != nil { - logger.Error(fmt.Sprintf("continue request: %s", err)) + logger.Error(fmt.Sprintf("fail request: %s", err)) } return } - req := fetch.FailRequest(e.RequestID, network.ErrorReasonAccessDenied) + req := fetch.ContinueRequest(e.RequestID) + + var extraHttpHeadersToSet []ExtraHttpHeader + if len(options.extraHttpHeaders) > 0 { + // The user wants to set extra HTTP headers. + + // First, we have to check if at least one header has to be + // set for the current request. + for _, header := range options.extraHttpHeaders { + if header.Scope == nil { + // Non-scoped header.
+ logger.Debug(fmt.Sprintf("extra HTTP header '%s' will be set for request URL '%s'", header.Name, e.Request.URL)) + extraHttpHeadersToSet = append(extraHttpHeadersToSet, header) + continue + } + + ok, err := header.Scope.MatchString(e.Request.URL) + if err != nil { + logger.Error(fmt.Sprintf("fail to match extra HTTP header '%s' scope with URL '%s': %s", header.Name, e.Request.URL, err)) + } else if ok { + logger.Debug(fmt.Sprintf("extra HTTP header '%s' (scoped) will be set for request URL '%s'", header.Name, e.Request.URL)) + extraHttpHeadersToSet = append(extraHttpHeadersToSet, header) + } else { + logger.Debug(fmt.Sprintf("scoped extra HTTP header '%s' (scoped) will not be set for request URL '%s'", header.Name, e.Request.URL)) + } + } + } + + if len(extraHttpHeadersToSet) > 0 { + logger.Debug(fmt.Sprintf("setting extra HTTP headers for request URL '%s': %+v", e.Request.URL, extraHttpHeadersToSet)) + + originalHeaders := e.Request.Headers + headers := make(map[string]string) + + for key, value := range originalHeaders { + strValue, ok := value.(string) + if ok { + headers[key] = strValue + } else { + logger.Error(fmt.Sprintf("ignoring header '%s' for URL '%s' since it cannot be cast to a string", key, e.Request.URL)) + } + } + + var headersEntries []*fetch.HeaderEntry + for key, value := range headers { + headersEntries = append(headersEntries, &fetch.HeaderEntry{ + Name: key, + Value: value, + }) + } + for _, header := range extraHttpHeadersToSet { + headersEntries = append(headersEntries, &fetch.HeaderEntry{ + Name: header.Name, + Value: header.Value, + }) + } + + req.Headers = headersEntries + } + err = req.Do(executorCtx) if err != nil { - logger.Error(fmt.Sprintf("fail request: %s", err)) + logger.Error(fmt.Sprintf("continue request: %s", err)) } }() } diff --git a/pkg/modules/chromium/routes.go b/pkg/modules/chromium/routes.go index ba1e7be24..a725c7cd4 100644 --- a/pkg/modules/chromium/routes.go +++ b/pkg/modules/chromium/routes.go @@ -13,6 +13,7 @@ 
import ( "strings" "time" + "github.com/dlclark/regexp2" "github.com/labstack/echo/v4" "github.com/microcosm-cc/bluemonday" "github.com/russross/blackfriday/v2" @@ -36,7 +37,7 @@ func FormDataChromiumOptions(ctx *api.Context) (*api.FormData, Options) { waitForExpression string cookies []Cookie userAgent string - extraHttpHeaders map[string]string + extraHttpHeaders []ExtraHttpHeader emulatedMediaType string omitBackground bool ) @@ -86,12 +87,59 @@ func FormDataChromiumOptions(ctx *api.Context) (*api.FormData, Options) { return nil } - err := json.Unmarshal([]byte(value), &extraHttpHeaders) + var headers map[string]string + err := json.Unmarshal([]byte(value), &headers) if err != nil { return fmt.Errorf("unmarshal extraHttpHeaders: %w", err) } - return nil + for k, v := range headers { + var scope string + var valueTokens []string + var invalidScopeToken bool + + tokens := strings.Split(v, ";") + for _, token := range tokens { + if strings.HasPrefix(strings.ToLower(strings.TrimSpace(token)), "scope") { + tokenNoSpaces := strings.Join(strings.Fields(token), "") + parts := strings.SplitN(tokenNoSpaces, "=", 2) + + if len(parts) == 2 && strings.ToLower(parts[0]) == "scope" && parts[1] != "" { + scope = parts[1] + } else { + err = multierr.Append(err, fmt.Errorf("invalid scope '%s' for header '%s'", scope, k)) + invalidScopeToken = true + break + } + } else { + if token != "" { + valueTokens = append(valueTokens, token) + } + } + } + + if invalidScopeToken { + continue + } + + var scopeRegexp *regexp2.Regexp + if len(scope) > 0 { + p, errCompile := regexp2.Compile(scope, 0) + if errCompile != nil { + err = multierr.Append(err, fmt.Errorf("invalid scope regex pattern for header '%s': %w", k, errCompile)) + continue + } + scopeRegexp = p + } + + extraHttpHeaders = append(extraHttpHeaders, ExtraHttpHeader{ + Name: k, + Value: strings.Join(valueTokens, "; "), + Scope: scopeRegexp, + }) + } + + return err }). 
Custom("emulatedMediaType", func(value string) error { if value == "" { diff --git a/pkg/modules/chromium/routes_test.go b/pkg/modules/chromium/routes_test.go index ba957e89c..29fb9d10c 100644 --- a/pkg/modules/chromium/routes_test.go +++ b/pkg/modules/chromium/routes_test.go @@ -7,8 +7,10 @@ import ( "net/http" "os" "reflect" + "sort" "testing" + "github.com/dlclark/regexp2" "github.com/google/uuid" "github.com/labstack/echo/v4" "go.uber.org/zap" @@ -19,14 +21,18 @@ import ( func TestFormDataChromiumOptions(t *testing.T) { for _, tc := range []struct { - scenario string - ctx *api.ContextMock - expectedOptions Options + scenario string + ctx *api.ContextMock + expectedOptions Options + compareWithoutDeepEqual bool + expectValidationError bool }{ { - scenario: "no custom form fields", - ctx: &api.ContextMock{Context: new(api.Context)}, - expectedOptions: DefaultOptions(), + scenario: "no custom form fields", + ctx: &api.ContextMock{Context: new(api.Context)}, + expectedOptions: DefaultOptions(), + compareWithoutDeepEqual: false, + expectValidationError: false, }, { scenario: "invalid failOnHttpStatusCodes form field", @@ -44,6 +50,8 @@ func TestFormDataChromiumOptions(t *testing.T) { options.FailOnHttpStatusCodes = nil return options }(), + compareWithoutDeepEqual: false, + expectValidationError: true, }, { scenario: "valid failOnHttpStatusCodes form field", @@ -61,6 +69,8 @@ func TestFormDataChromiumOptions(t *testing.T) { options.FailOnHttpStatusCodes = []int64{399, 499, 599} return options }(), + compareWithoutDeepEqual: false, + expectValidationError: false, }, { scenario: "invalid cookies form field", @@ -73,7 +83,9 @@ func TestFormDataChromiumOptions(t *testing.T) { }) return ctx }(), - expectedOptions: DefaultOptions(), + expectedOptions: DefaultOptions(), + compareWithoutDeepEqual: false, + expectValidationError: true, }, { scenario: "invalid cookies form field (missing required values)", @@ -93,6 +105,8 @@ func TestFormDataChromiumOptions(t *testing.T) { 
options.Cookies = []Cookie{{}} return options }(), + compareWithoutDeepEqual: false, + expectValidationError: true, }, { scenario: "valid cookies form field", @@ -114,9 +128,11 @@ func TestFormDataChromiumOptions(t *testing.T) { }} return options }(), + compareWithoutDeepEqual: false, + expectValidationError: false, }, { - scenario: "invalid extraHttpHeaders form field", + scenario: "invalid extraHttpHeaders form field: cannot unmarshall", ctx: func() *api.ContextMock { ctx := &api.ContextMock{Context: new(api.Context)} ctx.SetValues(map[string][]string{ @@ -126,7 +142,39 @@ func TestFormDataChromiumOptions(t *testing.T) { }) return ctx }(), - expectedOptions: DefaultOptions(), + expectedOptions: DefaultOptions(), + compareWithoutDeepEqual: false, + expectValidationError: true, + }, + { + scenario: "invalid extraHttpHeaders form field: invalid scope", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "extraHttpHeaders": { + `{"foo":"bar;scope;;"}`, + }, + }) + return ctx + }(), + expectedOptions: DefaultOptions(), + compareWithoutDeepEqual: false, + expectValidationError: true, + }, + { + scenario: "invalid extraHttpHeaders form field: invalid scope regex pattern", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "extraHttpHeaders": { + `{"foo":"bar;scope=*."}`, + }, + }) + return ctx + }(), + expectedOptions: DefaultOptions(), + compareWithoutDeepEqual: false, + expectValidationError: true, }, { scenario: "valid extraHttpHeaders form field", @@ -134,18 +182,28 @@ func TestFormDataChromiumOptions(t *testing.T) { ctx := &api.ContextMock{Context: new(api.Context)} ctx.SetValues(map[string][]string{ "extraHttpHeaders": { - `{"foo":"bar"}`, + `{"foo":"bar","baz":"qux;scope=https?:\\/\\/([a-zA-Z0-9-]+\\.)*qux\\.com\\/.*"}`, }, }) return ctx }(), expectedOptions: func() Options { options := DefaultOptions() - 
options.ExtraHttpHeaders = map[string]string{ - "foo": "bar", + options.ExtraHttpHeaders = []ExtraHttpHeader{ + { + Name: "foo", + Value: "bar", + }, + { + Name: "baz", + Value: "qux", + Scope: regexp2.MustCompile(`https?:\/\/([a-zA-Z0-9-]+\.)*qux\.com\/.*`, 0), + }, } return options }(), + compareWithoutDeepEqual: true, + expectValidationError: false, }, { scenario: "invalid emulatedMediaType form field", @@ -158,7 +216,8 @@ func TestFormDataChromiumOptions(t *testing.T) { }) return ctx }(), - expectedOptions: DefaultOptions(), + expectedOptions: DefaultOptions(), + expectValidationError: true, }, { scenario: "valid emulatedMediaType form field", @@ -176,14 +235,61 @@ func TestFormDataChromiumOptions(t *testing.T) { options.EmulatedMediaType = "screen" return options }(), + expectValidationError: false, }, } { t.Run(tc.scenario, func(t *testing.T) { tc.ctx.SetLogger(zap.NewNop()) - _, actual := FormDataChromiumOptions(tc.ctx.Context) + form, actual := FormDataChromiumOptions(tc.ctx.Context) - if !reflect.DeepEqual(actual, tc.expectedOptions) { - t.Fatalf("expected %+v but got: %+v", tc.expectedOptions, actual) + if tc.compareWithoutDeepEqual { + if len(tc.expectedOptions.ExtraHttpHeaders) != len(actual.ExtraHttpHeaders) { + t.Fatalf("expected %d extra HTTP headers, but got %d", len(tc.expectedOptions.ExtraHttpHeaders), len(actual.ExtraHttpHeaders)) + } + + sort.Slice(tc.expectedOptions.ExtraHttpHeaders, func(i, j int) bool { + return tc.expectedOptions.ExtraHttpHeaders[i].Name < tc.expectedOptions.ExtraHttpHeaders[j].Name + }) + sort.Slice(actual.ExtraHttpHeaders, func(i, j int) bool { + return actual.ExtraHttpHeaders[i].Name < actual.ExtraHttpHeaders[j].Name + }) + + for i := range tc.expectedOptions.ExtraHttpHeaders { + if tc.expectedOptions.ExtraHttpHeaders[i].Name != actual.ExtraHttpHeaders[i].Name { + t.Fatalf("expected '%s' extra HTTP header, but got '%s'", tc.expectedOptions.ExtraHttpHeaders[i].Name, tc.expectedOptions.ExtraHttpHeaders[i].Name) + } + + if 
tc.expectedOptions.ExtraHttpHeaders[i].Value != actual.ExtraHttpHeaders[i].Value { + t.Fatalf("expected '%s' as value for extra HTTP header '%s', but got '%s'", tc.expectedOptions.ExtraHttpHeaders[i].Value, tc.expectedOptions.ExtraHttpHeaders[i].Name, actual.ExtraHttpHeaders[i].Value) + } + + var expectedScope string + if tc.expectedOptions.ExtraHttpHeaders[i].Scope != nil { + expectedScope = tc.expectedOptions.ExtraHttpHeaders[i].Scope.String() + } + var actualScope string + if actual.ExtraHttpHeaders[i].Scope != nil { + actualScope = actual.ExtraHttpHeaders[i].Scope.String() + } + + if expectedScope != actualScope { + t.Fatalf("expected '%s' as scope for extra HTTP header '%s', but got '%s'", expectedScope, tc.expectedOptions.ExtraHttpHeaders[i].Name, actualScope) + } + } + } else { + if !reflect.DeepEqual(actual, tc.expectedOptions) { + t.Fatalf("expected %+v but got: %+v", tc.expectedOptions, actual) + } + } + + err := form.Validate() + + if tc.expectValidationError && err == nil { + t.Fatal("expected validation error but got none", err) + } + + if !tc.expectValidationError && err != nil { + t.Fatalf("expected no validation error but got: %v", err) } }) } @@ -191,14 +297,16 @@ func TestFormDataChromiumOptions(t *testing.T) { func TestFormDataChromiumPdfOptions(t *testing.T) { for _, tc := range []struct { - scenario string - ctx *api.ContextMock - expectedOptions PdfOptions + scenario string + ctx *api.ContextMock + expectedOptions PdfOptions + expectValidationError bool }{ { - scenario: "no custom form fields", - ctx: &api.ContextMock{Context: new(api.Context)}, - expectedOptions: DefaultPdfOptions(), + scenario: "no custom form fields", + ctx: &api.ContextMock{Context: new(api.Context)}, + expectedOptions: DefaultPdfOptions(), + expectValidationError: false, }, { scenario: "custom form fields (Options & PdfOptions)", @@ -220,29 +328,42 @@ func TestFormDataChromiumPdfOptions(t *testing.T) { options.EmulatedMediaType = "screen" return options }(), + 
expectValidationError: false, }, } { t.Run(tc.scenario, func(t *testing.T) { tc.ctx.SetLogger(zap.NewNop()) - _, actual := FormDataChromiumPdfOptions(tc.ctx.Context) + form, actual := FormDataChromiumPdfOptions(tc.ctx.Context) if !reflect.DeepEqual(actual, tc.expectedOptions) { t.Fatalf("expected %+v but got: %+v", tc.expectedOptions, actual) } + + err := form.Validate() + + if tc.expectValidationError && err == nil { + t.Fatal("expected validation error but got none", err) + } + + if !tc.expectValidationError && err != nil { + t.Fatalf("expected no validation error but got: %v", err) + } }) } } func TestFormDataChromiumScreenshotOptions(t *testing.T) { for _, tc := range []struct { - scenario string - ctx *api.ContextMock - expectedOptions ScreenshotOptions + scenario string + ctx *api.ContextMock + expectedOptions ScreenshotOptions + expectValidationError bool }{ { - scenario: "no custom form fields", - ctx: &api.ContextMock{Context: new(api.Context)}, - expectedOptions: DefaultScreenshotOptions(), + scenario: "no custom form fields", + ctx: &api.ContextMock{Context: new(api.Context)}, + expectedOptions: DefaultScreenshotOptions(), + expectValidationError: false, }, { scenario: "invalid format form field", @@ -260,6 +381,7 @@ func TestFormDataChromiumScreenshotOptions(t *testing.T) { options.Format = "" return options }(), + expectValidationError: true, }, { scenario: "valid png format form field", @@ -277,6 +399,7 @@ func TestFormDataChromiumScreenshotOptions(t *testing.T) { options.Format = "png" return options }(), + expectValidationError: false, }, { scenario: "valid jpeg format form field", @@ -294,6 +417,7 @@ func TestFormDataChromiumScreenshotOptions(t *testing.T) { options.Format = "jpeg" return options }(), + expectValidationError: false, }, { scenario: "valid webp format form field", @@ -311,6 +435,7 @@ func TestFormDataChromiumScreenshotOptions(t *testing.T) { options.Format = "webp" return options }(), + expectValidationError: false, }, { scenario: 
"invalid quality form field (not an integer)", @@ -328,6 +453,7 @@ func TestFormDataChromiumScreenshotOptions(t *testing.T) { options.Quality = 0 return options }(), + expectValidationError: true, }, { scenario: "invalid quality form field (< 0)", @@ -345,6 +471,7 @@ func TestFormDataChromiumScreenshotOptions(t *testing.T) { options.Quality = 0 return options }(), + expectValidationError: true, }, { scenario: "invalid quality form field (> 100)", @@ -362,6 +489,7 @@ func TestFormDataChromiumScreenshotOptions(t *testing.T) { options.Quality = 0 return options }(), + expectValidationError: true, }, { scenario: "valid quality form field", @@ -379,6 +507,7 @@ func TestFormDataChromiumScreenshotOptions(t *testing.T) { options.Quality = 50 return options }(), + expectValidationError: false, }, { scenario: "custom form fields (Options & ScreenshotOptions)", @@ -412,29 +541,42 @@ func TestFormDataChromiumScreenshotOptions(t *testing.T) { options.EmulatedMediaType = "screen" return options }(), + expectValidationError: false, }, } { t.Run(tc.scenario, func(t *testing.T) { tc.ctx.SetLogger(zap.NewNop()) - _, actual := FormDataChromiumScreenshotOptions(tc.ctx.Context) + form, actual := FormDataChromiumScreenshotOptions(tc.ctx.Context) if !reflect.DeepEqual(actual, tc.expectedOptions) { t.Fatalf("expected %+v but got: %+v", tc.expectedOptions, actual) } + + err := form.Validate() + + if tc.expectValidationError && err == nil { + t.Fatal("expected validation error but got none", err) + } + + if !tc.expectValidationError && err != nil { + t.Fatalf("expected no validation error but got: %v", err) + } }) } } func TestFormDataChromiumPdfFormats(t *testing.T) { for _, tc := range []struct { - scenario string - ctx *api.ContextMock - expectedPdfFormats gotenberg.PdfFormats + scenario string + ctx *api.ContextMock + expectedPdfFormats gotenberg.PdfFormats + expectValidationError bool }{ { - scenario: "no custom form fields", - ctx: &api.ContextMock{Context: new(api.Context)}, - 
expectedPdfFormats: gotenberg.PdfFormats{}, + scenario: "no custom form fields", + ctx: &api.ContextMock{Context: new(api.Context)}, + expectedPdfFormats: gotenberg.PdfFormats{}, + expectValidationError: false, }, { scenario: "pdfa and pdfua form fields", @@ -450,30 +592,44 @@ func TestFormDataChromiumPdfFormats(t *testing.T) { }) return ctx }(), - expectedPdfFormats: gotenberg.PdfFormats{PdfA: "foo", PdfUa: true}, + expectedPdfFormats: gotenberg.PdfFormats{PdfA: "foo", PdfUa: true}, + expectValidationError: false, }, } { t.Run(tc.scenario, func(t *testing.T) { tc.ctx.SetLogger(zap.NewNop()) - actual := FormDataChromiumPdfFormats(tc.ctx.Context.FormData()) + form := tc.ctx.Context.FormData() + actual := FormDataChromiumPdfFormats(form) if !reflect.DeepEqual(actual, tc.expectedPdfFormats) { t.Fatalf("expected %+v but got: %+v", tc.expectedPdfFormats, actual) } + + err := form.Validate() + + if tc.expectValidationError && err == nil { + t.Fatal("expected validation error but got none", err) + } + + if !tc.expectValidationError && err != nil { + t.Fatalf("expected no validation error but got: %v", err) + } }) } } func TestFormDataPdfMetadata(t *testing.T) { for _, tc := range []struct { - scenario string - ctx *api.ContextMock - expectedMetadata map[string]interface{} + scenario string + ctx *api.ContextMock + expectedMetadata map[string]interface{} + expectValidationError bool }{ { - scenario: "no metadata form field", - ctx: &api.ContextMock{Context: new(api.Context)}, - expectedMetadata: nil, + scenario: "no metadata form field", + ctx: &api.ContextMock{Context: new(api.Context)}, + expectedMetadata: nil, + expectValidationError: false, }, { scenario: "invalid metadata form field", @@ -486,7 +642,8 @@ func TestFormDataPdfMetadata(t *testing.T) { }) return ctx }(), - expectedMetadata: nil, + expectedMetadata: nil, + expectValidationError: true, }, { scenario: "valid metadata form field", @@ -502,15 +659,27 @@ func TestFormDataPdfMetadata(t *testing.T) { 
expectedMetadata: map[string]interface{}{ "foo": "bar", }, + expectValidationError: false, }, } { t.Run(tc.scenario, func(t *testing.T) { tc.ctx.SetLogger(zap.NewNop()) - actual := FormDataPdfMetadata(tc.ctx.Context.FormData()) + form := tc.ctx.Context.FormData() + actual := FormDataPdfMetadata(form) if !reflect.DeepEqual(actual, tc.expectedMetadata) { t.Fatalf("expected %+v but got: %+v", tc.expectedMetadata, actual) } + + err := form.Validate() + + if tc.expectValidationError && err == nil { + t.Fatal("expected validation error but got none", err) + } + + if !tc.expectValidationError && err != nil { + t.Fatalf("expected no validation error but got: %v", err) + } }) } } diff --git a/pkg/modules/chromium/tasks.go b/pkg/modules/chromium/tasks.go index ac5d6e198..4e841d75c 100644 --- a/pkg/modules/chromium/tasks.go +++ b/pkg/modules/chromium/tasks.go @@ -298,28 +298,33 @@ func userAgentOverride(logger *zap.Logger, userAgent string) chromedp.ActionFunc } } -func extraHttpHeadersActionFunc(logger *zap.Logger, extraHttpHeaders map[string]string) chromedp.ActionFunc { - return func(ctx context.Context) error { - if len(extraHttpHeaders) == 0 { - logger.Debug("no extra HTTP headers") - return nil - } - - logger.Debug(fmt.Sprintf("extra HTTP headers: %+v", extraHttpHeaders)) - - headers := make(network.Headers, len(extraHttpHeaders)) - for key, value := range extraHttpHeaders { - headers[key] = value - } - - err := network.SetExtraHTTPHeaders(headers).Do(ctx) - if err == nil { - return nil - } - - return fmt.Errorf("set extra HTTP headers: %w", err) - } -} +// This code has been replaced with the listenForEventRequestPaused function. +// Indeed, the user may want to scope the headers per domain, but using +// network.SetExtraHTTPHeaders set the headers for ALL requests from the page. +// See https://github.com/gotenberg/gotenberg/issues/1011. 
+// +//func extraHttpHeadersActionFunc(logger *zap.Logger, extraHttpHeaders map[string]string) chromedp.ActionFunc { +// return func(ctx context.Context) error { +// if len(extraHttpHeaders) == 0 { +// logger.Debug("no extra HTTP headers") +// return nil +// } +// +// logger.Debug(fmt.Sprintf("extra HTTP headers: %+v", extraHttpHeaders)) +// +// headers := make(network.Headers, len(extraHttpHeaders)) +// for key, value := range extraHttpHeaders { +// headers[key] = value +// } +// +// err := network.SetExtraHTTPHeaders(headers).Do(ctx) +// if err == nil { +// return nil +// } +// +// return fmt.Errorf("set extra HTTP headers: %w", err) +// } +//} func navigateActionFunc(logger *zap.Logger, url string, skipNetworkIdleEvent bool) chromedp.ActionFunc { return func(ctx context.Context) error { From 249745f2569eb64c17cb2b838d24b642ed83a38d Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Sun, 13 Oct 2024 11:29:38 +0200 Subject: [PATCH 10/25] feat(pdfengines): one flag per PDF engine method for a more granular selection of PDF engines --- Makefile | 8 + pkg/modules/pdfengines/multi.go | 29 ++- pkg/modules/pdfengines/multi_test.go | 240 ++++++++++++++-------- pkg/modules/pdfengines/pdfengines.go | 131 ++++++++---- pkg/modules/pdfengines/pdfengines_test.go | 102 +++++++-- 5 files changed, 361 insertions(+), 149 deletions(-) diff --git a/Makefile b/Makefile index 67529de68..230519df5 100644 --- a/Makefile +++ b/Makefile @@ -72,6 +72,10 @@ LOG_LEVEL=info LOG_FORMAT=auto LOG_FIELDS_PREFIX= PDFENGINES_ENGINES= +PDFENGINES_MERGE_ENGINES=qpdf,pdfcpu,pdftk +PDFENGINES_CONVERT_ENGINES=libreoffice-pdfengine +PDFENGINES_READ_METADATA_ENGINES=exiftool +PDFENGINES_WRITE_METADATA_ENGINES=exiftool PDFENGINES_DISABLE_ROUTES=false PROMETHEUS_NAMESPACE=gotenberg PROMETHEUS_COLLECT_INTERVAL=1s @@ -136,6 +140,10 @@ run: ## Start a Gotenberg container --log-format=$(LOG_FORMAT) \ --log-fields-prefix=$(LOG_FIELDS_PREFIX) \ --pdfengines-engines=$(PDFENGINES_ENGINES) \ + 
--pdfengines-merge-engines=$(PDFENGINES_MERGE_ENGINES) \ + --pdfengines-convert-engines=$(PDFENGINES_CONVERT_ENGINES) \ + --pdfengines-read-metadata-engines=$(PDFENGINES_READ_METADATA_ENGINES) \ + --pdfengines-write-metadata-engines=$(PDFENGINES_WRITE_METADATA_ENGINES) \ --pdfengines-disable-routes=$(PDFENGINES_DISABLE_ROUTES) \ --prometheus-namespace=$(PROMETHEUS_NAMESPACE) \ --prometheus-collect-interval=$(PROMETHEUS_COLLECT_INTERVAL) \ diff --git a/pkg/modules/pdfengines/multi.go b/pkg/modules/pdfengines/multi.go index d948b4130..4cbbc3eac 100644 --- a/pkg/modules/pdfengines/multi.go +++ b/pkg/modules/pdfengines/multi.go @@ -12,12 +12,23 @@ import ( ) type multiPdfEngines struct { - engines []gotenberg.PdfEngine + mergeEngines []gotenberg.PdfEngine + convertEngines []gotenberg.PdfEngine + readMedataEngines []gotenberg.PdfEngine + writeMedataEngines []gotenberg.PdfEngine } -func newMultiPdfEngines(engines ...gotenberg.PdfEngine) *multiPdfEngines { +func newMultiPdfEngines( + mergeEngines, + convertEngines, + readMetadataEngines, + writeMedataEngines []gotenberg.PdfEngine, +) *multiPdfEngines { return &multiPdfEngines{ - engines: engines, + mergeEngines: mergeEngines, + convertEngines: convertEngines, + readMedataEngines: readMetadataEngines, + writeMedataEngines: writeMedataEngines, } } @@ -27,7 +38,7 @@ func (multi *multiPdfEngines) Merge(ctx context.Context, logger *zap.Logger, inp var err error errChan := make(chan error, 1) - for _, engine := range multi.engines { + for _, engine := range multi.mergeEngines { go func(engine gotenberg.PdfEngine) { errChan <- engine.Merge(ctx, logger, inputPaths, outputPath) }(engine) @@ -52,7 +63,7 @@ func (multi *multiPdfEngines) Convert(ctx context.Context, logger *zap.Logger, f var err error errChan := make(chan error, 1) - for _, engine := range multi.engines { + for _, engine := range multi.convertEngines { go func(engine gotenberg.PdfEngine) { errChan <- engine.Convert(ctx, logger, formats, inputPath, outputPath) 
}(engine) @@ -80,16 +91,16 @@ func (multi *multiPdfEngines) ReadMetadata(ctx context.Context, logger *zap.Logg var err error var mu sync.Mutex // to safely append errors. - resultChan := make(chan readMetadataResult, len(multi.engines)) + resultChan := make(chan readMetadataResult, len(multi.readMedataEngines)) - for _, engine := range multi.engines { + for _, engine := range multi.readMedataEngines { go func(engine gotenberg.PdfEngine) { metadata, err := engine.ReadMetadata(ctx, logger, inputPath) resultChan <- readMetadataResult{metadata: metadata, err: err} }(engine) } - for range multi.engines { + for range multi.readMedataEngines { select { case result := <-resultChan: if result.err != nil { @@ -111,7 +122,7 @@ func (multi *multiPdfEngines) WriteMetadata(ctx context.Context, logger *zap.Log var err error errChan := make(chan error, 1) - for _, engine := range multi.engines { + for _, engine := range multi.writeMedataEngines { go func(engine gotenberg.PdfEngine) { errChan <- engine.WriteMetadata(ctx, logger, metadata, inputPath) }(engine) diff --git a/pkg/modules/pdfengines/multi_test.go b/pkg/modules/pdfengines/multi_test.go index 3a204b70f..00e706d78 100644 --- a/pkg/modules/pdfengines/multi_test.go +++ b/pkg/modules/pdfengines/multi_test.go @@ -20,11 +20,16 @@ func TestMultiPdfEngines_Merge(t *testing.T) { { scenario: "nominal behavior", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { - return nil + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { + return nil + }, }, }, + nil, + nil, + nil, ), ctx: context.Background(), expectError: false, @@ -32,16 +37,21 @@ func TestMultiPdfEngines_Merge(t *testing.T) { { scenario: "at least one engine does not return an error", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - MergeMock: 
func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { - return errors.New("foo") + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { + return errors.New("foo") + }, }, - }, - &gotenberg.PdfEngineMock{ - MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { - return nil + &gotenberg.PdfEngineMock{ + MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { + return nil + }, }, }, + nil, + nil, + nil, ), ctx: context.Background(), expectError: false, @@ -49,16 +59,21 @@ func TestMultiPdfEngines_Merge(t *testing.T) { { scenario: "all engines return an error", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { - return errors.New("foo") + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { + return errors.New("foo") + }, }, - }, - &gotenberg.PdfEngineMock{ - MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { - return errors.New("foo") + &gotenberg.PdfEngineMock{ + MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { + return errors.New("foo") + }, }, }, + nil, + nil, + nil, ), ctx: context.Background(), expectError: true, @@ -66,11 +81,16 @@ func TestMultiPdfEngines_Merge(t *testing.T) { { scenario: "context expired", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { - return nil + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + MergeMock: func(ctx context.Context, logger *zap.Logger, 
inputPaths []string, outputPath string) error { + return nil + }, }, }, + nil, + nil, + nil, ), ctx: func() context.Context { ctx, cancel := context.WithCancel(context.Background()) @@ -105,43 +125,58 @@ func TestMultiPdfEngines_Convert(t *testing.T) { { scenario: "nominal behavior", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { - return nil + nil, + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { + return nil + }, }, }, + nil, + nil, ), ctx: context.Background(), }, { scenario: "at least one engine does not return an error", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { - return errors.New("foo") + nil, + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { + return errors.New("foo") + }, }, - }, - &gotenberg.PdfEngineMock{ - ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { - return nil + &gotenberg.PdfEngineMock{ + ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { + return nil + }, }, }, + nil, + nil, ), ctx: context.Background(), }, { scenario: "all engines return an error", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { - return errors.New("foo") + nil, + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + ConvertMock: func(ctx context.Context, 
logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { + return errors.New("foo") + }, }, - }, - &gotenberg.PdfEngineMock{ - ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { - return errors.New("foo") + &gotenberg.PdfEngineMock{ + ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { + return errors.New("foo") + }, }, }, + nil, + nil, ), ctx: context.Background(), expectError: true, @@ -149,11 +184,16 @@ func TestMultiPdfEngines_Convert(t *testing.T) { { scenario: "context expired", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { - return nil + nil, + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { + return nil + }, }, }, + nil, + nil, ), ctx: func() context.Context { ctx, cancel := context.WithCancel(context.Background()) @@ -188,43 +228,58 @@ func TestMultiPdfEngines_ReadMetadata(t *testing.T) { { scenario: "nominal behavior", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { - return make(map[string]interface{}), nil + nil, + nil, + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { + return make(map[string]interface{}), nil + }, }, }, + nil, ), ctx: context.Background(), }, { scenario: "at least one engine does not return an error", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) 
(map[string]interface{}, error) { - return nil, errors.New("foo") + nil, + nil, + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { + return nil, errors.New("foo") + }, }, - }, - &gotenberg.PdfEngineMock{ - ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { - return make(map[string]interface{}), nil + &gotenberg.PdfEngineMock{ + ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { + return make(map[string]interface{}), nil + }, }, }, + nil, ), ctx: context.Background(), }, { scenario: "all engines return an error", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { - return nil, errors.New("foo") + nil, + nil, + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { + return nil, errors.New("foo") + }, }, - }, - &gotenberg.PdfEngineMock{ - ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { - return nil, errors.New("foo") + &gotenberg.PdfEngineMock{ + ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { + return nil, errors.New("foo") + }, }, }, + nil, ), ctx: context.Background(), expectError: true, @@ -232,11 +287,16 @@ func TestMultiPdfEngines_ReadMetadata(t *testing.T) { { scenario: "context expired", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { - return make(map[string]interface{}), nil + nil, + nil, + []gotenberg.PdfEngine{ + 
&gotenberg.PdfEngineMock{ + ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { + return make(map[string]interface{}), nil + }, }, }, + nil, ), ctx: func() context.Context { ctx, cancel := context.WithCancel(context.Background()) @@ -271,9 +331,14 @@ func TestMultiPdfEngines_WriteMetadata(t *testing.T) { { scenario: "nominal behavior", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { - return nil + nil, + nil, + nil, + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { + return nil + }, }, }, ), @@ -282,14 +347,19 @@ func TestMultiPdfEngines_WriteMetadata(t *testing.T) { { scenario: "at least one engine does not return an error", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { - return errors.New("foo") + nil, + nil, + nil, + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { + return errors.New("foo") + }, }, - }, - &gotenberg.PdfEngineMock{ - WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { - return nil + &gotenberg.PdfEngineMock{ + WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { + return nil + }, }, }, ), @@ -298,14 +368,19 @@ func TestMultiPdfEngines_WriteMetadata(t *testing.T) { { scenario: "all engines return an error", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - WriteMetadataMock: func(ctx context.Context, logger 
*zap.Logger, metadata map[string]interface{}, inputPath string) error { - return errors.New("foo") + nil, + nil, + nil, + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { + return errors.New("foo") + }, }, - }, - &gotenberg.PdfEngineMock{ - WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { - return errors.New("foo") + &gotenberg.PdfEngineMock{ + WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { + return errors.New("foo") + }, }, }, ), @@ -315,9 +390,14 @@ func TestMultiPdfEngines_WriteMetadata(t *testing.T) { { scenario: "context expired", engine: newMultiPdfEngines( - &gotenberg.PdfEngineMock{ - WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { - return nil + nil, + nil, + nil, + []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { + return nil + }, }, }, ), diff --git a/pkg/modules/pdfengines/pdfengines.go b/pkg/modules/pdfengines/pdfengines.go index c71e95571..7303f34ef 100644 --- a/pkg/modules/pdfengines/pdfengines.go +++ b/pkg/modules/pdfengines/pdfengines.go @@ -27,9 +27,12 @@ func init() { // the [api.Router] interface to expose relevant PDF processing routes if // enabled. type PdfEngines struct { - names []string - engines []gotenberg.PdfEngine - disableRoutes bool + mergeNames []string + convertNames []string + readMetadataNames []string + writeMedataNames []string + engines []gotenberg.PdfEngine + disableRoutes bool } // Descriptor returns a PdfEngines' module descriptor. 
@@ -38,9 +41,18 @@ func (mod *PdfEngines) Descriptor() gotenberg.ModuleDescriptor { ID: "pdfengines", FlagSet: func() *flag.FlagSet { fs := flag.NewFlagSet("pdfengines", flag.ExitOnError) - fs.StringSlice("pdfengines-engines", make([]string, 0), "Set the PDF engines and their order - all by default") + fs.StringSlice("pdfengines-merge-engines", []string{"qpdf", "pdfcpu", "pdftk"}, "Set the PDF engines and their order for the merge feature") + fs.StringSlice("pdfengines-convert-engines", []string{"libreoffice-pdfengine"}, "Set the PDF engines and their order for the convert feature") + fs.StringSlice("pdfengines-read-metadata-engines", []string{"exiftool"}, "Set the PDF engines and their order for the read metadata feature") + fs.StringSlice("pdfengines-write-metadata-engines", []string{"exiftool"}, "Set the PDF engines and their order for the write metadata feature") fs.Bool("pdfengines-disable-routes", false, "Disable the routes") + fs.StringSlice("pdfengines-engines", make([]string, 0), "Set the default PDF engines and their default order - all by default") + err := fs.MarkDeprecated("pdfengines-engines", "use other flags for a more granular selection of PDF engines per method") + if err != nil { + panic(err) + } + return fs }(), New: func() gotenberg.Module { return new(PdfEngines) }, @@ -51,7 +63,10 @@ func (mod *PdfEngines) Descriptor() gotenberg.ModuleDescriptor { // selected by the user thanks to the "engines" flag. 
func (mod *PdfEngines) Provision(ctx *gotenberg.Context) error { flags := ctx.ParsedFlags() - names := flags.MustStringSlice("pdfengines-engines") + mergeNames := flags.MustStringSlice("pdfengines-merge-engines") + convertNames := flags.MustStringSlice("pdfengines-convert-engines") + readMetadataNames := flags.MustStringSlice("pdfengines-read-metadata-engines") + writeMetadataNames := flags.MustStringSlice("pdfengines-write-metadata-engines") mod.disableRoutes = flags.MustBool("pdfengines-disable-routes") engines, err := ctx.Modules(new(gotenberg.PdfEngine)) @@ -65,26 +80,37 @@ func (mod *PdfEngines) Provision(ctx *gotenberg.Context) error { mod.engines[i] = engine.(gotenberg.PdfEngine) } - if len(names) > 0 { - // Selection from user. - mod.names = names + defaultNames := make([]string, len(mod.engines)) + for i, engine := range mod.engines { + defaultNames[i] = engine.(gotenberg.Module).Descriptor().ID + } - // Example in case of deprecated module name. - //for i, name := range names { - // if name == "unoconv-pdfengine" || name == "uno-pdfengine" { - // logger.Warn(fmt.Sprintf("%s is deprecated; prefer libreoffice-pdfengine instead", name)) - // mod.names[i] = "libreoffice-pdfengine" - // } - //} + // Example in case of deprecated module name. + //for i, name := range defaultNames { + // if name == "unoconv-pdfengine" || name == "uno-pdfengine" { + // logger.Warn(fmt.Sprintf("%s is deprecated; prefer libreoffice-pdfengine instead", name)) + // mod.defaultNames[i] = "libreoffice-pdfengine" + // } + //} + + mod.mergeNames = defaultNames + if len(mergeNames) > 0 { + mod.mergeNames = mergeNames + } - return nil + mod.convertNames = defaultNames + if len(convertNames) > 0 { + mod.convertNames = convertNames } - // No selection from user, use all PDF engines available. 
- mod.names = make([]string, len(mod.engines)) + mod.readMetadataNames = defaultNames + if len(readMetadataNames) > 0 { + mod.readMetadataNames = readMetadataNames + } - for i, engine := range mod.engines { - mod.names[i] = engine.(gotenberg.Module).Descriptor().ID + mod.writeMedataNames = defaultNames + if len(writeMetadataNames) > 0 { + mod.writeMedataNames = writeMetadataNames } return nil @@ -105,22 +131,40 @@ func (mod *PdfEngines) Validate() error { } nonExistingEngines := make([]string, 0) + findNonExistingEngines := func(names []string) { + for _, name := range names { + engineExists := false + + for _, engine := range mod.engines { + if name == engine.(gotenberg.Module).Descriptor().ID { + engineExists = true + break + } + } - for _, name := range mod.names { - engineExists := false + if engineExists { + continue + } - for _, engine := range mod.engines { - if name == engine.(gotenberg.Module).Descriptor().ID { - engineExists = true - break + alreadyInSlice := false + for _, engine := range nonExistingEngines { + if engine == name { + alreadyInSlice = true + break + } } - } - if !engineExists { - nonExistingEngines = append(nonExistingEngines, name) + if !alreadyInSlice { + nonExistingEngines = append(nonExistingEngines, name) + } } } + findNonExistingEngines(mod.mergeNames) + findNonExistingEngines(mod.convertNames) + findNonExistingEngines(mod.readMetadataNames) + findNonExistingEngines(mod.writeMedataNames) + if len(nonExistingEngines) == 0 { return nil } @@ -132,24 +176,35 @@ func (mod *PdfEngines) Validate() error { // modules. 
func (mod *PdfEngines) SystemMessages() []string { return []string{ - strings.Join(mod.names[:], " "), + fmt.Sprintf("merge engines - %s", strings.Join(mod.mergeNames[:], " ")), + fmt.Sprintf("convert engines - %s", strings.Join(mod.convertNames[:], " ")), + fmt.Sprintf("read metadata engines - %s", strings.Join(mod.readMetadataNames[:], " ")), + fmt.Sprintf("write medata engines - %s", strings.Join(mod.writeMedataNames[:], " ")), } } // PdfEngine returns a [gotenberg.PdfEngine]. func (mod *PdfEngines) PdfEngine() (gotenberg.PdfEngine, error) { - engines := make([]gotenberg.PdfEngine, len(mod.names)) - - for i, name := range mod.names { - for _, engine := range mod.engines { - if name == engine.(gotenberg.Module).Descriptor().ID { - engines[i] = engine - break + engines := func(names []string) []gotenberg.PdfEngine { + list := make([]gotenberg.PdfEngine, len(names)) + for i, name := range names { + for _, engine := range mod.engines { + if name == engine.(gotenberg.Module).Descriptor().ID { + list[i] = engine + break + } } } + + return list } - return newMultiPdfEngines(engines...), nil + return newMultiPdfEngines( + engines(mod.mergeNames), + engines(mod.convertNames), + engines(mod.readMetadataNames), + engines(mod.writeMedataNames), + ), nil } // Routes returns the HTTP routes. 
diff --git a/pkg/modules/pdfengines/pdfengines_test.go b/pkg/modules/pdfengines/pdfengines_test.go index bf12c8a56..fe999432d 100644 --- a/pkg/modules/pdfengines/pdfengines_test.go +++ b/pkg/modules/pdfengines/pdfengines_test.go @@ -2,6 +2,7 @@ package pdfengines import ( "errors" + "fmt" "reflect" "strings" "testing" @@ -22,10 +23,13 @@ func TestPdfEngines_Descriptor(t *testing.T) { func TestPdfEngines_Provision(t *testing.T) { for _, tc := range []struct { - scenario string - ctx *gotenberg.Context - expectedPdfEngines []string - expectError bool + scenario string + ctx *gotenberg.Context + expectedMergePdfEngines []string + expectedConvertPdfEngines []string + expectedReadMetadataPdfEngines []string + expectedWriteMetadataPdfEngines []string + expectError bool }{ { scenario: "no selection from user", @@ -61,8 +65,11 @@ func TestPdfEngines_Provision(t *testing.T) { }, ) }(), - expectedPdfEngines: []string{"bar"}, - expectError: false, + expectedMergePdfEngines: []string{"qpdf", "pdfcpu", "pdftk"}, + expectedConvertPdfEngines: []string{"libreoffice-pdfengine"}, + expectedReadMetadataPdfEngines: []string{"exiftool"}, + expectedWriteMetadataPdfEngines: []string{"exiftool"}, + expectError: false, }, { scenario: "selection from user", @@ -100,7 +107,7 @@ func TestPdfEngines_Provision(t *testing.T) { } fs := new(PdfEngines).Descriptor().FlagSet - err := fs.Parse([]string{"--pdfengines-engines=b", "--pdfengines-engines=a"}) + err := fs.Parse([]string{"--pdfengines-merge-engines=b", "--pdfengines-convert-engines=b", "--pdfengines-read-metadata-engines=a", "--pdfengines-write-metadata-engines=a"}) if err != nil { t.Fatalf("expected no error but got: %v", err) } @@ -116,8 +123,12 @@ func TestPdfEngines_Provision(t *testing.T) { }, ) }(), - expectedPdfEngines: []string{"b", "a"}, - expectError: false, + + expectedMergePdfEngines: []string{"b"}, + expectedConvertPdfEngines: []string{"b"}, + expectedReadMetadataPdfEngines: []string{"a"}, + expectedWriteMetadataPdfEngines: 
[]string{"a"}, + expectError: false, }, { scenario: "no valid PDF engine", @@ -167,13 +178,43 @@ func TestPdfEngines_Provision(t *testing.T) { t.Fatal("expected error but got none") } - if len(tc.expectedPdfEngines) != len(mod.names) { - t.Fatalf("expected %d names but got %d", len(tc.expectedPdfEngines), len(mod.names)) + if len(tc.expectedMergePdfEngines) != len(mod.mergeNames) { + t.Fatalf("expected %d merge names but got %d", len(tc.expectedMergePdfEngines), len(mod.mergeNames)) + } + + if len(tc.expectedConvertPdfEngines) != len(mod.convertNames) { + t.Fatalf("expected %d convert names but got %d", len(tc.expectedConvertPdfEngines), len(mod.convertNames)) + } + + if len(tc.expectedReadMetadataPdfEngines) != len(mod.readMetadataNames) { + t.Fatalf("expected %d read metadata names but got %d", len(tc.expectedReadMetadataPdfEngines), len(mod.readMetadataNames)) + } + + if len(tc.expectedWriteMetadataPdfEngines) != len(mod.writeMedataNames) { + t.Fatalf("expected %d write metadata names but got %d", len(tc.expectedWriteMetadataPdfEngines), len(mod.writeMedataNames)) + } + + for index, name := range mod.mergeNames { + if name != tc.expectedMergePdfEngines[index] { + t.Fatalf("expected merge name at index %d to be %s, but got: %s", index, name, tc.expectedMergePdfEngines[index]) + } } - for index, name := range mod.names { - if name != tc.expectedPdfEngines[index] { - t.Fatalf("expected scenario at index %d to be %s, but got: %s", index, name, tc.expectedPdfEngines[index]) + for index, name := range mod.convertNames { + if name != tc.expectedConvertPdfEngines[index] { + t.Fatalf("expected convert name at index %d to be %s, but got: %s", index, name, tc.expectedConvertPdfEngines[index]) + } + } + + for index, name := range mod.readMetadataNames { + if name != tc.expectedReadMetadataPdfEngines[index] { + t.Fatalf("expected read metadata name at index %d to be %s, but got: %s", index, name, tc.expectedReadMetadataPdfEngines[index]) + } + } + + for index, name := range 
mod.writeMedataNames { + if name != tc.expectedWriteMetadataPdfEngines[index] { + t.Fatalf("expected write metadat name at index %d to be %s, but got: %s", index, name, tc.expectedWriteMetadataPdfEngines[index]) } } }) @@ -239,8 +280,11 @@ func TestPdfEngines_Validate(t *testing.T) { } { t.Run(tc.scenario, func(t *testing.T) { mod := PdfEngines{ - names: tc.names, - engines: tc.engines, + mergeNames: tc.names, + convertNames: tc.names, + readMetadataNames: tc.names, + writeMedataNames: tc.names, + engines: tc.engines, } err := mod.Validate() @@ -258,22 +302,36 @@ func TestPdfEngines_Validate(t *testing.T) { func TestPdfEngines_SystemMessages(t *testing.T) { mod := new(PdfEngines) - mod.names = []string{"foo", "bar"} + mod.mergeNames = []string{"foo", "bar"} + mod.convertNames = []string{"foo", "bar"} + mod.readMetadataNames = []string{"foo", "bar"} + mod.writeMedataNames = []string{"foo", "bar"} messages := mod.SystemMessages() - if len(messages) != 1 { + if len(messages) != 4 { t.Errorf("expected one and only one message, but got %d", len(messages)) } - expect := strings.Join(mod.names[:], " ") - if messages[0] != expect { - t.Errorf("expected message '%s', but got '%s'", expect, messages[0]) + expect := []string{ + fmt.Sprintf("merge engines - %s", strings.Join(mod.mergeNames[:], " ")), + fmt.Sprintf("convert engines - %s", strings.Join(mod.convertNames[:], " ")), + fmt.Sprintf("read metadata engines - %s", strings.Join(mod.readMetadataNames[:], " ")), + fmt.Sprintf("write medata engines - %s", strings.Join(mod.writeMedataNames[:], " ")), + } + + for i, message := range messages { + if message != expect[i] { + t.Errorf("expected message at index %d to be %s, but got %s", i, message, expect[i]) + } } } func TestPdfEngines_PdfEngine(t *testing.T) { mod := PdfEngines{ - names: []string{"foo", "bar"}, + mergeNames: []string{"foo", "bar"}, + convertNames: []string{"foo", "bar"}, + readMetadataNames: []string{"foo", "bar"}, + writeMedataNames: []string{"foo", "bar"}, 
engines: func() []gotenberg.PdfEngine { engine1 := &struct { gotenberg.ModuleMock From b2f957d1d809172d1d969dd050b6521e74e58c8d Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Mon, 14 Oct 2024 09:24:42 +0200 Subject: [PATCH 11/25] chore(pdfengines): update new flags with a description of the empty value behavior --- pkg/modules/pdfengines/pdfengines.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/modules/pdfengines/pdfengines.go b/pkg/modules/pdfengines/pdfengines.go index 7303f34ef..7bd000187 100644 --- a/pkg/modules/pdfengines/pdfengines.go +++ b/pkg/modules/pdfengines/pdfengines.go @@ -41,10 +41,10 @@ func (mod *PdfEngines) Descriptor() gotenberg.ModuleDescriptor { ID: "pdfengines", FlagSet: func() *flag.FlagSet { fs := flag.NewFlagSet("pdfengines", flag.ExitOnError) - fs.StringSlice("pdfengines-merge-engines", []string{"qpdf", "pdfcpu", "pdftk"}, "Set the PDF engines and their order for the merge feature") - fs.StringSlice("pdfengines-convert-engines", []string{"libreoffice-pdfengine"}, "Set the PDF engines and their order for the convert feature") - fs.StringSlice("pdfengines-read-metadata-engines", []string{"exiftool"}, "Set the PDF engines and their order for the read metadata feature") - fs.StringSlice("pdfengines-write-metadata-engines", []string{"exiftool"}, "Set the PDF engines and their order for the write metadata feature") + fs.StringSlice("pdfengines-merge-engines", []string{"qpdf", "pdfcpu", "pdftk"}, "Set the PDF engines and their order for the merge feature - empty means all") + fs.StringSlice("pdfengines-convert-engines", []string{"libreoffice-pdfengine"}, "Set the PDF engines and their order for the convert feature - empty means all") + fs.StringSlice("pdfengines-read-metadata-engines", []string{"exiftool"}, "Set the PDF engines and their order for the read metadata feature - empty means all") + fs.StringSlice("pdfengines-write-metadata-engines", []string{"exiftool"}, "Set the PDF engines and their order for 
the write metadata feature - empty means all") fs.Bool("pdfengines-disable-routes", false, "Disable the routes") fs.StringSlice("pdfengines-engines", make([]string, 0), "Set the default PDF engines and their default order - all by default") From 711b43070dd95c164eb76b17cc9babb7ea24f50a Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Tue, 5 Nov 2024 11:44:01 +0100 Subject: [PATCH 12/25] feat(chromium): add failOnResourceLoadingFailed and failOnResourceHttpStatusCodes form fields --- pkg/modules/chromium/browser.go | 59 +++++++-- pkg/modules/chromium/browser_test.go | 143 ++++++++++++++++++++- pkg/modules/chromium/chromium.go | 42 ++++-- pkg/modules/chromium/events.go | 102 +++++++++++---- pkg/modules/chromium/routes.go | 84 ++++++++---- pkg/modules/chromium/routes_test.go | 86 +++++++++++++ test/testdata/chromium/html/index.html | 8 +- test/testdata/chromium/markdown/index.html | 8 +- 8 files changed, 458 insertions(+), 74 deletions(-) diff --git a/pkg/modules/chromium/browser.go b/pkg/modules/chromium/browser.go index 4e895203a..380dcccfb 100644 --- a/pkg/modules/chromium/browser.go +++ b/pkg/modules/chromium/browser.go @@ -299,13 +299,25 @@ func (b *chromiumBrowser) do(ctx context.Context, logger *zap.Logger, url string }) var ( - invalidHttpStatusCode error - invalidHttpStatusCodeMu sync.RWMutex + invalidHttpStatusCode error + invalidHttpStatusCodeMu sync.RWMutex + invalidResourceHttpStatusCode error + invalidResourceHttpStatusCodeMu sync.RWMutex ) - // See https://github.com/gotenberg/gotenberg/issues/613. - if len(options.FailOnHttpStatusCodes) != 0 { - listenForEventResponseReceived(taskCtx, logger, url, options.FailOnHttpStatusCodes, &invalidHttpStatusCode, &invalidHttpStatusCodeMu) + // See: + // https://github.com/gotenberg/gotenberg/issues/613. + // https://github.com/gotenberg/gotenberg/issues/1021. 
+ if len(options.FailOnHttpStatusCodes) != 0 || len(options.FailOnResourceHttpStatusCodes) != 0 { + listenForEventResponseReceived(taskCtx, logger, eventResponseReceivedOptions{ + mainPageUrl: url, + failOnHttpStatusCodes: options.FailOnHttpStatusCodes, + invalidHttpStatusCode: &invalidHttpStatusCode, + invalidHttpStatusCodeMu: &invalidHttpStatusCodeMu, + failOnResourceOnHttpStatusCode: options.FailOnResourceHttpStatusCodes, + invalidResourceHttpStatusCode: &invalidResourceHttpStatusCode, + invalidResourceHttpStatusCodeMu: &invalidResourceHttpStatusCodeMu, + }) } var ( @@ -319,14 +331,22 @@ func (b *chromiumBrowser) do(ctx context.Context, logger *zap.Logger, url string } var ( - loadingFailed error - loadingFailedMu sync.RWMutex + loadingFailed error + loadingFailedMu sync.RWMutex + resourceLoadingFailed error + resourceLoadingFailedMu sync.RWMutex ) // See: - // https://github.com/gotenberg/gotenberg/issues/913 - // https://github.com/gotenberg/gotenberg/issues/959 - listenForEventLoadingFailed(taskCtx, logger, &loadingFailed, &loadingFailedMu) + // https://github.com/gotenberg/gotenberg/issues/913. + // https://github.com/gotenberg/gotenberg/issues/959. + // https://github.com/gotenberg/gotenberg/issues/1021. + listenForEventLoadingFailed(taskCtx, logger, eventLoadingFailedOptions{ + loadingFailed: &loadingFailed, + loadingFailedMu: &loadingFailedMu, + resourceLoadingFailed: &resourceLoadingFailed, + resourceLoadingFailedMu: &resourceLoadingFailedMu, + }) err = chromedp.Run(taskCtx, tasks...) if err != nil { @@ -355,6 +375,14 @@ func (b *chromiumBrowser) do(ctx context.Context, logger *zap.Logger, url string return fmt.Errorf("%v: %w", invalidHttpStatusCode, ErrInvalidHttpStatusCode) } + // See https://github.com/gotenberg/gotenberg/issues/1021. 
+ invalidResourceHttpStatusCodeMu.RLock() + defer invalidResourceHttpStatusCodeMu.RUnlock() + + if invalidResourceHttpStatusCode != nil { + return fmt.Errorf("%v: %w", invalidResourceHttpStatusCode, ErrInvalidResourceHttpStatusCode) + } + // See https://github.com/gotenberg/gotenberg/issues/262. consoleExceptionsMu.RLock() defer consoleExceptionsMu.RUnlock() @@ -364,8 +392,8 @@ func (b *chromiumBrowser) do(ctx context.Context, logger *zap.Logger, url string } // See: - // https://github.com/gotenberg/gotenberg/issues/913 - // https://github.com/gotenberg/gotenberg/issues/959 + // https://github.com/gotenberg/gotenberg/issues/913. + // https://github.com/gotenberg/gotenberg/issues/959. loadingFailedMu.RLock() defer loadingFailedMu.RUnlock() @@ -373,6 +401,13 @@ func (b *chromiumBrowser) do(ctx context.Context, logger *zap.Logger, url string return fmt.Errorf("%v: %w", loadingFailed, ErrLoadingFailed) } + // See https://github.com/gotenberg/gotenberg/issues/1021. + if options.FailOnResourceLoadingFailed { + if resourceLoadingFailed != nil { + return fmt.Errorf("%v: %w", resourceLoadingFailed, ErrResourceLoadingFailed) + } + } + return nil } diff --git a/pkg/modules/chromium/browser_test.go b/pkg/modules/chromium/browser_test.go index 9cc4bcb02..3ba608b4b 100644 --- a/pkg/modules/chromium/browser_test.go +++ b/pkg/modules/chromium/browser_test.go @@ -446,6 +446,44 @@ func TestChromiumBrowser_pdf(t *testing.T) { expectError: true, expectedError: ErrInvalidHttpStatusCode, }, + { + scenario: "ErrInvalidResourceHttpStatusCode", + browser: newChromiumBrowser( + browserArguments{ + binPath: os.Getenv("CHROMIUM_BIN_PATH"), + wsUrlReadTimeout: 5 * time.Second, + allowList: regexp2.MustCompile("", 0), + denyList: regexp2.MustCompile("", 0), + }, + ), + fs: func() *gotenberg.FileSystem { + fs := gotenberg.NewFileSystem() + + err := os.MkdirAll(fs.WorkingDirPath(), 0o755) + if err != nil { + t.Fatalf(fmt.Sprintf("expected no error but got: %v", err)) + } + + err = 
os.WriteFile(fmt.Sprintf("%s/style.css", fs.WorkingDirPath()), []byte("body{font-family: Arial, Helvetica, sans-serif;}"), 0o755) + if err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + err = os.WriteFile(fmt.Sprintf("%s/index.html", fs.WorkingDirPath()), []byte("

ErrInvalidResourceHttpStatusCode

"), 0o755) + if err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + return fs + }(), + options: PdfOptions{ + Options: Options{FailOnResourceHttpStatusCodes: []int64{200}}, + }, + noDeadline: false, + start: true, + expectError: true, + expectedError: ErrInvalidResourceHttpStatusCode, + }, { scenario: "ErrConsoleExceptions", browser: newChromiumBrowser( @@ -505,6 +543,39 @@ func TestChromiumBrowser_pdf(t *testing.T) { expectError: true, expectedError: ErrLoadingFailed, }, + { + scenario: "ErrResourceLoadingFailed", + browser: newChromiumBrowser( + browserArguments{ + binPath: os.Getenv("CHROMIUM_BIN_PATH"), + wsUrlReadTimeout: 5 * time.Second, + allowList: regexp2.MustCompile("", 0), + denyList: regexp2.MustCompile("", 0), + }, + ), + fs: func() *gotenberg.FileSystem { + fs := gotenberg.NewFileSystem() + + err := os.MkdirAll(fs.WorkingDirPath(), 0o755) + if err != nil { + t.Fatalf(fmt.Sprintf("expected no error but got: %v", err)) + } + + err = os.WriteFile(fmt.Sprintf("%s/index.html", fs.WorkingDirPath()), []byte("

ErrResourceLoadingFailed

"), 0o755) + if err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + return fs + }(), + options: PdfOptions{ + Options: Options{FailOnResourceLoadingFailed: true}, + }, + noDeadline: false, + start: true, + expectError: true, + expectedError: ErrResourceLoadingFailed, + }, { scenario: "clear cache", browser: newChromiumBrowser( @@ -1537,6 +1608,44 @@ func TestChromiumBrowser_screenshot(t *testing.T) { expectError: true, expectedError: ErrInvalidHttpStatusCode, }, + { + scenario: "ErrInvalidResourceHttpStatusCode", + browser: newChromiumBrowser( + browserArguments{ + binPath: os.Getenv("CHROMIUM_BIN_PATH"), + wsUrlReadTimeout: 5 * time.Second, + allowList: regexp2.MustCompile("", 0), + denyList: regexp2.MustCompile("", 0), + }, + ), + fs: func() *gotenberg.FileSystem { + fs := gotenberg.NewFileSystem() + + err := os.MkdirAll(fs.WorkingDirPath(), 0o755) + if err != nil { + t.Fatalf(fmt.Sprintf("expected no error but got: %v", err)) + } + + err = os.WriteFile(fmt.Sprintf("%s/style.css", fs.WorkingDirPath()), []byte("body{font-family: Arial, Helvetica, sans-serif;}"), 0o755) + if err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + err = os.WriteFile(fmt.Sprintf("%s/index.html", fs.WorkingDirPath()), []byte("

ErrInvalidResourceHttpStatusCode

"), 0o755) + if err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + return fs + }(), + options: ScreenshotOptions{ + Options: Options{FailOnResourceHttpStatusCodes: []int64{299}}, + }, + noDeadline: false, + start: true, + expectError: true, + expectedError: ErrInvalidResourceHttpStatusCode, + }, { scenario: "ErrConsoleExceptions", browser: newChromiumBrowser( @@ -1580,7 +1689,6 @@ func TestChromiumBrowser_screenshot(t *testing.T) { denyList: regexp2.MustCompile("", 0), }, ), - fs: func() *gotenberg.FileSystem { fs := gotenberg.NewFileSystem() @@ -1597,6 +1705,39 @@ func TestChromiumBrowser_screenshot(t *testing.T) { expectError: true, expectedError: ErrLoadingFailed, }, + { + scenario: "ErrResourceLoadingFailed", + browser: newChromiumBrowser( + browserArguments{ + binPath: os.Getenv("CHROMIUM_BIN_PATH"), + wsUrlReadTimeout: 5 * time.Second, + allowList: regexp2.MustCompile("", 0), + denyList: regexp2.MustCompile("", 0), + }, + ), + fs: func() *gotenberg.FileSystem { + fs := gotenberg.NewFileSystem() + + err := os.MkdirAll(fs.WorkingDirPath(), 0o755) + if err != nil { + t.Fatalf(fmt.Sprintf("expected no error but got: %v", err)) + } + + err = os.WriteFile(fmt.Sprintf("%s/index.html", fs.WorkingDirPath()), []byte("

ErrResourceLoadingFailed

"), 0o755) + if err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + return fs + }(), + options: ScreenshotOptions{ + Options: Options{FailOnResourceLoadingFailed: true}, + }, + noDeadline: false, + start: true, + expectError: true, + expectedError: ErrResourceLoadingFailed, + }, { scenario: "clear cache", browser: newChromiumBrowser( diff --git a/pkg/modules/chromium/chromium.go b/pkg/modules/chromium/chromium.go index 0a8a8a2e3..08f3c64ef 100644 --- a/pkg/modules/chromium/chromium.go +++ b/pkg/modules/chromium/chromium.go @@ -38,14 +38,22 @@ var ( // matches with one of the entry in [Options.FailOnHttpStatusCodes]. ErrInvalidHttpStatusCode = errors.New("invalid HTTP status code") + // ErrInvalidResourceHttpStatusCode happens when the status code from one + // or more resources matches with one of the entry in + // [Options.FailOnResourceHttpStatusCodes]. + ErrInvalidResourceHttpStatusCode = errors.New("invalid resource HTTP status code") + // ErrConsoleExceptions happens when there are exceptions in the Chromium // console. It also happens only if the [Options.FailOnConsoleExceptions] // is set to true. ErrConsoleExceptions = errors.New("console exceptions") - // ErrLoadingFailed happens when a URL failed to load. + // ErrLoadingFailed happens when the main page failed to load. ErrLoadingFailed = errors.New("loading failed") + // ErrResourceLoadingFailed happens when one or more resources failed to load. + ErrResourceLoadingFailed = errors.New("resource loading failed") + // PDF specific. // ErrOmitBackgroundWithoutPrintBackground happens if @@ -86,6 +94,14 @@ type Options struct { // code from the main page matches with one of its entries. FailOnHttpStatusCodes []int64 + // FailOnResourceHttpStatusCodes sets if the conversion should fail if the + // status code from at least one resource matches with one if its entries. 
+ FailOnResourceHttpStatusCodes []int64 + + // FailOnResourceLoadingFailed sets if the conversion should fail like the + // main page if Chromium fails to load at least one resource. + FailOnResourceLoadingFailed bool + // FailOnConsoleExceptions sets if the conversion should fail if there are // exceptions in the Chromium console. FailOnConsoleExceptions bool @@ -124,17 +140,19 @@ type Options struct { // DefaultOptions returns the default values for Options. func DefaultOptions() Options { return Options{ - SkipNetworkIdleEvent: true, - FailOnHttpStatusCodes: []int64{499, 599}, - FailOnConsoleExceptions: false, - WaitDelay: 0, - WaitWindowStatus: "", - WaitForExpression: "", - Cookies: nil, - UserAgent: "", - ExtraHttpHeaders: nil, - EmulatedMediaType: "", - OmitBackground: false, + SkipNetworkIdleEvent: true, + FailOnHttpStatusCodes: []int64{499, 599}, + FailOnResourceHttpStatusCodes: nil, + FailOnResourceLoadingFailed: false, + FailOnConsoleExceptions: false, + WaitDelay: 0, + WaitWindowStatus: "", + WaitForExpression: "", + Cookies: nil, + UserAgent: "", + ExtraHttpHeaders: nil, + EmulatedMediaType: "", + OmitBackground: false, } } diff --git a/pkg/modules/chromium/events.go b/pkg/modules/chromium/events.go index 85d714560..ec02ef08d 100644 --- a/pkg/modules/chromium/events.go +++ b/pkg/modules/chromium/events.go @@ -3,6 +3,7 @@ package chromium import ( "context" "fmt" + "net/http" "slices" "sync" @@ -136,14 +137,36 @@ func listenForEventRequestPaused(ctx context.Context, logger *zap.Logger, option }) } +type eventResponseReceivedOptions struct { + mainPageUrl string + failOnHttpStatusCodes []int64 + invalidHttpStatusCode *error + invalidHttpStatusCodeMu *sync.RWMutex + failOnResourceOnHttpStatusCode []int64 + invalidResourceHttpStatusCode *error + invalidResourceHttpStatusCodeMu *sync.RWMutex +} + // listenForEventResponseReceived listens for an invalid HTTP status code that -// is returned by the main page. 
-// See https://github.com/gotenberg/gotenberg/issues/613. -func listenForEventResponseReceived(ctx context.Context, logger *zap.Logger, url string, failOnHttpStatusCodes []int64, invalidHttpStatusCode *error, invalidHttpStatusCodeMu *sync.RWMutex) { +// is returned by the main page or by one or more resources. +// See: +// https://github.com/gotenberg/gotenberg/issues/613. +// https://github.com/gotenberg/gotenberg/issues/1021. +func listenForEventResponseReceived( + ctx context.Context, + logger *zap.Logger, + options eventResponseReceivedOptions, +) { for _, code := range []int64{199, 299, 399, 499, 599} { - if slices.Contains(failOnHttpStatusCodes, code) { + if slices.Contains(options.failOnHttpStatusCodes, code) { + for i := code - 99; i <= code; i++ { + options.failOnHttpStatusCodes = append(options.failOnHttpStatusCodes, i) + } + } + + if slices.Contains(options.failOnResourceOnHttpStatusCode, code) { for i := code - 99; i <= code; i++ { - failOnHttpStatusCodes = append(failOnHttpStatusCodes, i) + options.failOnResourceOnHttpStatusCode = append(options.failOnResourceOnHttpStatusCode, i) } } } @@ -151,41 +174,53 @@ func listenForEventResponseReceived(ctx context.Context, logger *zap.Logger, url chromedp.ListenTarget(ctx, func(ev interface{}) { switch ev := ev.(type) { case *network.EventResponseReceived: - if ev.Response.URL != url { + if ev.Response.URL == options.mainPageUrl { + logger.Debug(fmt.Sprintf("event EventResponseReceived fired for main page: %+v", ev.Response)) + + if slices.Contains(options.failOnHttpStatusCodes, ev.Response.Status) { + options.invalidHttpStatusCodeMu.Lock() + defer options.invalidHttpStatusCodeMu.Unlock() + + *options.invalidHttpStatusCode = fmt.Errorf("%d: %s", ev.Response.Status, ev.Response.StatusText) + } + return } - logger.Debug(fmt.Sprintf("event EventResponseReceived fired for main page: %+v", ev.Response)) + logger.Debug(fmt.Sprintf("event EventResponseReceived fired for a resource: %+v", ev.Response)) - if 
slices.Contains(failOnHttpStatusCodes, ev.Response.Status) { - invalidHttpStatusCodeMu.Lock() - defer invalidHttpStatusCodeMu.Unlock() + if slices.Contains(options.failOnResourceOnHttpStatusCode, ev.Response.Status) { + options.invalidResourceHttpStatusCodeMu.Lock() + defer options.invalidResourceHttpStatusCodeMu.Unlock() - *invalidHttpStatusCode = fmt.Errorf("%d: %s", ev.Response.Status, ev.Response.StatusText) + *options.invalidResourceHttpStatusCode = multierr.Append( + *options.invalidResourceHttpStatusCode, + fmt.Errorf("%s - %d: %s", ev.Response.URL, ev.Response.Status, http.StatusText(int(ev.Response.Status))), + ) } } }) } +type eventLoadingFailedOptions struct { + loadingFailed *error + loadingFailedMu *sync.RWMutex + resourceLoadingFailed *error + resourceLoadingFailedMu *sync.RWMutex +} + // listenForEventLoadingFailed listens for an event indicating that the main -// page failed to load. +// page or one or more resources failed to load. // See: // https://github.com/gotenberg/gotenberg/issues/913. // https://github.com/gotenberg/gotenberg/issues/959. -func listenForEventLoadingFailed(ctx context.Context, logger *zap.Logger, loadingFailed *error, loadingFailedMu *sync.RWMutex) { +// https://github.com/gotenberg/gotenberg/issues/1021. +func listenForEventLoadingFailed(ctx context.Context, logger *zap.Logger, options eventLoadingFailedOptions) { chromedp.ListenTarget(ctx, func(ev interface{}) { switch ev := ev.(type) { case *network.EventLoadingFailed: logger.Debug(fmt.Sprintf("event EventLoadingFailed fired: %+v", ev.ErrorText)) - if ev.Type != network.ResourceTypeDocument { - logger.Debug("skip EventLoadingFailed: is not resource type Document") - return - } - - // Supposition: except iframe, an event loading failed with a - // resource type Document is about the main page. - // We are looking for common errors. // TODO: sufficient? 
errors := []string{ @@ -199,16 +234,35 @@ func listenForEventLoadingFailed(ctx context.Context, logger *zap.Logger, loadin "net::ERR_ADDRESS_UNREACHABLE", "net::ERR_BLOCKED_BY_CLIENT", "net::ERR_BLOCKED_BY_RESPONSE", + "net::ERR_FILE_NOT_FOUND", } if !slices.Contains(errors, ev.ErrorText) { logger.Debug(fmt.Sprintf("skip EventLoadingFailed: '%s' is not part of %+v", ev.ErrorText, errors)) return } - loadingFailedMu.Lock() - defer loadingFailedMu.Unlock() + if ev.Type == network.ResourceTypeDocument { + // Supposition: except iframe, an event loading failed with a + // resource type Document is about the main page. + logger.Debug("event EventLoadingFailed fired for main page") + + options.loadingFailedMu.Lock() + defer options.loadingFailedMu.Unlock() + + *options.loadingFailed = fmt.Errorf("%s", ev.ErrorText) + + return + } + + logger.Debug("event EventLoadingFailed fired for a resource") + + options.resourceLoadingFailedMu.Lock() + defer options.resourceLoadingFailedMu.Unlock() - *loadingFailed = fmt.Errorf("%s", ev.ErrorText) + *options.resourceLoadingFailed = multierr.Append( + *options.resourceLoadingFailed, + fmt.Errorf("resource %s: %s", ev.Type, ev.ErrorText), + ) } }) } diff --git a/pkg/modules/chromium/routes.go b/pkg/modules/chromium/routes.go index a725c7cd4..4662371c7 100644 --- a/pkg/modules/chromium/routes.go +++ b/pkg/modules/chromium/routes.go @@ -29,17 +29,19 @@ func FormDataChromiumOptions(ctx *api.Context) (*api.FormData, Options) { defaultOptions := DefaultOptions() var ( - skipNetworkIdleEvent bool - failOnHttpStatusCodes []int64 - failOnConsoleExceptions bool - waitDelay time.Duration - waitWindowStatus string - waitForExpression string - cookies []Cookie - userAgent string - extraHttpHeaders []ExtraHttpHeader - emulatedMediaType string - omitBackground bool + skipNetworkIdleEvent bool + failOnHttpStatusCodes []int64 + failOnResourceHttpStatusCodes []int64 + failOnResourceLoadingFailed bool + failOnConsoleExceptions bool + waitDelay 
time.Duration + waitWindowStatus string + waitForExpression string + cookies []Cookie + userAgent string + extraHttpHeaders []ExtraHttpHeader + emulatedMediaType string + omitBackground bool ) form := ctx.FormData(). @@ -57,6 +59,20 @@ func FormDataChromiumOptions(ctx *api.Context) (*api.FormData, Options) { return nil }). + Custom("failOnResourceHttpStatusCodes", func(value string) error { + if value == "" { + failOnResourceHttpStatusCodes = defaultOptions.FailOnResourceHttpStatusCodes + return nil + } + + err := json.Unmarshal([]byte(value), &failOnResourceHttpStatusCodes) + if err != nil { + return fmt.Errorf("unmarshal failOnResourceHttpStatusCodes: %w", err) + } + + return nil + }). + Bool("failOnResourceLoadingFailed", &failOnResourceLoadingFailed, defaultOptions.FailOnResourceLoadingFailed). Bool("failOnConsoleExceptions", &failOnConsoleExceptions, defaultOptions.FailOnConsoleExceptions). Duration("waitDelay", &waitDelay, defaultOptions.WaitDelay). String("waitWindowStatus", &waitWindowStatus, defaultOptions.WaitWindowStatus). 
@@ -158,17 +174,19 @@ func FormDataChromiumOptions(ctx *api.Context) (*api.FormData, Options) { Bool("omitBackground", &omitBackground, defaultOptions.OmitBackground) options := Options{ - SkipNetworkIdleEvent: skipNetworkIdleEvent, - FailOnHttpStatusCodes: failOnHttpStatusCodes, - FailOnConsoleExceptions: failOnConsoleExceptions, - WaitDelay: waitDelay, - WaitWindowStatus: waitWindowStatus, - WaitForExpression: waitForExpression, - Cookies: cookies, - UserAgent: userAgent, - ExtraHttpHeaders: extraHttpHeaders, - EmulatedMediaType: emulatedMediaType, - OmitBackground: omitBackground, + SkipNetworkIdleEvent: skipNetworkIdleEvent, + FailOnHttpStatusCodes: failOnHttpStatusCodes, + FailOnResourceHttpStatusCodes: failOnResourceHttpStatusCodes, + FailOnResourceLoadingFailed: failOnResourceLoadingFailed, + FailOnConsoleExceptions: failOnConsoleExceptions, + WaitDelay: waitDelay, + WaitWindowStatus: waitWindowStatus, + WaitForExpression: waitForExpression, + Cookies: cookies, + UserAgent: userAgent, + ExtraHttpHeaders: extraHttpHeaders, + EmulatedMediaType: emulatedMediaType, + OmitBackground: omitBackground, } return form, options @@ -726,12 +744,22 @@ func handleChromiumError(err error, options Options) error { ) } + if errors.Is(err, ErrInvalidResourceHttpStatusCode) { + return api.WrapError( + err, + api.NewSentinelHttpError( + http.StatusConflict, + fmt.Sprintf("Invalid HTTP status code from resources:\n%s", strings.ReplaceAll(err.Error(), fmt.Sprintf(": %s", ErrInvalidResourceHttpStatusCode.Error()), "")), + ), + ) + } + if errors.Is(err, ErrConsoleExceptions) { return api.WrapError( err, api.NewSentinelHttpError( http.StatusConflict, - fmt.Sprintf("Chromium console exceptions:\n %s", strings.ReplaceAll(err.Error(), ErrConsoleExceptions.Error(), "")), + fmt.Sprintf("Chromium console exceptions:\n%s", strings.ReplaceAll(err.Error(), ErrConsoleExceptions.Error(), "")), ), ) } @@ -746,5 +774,15 @@ func handleChromiumError(err error, options Options) error { ) } + if 
errors.Is(err, ErrResourceLoadingFailed) { + return api.WrapError( + err, + api.NewSentinelHttpError( + http.StatusConflict, + fmt.Sprintf("Chromium failed to load resources: %v", strings.ReplaceAll(err.Error(), fmt.Sprintf(": %s", ErrResourceLoadingFailed.Error()), "")), + ), + ) + } + return err } diff --git a/pkg/modules/chromium/routes_test.go b/pkg/modules/chromium/routes_test.go index 29fb9d10c..629dfbb8f 100644 --- a/pkg/modules/chromium/routes_test.go +++ b/pkg/modules/chromium/routes_test.go @@ -72,6 +72,44 @@ func TestFormDataChromiumOptions(t *testing.T) { compareWithoutDeepEqual: false, expectValidationError: false, }, + { + scenario: "invalid failOnResourceHttpStatusCodes form field", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "failOnResourceHttpStatusCodes": { + "foo", + }, + }) + return ctx + }(), + expectedOptions: func() Options { + options := DefaultOptions() + options.FailOnResourceHttpStatusCodes = nil + return options + }(), + compareWithoutDeepEqual: false, + expectValidationError: true, + }, + { + scenario: "valid failOnResourceHttpStatusCodes form field", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "failOnResourceHttpStatusCodes": { + `[399,499,599]`, + }, + }) + return ctx + }(), + expectedOptions: func() Options { + options := DefaultOptions() + options.FailOnResourceHttpStatusCodes = []int64{399, 499, 599} + return options + }(), + compareWithoutDeepEqual: false, + expectValidationError: false, + }, { scenario: "invalid cookies form field", ctx: func() *api.ContextMock { @@ -1592,6 +1630,18 @@ func TestConvertUrl(t *testing.T) { expectHttpStatus: http.StatusConflict, expectOutputPathsCount: 0, }, + { + scenario: "ErrInvalidResourceHttpStatusCode", + ctx: &api.ContextMock{Context: new(api.Context)}, + api: &ApiMock{PdfMock: func(ctx context.Context, logger *zap.Logger, url, 
outputPath string, options PdfOptions) error { + return ErrInvalidResourceHttpStatusCode + }}, + options: DefaultPdfOptions(), + expectError: true, + expectHttpError: true, + expectHttpStatus: http.StatusConflict, + expectOutputPathsCount: 0, + }, { scenario: "ErrConsoleExceptions", ctx: &api.ContextMock{Context: new(api.Context)}, @@ -1616,6 +1666,18 @@ func TestConvertUrl(t *testing.T) { expectHttpStatus: http.StatusBadRequest, expectOutputPathsCount: 0, }, + { + scenario: "ErrResourceLoadingFailed", + ctx: &api.ContextMock{Context: new(api.Context)}, + api: &ApiMock{PdfMock: func(ctx context.Context, logger *zap.Logger, url, outputPath string, options PdfOptions) error { + return ErrResourceLoadingFailed + }}, + options: DefaultPdfOptions(), + expectError: true, + expectHttpError: true, + expectHttpStatus: http.StatusConflict, + expectOutputPathsCount: 0, + }, { scenario: "error from Chromium", ctx: &api.ContextMock{Context: new(api.Context)}, @@ -1802,6 +1864,18 @@ func TestScreenshotUrl(t *testing.T) { expectHttpStatus: http.StatusConflict, expectOutputPathsCount: 0, }, + { + scenario: "ErrInvalidResourceHttpStatusCode", + ctx: &api.ContextMock{Context: new(api.Context)}, + api: &ApiMock{ScreenshotMock: func(ctx context.Context, logger *zap.Logger, url, outputPath string, options ScreenshotOptions) error { + return ErrInvalidResourceHttpStatusCode + }}, + options: DefaultScreenshotOptions(), + expectError: true, + expectHttpError: true, + expectHttpStatus: http.StatusConflict, + expectOutputPathsCount: 0, + }, { scenario: "ErrConsoleExceptions", ctx: &api.ContextMock{Context: new(api.Context)}, @@ -1826,6 +1900,18 @@ func TestScreenshotUrl(t *testing.T) { expectHttpStatus: http.StatusBadRequest, expectOutputPathsCount: 0, }, + { + scenario: "ErrResourceLoadingFailed", + ctx: &api.ContextMock{Context: new(api.Context)}, + api: &ApiMock{ScreenshotMock: func(ctx context.Context, logger *zap.Logger, url, outputPath string, options ScreenshotOptions) error { + 
return ErrResourceLoadingFailed + }}, + options: DefaultScreenshotOptions(), + expectError: true, + expectHttpError: true, + expectHttpStatus: http.StatusConflict, + expectOutputPathsCount: 0, + }, { scenario: "error from Chromium", ctx: &api.ContextMock{Context: new(api.Context)}, diff --git a/test/testdata/chromium/html/index.html b/test/testdata/chromium/html/index.html index 29a787beb..a19f166e1 100644 --- a/test/testdata/chromium/html/index.html +++ b/test/testdata/chromium/html/index.html @@ -2,6 +2,12 @@ + + + + + + Gutenberg @@ -23,7 +29,7 @@

Gutenberg

It is a press, certainly, but a press from which shall flow in inexhaustible streams...Through it, God will spread His Word. A spring of truth shall flow from it: like a new star it shall scatter the darkness of ignorance, and cause a light heretofore unknown to shine amongst men.

- +
diff --git a/test/testdata/chromium/markdown/index.html b/test/testdata/chromium/markdown/index.html index cedb23ac5..faa522dbc 100644 --- a/test/testdata/chromium/markdown/index.html +++ b/test/testdata/chromium/markdown/index.html @@ -2,6 +2,12 @@ + + + + + + Gutenberg @@ -15,7 +21,7 @@

Gutenberg

It is a press, certainly, but a press from which shall flow in inexhaustible streams...Through it, God will spread His Word. A spring of truth shall flow from it: like a new star it shall scatter the darkness of ignorance, and cause a light heretofore unknown to shine amongst men.

- +
From e65a5e6d2235778fe389873f2929555d5d7cfca2 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Tue, 5 Nov 2024 11:44:29 +0100 Subject: [PATCH 13/25] chore(deps): update Go dependencies --- go.mod | 14 +++++++------- go.sum | 32 ++++++++++++++++---------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/go.mod b/go.mod index 837d542ca..8076f8186 100644 --- a/go.mod +++ b/go.mod @@ -4,15 +4,15 @@ go 1.23.0 require ( github.com/alexliesenfeld/health v0.8.0 - github.com/andybalholm/brotli v1.1.0 // indirect + github.com/andybalholm/brotli v1.1.1 // indirect github.com/barasher/go-exiftool v1.10.0 - github.com/chromedp/cdproto v0.0.0-20241003230502-a4a8f7c660df - github.com/chromedp/chromedp v0.10.0 + github.com/chromedp/cdproto v0.0.0-20241030022559-23c28aebe8cb + github.com/chromedp/chromedp v0.11.1 github.com/golang/snappy v0.0.4 // indirect github.com/google/uuid v1.6.0 github.com/hashicorp/go-cleanhttp v0.5.2 // indirect github.com/hashicorp/go-retryablehttp v0.7.7 - github.com/klauspost/compress v1.17.10 // indirect + github.com/klauspost/compress v1.17.11 // indirect github.com/klauspost/pgzip v1.2.6 // indirect github.com/labstack/echo/v4 v4.12.0 github.com/labstack/gommon v0.4.2 @@ -21,7 +21,7 @@ require ( github.com/microcosm-cc/bluemonday v1.0.27 github.com/nwaples/rardecode v1.1.3 // indirect github.com/pierrec/lz4/v4 v4.1.21 // indirect - github.com/prometheus/client_golang v1.20.4 + github.com/prometheus/client_golang v1.20.5 github.com/russross/blackfriday/v2 v2.1.0 github.com/spf13/pflag v1.0.5 github.com/ulikunitz/xz v0.5.12 // indirect @@ -41,7 +41,7 @@ require ( github.com/aymerick/douceur v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/chromedp/sysutil v1.0.0 // indirect + github.com/chromedp/sysutil v1.1.0 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect github.com/gobwas/httphead v0.1.0 // indirect 
github.com/gobwas/pool v0.2.1 // indirect @@ -53,7 +53,7 @@ require ( github.com/mattn/go-colorable v0.1.13 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.60.0 // indirect + github.com/prometheus/common v0.60.1 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasttemplate v1.2.2 // indirect diff --git a/go.sum b/go.sum index 6412b9793..fa82c247d 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,8 @@ github.com/alexliesenfeld/health v0.8.0 h1:lCV0i+ZJPTbqP7LfKG7p3qZBl5VhelwUFCIVWl77fgk= github.com/alexliesenfeld/health v0.8.0/go.mod h1:TfNP0f+9WQVWMQRzvMUjlws4ceXKEL3WR+6Hp95HUFc= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= -github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= -github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= github.com/barasher/go-exiftool v1.10.0 h1:f5JY5jc42M7tzR6tbL9508S2IXdIcG9QyieEXNMpIhs= @@ -11,13 +11,12 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chromedp/cdproto v0.0.0-20240801214329-3f85d328b335/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= 
-github.com/chromedp/cdproto v0.0.0-20241003230502-a4a8f7c660df h1:cbtSn19AtqQha1cxmP2Qvgd3fFMz51AeAEKLJMyEUhc= -github.com/chromedp/cdproto v0.0.0-20241003230502-a4a8f7c660df/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= -github.com/chromedp/chromedp v0.10.0 h1:bRclRYVpMm/UVD76+1HcRW9eV3l58rFfy7AdBvKab1E= -github.com/chromedp/chromedp v0.10.0/go.mod h1:ei/1ncZIqXX1YnAYDkxhD4gzBgavMEUu7JCKvztdomE= -github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3Ic= -github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww= +github.com/chromedp/cdproto v0.0.0-20241030022559-23c28aebe8cb h1:yBPpAakATGLWZsVgYRcU9FopbOqzoazzbFaStQ9DCMc= +github.com/chromedp/cdproto v0.0.0-20241030022559-23c28aebe8cb/go.mod h1:4XqMl3iIW08jtieURWL6Tt5924w21pxirC6th662XUM= +github.com/chromedp/chromedp v0.11.1 h1:Spca8egFqUlv+JDW+yIs+ijlHlJDPufgrfXPwtq6NMs= +github.com/chromedp/chromedp v0.11.1/go.mod h1:lr8dFRLKsdTTWb75C/Ttol2vnBKOSnt0BW8R9Xaupi8= +github.com/chromedp/sysutil v1.1.0 h1:PUFNv5EcprjqXZD9nJb9b/c9ibAbxiYo4exNWZyipwM= +github.com/chromedp/sysutil v1.1.0/go.mod h1:WiThHUdltqCNKGc4gaU50XgYjwjYIhKWoHGPTUfWTJ8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -56,8 +55,8 @@ github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8Hm github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.17.10 h1:oXAz+Vh0PMUvJczoi+flxpnBEPxoER1IaAnU/NMPtT0= -github.com/klauspost/compress v1.17.10/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= 
+github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= @@ -93,12 +92,12 @@ github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= -github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= -github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= +github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= +github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/russross/blackfriday/v2 v2.1.0 
h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= @@ -121,6 +120,8 @@ github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQ github.com/valyala/fasttemplate v1.2.2/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= @@ -135,7 +136,6 @@ golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= From 62ba4877acd21b2e9f2acf29ec4e70dbeefe0b98 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Tue, 5 Nov 2024 11:53:20 +0100 Subject: [PATCH 14/25] chore(deps): update golangci-lint --- .github/workflows/continuous_integration.yml | 2 +- Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml index 809a35686..35b5df5ad 100644 --- 
a/.github/workflows/continuous_integration.yml +++ b/.github/workflows/continuous_integration.yml @@ -24,7 +24,7 @@ jobs: - name: Run linters uses: golangci/golangci-lint-action@v6 with: - version: v1.60.3 + version: v1.61.0 tests: needs: diff --git a/Makefile b/Makefile index 230519df5..def2bb19b 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ GOTENBERG_USER_UID=1001 NOTO_COLOR_EMOJI_VERSION=v2.047 # See https://github.com/googlefonts/noto-emoji/releases. PDFTK_VERSION=v3.3.3 # See https://gitlab.com/pdftk-java/pdftk/-/releases - Binary package. PDFCPU_VERSION=v0.8.1 # See https://github.com/pdfcpu/pdfcpu/releases. -GOLANGCI_LINT_VERSION=v1.60.3 # See https://github.com/golangci/golangci-lint/releases. +GOLANGCI_LINT_VERSION=v1.61.0 # See https://github.com/golangci/golangci-lint/releases. .PHONY: build build: ## Build the Gotenberg's Docker image From 79f396f6ffb37d1809b0e95065b61d1d34be5832 Mon Sep 17 00:00:00 2001 From: Tam Nguyen Date: Mon, 18 Nov 2024 15:03:39 +1100 Subject: [PATCH 15/25] feat(chromium): add GenerateDocumentOutline option --- pkg/modules/chromium/chromium.go | 35 ++++++++++++++++++-------------- pkg/modules/chromium/routes.go | 35 +++++++++++++++++--------------- pkg/modules/chromium/tasks.go | 1 + 3 files changed, 40 insertions(+), 31 deletions(-) diff --git a/pkg/modules/chromium/chromium.go b/pkg/modules/chromium/chromium.go index 08f3c64ef..b31c5e707 100644 --- a/pkg/modules/chromium/chromium.go +++ b/pkg/modules/chromium/chromium.go @@ -213,26 +213,31 @@ type PdfOptions struct { // PreferCssPageSize defines whether to prefer page size as defined by CSS. // If false, the content will be scaled to fit the paper size. PreferCssPageSize bool + + // GenerateDocumentOutline defines whether the document outline should be + // embedded into the PDF. + GenerateDocumentOutline bool } // DefaultPdfOptions returns the default values for PdfOptions. 
func DefaultPdfOptions() PdfOptions { return PdfOptions{ - Options: DefaultOptions(), - Landscape: false, - PrintBackground: false, - Scale: 1.0, - SinglePage: false, - PaperWidth: 8.5, - PaperHeight: 11, - MarginTop: 0.39, - MarginBottom: 0.39, - MarginLeft: 0.39, - MarginRight: 0.39, - PageRanges: "", - HeaderTemplate: "", - FooterTemplate: "", - PreferCssPageSize: false, + Options: DefaultOptions(), + Landscape: false, + PrintBackground: false, + Scale: 1.0, + SinglePage: false, + PaperWidth: 8.5, + PaperHeight: 11, + MarginTop: 0.39, + MarginBottom: 0.39, + MarginLeft: 0.39, + MarginRight: 0.39, + PageRanges: "", + HeaderTemplate: "", + FooterTemplate: "", + PreferCssPageSize: false, + GenerateDocumentOutline: false, } } diff --git a/pkg/modules/chromium/routes.go b/pkg/modules/chromium/routes.go index 4662371c7..a806838cd 100644 --- a/pkg/modules/chromium/routes.go +++ b/pkg/modules/chromium/routes.go @@ -205,6 +205,7 @@ func FormDataChromiumPdfOptions(ctx *api.Context) (*api.FormData, PdfOptions) { pageRanges string headerTemplate, footerTemplate string preferCssPageSize bool + generateDocumentOutline bool ) form. @@ -221,24 +222,26 @@ func FormDataChromiumPdfOptions(ctx *api.Context) (*api.FormData, PdfOptions) { String("nativePageRanges", &pageRanges, defaultPdfOptions.PageRanges). Content("header.html", &headerTemplate, defaultPdfOptions.HeaderTemplate). Content("footer.html", &footerTemplate, defaultPdfOptions.FooterTemplate). - Bool("preferCssPageSize", &preferCssPageSize, defaultPdfOptions.PreferCssPageSize) + Bool("preferCssPageSize", &preferCssPageSize, defaultPdfOptions.PreferCssPageSize). 
+ Bool("generateDocumentOutline", &generateDocumentOutline, defaultPdfOptions.GenerateDocumentOutline) pdfOptions := PdfOptions{ - Options: options, - Landscape: landscape, - PrintBackground: printBackground, - Scale: scale, - SinglePage: singlePage, - PaperWidth: paperWidth, - PaperHeight: paperHeight, - MarginTop: marginTop, - MarginBottom: marginBottom, - MarginLeft: marginLeft, - MarginRight: marginRight, - PageRanges: pageRanges, - HeaderTemplate: headerTemplate, - FooterTemplate: footerTemplate, - PreferCssPageSize: preferCssPageSize, + Options: options, + Landscape: landscape, + PrintBackground: printBackground, + Scale: scale, + SinglePage: singlePage, + PaperWidth: paperWidth, + PaperHeight: paperHeight, + MarginTop: marginTop, + MarginBottom: marginBottom, + MarginLeft: marginLeft, + MarginRight: marginRight, + PageRanges: pageRanges, + HeaderTemplate: headerTemplate, + FooterTemplate: footerTemplate, + PreferCssPageSize: preferCssPageSize, + GenerateDocumentOutline: generateDocumentOutline, } return form, pdfOptions diff --git a/pkg/modules/chromium/tasks.go b/pkg/modules/chromium/tasks.go index 4e841d75c..5f19a2884 100644 --- a/pkg/modules/chromium/tasks.go +++ b/pkg/modules/chromium/tasks.go @@ -48,6 +48,7 @@ func printToPdfActionFunc(logger *zap.Logger, outputPath string, options PdfOpti WithMarginRight(options.MarginRight). WithPageRanges(pageRanges). WithPreferCSSPageSize(options.PreferCssPageSize). + WithGenerateDocumentOutline(options.GenerateDocumentOutline). // Does not seem to work. // See https://github.com/gotenberg/gotenberg/issues/831. 
WithGenerateTaggedPDF(false) From 66dbdc09b5d06bef3a9f2816a98d7d79e53ee1c3 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Mon, 18 Nov 2024 09:39:05 +0100 Subject: [PATCH 16/25] chore(deps): update Go dependencies --- go.mod | 20 ++++++++++---------- go.sum | 40 ++++++++++++++++++++-------------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/go.mod b/go.mod index 8076f8186..02b01da49 100644 --- a/go.mod +++ b/go.mod @@ -6,8 +6,8 @@ require ( github.com/alexliesenfeld/health v0.8.0 github.com/andybalholm/brotli v1.1.1 // indirect github.com/barasher/go-exiftool v1.10.0 - github.com/chromedp/cdproto v0.0.0-20241030022559-23c28aebe8cb - github.com/chromedp/chromedp v0.11.1 + github.com/chromedp/cdproto v0.0.0-20241110205750-a72e6703cd9b + github.com/chromedp/chromedp v0.11.2 github.com/golang/snappy v0.0.4 // indirect github.com/google/uuid v1.6.0 github.com/hashicorp/go-cleanhttp v0.5.2 // indirect @@ -27,12 +27,12 @@ require ( github.com/ulikunitz/xz v0.5.12 // indirect go.uber.org/multierr v1.11.0 go.uber.org/zap v1.27.0 - golang.org/x/crypto v0.28.0 // indirect - golang.org/x/net v0.30.0 - golang.org/x/sync v0.8.0 - golang.org/x/sys v0.26.0 // indirect - golang.org/x/term v0.25.0 - golang.org/x/text v0.19.0 + golang.org/x/crypto v0.29.0 // indirect + golang.org/x/net v0.31.0 + golang.org/x/sync v0.9.0 + golang.org/x/sys v0.27.0 // indirect + golang.org/x/term v0.26.0 + golang.org/x/text v0.20.0 ) require github.com/dlclark/regexp2 v1.11.4 @@ -58,6 +58,6 @@ require ( github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasttemplate v1.2.2 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect - golang.org/x/time v0.7.0 // indirect - google.golang.org/protobuf v1.35.1 // indirect + golang.org/x/time v0.8.0 // indirect + google.golang.org/protobuf v1.35.2 // indirect ) diff --git a/go.sum b/go.sum index fa82c247d..6f328e5f4 100644 --- a/go.sum +++ b/go.sum @@ -11,10 +11,10 @@ github.com/beorn7/perks 
v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chromedp/cdproto v0.0.0-20241030022559-23c28aebe8cb h1:yBPpAakATGLWZsVgYRcU9FopbOqzoazzbFaStQ9DCMc= -github.com/chromedp/cdproto v0.0.0-20241030022559-23c28aebe8cb/go.mod h1:4XqMl3iIW08jtieURWL6Tt5924w21pxirC6th662XUM= -github.com/chromedp/chromedp v0.11.1 h1:Spca8egFqUlv+JDW+yIs+ijlHlJDPufgrfXPwtq6NMs= -github.com/chromedp/chromedp v0.11.1/go.mod h1:lr8dFRLKsdTTWb75C/Ttol2vnBKOSnt0BW8R9Xaupi8= +github.com/chromedp/cdproto v0.0.0-20241110205750-a72e6703cd9b h1:md1Gk5jkNE91SZxFDCMHmKqX0/GsEr1/VTejht0sCbY= +github.com/chromedp/cdproto v0.0.0-20241110205750-a72e6703cd9b/go.mod h1:4XqMl3iIW08jtieURWL6Tt5924w21pxirC6th662XUM= +github.com/chromedp/chromedp v0.11.2 h1:ZRHTh7DjbNTlfIv3NFTbB7eVeu5XCNkgrpcGSpn2oX0= +github.com/chromedp/chromedp v0.11.2/go.mod h1:lr8dFRLKsdTTWb75C/Ttol2vnBKOSnt0BW8R9Xaupi8= github.com/chromedp/sysutil v1.1.0 h1:PUFNv5EcprjqXZD9nJb9b/c9ibAbxiYo4exNWZyipwM= github.com/chromedp/sysutil v1.1.0/go.mod h1:WiThHUdltqCNKGc4gaU50XgYjwjYIhKWoHGPTUfWTJ8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -128,24 +128,24 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod 
h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= +golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= +golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo= +golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM= +golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= +golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= -golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= -golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= -golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= +golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +golang.org/x/time v0.8.0 
h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg= +golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= -google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= +google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 333ab7066c05273bcef6cbcd0c54914fa15a6b27 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Tue, 19 Nov 2024 14:25:07 +0100 Subject: [PATCH 17/25] chore(libreoffice): remove non-relevant comment about old gc module --- pkg/modules/libreoffice/api/libreoffice.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pkg/modules/libreoffice/api/libreoffice.go b/pkg/modules/libreoffice/api/libreoffice.go index 3eb7796fd..f8c4415b2 100644 --- a/pkg/modules/libreoffice/api/libreoffice.go +++ b/pkg/modules/libreoffice/api/libreoffice.go @@ -365,13 +365,6 @@ func (p *libreOfficeProcess) pdf(ctx context.Context, logger *zap.Logger, inputP return ErrRuntimeException } - // Possible errors: - // 1. LibreOffice failed for some reason. - // 2. Context done. - // - // On the second scenario, LibreOffice might not have time to remove some - // of its temporary files, as it has been killed without warning. The - // garbage collector will delete them for us (if the module is loaded). 
return fmt.Errorf("convert to PDF: %w", err) } From 1b49b35365fdeab773de47261ce1cb5d9f837e2e Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Thu, 21 Nov 2024 09:55:10 +0100 Subject: [PATCH 18/25] fix(exiftool): convert interface{} array to string array --- pkg/modules/exiftool/exiftool.go | 14 +++++++++++- pkg/modules/exiftool/exiftool_test.go | 33 ++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/pkg/modules/exiftool/exiftool.go b/pkg/modules/exiftool/exiftool.go index 13ed02b8e..7d2cb8d97 100644 --- a/pkg/modules/exiftool/exiftool.go +++ b/pkg/modules/exiftool/exiftool.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "os" + "reflect" "github.com/barasher/go-exiftool" "go.uber.org/zap" @@ -109,6 +110,17 @@ func (engine *ExifTool) WriteMetadata(ctx context.Context, logger *zap.Logger, m fileMetadata[0].SetString(key, val) case []string: fileMetadata[0].SetStrings(key, val) + case []interface{}: + // See https://github.com/gotenberg/gotenberg/issues/1048. + strings := make([]string, len(val)) + for i, entry := range val { + if str, ok := entry.(string); ok { + strings[i] = str + continue + } + return fmt.Errorf("write PDF metadata with ExifTool: %s %+v %s %w", key, val, reflect.TypeOf(val), gotenberg.ErrPdfEngineMetadataValueNotSupported) + } + fileMetadata[0].SetStrings(key, strings) case bool: fileMetadata[0].SetString(key, fmt.Sprintf("%t", val)) case int: @@ -122,7 +134,7 @@ func (engine *ExifTool) WriteMetadata(ctx context.Context, logger *zap.Logger, m // TODO: support more complex cases, e.g., arrays and nested objects // (limitations in underlying library). 
default: - return fmt.Errorf("write PDF metadata with ExifTool: %w", gotenberg.ErrPdfEngineMetadataValueNotSupported) + return fmt.Errorf("write PDF metadata with ExifTool: %s %+v %s %w", key, val, reflect.TypeOf(val), gotenberg.ErrPdfEngineMetadataValueNotSupported) } } diff --git a/pkg/modules/exiftool/exiftool_test.go b/pkg/modules/exiftool/exiftool_test.go index 96f3eeabb..949087ec8 100644 --- a/pkg/modules/exiftool/exiftool_test.go +++ b/pkg/modules/exiftool/exiftool_test.go @@ -162,7 +162,20 @@ func TestExiftool_WriteMetadata(t *testing.T) { expectError: true, }, { - scenario: "gotenberg.ErrPdfEngineMetadataValueNotSupported", + scenario: "gotenberg.ErrPdfEngineMetadataValueNotSupported (not string array)", + createCopy: true, + inputPath: "/tests/test/testdata/pdfengines/sample1.pdf", + metadata: map[string]interface{}{ + "Unsupported": []interface{}{ + "foo", + 1, + }, + }, + expectError: true, + expectedError: gotenberg.ErrPdfEngineMetadataValueNotSupported, + }, + { + scenario: "gotenberg.ErrPdfEngineMetadataValueNotSupported (default)", createCopy: true, inputPath: "/tests/test/testdata/pdfengines/sample1.pdf", metadata: map[string]interface{}{ @@ -171,6 +184,24 @@ func TestExiftool_WriteMetadata(t *testing.T) { expectError: true, expectedError: gotenberg.ErrPdfEngineMetadataValueNotSupported, }, + { + scenario: "success (interface array to string array)", + createCopy: true, + inputPath: "/tests/test/testdata/pdfengines/sample1.pdf", + metadata: map[string]interface{}{ + "Keywords": []interface{}{ + "first", + "second", + }, + }, + expectMetadata: map[string]interface{}{ + "Keywords": []interface{}{ + "first", + "second", + }, + }, + expectError: false, + }, { scenario: "success", createCopy: true, From 42ee593708277e8ce849a32ceaf579b0f9129fd3 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Mon, 25 Nov 2024 09:28:42 +0100 Subject: [PATCH 19/25] chore(pdfengines): add stubs for common workflows --- pkg/modules/chromium/routes.go | 77 ++----- 
pkg/modules/chromium/routes_test.go | 119 ----------- pkg/modules/libreoffice/routes.go | 60 ++---- pkg/modules/pdfengines/routes.go | 209 +++++++++++-------- pkg/modules/pdfengines/routes_test.go | 282 ++++++++++++++++++++++++++ 5 files changed, 431 insertions(+), 316 deletions(-) diff --git a/pkg/modules/chromium/routes.go b/pkg/modules/chromium/routes.go index a806838cd..2ae33b250 100644 --- a/pkg/modules/chromium/routes.go +++ b/pkg/modules/chromium/routes.go @@ -21,6 +21,7 @@ import ( "github.com/gotenberg/gotenberg/v8/pkg/gotenberg" "github.com/gotenberg/gotenberg/v8/pkg/modules/api" + "github.com/gotenberg/gotenberg/v8/pkg/modules/pdfengines" ) // FormDataChromiumOptions creates [Options] from the form data. Fallback to @@ -316,39 +317,6 @@ func FormDataChromiumScreenshotOptions(ctx *api.Context) (*api.FormData, Screens return form, screenshotOptions } -// FormDataChromiumPdfFormats creates [gotenberg.PdfFormats] from the form -// data. Fallback to default value if the considered key is not present. -func FormDataChromiumPdfFormats(form *api.FormData) gotenberg.PdfFormats { - var ( - pdfa string - pdfua bool - ) - - form. - String("pdfa", &pdfa, ""). - Bool("pdfua", &pdfua, false) - - return gotenberg.PdfFormats{ - PdfA: pdfa, - PdfUa: pdfua, - } -} - -// FormDataPdfMetadata creates metadata object from the form data. -func FormDataPdfMetadata(form *api.FormData) map[string]interface{} { - var metadata map[string]interface{} - form.Custom("metadata", func(value string) error { - if len(value) > 0 { - err := json.Unmarshal([]byte(value), &metadata) - if err != nil { - return fmt.Errorf("unmarshal metadata: %w", err) - } - } - return nil - }) - return metadata -} - // convertUrlRoute returns an [api.Route] which can convert a URL to PDF. 
func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { return api.Route{ @@ -358,8 +326,8 @@ func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { Handler: func(c echo.Context) error { ctx := c.Get("context").(*api.Context) form, options := FormDataChromiumPdfOptions(ctx) - pdfFormats := FormDataChromiumPdfFormats(form) - metadata := FormDataPdfMetadata(form) + pdfFormats := pdfengines.FormDataPdfFormats(form) + metadata := pdfengines.FormDataPdfMetadata(form) var url string err := form. @@ -418,8 +386,8 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { Handler: func(c echo.Context) error { ctx := c.Get("context").(*api.Context) form, options := FormDataChromiumPdfOptions(ctx) - pdfFormats := FormDataChromiumPdfFormats(form) - metadata := FormDataPdfMetadata(form) + pdfFormats := pdfengines.FormDataPdfFormats(form) + metadata := pdfengines.FormDataPdfMetadata(form) var inputPath string err := form. @@ -480,8 +448,8 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { Handler: func(c echo.Context) error { ctx := c.Get("context").(*api.Context) form, options := FormDataChromiumPdfOptions(ctx) - pdfFormats := FormDataChromiumPdfFormats(form) - metadata := FormDataPdfMetadata(form) + pdfFormats := pdfengines.FormDataPdfFormats(form) + metadata := pdfengines.FormDataPdfMetadata(form) var ( inputPath string @@ -664,34 +632,19 @@ func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url return fmt.Errorf("convert to PDF: %w", err) } - // So far so good, the URL has been converted to PDF. - // Now, let's check if the client want to convert the resulting PDF - // to specific formats. 
- zeroValued := gotenberg.PdfFormats{} - if pdfFormats != zeroValued { - convertInputPath := outputPath - convertOutputPath := ctx.GeneratePath(".pdf") - - err = engine.Convert(ctx, ctx.Log(), pdfFormats, convertInputPath, convertOutputPath) - if err != nil { - return fmt.Errorf("convert PDF: %w", err) - } - - // Important: the output path is now the converted file. - outputPath = convertOutputPath + outputPaths, err := pdfengines.ConvertStub(ctx, engine, pdfFormats, []string{outputPath}) + if err != nil { + return fmt.Errorf("convert PDF: %w", err) } - // Writes and potentially overrides metadata entries, if any. - if len(metadata) > 0 { - err = engine.WriteMetadata(ctx, ctx.Log(), metadata, outputPath) - if err != nil { - return fmt.Errorf("write metadata: %w", err) - } + err = pdfengines.WriteMetadataStub(ctx, engine, metadata, outputPaths) + if err != nil { + return fmt.Errorf("write metadata: %w", err) } - err = ctx.AddOutputPaths(outputPath) + err = ctx.AddOutputPaths(outputPaths...) 
if err != nil { - return fmt.Errorf("add output path: %w", err) + return fmt.Errorf("add output paths: %w", err) } return nil diff --git a/pkg/modules/chromium/routes_test.go b/pkg/modules/chromium/routes_test.go index 629dfbb8f..1e7363ba6 100644 --- a/pkg/modules/chromium/routes_test.go +++ b/pkg/modules/chromium/routes_test.go @@ -603,125 +603,6 @@ func TestFormDataChromiumScreenshotOptions(t *testing.T) { } } -func TestFormDataChromiumPdfFormats(t *testing.T) { - for _, tc := range []struct { - scenario string - ctx *api.ContextMock - expectedPdfFormats gotenberg.PdfFormats - expectValidationError bool - }{ - { - scenario: "no custom form fields", - ctx: &api.ContextMock{Context: new(api.Context)}, - expectedPdfFormats: gotenberg.PdfFormats{}, - expectValidationError: false, - }, - { - scenario: "pdfa and pdfua form fields", - ctx: func() *api.ContextMock { - ctx := &api.ContextMock{Context: new(api.Context)} - ctx.SetValues(map[string][]string{ - "pdfa": { - "foo", - }, - "pdfua": { - "true", - }, - }) - return ctx - }(), - expectedPdfFormats: gotenberg.PdfFormats{PdfA: "foo", PdfUa: true}, - expectValidationError: false, - }, - } { - t.Run(tc.scenario, func(t *testing.T) { - tc.ctx.SetLogger(zap.NewNop()) - form := tc.ctx.Context.FormData() - actual := FormDataChromiumPdfFormats(form) - - if !reflect.DeepEqual(actual, tc.expectedPdfFormats) { - t.Fatalf("expected %+v but got: %+v", tc.expectedPdfFormats, actual) - } - - err := form.Validate() - - if tc.expectValidationError && err == nil { - t.Fatal("expected validation error but got none", err) - } - - if !tc.expectValidationError && err != nil { - t.Fatalf("expected no validation error but got: %v", err) - } - }) - } -} - -func TestFormDataPdfMetadata(t *testing.T) { - for _, tc := range []struct { - scenario string - ctx *api.ContextMock - expectedMetadata map[string]interface{} - expectValidationError bool - }{ - { - scenario: "no metadata form field", - ctx: &api.ContextMock{Context: new(api.Context)}, - 
expectedMetadata: nil, - expectValidationError: false, - }, - { - scenario: "invalid metadata form field", - ctx: func() *api.ContextMock { - ctx := &api.ContextMock{Context: new(api.Context)} - ctx.SetValues(map[string][]string{ - "metadata": { - "foo", - }, - }) - return ctx - }(), - expectedMetadata: nil, - expectValidationError: true, - }, - { - scenario: "valid metadata form field", - ctx: func() *api.ContextMock { - ctx := &api.ContextMock{Context: new(api.Context)} - ctx.SetValues(map[string][]string{ - "metadata": { - "{\"foo\":\"bar\"}", - }, - }) - return ctx - }(), - expectedMetadata: map[string]interface{}{ - "foo": "bar", - }, - expectValidationError: false, - }, - } { - t.Run(tc.scenario, func(t *testing.T) { - tc.ctx.SetLogger(zap.NewNop()) - form := tc.ctx.Context.FormData() - actual := FormDataPdfMetadata(form) - - if !reflect.DeepEqual(actual, tc.expectedMetadata) { - t.Fatalf("expected %+v but got: %+v", tc.expectedMetadata, actual) - } - - err := form.Validate() - - if tc.expectValidationError && err == nil { - t.Fatal("expected validation error but got none", err) - } - - if !tc.expectValidationError && err != nil { - t.Fatalf("expected no validation error but got: %v", err) - } - }) - } -} - func TestConvertUrlRoute(t *testing.T) { for _, tc := range []struct { scenario string diff --git a/pkg/modules/libreoffice/routes.go b/pkg/modules/libreoffice/routes.go index ba8340c66..b49677d64 100644 --- a/pkg/modules/libreoffice/routes.go +++ b/pkg/modules/libreoffice/routes.go @@ -13,6 +13,7 @@ import ( "github.com/gotenberg/gotenberg/v8/pkg/gotenberg" "github.com/gotenberg/gotenberg/v8/pkg/modules/api" libreofficeapi "github.com/gotenberg/gotenberg/v8/pkg/modules/libreoffice/api" + "github.com/gotenberg/gotenberg/v8/pkg/modules/pdfengines" ) // convertRoute returns an [api.Route] which can convert LibreOffice documents @@ -26,7 +27,10 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap ctx := 
c.Get("context").(*api.Context) defaultOptions := libreofficeapi.DefaultOptions() - // Let's get the data from the form and validate them. + form := ctx.FormData() + pdfFormats := pdfengines.FormDataPdfFormats(form) + metadata := pdfengines.FormDataPdfMetadata(form) + var ( inputPaths []string password string @@ -51,14 +55,11 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap quality int reduceImageResolution bool maxImageResolution int - pdfa string - pdfua bool nativePdfFormats bool merge bool - metadata map[string]interface{} ) - err := ctx.FormData(). + err := form. MandatoryPaths(libreOffice.Extensions(), &inputPaths). String("password", &password, defaultOptions.Password). Bool("landscape", &landscape, defaultOptions.Landscape). @@ -120,8 +121,6 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap maxImageResolution = intValue return nil }). - String("pdfa", &pdfa, ""). - Bool("pdfua", &pdfua, false). Bool("nativePdfFormats", &nativePdfFormats, true). Bool("merge", &merge, false). Custom("metadata", func(value string) error { @@ -138,12 +137,6 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap return fmt.Errorf("validate form data: %w", err) } - pdfFormats := gotenberg.PdfFormats{ - PdfA: pdfa, - PdfUa: pdfua, - } - - // Alright, let's convert each document to PDF. outputPaths := make([]string, len(inputPaths)) for i, inputPath := range inputPaths { outputPaths[i] = ctx.GeneratePath(".pdf") @@ -206,11 +199,8 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap } } - // So far so good, let's check if we have to merge the PDFs. 
- if len(outputPaths) > 1 && merge { - outputPath := ctx.GeneratePath(".pdf") - - err = engine.Merge(ctx, ctx.Log(), outputPaths, outputPath) + if merge { + outputPath, err := pdfengines.MergeStub(ctx, engine, outputPaths) if err != nil { return fmt.Errorf("merge PDFs: %w", err) } @@ -219,34 +209,16 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap outputPaths = []string{outputPath} } - // Let's check if the client want to convert each PDF to a specific - // PDF format. - zeroValued := gotenberg.PdfFormats{} - if !nativePdfFormats && pdfFormats != zeroValued { - convertOutputPaths := make([]string, len(outputPaths)) - - for i, outputPath := range outputPaths { - convertInputPath := outputPath - convertOutputPaths[i] = ctx.GeneratePath(".pdf") - - err = engine.Convert(ctx, ctx.Log(), pdfFormats, convertInputPath, convertOutputPaths[i]) - if err != nil { - return fmt.Errorf("convert PDF: %w", err) - } + if !nativePdfFormats { + outputPaths, err = pdfengines.ConvertStub(ctx, engine, pdfFormats, outputPaths) + if err != nil { + return fmt.Errorf("convert PDFs: %w", err) } - - // Important: the output paths are now the converted files. - outputPaths = convertOutputPaths } - // Writes and potentially overrides metadata entries, if any. - if len(metadata) > 0 { - for _, outputPath := range outputPaths { - err = engine.WriteMetadata(ctx, ctx.Log(), metadata, outputPath) - if err != nil { - return fmt.Errorf("write metadata: %w", err) - } - } + err = pdfengines.WriteMetadataStub(ctx, engine, metadata, outputPaths) + if err != nil { + return fmt.Errorf("write metadata: %w", err) } if len(outputPaths) > 1 { @@ -263,8 +235,6 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap } } - // Last but not least, add the output paths to the context so that - // the API is able to send them as a response to the client. err = ctx.AddOutputPaths(outputPaths...) 
if err != nil { return fmt.Errorf("add output paths: %w", err) diff --git a/pkg/modules/pdfengines/routes.go b/pkg/modules/pdfengines/routes.go index 7ee4a2788..a0ddb756e 100644 --- a/pkg/modules/pdfengines/routes.go +++ b/pkg/modules/pdfengines/routes.go @@ -13,6 +13,98 @@ import ( "github.com/gotenberg/gotenberg/v8/pkg/modules/api" ) +// FormDataPdfFormats creates [gotenberg.PdfFormats] from the form data. +// Fallback to default value if the considered key is not present. +func FormDataPdfFormats(form *api.FormData) gotenberg.PdfFormats { + var ( + pdfa string + pdfua bool + ) + + form. + String("pdfa", &pdfa, ""). + Bool("pdfua", &pdfua, false) + + return gotenberg.PdfFormats{ + PdfA: pdfa, + PdfUa: pdfua, + } +} + +// FormDataPdfMetadata creates metadata object from the form data. +func FormDataPdfMetadata(form *api.FormData) map[string]interface{} { + var metadata map[string]interface{} + form.Custom("metadata", func(value string) error { + if len(value) > 0 { + err := json.Unmarshal([]byte(value), &metadata) + if err != nil { + return fmt.Errorf("unmarshal metadata: %w", err) + } + } + return nil + }) + return metadata +} + +// MergeStub merges given PDFs. If only one input PDF, it does nothing and +// returns the corresponding input path. +func MergeStub(ctx *api.Context, engine gotenberg.PdfEngine, inputPaths []string) (string, error) { + if len(inputPaths) == 0 { + return "", errors.New("no input paths") + } + + if len(inputPaths) == 1 { + return inputPaths[0], nil + } + + outputPath := ctx.GeneratePath(".pdf") + err := engine.Merge(ctx, ctx.Log(), inputPaths, outputPath) + if err != nil { + return "", fmt.Errorf("merge %d PDFs: %w", len(inputPaths), err) + } + + return outputPath, nil +} + +// ConvertStub transforms a given PDF to the specified formats defined in +// [gotenberg.PdfFormats]. If no format, it does nothing and returns the input +// paths. 
+func ConvertStub(ctx *api.Context, engine gotenberg.PdfEngine, formats gotenberg.PdfFormats, inputPaths []string) ([]string, error) { + zeroValued := gotenberg.PdfFormats{} + if formats == zeroValued { + return inputPaths, nil + } + + outputPaths := make([]string, len(inputPaths)) + for i, inputPath := range inputPaths { + outputPaths[i] = ctx.GeneratePath(".pdf") + + err := engine.Convert(ctx, ctx.Log(), formats, inputPath, outputPaths[i]) + if err != nil { + return nil, fmt.Errorf("convert '%s': %w", inputPath, err) + } + } + + return outputPaths, nil +} + +// WriteMetadataStub writes the metadata into PDF files. If no metadata, it +// does nothing. +func WriteMetadataStub(ctx *api.Context, engine gotenberg.PdfEngine, metadata map[string]interface{}, inputPaths []string) error { + if len(metadata) == 0 { + return nil + } + + for _, inputPath := range inputPaths { + err := engine.WriteMetadata(ctx, ctx.Log(), metadata, inputPath) + if err != nil { + return fmt.Errorf("write metadata into '%s': %w", inputPath, err) + } + } + + return nil +} + // mergeRoute returns an [api.Route] which can merge PDFs. func mergeRoute(engine gotenberg.PdfEngine) api.Route { return api.Route{ @@ -22,75 +114,37 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route { Handler: func(c echo.Context) error { ctx := c.Get("context").(*api.Context) - // Let's get the data from the form and validate them. - var ( - inputPaths []string - pdfa string - pdfua bool - metadata map[string]interface{} - ) + form := ctx.FormData() + pdfFormats := FormDataPdfFormats(form) + metadata := FormDataPdfMetadata(form) - err := ctx.FormData(). + var inputPaths []string + err := form. MandatoryPaths([]string{".pdf"}, &inputPaths). - String("pdfa", &pdfa, ""). - Bool("pdfua", &pdfua, false). - Custom("metadata", func(value string) error { - if len(value) > 0 { - err := json.Unmarshal([]byte(value), &metadata) - if err != nil { - return fmt.Errorf("unmarshal metadata: %w", err) - } - } - return nil - }). 
Validate() if err != nil { return fmt.Errorf("validate form data: %w", err) } - pdfFormats := gotenberg.PdfFormats{ - PdfA: pdfa, - PdfUa: pdfua, - } - - // Alright, let's merge the PDFs. outputPath := ctx.GeneratePath(".pdf") - err = engine.Merge(ctx, ctx.Log(), inputPaths, outputPath) if err != nil { return fmt.Errorf("merge PDFs: %w", err) } - // So far so good, the PDFs are merged into one unique PDF. - // Now, let's check if the client want to convert this result PDF - // to specific PDF formats. - zeroValued := gotenberg.PdfFormats{} - if pdfFormats != zeroValued { - convertInputPath := outputPath - convertOutputPath := ctx.GeneratePath(".pdf") - - err = engine.Convert(ctx, ctx.Log(), pdfFormats, convertInputPath, convertOutputPath) - if err != nil { - return fmt.Errorf("convert PDF: %w", err) - } - - // Important: the output path is now the converted file. - outputPath = convertOutputPath + outputPaths, err := ConvertStub(ctx, engine, pdfFormats, []string{outputPath}) + if err != nil { + return fmt.Errorf("convert PDF: %w", err) } - // Writes and potentially overrides metadata entries, if any. - if len(metadata) > 0 { - err = engine.WriteMetadata(ctx, ctx.Log(), metadata, outputPath) - if err != nil { - return fmt.Errorf("write metadata: %w", err) - } + err = WriteMetadataStub(ctx, engine, metadata, outputPaths) + if err != nil { + return fmt.Errorf("write metadata: %w", err) } - // Last but not least, add the output path to the context so that - // the API is able to send it as a response to the client. - err = ctx.AddOutputPaths(outputPath) + err = ctx.AddOutputPaths(outputPaths...) if err != nil { - return fmt.Errorf("add output path: %w", err) + return fmt.Errorf("add output paths: %w", err) } return nil @@ -108,27 +162,17 @@ func convertRoute(engine gotenberg.PdfEngine) api.Route { Handler: func(c echo.Context) error { ctx := c.Get("context").(*api.Context) - // Let's get the data from the form and validate them. 
- var ( - inputPaths []string - pdfa string - pdfua bool - ) + form := ctx.FormData() + pdfFormats := FormDataPdfFormats(form) - err := ctx.FormData(). + var inputPaths []string + err := form. MandatoryPaths([]string{".pdf"}, &inputPaths). - String("pdfa", &pdfa, ""). - Bool("pdfua", &pdfua, false). Validate() if err != nil { return fmt.Errorf("validate form data: %w", err) } - pdfFormats := gotenberg.PdfFormats{ - PdfA: pdfa, - PdfUa: pdfua, - } - zeroValued := gotenberg.PdfFormats{} if pdfFormats == zeroValued { return api.WrapError( @@ -140,18 +184,14 @@ func convertRoute(engine gotenberg.PdfEngine) api.Route { ) } - // Alright, let's convert the PDFs. - outputPaths := make([]string, len(inputPaths)) - for i, inputPath := range inputPaths { - outputPaths[i] = ctx.GeneratePath(".pdf") - - err = engine.Convert(ctx, ctx.Log(), pdfFormats, inputPath, outputPaths[i]) - if err != nil { - return fmt.Errorf("convert PDF: %w", err) - } + outputPaths, err := ConvertStub(ctx, engine, pdfFormats, inputPaths) + if err != nil { + return fmt.Errorf("convert PDFs: %w", err) + } - if len(outputPaths) > 1 { - // If .zip archive, keep the original filename. + if len(outputPaths) > 1 { + // If .zip archive, keep the original filename. + for i, inputPath := range inputPaths { err = ctx.Rename(outputPaths[i], inputPath) if err != nil { return fmt.Errorf("rename output path: %w", err) @@ -161,8 +201,6 @@ func convertRoute(engine gotenberg.PdfEngine) api.Route { } } - // Last but not least, add the output paths to the context so that - // the API is able to send them as a response to the client. err = ctx.AddOutputPaths(outputPaths...) if err != nil { return fmt.Errorf("add output paths: %w", err) @@ -182,9 +220,7 @@ func readMetadataRoute(engine gotenberg.PdfEngine) api.Route { Handler: func(c echo.Context) error { ctx := c.Get("context").(*api.Context) - // Let's get the data from the form and validate them. var inputPaths []string - err := ctx.FormData(). 
MandatoryPaths([]string{".pdf"}, &inputPaths). Validate() @@ -192,7 +228,6 @@ func readMetadataRoute(engine gotenberg.PdfEngine) api.Route { return fmt.Errorf("validate form data: %w", err) } - // Alright, let's read the metadata. res := make(map[string]map[string]interface{}, len(inputPaths)) for _, inputPath := range inputPaths { metadata, err := engine.ReadMetadata(ctx, ctx.Log(), inputPath) @@ -223,7 +258,6 @@ func writeMetadataRoute(engine gotenberg.PdfEngine) api.Route { Handler: func(c echo.Context) error { ctx := c.Get("context").(*api.Context) - // Let's get the data from the form and validate them. var ( inputPaths []string metadata map[string]interface{} @@ -248,16 +282,11 @@ func writeMetadataRoute(engine gotenberg.PdfEngine) api.Route { return fmt.Errorf("validate form data: %w", err) } - // Alright, let's convert the PDFs. - for _, inputPath := range inputPaths { - err = engine.WriteMetadata(ctx, ctx.Log(), metadata, inputPath) - if err != nil { - return fmt.Errorf("write metadata: %w", err) - } + err = WriteMetadataStub(ctx, engine, metadata, inputPaths) + if err != nil { + return fmt.Errorf("write metadata: %w", err) } - // Last but not least, add the output paths to the context so that - // the API is able to send them as a response to the client. err = ctx.AddOutputPaths(inputPaths...) 
if err != nil { return fmt.Errorf("add output paths: %w", err) diff --git a/pkg/modules/pdfengines/routes_test.go b/pkg/modules/pdfengines/routes_test.go index a3cdd940a..94df1688d 100644 --- a/pkg/modules/pdfengines/routes_test.go +++ b/pkg/modules/pdfengines/routes_test.go @@ -5,6 +5,7 @@ import ( "errors" "net/http" "net/http/httptest" + "reflect" "slices" "strings" "testing" @@ -16,6 +17,287 @@ import ( "github.com/gotenberg/gotenberg/v8/pkg/modules/api" ) +func TestFormDataPdfFormats(t *testing.T) { + for _, tc := range []struct { + scenario string + ctx *api.ContextMock + expectedPdfFormats gotenberg.PdfFormats + expectValidationError bool + }{ + { + scenario: "no custom form fields", + ctx: &api.ContextMock{Context: new(api.Context)}, + expectedPdfFormats: gotenberg.PdfFormats{}, + expectValidationError: false, + }, + { + scenario: "pdfa and pdfua form fields", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "pdfa": { + "foo", + }, + "pdfua": { + "true", + }, + }) + return ctx + }(), + expectedPdfFormats: gotenberg.PdfFormats{PdfA: "foo", PdfUa: true}, + expectValidationError: false, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + tc.ctx.SetLogger(zap.NewNop()) + form := tc.ctx.Context.FormData() + actual := FormDataPdfFormats(form) + + if !reflect.DeepEqual(actual, tc.expectedPdfFormats) { + t.Fatalf("expected %+v but got: %+v", tc.expectedPdfFormats, actual) + } + + err := form.Validate() + + if tc.expectValidationError && err == nil { + t.Fatal("expected validation error but got none", err) + } + + if !tc.expectValidationError && err != nil { + t.Fatalf("expected no validation error but got: %v", err) + } + }) + } +} + +func TestFormDataPdfMetadata(t *testing.T) { + for _, tc := range []struct { + scenario string + ctx *api.ContextMock + expectedMetadata map[string]interface{} + expectValidationError bool + }{ + { + scenario: "no metadata form field", + ctx: 
&api.ContextMock{Context: new(api.Context)}, + expectedMetadata: nil, + expectValidationError: false, + }, + { + scenario: "invalid metadata form field", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "metadata": { + "foo", + }, + }) + return ctx + }(), + expectedMetadata: nil, + expectValidationError: true, + }, + { + scenario: "valid metadata form field", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "metadata": { + "{\"foo\":\"bar\"}", + }, + }) + return ctx + }(), + expectedMetadata: map[string]interface{}{ + "foo": "bar", + }, + expectValidationError: false, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + tc.ctx.SetLogger(zap.NewNop()) + form := tc.ctx.Context.FormData() + actual := FormDataPdfMetadata(form) + + if !reflect.DeepEqual(actual, tc.expectedMetadata) { + t.Fatalf("expected %+v but got: %+v", tc.expectedMetadata, actual) + } + + err := form.Validate() + + if tc.expectValidationError && err == nil { + t.Fatal("expected validation error but got none", err) + } + + if !tc.expectValidationError && err != nil { + t.Fatalf("expected no validation error but got: %v", err) + } + }) + } +} + +func TestMergeStub(t *testing.T) { + for _, tc := range []struct { + scenario string + engine gotenberg.PdfEngine + inputPaths []string + expectError bool + }{ + { + scenario: "no input path (nil)", + inputPaths: nil, + expectError: true, + }, + { + scenario: "no input path (empty)", + inputPaths: make([]string, 0), + expectError: true, + }, + { + scenario: "only one input path", + inputPaths: []string{"my.pdf"}, + expectError: false, + }, + { + scenario: "merge error", + engine: &gotenberg.PdfEngineMock{ + MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { + return errors.New("foo") + }, + }, + inputPaths: []string{"my.pdf", "my2.pdf"}, + expectError: 
true, + }, + { + scenario: "merge success", + engine: &gotenberg.PdfEngineMock{ + MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { + return nil + }, + }, + inputPaths: []string{"my.pdf", "my2.pdf"}, + expectError: false, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + _, err := MergeStub(new(api.Context), tc.engine, tc.inputPaths) + + if tc.expectError && err == nil { + t.Fatal("expected error but got none", err) + } + + if !tc.expectError && err != nil { + t.Fatalf("expected no error but got: %v", err) + } + }) + } +} + +func TestConvertStub(t *testing.T) { + for _, tc := range []struct { + scenario string + engine gotenberg.PdfEngine + pdfFormats gotenberg.PdfFormats + expectError bool + }{ + { + scenario: "no PDF formats", + pdfFormats: gotenberg.PdfFormats{}, + expectError: false, + }, + { + scenario: "convert error", + engine: &gotenberg.PdfEngineMock{ + ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { + return errors.New("foo") + }, + }, + pdfFormats: gotenberg.PdfFormats{ + PdfA: gotenberg.PdfA3b, + PdfUa: true, + }, + expectError: true, + }, + { + scenario: "convert success", + engine: &gotenberg.PdfEngineMock{ + ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { + return nil + }, + }, + pdfFormats: gotenberg.PdfFormats{ + PdfA: gotenberg.PdfA3b, + PdfUa: true, + }, + expectError: false, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + _, err := ConvertStub(new(api.Context), tc.engine, tc.pdfFormats, []string{"my.pdf", "my2.pdf"}) + + if tc.expectError && err == nil { + t.Fatal("expected error but got none", err) + } + + if !tc.expectError && err != nil { + t.Fatalf("expected no error but got: %v", err) + } + }) + } +} + +func TestWriteMetadataStub(t *testing.T) { + for _, tc := range []struct { + scenario string + engine 
gotenberg.PdfEngine + metadata map[string]interface{} + expectError bool + }{ + { + scenario: "no metadata (nil)", + metadata: nil, + expectError: false, + }, + { + scenario: "no metadata (empty)", + metadata: make(map[string]interface{}, 0), + expectError: false, + }, + { + scenario: "write metadata error", + engine: &gotenberg.PdfEngineMock{ + WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { + return errors.New("foo") + }, + }, + metadata: map[string]interface{}{"foo": "bar"}, + expectError: true, + }, + { + scenario: "write metadata success", + engine: &gotenberg.PdfEngineMock{ + WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { + return nil + }, + }, + metadata: map[string]interface{}{"foo": "bar"}, + expectError: false, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + err := WriteMetadataStub(new(api.Context), tc.engine, tc.metadata, []string{"my.pdf", "my2.pdf"}) + + if tc.expectError && err == nil { + t.Fatal("expected error but got none", err) + } + + if !tc.expectError && err != nil { + t.Fatalf("expected no error but got: %v", err) + } + }) + } +} + func TestMergeHandler(t *testing.T) { for _, tc := range []struct { scenario string From c30da805b3af537111dd1accd8f027b243f61b46 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Fri, 20 Dec 2024 15:51:57 +0100 Subject: [PATCH 20/25] feat(pdfengines): add split feature --- Makefile | 2 + pkg/gotenberg/fs.go | 67 ++- pkg/gotenberg/fs_test.go | 195 +++++- pkg/gotenberg/mocks.go | 17 + pkg/gotenberg/mocks_test.go | 22 + pkg/gotenberg/pdfengine.go | 27 + pkg/modules/api/api.go | 2 +- pkg/modules/api/api_test.go | 2 +- pkg/modules/api/context.go | 27 +- pkg/modules/api/context_test.go | 97 ++- pkg/modules/api/middlewares.go | 4 + pkg/modules/api/middlewares_test.go | 7 +- pkg/modules/api/mocks.go | 8 + pkg/modules/api/mocks_test.go | 17 +- 
pkg/modules/chromium/browser.go | 2 +- pkg/modules/chromium/browser_test.go | 122 ++-- pkg/modules/chromium/routes.go | 41 +- pkg/modules/chromium/routes_test.go | 60 +- pkg/modules/exiftool/exiftool.go | 5 + pkg/modules/exiftool/exiftool_test.go | 11 +- pkg/modules/libreoffice/api/libreoffice.go | 2 +- .../libreoffice/api/libreoffice_test.go | 26 +- .../libreoffice/pdfengine/pdfengine.go | 5 + .../libreoffice/pdfengine/pdfengine_test.go | 25 +- pkg/modules/libreoffice/routes.go | 48 +- pkg/modules/libreoffice/routes_test.go | 213 ++++++- pkg/modules/pdfcpu/doc.go | 1 + pkg/modules/pdfcpu/pdfcpu.go | 33 + pkg/modules/pdfcpu/pdfcpu_test.go | 91 ++- pkg/modules/pdfengines/multi.go | 41 ++ pkg/modules/pdfengines/multi_test.go | 235 +++++--- pkg/modules/pdfengines/pdfengines.go | 12 + pkg/modules/pdfengines/pdfengines_test.go | 18 +- pkg/modules/pdfengines/routes.go | 205 ++++++- pkg/modules/pdfengines/routes_test.go | 569 +++++++++++++++++- pkg/modules/pdftk/doc.go | 1 + pkg/modules/pdftk/pdftk.go | 26 + pkg/modules/pdftk/pdftk_test.go | 84 ++- pkg/modules/qpdf/doc.go | 1 + pkg/modules/qpdf/qpdf.go | 28 +- pkg/modules/qpdf/qpdf_test.go | 84 ++- 41 files changed, 2145 insertions(+), 338 deletions(-) diff --git a/Makefile b/Makefile index def2bb19b..b8f0f10e4 100644 --- a/Makefile +++ b/Makefile @@ -73,6 +73,7 @@ LOG_FORMAT=auto LOG_FIELDS_PREFIX= PDFENGINES_ENGINES= PDFENGINES_MERGE_ENGINES=qpdf,pdfcpu,pdftk +PDFENGINES_SPLIT_ENGINES=pdfcpu,qpdf,pdftk PDFENGINES_CONVERT_ENGINES=libreoffice-pdfengine PDFENGINES_READ_METADATA_ENGINES=exiftool PDFENGINES_WRITE_METADATA_ENGINES=exiftool @@ -141,6 +142,7 @@ run: ## Start a Gotenberg container --log-fields-prefix=$(LOG_FIELDS_PREFIX) \ --pdfengines-engines=$(PDFENGINES_ENGINES) \ --pdfengines-merge-engines=$(PDFENGINES_MERGE_ENGINES) \ + --pdfengines-split-engines=$(PDFENGINES_SPLIT_ENGINES) \ --pdfengines-convert-engines=$(PDFENGINES_CONVERT_ENGINES) \ --pdfengines-read-metadata-engines=$(PDFENGINES_READ_METADATA_ENGINES) \ 
--pdfengines-write-metadata-engines=$(PDFENGINES_WRITE_METADATA_ENGINES) \ diff --git a/pkg/gotenberg/fs.go b/pkg/gotenberg/fs.go index 8c2d0a98c..99b403eb0 100644 --- a/pkg/gotenberg/fs.go +++ b/pkg/gotenberg/fs.go @@ -3,22 +3,56 @@ package gotenberg import ( "fmt" "os" + "path/filepath" + "strings" "github.com/google/uuid" ) +// MkdirAll defines the method signature for creating a directory. Implement this +// interface if you don't want to rely on [os.MkdirAll], notably for testing +// purpose. +type MkdirAll interface { + // MkdirAll uses the same signature as [os.MkdirAll]. + MkdirAll(path string, perm os.FileMode) error +} + +// OsMkdirAll implements the [MkdirAll] interface with [os.MkdirAll]. +type OsMkdirAll struct{} + +// MkdirAll is a wrapper around [os.MkdirAll]. +func (o *OsMkdirAll) MkdirAll(path string, perm os.FileMode) error { return os.MkdirAll(path, perm) } + +// PathRename defines the method signature for renaming files. Implement this +// interface if you don't want to rely on [os.Rename], notably for testing +// purpose. +type PathRename interface { + // Rename uses the same signature as [os.Rename]. + Rename(oldpath, newpath string) error +} + +// OsPathRename implements the [PathRename] interface with [os.Rename]. +type OsPathRename struct{} + +// Rename is a wrapper around [os.Rename]. +func (o *OsPathRename) Rename(oldpath, newpath string) error { + return os.Rename(oldpath, newpath) +} + // FileSystem provides utilities for managing temporary directories. It creates // unique directory names based on UUIDs to ensure isolation of temporary files // for different modules. type FileSystem struct { workingDir string + mkdirAll MkdirAll } // NewFileSystem initializes a new [FileSystem] instance with a unique working // directory. 
-func NewFileSystem() *FileSystem { +func NewFileSystem(mkdirAll MkdirAll) *FileSystem { return &FileSystem{ workingDir: uuid.NewString(), + mkdirAll: mkdirAll, } } @@ -44,7 +78,7 @@ func (fs *FileSystem) NewDirPath() string { func (fs *FileSystem) MkdirAll() (string, error) { path := fs.NewDirPath() - err := os.MkdirAll(path, 0o755) + err := fs.mkdirAll.MkdirAll(path, 0o755) if err != nil { return "", fmt.Errorf("create directory %s: %w", path, err) } @@ -52,10 +86,27 @@ func (fs *FileSystem) MkdirAll() (string, error) { return path, nil } -// PathRename defines the method signature for renaming files. Implement this -// interface if you don't want to rely on [os.Rename], notably for testing -// purpose. -type PathRename interface { - // Rename uses the same signature as [os.Rename]. - Rename(oldpath, newpath string) error +// WalkDir walks the file tree rooted at dir, subdirectories included, and returns +// the paths of the files whose extension matches ext (case-insensitive). +func WalkDir(dir, ext string) ([]string, error) { + var files []string + err := filepath.Walk(dir, func(path string, info os.FileInfo, pathErr error) error { + if pathErr != nil { + return pathErr + } + if info.IsDir() { + return nil + } + if strings.EqualFold(filepath.Ext(info.Name()), ext) { + files = append(files, path) + } + return nil + }) + return files, err } + +// Interface guards. 
+var ( + _ MkdirAll = (*OsMkdirAll)(nil) + _ PathRename = (*OsPathRename)(nil) +) diff --git a/pkg/gotenberg/fs_test.go b/pkg/gotenberg/fs_test.go index f074acb66..d7f641204 100644 --- a/pkg/gotenberg/fs_test.go +++ b/pkg/gotenberg/fs_test.go @@ -1,14 +1,84 @@ package gotenberg import ( + "errors" "fmt" + "io" "os" + "path/filepath" + "reflect" "strings" "testing" + + "github.com/google/uuid" ) +func TestOsMkdirAll_MkdirAll(t *testing.T) { + dirPath, err := NewFileSystem(new(OsMkdirAll)).MkdirAll() + if err != nil { + t.Fatalf("create working directory: %v", err) + } + + err = os.RemoveAll(dirPath) + if err != nil { + t.Fatalf("remove working directory: %v", err) + } +} + +func TestOsPathRename_Rename(t *testing.T) { + dirPath, err := NewFileSystem(new(OsMkdirAll)).MkdirAll() + if err != nil { + t.Fatalf("create working directory: %v", err) + } + + path := "/tests/test/testdata/api/sample1.txt" + copyPath := filepath.Join(dirPath, fmt.Sprintf("%s.txt", uuid.NewString())) + + in, err := os.Open(path) + if err != nil { + t.Fatalf("open file: %v", err) + } + + defer func() { + err := in.Close() + if err != nil { + t.Fatalf("close file: %v", err) + } + }() + + out, err := os.Create(copyPath) + if err != nil { + t.Fatalf("create new file: %v", err) + } + + defer func() { + err := out.Close() + if err != nil { + t.Fatalf("close new file: %v", err) + } + }() + + _, err = io.Copy(out, in) + if err != nil { + t.Fatalf("copy file to new file: %v", err) + } + + rename := new(OsPathRename) + newPath := filepath.Join(dirPath, fmt.Sprintf("%s.txt", uuid.NewString())) + + err = rename.Rename(copyPath, newPath) + if err != nil { + t.Errorf("expected no error but got: %v", err) + } + + err = os.RemoveAll(dirPath) + if err != nil { + t.Fatalf("remove working directory: %v", err) + } +} + func TestFileSystem_WorkingDir(t *testing.T) { - fs := NewFileSystem() + fs := NewFileSystem(new(MkdirAllMock)) dirName := fs.WorkingDir() if dirName == "" { @@ -17,7 +87,7 @@ func 
TestFileSystem_WorkingDir(t *testing.T) { } func TestFileSystem_WorkingDirPath(t *testing.T) { - fs := NewFileSystem() + fs := NewFileSystem(new(MkdirAllMock)) expectedPath := fmt.Sprintf("%s/%s", os.TempDir(), fs.WorkingDir()) if fs.WorkingDirPath() != expectedPath { @@ -26,7 +96,7 @@ func TestFileSystem_WorkingDirPath(t *testing.T) { } func TestFileSystem_NewDirPath(t *testing.T) { - fs := NewFileSystem() + fs := NewFileSystem(new(MkdirAllMock)) newDir := fs.NewDirPath() expectedPrefix := fs.WorkingDirPath() @@ -36,20 +106,117 @@ func TestFileSystem_NewDirPath(t *testing.T) { } func TestFileSystem_MkdirAll(t *testing.T) { - fs := NewFileSystem() + for _, tc := range []struct { + scenario string + mkdirAll MkdirAll + expectError bool + }{ + { + scenario: "error", + mkdirAll: &MkdirAllMock{ + MkdirAllMock: func(path string, perm os.FileMode) error { + return errors.New("foo") + }, + }, + expectError: true, + }, + { + scenario: "success", + mkdirAll: &MkdirAllMock{ + MkdirAllMock: func(path string, perm os.FileMode) error { + return nil + }, + }, + expectError: false, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + fs := NewFileSystem(tc.mkdirAll) - newPath, err := fs.MkdirAll() - if err != nil { - t.Fatalf("expected no error but got: %v", err) - } + _, err := fs.MkdirAll() + + if !tc.expectError && err != nil { + t.Fatalf("expected no error but got: %v", err) + } - _, err = os.Stat(newPath) - if os.IsNotExist(err) { - t.Errorf("expected directory '%s' to exist but it doesn't", newPath) + if tc.expectError && err == nil { + t.Fatal("expected error but got none") + } + }) } +} - err = os.RemoveAll(fs.WorkingDirPath()) - if err != nil { - t.Fatalf("expected no error while cleaning up but got: %v", err) +func TestWalkDir(t *testing.T) { + for _, tc := range []struct { + scenario string + dir string + ext string + expectError bool + expectFiles []string + }{ + { + scenario: "directory does not exist", + dir: uuid.NewString(), + ext: ".pdf", + expectError: true, + 
}, + { + scenario: "find PDF files", + dir: func() string { + path := fmt.Sprintf("%s/a_directory", os.TempDir()) + + err := os.MkdirAll(path, 0o755) + if err != nil { + t.Fatalf(fmt.Sprintf("expected no error but got: %v", err)) + } + + err = os.WriteFile(fmt.Sprintf("%s/a_foo_file.pdf", path), []byte{1}, 0o755) + if err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + err = os.WriteFile(fmt.Sprintf("%s/a_bar_file.PDF", path), []byte{1}, 0o755) + if err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + err = os.WriteFile(fmt.Sprintf("%s/a_baz_file.txt", path), []byte{1}, 0o755) + if err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + return path + }(), + ext: ".pdf", + expectError: false, + expectFiles: []string{"/tmp/a_directory/a_bar_file.PDF", "/tmp/a_directory/a_foo_file.pdf"}, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + defer func() { + err := os.RemoveAll(tc.dir) + if err != nil { + t.Fatalf("expected no error while cleaning up but got: %v", err) + } + }() + + files, err := WalkDir(tc.dir, tc.ext) + + if !tc.expectError && err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + if tc.expectError && err == nil { + t.Fatal("expected error but got none") + } + + if tc.expectError && err != nil { + return + } + + if !reflect.DeepEqual(files, tc.expectFiles) { + t.Errorf("expected files %+v, but got %+v", tc.expectFiles, files) + } + }) } } diff --git a/pkg/gotenberg/mocks.go b/pkg/gotenberg/mocks.go index 49154c32d..2ade89525 100644 --- a/pkg/gotenberg/mocks.go +++ b/pkg/gotenberg/mocks.go @@ -2,6 +2,7 @@ package gotenberg import ( "context" + "os" "go.uber.org/zap" ) @@ -36,6 +37,7 @@ func (mod *ValidatorMock) Validate() error { // PdfEngineMock is a mock for the [PdfEngine] interface. 
type PdfEngineMock struct { MergeMock func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error + SplitMock func(ctx context.Context, logger *zap.Logger, mode SplitMode, inputPath, outputDirPath string) ([]string, error) ConvertMock func(ctx context.Context, logger *zap.Logger, formats PdfFormats, inputPath, outputPath string) error ReadMetadataMock func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) WriteMetadataMock func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error @@ -45,6 +47,10 @@ func (engine *PdfEngineMock) Merge(ctx context.Context, logger *zap.Logger, inpu return engine.MergeMock(ctx, logger, inputPaths, outputPath) } +func (engine *PdfEngineMock) Split(ctx context.Context, logger *zap.Logger, mode SplitMode, inputPath, outputDirPath string) ([]string, error) { + return engine.SplitMock(ctx, logger, mode, inputPath, outputDirPath) +} + func (engine *PdfEngineMock) Convert(ctx context.Context, logger *zap.Logger, formats PdfFormats, inputPath, outputPath string) error { return engine.ConvertMock(ctx, logger, formats, inputPath, outputPath) } @@ -137,6 +143,15 @@ func (provider *MetricsProviderMock) Metrics() ([]Metric, error) { return provider.MetricsMock() } +// MkdirAllMock is a mock for the [MkdirAll] interface. +type MkdirAllMock struct { + MkdirAllMock func(path string, perm os.FileMode) error +} + +func (mkdirAll *MkdirAllMock) MkdirAll(path string, perm os.FileMode) error { + return mkdirAll.MkdirAllMock(path, perm) +} + // PathRenameMock is a mock for the [PathRename] interface. 
type PathRenameMock struct { RenameMock func(oldpath, newpath string) error @@ -156,4 +171,6 @@ var ( _ ProcessSupervisor = (*ProcessSupervisorMock)(nil) _ LoggerProvider = (*LoggerProviderMock)(nil) _ MetricsProvider = (*MetricsProviderMock)(nil) + _ MkdirAll = (*MkdirAllMock)(nil) + _ PathRename = (*PathRenameMock)(nil) ) diff --git a/pkg/gotenberg/mocks_test.go b/pkg/gotenberg/mocks_test.go index 1be6c658c..953a6ec28 100644 --- a/pkg/gotenberg/mocks_test.go +++ b/pkg/gotenberg/mocks_test.go @@ -2,6 +2,7 @@ package gotenberg import ( "context" + "os" "testing" "go.uber.org/zap" @@ -52,6 +53,9 @@ func TestPDFEngineMock(t *testing.T) { MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { return nil }, + SplitMock: func(ctx context.Context, logger *zap.Logger, mode SplitMode, inputPath, outputDirPath string) ([]string, error) { + return nil, nil + }, ConvertMock: func(ctx context.Context, logger *zap.Logger, formats PdfFormats, inputPath, outputPath string) error { return nil }, @@ -68,6 +72,11 @@ func TestPDFEngineMock(t *testing.T) { t.Errorf("expected no error from PdfEngineMock.Merge, but got: %v", err) } + _, err = mock.Split(context.Background(), zap.NewNop(), SplitMode{}, "", "") + if err != nil { + t.Errorf("expected no error from PdfEngineMock.Split, but got: %v", err) + } + err = mock.Convert(context.Background(), zap.NewNop(), PdfFormats{}, "", "") if err != nil { t.Errorf("expected no error from PdfEngineMock.Convert, but got: %v", err) @@ -205,6 +214,19 @@ func TestMetricsProviderMock(t *testing.T) { } } +func TestMkdirAllMock(t *testing.T) { + mock := &MkdirAllMock{ + MkdirAllMock: func(dir string, perm os.FileMode) error { + return nil + }, + } + + err := mock.MkdirAll("/foo", 0o755) + if err != nil { + t.Errorf("expected no error from MkdirAllMock.MkdirAll, but got: %v", err) + } +} + func TestPathRenameMock(t *testing.T) { mock := &PathRenameMock{ RenameMock: func(oldpath, newpath string) error { 
diff --git a/pkg/gotenberg/pdfengine.go b/pkg/gotenberg/pdfengine.go index 87c32c158..bc74f09f2 100644 --- a/pkg/gotenberg/pdfengine.go +++ b/pkg/gotenberg/pdfengine.go @@ -12,6 +12,10 @@ var ( // PdfEngine interface is not supported by its current implementation. ErrPdfEngineMethodNotSupported = errors.New("method not supported") + // ErrPdfSplitModeNotSupported is returned when the Split method of the + // PdfEngine interface does not support a requested PDF split mode. + ErrPdfSplitModeNotSupported = errors.New("split mode not supported") + // ErrPdfFormatNotSupported is returned when the Convert method of the // PdfEngine interface does not support a requested PDF format conversion. ErrPdfFormatNotSupported = errors.New("PDF format not supported") @@ -21,6 +25,26 @@ var ( ErrPdfEngineMetadataValueNotSupported = errors.New("metadata value not supported") ) +const ( + // SplitModeIntervals represents a mode where a PDF is split at specific + // intervals. + SplitModeIntervals string = "intervals" + + // SplitModePages represents a mode where a PDF is split at specific page + // ranges. + SplitModePages string = "pages" +) + +// SplitMode gathers the data required to split a PDF into multiple parts. +type SplitMode struct { + // Mode is either "intervals" or "pages". + Mode string + + // Span is either the intervals or the page ranges to extract, depending on + // the selected mode. + Span string +} + const ( // PdfA1a represents the PDF/A-1a format. PdfA1a string = "PDF/A-1a" @@ -65,6 +89,9 @@ type PdfEngine interface { // is determined by the order of files provided in inputPaths. Merge(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error + // Split splits a given PDF file. + Split(ctx context.Context, logger *zap.Logger, mode SplitMode, inputPath, outputDirPath string) ([]string, error) + // Convert transforms a given PDF to the specified formats defined in // PdfFormats. If no format, it does nothing. 
Convert(ctx context.Context, logger *zap.Logger, formats PdfFormats, inputPath, outputPath string) error diff --git a/pkg/modules/api/api.go b/pkg/modules/api/api.go index 3d6cb4598..7daa18925 100644 --- a/pkg/modules/api/api.go +++ b/pkg/modules/api/api.go @@ -318,7 +318,7 @@ func (a *Api) Provision(ctx *gotenberg.Context) error { a.logger = logger // File system. - a.fs = gotenberg.NewFileSystem() + a.fs = gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) return nil } diff --git a/pkg/modules/api/api_test.go b/pkg/modules/api/api_test.go index 885076af4..b32eace84 100644 --- a/pkg/modules/api/api_test.go +++ b/pkg/modules/api/api_test.go @@ -850,7 +850,7 @@ func TestApi_Start(t *testing.T) { }, } mod.readyFn = tc.readyFn - mod.fs = gotenberg.NewFileSystem() + mod.fs = gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) mod.logger = zap.NewNop() err := mod.Start() diff --git a/pkg/modules/api/context.go b/pkg/modules/api/context.go index 7d895810d..d761248b2 100644 --- a/pkg/modules/api/context.go +++ b/pkg/modules/api/context.go @@ -47,6 +47,7 @@ type Context struct { logger *zap.Logger echoCtx echo.Context + mkdirAll gotenberg.MkdirAll pathRename gotenberg.PathRename context.Context } @@ -81,12 +82,6 @@ type downloadFrom struct { ExtraHttpHeaders map[string]string `json:"extraHttpHeaders"` } -type osPathRename struct{} - -func (o *osPathRename) Rename(oldpath, newpath string) error { - return os.Rename(oldpath, newpath) -} - // newContext returns a [Context] by parsing a "multipart/form-data" request. 
func newContext(echoCtx echo.Context, logger *zap.Logger, fs *gotenberg.FileSystem, timeout time.Duration, bodyLimit int64, downloadFromCfg downloadFromConfig, traceHeader, trace string) (*Context, context.CancelFunc, error) { processCtx, processCancel := context.WithTimeout(context.Background(), timeout) @@ -112,7 +107,8 @@ func newContext(echoCtx echo.Context, logger *zap.Logger, fs *gotenberg.FileSyst cancelled: false, logger: logger, echoCtx: echoCtx, - pathRename: new(osPathRename), + mkdirAll: new(gotenberg.OsMkdirAll), + pathRename: new(gotenberg.OsPathRename), Context: processCtx, } @@ -414,9 +410,21 @@ func (ctx *Context) GeneratePath(extension string) string { return fmt.Sprintf("%s/%s%s", ctx.dirPath, uuid.New().String(), extension) } +// CreateSubDirectory creates a subdirectory within the context's working +// directory. +func (ctx *Context) CreateSubDirectory(dirName string) (string, error) { + path := fmt.Sprintf("%s/%s", ctx.dirPath, dirName) + err := ctx.mkdirAll.MkdirAll(path, 0o755) + if err != nil { + return "", fmt.Errorf("create sub-directory %s: %w", path, err) + } + return path, nil +} + // Rename is just a wrapper around [os.Rename], as we need to mock this // behavior in our tests. func (ctx *Context) Rename(oldpath, newpath string) error { + ctx.Log().Debug(fmt.Sprintf("rename %s to %s", oldpath, newpath)) err := ctx.pathRename.Rename(oldpath, newpath) if err != nil { return fmt.Errorf("rename path: %w", err) @@ -496,8 +504,3 @@ func (ctx *Context) OutputFilename(outputPath string) string { return fmt.Sprintf("%s%s", filename, filepath.Ext(outputPath)) } - -// Interface guard. 
-var ( - _ gotenberg.PathRename = (*osPathRename)(nil) -) diff --git a/pkg/modules/api/context_test.go b/pkg/modules/api/context_test.go index ddb7e9f35..05649df8a 100644 --- a/pkg/modules/api/context_test.go +++ b/pkg/modules/api/context_test.go @@ -4,78 +4,22 @@ import ( "bytes" "context" "errors" - "fmt" - "io" "mime/multipart" "net/http" "net/http/httptest" "os" - "path/filepath" "reflect" "strings" "testing" "time" "github.com/dlclark/regexp2" - "github.com/google/uuid" "github.com/labstack/echo/v4" "go.uber.org/zap" "github.com/gotenberg/gotenberg/v8/pkg/gotenberg" ) -func TestOsPathRename_Rename(t *testing.T) { - dirPath, err := gotenberg.NewFileSystem().MkdirAll() - if err != nil { - t.Fatalf("create working directory: %v", err) - } - - path := "/tests/test/testdata/api/sample1.txt" - copyPath := filepath.Join(dirPath, fmt.Sprintf("%s.txt", uuid.NewString())) - - in, err := os.Open(path) - if err != nil { - t.Fatalf("open file: %v", err) - } - - defer func() { - err := in.Close() - if err != nil { - t.Fatalf("close file: %v", err) - } - }() - - out, err := os.Create(copyPath) - if err != nil { - t.Fatalf("create new file: %v", err) - } - - defer func() { - err := out.Close() - if err != nil { - t.Fatalf("close new file: %v", err) - } - }() - - _, err = io.Copy(out, in) - if err != nil { - t.Fatalf("copy file to new file: %v", err) - } - - rename := new(osPathRename) - newPath := filepath.Join(dirPath, fmt.Sprintf("%s.txt", uuid.NewString())) - - err = rename.Rename(copyPath, newPath) - if err != nil { - t.Errorf("expected no error but got: %v", err) - } - - err = os.RemoveAll(dirPath) - if err != nil { - t.Fatalf("remove working directory: %v", err) - } -} - func TestNewContext(t *testing.T) { defaultAllowList, err := regexp2.Compile("", 0) if err != nil { @@ -548,7 +492,7 @@ func TestNewContext(t *testing.T) { } handler := func(c echo.Context) error { - ctx, cancel, err := newContext(c, zap.NewNop(), gotenberg.NewFileSystem(), 
time.Duration(10)*time.Second, tc.bodyLimit, tc.downloadFromCfg, "Gotenberg-Trace", "123") + ctx, cancel, err := newContext(c, zap.NewNop(), gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), time.Duration(10)*time.Second, tc.bodyLimit, tc.downloadFromCfg, "Gotenberg-Trace", "123") defer cancel() // Context already cancelled. defer cancel() @@ -647,6 +591,42 @@ func TestContext_FormData(t *testing.T) { } } +func TestContext_CreateSubDirectory(t *testing.T) { + for _, tc := range []struct { + scenario string + ctx *Context + expectError bool + }{ + { + scenario: "failure", + ctx: &Context{mkdirAll: &gotenberg.MkdirAllMock{MkdirAllMock: func(path string, perm os.FileMode) error { + return errors.New("cannot rename") + }}}, + expectError: true, + }, + { + scenario: "success", + ctx: &Context{mkdirAll: &gotenberg.MkdirAllMock{MkdirAllMock: func(path string, perm os.FileMode) error { + return nil + }}}, + expectError: false, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + tc.ctx.logger = zap.NewNop() + _, err := tc.ctx.CreateSubDirectory("foo") + + if tc.expectError && err == nil { + t.Fatal("expected error but got none", err) + } + + if !tc.expectError && err != nil { + t.Fatalf("expected no error but got: %v", err) + } + }) + } +} + func TestContext_GeneratePath(t *testing.T) { ctx := &Context{ dirPath: "/foo", @@ -680,6 +660,7 @@ func TestContext_Rename(t *testing.T) { }, } { t.Run(tc.scenario, func(t *testing.T) { + tc.ctx.logger = zap.NewNop() err := tc.ctx.Rename("", "") if tc.expectError && err == nil { @@ -788,7 +769,7 @@ func TestContext_BuildOutputFile(t *testing.T) { }, } { t.Run(tc.scenario, func(t *testing.T) { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) dirPath, err := fs.MkdirAll() if err != nil { t.Fatalf("expected no erro but got: %v", err) diff --git a/pkg/modules/api/middlewares.go b/pkg/modules/api/middlewares.go index c3939b5ac..78bcb8542 100644 --- a/pkg/modules/api/middlewares.go +++ 
b/pkg/modules/api/middlewares.go @@ -48,6 +48,10 @@ func ParseError(err error) (int, string) { return http.StatusTooManyRequests, http.StatusText(http.StatusTooManyRequests) } + if errors.Is(err, gotenberg.ErrPdfSplitModeNotSupported) { + return http.StatusBadRequest, "At least one PDF engine cannot process the requested PDF split mode, while others may have failed to split due to different issues" + } + if errors.Is(err, gotenberg.ErrPdfFormatNotSupported) { return http.StatusBadRequest, "At least one PDF engine cannot process the requested PDF format, while others may have failed to convert due to different issues" } diff --git a/pkg/modules/api/middlewares_test.go b/pkg/modules/api/middlewares_test.go index 6edd1e2e4..8bef12682 100644 --- a/pkg/modules/api/middlewares_test.go +++ b/pkg/modules/api/middlewares_test.go @@ -38,6 +38,11 @@ func TestParseError(t *testing.T) { expectStatus: http.StatusTooManyRequests, expectMessage: http.StatusText(http.StatusTooManyRequests), }, + { + err: gotenberg.ErrPdfSplitModeNotSupported, + expectStatus: http.StatusBadRequest, + expectMessage: "At least one PDF engine cannot process the requested PDF split mode, while others may have failed to split due to different issues", + }, { err: gotenberg.ErrPdfFormatNotSupported, expectStatus: http.StatusBadRequest, @@ -462,7 +467,7 @@ func TestContextMiddleware(t *testing.T) { c.Set("trace", "foo") c.Set("startTime", time.Now()) - err := contextMiddleware(gotenberg.NewFileSystem(), time.Duration(10)*time.Second, 0, downloadFromConfig{})(tc.next)(c) + err := contextMiddleware(gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), time.Duration(10)*time.Second, 0, downloadFromConfig{})(tc.next)(c) if tc.expectErr && err == nil { t.Errorf("test %d: expected error but got: %v", i, err) diff --git a/pkg/modules/api/mocks.go b/pkg/modules/api/mocks.go index 667cd14e9..6d6c2a5f7 100644 --- a/pkg/modules/api/mocks.go +++ b/pkg/modules/api/mocks.go @@ -86,6 +86,14 @@ func (ctx *ContextMock) 
SetEchoContext(c echo.Context) { ctx.Context.echoCtx = c } +// SetMkdirAll sets the [gotenberg.MkdirAll]. +// +// ctx := &api.ContextMock{Context: &api.Context{}} +// ctx.SetMkdirAll(mkdirAll) +func (ctx *ContextMock) SetMkdirAll(mkdirAll gotenberg.MkdirAll) { + ctx.Context.mkdirAll = mkdirAll +} + // SetPathRename sets the [gotenberg.PathRename]. // // ctx := &api.ContextMock{Context: &api.Context{}} diff --git a/pkg/modules/api/mocks_test.go b/pkg/modules/api/mocks_test.go index 5910726c2..ecc2a19cf 100644 --- a/pkg/modules/api/mocks_test.go +++ b/pkg/modules/api/mocks_test.go @@ -7,6 +7,8 @@ import ( "github.com/alexliesenfeld/health" "github.com/labstack/echo/v4" "go.uber.org/zap" + + "github.com/gotenberg/gotenberg/v8/pkg/gotenberg" ) func TestContextMock_SetDirPath(t *testing.T) { @@ -117,10 +119,23 @@ func TestContextMock_SetEchoContext(t *testing.T) { } } +func TestContextMock_SetMkdirAll(t *testing.T) { + mock := ContextMock{&Context{}} + + expect := new(gotenberg.OsMkdirAll) + mock.SetMkdirAll(expect) + + actual := mock.mkdirAll + + if actual != expect { + t.Errorf("expected %v but got %v", expect, actual) + } +} + func TestContextMock_SetPathRename(t *testing.T) { mock := ContextMock{&Context{}} - expect := new(osPathRename) + expect := new(gotenberg.OsPathRename) mock.SetPathRename(expect) actual := mock.pathRename diff --git a/pkg/modules/chromium/browser.go b/pkg/modules/chromium/browser.go index 380dcccfb..1da24d877 100644 --- a/pkg/modules/chromium/browser.go +++ b/pkg/modules/chromium/browser.go @@ -62,7 +62,7 @@ func newChromiumBrowser(arguments browserArguments) browser { b := &chromiumBrowser{ initialCtx: context.Background(), arguments: arguments, - fs: gotenberg.NewFileSystem(), + fs: gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), } b.isStarted.Store(false) diff --git a/pkg/modules/chromium/browser_test.go b/pkg/modules/chromium/browser_test.go index 3ba608b4b..a5698eaf7 100644 --- a/pkg/modules/chromium/browser_test.go +++ 
b/pkg/modules/chromium/browser_test.go @@ -263,7 +263,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { b.isStarted.Store(false) return b }(), - fs: gotenberg.NewFileSystem(), + fs: gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), noDeadline: false, start: false, expectError: true, @@ -275,7 +275,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { b.isStarted.Store(true) return b }(), - fs: gotenberg.NewFileSystem(), + fs: gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), noDeadline: true, start: false, expectError: true, @@ -291,7 +291,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { b.isStarted.Store(true) return b }(), - fs: gotenberg.NewFileSystem(), + fs: gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), noDeadline: false, start: false, expectError: true, @@ -308,7 +308,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { b.isStarted.Store(true) return b }(), - fs: gotenberg.NewFileSystem(), + fs: gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), noDeadline: false, start: false, expectError: true, @@ -325,7 +325,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -357,7 +357,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -389,7 +389,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -424,7 +424,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := 
gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -457,7 +457,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -495,7 +495,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -528,7 +528,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -554,7 +554,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -588,7 +588,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -621,7 +621,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -654,7 +654,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -688,7 +688,7 @@ func TestChromiumBrowser_pdf(t 
*testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -723,7 +723,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -758,7 +758,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -812,7 +812,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -845,7 +845,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -881,7 +881,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -914,7 +914,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -949,7 +949,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := 
os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -984,7 +984,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1019,7 +1019,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1065,7 +1065,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1100,7 +1100,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1146,7 +1146,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1181,7 +1181,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1217,7 +1217,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1255,7 +1255,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() 
*gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1288,7 +1288,7 @@ func TestChromiumBrowser_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1421,7 +1421,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { b.isStarted.Store(false) return b }(), - fs: gotenberg.NewFileSystem(), + fs: gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), noDeadline: false, start: false, expectError: true, @@ -1437,7 +1437,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { b.isStarted.Store(true) return b }(), - fs: gotenberg.NewFileSystem(), + fs: gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), noDeadline: true, start: false, expectError: true, @@ -1453,7 +1453,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { b.isStarted.Store(true) return b }(), - fs: gotenberg.NewFileSystem(), + fs: gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), noDeadline: false, start: false, expectError: true, @@ -1470,7 +1470,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { b.isStarted.Store(true) return b }(), - fs: gotenberg.NewFileSystem(), + fs: gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), noDeadline: false, start: false, expectError: true, @@ -1487,7 +1487,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1519,7 +1519,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != 
nil { @@ -1551,7 +1551,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1586,7 +1586,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1619,7 +1619,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1657,7 +1657,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1690,7 +1690,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1716,7 +1716,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1750,7 +1750,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1783,7 +1783,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() 
*gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1816,7 +1816,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1850,7 +1850,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1885,7 +1885,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1920,7 +1920,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -1974,7 +1974,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -2009,7 +2009,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -2042,7 +2042,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := 
gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -2077,7 +2077,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -2112,7 +2112,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -2147,7 +2147,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -2193,7 +2193,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -2228,7 +2228,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -2274,7 +2274,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -2317,7 +2317,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if 
err != nil { @@ -2365,7 +2365,7 @@ func TestChromiumBrowser_screenshot(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { diff --git a/pkg/modules/chromium/routes.go b/pkg/modules/chromium/routes.go index 2ae33b250..ee330a26a 100644 --- a/pkg/modules/chromium/routes.go +++ b/pkg/modules/chromium/routes.go @@ -326,8 +326,9 @@ func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { Handler: func(c echo.Context) error { ctx := c.Get("context").(*api.Context) form, options := FormDataChromiumPdfOptions(ctx) + mode := pdfengines.FormDataPdfSplitMode(form, false) pdfFormats := pdfengines.FormDataPdfFormats(form) - metadata := pdfengines.FormDataPdfMetadata(form) + metadata := pdfengines.FormDataPdfMetadata(form, false) var url string err := form. @@ -337,7 +338,7 @@ func convertUrlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { return fmt.Errorf("validate form data: %w", err) } - err = convertUrl(ctx, chromium, engine, url, options, pdfFormats, metadata) + err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata) if err != nil { return fmt.Errorf("convert URL to PDF: %w", err) } @@ -386,8 +387,9 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { Handler: func(c echo.Context) error { ctx := c.Get("context").(*api.Context) form, options := FormDataChromiumPdfOptions(ctx) + mode := pdfengines.FormDataPdfSplitMode(form, false) pdfFormats := pdfengines.FormDataPdfFormats(form) - metadata := pdfengines.FormDataPdfMetadata(form) + metadata := pdfengines.FormDataPdfMetadata(form, false) var inputPath string err := form. 
@@ -398,7 +400,7 @@ func convertHtmlRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { } url := fmt.Sprintf("file://%s", inputPath) - err = convertUrl(ctx, chromium, engine, url, options, pdfFormats, metadata) + err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata) if err != nil { return fmt.Errorf("convert HTML to PDF: %w", err) } @@ -448,8 +450,9 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { Handler: func(c echo.Context) error { ctx := c.Get("context").(*api.Context) form, options := FormDataChromiumPdfOptions(ctx) + mode := pdfengines.FormDataPdfSplitMode(form, false) pdfFormats := pdfengines.FormDataPdfFormats(form) - metadata := pdfengines.FormDataPdfMetadata(form) + metadata := pdfengines.FormDataPdfMetadata(form, false) var ( inputPath string @@ -469,7 +472,7 @@ func convertMarkdownRoute(chromium Api, engine gotenberg.PdfEngine) api.Route { return fmt.Errorf("transform markdown file(s) to HTML: %w", err) } - err = convertUrl(ctx, chromium, engine, url, options, pdfFormats, metadata) + err = convertUrl(ctx, chromium, engine, url, options, mode, pdfFormats, metadata) if err != nil { return fmt.Errorf("convert markdown to PDF: %w", err) } @@ -593,7 +596,7 @@ func markdownToHtml(ctx *api.Context, inputPath string, markdownPaths []string) return fmt.Sprintf("file://%s", inputPath), nil } -func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, pdfFormats gotenberg.PdfFormats, metadata map[string]interface{}) error { +func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url string, options PdfOptions, mode gotenberg.SplitMode, pdfFormats gotenberg.PdfFormats, metadata map[string]interface{}) error { outputPath := ctx.GeneratePath(".pdf") err := chromium.Pdf(ctx, ctx.Log(), url, outputPath, options) @@ -632,16 +635,34 @@ func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url return 
fmt.Errorf("convert to PDF: %w", err) } - outputPaths, err := pdfengines.ConvertStub(ctx, engine, pdfFormats, []string{outputPath}) + outputPaths, err := pdfengines.SplitPdfStub(ctx, engine, mode, []string{outputPath}) if err != nil { - return fmt.Errorf("convert PDF: %w", err) + return fmt.Errorf("split PDF: %w", err) } - err = pdfengines.WriteMetadataStub(ctx, engine, metadata, outputPaths) + convertOutputPaths, err := pdfengines.ConvertStub(ctx, engine, pdfFormats, outputPaths) + if err != nil { + return fmt.Errorf("convert PDF(s): %w", err) + } + + err = pdfengines.WriteMetadataStub(ctx, engine, metadata, convertOutputPaths) if err != nil { return fmt.Errorf("write metadata: %w", err) } + zeroValuedSplitMode := gotenberg.SplitMode{} + zeroValuedPdfFormats := gotenberg.PdfFormats{} + if mode != zeroValuedSplitMode && pdfFormats != zeroValuedPdfFormats { + // The PDF has been split and split parts have been converted to a + // specific format. We want to keep the split naming. + for i, convertOutputPath := range convertOutputPaths { + err = ctx.Rename(convertOutputPath, outputPaths[i]) + if err != nil { + return fmt.Errorf("rename output path: %w", err) + } + } + } + err = ctx.AddOutputPaths(outputPaths...) 
if err != nil { return fmt.Errorf("add output paths: %w", err) diff --git a/pkg/modules/chromium/routes_test.go b/pkg/modules/chromium/routes_test.go index 1e7363ba6..933b833ed 100644 --- a/pkg/modules/chromium/routes_test.go +++ b/pkg/modules/chromium/routes_test.go @@ -1428,6 +1428,7 @@ func TestConvertUrl(t *testing.T) { api Api engine gotenberg.PdfEngine options PdfOptions + splitMode gotenberg.SplitMode pdfFormats gotenberg.PdfFormats metadata map[string]interface{} expectError bool @@ -1570,6 +1571,36 @@ func TestConvertUrl(t *testing.T) { expectHttpError: false, expectOutputPathsCount: 0, }, + { + scenario: "PDF engine split error", + ctx: &api.ContextMock{Context: new(api.Context)}, + api: &ApiMock{PdfMock: func(ctx context.Context, logger *zap.Logger, url, outputPath string, options PdfOptions) error { + return nil + }}, + engine: &gotenberg.PdfEngineMock{SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return nil, errors.New("foo") + }}, + options: DefaultPdfOptions(), + splitMode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals, Span: "1"}, + expectError: true, + expectHttpError: false, + expectOutputPathsCount: 0, + }, + { + scenario: "success with split mode", + ctx: &api.ContextMock{Context: new(api.Context)}, + api: &ApiMock{PdfMock: func(ctx context.Context, logger *zap.Logger, url, outputPath string, options PdfOptions) error { + return nil + }}, + engine: &gotenberg.PdfEngineMock{SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return []string{inputPath}, nil + }}, + options: DefaultPdfOptions(), + splitMode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1"}, + expectError: false, + expectHttpError: false, + expectOutputPathsCount: 1, + }, { scenario: "PDF engine convert error", ctx: &api.ContextMock{Context: new(api.Context)}, @@ -1600,6 +1631,27 @@ 
func TestConvertUrl(t *testing.T) { expectHttpError: false, expectOutputPathsCount: 1, }, + { + scenario: "success with split mode and PDF formats", + ctx: &api.ContextMock{Context: new(api.Context)}, + api: &ApiMock{PdfMock: func(ctx context.Context, logger *zap.Logger, url, outputPath string, options PdfOptions) error { + return nil + }}, + engine: &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return []string{inputPath}, nil + }, + ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { + return nil + }, + }, + options: DefaultPdfOptions(), + splitMode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1"}, + pdfFormats: gotenberg.PdfFormats{PdfA: gotenberg.PdfA1b}, + expectError: false, + expectHttpError: false, + expectOutputPathsCount: 1, + }, { scenario: "PDF engine write metadata error", ctx: &api.ContextMock{Context: new(api.Context)}, @@ -1659,7 +1711,13 @@ func TestConvertUrl(t *testing.T) { } { t.Run(tc.scenario, func(t *testing.T) { tc.ctx.SetLogger(zap.NewNop()) - err := convertUrl(tc.ctx.Context, tc.api, tc.engine, "", tc.options, tc.pdfFormats, tc.metadata) + tc.ctx.SetMkdirAll(&gotenberg.MkdirAllMock{MkdirAllMock: func(path string, perm os.FileMode) error { + return nil + }}) + tc.ctx.SetPathRename(&gotenberg.PathRenameMock{RenameMock: func(oldpath, newpath string) error { + return nil + }}) + err := convertUrl(tc.ctx.Context, tc.api, tc.engine, "", tc.options, tc.splitMode, tc.pdfFormats, tc.metadata) if tc.expectError && err == nil { t.Fatal("expected error but got none", err) diff --git a/pkg/modules/exiftool/exiftool.go b/pkg/modules/exiftool/exiftool.go index 7d2cb8d97..aeffc6a99 100644 --- a/pkg/modules/exiftool/exiftool.go +++ b/pkg/modules/exiftool/exiftool.go @@ -58,6 +58,11 @@ func (engine *ExifTool) Merge(ctx context.Context, logger 
*zap.Logger, inputPath return fmt.Errorf("merge PDFs with ExifTool: %w", gotenberg.ErrPdfEngineMethodNotSupported) } +// Split is not available in this implementation. +func (engine *ExifTool) Split(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return nil, fmt.Errorf("split PDF with ExifTool: %w", gotenberg.ErrPdfEngineMethodNotSupported) +} + // Convert is not available in this implementation. func (engine *ExifTool) Convert(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { return fmt.Errorf("convert PDF to '%+v' with ExifTool: %w", formats, gotenberg.ErrPdfEngineMethodNotSupported) diff --git a/pkg/modules/exiftool/exiftool_test.go b/pkg/modules/exiftool/exiftool_test.go index 949087ec8..52c8f31d7 100644 --- a/pkg/modules/exiftool/exiftool_test.go +++ b/pkg/modules/exiftool/exiftool_test.go @@ -82,6 +82,15 @@ func TestExiftool_Merge(t *testing.T) { } } +func TestExiftool_Split(t *testing.T) { + engine := new(ExifTool) + _, err := engine.Split(context.Background(), zap.NewNop(), gotenberg.SplitMode{}, "", "") + + if !errors.Is(err, gotenberg.ErrPdfEngineMethodNotSupported) { + t.Errorf("expected error %v, but got: %v", gotenberg.ErrPdfEngineMethodNotSupported, err) + } +} + func TestExiftool_Convert(t *testing.T) { engine := new(ExifTool) err := engine.Convert(context.Background(), zap.NewNop(), gotenberg.PdfFormats{}, "", "") @@ -257,7 +266,7 @@ func TestExiftool_WriteMetadata(t *testing.T) { var destinationPath string if tc.createCopy { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) outputDir, err := fs.MkdirAll() if err != nil { t.Fatalf("expected error no but got: %v", err) diff --git a/pkg/modules/libreoffice/api/libreoffice.go b/pkg/modules/libreoffice/api/libreoffice.go index f8c4415b2..ee332ad48 100644 --- a/pkg/modules/libreoffice/api/libreoffice.go +++ 
b/pkg/modules/libreoffice/api/libreoffice.go @@ -44,7 +44,7 @@ type libreOfficeProcess struct { func newLibreOfficeProcess(arguments libreOfficeArguments) libreOffice { p := &libreOfficeProcess{ arguments: arguments, - fs: gotenberg.NewFileSystem(), + fs: gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), } p.isStarted.Store(false) diff --git a/pkg/modules/libreoffice/api/libreoffice_test.go b/pkg/modules/libreoffice/api/libreoffice_test.go index 953cb908b..fce85515c 100644 --- a/pkg/modules/libreoffice/api/libreoffice_test.go +++ b/pkg/modules/libreoffice/api/libreoffice_test.go @@ -230,7 +230,7 @@ func TestLibreOfficeProcess_pdf(t *testing.T) { p.isStarted.Store(false) return p }(), - fs: gotenberg.NewFileSystem(), + fs: gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), cancelledCtx: false, start: false, expectError: true, @@ -243,7 +243,7 @@ func TestLibreOfficeProcess_pdf(t *testing.T) { p.isStarted.Store(true) return p }(), - fs: gotenberg.NewFileSystem(), + fs: gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)), options: Options{PdfFormats: gotenberg.PdfFormats{PdfA: "foo"}}, cancelledCtx: false, start: false, @@ -261,7 +261,7 @@ func TestLibreOfficeProcess_pdf(t *testing.T) { ), options: Options{PageRanges: "foo"}, fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -291,7 +291,7 @@ func TestLibreOfficeProcess_pdf(t *testing.T) { ), options: Options{Password: "foo"}, fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -344,7 +344,7 @@ func TestLibreOfficeProcess_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { 
@@ -372,7 +372,7 @@ func TestLibreOfficeProcess_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -400,7 +400,7 @@ func TestLibreOfficeProcess_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -452,7 +452,7 @@ func TestLibreOfficeProcess_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -481,7 +481,7 @@ func TestLibreOfficeProcess_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -510,7 +510,7 @@ func TestLibreOfficeProcess_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -539,7 +539,7 @@ func TestLibreOfficeProcess_pdf(t *testing.T) { }, ), fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -625,7 +625,7 @@ func TestNonBasicLatinCharactersGuard(t *testing.T) { { scenario: "basic latin characters", fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { @@ -646,7 +646,7 @@ func TestNonBasicLatinCharactersGuard(t *testing.T) { { scenario: "non-basic latin 
characters", fs: func() *gotenberg.FileSystem { - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) err := os.MkdirAll(fs.WorkingDirPath(), 0o755) if err != nil { diff --git a/pkg/modules/libreoffice/pdfengine/pdfengine.go b/pkg/modules/libreoffice/pdfengine/pdfengine.go index b478c21be..5416ab357 100644 --- a/pkg/modules/libreoffice/pdfengine/pdfengine.go +++ b/pkg/modules/libreoffice/pdfengine/pdfengine.go @@ -51,6 +51,11 @@ func (engine *LibreOfficePdfEngine) Merge(ctx context.Context, logger *zap.Logge return fmt.Errorf("merge PDFs with LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported) } +// Split is not available in this implementation. +func (engine *LibreOfficePdfEngine) Split(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return nil, fmt.Errorf("split PDF with LibreOffice: %w", gotenberg.ErrPdfEngineMethodNotSupported) +} + // Convert converts the given PDF to a specific PDF format. Currently, only the // PDF/A-1b, PDF/A-2b, PDF/A-3b and PDF/UA formats are available. 
If another // PDF format is requested, it returns a [gotenberg.ErrPdfFormatNotSupported] diff --git a/pkg/modules/libreoffice/pdfengine/pdfengine_test.go b/pkg/modules/libreoffice/pdfengine/pdfengine_test.go index 8353954d6..1dc4ed737 100644 --- a/pkg/modules/libreoffice/pdfengine/pdfengine_test.go +++ b/pkg/modules/libreoffice/pdfengine/pdfengine_test.go @@ -118,11 +118,21 @@ func TestLibreOfficePdfEngine_Merge(t *testing.T) { } } +func TestLibreOfficePdfEngine_Split(t *testing.T) { + engine := new(LibreOfficePdfEngine) + _, err := engine.Split(context.Background(), zap.NewNop(), gotenberg.SplitMode{}, "", "") + + if !errors.Is(err, gotenberg.ErrPdfEngineMethodNotSupported) { + t.Errorf("expected error %v, but got: %v", gotenberg.ErrPdfEngineMethodNotSupported, err) + } +} + func TestLibreOfficePdfEngine_Convert(t *testing.T) { for _, tc := range []struct { - scenario string - api api.Uno - expectError bool + scenario string + api api.Uno + expectError bool + expectedError error }{ { scenario: "convert success", @@ -134,13 +144,14 @@ func TestLibreOfficePdfEngine_Convert(t *testing.T) { expectError: false, }, { - scenario: "invalid PDF format", + scenario: "ErrInvalidPdfFormats", api: &api.ApiMock{ PdfMock: func(ctx context.Context, logger *zap.Logger, inputPath, outputPath string, options api.Options) error { return api.ErrInvalidPdfFormats }, }, - expectError: true, + expectError: true, + expectedError: gotenberg.ErrPdfFormatNotSupported, }, { scenario: "convert fail", @@ -163,6 +174,10 @@ func TestLibreOfficePdfEngine_Convert(t *testing.T) { if tc.expectError && err == nil { t.Fatal("expected error but got none") } + + if tc.expectedError != nil && !errors.Is(err, tc.expectedError) { + t.Fatalf("expected error %v but got: %v", tc.expectedError, err) + } }) } } diff --git a/pkg/modules/libreoffice/routes.go b/pkg/modules/libreoffice/routes.go index b49677d64..86165833b 100644 --- a/pkg/modules/libreoffice/routes.go +++ b/pkg/modules/libreoffice/routes.go @@ 
-28,8 +28,11 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap defaultOptions := libreofficeapi.DefaultOptions() form := ctx.FormData() + splitMode := pdfengines.FormDataPdfSplitMode(form, false) pdfFormats := pdfengines.FormDataPdfFormats(form) - metadata := pdfengines.FormDataPdfMetadata(form) + metadata := pdfengines.FormDataPdfMetadata(form, false) + + zeroValuedSplitMode := gotenberg.SplitMode{} var ( inputPaths []string @@ -165,7 +168,9 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap MaxImageResolution: maxImageResolution, } - if nativePdfFormats { + if nativePdfFormats && splitMode == zeroValuedSplitMode { + // Only apply natively given PDF formats if we're not + // splitting the PDF later. options.PdfFormats = pdfFormats } @@ -209,11 +214,44 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap outputPaths = []string{outputPath} } - if !nativePdfFormats { - outputPaths, err = pdfengines.ConvertStub(ctx, engine, pdfFormats, outputPaths) + if splitMode != zeroValuedSplitMode { + if !merge { + // document.docx -> document.docx.pdf, so that split naming + // document.docx_0.pdf, etc. + for i, inputPath := range inputPaths { + outputPath := fmt.Sprintf("%s.pdf", inputPath) + + err = ctx.Rename(outputPaths[i], outputPath) + if err != nil { + return fmt.Errorf("rename output path: %w", err) + } + + outputPaths[i] = outputPath + } + } + + outputPaths, err = pdfengines.SplitPdfStub(ctx, engine, splitMode, outputPaths) + if err != nil { + return fmt.Errorf("split PDFs: %w", err) + } + } + + if !nativePdfFormats || (nativePdfFormats && splitMode != zeroValuedSplitMode) { + convertOutputPaths, err := pdfengines.ConvertStub(ctx, engine, pdfFormats, outputPaths) if err != nil { return fmt.Errorf("convert PDFs: %w", err) } + + if splitMode != zeroValuedSplitMode { + // The PDF has been split and split parts have been converted to + // specific formats. 
We want to keep the split naming. + for i, convertOutputPath := range convertOutputPaths { + err = ctx.Rename(convertOutputPath, outputPaths[i]) + if err != nil { + return fmt.Errorf("rename output path: %w", err) + } + } + } } err = pdfengines.WriteMetadataStub(ctx, engine, metadata, outputPaths) @@ -221,7 +259,7 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap return fmt.Errorf("write metadata: %w", err) } - if len(outputPaths) > 1 { + if len(outputPaths) > 1 && splitMode == zeroValuedSplitMode { // If .zip archive, document.docx -> document.docx.pdf. for i, inputPath := range inputPaths { outputPath := fmt.Sprintf("%s.pdf", inputPath) diff --git a/pkg/modules/libreoffice/routes_test.go b/pkg/modules/libreoffice/routes_test.go index 041e41655..139f57489 100644 --- a/pkg/modules/libreoffice/routes_test.go +++ b/pkg/modules/libreoffice/routes_test.go @@ -3,7 +3,10 @@ package libreoffice import ( "context" "errors" + "fmt" "net/http" + "os" + "path/filepath" "slices" "testing" @@ -301,18 +304,18 @@ func TestConvertRoute(t *testing.T) { expectOutputPathsCount: 0, }, { - scenario: "PDF engine convert error", + scenario: "PDF engine split error", ctx: func() *api.ContextMock { ctx := &api.ContextMock{Context: new(api.Context)} ctx.SetFiles(map[string]string{ "document.docx": "/document.docx", }) ctx.SetValues(map[string][]string{ - "pdfa": { - gotenberg.PdfA1b, + "splitMode": { + gotenberg.SplitModeIntervals, }, - "nativePdfFormats": { - "false", + "splitSpan": { + "1", }, }) return ctx @@ -326,8 +329,8 @@ func TestConvertRoute(t *testing.T) { }, }, engine: &gotenberg.PdfEngineMock{ - ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { - return errors.New("foo") + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return nil, errors.New("foo") }, }, expectError: true, @@ 
-335,15 +338,18 @@ func TestConvertRoute(t *testing.T) { expectOutputPathsCount: 0, }, { - scenario: "PDF engine write metadata error", + scenario: "PDF engine convert error", ctx: func() *api.ContextMock { ctx := &api.ContextMock{Context: new(api.Context)} ctx.SetFiles(map[string]string{ "document.docx": "/document.docx", }) ctx.SetValues(map[string][]string{ - "metadata": { - "{\"Creator\": \"foo\", \"Producer\": \"bar\" }", + "pdfa": { + gotenberg.PdfA1b, + }, + "nativePdfFormats": { + "false", }, }) return ctx @@ -357,7 +363,7 @@ func TestConvertRoute(t *testing.T) { }, }, engine: &gotenberg.PdfEngineMock{ - WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { + ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { return errors.New("foo") }, }, @@ -366,17 +372,17 @@ func TestConvertRoute(t *testing.T) { expectOutputPathsCount: 0, }, { - scenario: "cannot rename many files", + scenario: "PDF engine write metadata error", ctx: func() *api.ContextMock { ctx := &api.ContextMock{Context: new(api.Context)} ctx.SetFiles(map[string]string{ - "document.docx": "/document.docx", - "document2.docx": "/document2.docx", - "document2.doc": "/document2.doc", + "document.docx": "/document.docx", + }) + ctx.SetValues(map[string][]string{ + "metadata": { + "{\"Creator\": \"foo\", \"Producer\": \"bar\" }", + }, }) - ctx.SetPathRename(&gotenberg.PathRenameMock{RenameMock: func(oldpath, newpath string) error { - return errors.New("cannot rename") - }}) return ctx }(), libreOffice: &libreofficeapi.ApiMock{ @@ -384,7 +390,12 @@ func TestConvertRoute(t *testing.T) { return nil }, ExtensionsMock: func() []string { - return []string{".docx", ".doc"} + return []string{".docx"} + }, + }, + engine: &gotenberg.PdfEngineMock{ + WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { 
+ return errors.New("foo") }, }, expectError: true, @@ -550,9 +561,173 @@ func TestConvertRoute(t *testing.T) { expectHttpError: false, expectOutputPathsCount: 1, }, + { + scenario: "success with split (many files)", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetFiles(map[string]string{ + "document.docx": "/document.docx", + "document2.docx": "/document2.docx", + }) + ctx.SetValues(map[string][]string{ + "splitMode": { + gotenberg.SplitModeIntervals, + }, + "splitSpan": { + "1", + }, + }) + return ctx + }(), + libreOffice: &libreofficeapi.ApiMock{ + PdfMock: func(ctx context.Context, logger *zap.Logger, inputPath, outputPath string, options libreofficeapi.Options) error { + return nil + }, + ExtensionsMock: func() []string { + return []string{".docx"} + }, + }, + engine: &gotenberg.PdfEngineMock{ + MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { + return nil + }, + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + inputPathNoExt := inputPath[:len(inputPath)-len(filepath.Ext(inputPath))] + filenameNoExt := filepath.Base(inputPathNoExt) + return []string{ + fmt.Sprintf( + "%s/%s_%d.pdf", + outputDirPath, filenameNoExt, 0, + ), + fmt.Sprintf( + "%s/%s_%d.pdf", + outputDirPath, filenameNoExt, 1, + ), + }, nil + }, + }, + expectError: false, + expectHttpError: false, + expectOutputPathsCount: 4, + expectOutputPaths: []string{"/document_docx/document.docx_0.pdf", "/document_docx/document.docx_1.pdf", "/document2_docx/document2.docx_0.pdf", "/document2_docx/document2.docx_1.pdf"}, + }, + { + scenario: "success with merge and split", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetFiles(map[string]string{ + "document.docx": "/document.docx", + "document2.docx": "/document2.docx", + }) + ctx.SetValues(map[string][]string{ + "merge": { + 
"true", + }, + "splitMode": { + gotenberg.SplitModeIntervals, + }, + "splitSpan": { + "1", + }, + }) + return ctx + }(), + libreOffice: &libreofficeapi.ApiMock{ + PdfMock: func(ctx context.Context, logger *zap.Logger, inputPath, outputPath string, options libreofficeapi.Options) error { + return nil + }, + ExtensionsMock: func() []string { + return []string{".docx"} + }, + }, + engine: &gotenberg.PdfEngineMock{ + MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { + return nil + }, + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + inputPathNoExt := inputPath[:len(inputPath)-len(filepath.Ext(inputPath))] + filenameNoExt := filepath.Base(inputPathNoExt) + return []string{ + fmt.Sprintf( + "%s/%s_%d.pdf", + outputDirPath, filenameNoExt, 0, + ), + fmt.Sprintf( + "%s/%s_%d.pdf", + outputDirPath, filenameNoExt, 1, + ), + }, nil + }, + }, + expectError: false, + expectHttpError: false, + expectOutputPathsCount: 2, + }, + { + scenario: "success with split and native PDF/A & PDF/UA (many files)", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetFiles(map[string]string{ + "document.docx": "/document.docx", + "document2.docx": "/document2.docx", + }) + ctx.SetValues(map[string][]string{ + "splitMode": { + gotenberg.SplitModeIntervals, + }, + "splitSpan": { + "1", + }, + "pdfa": { + gotenberg.PdfA1b, + }, + "pdfua": { + "true", + }, + }) + return ctx + }(), + libreOffice: &libreofficeapi.ApiMock{ + PdfMock: func(ctx context.Context, logger *zap.Logger, inputPath, outputPath string, options libreofficeapi.Options) error { + return nil + }, + ExtensionsMock: func() []string { + return []string{".docx"} + }, + }, + engine: &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + 
inputPathNoExt := inputPath[:len(inputPath)-len(filepath.Ext(inputPath))] + filenameNoExt := filepath.Base(inputPathNoExt) + return []string{ + fmt.Sprintf( + "%s/%s_%d.pdf", + outputDirPath, filenameNoExt, 0, + ), + fmt.Sprintf( + "%s/%s_%d.pdf", + outputDirPath, filenameNoExt, 1, + ), + }, nil + }, + ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { + return nil + }, + }, + expectError: false, + expectHttpError: false, + expectOutputPathsCount: 4, + expectOutputPaths: []string{"/document_docx/document.docx_0.pdf", "/document_docx/document.docx_1.pdf", "/document2_docx/document2.docx_0.pdf", "/document2_docx/document2.docx_1.pdf"}, + }, } { t.Run(tc.scenario, func(t *testing.T) { tc.ctx.SetLogger(zap.NewNop()) + tc.ctx.SetMkdirAll(&gotenberg.MkdirAllMock{MkdirAllMock: func(path string, perm os.FileMode) error { + return nil + }}) + tc.ctx.SetPathRename(&gotenberg.PathRenameMock{RenameMock: func(oldpath, newpath string) error { + return nil + }}) c := echo.New().NewContext(nil, nil) c.Set("context", tc.ctx.Context) diff --git a/pkg/modules/pdfcpu/doc.go b/pkg/modules/pdfcpu/doc.go index e68e2a61f..6856a27ac 100644 --- a/pkg/modules/pdfcpu/doc.go +++ b/pkg/modules/pdfcpu/doc.go @@ -2,6 +2,7 @@ // interface using the pdfcpu command-line tool. This package allows for: // // 1. The merging of PDF files. +// 2. The splitting of PDF files. // // See: https://github.com/pdfcpu/pdfcpu. package pdfcpu diff --git a/pkg/modules/pdfcpu/pdfcpu.go b/pkg/modules/pdfcpu/pdfcpu.go index ac2d53589..b59573c1e 100644 --- a/pkg/modules/pdfcpu/pdfcpu.go +++ b/pkg/modules/pdfcpu/pdfcpu.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "os" + "path/filepath" "go.uber.org/zap" @@ -70,6 +71,38 @@ func (engine *PdfCpu) Merge(ctx context.Context, logger *zap.Logger, inputPaths return fmt.Errorf("merge PDFs with pdfcpu: %w", err) } +// Split splits a given PDF file. 
+func (engine *PdfCpu) Split(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + var args []string + + switch mode.Mode { + case gotenberg.SplitModeIntervals: + args = append(args, "split", "-mode", "span", inputPath, outputDirPath, mode.Span) + case gotenberg.SplitModePages: + outputPath := fmt.Sprintf("%s/%s", outputDirPath, filepath.Base(inputPath)) + args = append(args, "trim", "-pages", mode.Span, inputPath, outputPath) + default: + return nil, fmt.Errorf("split PDFs using mode '%s' with pdfcpu: %w", mode.Mode, gotenberg.ErrPdfSplitModeNotSupported) + } + + cmd, err := gotenberg.CommandContext(ctx, logger, engine.binPath, args...) + if err != nil { + return nil, fmt.Errorf("create command: %w", err) + } + + _, err = cmd.Exec() + if err != nil { + return nil, fmt.Errorf("split PDFs with pdfcpu: %w", err) + } + + outputPaths, err := gotenberg.WalkDir(outputDirPath, ".pdf") + if err != nil { + return nil, fmt.Errorf("walk directory to find resulting PDFs from split with pdfcpu: %w", err) + } + + return outputPaths, nil +} + // Convert is not available in this implementation. 
func (engine *PdfCpu) Convert(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { return fmt.Errorf("convert PDF to '%+v' with pdfcpu: %w", formats, gotenberg.ErrPdfEngineMethodNotSupported) diff --git a/pkg/modules/pdfcpu/pdfcpu_test.go b/pkg/modules/pdfcpu/pdfcpu_test.go index f009218a2..e962fc698 100644 --- a/pkg/modules/pdfcpu/pdfcpu_test.go +++ b/pkg/modules/pdfcpu/pdfcpu_test.go @@ -116,7 +116,7 @@ func TestPdfCpu_Merge(t *testing.T) { t.Fatalf("expected error but got: %v", err) } - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) outputDir, err := fs.MkdirAll() if err != nil { t.Fatalf("expected error but got: %v", err) @@ -142,6 +142,95 @@ func TestPdfCpu_Merge(t *testing.T) { } } +func TestPdfCpu_Split(t *testing.T) { + for _, tc := range []struct { + scenario string + ctx context.Context + mode gotenberg.SplitMode + inputPath string + expectError bool + expectedError error + expectOutputPathsCount int + }{ + { + scenario: "ErrPdfSplitModeNotSupported", + expectError: true, + expectedError: gotenberg.ErrPdfSplitModeNotSupported, + expectOutputPathsCount: 0, + }, + { + scenario: "invalid context", + ctx: nil, + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals, Span: "1"}, + expectError: true, + expectOutputPathsCount: 0, + }, + { + scenario: "invalid input path", + ctx: context.TODO(), + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals, Span: "1"}, + inputPath: "", + expectError: true, + expectOutputPathsCount: 0, + }, + { + scenario: "success (intervals)", + ctx: context.TODO(), + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals, Span: "1"}, + inputPath: "/tests/test/testdata/pdfengines/sample1.pdf", + expectError: false, + expectOutputPathsCount: 3, + }, + { + scenario: "success (pages)", + ctx: context.TODO(), + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1"}, + inputPath: 
"/tests/test/testdata/pdfengines/sample1.pdf", + expectError: false, + expectOutputPathsCount: 1, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + engine := new(PdfCpu) + err := engine.Provision(nil) + if err != nil { + t.Fatalf("expected error but got: %v", err) + } + + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) + outputDir, err := fs.MkdirAll() + if err != nil { + t.Fatalf("expected error but got: %v", err) + } + + defer func() { + err = os.RemoveAll(fs.WorkingDirPath()) + if err != nil { + t.Fatalf("expected no error while cleaning up but got: %v", err) + } + }() + + outputPaths, err := engine.Split(tc.ctx, zap.NewNop(), tc.mode, tc.inputPath, outputDir) + + if !tc.expectError && err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + if tc.expectError && err == nil { + t.Fatal("expected error but got none") + } + + if tc.expectedError != nil && !errors.Is(err, tc.expectedError) { + t.Fatalf("expected error %v but got: %v", tc.expectedError, err) + } + + if tc.expectOutputPathsCount != len(outputPaths) { + t.Errorf("expected %d output paths but got %d", tc.expectOutputPathsCount, len(outputPaths)) + } + }) + } +} + func TestPdfCpu_Convert(t *testing.T) { mod := new(PdfCpu) err := mod.Convert(context.TODO(), zap.NewNop(), gotenberg.PdfFormats{}, "", "") diff --git a/pkg/modules/pdfengines/multi.go b/pkg/modules/pdfengines/multi.go index 4cbbc3eac..c6c9514d6 100644 --- a/pkg/modules/pdfengines/multi.go +++ b/pkg/modules/pdfengines/multi.go @@ -13,6 +13,7 @@ import ( type multiPdfEngines struct { mergeEngines []gotenberg.PdfEngine + splitEngines []gotenberg.PdfEngine convertEngines []gotenberg.PdfEngine readMedataEngines []gotenberg.PdfEngine writeMedataEngines []gotenberg.PdfEngine @@ -20,12 +21,14 @@ type multiPdfEngines struct { func newMultiPdfEngines( mergeEngines, + splitEngines, convertEngines, readMetadataEngines, writeMedataEngines []gotenberg.PdfEngine, ) *multiPdfEngines { return &multiPdfEngines{ mergeEngines: 
mergeEngines, + splitEngines: splitEngines, convertEngines: convertEngines, readMedataEngines: readMetadataEngines, writeMedataEngines: writeMedataEngines, @@ -57,6 +60,44 @@ func (multi *multiPdfEngines) Merge(ctx context.Context, logger *zap.Logger, inp return fmt.Errorf("merge PDFs with multi PDF engines: %w", err) } +type splitResult struct { + outputPaths []string + err error +} + +// Split tries to split at intervals a given PDF thanks to its children. If the +// context is done, it stops and returns an error. +func (multi *multiPdfEngines) Split(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + var err error + var mu sync.Mutex // to safely append errors. + + resultChan := make(chan splitResult, len(multi.splitEngines)) + + for _, engine := range multi.splitEngines { + go func(engine gotenberg.PdfEngine) { + outputPaths, err := engine.Split(ctx, logger, mode, inputPath, outputDirPath) + resultChan <- splitResult{outputPaths: outputPaths, err: err} + }(engine) + } + + for range multi.splitEngines { + select { + case result := <-resultChan: + if result.err != nil { + mu.Lock() + err = multierr.Append(err, result.err) + mu.Unlock() + } else { + return result.outputPaths, nil + } + case <-ctx.Done(): + return nil, ctx.Err() + } + } + + return nil, fmt.Errorf("split PDF with multi PDF engines: %w", err) +} + // Convert converts the given PDF to a specific PDF format. thanks to its // children. If the context is done, it stops and returns an error. 
func (multi *multiPdfEngines) Convert(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { diff --git a/pkg/modules/pdfengines/multi_test.go b/pkg/modules/pdfengines/multi_test.go index 00e706d78..6e5686c0c 100644 --- a/pkg/modules/pdfengines/multi_test.go +++ b/pkg/modules/pdfengines/multi_test.go @@ -19,25 +19,22 @@ func TestMultiPdfEngines_Merge(t *testing.T) { }{ { scenario: "nominal behavior", - engine: newMultiPdfEngines( - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + mergeEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { return nil }, }, }, - nil, - nil, - nil, - ), + }, ctx: context.Background(), expectError: false, }, { scenario: "at least one engine does not return an error", - engine: newMultiPdfEngines( - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + mergeEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { return errors.New("foo") @@ -49,17 +46,14 @@ func TestMultiPdfEngines_Merge(t *testing.T) { }, }, }, - nil, - nil, - nil, - ), + }, ctx: context.Background(), expectError: false, }, { scenario: "all engines return an error", - engine: newMultiPdfEngines( - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + mergeEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ MergeMock: func(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { return errors.New("foo") @@ -71,27 +65,21 @@ func TestMultiPdfEngines_Merge(t *testing.T) { }, }, }, - nil, - nil, - nil, - ), + }, ctx: context.Background(), expectError: true, }, { scenario: "context expired", - engine: newMultiPdfEngines( - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + mergeEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ MergeMock: func(ctx context.Context, 
logger *zap.Logger, inputPaths []string, outputPath string) error { return nil }, }, }, - nil, - nil, - nil, - ), + }, ctx: func() context.Context { ctx, cancel := context.WithCancel(context.Background()) cancel() @@ -115,6 +103,97 @@ func TestMultiPdfEngines_Merge(t *testing.T) { } } +func TestMultiPdfEngines_Split(t *testing.T) { + for _, tc := range []struct { + scenario string + engine *multiPdfEngines + ctx context.Context + expectError bool + }{ + { + scenario: "nominal behavior", + engine: &multiPdfEngines{ + splitEngines: []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return nil, nil + }, + }, + }, + }, + ctx: context.Background(), + }, + { + scenario: "at least one engine does not return an error", + engine: &multiPdfEngines{ + splitEngines: []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return nil, errors.New("foo") + }, + }, + &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return nil, nil + }, + }, + }, + }, + ctx: context.Background(), + }, + { + scenario: "all engines return an error", + engine: &multiPdfEngines{ + splitEngines: []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return nil, errors.New("foo") + }, + }, + &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return nil, errors.New("foo") + }, + }, + }, + }, + ctx: context.Background(), + expectError: true, + }, + { + scenario: "context 
expired", + engine: &multiPdfEngines{ + splitEngines: []gotenberg.PdfEngine{ + &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return nil, nil + }, + }, + }, + }, + ctx: func() context.Context { + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + return ctx + }(), + expectError: true, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + _, err := tc.engine.Split(tc.ctx, zap.NewNop(), gotenberg.SplitMode{}, "", "") + + if !tc.expectError && err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + if tc.expectError && err == nil { + t.Fatal("expected error but got none") + } + }) + } +} + func TestMultiPdfEngines_Convert(t *testing.T) { for _, tc := range []struct { scenario string @@ -124,25 +203,21 @@ func TestMultiPdfEngines_Convert(t *testing.T) { }{ { scenario: "nominal behavior", - engine: newMultiPdfEngines( - nil, - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + convertEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { return nil }, }, }, - nil, - nil, - ), + }, ctx: context.Background(), }, { scenario: "at least one engine does not return an error", - engine: newMultiPdfEngines( - nil, - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + convertEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { return errors.New("foo") @@ -154,16 +229,13 @@ func TestMultiPdfEngines_Convert(t *testing.T) { }, }, }, - nil, - nil, - ), + }, ctx: context.Background(), }, { scenario: "all engines return an error", - engine: newMultiPdfEngines( - nil, - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + convertEngines: []gotenberg.PdfEngine{ 
&gotenberg.PdfEngineMock{ ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { return errors.New("foo") @@ -175,26 +247,21 @@ func TestMultiPdfEngines_Convert(t *testing.T) { }, }, }, - nil, - nil, - ), + }, ctx: context.Background(), expectError: true, }, { scenario: "context expired", - engine: newMultiPdfEngines( - nil, - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + convertEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { return nil }, }, }, - nil, - nil, - ), + }, ctx: func() context.Context { ctx, cancel := context.WithCancel(context.Background()) cancel() @@ -227,26 +294,21 @@ func TestMultiPdfEngines_ReadMetadata(t *testing.T) { }{ { scenario: "nominal behavior", - engine: newMultiPdfEngines( - nil, - nil, - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + readMedataEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { return make(map[string]interface{}), nil }, }, }, - nil, - ), + }, ctx: context.Background(), }, { scenario: "at least one engine does not return an error", - engine: newMultiPdfEngines( - nil, - nil, - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + readMedataEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { return nil, errors.New("foo") @@ -258,16 +320,13 @@ func TestMultiPdfEngines_ReadMetadata(t *testing.T) { }, }, }, - nil, - ), + }, ctx: context.Background(), }, { scenario: "all engines return an error", - engine: newMultiPdfEngines( - nil, - nil, - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + readMedataEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ 
ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { return nil, errors.New("foo") @@ -279,25 +338,21 @@ func TestMultiPdfEngines_ReadMetadata(t *testing.T) { }, }, }, - nil, - ), + }, ctx: context.Background(), expectError: true, }, { scenario: "context expired", - engine: newMultiPdfEngines( - nil, - nil, - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + readMedataEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ ReadMetadataMock: func(ctx context.Context, logger *zap.Logger, inputPath string) (map[string]interface{}, error) { return make(map[string]interface{}), nil }, }, }, - nil, - ), + }, ctx: func() context.Context { ctx, cancel := context.WithCancel(context.Background()) cancel() @@ -330,27 +385,21 @@ func TestMultiPdfEngines_WriteMetadata(t *testing.T) { }{ { scenario: "nominal behavior", - engine: newMultiPdfEngines( - nil, - nil, - nil, - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + writeMedataEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { return nil }, }, }, - ), + }, ctx: context.Background(), }, { scenario: "at least one engine does not return an error", - engine: newMultiPdfEngines( - nil, - nil, - nil, - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + writeMedataEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { return errors.New("foo") @@ -362,16 +411,13 @@ func TestMultiPdfEngines_WriteMetadata(t *testing.T) { }, }, }, - ), + }, ctx: context.Background(), }, { scenario: "all engines return an error", - engine: newMultiPdfEngines( - nil, - nil, - nil, - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + writeMedataEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ WriteMetadataMock: func(ctx 
context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { return errors.New("foo") @@ -383,24 +429,21 @@ func TestMultiPdfEngines_WriteMetadata(t *testing.T) { }, }, }, - ), + }, ctx: context.Background(), expectError: true, }, { scenario: "context expired", - engine: newMultiPdfEngines( - nil, - nil, - nil, - []gotenberg.PdfEngine{ + engine: &multiPdfEngines{ + writeMedataEngines: []gotenberg.PdfEngine{ &gotenberg.PdfEngineMock{ WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { return nil }, }, }, - ), + }, ctx: func() context.Context { ctx, cancel := context.WithCancel(context.Background()) cancel() diff --git a/pkg/modules/pdfengines/pdfengines.go b/pkg/modules/pdfengines/pdfengines.go index 7bd000187..4f07f83ea 100644 --- a/pkg/modules/pdfengines/pdfengines.go +++ b/pkg/modules/pdfengines/pdfengines.go @@ -28,6 +28,7 @@ func init() { // enabled. type PdfEngines struct { mergeNames []string + splitNames []string convertNames []string readMetadataNames []string writeMedataNames []string @@ -42,6 +43,7 @@ func (mod *PdfEngines) Descriptor() gotenberg.ModuleDescriptor { FlagSet: func() *flag.FlagSet { fs := flag.NewFlagSet("pdfengines", flag.ExitOnError) fs.StringSlice("pdfengines-merge-engines", []string{"qpdf", "pdfcpu", "pdftk"}, "Set the PDF engines and their order for the merge feature - empty means all") + fs.StringSlice("pdfengines-split-engines", []string{"pdfcpu", "qpdf", "pdftk"}, "Set the PDF engines and their order for the split feature - empty means all") fs.StringSlice("pdfengines-convert-engines", []string{"libreoffice-pdfengine"}, "Set the PDF engines and their order for the convert feature - empty means all") fs.StringSlice("pdfengines-read-metadata-engines", []string{"exiftool"}, "Set the PDF engines and their order for the read metadata feature - empty means all") fs.StringSlice("pdfengines-write-metadata-engines", []string{"exiftool"}, 
"Set the PDF engines and their order for the write metadata feature - empty means all") @@ -64,6 +66,7 @@ func (mod *PdfEngines) Descriptor() gotenberg.ModuleDescriptor { func (mod *PdfEngines) Provision(ctx *gotenberg.Context) error { flags := ctx.ParsedFlags() mergeNames := flags.MustStringSlice("pdfengines-merge-engines") + splitNames := flags.MustStringSlice("pdfengines-split-engines") convertNames := flags.MustStringSlice("pdfengines-convert-engines") readMetadataNames := flags.MustStringSlice("pdfengines-read-metadata-engines") writeMetadataNames := flags.MustStringSlice("pdfengines-write-metadata-engines") @@ -98,6 +101,11 @@ func (mod *PdfEngines) Provision(ctx *gotenberg.Context) error { mod.mergeNames = mergeNames } + mod.splitNames = defaultNames + if len(splitNames) > 0 { + mod.splitNames = splitNames + } + mod.convertNames = defaultNames if len(convertNames) > 0 { mod.convertNames = convertNames @@ -161,6 +169,7 @@ func (mod *PdfEngines) Validate() error { } findNonExistingEngines(mod.mergeNames) + findNonExistingEngines(mod.splitNames) findNonExistingEngines(mod.convertNames) findNonExistingEngines(mod.readMetadataNames) findNonExistingEngines(mod.writeMedataNames) @@ -177,6 +186,7 @@ func (mod *PdfEngines) Validate() error { func (mod *PdfEngines) SystemMessages() []string { return []string{ fmt.Sprintf("merge engines - %s", strings.Join(mod.mergeNames[:], " ")), + fmt.Sprintf("split engines - %s", strings.Join(mod.splitNames[:], " ")), fmt.Sprintf("convert engines - %s", strings.Join(mod.convertNames[:], " ")), fmt.Sprintf("read metadata engines - %s", strings.Join(mod.readMetadataNames[:], " ")), fmt.Sprintf("write medata engines - %s", strings.Join(mod.writeMedataNames[:], " ")), @@ -201,6 +211,7 @@ func (mod *PdfEngines) PdfEngine() (gotenberg.PdfEngine, error) { return newMultiPdfEngines( engines(mod.mergeNames), + engines(mod.splitNames), engines(mod.convertNames), engines(mod.readMetadataNames), engines(mod.writeMedataNames), @@ -222,6 +233,7 
@@ func (mod *PdfEngines) Routes() ([]api.Route, error) { return []api.Route{ mergeRoute(engine), + splitRoute(engine), convertRoute(engine), readMetadataRoute(engine), writeMetadataRoute(engine), diff --git a/pkg/modules/pdfengines/pdfengines_test.go b/pkg/modules/pdfengines/pdfengines_test.go index fe999432d..505a229f2 100644 --- a/pkg/modules/pdfengines/pdfengines_test.go +++ b/pkg/modules/pdfengines/pdfengines_test.go @@ -26,6 +26,7 @@ func TestPdfEngines_Provision(t *testing.T) { scenario string ctx *gotenberg.Context expectedMergePdfEngines []string + expectedSplitPdfEngines []string expectedConvertPdfEngines []string expectedReadMetadataPdfEngines []string expectedWriteMetadataPdfEngines []string @@ -66,6 +67,7 @@ func TestPdfEngines_Provision(t *testing.T) { ) }(), expectedMergePdfEngines: []string{"qpdf", "pdfcpu", "pdftk"}, + expectedSplitPdfEngines: []string{"pdfcpu", "qpdf", "pdftk"}, expectedConvertPdfEngines: []string{"libreoffice-pdfengine"}, expectedReadMetadataPdfEngines: []string{"exiftool"}, expectedWriteMetadataPdfEngines: []string{"exiftool"}, @@ -107,7 +109,7 @@ func TestPdfEngines_Provision(t *testing.T) { } fs := new(PdfEngines).Descriptor().FlagSet - err := fs.Parse([]string{"--pdfengines-merge-engines=b", "--pdfengines-convert-engines=b", "--pdfengines-read-metadata-engines=a", "--pdfengines-write-metadata-engines=a"}) + err := fs.Parse([]string{"--pdfengines-merge-engines=b", "--pdfengines-split-engines=a", "--pdfengines-convert-engines=b", "--pdfengines-read-metadata-engines=a", "--pdfengines-write-metadata-engines=a"}) if err != nil { t.Fatalf("expected no error but got: %v", err) } @@ -125,6 +127,7 @@ func TestPdfEngines_Provision(t *testing.T) { }(), expectedMergePdfEngines: []string{"b"}, + expectedSplitPdfEngines: []string{"a"}, expectedConvertPdfEngines: []string{"b"}, expectedReadMetadataPdfEngines: []string{"a"}, expectedWriteMetadataPdfEngines: []string{"a"}, @@ -200,6 +203,12 @@ func TestPdfEngines_Provision(t *testing.T) { } } 
+ for index, name := range mod.splitNames { + if name != tc.expectedSplitPdfEngines[index] { + t.Fatalf("expected split name at index %d to be %s, but got: %s", index, name, tc.expectedSplitPdfEngines[index]) + } + } + for index, name := range mod.convertNames { if name != tc.expectedConvertPdfEngines[index] { t.Fatalf("expected convert name at index %d to be %s, but got: %s", index, name, tc.expectedConvertPdfEngines[index]) @@ -303,17 +312,19 @@ func TestPdfEngines_Validate(t *testing.T) { func TestPdfEngines_SystemMessages(t *testing.T) { mod := new(PdfEngines) mod.mergeNames = []string{"foo", "bar"} + mod.splitNames = []string{"foo", "bar"} mod.convertNames = []string{"foo", "bar"} mod.readMetadataNames = []string{"foo", "bar"} mod.writeMedataNames = []string{"foo", "bar"} messages := mod.SystemMessages() - if len(messages) != 4 { + if len(messages) != 5 { t.Errorf("expected one and only one message, but got %d", len(messages)) } expect := []string{ fmt.Sprintf("merge engines - %s", strings.Join(mod.mergeNames[:], " ")), + fmt.Sprintf("split engines - %s", strings.Join(mod.splitNames[:], " ")), fmt.Sprintf("convert engines - %s", strings.Join(mod.convertNames[:], " ")), fmt.Sprintf("read metadata engines - %s", strings.Join(mod.readMetadataNames[:], " ")), fmt.Sprintf("write medata engines - %s", strings.Join(mod.writeMedataNames[:], " ")), @@ -329,6 +340,7 @@ func TestPdfEngines_SystemMessages(t *testing.T) { func TestPdfEngines_PdfEngine(t *testing.T) { mod := PdfEngines{ mergeNames: []string{"foo", "bar"}, + splitNames: []string{"foo", "bar"}, convertNames: []string{"foo", "bar"}, readMetadataNames: []string{"foo", "bar"}, writeMedataNames: []string{"foo", "bar"}, @@ -370,7 +382,7 @@ func TestPdfEngines_Routes(t *testing.T) { }{ { scenario: "routes not disabled", - expectRoutes: 4, + expectRoutes: 5, disableRoutes: false, }, { diff --git a/pkg/modules/pdfengines/routes.go b/pkg/modules/pdfengines/routes.go index a0ddb756e..2a76274b7 100644 --- 
a/pkg/modules/pdfengines/routes.go +++ b/pkg/modules/pdfengines/routes.go @@ -6,6 +6,8 @@ import ( "fmt" "net/http" "path/filepath" + "strconv" + "strings" "github.com/labstack/echo/v4" @@ -13,6 +15,63 @@ import ( "github.com/gotenberg/gotenberg/v8/pkg/modules/api" ) +// FormDataPdfSplitMode creates a [gotenberg.SplitMode] from the form data. +func FormDataPdfSplitMode(form *api.FormData, mandatory bool) gotenberg.SplitMode { + var ( + mode string + span string + ) + + splitModeFunc := func(value string) error { + if value != "" && value != gotenberg.SplitModeIntervals && value != gotenberg.SplitModePages { + return fmt.Errorf("wrong value, expected either '%s' or '%s'", gotenberg.SplitModeIntervals, gotenberg.SplitModePages) + } + mode = value + return nil + } + + splitSpanFunc := func(value string) error { + value = strings.Join(strings.Fields(value), "") + + if mode == gotenberg.SplitModeIntervals { + intValue, err := strconv.Atoi(value) + if err != nil { + return err + } + if intValue < 1 { + return errors.New("value is inferior to 1") + } + } + + span = value + + return nil + } + + if mandatory { + form. + MandatoryCustom("splitMode", func(value string) error { + return splitModeFunc(value) + }). + MandatoryCustom("splitSpan", func(value string) error { + return splitSpanFunc(value) + }) + } else { + form. + Custom("splitMode", func(value string) error { + return splitModeFunc(value) + }). + Custom("splitSpan", func(value string) error { + return splitSpanFunc(value) + }) + } + + return gotenberg.SplitMode{ + Mode: mode, + Span: span, + } +} + // FormDataPdfFormats creates [gotenberg.PdfFormats] from the form data. // Fallback to default value if the considered key is not present. func FormDataPdfFormats(form *api.FormData) gotenberg.PdfFormats { @@ -32,9 +91,10 @@ func FormDataPdfFormats(form *api.FormData) gotenberg.PdfFormats { } // FormDataPdfMetadata creates metadata object from the form data. 
-func FormDataPdfMetadata(form *api.FormData) map[string]interface{} { +func FormDataPdfMetadata(form *api.FormData, mandatory bool) map[string]interface{} { var metadata map[string]interface{} - form.Custom("metadata", func(value string) error { + + metadataFunc := func(value string) error { if len(value) > 0 { err := json.Unmarshal([]byte(value), &metadata) if err != nil { @@ -42,7 +102,18 @@ func FormDataPdfMetadata(form *api.FormData) map[string]interface{} { } } return nil - }) + } + + if mandatory { + form.MandatoryCustom("metadata", func(value string) error { + return metadataFunc(value) + }) + } else { + form.Custom("metadata", func(value string) error { + return metadataFunc(value) + }) + } + return metadata } @@ -66,6 +137,52 @@ func MergeStub(ctx *api.Context, engine gotenberg.PdfEngine, inputPaths []string return outputPath, nil } +// SplitPdfStub splits a list of PDF files based on [gotenberg.SplitMode]. +// It returns a list of output paths or the list of provided input paths if no +// split requested. +func SplitPdfStub(ctx *api.Context, engine gotenberg.PdfEngine, mode gotenberg.SplitMode, inputPaths []string) ([]string, error) { + zeroValued := gotenberg.SplitMode{} + if mode == zeroValued { + return inputPaths, nil + } + + var outputPaths []string + for _, inputPath := range inputPaths { + inputPathNoExt := inputPath[:len(inputPath)-len(filepath.Ext(inputPath))] + filenameNoExt := filepath.Base(inputPathNoExt) + outputDirPath, err := ctx.CreateSubDirectory(strings.ReplaceAll(filepath.Base(filenameNoExt), ".", "_")) + if err != nil { + return nil, fmt.Errorf("create subdirectory from input path: %w", err) + } + + paths, err := engine.Split(ctx, ctx.Log(), mode, inputPath, outputDirPath) + if err != nil { + return nil, fmt.Errorf("split PDF '%s': %w", inputPath, err) + } + + if mode.Mode == gotenberg.SplitModePages { + return paths, nil + } + + // Keep the original filename. 
+ for i, path := range paths { + newPath := fmt.Sprintf( + "%s/%s_%d.pdf", + outputDirPath, filenameNoExt, i, + ) + + err = ctx.Rename(path, newPath) + if err != nil { + return nil, fmt.Errorf("rename path: %w", err) + } + + outputPaths = append(outputPaths, newPath) + } + } + + return outputPaths, nil +} + // ConvertStub transforms a given PDF to the specified formats defined in // [gotenberg.PdfFormats]. If no format, it does nothing and returns the input // paths. @@ -116,7 +233,7 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route { form := ctx.FormData() pdfFormats := FormDataPdfFormats(form) - metadata := FormDataPdfMetadata(form) + metadata := FormDataPdfMetadata(form, false) var inputPaths []string err := form. @@ -152,6 +269,65 @@ func mergeRoute(engine gotenberg.PdfEngine) api.Route { } } +// splitRoute returns an [api.Route] which can extract pages from a PDF. +func splitRoute(engine gotenberg.PdfEngine) api.Route { + return api.Route{ + Method: http.MethodPost, + Path: "/forms/pdfengines/split", + IsMultipart: true, + Handler: func(c echo.Context) error { + ctx := c.Get("context").(*api.Context) + + form := ctx.FormData() + mode := FormDataPdfSplitMode(form, true) + pdfFormats := FormDataPdfFormats(form) + metadata := FormDataPdfMetadata(form, false) + + var inputPaths []string + err := form. + MandatoryPaths([]string{".pdf"}, &inputPaths). 
+ Validate() + if err != nil { + return fmt.Errorf("validate form data: %w", err) + } + + outputPaths, err := SplitPdfStub(ctx, engine, mode, inputPaths) + if err != nil { + return fmt.Errorf("split PDFs: %w", err) + } + + convertOutputPaths, err := ConvertStub(ctx, engine, pdfFormats, outputPaths) + if err != nil { + return fmt.Errorf("convert PDFs: %w", err) + } + + err = WriteMetadataStub(ctx, engine, metadata, convertOutputPaths) + if err != nil { + return fmt.Errorf("write metadata: %w", err) + } + + zeroValuedSplitMode := gotenberg.SplitMode{} + zeroValuedPdfFormats := gotenberg.PdfFormats{} + if mode != zeroValuedSplitMode && pdfFormats != zeroValuedPdfFormats { + // Rename the files to keep the split naming. + for i, convertOutputPath := range convertOutputPaths { + err = ctx.Rename(convertOutputPath, outputPaths[i]) + if err != nil { + return fmt.Errorf("rename output path: %w", err) + } + } + } + + err = ctx.AddOutputPaths(outputPaths...) + if err != nil { + return fmt.Errorf("add output paths: %w", err) + } + + return nil + }, + } +} + // convertRoute returns an [api.Route] which can convert PDFs to a specific ODF // format. func convertRoute(engine gotenberg.PdfEngine) api.Route { @@ -258,25 +434,12 @@ func writeMetadataRoute(engine gotenberg.PdfEngine) api.Route { Handler: func(c echo.Context) error { ctx := c.Get("context").(*api.Context) - var ( - inputPaths []string - metadata map[string]interface{} - ) + form := ctx.FormData() + metadata := FormDataPdfMetadata(form, true) - err := ctx.FormData(). + var inputPaths []string + err := form. MandatoryPaths([]string{".pdf"}, &inputPaths). - MandatoryCustom("metadata", func(value string) error { - if len(value) > 0 { - err := json.Unmarshal([]byte(value), &metadata) - if err != nil { - return fmt.Errorf("unmarshal metadata: %w", err) - } - } - if len(metadata) == 0 { - return errors.New("no metadata") - } - return nil - }). 
Validate() if err != nil { return fmt.Errorf("validate form data: %w", err) diff --git a/pkg/modules/pdfengines/routes_test.go b/pkg/modules/pdfengines/routes_test.go index 94df1688d..6e0e5e3e6 100644 --- a/pkg/modules/pdfengines/routes_test.go +++ b/pkg/modules/pdfengines/routes_test.go @@ -3,13 +3,16 @@ package pdfengines import ( "context" "errors" + "fmt" "net/http" "net/http/httptest" + "os" "reflect" "slices" "strings" "testing" + "github.com/google/uuid" "github.com/labstack/echo/v4" "go.uber.org/zap" @@ -17,6 +20,156 @@ import ( "github.com/gotenberg/gotenberg/v8/pkg/modules/api" ) +func TestFormDataPdfSplitMode(t *testing.T) { + for _, tc := range []struct { + scenario string + ctx *api.ContextMock + mandatory bool + expectedSplitMode gotenberg.SplitMode + expectValidationError bool + }{ + { + scenario: "no custom form fields", + ctx: &api.ContextMock{Context: new(api.Context)}, + mandatory: false, + expectedSplitMode: gotenberg.SplitMode{}, + expectValidationError: false, + }, + { + scenario: "no custom form fields (mandatory)", + ctx: &api.ContextMock{Context: new(api.Context)}, + mandatory: true, + expectedSplitMode: gotenberg.SplitMode{}, + expectValidationError: true, + }, + { + scenario: "invalid splitMode", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "splitMode": { + "foo", + }, + }) + return ctx + }(), + mandatory: false, + expectedSplitMode: gotenberg.SplitMode{}, + expectValidationError: true, + }, + { + scenario: "invalid splitSpan (intervals)", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "splitMode": { + "intervals", + }, + "splitSpan": { + "1-2", + }, + }) + return ctx + }(), + mandatory: false, + expectedSplitMode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals}, + expectValidationError: true, + }, + { + scenario: "splitSpan inferior to 1 (intervals)", + ctx: func() 
*api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "splitMode": { + "intervals", + }, + "splitSpan": { + "-1", + }, + }) + return ctx + }(), + mandatory: false, + expectedSplitMode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals}, + expectValidationError: true, + }, + { + scenario: "valid form fields (intervals)", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "splitMode": { + "intervals", + }, + "splitSpan": { + "1", + }, + }) + return ctx + }(), + mandatory: false, + expectedSplitMode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals, Span: "1"}, + expectValidationError: false, + }, + { + scenario: "valid form fields (pages)", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "splitMode": { + "pages", + }, + "splitSpan": { + "1-2", + }, + }) + return ctx + }(), + mandatory: false, + expectedSplitMode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + expectValidationError: false, + }, + { + scenario: "valid form fields (mandatory)", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "splitMode": { + "intervals", + }, + "splitSpan": { + "1", + }, + }) + return ctx + }(), + mandatory: true, + expectedSplitMode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals, Span: "1"}, + expectValidationError: false, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + tc.ctx.SetLogger(zap.NewNop()) + form := tc.ctx.Context.FormData() + actual := FormDataPdfSplitMode(form, tc.mandatory) + + if !reflect.DeepEqual(actual, tc.expectedSplitMode) { + t.Fatalf("expected %+v but got: %+v", tc.expectedSplitMode, actual) + } + + err := form.Validate() + + if tc.expectValidationError && err == nil { + t.Fatal("expected validation error but got none", err) + } + + if 
!tc.expectValidationError && err != nil { + t.Fatalf("expected no validation error but got: %v", err) + } + }) + } +} + func TestFormDataPdfFormats(t *testing.T) { for _, tc := range []struct { scenario string @@ -74,15 +227,24 @@ func TestFormDataPdfMetadata(t *testing.T) { for _, tc := range []struct { scenario string ctx *api.ContextMock + mandatory bool expectedMetadata map[string]interface{} expectValidationError bool }{ { scenario: "no metadata form field", ctx: &api.ContextMock{Context: new(api.Context)}, + mandatory: false, expectedMetadata: nil, expectValidationError: false, }, + { + scenario: "no metadata form field (mandatory)", + ctx: &api.ContextMock{Context: new(api.Context)}, + mandatory: true, + expectedMetadata: nil, + expectValidationError: true, + }, { scenario: "invalid metadata form field", ctx: func() *api.ContextMock { @@ -94,6 +256,7 @@ func TestFormDataPdfMetadata(t *testing.T) { }) return ctx }(), + mandatory: false, expectedMetadata: nil, expectValidationError: true, }, @@ -108,6 +271,7 @@ func TestFormDataPdfMetadata(t *testing.T) { }) return ctx }(), + mandatory: false, expectedMetadata: map[string]interface{}{ "foo": "bar", }, @@ -117,7 +281,7 @@ func TestFormDataPdfMetadata(t *testing.T) { t.Run(tc.scenario, func(t *testing.T) { tc.ctx.SetLogger(zap.NewNop()) form := tc.ctx.Context.FormData() - actual := FormDataPdfMetadata(form) + actual := FormDataPdfMetadata(form, tc.mandatory) if !reflect.DeepEqual(actual, tc.expectedMetadata) { t.Fatalf("expected %+v but got: %+v", tc.expectedMetadata, actual) @@ -193,6 +357,128 @@ func TestMergeStub(t *testing.T) { } } +func TestSplitPdfStub(t *testing.T) { + for _, tc := range []struct { + scenario string + ctx *api.ContextMock + engine gotenberg.PdfEngine + mode gotenberg.SplitMode + expectError bool + }{ + { + scenario: "no split mode", + mode: gotenberg.SplitMode{}, + ctx: &api.ContextMock{Context: new(api.Context)}, + expectError: false, + }, + { + scenario: "cannot create subdirectory", + 
mode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals, Span: "1"}, + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetMkdirAll(&gotenberg.MkdirAllMock{MkdirAllMock: func(path string, perm os.FileMode) error { + return errors.New("cannot create subdirectory") + }}) + return ctx + }(), + expectError: true, + }, + { + scenario: "split error", + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals, Span: "1"}, + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetMkdirAll(&gotenberg.MkdirAllMock{MkdirAllMock: func(path string, perm os.FileMode) error { + return nil + }}) + return ctx + }(), + engine: &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return nil, errors.New("foo") + }, + }, + expectError: true, + }, + { + scenario: "rename error", + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals, Span: "1"}, + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetMkdirAll(&gotenberg.MkdirAllMock{MkdirAllMock: func(path string, perm os.FileMode) error { + return nil + }}) + ctx.SetPathRename(&gotenberg.PathRenameMock{RenameMock: func(oldpath, newpath string) error { + return errors.New("cannot rename") + }}) + return ctx + }(), + engine: &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return []string{inputPath}, nil + }, + }, + expectError: true, + }, + { + scenario: "success (intervals)", + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals, Span: "1"}, + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetMkdirAll(&gotenberg.MkdirAllMock{MkdirAllMock: func(path string, perm os.FileMode) error { + return nil + }}) + 
ctx.SetPathRename(&gotenberg.PathRenameMock{RenameMock: func(oldpath, newpath string) error { + return nil + }}) + return ctx + }(), + engine: &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return []string{inputPath}, nil + }, + }, + expectError: false, + }, + { + scenario: "success (pages)", + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetMkdirAll(&gotenberg.MkdirAllMock{MkdirAllMock: func(path string, perm os.FileMode) error { + return nil + }}) + ctx.SetPathRename(&gotenberg.PathRenameMock{RenameMock: func(oldpath, newpath string) error { + return nil + }}) + return ctx + }(), + engine: &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return []string{inputPath}, nil + }, + }, + expectError: false, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + dirPath := fmt.Sprintf("%s/%s", os.TempDir(), uuid.NewString()) + tc.ctx.SetDirPath(dirPath) + tc.ctx.SetLogger(zap.NewNop()) + + _, err := SplitPdfStub(tc.ctx.Context, tc.engine, tc.mode, []string{"my.pdf", "my2.pdf"}) + + if tc.expectError && err == nil { + t.Fatal("expected error but got none", err) + } + + if !tc.expectError && err != nil { + t.Fatalf("expected no error but got: %v", err) + } + }) + } +} + func TestConvertStub(t *testing.T) { for _, tc := range []struct { scenario string @@ -503,6 +789,287 @@ func TestMergeHandler(t *testing.T) { } } +func TestSplitHandler(t *testing.T) { + for _, tc := range []struct { + scenario string + ctx *api.ContextMock + engine gotenberg.PdfEngine + expectError bool + expectHttpError bool + expectHttpStatus int + expectOutputPathsCount int + expectOutputPaths []string + }{ + { + scenario: "missing at least one mandatory 
file", + ctx: &api.ContextMock{Context: new(api.Context)}, + expectError: true, + expectHttpError: true, + expectHttpStatus: http.StatusBadRequest, + expectOutputPathsCount: 0, + }, + { + scenario: "no split mode", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetFiles(map[string]string{ + "file.pdf": "/file.pdf", + }) + return ctx + }(), + expectError: true, + expectHttpError: true, + expectHttpStatus: http.StatusBadRequest, + expectOutputPathsCount: 0, + }, + { + scenario: "error from PDF engine (split)", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetFiles(map[string]string{ + "file.pdf": "/file.pdf", + }) + ctx.SetValues(map[string][]string{ + "splitMode": { + gotenberg.SplitModeIntervals, + }, + "splitSpan": { + "1", + }, + }) + return ctx + }(), + engine: &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return nil, errors.New("foo") + }, + }, + expectError: true, + expectHttpError: false, + expectOutputPathsCount: 0, + }, + { + scenario: "error from PDF engine (convert)", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetFiles(map[string]string{ + "file.pdf": "/file.pdf", + }) + ctx.SetValues(map[string][]string{ + "splitMode": { + gotenberg.SplitModeIntervals, + }, + "splitSpan": { + "1", + }, + "pdfua": { + "true", + }, + }) + return ctx + }(), + engine: &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return []string{inputPath}, nil + }, + ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { + return errors.New("foo") + }, + }, + expectError: true, + expectHttpError: false, + expectOutputPathsCount: 0, + }, + { + 
scenario: "error from PDF engine (write metadata)", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetFiles(map[string]string{ + "file.pdf": "/file.pdf", + }) + ctx.SetValues(map[string][]string{ + "splitMode": { + gotenberg.SplitModeIntervals, + }, + "splitSpan": { + "1", + }, + "metadata": { + "{\"Creator\": \"foo\", \"Producer\": \"bar\" }", + }, + }) + return ctx + }(), + engine: &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return []string{inputPath}, nil + }, + WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { + return errors.New("foo") + }, + }, + expectError: true, + expectHttpError: false, + expectOutputPathsCount: 0, + }, + { + scenario: "cannot add output paths", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetFiles(map[string]string{ + "file.pdf": "/file.pdf", + }) + ctx.SetValues(map[string][]string{ + "splitMode": { + gotenberg.SplitModeIntervals, + }, + "splitSpan": { + "1", + }, + }) + ctx.SetCancelled(true) + return ctx + }(), + engine: &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return []string{inputPath}, nil + }, + }, + expectError: true, + expectHttpError: false, + expectOutputPathsCount: 0, + }, + { + scenario: "success (intervals)", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetFiles(map[string]string{ + "file.pdf": "/file.pdf", + }) + ctx.SetValues(map[string][]string{ + "splitMode": { + gotenberg.SplitModeIntervals, + }, + "splitSpan": { + "1", + }, + "pdfua": { + "true", + }, + "metadata": { + "{\"Creator\": \"foo\", \"Producer\": \"bar\" }", + }, + }) + return ctx + }(), + engine: 
&gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return []string{"file_split_1.pdf", "file_split_2.pdf"}, nil + }, + ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { + return nil + }, + WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { + return nil + }, + }, + expectError: false, + expectHttpError: false, + expectOutputPathsCount: 2, + expectOutputPaths: []string{"/file/file_0.pdf", "/file/file_1.pdf"}, + }, + { + scenario: "success (pages)", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetFiles(map[string]string{ + "file.pdf": "/file.pdf", + }) + ctx.SetValues(map[string][]string{ + "splitMode": { + gotenberg.SplitModePages, + }, + "splitSpan": { + "1-2", + }, + "pdfua": { + "true", + }, + "metadata": { + "{\"Creator\": \"foo\", \"Producer\": \"bar\" }", + }, + }) + return ctx + }(), + engine: &gotenberg.PdfEngineMock{ + SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + return []string{"/file/file.pdf"}, nil + }, + ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { + return nil + }, + WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { + return nil + }, + }, + expectError: false, + expectHttpError: false, + expectOutputPathsCount: 1, + expectOutputPaths: []string{"/file/file.pdf"}, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + tc.ctx.SetLogger(zap.NewNop()) + tc.ctx.SetMkdirAll(&gotenberg.MkdirAllMock{MkdirAllMock: func(path string, perm os.FileMode) error { + return nil + }}) + 
tc.ctx.SetPathRename(&gotenberg.PathRenameMock{RenameMock: func(oldpath, newpath string) error { + return nil + }}) + c := echo.New().NewContext(nil, nil) + c.Set("context", tc.ctx.Context) + + err := splitRoute(tc.engine).Handler(c) + + if tc.expectError && err == nil { + t.Fatal("expected error but got none", err) + } + + if !tc.expectError && err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + var httpErr api.HttpError + isHttpError := errors.As(err, &httpErr) + + if tc.expectHttpError && !isHttpError { + t.Errorf("expected an HTTP error but got: %v", err) + } + + if !tc.expectHttpError && isHttpError { + t.Errorf("expected no HTTP error but got one: %v", httpErr) + } + + if err != nil && tc.expectHttpError && isHttpError { + status, _ := httpErr.HttpError() + if status != tc.expectHttpStatus { + t.Errorf("expected %d as HTTP status code but got %d", tc.expectHttpStatus, status) + } + } + + if tc.expectOutputPathsCount != len(tc.ctx.OutputPaths()) { + t.Errorf("expected %d output paths but got %d", tc.expectOutputPathsCount, len(tc.ctx.OutputPaths())) + } + + for _, path := range tc.expectOutputPaths { + if !slices.Contains(tc.ctx.OutputPaths(), path) { + t.Errorf("expected '%s' in output paths %v", path, tc.ctx.OutputPaths()) + } + } + }) + } +} + func TestConvertHandler(t *testing.T) { for _, tc := range []struct { scenario string diff --git a/pkg/modules/pdftk/doc.go b/pkg/modules/pdftk/doc.go index 3a01ae417..c65403f72 100644 --- a/pkg/modules/pdftk/doc.go +++ b/pkg/modules/pdftk/doc.go @@ -2,6 +2,7 @@ // interface using the PDFtk command-line tool. This package allows for: // // 1. The merging of PDF files. +// 2. The splitting of PDF files. // // The path to the PDFtk binary must be specified using the PDFTK_BIN_PATH // environment variable. 
diff --git a/pkg/modules/pdftk/pdftk.go b/pkg/modules/pdftk/pdftk.go index 9846ee9df..d8870a26d 100644 --- a/pkg/modules/pdftk/pdftk.go +++ b/pkg/modules/pdftk/pdftk.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "os" + "path/filepath" "go.uber.org/zap" @@ -51,6 +52,31 @@ func (engine *PdfTk) Validate() error { return nil } +// Split splits a given PDF file. +func (engine *PdfTk) Split(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + var args []string + outputPath := fmt.Sprintf("%s/%s", outputDirPath, filepath.Base(inputPath)) + + switch mode.Mode { + case gotenberg.SplitModePages: + args = append(args, inputPath, "cat", mode.Span, "output", outputPath) + default: + return nil, fmt.Errorf("split PDFs using mode '%s' with PDFtk: %w", mode.Mode, gotenberg.ErrPdfSplitModeNotSupported) + } + + cmd, err := gotenberg.CommandContext(ctx, logger, engine.binPath, args...) + if err != nil { + return nil, fmt.Errorf("create command: %w", err) + } + + _, err = cmd.Exec() + if err != nil { + return nil, fmt.Errorf("split PDFs with PDFtk: %w", err) + } + + return []string{outputPath}, nil +} + // Merge combines multiple PDFs into a single PDF. 
func (engine *PdfTk) Merge(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { var args []string diff --git a/pkg/modules/pdftk/pdftk_test.go b/pkg/modules/pdftk/pdftk_test.go index c7b864eca..d5dcd573e 100644 --- a/pkg/modules/pdftk/pdftk_test.go +++ b/pkg/modules/pdftk/pdftk_test.go @@ -116,7 +116,7 @@ func TestPdfTk_Merge(t *testing.T) { t.Fatalf("expected error but got: %v", err) } - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) outputDir, err := fs.MkdirAll() if err != nil { t.Fatalf("expected error but got: %v", err) @@ -142,6 +142,88 @@ func TestPdfTk_Merge(t *testing.T) { } } +func TestPdfCpu_Split(t *testing.T) { + for _, tc := range []struct { + scenario string + ctx context.Context + mode gotenberg.SplitMode + inputPath string + expectError bool + expectedError error + expectOutputPathsCount int + expectOutputPaths []string + }{ + { + scenario: "ErrPdfSplitModeNotSupported", + expectError: true, + expectedError: gotenberg.ErrPdfSplitModeNotSupported, + expectOutputPathsCount: 0, + }, + { + scenario: "invalid context", + ctx: nil, + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + expectError: true, + expectOutputPathsCount: 0, + }, + { + scenario: "invalid input path", + ctx: context.TODO(), + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + inputPath: "", + expectError: true, + expectOutputPathsCount: 0, + }, + { + scenario: "success (pages)", + ctx: context.TODO(), + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + inputPath: "/tests/test/testdata/pdfengines/sample1.pdf", + expectError: false, + expectOutputPathsCount: 1, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + engine := new(PdfTk) + err := engine.Provision(nil) + if err != nil { + t.Fatalf("expected error but got: %v", err) + } + + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) + outputDir, err := fs.MkdirAll() + if err != 
nil { + t.Fatalf("expected error but got: %v", err) + } + + defer func() { + err = os.RemoveAll(fs.WorkingDirPath()) + if err != nil { + t.Fatalf("expected no error while cleaning up but got: %v", err) + } + }() + + outputPaths, err := engine.Split(tc.ctx, zap.NewNop(), tc.mode, tc.inputPath, outputDir) + + if !tc.expectError && err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + if tc.expectError && err == nil { + t.Fatal("expected error but got none") + } + + if tc.expectedError != nil && !errors.Is(err, tc.expectedError) { + t.Fatalf("expected error %v but got: %v", tc.expectedError, err) + } + + if tc.expectOutputPathsCount != len(outputPaths) { + t.Errorf("expected %d output paths but got %d", tc.expectOutputPathsCount, len(outputPaths)) + } + }) + } +} + func TestPdfTk_Convert(t *testing.T) { engine := new(PdfTk) err := engine.Convert(context.TODO(), zap.NewNop(), gotenberg.PdfFormats{}, "", "") diff --git a/pkg/modules/qpdf/doc.go b/pkg/modules/qpdf/doc.go index 31f61b361..f0d54a548 100644 --- a/pkg/modules/qpdf/doc.go +++ b/pkg/modules/qpdf/doc.go @@ -2,6 +2,7 @@ // interface using the QPDF command-line tool. This package allows for: // // 1. The merging of PDF files. +// 2. The splitting of PDF files. // // The path to the QPDF binary must be specified using the QPDK_BIN_PATH // environment variable. diff --git a/pkg/modules/qpdf/qpdf.go b/pkg/modules/qpdf/qpdf.go index 57698281d..2c010d2ec 100644 --- a/pkg/modules/qpdf/qpdf.go +++ b/pkg/modules/qpdf/qpdf.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "os" + "path/filepath" "go.uber.org/zap" @@ -45,12 +46,37 @@ func (engine *QPdf) Provision(ctx *gotenberg.Context) error { func (engine *QPdf) Validate() error { _, err := os.Stat(engine.binPath) if os.IsNotExist(err) { - return fmt.Errorf("QPdf binary path does not exist: %w", err) + return fmt.Errorf("QPDF binary path does not exist: %w", err) } return nil } +// Split splits a given PDF file. 
+func (engine *QPdf) Split(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { + var args []string + outputPath := fmt.Sprintf("%s/%s", outputDirPath, filepath.Base(inputPath)) + + switch mode.Mode { + case gotenberg.SplitModePages: + args = append(args, inputPath, "--pages", ".", mode.Span, "--", outputPath) + default: + return nil, fmt.Errorf("split PDFs using mode '%s' with QPDF: %w", mode.Mode, gotenberg.ErrPdfSplitModeNotSupported) + } + + cmd, err := gotenberg.CommandContext(ctx, logger, engine.binPath, args...) + if err != nil { + return nil, fmt.Errorf("create command: %w", err) + } + + _, err = cmd.Exec() + if err != nil { + return nil, fmt.Errorf("split PDFs with QPDF: %w", err) + } + + return []string{outputPath}, nil +} + // Merge combines multiple PDFs into a single PDF. func (engine *QPdf) Merge(ctx context.Context, logger *zap.Logger, inputPaths []string, outputPath string) error { var args []string diff --git a/pkg/modules/qpdf/qpdf_test.go b/pkg/modules/qpdf/qpdf_test.go index a966928d0..b32976cbf 100644 --- a/pkg/modules/qpdf/qpdf_test.go +++ b/pkg/modules/qpdf/qpdf_test.go @@ -116,7 +116,7 @@ func TestQPdf_Merge(t *testing.T) { t.Fatalf("expected error but got: %v", err) } - fs := gotenberg.NewFileSystem() + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) outputDir, err := fs.MkdirAll() if err != nil { t.Fatalf("expected error but got: %v", err) @@ -142,6 +142,88 @@ func TestQPdf_Merge(t *testing.T) { } } +func TestQPdf_Split(t *testing.T) { + for _, tc := range []struct { + scenario string + ctx context.Context + mode gotenberg.SplitMode + inputPath string + expectError bool + expectedError error + expectOutputPathsCount int + expectOutputPaths []string + }{ + { + scenario: "ErrPdfSplitModeNotSupported", + expectError: true, + expectedError: gotenberg.ErrPdfSplitModeNotSupported, + expectOutputPathsCount: 0, + }, + { + scenario: "invalid context", + ctx: nil, + mode: 
gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + expectError: true, + expectOutputPathsCount: 0, + }, + { + scenario: "invalid input path", + ctx: context.TODO(), + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + inputPath: "", + expectError: true, + expectOutputPathsCount: 0, + }, + { + scenario: "success (pages)", + ctx: context.TODO(), + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + inputPath: "/tests/test/testdata/pdfengines/sample1.pdf", + expectError: false, + expectOutputPathsCount: 1, + }, + } { + t.Run(tc.scenario, func(t *testing.T) { + engine := new(QPdf) + err := engine.Provision(nil) + if err != nil { + t.Fatalf("expected error but got: %v", err) + } + + fs := gotenberg.NewFileSystem(new(gotenberg.OsMkdirAll)) + outputDir, err := fs.MkdirAll() + if err != nil { + t.Fatalf("expected error but got: %v", err) + } + + defer func() { + err = os.RemoveAll(fs.WorkingDirPath()) + if err != nil { + t.Fatalf("expected no error while cleaning up but got: %v", err) + } + }() + + outputPaths, err := engine.Split(tc.ctx, zap.NewNop(), tc.mode, tc.inputPath, outputDir) + + if !tc.expectError && err != nil { + t.Fatalf("expected no error but got: %v", err) + } + + if tc.expectError && err == nil { + t.Fatal("expected error but got none") + } + + if tc.expectedError != nil && !errors.Is(err, tc.expectedError) { + t.Fatalf("expected error %v but got: %v", tc.expectedError, err) + } + + if tc.expectOutputPathsCount != len(outputPaths) { + t.Errorf("expected %d output paths but got %d", tc.expectOutputPathsCount, len(outputPaths)) + } + }) + } +} + func TestQPdf_Convert(t *testing.T) { engine := new(QPdf) err := engine.Convert(context.TODO(), zap.NewNop(), gotenberg.PdfFormats{}, "", "") From 51a913a5e4032d7ed89e62fc64352a38cbbc49f0 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Fri, 20 Dec 2024 15:59:51 +0100 Subject: [PATCH 21/25] chore(deps): update Go depencies --- go.mod | 25 
++++++++++++------------- go.sum | 54 ++++++++++++++++++++++++++---------------------------- 2 files changed, 38 insertions(+), 41 deletions(-) diff --git a/go.mod b/go.mod index 02b01da49..4edbc38b4 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/alexliesenfeld/health v0.8.0 github.com/andybalholm/brotli v1.1.1 // indirect github.com/barasher/go-exiftool v1.10.0 - github.com/chromedp/cdproto v0.0.0-20241110205750-a72e6703cd9b + github.com/chromedp/cdproto v0.0.0-20241208230723-d1c7de7e5dd2 github.com/chromedp/chromedp v0.11.2 github.com/golang/snappy v0.0.4 // indirect github.com/google/uuid v1.6.0 @@ -14,25 +14,25 @@ require ( github.com/hashicorp/go-retryablehttp v0.7.7 github.com/klauspost/compress v1.17.11 // indirect github.com/klauspost/pgzip v1.2.6 // indirect - github.com/labstack/echo/v4 v4.12.0 + github.com/labstack/echo/v4 v4.13.3 github.com/labstack/gommon v0.4.2 github.com/mattn/go-isatty v0.0.20 // indirect github.com/mholt/archiver/v3 v3.5.1 github.com/microcosm-cc/bluemonday v1.0.27 github.com/nwaples/rardecode v1.1.3 // indirect - github.com/pierrec/lz4/v4 v4.1.21 // indirect + github.com/pierrec/lz4/v4 v4.1.22 // indirect github.com/prometheus/client_golang v1.20.5 github.com/russross/blackfriday/v2 v2.1.0 github.com/spf13/pflag v1.0.5 github.com/ulikunitz/xz v0.5.12 // indirect go.uber.org/multierr v1.11.0 go.uber.org/zap v1.27.0 - golang.org/x/crypto v0.29.0 // indirect - golang.org/x/net v0.31.0 - golang.org/x/sync v0.9.0 - golang.org/x/sys v0.27.0 // indirect - golang.org/x/term v0.26.0 - golang.org/x/text v0.20.0 + golang.org/x/crypto v0.31.0 // indirect + golang.org/x/net v0.33.0 + golang.org/x/sync v0.10.0 + golang.org/x/sys v0.28.0 // indirect + golang.org/x/term v0.27.0 + golang.org/x/text v0.21.0 ) require github.com/dlclark/regexp2 v1.11.4 @@ -46,18 +46,17 @@ require ( github.com/gobwas/httphead v0.1.0 // indirect github.com/gobwas/pool v0.2.1 // indirect github.com/gobwas/ws v1.4.0 // indirect - 
github.com/golang-jwt/jwt v3.2.2+incompatible // indirect github.com/gorilla/css v1.0.1 // indirect github.com/josharian/intern v1.0.0 // indirect - github.com/mailru/easyjson v0.7.7 // indirect + github.com/mailru/easyjson v0.9.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.60.1 // indirect + github.com/prometheus/common v0.61.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasttemplate v1.2.2 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect golang.org/x/time v0.8.0 // indirect - google.golang.org/protobuf v1.35.2 // indirect + google.golang.org/protobuf v1.36.0 // indirect ) diff --git a/go.sum b/go.sum index 6f328e5f4..cc6a4a12e 100644 --- a/go.sum +++ b/go.sum @@ -11,8 +11,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chromedp/cdproto v0.0.0-20241110205750-a72e6703cd9b h1:md1Gk5jkNE91SZxFDCMHmKqX0/GsEr1/VTejht0sCbY= -github.com/chromedp/cdproto v0.0.0-20241110205750-a72e6703cd9b/go.mod h1:4XqMl3iIW08jtieURWL6Tt5924w21pxirC6th662XUM= +github.com/chromedp/cdproto v0.0.0-20241208230723-d1c7de7e5dd2 h1:fJob5N/Eprtd427U84kFpQhAHIEqJYuDzveaL6T4Xsk= +github.com/chromedp/cdproto v0.0.0-20241208230723-d1c7de7e5dd2/go.mod h1:4XqMl3iIW08jtieURWL6Tt5924w21pxirC6th662XUM= github.com/chromedp/chromedp v0.11.2 h1:ZRHTh7DjbNTlfIv3NFTbB7eVeu5XCNkgrpcGSpn2oX0= github.com/chromedp/chromedp v0.11.2/go.mod h1:lr8dFRLKsdTTWb75C/Ttol2vnBKOSnt0BW8R9Xaupi8= github.com/chromedp/sysutil 
v1.1.0 h1:PUFNv5EcprjqXZD9nJb9b/c9ibAbxiYo4exNWZyipwM= @@ -33,8 +33,6 @@ github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs= github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc= -github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY= -github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= @@ -63,14 +61,14 @@ github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/labstack/echo/v4 v4.12.0 h1:IKpw49IMryVB2p1a4dzwlhP1O2Tf2E0Ir/450lH+kI0= -github.com/labstack/echo/v4 v4.12.0/go.mod h1:UP9Cr2DJXbOK3Kr9ONYzNowSh7HP0aG0ShAyycHSJvM= +github.com/labstack/echo/v4 v4.13.3 h1:pwhpCPrTl5qry5HRdM5FwdXnhXSLSY+WE+YQSeCaafY= +github.com/labstack/echo/v4 v4.13.3/go.mod h1:o90YNEeQWjDozo584l7AwhJMHN0bOC4tAfg+Xox9q5g= github.com/labstack/gommon v0.4.2 h1:F8qTUNXgG1+6WQmqoUWnz8WiEU60mXVVw0P4ht1WRA0= github.com/labstack/gommon v0.4.2/go.mod h1:QlUFxVM+SNXhDL/Z7YhocGIBYOiwB0mXm1+1bAPHPyU= github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo= github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= -github.com/mailru/easyjson v0.7.7 
h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= -github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= +github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= @@ -88,16 +86,16 @@ github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWk github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw= github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0= github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= -github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= +github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= 
-github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= +github.com/prometheus/common v0.61.0 h1:3gv/GThfX0cV2lpO7gkTUwZru38mxevy90Bj8YFSRQQ= +github.com/prometheus/common v0.61.0/go.mod h1:zr29OCN/2BsJRaFwG8QOBr41D6kkchKbpeNH7pAjb/s= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= @@ -108,8 +106,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= @@ -128,24 +126,24 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= -golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= -golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= 
-golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo= -golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM= -golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= -golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= -golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= -golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= -golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= -golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg= golang.org/x/time 
v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= -google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.0 h1:mjIs9gYtt56AzC4ZaffQuh88TZurBGhIJMBZGSxNerQ= +google.golang.org/protobuf v1.36.0/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 8bc29ad92deef5207bf3c78e7c5d2455f21e58fe Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Sat, 21 Dec 2024 12:14:23 +0100 Subject: [PATCH 22/25] feat(split): add splitUnify form field --- pkg/gotenberg/pdfengine.go | 4 ++ pkg/modules/pdfcpu/pdfcpu.go | 8 +++- pkg/modules/pdfcpu/pdfcpu_test.go | 8 ++++ pkg/modules/pdfengines/routes.go | 43 ++++++++++++----- pkg/modules/pdfengines/routes_test.go | 69 +++++++++++---------------- pkg/modules/pdftk/pdftk.go | 3 ++ pkg/modules/pdftk/pdftk_test.go | 16 +++++-- pkg/modules/qpdf/qpdf.go | 3 ++ pkg/modules/qpdf/qpdf_test.go | 16 +++++-- 9 files changed, 106 insertions(+), 64 deletions(-) diff --git a/pkg/gotenberg/pdfengine.go b/pkg/gotenberg/pdfengine.go index bc74f09f2..788c07c7a 100644 --- a/pkg/gotenberg/pdfengine.go +++ b/pkg/gotenberg/pdfengine.go @@ -43,6 +43,10 @@ type SplitMode struct { // Span is either the intervals or the page ranges to extract, depending on // the selected mode. Span string + + // Unify specifies whether to put extracted pages into a single file or as + // many files as there are page ranges. Only works with "pages" mode. 
+ Unify bool } const ( diff --git a/pkg/modules/pdfcpu/pdfcpu.go b/pkg/modules/pdfcpu/pdfcpu.go index b59573c1e..29803cb38 100644 --- a/pkg/modules/pdfcpu/pdfcpu.go +++ b/pkg/modules/pdfcpu/pdfcpu.go @@ -79,8 +79,12 @@ func (engine *PdfCpu) Split(ctx context.Context, logger *zap.Logger, mode gotenb case gotenberg.SplitModeIntervals: args = append(args, "split", "-mode", "span", inputPath, outputDirPath, mode.Span) case gotenberg.SplitModePages: - outputPath := fmt.Sprintf("%s/%s", outputDirPath, filepath.Base(inputPath)) - args = append(args, "trim", "-pages", mode.Span, inputPath, outputPath) + if mode.Unify { + outputPath := fmt.Sprintf("%s/%s", outputDirPath, filepath.Base(inputPath)) + args = append(args, "trim", "-pages", mode.Span, inputPath, outputPath) + break + } + args = append(args, "extract", "-mode", "page", "-pages", mode.Span, inputPath, outputDirPath) default: return nil, fmt.Errorf("split PDFs using mode '%s' with pdfcpu: %w", mode.Mode, gotenberg.ErrPdfSplitModeNotSupported) } diff --git a/pkg/modules/pdfcpu/pdfcpu_test.go b/pkg/modules/pdfcpu/pdfcpu_test.go index e962fc698..e996aed8b 100644 --- a/pkg/modules/pdfcpu/pdfcpu_test.go +++ b/pkg/modules/pdfcpu/pdfcpu_test.go @@ -189,6 +189,14 @@ func TestPdfCpu_Split(t *testing.T) { expectError: false, expectOutputPathsCount: 1, }, + { + scenario: "success (pages & unify)", + ctx: context.TODO(), + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2", Unify: true}, + inputPath: "/tests/test/testdata/pdfengines/sample1.pdf", + expectError: false, + expectOutputPathsCount: 1, + }, } { t.Run(tc.scenario, func(t *testing.T) { engine := new(PdfCpu) diff --git a/pkg/modules/pdfengines/routes.go b/pkg/modules/pdfengines/routes.go index 2a76274b7..d164bc77f 100644 --- a/pkg/modules/pdfengines/routes.go +++ b/pkg/modules/pdfengines/routes.go @@ -18,8 +18,9 @@ import ( // FormDataPdfSplitMode creates a [gotenberg.SplitMode] from the form data. 
func FormDataPdfSplitMode(form *api.FormData, mandatory bool) gotenberg.SplitMode { var ( - mode string - span string + mode string + span string + unify bool ) splitModeFunc := func(value string) error { @@ -66,9 +67,19 @@ func FormDataPdfSplitMode(form *api.FormData, mandatory bool) gotenberg.SplitMod }) } + form. + Bool("splitUnify", &unify, false). + Custom("splitUnify", func(value string) error { + if value != "" && unify && mode != gotenberg.SplitModePages { + return fmt.Errorf("unify is not available for split mode '%s'", mode) + } + return nil + }) + return gotenberg.SplitMode{ - Mode: mode, - Span: span, + Mode: mode, + Span: span, + Unify: unify, } } @@ -160,16 +171,20 @@ func SplitPdfStub(ctx *api.Context, engine gotenberg.PdfEngine, mode gotenberg.S return nil, fmt.Errorf("split PDF '%s': %w", inputPath, err) } - if mode.Mode == gotenberg.SplitModePages { - return paths, nil - } - // Keep the original filename. for i, path := range paths { - newPath := fmt.Sprintf( - "%s/%s_%d.pdf", - outputDirPath, filenameNoExt, i, - ) + var newPath string + if mode.Unify && mode.Mode == gotenberg.SplitModePages { + newPath = fmt.Sprintf( + "%s/%s.pdf", + outputDirPath, filenameNoExt, + ) + } else { + newPath = fmt.Sprintf( + "%s/%s_%d.pdf", + outputDirPath, filenameNoExt, i, + ) + } err = ctx.Rename(path, newPath) if err != nil { @@ -177,6 +192,10 @@ func SplitPdfStub(ctx *api.Context, engine gotenberg.PdfEngine, mode gotenberg.S } outputPaths = append(outputPaths, newPath) + + if mode.Unify && mode.Mode == gotenberg.SplitModePages { + break + } } } diff --git a/pkg/modules/pdfengines/routes_test.go b/pkg/modules/pdfengines/routes_test.go index 6e0e5e3e6..a0b004fb4 100644 --- a/pkg/modules/pdfengines/routes_test.go +++ b/pkg/modules/pdfengines/routes_test.go @@ -93,6 +93,27 @@ func TestFormDataPdfSplitMode(t *testing.T) { expectedSplitMode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals}, expectValidationError: true, }, + { + scenario: "invalid splitUnify 
(intervals)", + ctx: func() *api.ContextMock { + ctx := &api.ContextMock{Context: new(api.Context)} + ctx.SetValues(map[string][]string{ + "splitMode": { + "intervals", + }, + "splitSpan": { + "1", + }, + "splitUnify": { + "true", + }, + }) + return ctx + }(), + mandatory: false, + expectedSplitMode: gotenberg.SplitMode{Mode: gotenberg.SplitModeIntervals, Span: "1", Unify: true}, + expectValidationError: true, + }, { scenario: "valid form fields (intervals)", ctx: func() *api.ContextMock { @@ -122,11 +143,14 @@ func TestFormDataPdfSplitMode(t *testing.T) { "splitSpan": { "1-2", }, + "splitUnify": { + "true", + }, }) return ctx }(), mandatory: false, - expectedSplitMode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + expectedSplitMode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2", Unify: true}, expectValidationError: false, }, { @@ -442,7 +466,7 @@ func TestSplitPdfStub(t *testing.T) { }, { scenario: "success (pages)", - mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2", Unify: true}, ctx: func() *api.ContextMock { ctx := &api.ContextMock{Context: new(api.Context)} ctx.SetMkdirAll(&gotenberg.MkdirAllMock{MkdirAllMock: func(path string, perm os.FileMode) error { @@ -940,7 +964,7 @@ func TestSplitHandler(t *testing.T) { expectOutputPathsCount: 0, }, { - scenario: "success (intervals)", + scenario: "success", ctx: func() *api.ContextMock { ctx := &api.ContextMock{Context: new(api.Context)} ctx.SetFiles(map[string]string{ @@ -978,45 +1002,6 @@ func TestSplitHandler(t *testing.T) { expectOutputPathsCount: 2, expectOutputPaths: []string{"/file/file_0.pdf", "/file/file_1.pdf"}, }, - { - scenario: "success (pages)", - ctx: func() *api.ContextMock { - ctx := &api.ContextMock{Context: new(api.Context)} - ctx.SetFiles(map[string]string{ - "file.pdf": "/file.pdf", - }) - ctx.SetValues(map[string][]string{ - "splitMode": { - 
gotenberg.SplitModePages, - }, - "splitSpan": { - "1-2", - }, - "pdfua": { - "true", - }, - "metadata": { - "{\"Creator\": \"foo\", \"Producer\": \"bar\" }", - }, - }) - return ctx - }(), - engine: &gotenberg.PdfEngineMock{ - SplitMock: func(ctx context.Context, logger *zap.Logger, mode gotenberg.SplitMode, inputPath, outputDirPath string) ([]string, error) { - return []string{"/file/file.pdf"}, nil - }, - ConvertMock: func(ctx context.Context, logger *zap.Logger, formats gotenberg.PdfFormats, inputPath, outputPath string) error { - return nil - }, - WriteMetadataMock: func(ctx context.Context, logger *zap.Logger, metadata map[string]interface{}, inputPath string) error { - return nil - }, - }, - expectError: false, - expectHttpError: false, - expectOutputPathsCount: 1, - expectOutputPaths: []string{"/file/file.pdf"}, - }, } { t.Run(tc.scenario, func(t *testing.T) { tc.ctx.SetLogger(zap.NewNop()) diff --git a/pkg/modules/pdftk/pdftk.go b/pkg/modules/pdftk/pdftk.go index d8870a26d..c3f63d173 100644 --- a/pkg/modules/pdftk/pdftk.go +++ b/pkg/modules/pdftk/pdftk.go @@ -59,6 +59,9 @@ func (engine *PdfTk) Split(ctx context.Context, logger *zap.Logger, mode gotenbe switch mode.Mode { case gotenberg.SplitModePages: + if !mode.Unify { + return nil, fmt.Errorf("split PDFs using mode '%s' without unify with PDFtk: %w", mode.Mode, gotenberg.ErrPdfSplitModeNotSupported) + } args = append(args, inputPath, "cat", mode.Span, "output", outputPath) default: return nil, fmt.Errorf("split PDFs using mode '%s' with PDFtk: %w", mode.Mode, gotenberg.ErrPdfSplitModeNotSupported) diff --git a/pkg/modules/pdftk/pdftk_test.go b/pkg/modules/pdftk/pdftk_test.go index d5dcd573e..73311f725 100644 --- a/pkg/modules/pdftk/pdftk_test.go +++ b/pkg/modules/pdftk/pdftk_test.go @@ -159,25 +159,33 @@ func TestPdfCpu_Split(t *testing.T) { expectedError: gotenberg.ErrPdfSplitModeNotSupported, expectOutputPathsCount: 0, }, + { + scenario: "ErrPdfSplitModeNotSupported (no unify with pages)", + ctx: 
context.TODO(), + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1", Unify: false}, + expectError: true, + expectedError: gotenberg.ErrPdfSplitModeNotSupported, + expectOutputPathsCount: 0, + }, { scenario: "invalid context", ctx: nil, - mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2", Unify: true}, expectError: true, expectOutputPathsCount: 0, }, { scenario: "invalid input path", ctx: context.TODO(), - mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2", Unify: true}, inputPath: "", expectError: true, expectOutputPathsCount: 0, }, { - scenario: "success (pages)", + scenario: "success (pages & unify)", ctx: context.TODO(), - mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2", Unify: true}, inputPath: "/tests/test/testdata/pdfengines/sample1.pdf", expectError: false, expectOutputPathsCount: 1, diff --git a/pkg/modules/qpdf/qpdf.go b/pkg/modules/qpdf/qpdf.go index 2c010d2ec..34785adef 100644 --- a/pkg/modules/qpdf/qpdf.go +++ b/pkg/modules/qpdf/qpdf.go @@ -59,6 +59,9 @@ func (engine *QPdf) Split(ctx context.Context, logger *zap.Logger, mode gotenber switch mode.Mode { case gotenberg.SplitModePages: + if !mode.Unify { + return nil, fmt.Errorf("split PDFs using mode '%s' without unify with QPDF: %w", mode.Mode, gotenberg.ErrPdfSplitModeNotSupported) + } args = append(args, inputPath, "--pages", ".", mode.Span, "--", outputPath) default: return nil, fmt.Errorf("split PDFs using mode '%s' with QPDF: %w", mode.Mode, gotenberg.ErrPdfSplitModeNotSupported) diff --git a/pkg/modules/qpdf/qpdf_test.go b/pkg/modules/qpdf/qpdf_test.go index b32976cbf..9c79721b1 100644 --- a/pkg/modules/qpdf/qpdf_test.go +++ b/pkg/modules/qpdf/qpdf_test.go @@ -159,25 +159,33 @@ func TestQPdf_Split(t 
*testing.T) { expectedError: gotenberg.ErrPdfSplitModeNotSupported, expectOutputPathsCount: 0, }, + { + scenario: "ErrPdfSplitModeNotSupported (no unify with pages)", + ctx: context.TODO(), + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1", Unify: false}, + expectError: true, + expectedError: gotenberg.ErrPdfSplitModeNotSupported, + expectOutputPathsCount: 0, + }, { scenario: "invalid context", ctx: nil, - mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2", Unify: true}, expectError: true, expectOutputPathsCount: 0, }, { scenario: "invalid input path", ctx: context.TODO(), - mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2", Unify: true}, inputPath: "", expectError: true, expectOutputPathsCount: 0, }, { - scenario: "success (pages)", + scenario: "success (pages & unify)", ctx: context.TODO(), - mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2"}, + mode: gotenberg.SplitMode{Mode: gotenberg.SplitModePages, Span: "1-2", Unify: true}, inputPath: "/tests/test/testdata/pdfengines/sample1.pdf", expectError: false, expectOutputPathsCount: 1, From 16807bd57fcc7235c0b7e65896de17a2b1d90df3 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Mon, 23 Dec 2024 10:04:24 +0100 Subject: [PATCH 23/25] fix(split): wrong output paths when converting to PDF/A & PDF/UA --- pkg/modules/chromium/routes.go | 2 ++ pkg/modules/libreoffice/routes.go | 2 ++ 2 files changed, 4 insertions(+) diff --git a/pkg/modules/chromium/routes.go b/pkg/modules/chromium/routes.go index ee330a26a..2c5791719 100644 --- a/pkg/modules/chromium/routes.go +++ b/pkg/modules/chromium/routes.go @@ -661,6 +661,8 @@ func convertUrl(ctx *api.Context, chromium Api, engine gotenberg.PdfEngine, url return fmt.Errorf("rename output path: %w", err) } } + } else { + outputPaths = convertOutputPaths } 
err = ctx.AddOutputPaths(outputPaths...) diff --git a/pkg/modules/libreoffice/routes.go b/pkg/modules/libreoffice/routes.go index 86165833b..b6786189b 100644 --- a/pkg/modules/libreoffice/routes.go +++ b/pkg/modules/libreoffice/routes.go @@ -251,6 +251,8 @@ func convertRoute(libreOffice libreofficeapi.Uno, engine gotenberg.PdfEngine) ap return fmt.Errorf("rename output path: %w", err) } } + } else { + outputPaths = convertOutputPaths } } From 910eb9b770fad7a7aac74add8b2573b880a10961 Mon Sep 17 00:00:00 2001 From: Julien Neuhart Date: Wed, 25 Dec 2024 17:04:25 +0100 Subject: [PATCH 24/25] feat(state): improve clean up when LibreOffice and Chromium are restarted --- go.mod | 16 ++++++-- go.sum | 28 +++++++++++-- pkg/gotenberg/gc.go | 7 ++-- pkg/gotenberg/gc_test.go | 3 +- pkg/modules/chromium/browser.go | 48 ++++++++++++++++++++-- pkg/modules/libreoffice/api/libreoffice.go | 11 ++--- 6 files changed, 93 insertions(+), 20 deletions(-) diff --git a/go.mod b/go.mod index 4edbc38b4..582f6d7fb 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/alexliesenfeld/health v0.8.0 github.com/andybalholm/brotli v1.1.1 // indirect github.com/barasher/go-exiftool v1.10.0 - github.com/chromedp/cdproto v0.0.0-20241208230723-d1c7de7e5dd2 + github.com/chromedp/cdproto v0.0.0-20241222144035-c16d098c0fb6 github.com/chromedp/chromedp v0.11.2 github.com/golang/snappy v0.0.4 // indirect github.com/google/uuid v1.6.0 @@ -35,7 +35,10 @@ require ( golang.org/x/text v0.21.0 ) -require github.com/dlclark/regexp2 v1.11.4 +require ( + github.com/dlclark/regexp2 v1.11.4 + github.com/shirou/gopsutil/v4 v4.24.11 +) require ( github.com/aymerick/douceur v0.2.0 // indirect @@ -43,20 +46,27 @@ require ( github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chromedp/sysutil v1.1.0 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect + github.com/ebitengine/purego v0.8.1 // indirect + github.com/go-ole/go-ole v1.3.0 // indirect 
github.com/gobwas/httphead v0.1.0 // indirect github.com/gobwas/pool v0.2.1 // indirect github.com/gobwas/ws v1.4.0 // indirect github.com/gorilla/css v1.0.1 // indirect github.com/josharian/intern v1.0.0 // indirect + github.com/lufia/plan9stats v0.0.0-20240909124753-873cd0166683 // indirect github.com/mailru/easyjson v0.9.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.61.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect + github.com/tklauser/go-sysconf v0.3.14 // indirect + github.com/tklauser/numcpus v0.9.0 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasttemplate v1.2.2 // indirect github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect + github.com/yusufpapurcu/wmi v1.2.4 // indirect golang.org/x/time v0.8.0 // indirect - google.golang.org/protobuf v1.36.0 // indirect + google.golang.org/protobuf v1.36.1 // indirect ) diff --git a/go.sum b/go.sum index cc6a4a12e..209cf1a7a 100644 --- a/go.sum +++ b/go.sum @@ -11,8 +11,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chromedp/cdproto v0.0.0-20241208230723-d1c7de7e5dd2 h1:fJob5N/Eprtd427U84kFpQhAHIEqJYuDzveaL6T4Xsk= -github.com/chromedp/cdproto v0.0.0-20241208230723-d1c7de7e5dd2/go.mod h1:4XqMl3iIW08jtieURWL6Tt5924w21pxirC6th662XUM= +github.com/chromedp/cdproto v0.0.0-20241222144035-c16d098c0fb6 h1:dAUcp/W5RpJSZW/HksEHfAAoMBIvSFFIwslAFEte+6g= +github.com/chromedp/cdproto 
v0.0.0-20241222144035-c16d098c0fb6/go.mod h1:4XqMl3iIW08jtieURWL6Tt5924w21pxirC6th662XUM= github.com/chromedp/chromedp v0.11.2 h1:ZRHTh7DjbNTlfIv3NFTbB7eVeu5XCNkgrpcGSpn2oX0= github.com/chromedp/chromedp v0.11.2/go.mod h1:lr8dFRLKsdTTWb75C/Ttol2vnBKOSnt0BW8R9Xaupi8= github.com/chromedp/sysutil v1.1.0 h1:PUFNv5EcprjqXZD9nJb9b/c9ibAbxiYo4exNWZyipwM= @@ -25,8 +25,13 @@ github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cn github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY= github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= +github.com/ebitengine/purego v0.8.1 h1:sdRKd6plj7KYW33EH5As6YKfe8m9zbN9JMrOjNVF/BE= +github.com/ebitengine/purego v0.8.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= +github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= @@ -67,6 +72,8 @@ github.com/labstack/gommon v0.4.2 h1:F8qTUNXgG1+6WQmqoUWnz8WiEU60mXVVw0P4ht1WRA0 github.com/labstack/gommon v0.4.2/go.mod h1:QlUFxVM+SNXhDL/Z7YhocGIBYOiwB0mXm1+1bAPHPyU= github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo= github.com/ledongthuc/pdf 
v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= +github.com/lufia/plan9stats v0.0.0-20240909124753-873cd0166683 h1:7UMa6KCCMjZEMDtTVdcGu0B1GmmC7QJKiCCjyTAWQy0= +github.com/lufia/plan9stats v0.0.0-20240909124753-873cd0166683/go.mod h1:ilwx/Dta8jXAgpFYFvSWEMwxmbWXyiUHkd5FwyKhb5k= github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= @@ -90,6 +97,8 @@ github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU= +github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= @@ -100,6 +109,8 @@ github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0leargg github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/shirou/gopsutil/v4 v4.24.11 h1:WaU9xqGFKvFfsUv94SXcUPD7rCkU0vr/asVdQOBZNj8= +github.com/shirou/gopsutil/v4 v4.24.11/go.mod 
h1:s4D/wg+ag4rG0WO7AiTj2BeYCRhym0vM7DHbZRxnIT8= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -108,6 +119,10 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU= +github.com/tklauser/go-sysconf v0.3.14/go.mod h1:1ym4lWMLUOhuBOPGtRcJm7tEGX4SCYNEEEtghGG/8uY= +github.com/tklauser/numcpus v0.9.0 h1:lmyCHtANi8aRUgkckBgoDk1nHCux3n2cgkJLXdQGPDo= +github.com/tklauser/numcpus v0.9.0/go.mod h1:SN6Nq1O3VychhC1npsWostA+oW+VOQTxZrS604NSRyI= github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= @@ -120,6 +135,8 @@ github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofm github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= +github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= +github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= 
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= @@ -132,7 +149,10 @@ golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= @@ -143,7 +163,7 @@ golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg= golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.36.0 h1:mjIs9gYtt56AzC4ZaffQuh88TZurBGhIJMBZGSxNerQ= -google.golang.org/protobuf v1.36.0/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk= +google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/gotenberg/gc.go b/pkg/gotenberg/gc.go index 3de80a4cc..2f53cc385 100644 --- 
a/pkg/gotenberg/gc.go +++ b/pkg/gotenberg/gc.go @@ -5,13 +5,14 @@ import ( "os" "path/filepath" "strings" + "time" "go.uber.org/zap" ) // GarbageCollect scans the root path and deletes files or directories with -// names containing specific substrings. -func GarbageCollect(logger *zap.Logger, rootPath string, includeSubstr []string) error { +// names containing specific substrings and modified before a given expiration time. +func GarbageCollect(logger *zap.Logger, rootPath string, includeSubstr []string, expirationTime time.Time) error { logger = logger.Named("gc") // To make sure that the next Walk method stays on @@ -36,7 +37,7 @@ func GarbageCollect(logger *zap.Logger, rootPath string, includeSubstr []string) } for _, substr := range includeSubstr { - if strings.Contains(info.Name(), substr) || path == substr { + if (strings.Contains(info.Name(), substr) || path == substr) && info.ModTime().Before(expirationTime) { err := os.RemoveAll(path) if err != nil { return fmt.Errorf("garbage collect '%s': %w", path, err) diff --git a/pkg/gotenberg/gc_test.go b/pkg/gotenberg/gc_test.go index 4dd58e4fb..0ed089a55 100644 --- a/pkg/gotenberg/gc_test.go +++ b/pkg/gotenberg/gc_test.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "testing" + "time" "github.com/google/uuid" "go.uber.org/zap" @@ -64,7 +65,7 @@ func TestGarbageCollect(t *testing.T) { } }() - err := GarbageCollect(zap.NewNop(), tc.rootPath, tc.includeSubstr) + err := GarbageCollect(zap.NewNop(), tc.rootPath, tc.includeSubstr, time.Now()) if !tc.expectError && err != nil { t.Fatalf("expected no error but got: %v", err) diff --git a/pkg/modules/chromium/browser.go b/pkg/modules/chromium/browser.go index 1da24d877..b1fd38c75 100644 --- a/pkg/modules/chromium/browser.go +++ b/pkg/modules/chromium/browser.go @@ -15,6 +15,7 @@ import ( "github.com/chromedp/cdproto/runtime" "github.com/chromedp/chromedp" "github.com/dlclark/regexp2" + "github.com/shirou/gopsutil/v4/process" "go.uber.org/zap" 
"github.com/gotenberg/gotenberg/v8/pkg/gotenberg" @@ -162,21 +163,60 @@ func (b *chromiumBrowser) Stop(logger *zap.Logger) error { // Always remove the user profile directory created by Chromium. copyUserProfileDirPath := b.userProfileDirPath - defer func(userProfileDirPath string) { + expirationTime := time.Now() + defer func(userProfileDirPath string, expirationTime time.Time) { + // See: + // https://github.com/SeleniumHQ/docker-selenium/blob/7216d060d86872afe853ccda62db0dfab5118dc7/NodeChrome/chrome-cleanup.sh + // https://github.com/SeleniumHQ/docker-selenium/blob/7216d060d86872afe853ccda62db0dfab5118dc7/NodeChromium/chrome-cleanup.sh go func() { + // Clean up stuck processes. + ps, err := process.Processes() + if err != nil { + logger.Error(fmt.Sprintf("list processes: %v", err)) + } else { + for _, p := range ps { + func() { + cmdline, err := p.Cmdline() + if err != nil { + return + } + + if !strings.Contains(cmdline, "chromium/chromium") && !strings.Contains(cmdline, "chrome/chrome") { + return + } + + killCtx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + + err = p.KillWithContext(killCtx) + if err != nil { + logger.Error(fmt.Sprintf("kill process: %v", err)) + } else { + logger.Info(fmt.Sprintf("Chromium process %d killed", p.Pid)) + } + }() + } + } + // FIXME: Chromium seems to recreate the user profile directory // right after its deletion if we do not wait a certain amount // of time before deleting it. <-time.After(10 * time.Second) - err := os.RemoveAll(userProfileDirPath) + err = os.RemoveAll(userProfileDirPath) if err != nil { logger.Error(fmt.Sprintf("remove Chromium's user profile directory: %s", err)) + } else { + logger.Debug(fmt.Sprintf("'%s' Chromium's user profile directory removed", userProfileDirPath)) } - logger.Debug(fmt.Sprintf("'%s' Chromium's user profile directory removed", userProfileDirPath)) + // Also remove Chromium specific files in the temporary directory. 
+ err = gotenberg.GarbageCollect(logger, os.TempDir(), []string{".org.chromium.Chromium", ".com.google.Chrome"}, expirationTime) + if err != nil { + logger.Error(err.Error()) + } }() - }(copyUserProfileDirPath) + }(copyUserProfileDirPath, expirationTime) b.ctxMu.Lock() defer b.ctxMu.Unlock() diff --git a/pkg/modules/libreoffice/api/libreoffice.go b/pkg/modules/libreoffice/api/libreoffice.go index ee332ad48..652d0e05b 100644 --- a/pkg/modules/libreoffice/api/libreoffice.go +++ b/pkg/modules/libreoffice/api/libreoffice.go @@ -190,22 +190,23 @@ func (p *libreOfficeProcess) Stop(logger *zap.Logger) error { // Always remove the user profile directory created by LibreOffice. copyUserProfileDirPath := p.userProfileDirPath - defer func(userProfileDirPath string) { + expirationTime := time.Now() + defer func(userProfileDirPath string, expirationTime time.Time) { go func() { err := os.RemoveAll(userProfileDirPath) if err != nil { logger.Error(fmt.Sprintf("remove LibreOffice's user profile directory: %v", err)) + } else { + logger.Debug(fmt.Sprintf("'%s' LibreOffice's user profile directory removed", userProfileDirPath)) } - logger.Debug(fmt.Sprintf("'%s' LibreOffice's user profile directory removed", userProfileDirPath)) - // Also remove LibreOffice specific files in the temporary directory. 
- err = gotenberg.GarbageCollect(logger, os.TempDir(), []string{"OSL_PIPE", ".tmp"}) + err = gotenberg.GarbageCollect(logger, os.TempDir(), []string{"OSL_PIPE", ".tmp"}, expirationTime) if err != nil { logger.Error(err.Error()) } }() - }(copyUserProfileDirPath) + }(copyUserProfileDirPath, expirationTime) p.cfgMu.Lock() defer p.cfgMu.Unlock() From 1f99a01f356342a17f863e8aaae820bcc2b0b1c7 Mon Sep 17 00:00:00 2001 From: Ben Fichter Date: Sun, 29 Dec 2024 11:44:10 -0500 Subject: [PATCH 25/25] Update to support new pdfcpu stuff from upstream --- build/Dockerfile.distroless | 65 +++++++++++++++++++++++++++---------- scripts/release.sh | 2 ++ 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/build/Dockerfile.distroless b/build/Dockerfile.distroless index 6591e7a47..ea6ea7049 100644 --- a/build/Dockerfile.distroless +++ b/build/Dockerfile.distroless @@ -11,10 +11,35 @@ ARG GOOGLE_DISTROLESS_BASE_IMAGE=gcr.io/distroless/cc-debian12:latest # stage that uses them. ARG GOLANG_VERSION +# ---------------------------------------------- +# pdfcpu binary build stage +# ---------------------------------------------- +# Note: this stage is required as pdfcpu does not release an armhf variant by +# default. + +FROM golang:$GOLANG_VERSION AS pdfcpu-binary-stage + +ARG PDFCPU_VERSION +ENV CGO_ENABLED=0 + +# Define the working directory outside of $GOPATH (we're using go modules). +WORKDIR /home + +RUN curl -Ls "https://github.com/pdfcpu/pdfcpu/archive/refs/tags/$PDFCPU_VERSION.tar.gz" -o pdfcpu.tar.gz &&\ + tar --strip-components=1 -xvzf pdfcpu.tar.gz + +# Install module dependencies. +RUN go mod download &&\ + go mod verify + +RUN go build -o pdfcpu -ldflags "-s -w -X 'main.version=$PDFCPU_VERSION' -X 'github.com/pdfcpu/pdfcpu/pkg/pdfcpu.VersionStr=$PDFCPU_VERSION' -X main.builtBy=gotenberg" ./cmd/pdfcpu &&\ + # Verify installation. 
+ ./pdfcpu version + # ---------------------------------------------- # Gotenberg binary build stage # ---------------------------------------------- -FROM golang:$GOLANG_VERSION AS binary-stage +FROM golang:$GOLANG_VERSION AS gotenberg-binary-stage ARG GOTENBERG_VERSION ENV CGO_ENABLED=0 @@ -174,38 +199,41 @@ ARG TARGETARCH FROM base-${TARGETARCH} # Required for unoconverter -COPY --from=binary-stage /usr/bin/python /usr/bin/python -COPY --from=binary-stage /usr/lib/python3 /usr/lib/python3 -COPY --from=binary-stage /usr/lib/python3.11 /usr/lib/python3.11 +COPY --from=gotenberg-binary-stage /usr/bin/python /usr/bin/python +COPY --from=gotenberg-binary-stage /usr/lib/python3 /usr/lib/python3 +COPY --from=gotenberg-binary-stage /usr/lib/python3.11 /usr/lib/python3.11 ENV PYTHONPATH="/usr/local/lib/python3.11/dist-packages:" # LibreOffice dependencies # Just copied the entire directory as there were too many dependencies to justify finding/listing them individually -COPY --from=binary-stage /lib/${CHIPSET_ARCH} /lib/${CHIPSET_ARCH} +COPY --from=gotenberg-binary-stage /lib/${CHIPSET_ARCH} /lib/${CHIPSET_ARCH} + +# Copy the pdfcpu binary from the pdfcpu-binary-stage. 
+COPY --from=pdfcpu-binary-stage /home/pdfcpu /usr/bin/ # Copy the Gotenberg binary from the binary stage -COPY --from=binary-stage /home/gotenberg /usr/bin/gotenberg +COPY --from=gotenberg-binary-stage /home/gotenberg /usr/bin/gotenberg # Copy other neccessary binaries/libraries -COPY --from=binary-stage /usr/lib/libreoffice /usr/lib/libreoffice -COPY --from=binary-stage /etc/libreoffice /etc/libreoffice -COPY --from=binary-stage /etc/apparmor.d /etc/apparmor.d -COPY --from=binary-stage /usr/bin/libreoffice /usr/bin/unoconverter /usr/bin/pdftk /usr/bin/qpdf /usr/bin/exiftool /usr/bin/tini /usr/bin/ +COPY --from=gotenberg-binary-stage /usr/lib/libreoffice /usr/lib/libreoffice +COPY --from=gotenberg-binary-stage /etc/libreoffice /etc/libreoffice +COPY --from=gotenberg-binary-stage /etc/apparmor.d /etc/apparmor.d +COPY --from=gotenberg-binary-stage /usr/bin/libreoffice /usr/bin/unoconverter /usr/bin/pdftk /usr/bin/qpdf /usr/bin/exiftool /usr/bin/tini /usr/bin/ # Fonts -COPY --from=binary-stage /usr/share/doc /usr/share/doc -COPY --from=binary-stage /usr/share/bug /usr/share/bug -COPY --from=binary-stage /usr/share/fonts /usr/share/fonts -COPY --from=binary-stage /usr/share/fontconfig /usr/share/fontconfig -COPY --from=binary-stage /usr/local/share/fonts /usr/local/share/fonts -COPY --from=binary-stage /etc/fonts /etc/fonts +COPY --from=gotenberg-binary-stage /usr/share/doc /usr/share/doc +COPY --from=gotenberg-binary-stage /usr/share/bug /usr/share/bug +COPY --from=gotenberg-binary-stage /usr/share/fonts /usr/share/fonts +COPY --from=gotenberg-binary-stage /usr/share/fontconfig /usr/share/fontconfig +COPY --from=gotenberg-binary-stage /usr/local/share/fonts /usr/local/share/fonts +COPY --from=gotenberg-binary-stage /etc/fonts /etc/fonts # Other binaries (used in /usr/bin/libreoffice script) -COPY --from=binary-stage /usr/bin/dirname /usr/bin/uname /usr/bin/basename /usr/bin/grep /usr/bin/sed /usr/bin/ +COPY --from=gotenberg-binary-stage /usr/bin/dirname 
/usr/bin/uname /usr/bin/basename /usr/bin/grep /usr/bin/sed /usr/bin/ # Required to allow unoconverter to be called without 'python' prefix -COPY --from=binary-stage /usr/bin/env /usr/bin/ +COPY --from=gotenberg-binary-stage /usr/bin/env /usr/bin/ # Environment variables required by modules or else. ENV LIBREOFFICE_BIN_PATH=/usr/lib/libreoffice/program/soffice.bin @@ -213,6 +241,7 @@ ENV UNOCONVERTER_BIN_PATH=/usr/bin/unoconverter ENV PDFTK_BIN_PATH=/usr/bin/pdftk ENV QPDF_BIN_PATH=/usr/bin/qpdf ENV EXIFTOOL_BIN_PATH=/usr/bin/exiftool +ENV PDFCPU_BIN_PATH=/usr/bin/pdfcpu ARG GOTENBERG_USER_UID diff --git a/scripts/release.sh b/scripts/release.sh index be18cc2b4..b49df9b58 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -8,6 +8,7 @@ GOTENBERG_USER_GID="1001" GOTENBERG_USER_UID="1001" NOTO_COLOR_EMOJI_VERSION="15.1" PDFTK_VERSION="v3.3.3" +PDFCPU_VERSION=v0.8.1 DOCKER_REPOSITORY="$1" TAG="$2" @@ -18,6 +19,7 @@ docker buildx build \ --build-arg GOTENBERG_USER_UID="$GOTENBERG_USER_UID" \ --build-arg NOTO_COLOR_EMOJI_VERSION="$NOTO_COLOR_EMOJI_VERSION" \ --build-arg PDFTK_VERSION="$PDFTK_VERSION" \ + --build-arg PDFCPU_VERSION="$PDFCPU_VERSION" \ --platform linux/arm64 \ --platform linux/amd64 \ -t "$DOCKER_REPOSITORY:$TAG" \