From 9e667bd47666e9dc012186019d245bf340d4888c Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Tue, 2 Dec 2025 03:06:44 +0100 Subject: [PATCH] serve rustdoc-archive-download ourselves, remove public/private feature from storage --- ...20251202020754_remove-file-public.down.sql | 1 + .../20251202020754_remove-file-public.up.sql | 1 + src/config.rs | 9 - src/db/file.rs | 4 - src/docbuilder/rustwide_builder.rs | 4 - src/storage/database.rs | 34 ---- src/storage/mod.rs | 66 ------- src/storage/s3.rs | 48 +---- src/test/fakes.rs | 20 +-- src/web/extractors/rustdoc.rs | 16 ++ src/web/rustdoc.rs | 165 +++++++++--------- 11 files changed, 107 insertions(+), 261 deletions(-) create mode 100644 migrations/20251202020754_remove-file-public.down.sql create mode 100644 migrations/20251202020754_remove-file-public.up.sql diff --git a/migrations/20251202020754_remove-file-public.down.sql b/migrations/20251202020754_remove-file-public.down.sql new file mode 100644 index 000000000..7b6bfb049 --- /dev/null +++ b/migrations/20251202020754_remove-file-public.down.sql @@ -0,0 +1 @@ +ALTER TABLE files ADD COLUMN public BOOL NOT NULL DEFAULT FALSE; diff --git a/migrations/20251202020754_remove-file-public.up.sql b/migrations/20251202020754_remove-file-public.up.sql new file mode 100644 index 000000000..fbc221caa --- /dev/null +++ b/migrations/20251202020754_remove-file-public.up.sql @@ -0,0 +1 @@ +ALTER TABLE files DROP COLUMN public; diff --git a/src/config.rs b/src/config.rs index 160534a52..7781305e9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -45,11 +45,6 @@ pub struct Config { #[builder(default)] pub(crate) s3_bucket_is_temporary: bool, - // CloudFront domain which we can access - // public S3 files through - #[cfg_attr(test, builder(setter(into)))] - pub(crate) s3_static_root_path: String, - // Github authentication pub(crate) github_accesstoken: Option, pub(crate) github_updater_min_rate_limit: u32, @@ -209,10 +204,6 @@ impl Config { .s3_bucket(env("DOCSRS_S3_BUCKET", "rust-docs-rs".to_string())?) .s3_region(env("S3_REGION", "us-west-1".to_string())?) .s3_endpoint(maybe_env("S3_ENDPOINT")?) - .s3_static_root_path(env( - "DOCSRS_S3_STATIC_ROOT_PATH", - "https://static.docs.rs".to_string(), - )?) .github_accesstoken(maybe_env("DOCSRS_GITHUB_ACCESSTOKEN")?) .github_updater_min_rate_limit(env("DOCSRS_GITHUB_UPDATER_MIN_RATE_LIMIT", 2500u32)?) .gitlab_accesstoken(maybe_env("DOCSRS_GITLAB_ACCESSTOKEN")?) diff --git a/src/db/file.rs b/src/db/file.rs index 7ad101cf3..1648c01a9 100644 --- a/src/db/file.rs +++ b/src/db/file.rs @@ -80,14 +80,10 @@ pub async fn add_path_into_remote_archive + std::fmt::Debug>( storage: &AsyncStorage, archive_path: &str, path: P, - public_access: bool, ) -> Result<(Vec, CompressionAlgorithm)> { let (file_list, algorithm) = storage .store_all_in_archive(archive_path, path.as_ref()) .await?; - if public_access { - storage.set_public_access(archive_path, true).await?; - } Ok((file_list, algorithm)) } diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 2980638e5..1a4b65097 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -680,7 +680,6 @@ impl RustwideBuilder { &self.async_storage, &source_archive_path(name, version), build.host_source_dir(), - false, ))?; algs.insert(new_alg); files_list @@ -776,7 +775,6 @@ impl RustwideBuilder { &self.async_storage, &rustdoc_archive_path(name, version), local_storage.path(), - true, ))?; let documentation_size = file_list.iter().map(|info| info.size).sum::(); self.metrics @@ -1077,7 +1075,6 @@ impl RustwideBuilder { self.storage .store_one_uncompressed(&path, compressed_json.clone())?; - self.storage.set_public_access(&path, true)?; } } @@ -1641,7 +1638,6 @@ mod tests { Some(*alg), ); assert!(storage.exists(&path)?); - assert!(storage.get_public_access(&path)?); let ext = compression::file_extension_for(*alg); diff --git a/src/storage/database.rs b/src/storage/database.rs index c7d8a7b49..8eeb9a777 100644 --- a/src/storage/database.rs +++ b/src/storage/database.rs @@ -33,40 +33,6 @@ impl DatabaseBackend { .await?) } - pub(super) async fn get_public_access(&self, path: &str) -> Result { - match sqlx::query_scalar!( - "SELECT public - FROM files - WHERE path = $1", - path - ) - .fetch_optional(&self.pool) - .await? - { - Some(public) => Ok(public), - None => Err(super::PathNotFoundError.into()), - } - } - - pub(super) async fn set_public_access(&self, path: &str, public: bool) -> Result<()> { - if sqlx::query!( - "UPDATE files - SET public = $2 - WHERE path = $1", - path, - public, - ) - .execute(&self.pool) - .await? - .rows_affected() - == 1 - { - Ok(()) - } else { - Err(super::PathNotFoundError.into()) - } - } - pub(super) async fn get_stream( &self, path: &str, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 8f5daea1c..f011d8f30 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -293,22 +293,6 @@ impl AsyncStorage { } } - #[instrument] - pub(crate) async fn get_public_access(&self, path: &str) -> Result { - match &self.backend { - StorageBackend::Database(db) => db.get_public_access(path).await, - StorageBackend::S3(s3) => s3.get_public_access(path).await, - } - } - - #[instrument] - pub(crate) async fn set_public_access(&self, path: &str, public: bool) -> Result<()> { - match &self.backend { - StorageBackend::Database(db) => db.set_public_access(path, public).await, - StorageBackend::S3(s3) => s3.set_public_access(path, public).await, - } - } - /// Fetch a rustdoc file from our blob storage. /// * `name` - the crate name /// * `version` - the crate version @@ -935,15 +919,6 @@ impl Storage { self.runtime.block_on(self.inner.exists(path)) } - pub(crate) fn get_public_access(&self, path: &str) -> Result { - self.runtime.block_on(self.inner.get_public_access(path)) - } - - pub(crate) fn set_public_access(&self, path: &str, public: bool) -> Result<()> { - self.runtime - .block_on(self.inner.set_public_access(path, public)) - } - pub(crate) fn fetch_source_file( &self, name: &str, @@ -1546,35 +1521,6 @@ mod backend_tests { Ok(()) } - fn test_set_public(storage: &Storage) -> Result<()> { - let path: &str = "foo/bar.txt"; - - storage.store_blobs(vec![BlobUpload { - path: path.into(), - mime: mime::TEXT_PLAIN, - compression: None, - content: b"test content\n".to_vec(), - }])?; - - assert!(!storage.get_public_access(path)?); - storage.set_public_access(path, true)?; - assert!(storage.get_public_access(path)?); - storage.set_public_access(path, false)?; - assert!(!storage.get_public_access(path)?); - - for path in &["bar.txt", "baz.txt", "foo/baz.txt"] { - assert!( - storage - .set_public_access(path, true) - .unwrap_err() - .downcast_ref::() - .is_some() - ); - } - - Ok(()) - } - fn test_get_object(storage: &Storage) -> Result<()> { let path: &str = "foo/bar.txt"; let blob = BlobUpload { @@ -1593,9 +1539,6 @@ mod backend_tests { // it seems like minio does it too :) assert_eq!(found.etag, Some(compute_etag(&blob.content))); - // default visibility is private - assert!(!storage.get_public_access(path)?); - for path in &["bar.txt", "baz.txt", "foo/baz.txt"] { assert!( storage @@ -1604,14 +1547,6 @@ mod backend_tests { .downcast_ref::() .is_some() ); - - assert!( - storage - .get_public_access(path) - .unwrap_err() - .downcast_ref::() - .is_some() - ); } Ok(()) @@ -2065,7 +2000,6 @@ mod backend_tests { test_delete_prefix_without_matches, test_delete_percent, test_exists_without_remote_archive, - test_set_public, } tests_with_metrics { diff --git a/src/storage/s3.rs b/src/storage/s3.rs index 3e417ba38..f6e0bd061 100644 --- a/src/storage/s3.rs +++ b/src/storage/s3.rs @@ -7,7 +7,7 @@ use aws_sdk_s3::{ Client, config::{Region, retry::RetryConfig}, error::{ProvideErrorMetadata, SdkError}, - types::{Delete, ObjectIdentifier, Tag, Tagging}, + types::{Delete, ObjectIdentifier}, }; use aws_smithy_types_convert::date_time::DateTimeExt; use axum_extra::headers; @@ -20,9 +20,6 @@ use futures_util::{ use std::sync::Arc; use tracing::{error, instrument, warn}; -const PUBLIC_ACCESS_TAG: &str = "static-cloudfront-access"; -const PUBLIC_ACCESS_VALUE: &str = "allow"; - // error codes to check for when trying to determine if an error is // a "NOT FOUND" error. // Definition taken from the S3 rust SDK, @@ -138,49 +135,6 @@ impl S3Backend { } } - pub(super) async fn get_public_access(&self, path: &str) -> Result { - Ok(self - .client - .get_object_tagging() - .bucket(&self.bucket) - .key(path) - .send() - .await - .convert_errors()? - .tag_set() - .iter() - .filter(|tag| tag.key() == PUBLIC_ACCESS_TAG) - .any(|tag| tag.value() == PUBLIC_ACCESS_VALUE)) - } - - pub(super) async fn set_public_access(&self, path: &str, public: bool) -> Result<(), Error> { - self.client - .put_object_tagging() - .bucket(&self.bucket) - .key(path) - .tagging(if public { - Tagging::builder() - .tag_set( - Tag::builder() - .key(PUBLIC_ACCESS_TAG) - .value(PUBLIC_ACCESS_VALUE) - .build() - .context("could not build tag")?, - ) - .build() - .context("could not build tags")? - } else { - Tagging::builder() - .set_tag_set(Some(vec![])) - .build() - .context("could not build tags")? - }) - .send() - .await - .convert_errors() - .map(|_| ()) - } - #[instrument(skip(self))] pub(super) async fn get_stream( &self, diff --git a/src/test/fakes.rs b/src/test/fakes.rs index 4c4660089..12c6b2d50 100644 --- a/src/test/fakes.rs +++ b/src/test/fakes.rs @@ -411,22 +411,14 @@ impl<'a> FakeRelease<'a> { source_directory.display() ); if archive_storage { - let (archive, public) = match kind { - FileKind::Rustdoc => { - (rustdoc_archive_path(&package.name, &package.version), true) - } - FileKind::Sources => { - (source_archive_path(&package.name, &package.version), false) - } + let archive = match kind { + FileKind::Rustdoc => rustdoc_archive_path(&package.name, &package.version), + FileKind::Sources => source_archive_path(&package.name, &package.version), }; debug!("store in archive: {:?}", archive); - let (files_list, new_alg) = crate::db::add_path_into_remote_archive( - storage, - &archive, - source_directory, - public, - ) - .await?; + let (files_list, new_alg) = + crate::db::add_path_into_remote_archive(storage, &archive, source_directory) + .await?; Ok((files_list, new_alg)) } else { let prefix = match kind { diff --git a/src/web/extractors/rustdoc.rs b/src/web/extractors/rustdoc.rs index 4d7d29b99..4d8395ad0 100644 --- a/src/web/extractors/rustdoc.rs +++ b/src/web/extractors/rustdoc.rs @@ -574,6 +574,13 @@ impl RustdocParams { EscapedURI::from_path(path) } + pub(crate) fn zip_download_url(&self) -> EscapedURI { + EscapedURI::from_path(format!( + "/crate/{}/{}/download", + self.name, self.req_version + )) + } + pub(crate) fn json_download_url( &self, wanted_compression: Option, @@ -1806,4 +1813,13 @@ mod tests { ) ); } + + #[test] + fn test_zip_download_url() { + let params = RustdocParams::new(KRATE).with_req_version(ReqVersion::Exact(V1)); + assert_eq!( + params.zip_download_url(), + format!("/crate/{KRATE}/{V1}/download") + ); + } } diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index aeb7048d2..845b76f1a 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -41,7 +41,7 @@ use axum_extra::{ headers::{ContentType, ETag, Header as _, HeaderMapExt as _}, typed_header::TypedHeader, }; -use http::{HeaderMap, Uri, uri::Authority}; +use http::{HeaderMap, HeaderValue, Uri, header::CONTENT_DISPOSITION, uri::Authority}; use serde::Deserialize; use std::{ collections::HashMap, @@ -49,6 +49,21 @@ use std::{ }; use tracing::{Instrument, error, info_span, instrument, trace}; +/// generate a "attachment" content disposition header for downloads. +/// +/// Used in archive-download & json-download endpoints. +/// +/// Typically I like typed-headers more, but the `headers::ContentDisposition` impl is lacking, +/// and I don't want to rebuild it now. +fn generate_content_disposition_header(storage_path: &str) -> anyhow::Result { + format!( + "attachment; filename=\"{}\"", + storage_path.replace("/", "-") + ) + .parse() + .map_err(Into::into) +} + #[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct OfficialCrateDescription { pub(crate) name: &'static str, @@ -996,43 +1011,40 @@ pub(crate) async fn json_download_handler( #[instrument(skip_all)] pub(crate) async fn download_handler( - Path((name, req_version)): Path<(String, ReqVersion)>, + params: RustdocParams, mut conn: DbConnection, Extension(storage): Extension>, - Extension(config): Extension>, + if_none_match: Option>, ) -> AxumResult { - let version = match_version(&mut conn, &name, &req_version) + let version = match_version(&mut conn, params.name(), params.req_version()) .await? .assume_exact_name()? + .into_canonical_req_version_or_else(|version| { + AxumNope::Redirect( + params.clone().with_req_version(version).zip_download_url(), + CachePolicy::ForeverInCdn, + ) + })? .into_version(); - let archive_path = rustdoc_archive_path(&name, &version); + let archive_path = rustdoc_archive_path(params.name(), &version); - // not all archives are set for public access yet, so we check if - // the access is set and fix it if needed. - let archive_is_public = match storage - .get_public_access(&archive_path) - .await - .context("reading public access for archive") - { - Ok(is_public) => is_public, - Err(err) => { - if matches!(err.downcast_ref(), Some(crate::storage::PathNotFoundError)) { - return Err(AxumNope::ResourceNotFound); - } else { - return Err(AxumNope::InternalError(err)); - } - } - }; + let mut response = StreamingFile(storage.get_raw_stream(&archive_path).await?) + .into_response(if_none_match.as_deref()); - if !archive_is_public { - storage.set_public_access(&archive_path, true).await?; - } + // StreamingFile::into_response automatically set the default cache-policy for + // static assets (ForeverInCdnAndBrowser). + // Here we override it with the standard policy for build output. + response.extensions_mut().insert(CachePolicy::ForeverInCdn); - Ok(super::axum_cached_redirect( - format!("{}/{}", config.s3_static_root_path, archive_path), - CachePolicy::ForeverInCdn, - )?) + // set content-disposition to attachment to trigger download in browsers + response.headers_mut().insert( + CONTENT_DISPOSITION, + generate_content_disposition_header(&archive_path) + .context("could not generate content-disposition header")?, + ); + + Ok(response) } /// Serves shared resources used by rustdoc-generated documentation. @@ -1069,10 +1081,23 @@ mod test { use kuchikiki::traits::TendrilSink; use pretty_assertions::assert_eq; use reqwest::StatusCode; - use std::collections::BTreeMap; + use std::{collections::BTreeMap, io}; use test_case::test_case; use tracing::info; + /// try decompressing the zip & read the content + fn check_archive_consistency(compressed_body: &[u8]) -> anyhow::Result<()> { + let mut zip = zip::ZipArchive::new(io::Cursor::new(compressed_body))?; + for i in 0..zip.len() { + let mut file = zip.by_index(i)?; + + let mut buf = Vec::new(); + io::copy(&mut file, &mut buf)?; + } + + Ok(()) + } + async fn try_latest_version_redirect( path: &str, web: &axum::Router, @@ -2977,10 +3002,8 @@ mod test { fn download_unknown_version_404() { async_wrapper(|env| async move { let web = env.web_app().await; + web.assert_not_found("/crate/dummy/0.1.0/download").await?; - let response = web.get("/crate/dummy/0.1.0/download").await?; - response.assert_cache_control(CachePolicy::NoCaching, env.config()); - assert_eq!(response.status(), StatusCode::NOT_FOUND); Ok(()) }); } @@ -2997,22 +3020,15 @@ mod test { .await?; let web = env.web_app().await; + web.assert_not_found("/crate/dummy/0.1.0/download").await?; - let response = web.get("/crate/dummy/0.1.0/download").await?; - response.assert_cache_control(CachePolicy::NoCaching, env.config()); - assert_eq!(response.status(), StatusCode::NOT_FOUND); Ok(()) }); } #[tokio::test(flavor = "multi_thread")] async fn download_semver() -> Result<()> { - let env = TestEnvironment::with_config( - TestEnvironment::base_config() - .s3_static_root_path("https://static.docs.rs") - .build()?, - ) - .await?; + let env = TestEnvironment::with_config(TestEnvironment::base_config().build()?).await?; env.fake_release() .await @@ -3024,29 +3040,19 @@ mod test { let web = env.web_app().await; - web.assert_redirect_cached_unchecked( + web.assert_redirect_cached( "/crate/dummy/0.1/download", - "https://static.docs.rs/rustdoc/dummy/0.1.0.zip", + "/crate/dummy/0.1.0/download", CachePolicy::ForeverInCdn, env.config(), ) .await?; - assert!( - env.async_storage() - .get_public_access("rustdoc/dummy/0.1.0.zip") - .await? - ); Ok(()) } #[tokio::test(flavor = "multi_thread")] async fn download_specfic_version() -> Result<()> { - let env = TestEnvironment::with_config( - TestEnvironment::base_config() - .s3_static_root_path("https://static.docs.rs") - .build()?, - ) - .await?; + let env = TestEnvironment::new().await?; env.fake_release() .await @@ -3057,32 +3063,25 @@ mod test { .await?; let web = env.web_app().await; - let storage = env.async_storage(); + let path = "/crate/dummy/0.1.0/download"; - // disable public access to be sure that the handler will enable it - storage - .set_public_access("rustdoc/dummy/0.1.0.zip", false) + let resp = web + .assert_success_cached(path, CachePolicy::ForeverInCdn, env.config()) .await?; + assert_eq!( + resp.headers().get(CONTENT_DISPOSITION).unwrap(), + "attachment; filename=\"rustdoc-dummy-0.1.0.zip\"" + ); + web.assert_conditional_get(path, &resp).await?; + + check_archive_consistency(&web.assert_success(path).await?.bytes().await?)?; - web.assert_redirect_cached_unchecked( - "/crate/dummy/0.1.0/download", - "https://static.docs.rs/rustdoc/dummy/0.1.0.zip", - CachePolicy::ForeverInCdn, - env.config(), - ) - .await?; - assert!(storage.get_public_access("rustdoc/dummy/0.1.0.zip").await?); Ok(()) } #[tokio::test(flavor = "multi_thread")] async fn download_latest_version() -> Result<()> { - let env = TestEnvironment::with_config( - TestEnvironment::base_config() - .s3_static_root_path("https://static.docs.rs") - .build()?, - ) - .await?; + let env = TestEnvironment::new().await?; env.fake_release() .await @@ -3101,19 +3100,19 @@ mod test { .await?; let web = env.web_app().await; + let path = "/crate/dummy/latest/download"; - web.assert_redirect_cached_unchecked( - "/crate/dummy/latest/download", - "https://static.docs.rs/rustdoc/dummy/0.2.0.zip", - CachePolicy::ForeverInCdn, - env.config(), - ) - .await?; - assert!( - env.async_storage() - .get_public_access("rustdoc/dummy/0.2.0.zip") - .await? + let resp = web + .assert_success_cached(path, CachePolicy::ForeverInCdn, env.config()) + .await?; + assert_eq!( + resp.headers().get(CONTENT_DISPOSITION).unwrap(), + "attachment; filename=\"rustdoc-dummy-0.2.0.zip\"" ); + web.assert_conditional_get(path, &resp).await?; + + check_archive_consistency(&web.assert_success(path).await?.bytes().await?)?; + Ok(()) }