Skip to content

Commit 6d67225

Browse files
committed
add ETag & conditional get for rustdoc HTML pages
1 parent 6037fe0 commit 6d67225

File tree

14 files changed

+452
-96
lines changed

14 files changed

+452
-96
lines changed

Cargo.lock

Lines changed: 34 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ uuid = { version = "1.1.2", features = ["v4"]}
8686
serde = { version = "1.0", features = ["derive"] }
8787
serde_json = "1.0"
8888
serde_with = "3.4.0"
89+
bincode = "2.0.1"
8990

9091
# axum dependencies
9192
async-trait = "0.1.83"

src/db/types/version.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,20 @@ mod version_impl {
4040
}
4141
}
4242

43+
impl bincode::Encode for Version {
44+
fn encode<E: bincode::enc::Encoder>(
45+
&self,
46+
encoder: &mut E,
47+
) -> Result<(), bincode::error::EncodeError> {
48+
self.0.major.encode(encoder)?;
49+
self.0.minor.encode(encoder)?;
50+
self.0.patch.encode(encoder)?;
51+
bincode::Encode::encode(self.0.pre.as_str(), encoder)?;
52+
bincode::Encode::encode(self.0.build.as_str(), encoder)?;
53+
Ok(())
54+
}
55+
}
56+
4357
impl Type<Postgres> for Version {
4458
fn type_info() -> PgTypeInfo {
4559
<String as Type<Postgres>>::type_info()

src/registry_api.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,17 @@ pub struct CrateOwner {
4444
}
4545

4646
#[derive(
47-
Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, sqlx::Type,
47+
Debug,
48+
Clone,
49+
Copy,
50+
PartialEq,
51+
Eq,
52+
PartialOrd,
53+
Ord,
54+
Serialize,
55+
Deserialize,
56+
sqlx::Type,
57+
bincode::Encode,
4858
)]
4959
#[sqlx(type_name = "owner_kind", rename_all = "lowercase")]
5060
#[serde(rename_all = "lowercase")]

src/test/mod.rs

Lines changed: 57 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,21 @@ use crate::{
2222
page::TemplateData,
2323
},
2424
};
25-
use anyhow::Context as _;
25+
use anyhow::{Context as _, anyhow};
2626
use axum::body::Bytes;
2727
use axum::{Router, body::Body, http::Request, response::Response as AxumResponse};
2828
use axum_extra::headers::{ETag, HeaderMapExt as _};
2929
use fn_error_context::context;
3030
use futures_util::stream::TryStreamExt;
31-
use http::{HeaderMap, StatusCode, header::CACHE_CONTROL};
31+
use http::{
32+
HeaderMap, HeaderName, HeaderValue, StatusCode,
33+
header::{CACHE_CONTROL, CONTENT_TYPE},
34+
};
3235
use http_body_util::BodyExt;
3336
use opentelemetry_sdk::metrics::InMemoryMetricExporter;
3437
use serde::de::DeserializeOwned;
3538
use sqlx::Connection as _;
36-
use std::{fs, future::Future, panic, rc::Rc, str::FromStr, sync::Arc};
39+
use std::{collections::HashMap, fs, future::Future, panic, rc::Rc, str::FromStr, sync::Arc};
3740
use tokio::{runtime, task::block_in_place};
3841
use tower::ServiceExt;
3942
use tracing::error;
@@ -137,11 +140,13 @@ pub(crate) trait AxumRouterTestExt {
137140
config: &Config,
138141
) -> Result<AxumResponse>;
139142
async fn assert_not_found(&self, path: &str) -> Result<()>;
140-
async fn assert_success_and_conditional_get(
143+
async fn assert_conditional_get(
141144
&self,
142-
path: &str,
143-
expected_body: &str,
145+
initial_path: &str,
146+
uncached_response: &AxumResponse,
144147
) -> Result<()>;
148+
async fn assert_success_and_conditional_get(&self, path: &str) -> Result<()>;
149+
145150
async fn assert_success_cached(
146151
&self,
147152
path: &str,
@@ -187,37 +192,66 @@ impl AxumRouterTestExt for axum::Router {
187192
Ok(response)
188193
}
189194

190-
async fn assert_success_and_conditional_get(
195+
async fn assert_conditional_get(
191196
&self,
192-
path: &str,
193-
expected_body: &str,
197+
initial_path: &str,
198+
uncached_response: &AxumResponse,
194199
) -> Result<()> {
195-
let etag: ETag = {
196-
// uncached response
197-
let response = self.assert_success(path).await?;
198-
let etag: ETag = response.headers().typed_get().unwrap();
200+
let etag: ETag = uncached_response
201+
.headers()
202+
.typed_get()
203+
.ok_or_else(|| anyhow!("missing ETag header"))?;
199204

200-
assert_eq!(response.text().await?, expected_body);
205+
let if_none_match = IfNoneMatch::from(etag.clone());
201206

202-
etag
203-
};
207+
// general rule:
208+
//
209+
// if a header influences how any client or intermediate proxy should treat the response,
210+
// it should be repeated on the 304 response.
211+
//
212+
// This logic assumes _all_ headers have to be repeated, except for a few known ones.
213+
const NON_CACHE_HEADERS: &[&HeaderName] = &[&CONTENT_TYPE];
204214

205-
let if_none_match = IfNoneMatch::from(etag.clone());
215+
// store original headers, to assert that they are repeated on the 304 response.
216+
let original_headers: HashMap<HeaderName, HeaderValue> = uncached_response
217+
.headers()
218+
.iter()
219+
.filter(|(k, _)| !NON_CACHE_HEADERS.contains(k))
220+
.map(|(k, v)| (k.clone(), v.clone()))
221+
.collect();
206222

207223
{
208-
// cached response
209-
let response = self
210-
.get_with_headers(path, |headers| {
224+
let cached_response = self
225+
.get_with_headers(initial_path, |headers| {
226+
headers.insert(X_RLNG_SOURCE_CDN, HeaderValue::from_static("fastly"));
211227
headers.typed_insert(if_none_match);
212228
})
213229
.await?;
214-
assert_eq!(response.status(), StatusCode::NOT_MODIFIED);
215-
// etag is repeated
216-
assert_eq!(response.headers().typed_get::<ETag>().unwrap(), etag);
230+
assert_eq!(cached_response.status(), StatusCode::NOT_MODIFIED);
231+
232+
// most headers are repeated on the 304 response.
233+
let cached_response_headers: HashMap<HeaderName, HeaderValue> = cached_response
234+
.headers()
235+
.iter()
236+
.filter_map(|(k, v)| {
237+
if original_headers.contains_key(k) {
238+
Some((k.clone(), v.clone()))
239+
} else {
240+
None
241+
}
242+
})
243+
.collect();
244+
245+
assert_eq!(original_headers, cached_response_headers);
217246
}
218247
Ok(())
219248
}
220249

250+
async fn assert_success_and_conditional_get(&self, path: &str) -> Result<()> {
251+
self.assert_conditional_get(path, &self.assert_success(path).await?)
252+
.await
253+
}
254+
221255
async fn assert_not_found(&self, path: &str) -> Result<()> {
222256
let response = self.get(path).await?;
223257

src/utils/cargo_metadata.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,22 @@ pub(crate) struct Dependency {
135135
pub(crate) optional: bool,
136136
}
137137

138+
impl bincode::Encode for Dependency {
139+
fn encode<E: bincode::enc::Encoder>(
140+
&self,
141+
encoder: &mut E,
142+
) -> Result<(), bincode::error::EncodeError> {
143+
self.name.encode(encoder)?;
144+
// FIXME: VersionReq does not implement Encode, so we serialize it to string
145+
// Could be fixable by wrapping VersionReq in a newtype
146+
self.req.to_string().encode(encoder)?;
147+
self.kind.encode(encoder)?;
148+
self.rename.encode(encoder)?;
149+
self.optional.encode(encoder)?;
150+
Ok(())
151+
}
152+
}
153+
138154
impl Dependency {
139155
#[cfg(test)]
140156
pub fn new(name: String, req: VersionReq) -> Dependency {

src/web/cache.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@ use axum::{
1313
response::Response as AxumResponse,
1414
};
1515
use axum_extra::headers::HeaderMapExt as _;
16-
use http::{HeaderMap, HeaderName, HeaderValue, header::CACHE_CONTROL, request::Parts};
16+
use http::{
17+
HeaderMap, HeaderName, HeaderValue, StatusCode,
18+
header::{CACHE_CONTROL, ETAG},
19+
request::Parts,
20+
};
1721
use serde::Deserialize;
1822
use std::{convert::Infallible, sync::Arc};
1923
use tracing::error;
@@ -235,6 +239,15 @@ pub(crate) async fn cache_middleware(
235239
response.headers(),
236240
);
237241

242+
debug_assert!(
243+
response.status() == StatusCode::NOT_MODIFIED
244+
|| response.status().is_success()
245+
|| !response.headers().contains_key(ETAG),
246+
"only successful or not-modified responses should have etags. \n{:?}\n{:?}",
247+
response.status(),
248+
response.headers(),
249+
);
250+
238251
// extract cache policy, default to "forbid caching everywhere".
239252
// We only use cache policies in our successful responses (with content, or redirect),
240253
// so any errors (4xx, 5xx) should always get "NoCaching".

src/web/escaped_uri.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,23 @@ pub struct EscapedURI {
1919
fragment: Option<String>,
2020
}
2121

22+
impl bincode::Encode for EscapedURI {
23+
fn encode<E: bincode::enc::Encoder>(
24+
&self,
25+
encoder: &mut E,
26+
) -> Result<(), bincode::error::EncodeError> {
27+
// encode as separate parts so we don't have to clone
28+
self.uri.scheme_str().encode(encoder)?;
29+
self.uri.authority().map(|a| a.as_str()).encode(encoder)?;
30+
self.uri
31+
.path_and_query()
32+
.map(|pq| pq.as_str())
33+
.encode(encoder)?;
34+
self.fragment.encode(encoder)?;
35+
Ok(())
36+
}
37+
}
38+
2239
impl EscapedURI {
2340
pub fn from_uri(uri: Uri) -> Self {
2441
if uri.path_and_query().is_some() {

src/web/extractors/rustdoc.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ pub(crate) const ROOT_RUSTDOC_HTML_FILES: &[&str] = &[
2626
"scrape-examples-help.html",
2727
];
2828

29-
#[derive(Clone, Debug, PartialEq)]
29+
#[derive(Clone, Debug, PartialEq, bincode::Encode)]
3030
pub(crate) enum PageKind {
3131
Rustdoc,
3232
Source,
@@ -42,7 +42,7 @@ pub(crate) enum PageKind {
4242
/// All of these have more or less detail depending on how much metadata we have here.
4343
/// Maintains some additional fields containing "fixed" things, whos quality
4444
/// gets better the more metadata we provide.
45-
#[derive(Clone, PartialEq)]
45+
#[derive(Clone, PartialEq, bincode::Encode)]
4646
pub(crate) struct RustdocParams {
4747
// optional behaviour marker
4848
page_kind: Option<PageKind>,

src/web/headers/mod.rs

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,56 @@ mod if_none_match;
33
mod surrogate_key;
44

55
use axum_extra::headers::ETag;
6-
pub use canonical_url::CanonicalUrl;
76
use http::HeaderName;
7+
use std::io::{self, Write};
8+
9+
pub use canonical_url::CanonicalUrl;
810
pub(crate) use if_none_match::IfNoneMatch;
911
pub use surrogate_key::{SURROGATE_KEY, SurrogateKey, SurrogateKeys};
1012

1113
/// Fastly's Surrogate-Control header
1214
/// https://www.fastly.com/documentation/reference/http/http-headers/Surrogate-Control/
1315
pub static SURROGATE_CONTROL: HeaderName = HeaderName::from_static("surrogate-control");
1416

17+
/// X-Robots-Tag header for search engines.
18+
pub static X_ROBOTS_TAG: HeaderName = HeaderName::from_static("x-robots-tag");
19+
1520
/// compute our etag header value from some content
1621
///
1722
/// Has to match the implementation in our build-script.
1823
pub fn compute_etag<T: AsRef<[u8]>>(content: T) -> ETag {
19-
let digest = md5::compute(&content);
20-
format!("\"{:x}\"", digest).parse().unwrap()
24+
let mut computer = ETagComputer::new();
25+
computer.write_all(content.as_ref()).unwrap();
26+
computer.finalize()
27+
}
28+
29+
/// Helper type to compute ETag values.
30+
///
31+
/// Works the same way as the inner `md5::Context`,
32+
/// but produces an `ETag` when finalized.
33+
pub(crate) struct ETagComputer(md5::Context);
34+
35+
impl ETagComputer {
36+
pub fn new() -> Self {
37+
Self(md5::Context::new())
38+
}
39+
40+
pub fn consume<T: AsRef<[u8]>>(&mut self, data: T) {
41+
self.0.consume(data.as_ref());
42+
}
43+
44+
pub fn finalize(self) -> ETag {
45+
let digest = self.0.finalize();
46+
format!("\"{:x}\"", digest).parse().unwrap()
47+
}
48+
}
49+
50+
impl io::Write for ETagComputer {
51+
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
52+
self.0.write(buf)
53+
}
54+
55+
fn flush(&mut self) -> io::Result<()> {
56+
self.0.flush()
57+
}
2158
}

0 commit comments

Comments
 (0)