diff --git a/Cargo.lock b/Cargo.lock index 0969ad8b3..cc4b87327 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2069,7 +2069,6 @@ dependencies = [ "phf 0.13.1", "phf_codegen 0.13.1", "pretty_assertions", - "prometheus", "rand 0.9.2", "rayon", "regex", @@ -6057,20 +6056,6 @@ dependencies = [ "parking_lot", ] -[[package]] -name = "prometheus" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a" -dependencies = [ - "cfg-if", - "fnv", - "lazy_static", - "memchr", - "parking_lot", - "thiserror 2.0.17", -] - [[package]] name = "prost" version = "0.14.1" diff --git a/Cargo.toml b/Cargo.toml index 11ee59246..8be28cb18 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,6 @@ thiserror = "2.0.3" comrak = { version = "0.48.0", default-features = false } syntect = { version = "5.0.0", default-features = false, features = ["parsing", "html", "dump-load", "regex-onig"] } toml = "0.9.2" -prometheus = { version = "0.14.0", default-features = false } opentelemetry = "0.31.0" opentelemetry-otlp = { version = "0.31.0", features = ["grpc-tonic", "metrics"] } opentelemetry-resource-detectors = "0.10.0" diff --git a/docker-compose.yml b/docker-compose.yml index eea43d88f..e99746227 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,7 +21,6 @@ # - release-rebuild-enqueuer # # optional profile: `metrics`: -# * `prometheus` -> configured prometheus instance # * `opentelemetry` -> a debug opentelemetry receiver # # optional profile: `full`: all of the above. @@ -44,12 +43,6 @@ # * docker-native for DB, S3, rustwide workspace, crates.io index # * mounts for prefix # -# * prometheus scrape config is set to collect from the web server, the -# registry watcher, and the build servers. Scraping is not dynamic, so -# the local prometheus server will try to fetch from all service -# instances (web, watcher, builder), and just error in case the specific -# server isn't accessible. - x-docker-cache: &docker-cache # shared configuration to cache docker layers across CI runs. # can just always be in in the `docker-compose.yml`, and will @@ -119,9 +112,6 @@ x-builder: &builder <<: *builder-environment env_file: - .docker.env - healthcheck: - <<: *healthcheck-interval - test: curl --silent --fail localhost:3000/about/metrics x-registry-watcher: ®istry-watcher build: @@ -144,9 +134,6 @@ x-registry-watcher: ®istry-watcher DOCSRS_MAX_QUEUED_REBUILDS: ${DOCSRS_MAX_QUEUED_REBUILDS:-10} env_file: - .docker.env - healthcheck: - <<: *healthcheck-interval - test: curl --silent --fail localhost:3000/about/metrics services: web: @@ -166,7 +153,7 @@ services: - .docker.env healthcheck: <<: *healthcheck-interval - test: curl --silent --fail localhost:3000/about/metrics + test: curl --silent --fail localhost:3000/ profiles: - web - full @@ -275,28 +262,6 @@ services: <<: *healthcheck-interval test: mc ready local - prometheus: - build: - context: ./dockerfiles - dockerfile: ./Dockerfile-prometheus - <<: *docker-cache - ports: - - "127.0.0.1:9090:9090" - # we intentionally don't define depends_on here. - # While the scrapers are configured to fetch from eventually running - # web or build-servers, adding these as dependency would mean we can't - # test metrics just with a webserver. - # Prometheus will just scrape from the working endpoints, and skip/error - # on the broken ones. - healthcheck: - <<: *healthcheck-interval - test: promtool check healthy - profiles: - # we rarely need to test with actual prometheus, so always running - # it is a waste. - - metrics - - full - opentelemetry: build: context: ./dockerfiles @@ -309,8 +274,8 @@ services: test: curl --silent --fail http://localhost:13133/health profiles: - # we rarely need to test with actual prometheus, so always running - # it is a waste. + # we rarely need to test with actual opentelemetry, so always + # running it is a waste. - metrics - full diff --git a/dockerfiles/Dockerfile-prometheus b/dockerfiles/Dockerfile-prometheus deleted file mode 100644 index 01357d9c9..000000000 --- a/dockerfiles/Dockerfile-prometheus +++ /dev/null @@ -1,2 +0,0 @@ -FROM prom/prometheus -COPY prometheus.yml /etc/prometheus/ diff --git a/justfiles/services.just b/justfiles/services.just index 4bbbe29a8..a94f2212b 100644 --- a/justfiles/services.just +++ b/justfiles/services.just @@ -20,7 +20,7 @@ compose-up-builder: compose-up-watcher: just compose-up watcher -# Launch prometheus server in the background +# Launch metrics collector in the background [group('compose')] compose-up-metrics: just compose-up metrics diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs index b8be90127..b08c9975d 100644 --- a/src/bin/cratesfyi.rs +++ b/src/bin/cratesfyi.rs @@ -4,7 +4,7 @@ use clap::{Parser, Subcommand, ValueEnum}; use docs_rs::{ Config, Context, Index, PackageKind, RustwideBuilder, db::{self, CrateId, Overrides, add_path_into_database, types::version::Version}, - queue_rebuilds_faulty_rustdoc, start_background_metrics_webserver, start_web_server, + queue_rebuilds_faulty_rustdoc, start_web_server, utils::{ ConfigName, daemon::start_background_service_metric_collector, get_config, get_crate_pattern_and_priority, list_crate_priorities, queue_builder, @@ -137,8 +137,6 @@ enum CommandLine { }, StartRegistryWatcher { - #[arg(name = "SOCKET_ADDR", default_value = "0.0.0.0:3000")] - metric_server_socket_addr: SocketAddr, /// Enable or disable the repository stats updater #[arg( long = "repository-stats-updater", @@ -152,10 +150,7 @@ enum CommandLine { queue_rebuilds: Toggle, }, - StartBuildServer { - #[arg(name = "SOCKET_ADDR", default_value = "0.0.0.0:3000")] - metric_server_socket_addr: SocketAddr, - }, + StartBuildServer, /// Starts the daemon Daemon { @@ -186,7 +181,6 @@ impl CommandLine { match self { Self::Build { subcommand } => subcommand.handle_args(ctx)?, Self::StartRegistryWatcher { - metric_server_socket_addr, repository_stats_updater, cdn_invalidator, queue_rebuilds, @@ -205,18 +199,12 @@ impl CommandLine { // metrics from the registry watcher, which should only run once, and all the time. start_background_service_metric_collector(&ctx)?; - start_background_metrics_webserver(Some(metric_server_socket_addr), &ctx)?; - ctx.runtime.block_on(docs_rs::utils::watch_registry( &ctx.async_build_queue, &ctx.config, ))?; } - Self::StartBuildServer { - metric_server_socket_addr, - } => { - start_background_metrics_webserver(Some(metric_server_socket_addr), &ctx)?; - + Self::StartBuildServer => { queue_builder(&ctx, RustwideBuilder::init(&ctx)?)?; } Self::StartWebServer { socket_addr } => { diff --git a/src/build_queue.rs b/src/build_queue.rs index 990fcd206..112d6d438 100644 --- a/src/build_queue.rs +++ b/src/build_queue.rs @@ -1,5 +1,5 @@ use crate::{ - BuildPackageSummary, Config, Context, Index, InstanceMetrics, RustwideBuilder, + BuildPackageSummary, Config, Context, Index, RustwideBuilder, cdn::{self, CdnMetrics}, db::{ CrateId, Pool, delete_crate, delete_version, @@ -70,7 +70,6 @@ pub struct AsyncBuildQueue { config: Arc, storage: Arc, pub(crate) db: Pool, - metrics: Arc, queue_metrics: BuildQueueMetrics, builder_metrics: Arc, cdn_metrics: Arc, @@ -80,7 +79,6 @@ pub struct AsyncBuildQueue { impl AsyncBuildQueue { pub fn new( db: Pool, - metrics: Arc, config: Arc, storage: Arc, cdn_metrics: Arc, @@ -90,7 +88,6 @@ impl AsyncBuildQueue { max_attempts: config.build_attempts.into(), config, db, - metrics, storage, queue_metrics: BuildQueueMetrics::new(otel_meter_provider), builder_metrics: Arc::new(BuilderMetrics::new(otel_meter_provider)), @@ -427,7 +424,6 @@ impl AsyncBuildQueue { "{}-{} added into build queue", release.name, release.version ); - self.metrics.queued_builds.inc(); self.queue_metrics.queued_builds.add(1, &[]); crates_added += 1; } @@ -637,12 +633,10 @@ impl BuildQueue { let instant = Instant::now(); let res = f(&to_process); let elapsed = instant.elapsed().as_secs_f64(); - self.inner.metrics.build_time.observe(elapsed); self.inner.builder_metrics.build_time.record(elapsed, &[]); res }; - self.inner.metrics.total_builds.inc(); self.inner.builder_metrics.total_builds.add(1, &[]); self.runtime.block_on( @@ -665,7 +659,6 @@ impl BuildQueue { )?; if attempt >= self.inner.max_attempts { - self.inner.metrics.failed_builds.inc(); self.inner.builder_metrics.failed_builds.add(1, &[]); } Ok(()) @@ -1384,12 +1377,6 @@ mod tests { })?; assert!(!called, "there were still items in the queue"); - // Ensure metrics were recorded correctly - let metrics = env.instance_metrics(); - assert_eq!(metrics.total_builds.get(), 9); - assert_eq!(metrics.failed_builds.get(), 1); - assert_eq!(metrics.build_time.get_sample_count(), 9); - let collected_metrics = env.collected_metrics(); assert_eq!( diff --git a/src/cdn/cloudfront.rs b/src/cdn/cloudfront.rs index 8ec806407..4ee4c6f36 100644 --- a/src/cdn/cloudfront.rs +++ b/src/cdn/cloudfront.rs @@ -1,5 +1,5 @@ use super::CdnMetrics; -use crate::{Config, InstanceMetrics, utils::report_error}; +use crate::{Config, utils::report_error}; use anyhow::{Context, Error, Result, anyhow, bail}; use aws_config::BehaviorVersion; use aws_sdk_cloudfront::{ @@ -303,7 +303,6 @@ impl CdnBackend { #[instrument(skip_all, fields(distribution_id))] pub(crate) async fn full_invalidation( cdn: &CdnBackend, - metrics: &InstanceMetrics, otel_metrics: &CdnMetrics, conn: &mut sqlx::PgConnection, distribution_id: &str, @@ -323,10 +322,6 @@ pub(crate) async fn full_invalidation( { if let Ok(duration) = (now - row.queued).to_std() { let duration = duration.as_secs_f64(); - metrics - .cdn_queue_time - .with_label_values(&[distribution_id]) - .observe(duration); otel_metrics.queue_time.record( duration, &[KeyValue::new("distribution", distribution_id.to_string())], @@ -365,7 +360,6 @@ pub(crate) async fn full_invalidation( pub(crate) async fn handle_queued_invalidation_requests( config: &Config, cdn: &CdnBackend, - metrics: &InstanceMetrics, otel_metrics: &CdnMetrics, conn: &mut sqlx::PgConnection, distribution_id: &str, @@ -431,10 +425,6 @@ pub(crate) async fn handle_queued_invalidation_requests( { if let Ok(duration) = (now - row.created_in_cdn.expect("this is always Some")).to_std() { let duration = duration.as_secs_f64(); - metrics - .cdn_invalidation_time - .with_label_values(&[distribution_id]) - .observe(duration); otel_metrics.invalidation_time.record( duration, &[KeyValue::new("distribution", distribution_id.to_string())], @@ -466,7 +456,7 @@ pub(crate) async fn handle_queued_invalidation_requests( .await? && (now - min_queued).to_std().unwrap_or_default() >= config.cdn_max_queued_age { - full_invalidation(cdn, metrics, otel_metrics, conn, distribution_id).await?; + full_invalidation(cdn, otel_metrics, conn, distribution_id).await?; return Ok(()); } @@ -493,10 +483,6 @@ pub(crate) async fn handle_queued_invalidation_requests( if let Ok(duration) = (now - row.queued).to_std() { let duration = duration.as_secs_f64(); - metrics - .cdn_queue_time - .with_label_values(&[distribution_id]) - .observe(duration); otel_metrics.queue_time.record( duration, &[KeyValue::new("distribution", distribution_id.to_string())], @@ -789,19 +775,11 @@ mod tests { let metrics = otel_metrics(&env); // now handle the queued invalidations + handle_queued_invalidation_requests(config, cdn, &metrics, &mut conn, DISTRIBUTION_ID_WEB) + .await?; handle_queued_invalidation_requests( config, cdn, - env.instance_metrics(), - &metrics, - &mut conn, - DISTRIBUTION_ID_WEB, - ) - .await?; - handle_queued_invalidation_requests( - config, - cdn, - env.instance_metrics(), &metrics, &mut conn, DISTRIBUTION_ID_STATIC, @@ -896,19 +874,11 @@ mod tests { let metrics = otel_metrics(&env); // now handle the queued invalidations + handle_queued_invalidation_requests(config, cdn, &metrics, &mut conn, DISTRIBUTION_ID_WEB) + .await?; handle_queued_invalidation_requests( config, cdn, - env.instance_metrics(), - &metrics, - &mut conn, - DISTRIBUTION_ID_WEB, - ) - .await?; - handle_queued_invalidation_requests( - config, - cdn, - env.instance_metrics(), &metrics, &mut conn, DISTRIBUTION_ID_STATIC, @@ -939,19 +909,11 @@ mod tests { cdn.clear_active_invalidations(); // now handle again + handle_queued_invalidation_requests(config, cdn, &metrics, &mut conn, DISTRIBUTION_ID_WEB) + .await?; handle_queued_invalidation_requests( config, cdn, - env.instance_metrics(), - &metrics, - &mut conn, - DISTRIBUTION_ID_WEB, - ) - .await?; - handle_queued_invalidation_requests( - config, - cdn, - env.instance_metrics(), &metrics, &mut conn, DISTRIBUTION_ID_STATIC, @@ -1021,7 +983,6 @@ mod tests { handle_queued_invalidation_requests( env.config(), env.cdn(), - env.instance_metrics(), &metrics, &mut conn, DISTRIBUTION_ID_WEB, @@ -1085,7 +1046,6 @@ mod tests { handle_queued_invalidation_requests( env.config(), env.cdn(), - env.instance_metrics(), &metrics, &mut conn, DISTRIBUTION_ID_WEB, @@ -1118,7 +1078,6 @@ mod tests { handle_queued_invalidation_requests( env.config(), env.cdn(), - env.instance_metrics(), &metrics, &mut conn, DISTRIBUTION_ID_WEB, @@ -1161,7 +1120,6 @@ mod tests { handle_queued_invalidation_requests( env.config(), env.cdn(), - env.instance_metrics(), &metrics, &mut conn, DISTRIBUTION_ID_WEB, diff --git a/src/context.rs b/src/context.rs index aefe457e1..b968ac7b6 100644 --- a/src/context.rs +++ b/src/context.rs @@ -1,6 +1,5 @@ use crate::{ - AsyncBuildQueue, AsyncStorage, BuildQueue, Config, InstanceMetrics, RegistryApi, - ServiceMetrics, Storage, + AsyncBuildQueue, AsyncStorage, BuildQueue, Config, RegistryApi, Storage, cdn::{CdnMetrics, cloudfront::CdnBackend}, db::Pool, metrics::otel::{AnyMeterProvider, get_meter_provider}, @@ -19,8 +18,6 @@ pub struct Context { pub cdn: Arc, pub cdn_metrics: Arc, pub pool: Pool, - pub service_metrics: Arc, - pub instance_metrics: Arc, pub registry_api: Arc, pub repository_stats_updater: Arc, pub runtime: runtime::Handle, @@ -30,23 +27,19 @@ pub struct Context { impl Context { /// Create a new context environment from the given configuration. pub async fn from_config(config: Config) -> Result { - let instance_metrics = Arc::new(InstanceMetrics::new()?); let meter_provider = get_meter_provider(&config)?; - let pool = Pool::new(&config, instance_metrics.clone(), &meter_provider).await?; - Self::from_config_with_metrics_and_pool(config, instance_metrics, meter_provider, pool) - .await + let pool = Pool::new(&config, &meter_provider).await?; + Self::from_config_with_metrics_and_pool(config, meter_provider, pool).await } /// Create a new context environment from the given configuration, for running tests. #[cfg(test)] pub async fn from_test_config( config: Config, - instance_metrics: Arc, meter_provider: AnyMeterProvider, pool: Pool, ) -> Result { - Self::from_config_with_metrics_and_pool(config, instance_metrics, meter_provider, pool) - .await + Self::from_config_with_metrics_and_pool(config, meter_provider, pool).await } /// private function for context environment generation, allows passing in a @@ -54,27 +47,18 @@ impl Context { /// Mostly so we can support test environments with their db async fn from_config_with_metrics_and_pool( config: Config, - instance_metrics: Arc, meter_provider: AnyMeterProvider, pool: Pool, ) -> Result { let config = Arc::new(config); - let async_storage = Arc::new( - AsyncStorage::new( - pool.clone(), - instance_metrics.clone(), - config.clone(), - &meter_provider, - ) - .await?, - ); + let async_storage = + Arc::new(AsyncStorage::new(pool.clone(), config.clone(), &meter_provider).await?); let cdn_metrics = Arc::new(CdnMetrics::new(&meter_provider)); let cdn = Arc::new(CdnBackend::new(&config).await); let async_build_queue = Arc::new(AsyncBuildQueue::new( pool.clone(), - instance_metrics.clone(), config.clone(), async_storage.clone(), cdn_metrics.clone(), @@ -95,8 +79,6 @@ impl Context { cdn, cdn_metrics, pool: pool.clone(), - service_metrics: Arc::new(ServiceMetrics::new()?), - instance_metrics, registry_api: Arc::new(RegistryApi::new( config.registry_api_host.clone(), config.crates_io_api_call_retries, diff --git a/src/db/pool.rs b/src/db/pool.rs index 5e607a6b8..dd32f7150 100644 --- a/src/db/pool.rs +++ b/src/db/pool.rs @@ -1,4 +1,4 @@ -use crate::{Config, metrics::InstanceMetrics, metrics::otel::AnyMeterProvider}; +use crate::{Config, metrics::otel::AnyMeterProvider}; use futures_util::{future::BoxFuture, stream::BoxStream}; use opentelemetry::metrics::{Counter, ObservableGauge}; use sqlx::{Executor, postgres::PgPoolOptions}; @@ -68,36 +68,31 @@ impl PoolMetrics { pub struct Pool { async_pool: sqlx::PgPool, runtime: runtime::Handle, - metrics: Arc, - max_size: u32, otel_metrics: Arc, } impl Pool { pub async fn new( config: &Config, - metrics: Arc, otel_meter_provider: &AnyMeterProvider, ) -> Result { debug!( "creating database pool (if this hangs, consider running `docker-compose up -d db s3`)" ); - Self::new_inner(config, metrics, DEFAULT_SCHEMA, otel_meter_provider).await + Self::new_inner(config, DEFAULT_SCHEMA, otel_meter_provider).await } #[cfg(test)] pub(crate) async fn new_with_schema( config: &Config, - metrics: Arc, schema: &str, otel_meter_provider: &AnyMeterProvider, ) -> Result { - Self::new_inner(config, metrics, schema, otel_meter_provider).await + Self::new_inner(config, schema, otel_meter_provider).await } async fn new_inner( config: &Config, - metrics: Arc, schema: &str, otel_meter_provider: &AnyMeterProvider, ) -> Result { @@ -136,9 +131,7 @@ impl Pool { Ok(Pool { async_pool: async_pool.clone(), - metrics, runtime: runtime::Handle::current(), - max_size: config.max_pool_size, otel_metrics: Arc::new(PoolMetrics::new(async_pool, otel_meter_provider)), }) } @@ -150,24 +143,11 @@ impl Pool { runtime: self.runtime.clone(), }), Err(err) => { - self.metrics.failed_db_connections.inc(); self.otel_metrics.failed_connections.add(1, &[]); Err(PoolError::AsyncClientError(err)) } } } - - pub(crate) fn used_connections(&self) -> u32 { - self.async_pool.size() - self.async_pool.num_idle() as u32 - } - - pub(crate) fn idle_connections(&self) -> u32 { - self.async_pool.num_idle() as u32 - } - - pub(crate) fn max_size(&self) -> u32 { - self.max_size - } } /// This impl allows us to use our own pool as an executor for SQLx queries. diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 2980638e5..04097db38 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -1,6 +1,5 @@ use crate::{ - AsyncStorage, Config, Context, InstanceMetrics, RUSTDOC_STATIC_STORAGE_PREFIX, RegistryApi, - Storage, + AsyncStorage, Config, Context, RUSTDOC_STATIC_STORAGE_PREFIX, RegistryApi, Storage, db::{ BuildId, CrateId, Pool, ReleaseId, add_doc_coverage, add_path_into_remote_archive, blacklist::is_blacklisted, @@ -169,7 +168,6 @@ pub struct RustwideBuilder { db: Pool, storage: Arc, async_storage: Arc, - metrics: Arc, registry_api: Arc, repository_stats_updater: Arc, workspace_initialize_time: Instant, @@ -191,7 +189,6 @@ impl RustwideBuilder { runtime: context.runtime.clone(), storage: context.storage.clone(), async_storage: context.async_storage.clone(), - metrics: context.instance_metrics.clone(), registry_api: context.registry_api.clone(), repository_stats_updater: context.repository_stats_updater.clone(), workspace_initialize_time: Instant::now(), @@ -779,9 +776,6 @@ impl RustwideBuilder { true, ))?; let documentation_size = file_list.iter().map(|info| info.size).sum::(); - self.metrics - .documentation_size - .observe(documentation_size as f64 / 1024.0 / 1024.0); self.builder_metrics.documentation_size.record(documentation_size, &[]); algs.insert(new_alg); Some(documentation_size) @@ -816,13 +810,10 @@ impl RustwideBuilder { } if res.result.successful { - self.metrics.successful_builds.inc(); self.builder_metrics.successful_builds.add(1, &[]); } else if res.cargo_metadata.root().is_library() { - self.metrics.failed_builds.inc(); self.builder_metrics.failed_builds.add(1, &[]); } else { - self.metrics.non_library_builds.inc(); self.builder_metrics.non_library_builds.add(1, &[]); } diff --git a/src/lib.rs b/src/lib.rs index e45e42b16..2de789a13 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,10 +10,9 @@ pub use self::context::Context; pub use self::docbuilder::PackageKind; pub use self::docbuilder::{BuildPackageSummary, RustwideBuilder}; pub use self::index::Index; -pub use self::metrics::{InstanceMetrics, ServiceMetrics}; pub use self::registry_api::RegistryApi; pub use self::storage::{AsyncStorage, Storage}; -pub use self::web::{start_background_metrics_webserver, start_web_server}; +pub use self::web::start_web_server; pub use font_awesome_as_a_crate::icons; diff --git a/src/metrics/macros.rs b/src/metrics/macros.rs deleted file mode 100644 index 8e5515633..000000000 --- a/src/metrics/macros.rs +++ /dev/null @@ -1,146 +0,0 @@ -pub(super) trait MetricFromOpts: Sized { - fn from_opts(opts: prometheus::Opts) -> Result; -} - -macro_rules! metrics { - ( - $vis:vis struct $name:ident { - $( - #[doc = $help:expr] - $(#[$meta:meta])* - $metric_vis:vis $metric:ident: $ty:ty $([$($label:expr),* $(,)?])? - ),* $(,)? - } - namespace: $namespace:expr, - ) => { - $vis struct $name { - registry: prometheus::Registry, - $( - $(#[$meta])* - $metric_vis $metric: $ty, - )* - pub(crate) recently_accessed_releases: RecentlyAccessedReleases, - pub(crate) cdn_invalidation_time: prometheus::HistogramVec, - pub(crate) cdn_queue_time: prometheus::HistogramVec, - pub(crate) build_time: prometheus::Histogram, - pub(crate) documentation_size: prometheus::Histogram, - pub(crate) response_time: prometheus::HistogramVec, - } - impl $name { - $vis fn new() -> Result { - let registry = prometheus::Registry::new(); - $( - $(#[$meta])* - let $metric = <$ty>::from_opts( - prometheus::Opts::new(stringify!($metric), $help) - .namespace($namespace) - $(.variable_labels(vec![$($label.into()),*]))? - )?; - $(#[$meta])* - registry.register(Box::new($metric.clone()))?; - )* - - let cdn_invalidation_time = prometheus::HistogramVec::new( - prometheus::HistogramOpts::new( - "cdn_invalidation_time", - "duration of CDN invalidations after having been sent to the CDN.", - ) - .namespace($namespace) - .buckets($crate::metrics::CDN_INVALIDATION_HISTOGRAM_BUCKETS.to_vec()) - .variable_label("distribution"), - &["distribution"], - )?; - registry.register(Box::new(cdn_invalidation_time.clone()))?; - - let cdn_queue_time = prometheus::HistogramVec::new( - prometheus::HistogramOpts::new( - "cdn_queue_time", - "queue time of CDN invalidations before they are sent to the CDN. ", - ) - .namespace($namespace) - .buckets($crate::metrics::CDN_INVALIDATION_HISTOGRAM_BUCKETS.to_vec()) - .variable_label("distribution"), - &["distribution"], - )?; - registry.register(Box::new(cdn_queue_time.clone()))?; - - let build_time = prometheus::Histogram::with_opts( - prometheus::HistogramOpts::new( - "build_time", - "time spent building crates", - ) - .namespace($namespace) - .buckets($crate::metrics::BUILD_TIME_HISTOGRAM_BUCKETS.to_vec()), - )?; - registry.register(Box::new(build_time.clone()))?; - - let documentation_size= prometheus::Histogram::with_opts( - prometheus::HistogramOpts::new( - "documentation_size", - "size of the documentation in MB" - ) - .namespace($namespace) - .buckets($crate::metrics::DOCUMENTATION_SIZE_BUCKETS.to_vec()) - )?; - registry.register(Box::new(documentation_size.clone()))?; - - let response_time = prometheus::HistogramVec::new( - prometheus::HistogramOpts::new( - "response_time", - "The response times of various docs.rs routes", - ) - .namespace($namespace) - .buckets($crate::metrics::RESPONSE_TIME_HISTOGRAM_BUCKETS.to_vec()) - .variable_label("route"), - &["route"], - )?; - registry.register(Box::new(response_time.clone()))?; - - Ok(Self { - registry, - recently_accessed_releases: RecentlyAccessedReleases::new(), - cdn_invalidation_time, - cdn_queue_time, - build_time, - documentation_size, - response_time, - $( - $(#[$meta])* - $metric, - )* - }) - } - } - impl std::fmt::Debug for $name { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}", stringify!($name)) - } - } - }; -} - -macro_rules! load_metric_type { - ($name:ident as single) => { - use prometheus::$name; - impl MetricFromOpts for $name { - fn from_opts(opts: prometheus::Opts) -> Result { - $name::with_opts(opts) - } - } - }; - ($name:ident as vec) => { - use prometheus::$name; - impl MetricFromOpts for $name { - fn from_opts(opts: prometheus::Opts) -> Result { - $name::new( - opts.clone().into(), - opts.variable_labels - .iter() - .map(|s| s.as_str()) - .collect::>() - .as_slice(), - ) - } - } - }; -} diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index 898283b36..49ec2857e 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -1,29 +1,6 @@ -#[macro_use] -mod macros; pub(crate) mod otel; pub(crate) mod service; -use self::macros::MetricFromOpts; -use crate::{ - AsyncBuildQueue, Config, cdn, - db::{CrateId, Pool, ReleaseId}, - target::TargetAtom, -}; -use anyhow::{Error, Result}; -use dashmap::DashMap; -use prometheus::proto::MetricFamily; -use std::{ - collections::HashSet, - time::{Duration, Instant}, -}; -use tracing::error; - -load_metric_type!(IntGauge as single); -load_metric_type!(IntCounter as single); -load_metric_type!(IntCounterVec as vec); -load_metric_type!(IntGaugeVec as vec); -load_metric_type!(HistogramVec as vec); - /// the measured times from cdn invalidations, meaning: /// * how long an invalidation took, or /// * how long the invalidation was queued @@ -94,290 +71,3 @@ pub const BUILD_TIME_HISTOGRAM_BUCKETS: &[f64] = &[ pub const RESPONSE_TIME_HISTOGRAM_BUCKETS: &[f64] = &[ 0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, ]; - -metrics! { - pub struct InstanceMetrics { - /// The number of idle database connections - idle_db_connections: IntGauge, - /// The number of used database connections - used_db_connections: IntGauge, - /// The maximum number of database connections - max_db_connections: IntGauge, - /// Number of attempted and failed connections to the database - pub(crate) failed_db_connections: IntCounter, - - /// The number of currently opened file descriptors - open_file_descriptors: IntGauge, - /// The number of threads being used by docs.rs - running_threads: IntGauge, - - /// The traffic of various docs.rs routes - pub(crate) routes_visited: IntCounterVec["route"], - - /// Count of recently accessed crates - pub(crate) recent_crates: IntGaugeVec["duration"], - /// Count of recently accessed versions of crates - pub(crate) recent_versions: IntGaugeVec["duration"], - /// Count of recently accessed platforms of versions of crates - pub(crate) recent_platforms: IntGaugeVec["duration"], - - /// number of queued builds - pub(crate) queued_builds: IntCounter, - /// Number of crates built - pub(crate) total_builds: IntCounter, - /// Number of builds that successfully generated docs - pub(crate) successful_builds: IntCounter, - /// Number of builds that generated a compiler error - pub(crate) failed_builds: IntCounter, - /// Number of builds that did not complete due to not being a library - pub(crate) non_library_builds: IntCounter, - - /// Number of files uploaded to the storage backend - pub(crate) uploaded_files_total: IntCounter, - - /// The number of attempted files that failed due to a memory limit - pub(crate) html_rewrite_ooms: IntCounter, - - /// the number of "I'm feeling lucky" searches for crates - pub(crate) im_feeling_lucky_searches: IntCounter, - } - - // The Rust prometheus library treats the namespace as the "prefix" of the metric name: a - // metric named `foo` with a prefix of `docsrs` will expose a metric called `docsrs_foo`. - // - // https://docs.rs/prometheus/0.9.0/prometheus/struct.Opts.html#structfield.namespace - namespace: "docsrs", -} - -#[derive(Debug, Default)] -pub(crate) struct RecentlyAccessedReleases { - crates: DashMap, - versions: DashMap, - platforms: DashMap<(ReleaseId, TargetAtom), Instant>, -} - -impl RecentlyAccessedReleases { - pub(crate) fn new() -> Self { - Self::default() - } - - pub(crate) fn record(&self, krate: CrateId, version: ReleaseId, target: &str) { - if self.platforms.len() > 100_000 { - // Avoid filling the maps _too_ much, we should never get anywhere near this limit - return; - } - - let now = Instant::now(); - self.crates.insert(krate, now); - self.versions.insert(version, now); - self.platforms - .insert((version, TargetAtom::from(target)), now); - } - - pub(crate) fn gather(&self, metrics: &InstanceMetrics) { - fn inner(map: &DashMap, metric: &IntGaugeVec) { - let mut hour_count = 0; - let mut half_hour_count = 0; - let mut five_minute_count = 0; - map.retain(|_, instant| { - let elapsed = instant.elapsed(); - - if elapsed < Duration::from_secs(60 * 60) { - hour_count += 1; - } - if elapsed < Duration::from_secs(30 * 60) { - half_hour_count += 1; - } - if elapsed < Duration::from_secs(5 * 60) { - five_minute_count += 1; - } - - // Only retain items accessed within the last hour - elapsed < Duration::from_secs(60 * 60) - }); - - metric.with_label_values(&["one hour"]).set(hour_count); - - metric - .with_label_values(&["half hour"]) - .set(half_hour_count); - - metric - .with_label_values(&["five minutes"]) - .set(five_minute_count); - } - - inner(&self.crates, &metrics.recent_crates); - inner(&self.versions, &metrics.recent_versions); - inner(&self.platforms, &metrics.recent_platforms); - } -} - -impl InstanceMetrics { - pub(crate) fn gather(&self, pool: &Pool) -> Result, Error> { - self.idle_db_connections.set(pool.idle_connections() as i64); - self.used_db_connections.set(pool.used_connections() as i64); - self.max_db_connections.set(pool.max_size() as i64); - - self.recently_accessed_releases.gather(self); - self.gather_system_performance(); - Ok(self.registry.gather()) - } - - fn gather_system_performance(&self) { - use sysinfo::{ProcessesToUpdate, System, get_current_pid}; - - let current_pid = match get_current_pid() { - Ok(pid) => pid, - Err(err) => { - error!(err, "couldn't get our own process ID"); - return; - } - }; - let mut s = System::new(); - s.refresh_processes(ProcessesToUpdate::Some(&[current_pid]), true); - - let Some(proc) = s.process(current_pid) else { - error!("couldn't find our own process in sysinfo"); - return; - }; - - if let Some(fd_count) = proc.open_files() { - self.open_file_descriptors.set(fd_count as i64); - } - - if let Some(tasks) = proc.tasks() { - self.running_threads.set(tasks.len() as i64); - } - } -} - -fn metric_from_opts( - registry: &prometheus::Registry, - metric: &str, - help: &str, - variable_label: Option<&str>, -) -> Result { - let mut opts = prometheus::Opts::new(metric, help).namespace("docsrs"); - - if let Some(label) = variable_label { - opts = opts.variable_label(label); - } - - let metric = T::from_opts(opts)?; - registry.register(Box::new(metric.clone()))?; - Ok(metric) -} - -#[derive(Debug)] -pub struct ServiceMetrics { - pub queued_crates_count: IntGauge, - pub prioritized_crates_count: IntGauge, - pub failed_crates_count: IntGauge, - pub queue_is_locked: IntGauge, - pub queued_crates_count_by_priority: IntGaugeVec, - pub queued_cdn_invalidations_by_distribution: IntGaugeVec, - - registry: prometheus::Registry, -} - -impl ServiceMetrics { - pub fn new() -> Result { - let registry = prometheus::Registry::new(); - Ok(Self { - registry: registry.clone(), - queued_crates_count: metric_from_opts( - ®istry, - "queued_crates_count", - "Number of crates in the build queue", - None, - )?, - prioritized_crates_count: metric_from_opts( - ®istry, - "prioritized_crates_count", - "Number of crates in the build queue that have a positive priority", - None, - )?, - failed_crates_count: metric_from_opts( - ®istry, - "failed_crates_count", - "Number of crates that failed to build", - None, - )?, - queue_is_locked: metric_from_opts( - ®istry, - "queue_is_locked", - "Whether the build queue is locked", - None, - )?, - queued_crates_count_by_priority: metric_from_opts( - ®istry, - "queued_crates_count_by_priority", - "queued crates by priority", - Some("priority"), - )?, - queued_cdn_invalidations_by_distribution: metric_from_opts( - ®istry, - "queued_cdn_invalidations_by_distribution", - "queued CDN invalidations", - Some("distribution"), - )?, - }) - } - - pub(crate) async fn gather( - &self, - pool: &Pool, - queue: &AsyncBuildQueue, - config: &Config, - ) -> Result, Error> { - self.queue_is_locked.set(queue.is_locked().await? as i64); - self.queued_crates_count - .set(queue.pending_count().await? as i64); - self.prioritized_crates_count - .set(queue.prioritized_count().await? as i64); - - let queue_pending_count = queue.pending_count_by_priority().await?; - - // gauges keep their old value per label when it's not removed, reset to zero or updated. - // When a priority is used at least once, it would be kept in the metric and the last - // value would be remembered. `pending_count_by_priority` returns only the priorities - // that are currently in the queue, which means when the tasks for a priority are - // finished, we wouldn't update the metric anymore, which means a wrong value is - // in the metric. - // - // The solution is to reset the metric, and then set all priorities again. - self.queued_crates_count_by_priority.reset(); - - // for commonly used priorities we want the value to be zero, and not missing, - // when there are no items in the queue with that priority. - // So we create a set of all priorities we want to be explicitly zeroed, combined - // with the actual priorities in the queue. - let all_priorities: HashSet = - queue_pending_count.keys().copied().chain(0..=20).collect(); - - for priority in all_priorities { - let count = queue_pending_count.get(&priority).unwrap_or(&0); - - self.queued_crates_count_by_priority - .with_label_values(&[&priority.to_string()]) - .set(*count as i64); - } - - let mut conn = pool.get_async().await?; - for (distribution_id, count) in - cdn::cloudfront::queued_or_active_crate_invalidation_count_by_distribution( - &mut conn, config, - ) - .await? - { - self.queued_cdn_invalidations_by_distribution - .with_label_values(&[&distribution_id]) - .set(count); - } - - self.failed_crates_count - .set(queue.failed_count().await? as i64); - Ok(self.registry.gather()) - } -} diff --git a/src/storage/database.rs b/src/storage/database.rs index c7d8a7b49..9e4897fe1 100644 --- a/src/storage/database.rs +++ b/src/storage/database.rs @@ -1,27 +1,18 @@ use super::{BlobUpload, FileRange, StorageMetrics, StreamingBlob}; -use crate::{InstanceMetrics, db::Pool, error::Result, web::headers::compute_etag}; +use crate::{db::Pool, error::Result, web::headers::compute_etag}; use chrono::{DateTime, Utc}; use futures_util::stream::{Stream, TryStreamExt}; use sqlx::Acquire; -use std::{io, sync::Arc}; +use std::io; pub(crate) struct DatabaseBackend { pool: Pool, - metrics: Arc, otel_metrics: StorageMetrics, } impl DatabaseBackend { - pub(crate) fn new( - pool: Pool, - metrics: Arc, - otel_metrics: StorageMetrics, - ) -> Self { - Self { - pool, - metrics, - otel_metrics, - } + pub(crate) fn new(pool: Pool, otel_metrics: StorageMetrics) -> Self { + Self { pool, otel_metrics } } pub(super) async fn exists(&self, path: &str) -> Result { @@ -155,7 +146,6 @@ impl DatabaseBackend { compression, ) .execute(&mut *trans).await?; - self.metrics.uploaded_files_total.inc(); self.otel_metrics.uploaded_files.add(1, &[]); } trans.commit().await?; diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 8f5daea1c..c035bb150 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -10,7 +10,7 @@ use self::{ s3::S3Backend, }; use crate::{ - Config, InstanceMetrics, + Config, db::{ BuildId, Pool, file::{FileEntry, detect_mime}, @@ -265,7 +265,6 @@ pub struct AsyncStorage { impl AsyncStorage { pub async fn new( pool: Pool, - metrics: Arc, config: Arc, otel_meter_provider: &AnyMeterProvider, ) -> Result { @@ -274,11 +273,11 @@ impl AsyncStorage { Ok(Self { backend: match config.storage_backend { StorageKind::Database => { - StorageBackend::Database(DatabaseBackend::new(pool, metrics, otel_metrics)) + StorageBackend::Database(DatabaseBackend::new(pool, otel_metrics)) + } + StorageKind::S3 => { + StorageBackend::S3(Box::new(S3Backend::new(&config, otel_metrics).await?)) } - StorageKind::S3 => StorageBackend::S3(Box::new( - S3Backend::new(metrics, &config, otel_metrics).await?, - )), }, config, locks: DashMap::new(), @@ -1738,11 +1737,7 @@ mod backend_tests { Ok(()) } - fn test_store_blobs( - env: &TestEnvironment, - storage: &Storage, - metrics: &InstanceMetrics, - ) -> Result<()> { + fn test_store_blobs(env: &TestEnvironment, storage: &Storage) -> Result<()> { const NAMES: &[&str] = &[ "a", "b", @@ -1769,8 +1764,6 @@ mod backend_tests { assert_eq!(blob.mime, actual.mime); } - assert_eq!(NAMES.len(), metrics.uploaded_files_total.get() as usize); - let collected_metrics = env.collected_metrics(); assert_eq!( @@ -1791,11 +1784,7 @@ mod backend_tests { Ok(()) } - fn test_store_all_in_archive( - _env: &TestEnvironment, - storage: &Storage, - metrics: &InstanceMetrics, - ) -> Result<()> { + fn test_store_all_in_archive(env: &TestEnvironment, storage: &Storage) -> Result<()> { let dir = tempfile::Builder::new() .prefix("docs.rs-upload-archive-test") .tempdir()?; @@ -1856,16 +1845,20 @@ mod backend_tests { assert_eq!(file.mime, "text/rust"); assert_eq!(file.path, "folder/test.zip/src/main.rs"); - assert_eq!(2, metrics.uploaded_files_total.get()); + let collected_metrics = env.collected_metrics(); + + assert_eq!( + collected_metrics + .get_metric("storage", "docsrs.storage.uploaded_files")? + .get_u64_counter() + .value(), + 2, + ); Ok(()) } - fn test_store_all( - _env: &TestEnvironment, - storage: &Storage, - metrics: &InstanceMetrics, - ) -> Result<()> { + fn test_store_all(env: &TestEnvironment, storage: &Storage) -> Result<()> { let dir = tempfile::Builder::new() .prefix("docs.rs-upload-test") .tempdir()?; @@ -1904,7 +1897,14 @@ mod backend_tests { assert_eq!(algs, CompressionAlgorithm::default()); - assert_eq!(2, metrics.uploaded_files_total.get()); + let collected_metrics = env.collected_metrics(); + assert_eq!( + collected_metrics + .get_metric("storage", "docsrs.storage.uploaded_files")? + .get_u64_counter() + .value(), + 2, + ); Ok(()) } @@ -2012,7 +2012,7 @@ mod backend_tests { $( mod $backend { use crate::test::TestEnvironment; - use crate::storage::{ StorageKind}; + use crate::storage::{StorageKind}; fn get_env() -> anyhow::Result { crate::test::TestEnvironment::with_config_and_runtime( @@ -2041,7 +2041,7 @@ mod backend_tests { #[test] fn $test() -> anyhow::Result<()> { let env = get_env()?; - super::$test(&env, &*env.storage(), &*env.instance_metrics()) + super::$test(&env, &*env.storage()) } )* }; diff --git a/src/storage/s3.rs b/src/storage/s3.rs index 3e417ba38..970c785b1 100644 --- a/src/storage/s3.rs +++ b/src/storage/s3.rs @@ -1,5 +1,5 @@ use super::{BlobUpload, FileRange, StorageMetrics, StreamingBlob}; -use crate::{Config, InstanceMetrics, web::headers::compute_etag}; +use crate::{Config, web::headers::compute_etag}; use anyhow::{Context as _, Error}; use async_stream::try_stream; use aws_config::BehaviorVersion; @@ -17,7 +17,6 @@ use futures_util::{ pin_mut, stream::{FuturesUnordered, Stream, StreamExt}, }; -use std::sync::Arc; use tracing::{error, instrument, warn}; const PUBLIC_ACCESS_TAG: &str = "static-cloudfront-access"; @@ -73,18 +72,13 @@ where pub(super) struct S3Backend { client: Client, bucket: String, - metrics: Arc, otel_metrics: StorageMetrics, #[cfg(test)] temporary: bool, } impl S3Backend { - pub(super) async fn new( - metrics: Arc, - config: &Config, - otel_metrics: StorageMetrics, - ) -> Result { + pub(super) async fn new(config: &Config, otel_metrics: StorageMetrics) -> Result { let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await; let mut config_builder = aws_sdk_s3::config::Builder::from(&shared_config) .retry_config(RetryConfig::standard().with_max_attempts(config.aws_sdk_max_retries)) @@ -114,7 +108,6 @@ impl S3Backend { Ok(Self { client, - metrics, otel_metrics, bucket: config.s3_bucket.clone(), #[cfg(test)] @@ -279,7 +272,6 @@ impl S3Backend { .set_content_encoding(blob.compression.map(|alg| alg.to_string())) .send() .map_ok(|_| { - self.metrics.uploaded_files_total.inc(); self.otel_metrics.uploaded_files.add(1, &[]); }) .map_err(|err| { diff --git a/src/test/mod.rs b/src/test/mod.rs index f86cbaaed..257c3de0b 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -7,7 +7,7 @@ pub(crate) use self::{ test_metrics::setup_test_meter_provider, }; use crate::{ - AsyncBuildQueue, BuildQueue, Config, Context, InstanceMetrics, + AsyncBuildQueue, BuildQueue, Config, Context, cdn::{CdnMetrics, cloudfront::CdnBackend}, config::ConfigBuilder, db::{self, AsyncPoolClient, Pool, types::version::Version}, @@ -477,19 +477,13 @@ impl TestEnvironment { let (metric_exporter, meter_provider) = setup_test_meter_provider(); - let instance_metrics = Arc::new(InstanceMetrics::new()?); - let test_db = TestDatabase::new(&config, instance_metrics.clone(), &meter_provider) + let test_db = TestDatabase::new(&config, &meter_provider) .await .context("can't initialize test database")?; Ok(Self { - context: Context::from_test_config( - config, - instance_metrics, - meter_provider, - test_db.pool().clone(), - ) - .await?, + context: Context::from_test_config(config, meter_provider, test_db.pool().clone()) + .await?, db: test_db, owned_runtime: None, collected_metrics: metric_exporter, @@ -544,10 +538,6 @@ impl TestEnvironment { &self.context.storage } - pub(crate) fn instance_metrics(&self) -> &InstanceMetrics { - &self.context.instance_metrics - } - pub(crate) fn meter_provider(&self) -> &AnyMeterProvider { &self.context.meter_provider } @@ -605,16 +595,12 @@ pub(crate) struct TestDatabase { } impl TestDatabase { - async fn new( - config: &Config, - metrics: Arc, - otel_meter_provider: &AnyMeterProvider, - ) -> Result { + async fn new(config: &Config, otel_meter_provider: &AnyMeterProvider) -> Result { // A random schema name is generated and used for the current connection. This allows each // test to create a fresh instance of the database to run within. let schema = format!("docs_rs_test_schema_{}", rand::random::()); - let pool = Pool::new_with_schema(config, metrics, &schema, otel_meter_provider).await?; + let pool = Pool::new_with_schema(config, &schema, otel_meter_provider).await?; let mut conn = sqlx::PgConnection::connect(&config.database_url).await?; sqlx::query(&format!("CREATE SCHEMA {schema}")) diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs index 29973b06b..05b61fcd0 100644 --- a/src/utils/daemon.rs +++ b/src/utils/daemon.rs @@ -143,7 +143,6 @@ pub fn start_background_service_metric_collector(context: &Context) -> Result<() } pub fn start_background_cdn_invalidator(context: &Context) -> Result<(), Error> { - let metrics = context.instance_metrics.clone(); let config = context.config.clone(); let pool = context.pool.clone(); let runtime = context.runtime.clone(); @@ -171,7 +170,6 @@ pub fn start_background_cdn_invalidator(context: &Context) -> Result<(), Error> let pool = pool.clone(); let config = config.clone(); let cdn = cdn.clone(); - let metrics = metrics.clone(); let otel_metrics = otel_metrics.clone(); async move { let mut conn = pool.get_async().await?; @@ -179,7 +177,6 @@ pub fn start_background_cdn_invalidator(context: &Context) -> Result<(), Error> cdn::cloudfront::handle_queued_invalidation_requests( &config, &cdn, - &metrics, &otel_metrics, &mut conn, distribution_id, @@ -191,7 +188,6 @@ pub fn start_background_cdn_invalidator(context: &Context) -> Result<(), Error> cdn::cloudfront::handle_queued_invalidation_requests( &config, &cdn, - &metrics, &otel_metrics, &mut conn, distribution_id, diff --git a/src/utils/html.rs b/src/utils/html.rs index c327db30a..891655270 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -1,5 +1,4 @@ use crate::{ - InstanceMetrics, utils::report_error, web::{ metrics::WebMetrics, @@ -44,7 +43,6 @@ pub(crate) fn rewrite_rustdoc_html_stream( mut reader: R, max_allowed_memory_usage: usize, data: Arc, - metrics: Arc, otel_metrics: Arc, ) -> impl Stream> + Send + 'static where @@ -226,7 +224,6 @@ where match e.downcast::() { Ok(e) => { if matches!(e, RewritingError::MemoryLimitExceeded(_)) { - metrics.html_rewrite_ooms.inc(); otel_metrics.html_rewrite_ooms.add(1, &[]); } RustdocRewritingError::RewritingError(e) diff --git a/src/web/metrics.rs b/src/web/metrics.rs index 68cb059e3..d42603af9 100644 --- a/src/web/metrics.rs +++ b/src/web/metrics.rs @@ -1,13 +1,7 @@ -use crate::{ - AsyncBuildQueue, Config, InstanceMetrics, ServiceMetrics, - db::Pool, - metrics::{RESPONSE_TIME_HISTOGRAM_BUCKETS, otel::AnyMeterProvider}, - web::error::AxumResult, -}; -use anyhow::{Context as _, Result}; +use crate::metrics::{RESPONSE_TIME_HISTOGRAM_BUCKETS, otel::AnyMeterProvider}; use axum::{ - extract::{Extension, MatchedPath, Request as AxumRequest}, - http::{StatusCode, header::CONTENT_TYPE}, + extract::{MatchedPath, Request as AxumRequest}, + http::StatusCode, middleware::Next, response::IntoResponse, }; @@ -15,8 +9,7 @@ use opentelemetry::{ KeyValue, metrics::{Counter, Histogram}, }; -use prometheus::{Encoder, TextEncoder, proto::MetricFamily}; -use std::{borrow::Cow, future::Future, sync::Arc, time::Instant}; +use std::{borrow::Cow, sync::Arc, time::Instant}; #[derive(Debug)] pub(crate) struct WebMetrics { @@ -53,56 +46,6 @@ impl WebMetrics { } } -async fn fetch_and_render_metrics(fetch_metrics: Fut) -> AxumResult -where - Fut: Future>> + Send + 'static, -{ - let metrics_families = fetch_metrics.await?; - - let mut buffer = Vec::new(); - TextEncoder::new() - .encode(&metrics_families, &mut buffer) - .context("error encoding metrics")?; - - Ok(( - StatusCode::OK, - [(CONTENT_TYPE, mime::TEXT_PLAIN.as_ref())], - buffer, - )) -} - -pub(super) async fn metrics_handler( - Extension(pool): Extension, - Extension(config): Extension>, - Extension(instance_metrics): Extension>, - Extension(service_metrics): Extension>, - Extension(queue): Extension>, -) -> AxumResult { - fetch_and_render_metrics(async move { - let mut families = Vec::new(); - families.extend_from_slice(&instance_metrics.gather(&pool)?); - families.extend_from_slice(&service_metrics.gather(&pool, &queue, &config).await?); - Ok(families) - }) - .await -} - -pub(super) async fn service_metrics_handler( - Extension(pool): Extension, - Extension(config): Extension>, - Extension(metrics): Extension>, - Extension(queue): Extension>, -) -> AxumResult { - fetch_and_render_metrics(async move { metrics.gather(&pool, &queue, &config).await }).await -} - -pub(super) async fn instance_metrics_handler( - Extension(pool): Extension, - Extension(metrics): Extension>, -) -> AxumResult { - fetch_and_render_metrics(async move { metrics.gather(&pool) }).await -} - /// Request recorder middleware /// /// Looks similar, but *is not* a usable middleware / layer @@ -127,12 +70,6 @@ pub(crate) async fn request_recorder( Cow::Owned(request.uri().path().to_string()) }; - let metrics = request - .extensions() - .get::>() - .expect("metrics missing in request extensions") - .clone(); - let otel_metrics = request .extensions() .get::>() @@ -161,18 +98,7 @@ pub(crate) async fn request_recorder( KeyValue::new("status_kind", status_kind), ]; - metrics - .routes_visited - .with_label_values(&[&route_name]) - .inc(); - otel_metrics.routes_visited.add(1, &attrs); - - metrics - .response_time - .with_label_values(&[&route_name]) - .observe(resp_time); - otel_metrics.response_time.record(resp_time, &attrs); result @@ -180,7 +106,7 @@ pub(crate) async fn request_recorder( #[cfg(test)] mod tests { - use crate::test::{AxumResponseTestExt, AxumRouterTestExt, async_wrapper}; + use crate::test::{AxumRouterTestExt, async_wrapper}; use opentelemetry_sdk::metrics::data::{AggregatedMetrics, MetricData}; use pretty_assertions::assert_eq; use std::collections::HashMap; @@ -244,7 +170,6 @@ mod tests { .await?; let frontend = env.web_app().await; - let metrics = env.instance_metrics(); for (route, _) in ROUTES.iter() { frontend.get(route).await?; @@ -350,80 +275,6 @@ mod tests { ) ); - // this shows what the routes were *actually* recorded as, making it easier to update ROUTES if the name changes. - let metrics_serialized = metrics.gather(&env.context.pool)?; - let all_routes_visited = metrics_serialized - .iter() - .find(|x| x.name() == "docsrs_routes_visited") - .unwrap(); - let routes_visited_pretty: Vec<_> = all_routes_visited - .get_metric() - .iter() - .map(|metric| { - let labels = metric.get_label(); - assert_eq!(labels.len(), 1); // not sure when this would be false - let route = labels[0].value(); - let count = metric.get_counter().get_value(); - format!("{route}: {count}") - }) - .collect(); - println!("routes: {routes_visited_pretty:?}"); - - for (label, count) in expected.iter() { - assert_eq!( - metrics.routes_visited.with_label_values(&[*label]).get(), - *count, - "routes_visited metrics for {label} are incorrect", - ); - assert_eq!( - metrics - .response_time - .with_label_values(&[*label]) - .get_sample_count(), - *count, - "response_time metrics for {label} are incorrect", - ); - } - - Ok(()) - }) - } - - #[test] - fn test_metrics_page_success() { - async_wrapper(|env| async move { - let response = env.web_app().await.get("/about/metrics").await?; - assert!(response.status().is_success()); - - let body = response.text().await?; - assert!(body.contains("docsrs_failed_builds"), "{}", body); - assert!(body.contains("queued_crates_count"), "{}", body); - Ok(()) - }) - } - - #[test] - fn test_service_metrics_page_success() { - async_wrapper(|env| async move { - let response = env.web_app().await.get("/about/metrics/service").await?; - assert!(response.status().is_success()); - - let body = response.text().await?; - assert!(!body.contains("docsrs_failed_builds"), "{}", body); - assert!(body.contains("queued_crates_count"), "{}", body); - Ok(()) - }) - } - - #[test] - fn test_instance_metrics_page_success() { - async_wrapper(|env| async move { - let response = env.web_app().await.get("/about/metrics/instance").await?; - assert!(response.status().is_success()); - - let body = response.text().await?; - assert!(body.contains("docsrs_failed_builds"), "{}", body); - assert!(!body.contains("queued_crates_count"), "{}", body); Ok(()) }) } diff --git a/src/web/mod.rs b/src/web/mod.rs index 451c40cf9..689aaf7e8 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -7,7 +7,7 @@ use crate::{ CrateId, types::{BuildStatus, version::Version}, }, - utils::{get_correct_docsrs_style_file, report_error}, + utils::get_correct_docsrs_style_file, web::{ metrics::WebMetrics, page::templates::{RenderBrands, RenderSolid, filters}, @@ -496,8 +496,6 @@ async fn apply_middleware( }))) .layer(Extension(context.pool.clone())) .layer(Extension(context.async_build_queue.clone())) - .layer(Extension(context.service_metrics.clone())) - .layer(Extension(context.instance_metrics.clone())) .layer(Extension(web_metrics)) .layer(Extension(context.config.clone())) .layer(Extension(context.registry_api.clone())) @@ -518,49 +516,6 @@ pub(crate) async fn build_axum_app( apply_middleware(routes::build_axum_routes(), context, Some(template_data)).await } -pub(crate) async fn build_metrics_axum_app(context: &Context) -> Result { - apply_middleware(routes::build_metric_routes(), context, None).await -} - -pub fn start_background_metrics_webserver( - addr: Option, - context: &Context, -) -> Result<(), Error> { - let axum_addr: SocketAddr = addr.unwrap_or(DEFAULT_BIND); - - tracing::info!( - "Starting metrics web server on `{}:{}`", - axum_addr.ip(), - axum_addr.port() - ); - - let metrics_axum_app = context - .runtime - .block_on(build_metrics_axum_app(context))? - .into_make_service(); - - context.runtime.spawn(async move { - match tokio::net::TcpListener::bind(axum_addr) - .await - .context("error binding socket for metrics web server") - { - Ok(listener) => { - if let Err(err) = axum::serve(listener, metrics_axum_app) - .await - .context("error running metrics web server") - { - report_error(&err); - } - } - Err(err) => { - report_error(&err); - } - }; - }); - - Ok(()) -} - #[instrument(skip_all)] pub fn start_web_server(addr: Option, context: &Context) -> Result<(), Error> { let template_data = Arc::new(TemplateData::new(context.config.render_threads)?); @@ -579,7 +534,7 @@ pub fn start_web_server(addr: Option, context: &Context) -> Result<( .into_make_service(); let listener = tokio::net::TcpListener::bind(axum_addr) .await - .context("error binding socket for metrics web server")?; + .context("error binding socket for web server")?; axum::serve(listener, app) .with_graceful_shutdown(shutdown_signal()) diff --git a/src/web/releases.rs b/src/web/releases.rs index a1eed2485..109e0d7e1 100644 --- a/src/web/releases.rs +++ b/src/web/releases.rs @@ -3,7 +3,7 @@ use super::cache::CachePolicy; use crate::build_queue::PRIORITY_CONTINUOUS; use crate::{ - AsyncBuildQueue, Config, InstanceMetrics, RegistryApi, + AsyncBuildQueue, Config, RegistryApi, build_queue::QueuedCrate, cdn, db::types::version::Version, @@ -455,7 +455,6 @@ impl Default for Search { async fn redirect_to_random_crate( config: Arc, - metrics: Arc, otel_metrics: Arc, conn: &mut sqlx::PgConnection, ) -> AxumResult> { @@ -493,7 +492,6 @@ async fn redirect_to_random_crate( .context("error fetching random crate")?; if let Some(row) = row { - metrics.im_feeling_lucky_searches.inc(); otel_metrics.im_feeling_lucky_searches.add(1, &[]); let params = RustdocParams::new(&row.name) @@ -522,7 +520,6 @@ pub(crate) async fn search_handler( mut conn: DbConnection, Extension(config): Extension>, Extension(registry): Extension>, - Extension(metrics): Extension>, Extension(otel_metrics): Extension>, Query(mut query_params): Query>, ) -> AxumResult { @@ -539,11 +536,9 @@ pub(crate) async fn search_handler( if query_params.remove("i-am-feeling-lucky").is_some() || query.contains("::") { // redirect to a random crate if query is empty if query.is_empty() { - return Ok( - redirect_to_random_crate(config, metrics, otel_metrics, &mut conn) - .await? - .into_response(), - ); + return Ok(redirect_to_random_crate(config, otel_metrics, &mut conn) + .await? + .into_response()); } let mut queries = BTreeMap::new(); diff --git a/src/web/routes.rs b/src/web/routes.rs index ba3924f83..0c0e4cc73 100644 --- a/src/web/routes.rs +++ b/src/web/routes.rs @@ -85,22 +85,6 @@ async fn block_blacklisted_prefixes_middleware( next.run(request).await } -pub(super) fn build_metric_routes() -> AxumRouter { - AxumRouter::new() - .route_with_tsr( - "/about/metrics/instance", - get_internal(super::metrics::instance_metrics_handler), - ) - .route_with_tsr( - "/about/metrics/service", - get_internal(super::metrics::service_metrics_handler), - ) - .route_with_tsr( - "/about/metrics", - get_internal(super::metrics::metrics_handler), - ) -} - fn cached_permanent_redirect(uri: &str) -> impl IntoResponse { ( Extension(CachePolicy::ForeverInCdnAndBrowser), @@ -154,7 +138,6 @@ pub(super) fn build_axum_routes() -> AxumRouter { "/about/builds", get_internal(super::sitemap::about_builds_handler), ) - .merge(build_metric_routes()) .route_with_tsr("/about", get_internal(super::sitemap::about_handler)) .route_with_tsr( "/about/{subpage}", diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index aeb7048d2..0a8c4f9ef 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -1,7 +1,7 @@ //! rustdoc handlerr use crate::{ - AsyncStorage, BUILD_VERSION, Config, InstanceMetrics, RUSTDOC_STATIC_STORAGE_PREFIX, + AsyncStorage, BUILD_VERSION, Config, RUSTDOC_STATIC_STORAGE_PREFIX, registry_api::OwnerKind, storage::{ CompressionAlgorithm, RustdocJsonFormatVersion, StreamingBlob, rustdoc_archive_path, @@ -510,7 +510,6 @@ impl RustdocPage { async fn into_response( self: &Arc, template_data: Arc, - metrics: Arc, otel_metrics: Arc, rustdoc_html: StreamingBlob, max_parse_memory: usize, @@ -551,7 +550,6 @@ impl RustdocPage { rustdoc_html.content, max_parse_memory, self.clone(), - metrics, otel_metrics, )), ) @@ -572,7 +570,6 @@ impl RustdocPage { #[instrument(skip_all)] pub(crate) async fn rustdoc_html_server_handler( params: RustdocParams, - Extension(metrics): Extension>, Extension(otel_metrics): Extension>, Extension(templates): Extension>, Extension(storage): Extension>, @@ -774,10 +771,6 @@ pub(crate) async fn rustdoc_html_server_handler( let current_target = params.doc_target_or_default().unwrap_or_default(); - metrics - .recently_accessed_releases - .record(krate.crate_id, krate.release_id, current_target); - // Build the page of documentation, let page = Arc::new(RustdocPage { latest_path, @@ -793,7 +786,6 @@ pub(crate) async fn rustdoc_html_server_handler( Ok(page .into_response( templates, - metrics, otel_metrics, blob, config.max_parse_memory,