diff --git a/src/output.rs b/src/output.rs index eb82a518c..e72010395 100644 --- a/src/output.rs +++ b/src/output.rs @@ -10,11 +10,11 @@ use color_eyre::eyre::WrapErr as _; use itertools::Itertools as _; use crate::{ - HashMap, config::Config, ipdb, proxy::{Proxy, ProxyType}, utils::is_docker, + HashMap, }; fn compare_timeout(a: &Proxy, b: &Proxy) -> Ordering { @@ -73,6 +73,23 @@ pub async fn save_proxies( proxies.sort_unstable_by(compare_natural); } + // Deduplicate proxies by exit_ip when available. Different proxies can exit via the same IP. + // We do this after sorting so that if sorted by speed, the fastest one is kept. + // Track seen exit_ip per protocol to avoid cross-protocol removal + let mut seen: std::collections::HashSet<(ProxyType, String)> = + std::collections::HashSet::new(); + let mut deduped = Vec::with_capacity(proxies.len()); + for p in proxies { + if let Some(ip) = &p.exit_ip { + let key = (p.protocol, ip.clone()); + if !seen.insert(key) { + continue; + } + } + deduped.push(p); + } + proxies = deduped; + if config.output.json.enabled { let (maybe_asn_db, maybe_geo_db) = tokio::try_join!( async { diff --git a/src/parsers.rs b/src/parsers.rs index 902e76fc8..65a2f7866 100644 --- a/src/parsers.rs +++ b/src/parsers.rs @@ -1,13 +1,12 @@ use std::sync::LazyLock; pub static PROXY_REGEX: LazyLock = LazyLock::new(|| { - let pattern = r"(?:^|[^0-9A-Za-z])(?:(?Phttps?|socks[45]):\/\/)?(?:(?P[0-9A-Za-z]{1,64}):(?P[0-9A-Za-z]{1,64})@)?(?P[A-Za-z][\-\.A-Za-z]{0,251}[A-Za-z]|[A-Za-z]|(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(?:\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])){3}):(?P[0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])(?=[^0-9A-Za-z]|$)"; + let pattern = r"(?:^|[^0-9A-Za-z])(?:(?Phttps?|socks[45]):\/\/)?(?:(?P[0-9A-Za-z._~\-]{1,256}):(?P[0-9A-Za-z._~\-]{1,256})@)?(?P[A-Za-z][\-\.A-Za-z]{0,251}[A-Za-z]|[A-Za-z]|(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(?:\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])){3}):(?P[0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])(?=[^0-9A-Za-z]|$)"; fancy_regex::RegexBuilder::new(pattern) .backtrack_limit(usize::MAX) .build() .unwrap() }); - static IPV4_REGEX: LazyLock = LazyLock::new(|| { let pattern = r"^\s*(?P(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(?:\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])){3})(?::(?:[0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5]))?\s*$"; fancy_regex::Regex::new(pattern).unwrap()