From e3b460ee74f0e6512f676dfbc814f219ee41c5c9 Mon Sep 17 00:00:00 2001 From: breezystatic77 Date: Sun, 26 Jan 2025 12:42:38 -0500 Subject: stats --- Cargo.lock | 49 +++++++++++++++++++ Cargo.toml | 3 ++ src/main.rs | 155 +++++++++++++++++++++++++++++++++++++++++++++--------------- 3 files changed, 169 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0b516b3..f0da0cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -453,6 +453,26 @@ dependencies = [ "winapi", ] +[[package]] +name = "num-bigfloat" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5ee1e61b978cc2196731ebb9e349d6f27b99750ad674ae7a2f26ae6e93be7e4" +dependencies = [ + "num-traits", + "rand", + "serde", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "object" version = "0.36.7" @@ -517,9 +537,12 @@ dependencies = [ "http-body-util", "hyper", "hyper-util", + "num-bigfloat", + "num-traits", "rand", "rand_pcg", "rand_seeder", + "thousands", "tokio", "tracing", "tracing-subscriber", @@ -665,6 +688,26 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "serde" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -725,6 +768,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thousands" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820" + [[package]] name = "thread_local" version = "1.1.8" diff --git a/Cargo.toml b/Cargo.toml index eac2bad..bb152e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,9 +8,12 @@ clap = { version = "4.5.26", features = ["derive"] } http-body-util = "0.1.2" hyper = { version = "1.5.2", features = ["full"] } hyper-util = { version = "0.1.10", features = ["full"] } +num-bigfloat = "1.7.1" +num-traits = "0.2.19" rand = "0.8.5" rand_pcg = "0.3.1" rand_seeder = "0.3.0" +thousands = "0.2.0" tokio = { version = "1.43.0", features = ["full"] } tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/src/main.rs b/src/main.rs index 9d0ae2f..ff25e91 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ use std::collections::HashSet; +use std::fmt::Display; use std::future::Future; use std::net::SocketAddr; use std::ops::RangeInclusive; @@ -13,10 +14,15 @@ use hyper::server::conn::http1; use hyper::service::Service; use hyper::{Request, Response, StatusCode}; use hyper_util::rt::TokioIo; +use num_bigfloat::BigFloat; +use num_traits::FromPrimitive; +use num_traits::Pow; use rand::prelude::*; use rand_pcg::Pcg64; use rand_seeder::Seeder; use tokio::net::TcpListener; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::EnvFilter; /// Endless honeypot for webcrawlers #[derive(Parser, Debug)] @@ -33,15 +39,28 @@ struct Args { /// minimum delay for responses (in milliseconds) #[arg(long, default_value_t = 0)] delay_max: u64, + /// maximum number of segments in a url path + #[arg(short, long, default_value_t = 6)] + segments: u8, } #[tokio::main] async fn main() -> Result> { let args = Args::parse(); - tracing_subscriber::fmt::fmt().init(); + tracing_subscriber::fmt::fmt() + .with_env_filter( + EnvFilter::builder() + .with_default_directive(LevelFilter::INFO.into()) + .with_env_var("RUST_LOG") + .from_env() + .expect("invalid env"), + ) + .init(); - let svc = RandomPageService::new(&args); + let generator = PageGenerator::new(&args); + + generator.stats(); tracing::info!(port = args.port, "starting pitch lake"); @@ -49,6 +68,10 @@ async fn main() -> Result> { let listener = TcpListener::bind(addr).await?; + let svc = RandomPageService { + ctx: Arc::new(generator), + }; + loop { let (stream, _) = listener.accept().await?; @@ -69,22 +92,6 @@ struct RandomPageService { pub ctx: Arc, } -impl RandomPageService { - pub fn new(args: &Args) -> Self { - let dictionary_data = include_bytes!(env!("DICTIONARY_FILE_PATH")); - let dictionary_string: &'static str = - std::str::from_utf8(dictionary_data).unwrap(); - Self { - ctx: Arc::new(PageGenerator { - seed: args.seed.clone(), - delay: args.delay_min..=(args.delay_max.max(args.delay_min)), - dict: dictionary_string.split_whitespace().collect(), - dict_set: dictionary_string.split_whitespace().collect(), - }), - } - } -} - impl Service> for RandomPageService { type Response = Response>; type Error = hyper::Error; @@ -116,19 +123,65 @@ impl Service> for RandomPageService { } } -const MAX_ROUTE_SEGMENTS: usize = 6; const PARAGRAPH_WORDS: RangeInclusive = 10..=200; const N_LINKS: RangeInclusive = 3..=10; #[derive(Debug)] struct PageGenerator { seed: String, + segments: u8, delay: RangeInclusive, dict: Vec<&'static str>, dict_set: HashSet<&'static str>, } impl PageGenerator { + fn new(args: &Args) -> Self { + let dictionary_data = include_bytes!(env!("DICTIONARY_FILE_PATH")); + let dictionary_string: &'static str = + std::str::from_utf8(dictionary_data).unwrap(); + PageGenerator { + seed: args.seed.clone(), + segments: args.segments, + delay: args.delay_min..=(args.delay_max.max(args.delay_min)), + dict: dictionary_string.split_whitespace().collect(), + dict_set: dictionary_string.split_whitespace().collect(), + } + } + + fn stats(&self) -> () { + let dict_len = BigFloat::from_usize(self.dict.len()).unwrap(); + let all_segment_lengths = 1..=self.segments; + + let n_pages = all_segment_lengths + .map(|n_segments| dict_len.pow(BigFloat::from_u8(n_segments))) + .sum::(); + + let avg_paragraph_words = + BigFloat::from_usize(PARAGRAPH_WORDS.sum::()).unwrap() + / BigFloat::from_usize(PARAGRAPH_WORDS.count()).unwrap(); + + let avg_word_bytes = self + .dict + .iter() + .map(|word| word.as_bytes().len()) + .map(|l| BigFloat::from_usize(l).unwrap()) + .sum::() + / dict_len; + + let avg_page_bytes = avg_paragraph_words * avg_word_bytes; + + let total_size_bytes = n_pages * avg_page_bytes; + let bf1024 = BigFloat::from_u32(1024); + let total_size_petabytes = + total_size_bytes / bf1024 / bf1024 / bf1024 / bf1024 / bf1024; + + tracing::info!( + n_pages = n_pages.to_string(), + size_gb = format_size(total_size_petabytes) + ); + } + async fn build_page( &self, req: Request, @@ -156,12 +209,12 @@ impl PageGenerator { let n_links = rng.gen_range(N_LINKS); let random_paragraph = (0..n_words) - .map(|_| random_word(&mut rng, &self.dict)) + .map(|_| self.random_word(&mut rng)) .collect::>() .join(" "); let random_links = (0..n_links) - .map(|_| random_route_link(&mut rng, &self.dict)) + .map(|_| self.random_route_link(&mut rng)) .map(|link| format!("

{}

", link)) .collect::>() .join("\n"); @@ -182,24 +235,20 @@ impl PageGenerator { random_paragraph, random_links, )) } -} - -fn random_route_link(rng: &mut Pcg64, dictionary: &[&'static str]) -> String { - let n_segments = rng.gen_range(1..=MAX_ROUTE_SEGMENTS); - let random_route = (0..n_segments) - .map(|_| random_word(rng, dictionary)) - .collect::>() - .join("/"); - let label = random_word(rng, dictionary); - format!("{}", random_route, label) -} + fn random_route_link(&self, rng: &mut Pcg64) -> String { + let n_segments = rng.gen_range(1..=self.segments); + let random_route = (0..n_segments) + .map(|_| self.random_word(rng)) + .collect::>() + .join("/"); + let label = self.random_word(rng); + format!("{}", random_route, label) + } -pub fn random_word( - rng: &mut Pcg64, - dictionary: &[&'static str], -) -> &'static str { - let i = rng.gen_range(0..dictionary.len()); - dictionary[i] + pub fn random_word(&self, rng: &mut Pcg64) -> &'static str { + let i = rng.gen_range(0..self.dict.len()); + self.dict[i] + } } pub fn build_dict() -> Vec<&'static str> { @@ -208,3 +257,33 @@ pub fn build_dict() -> Vec<&'static str> { std::str::from_utf8(dictionary_data).unwrap(); dictionary_string.split_whitespace().collect() } + +pub fn format_size(mut bytes: BigFloat) -> String { + let exponents: &[&str] = &[ + "kilobytes", + "megabytes", + "gigabytes", + "terabytes", + "petabytes", + "exabytes", + "zettabytes", + "yottabytes", + "ronnabytes", + "quettabytes", + ]; + + let fv1024 = BigFloat::from_u32(1024); + let mut current_label = "bytes".to_string(); + + for label in exponents { + let exp_val = bytes.log10().to_f32(); + if exp_val < 2.0 { + break; + } + + bytes = bytes / fv1024; + current_label = label.to_string(); + } + + format!("{:.2} {}", bytes, current_label) +} -- cgit v1.2.3