diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/main.rs | 155 |
1 files changed, 117 insertions, 38 deletions
diff --git a/src/main.rs b/src/main.rs index 9d0ae2f..ff25e91 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ use std::collections::HashSet; +use std::fmt::Display; use std::future::Future; use std::net::SocketAddr; use std::ops::RangeInclusive; @@ -13,10 +14,15 @@ use hyper::server::conn::http1; use hyper::service::Service; use hyper::{Request, Response, StatusCode}; use hyper_util::rt::TokioIo; +use num_bigfloat::BigFloat; +use num_traits::FromPrimitive; +use num_traits::Pow; use rand::prelude::*; use rand_pcg::Pcg64; use rand_seeder::Seeder; use tokio::net::TcpListener; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::EnvFilter; /// Endless honeypot for webcrawlers #[derive(Parser, Debug)] @@ -33,15 +39,28 @@ struct Args { /// minimum delay for responses (in milliseconds) #[arg(long, default_value_t = 0)] delay_max: u64, + /// maximum number of segments in a url path + #[arg(short, long, default_value_t = 6)] + segments: u8, } #[tokio::main] async fn main() -> Result<ExitCode, Box<dyn std::error::Error + Send + Sync>> { let args = Args::parse(); - tracing_subscriber::fmt::fmt().init(); + tracing_subscriber::fmt::fmt() + .with_env_filter( + EnvFilter::builder() + .with_default_directive(LevelFilter::INFO.into()) + .with_env_var("RUST_LOG") + .from_env() + .expect("invalid env"), + ) + .init(); - let svc = RandomPageService::new(&args); + let generator = PageGenerator::new(&args); + + generator.stats(); tracing::info!(port = args.port, "starting pitch lake"); @@ -49,6 +68,10 @@ async fn main() -> Result<ExitCode, Box<dyn std::error::Error + Send + Sync>> { let listener = TcpListener::bind(addr).await?; + let svc = RandomPageService { + ctx: Arc::new(generator), + }; + loop { let (stream, _) = listener.accept().await?; @@ -69,22 +92,6 @@ struct RandomPageService { pub ctx: Arc<PageGenerator>, } -impl RandomPageService { - pub fn new(args: &Args) -> Self { - let dictionary_data = include_bytes!(env!("DICTIONARY_FILE_PATH")); - let dictionary_string: &'static str = - std::str::from_utf8(dictionary_data).unwrap(); - Self { - ctx: Arc::new(PageGenerator { - seed: args.seed.clone(), - delay: args.delay_min..=(args.delay_max.max(args.delay_min)), - dict: dictionary_string.split_whitespace().collect(), - dict_set: dictionary_string.split_whitespace().collect(), - }), - } - } -} - impl Service<Request<hyper::body::Incoming>> for RandomPageService { type Response = Response<Full<Bytes>>; type Error = hyper::Error; @@ -116,19 +123,65 @@ impl Service<Request<hyper::body::Incoming>> for RandomPageService { } } -const MAX_ROUTE_SEGMENTS: usize = 6; const PARAGRAPH_WORDS: RangeInclusive<usize> = 10..=200; const N_LINKS: RangeInclusive<usize> = 3..=10; #[derive(Debug)] struct PageGenerator { seed: String, + segments: u8, delay: RangeInclusive<u64>, dict: Vec<&'static str>, dict_set: HashSet<&'static str>, } impl PageGenerator { + fn new(args: &Args) -> Self { + let dictionary_data = include_bytes!(env!("DICTIONARY_FILE_PATH")); + let dictionary_string: &'static str = + std::str::from_utf8(dictionary_data).unwrap(); + PageGenerator { + seed: args.seed.clone(), + segments: args.segments, + delay: args.delay_min..=(args.delay_max.max(args.delay_min)), + dict: dictionary_string.split_whitespace().collect(), + dict_set: dictionary_string.split_whitespace().collect(), + } + } + + fn stats(&self) -> () { + let dict_len = BigFloat::from_usize(self.dict.len()).unwrap(); + let all_segment_lengths = 1..=self.segments; + + let n_pages = all_segment_lengths + .map(|n_segments| dict_len.pow(BigFloat::from_u8(n_segments))) + .sum::<BigFloat>(); + + let avg_paragraph_words = + BigFloat::from_usize(PARAGRAPH_WORDS.sum::<usize>()).unwrap() + / BigFloat::from_usize(PARAGRAPH_WORDS.count()).unwrap(); + + let avg_word_bytes = self + .dict + .iter() + .map(|word| word.as_bytes().len()) + .map(|l| BigFloat::from_usize(l).unwrap()) + .sum::<BigFloat>() + / dict_len; + + let avg_page_bytes = avg_paragraph_words * avg_word_bytes; + + let total_size_bytes = n_pages * avg_page_bytes; + let bf1024 = BigFloat::from_u32(1024); + let total_size_petabytes = + total_size_bytes / bf1024 / bf1024 / bf1024 / bf1024 / bf1024; + + tracing::info!( + n_pages = n_pages.to_string(), + size_gb = format_size(total_size_petabytes) + ); + } + async fn build_page( &self, req: Request<hyper::body::Incoming>, @@ -156,12 +209,12 @@ impl PageGenerator { let n_links = rng.gen_range(N_LINKS); let random_paragraph = (0..n_words) - .map(|_| random_word(&mut rng, &self.dict)) + .map(|_| self.random_word(&mut rng)) .collect::<Vec<&str>>() .join(" "); let random_links = (0..n_links) - .map(|_| random_route_link(&mut rng, &self.dict)) + .map(|_| self.random_route_link(&mut rng)) .map(|link| format!("<p>{}</p>", link)) .collect::<Vec<String>>() .join("\n"); @@ -182,24 +235,20 @@ impl PageGenerator { random_paragraph, random_links, )) } -} - -fn random_route_link(rng: &mut Pcg64, dictionary: &[&'static str]) -> String { - let n_segments = rng.gen_range(1..=MAX_ROUTE_SEGMENTS); - let random_route = (0..n_segments) - .map(|_| random_word(rng, dictionary)) - .collect::<Vec<&str>>() - .join("/"); - let label = random_word(rng, dictionary); - format!("<a href=\"/{}\">{}</a>", random_route, label) -} + fn random_route_link(&self, rng: &mut Pcg64) -> String { + let n_segments = rng.gen_range(1..=self.segments); + let random_route = (0..n_segments) + .map(|_| self.random_word(rng)) + .collect::<Vec<&str>>() + .join("/"); + let label = self.random_word(rng); + format!("<a href=\"/{}\">{}</a>", random_route, label) + } -pub fn random_word( - rng: &mut Pcg64, - dictionary: &[&'static str], -) -> &'static str { - let i = rng.gen_range(0..dictionary.len()); - dictionary[i] + pub fn random_word(&self, rng: &mut Pcg64) -> &'static str { + let i = rng.gen_range(0..self.dict.len()); + self.dict[i] + } } pub fn build_dict() -> Vec<&'static str> { @@ -208,3 +257,33 @@ pub fn build_dict() -> Vec<&'static str> { std::str::from_utf8(dictionary_data).unwrap(); dictionary_string.split_whitespace().collect() } + +pub fn format_size(mut bytes: BigFloat) -> String { + let exponents: &[&str] = &[ + "kilobytes", + "megabytes", + "gigabytes", + "terabytes", + "petabytes", + "exabytes", + "zettabytes", + "yottabytes", + "ronnabytes", + "quettabytes", + ]; + + let fv1024 = BigFloat::from_u32(1024); + let mut current_label = "bytes".to_string(); + + for label in exponents { + let exp_val = bytes.log10().to_f32(); + if exp_val < 2.0 { + break; + } + + bytes = bytes / fv1024; + current_label = label.to_string(); + } + + format!("{:.2} {}", bytes, current_label) +} |