use std::collections::HashSet;
use std::future::Future;
use std::net::SocketAddr;
use std::ops::RangeInclusive;
use std::pin::Pin;
use std::sync::Arc;

use clap::Parser;
use http_body_util::Full;
use hyper::body::Bytes;
use hyper::server::conn::http1;
use hyper::service::Service;
use hyper::{Request, Response, StatusCode};
use hyper_util::rt::TokioIo;
use num_bigfloat::BigFloat;
use num_traits::FromPrimitive;
use num_traits::Pow;
use rand::prelude::*;
use rand_pcg::Pcg64;
use rand_seeder::Seeder;
use tokio::net::TcpListener;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::EnvFilter;

/// Endless honeypot for webcrawlers
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
    /// seed for deterministic page generation
    seed: String,

    /// port for serving http traffic
    #[arg(short, long, default_value_t = 3200)]
    port: u16,

    /// minimum delay for responses (in milliseconds)
    #[arg(long, default_value_t = 0)]
    delay_min: u64,

    /// maximum delay for responses (in milliseconds)
    #[arg(long, default_value_t = 0)]
    delay_max: u64,

    /// maximum number of segments in a url path
    #[arg(long, default_value_t = 6)]
    url_segments: u8,

    /// minimum number of words in a paragraph
    #[arg(long, default_value_t = 10)]
    paragraph_min: u16,

    /// maximum number of words in a paragraph
    #[arg(long, default_value_t = 200)]
    paragraph_max: u16,

    /// minimum number of links on a page
    #[arg(long, default_value_t = 3)]
    links_min: u16,

    /// maximum number of links on a page
    #[arg(long, default_value_t = 10)]
    links_max: u16,

    /// optional prefix prepended to generated link hrefs
    #[arg(long)]
    href_prefix: Option<String>,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args = Args::parse();

    tracing_subscriber::fmt::fmt()
        .with_env_filter(
            EnvFilter::builder()
                .with_default_directive(LevelFilter::INFO.into())
                .with_env_var("RUST_LOG")
                .from_env()
                .expect("invalid env"),
        )
        .init();

    let generator = PageGenerator::new(&args);
    generator.stats();

    tracing::info!(port = args.port, "starting pitch lake");
    let addr = SocketAddr::from(([127, 0, 0, 1], args.port));
    let listener = TcpListener::bind(addr).await?;

    let svc = RandomPageService {
        ctx: Arc::new(generator),
    };

    loop {
        let (stream, _) = listener.accept().await?;
        let io = TokioIo::new(stream);
        let svc = svc.clone();
        tokio::task::spawn(async move {
            if let Err(err) = http1::Builder::new().serve_connection(io, svc).await {
                eprintln!("Error serving connection: {:?}", err);
            }
        });
    }
}

#[derive(Debug, Clone)]
struct RandomPageService {
    pub ctx: Arc<PageGenerator>,
}

impl Service<Request<hyper::body::Incoming>> for RandomPageService {
    type Response = Response<Full<Bytes>>;
    type Error = hyper::Error;
    type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;

    fn call(&self, req: Request<hyper::body::Incoming>) -> Self::Future {
        fn mk_response(
            s: String,
            status_code: StatusCode,
        ) -> Result<Response<Full<Bytes>>, hyper::Error> {
            Ok(Response::builder()
                .status(status_code)
                .body(Full::new(Bytes::from(s)))
                .unwrap())
        }

        let ctx = self.ctx.clone();
        Box::pin(async move {
            match ctx.build_page(req).await {
                Ok(body) => mk_response(body, StatusCode::OK),
                Err((err_message, code)) => mk_response(err_message.to_string(), code),
            }
        })
    }
}

#[derive(Debug)]
struct PageGenerator {
    seed: String,
    segments: u8,
    href_prefix: Option<String>,
    paragraph_size: RangeInclusive<u16>,
    n_links: RangeInclusive<u16>,
    delay: RangeInclusive<u64>,
    dict: Vec<&'static str>,
    dict_set: HashSet<&'static str>,
}

impl PageGenerator {
    fn new(args: &Args) -> Self {
        // The dictionary is embedded at compile time; DICTIONARY_FILE_PATH is
        // provided by the build environment.
        let dictionary_data = include_bytes!(env!("DICTIONARY_FILE_PATH"));
        let dictionary_string: &'static str = std::str::from_utf8(dictionary_data).unwrap();
        PageGenerator {
            seed: args.seed.clone(),
            href_prefix: args.href_prefix.clone(),
            segments: args.url_segments,
            // Clamp each max to at least its min so the ranges are never empty.
            paragraph_size: args.paragraph_min..=(args.paragraph_max.max(args.paragraph_min)),
            n_links: (args.links_min)..=(args.links_max.max(args.links_min)),
            delay: args.delay_min..=(args.delay_max.max(args.delay_min)),
            dict: dictionary_string.split_whitespace().collect(),
            dict_set: dictionary_string.split_whitespace().collect(),
        }
    }

    /// Log a rough estimate of how many distinct pages the honeypot can serve
    /// and how much data that represents.
    fn stats(&self) {
        let dict_len = BigFloat::from_usize(self.dict.len()).unwrap();
        // Every path of 1..=segments dictionary words is a distinct page.
        let all_segment_lengths = 1..=self.segments;
        let n_pages = all_segment_lengths
            .map(|n_segments| dict_len.pow(BigFloat::from_u8(n_segments)))
            .sum::<BigFloat>();
        let avg_paragraph_words = BigFloat::from_u16(self.paragraph_size.clone().sum::<u16>())
            / BigFloat::from_usize(self.paragraph_size.clone().count()).unwrap();
        let avg_word_bytes = self
            .dict
            .iter()
            .map(|word| word.as_bytes().len())
            .map(|l| BigFloat::from_usize(l).unwrap())
            .sum::<BigFloat>()
            / dict_len;
        let avg_page_bytes = avg_paragraph_words * avg_word_bytes;
        let total_size_bytes = n_pages * avg_page_bytes;
        // format_size already scales the byte count to a suitable unit.
        tracing::info!(
            n_pages = n_pages.to_string(),
            total_size = format_size(total_size_bytes)
        );
    }

    async fn build_page(
        &self,
        req: Request<hyper::body::Incoming>,
    ) -> Result<String, (&'static str, StatusCode)> {
        // Optionally stall the response to waste the crawler's time.
        let delay = if *self.delay.start() > 0 {
            let delay_amount = rand::thread_rng().gen_range(self.delay.clone());
            tokio::time::sleep(tokio::time::Duration::from_millis(delay_amount)).await;
            Some(delay_amount)
        } else {
            None
        };

        // Only serve routes whose segments all come from the dictionary and
        // that do not exceed the configured segment limit.
        let route = req.uri().path();
        for (i, segment) in route.split('/').filter(|v| !v.is_empty()).enumerate() {
            if !self.dict_set.contains(segment) {
                return Err(("not found", StatusCode::NOT_FOUND));
            }
            tracing::debug!(i, segment);
            if i >= self.segments as usize {
                return Err(("not found", StatusCode::NOT_FOUND));
            }
        }

        let page_title = route.replace('/', " ").trim().to_string();
        // Seed the RNG from the route so every URL renders the same page on
        // every visit.
        let mut rng: Pcg64 = Seeder::from(format!("{}---{}", self.seed, route)).make_rng();
        let n_words = rng.gen_range(self.paragraph_size.clone());
        let n_links = rng.gen_range(self.n_links.clone());

        let random_paragraph = (0..n_words)
            .map(|_| self.random_word(&mut rng))
            .collect::<Vec<_>>()
            .join(" ");
        let random_links = (0..n_links)
            .map(|_| self.random_route_link(&mut rng))
            .map(|link| format!("<li>{}</li>", link))
            .collect::<Vec<_>>()
            .join("");

        tracing::info!(route, delay);

        Ok(format!(
            "<!DOCTYPE html>\
            <html>\
            <head>\
            <title>{page_title}</title>\
            </head>\
            <body>\
            <h1>{page_title}</h1>\
            <p>\
            {random_paragraph}\
            </p>\
            <ul>\
            {random_links}\
            </ul>\
            </body>\
            </html>",
        ))
    }

    /// Build a single HTML link whose href is another valid honeypot route.
    fn random_route_link(&self, rng: &mut Pcg64) -> String {
        let n_segments = rng.gen_range(1..=self.segments);
        let random_route = (0..n_segments)
            .map(|_| self.random_word(rng))
            .collect::<Vec<_>>()
            .join("/");
        let label = self.random_word(rng);
        // The optional prefix lets the honeypot be mounted under a sub-path.
        format!(
            "<a href=\"{}/{}\">{}</a>",
            self.href_prefix.as_deref().unwrap_or(""),
            random_route,
            label
        )
    }

    pub fn random_word(&self, rng: &mut Pcg64) -> &'static str {
        let i = rng.gen_range(0..self.dict.len());
        self.dict[i]
    }
}

pub fn build_dict() -> Vec<&'static str> {
    let dictionary_data = include_bytes!(env!("DICTIONARY_FILE_PATH"));
    let dictionary_string: &'static str = std::str::from_utf8(dictionary_data).unwrap();
    dictionary_string.split_whitespace().collect()
}

/// Format a byte count with a human-readable unit label.
pub fn format_size(mut bytes: BigFloat) -> String {
    let exponents: &[&str] = &[
        "kilobytes",
        "megabytes",
        "gigabytes",
        "terabytes",
        "petabytes",
        "exabytes",
        "zettabytes",
        "yottabytes",
        "ronnabytes",
        "quettabytes",
    ];
    let fv1024 = BigFloat::from_u32(1024);
    let mut current_label = "bytes".to_string();
    for label in exponents {
        // Stop scaling once the value has fewer than three integer digits.
        let exp_val = bytes.log10().to_f32();
        if exp_val < 2.0 {
            break;
        }
        bytes = bytes / fv1024;
        current_label = label.to_string();
    }
    format!("{:.2} {}", bytes, current_label)
}
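// A minimal test sketch, not part of the original file, illustrating how
// `format_size` picks a unit label. It assumes the `BigFloat::from_u32`
// constructor already used above; because the exact digits depend on
// num_bigfloat's Display formatting, only the unit suffix is asserted.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn format_size_picks_a_sensible_unit() {
        // 5 MiB expressed in bytes should be reported in megabytes.
        let five_mib = BigFloat::from_u32(5 * 1024 * 1024);
        assert!(format_size(five_mib).ends_with("megabytes"));

        // Small values keep the plain "bytes" label.
        let fifty = BigFloat::from_u32(50);
        assert!(format_size(fifty).ends_with("bytes"));
    }
}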