aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/main.rs155
1 file changed, 117 insertions, 38 deletions
diff --git a/src/main.rs b/src/main.rs
index 9d0ae2f..ff25e91 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,5 @@
use std::collections::HashSet;
+use std::fmt::Display;
use std::future::Future;
use std::net::SocketAddr;
use std::ops::RangeInclusive;
@@ -13,10 +14,15 @@ use hyper::server::conn::http1;
use hyper::service::Service;
use hyper::{Request, Response, StatusCode};
use hyper_util::rt::TokioIo;
+use num_bigfloat::BigFloat;
+use num_traits::FromPrimitive;
+use num_traits::Pow;
use rand::prelude::*;
use rand_pcg::Pcg64;
use rand_seeder::Seeder;
use tokio::net::TcpListener;
+use tracing::level_filters::LevelFilter;
+use tracing_subscriber::EnvFilter;
/// Endless honeypot for webcrawlers
#[derive(Parser, Debug)]
@@ -33,15 +39,28 @@ struct Args {
/// maximum delay for responses (in milliseconds)
#[arg(long, default_value_t = 0)]
delay_max: u64,
+ /// maximum number of segments in a url path
+ #[arg(short, long, default_value_t = 6)]
+ segments: u8,
}
#[tokio::main]
async fn main() -> Result<ExitCode, Box<dyn std::error::Error + Send + Sync>> {
let args = Args::parse();
- tracing_subscriber::fmt::fmt().init();
+ tracing_subscriber::fmt::fmt()
+ .with_env_filter(
+ EnvFilter::builder()
+ .with_default_directive(LevelFilter::INFO.into())
+ .with_env_var("RUST_LOG")
+ .from_env()
+ .expect("invalid env"),
+ )
+ .init();
- let svc = RandomPageService::new(&args);
+ let generator = PageGenerator::new(&args);
+
+ generator.stats();
tracing::info!(port = args.port, "starting pitch lake");
@@ -49,6 +68,10 @@ async fn main() -> Result<ExitCode, Box<dyn std::error::Error + Send + Sync>> {
let listener = TcpListener::bind(addr).await?;
+ let svc = RandomPageService {
+ ctx: Arc::new(generator),
+ };
+
loop {
let (stream, _) = listener.accept().await?;
@@ -69,22 +92,6 @@ struct RandomPageService {
pub ctx: Arc<PageGenerator>,
}
-impl RandomPageService {
- pub fn new(args: &Args) -> Self {
- let dictionary_data = include_bytes!(env!("DICTIONARY_FILE_PATH"));
- let dictionary_string: &'static str =
- std::str::from_utf8(dictionary_data).unwrap();
- Self {
- ctx: Arc::new(PageGenerator {
- seed: args.seed.clone(),
- delay: args.delay_min..=(args.delay_max.max(args.delay_min)),
- dict: dictionary_string.split_whitespace().collect(),
- dict_set: dictionary_string.split_whitespace().collect(),
- }),
- }
- }
-}
-
impl Service<Request<hyper::body::Incoming>> for RandomPageService {
type Response = Response<Full<Bytes>>;
type Error = hyper::Error;
@@ -116,19 +123,65 @@ impl Service<Request<hyper::body::Incoming>> for RandomPageService {
}
}
-const MAX_ROUTE_SEGMENTS: usize = 6;
const PARAGRAPH_WORDS: RangeInclusive<usize> = 10..=200;
const N_LINKS: RangeInclusive<usize> = 3..=10;
#[derive(Debug)]
struct PageGenerator {
seed: String,
+ segments: u8,
delay: RangeInclusive<u64>,
dict: Vec<&'static str>,
dict_set: HashSet<&'static str>,
}
impl PageGenerator {
+ fn new(args: &Args) -> Self {
+ let dictionary_data = include_bytes!(env!("DICTIONARY_FILE_PATH"));
+ let dictionary_string: &'static str =
+ std::str::from_utf8(dictionary_data).unwrap();
+ PageGenerator {
+ seed: args.seed.clone(),
+ segments: args.segments,
+ delay: args.delay_min..=(args.delay_max.max(args.delay_min)),
+ dict: dictionary_string.split_whitespace().collect(),
+ dict_set: dictionary_string.split_whitespace().collect(),
+ }
+ }
+
+ fn stats(&self) -> () {
+ let dict_len = BigFloat::from_usize(self.dict.len()).unwrap();
+ let all_segment_lengths = 1..=self.segments;
+
+ let n_pages = all_segment_lengths
+ .map(|n_segments| dict_len.pow(BigFloat::from_u8(n_segments)))
+ .sum::<BigFloat>();
+
+ let avg_paragraph_words =
+ BigFloat::from_usize(PARAGRAPH_WORDS.sum::<usize>()).unwrap()
+ / BigFloat::from_usize(PARAGRAPH_WORDS.count()).unwrap();
+
+ let avg_word_bytes = self
+ .dict
+ .iter()
+ .map(|word| word.as_bytes().len())
+ .map(|l| BigFloat::from_usize(l).unwrap())
+ .sum::<BigFloat>()
+ / dict_len;
+
+ let avg_page_bytes = avg_paragraph_words * avg_word_bytes;
+
+ let total_size_bytes = n_pages * avg_page_bytes;
+ let bf1024 = BigFloat::from_u32(1024);
+ let total_size_petabytes =
+ total_size_bytes / bf1024 / bf1024 / bf1024 / bf1024 / bf1024;
+
+ tracing::info!(
+ n_pages = n_pages.to_string(),
+ size_gb = format_size(total_size_petabytes)
+ );
+ }
+
async fn build_page(
&self,
req: Request<hyper::body::Incoming>,
@@ -156,12 +209,12 @@ impl PageGenerator {
let n_links = rng.gen_range(N_LINKS);
let random_paragraph = (0..n_words)
- .map(|_| random_word(&mut rng, &self.dict))
+ .map(|_| self.random_word(&mut rng))
.collect::<Vec<&str>>()
.join(" ");
let random_links = (0..n_links)
- .map(|_| random_route_link(&mut rng, &self.dict))
+ .map(|_| self.random_route_link(&mut rng))
.map(|link| format!("<p>{}</p>", link))
.collect::<Vec<String>>()
.join("\n");
@@ -182,24 +235,20 @@ impl PageGenerator {
random_paragraph, random_links,
))
}
-}
-
-fn random_route_link(rng: &mut Pcg64, dictionary: &[&'static str]) -> String {
- let n_segments = rng.gen_range(1..=MAX_ROUTE_SEGMENTS);
- let random_route = (0..n_segments)
- .map(|_| random_word(rng, dictionary))
- .collect::<Vec<&str>>()
- .join("/");
- let label = random_word(rng, dictionary);
- format!("<a href=\"/{}\">{}</a>", random_route, label)
-}
+ fn random_route_link(&self, rng: &mut Pcg64) -> String {
+ let n_segments = rng.gen_range(1..=self.segments);
+ let random_route = (0..n_segments)
+ .map(|_| self.random_word(rng))
+ .collect::<Vec<&str>>()
+ .join("/");
+ let label = self.random_word(rng);
+ format!("<a href=\"/{}\">{}</a>", random_route, label)
+ }
-pub fn random_word(
- rng: &mut Pcg64,
- dictionary: &[&'static str],
-) -> &'static str {
- let i = rng.gen_range(0..dictionary.len());
- dictionary[i]
+ pub fn random_word(&self, rng: &mut Pcg64) -> &'static str {
+ let i = rng.gen_range(0..self.dict.len());
+ self.dict[i]
+ }
}
pub fn build_dict() -> Vec<&'static str> {
@@ -208,3 +257,33 @@ pub fn build_dict() -> Vec<&'static str> {
std::str::from_utf8(dictionary_data).unwrap();
dictionary_string.split_whitespace().collect()
}
+
+pub fn format_size(mut bytes: BigFloat) -> String {
+ let exponents: &[&str] = &[
+ "kilobytes",
+ "megabytes",
+ "gigabytes",
+ "terabytes",
+ "petabytes",
+ "exabytes",
+ "zettabytes",
+ "yottabytes",
+ "ronnabytes",
+ "quettabytes",
+ ];
+
+ let fv1024 = BigFloat::from_u32(1024);
+ let mut current_label = "bytes".to_string();
+
+ for label in exponents {
+ let exp_val = bytes.log10().to_f32();
+ if exp_val < 2.0 {
+ break;
+ }
+
+ bytes = bytes / fv1024;
+ current_label = label.to_string();
+ }
+
+ format!("{:.2} {}", bytes, current_label)
+}