diff --git a/Cargo.toml b/Cargo.toml index 20c1356..68ad7e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ tracing-subscriber = { version = "0.3.18", features = ["json"] } axum = "0.7.5" serde_json = "1.0.122" serde = { version = "1.0.205", features = ["derive"] } -tokio = { version = "1.39.2", features = ["rt", "rt-multi-thread", "macros"] } +tokio = { version = "1.39.2", features = ["rt", "rt-multi-thread", "macros", "full"] } reqwest = { version = "0.12.5", features = ["json"] } async-trait = "0.1.81" toml = "0.8.19" @@ -24,3 +24,5 @@ mongodb = "3.0.1" bson = "2.11.0" thiserror = "1.0.63" openssl = { version = "0.10.59", features = ["vendored"] } +rand = "0.8.5" +tower-http = { version = "0.6.1", features = ["trace"] } diff --git a/Makefile b/Makefile index 3ff675e..0f13f89 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,7 @@ compose-down: ## Run docker-compose down local-development: ## Run compose for local development docker-compose -f local.compose.yml up -d --force-recreate ;\ - IS_LOCAL=true systemfd --no-pid -s http::3000 -- cargo watch -w src -x run + CONFIG_FILE_PATH=./local.config.toml systemfd --no-pid -s http::3000 -- cargo watch -w src -x run local-development-down: ## Run compose for local development docker-compose -f local.compose.yml down diff --git a/README.md b/README.md index 55858a3..04b1ff5 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ make compose-down ```shell cargo install cargo-watch systemfd -docker network create iproxy +docker network create iproxy make local-development ``` diff --git a/compose.yml b/compose.yml index 432b363..c3336c1 100644 --- a/compose.yml +++ b/compose.yml @@ -24,7 +24,7 @@ services: volumes: - /mnt/ssd/iproxy:/data/db -# docker network create iproxy +# docker network create iproxy networks: iproxy: driver: bridge diff --git a/config.toml b/config.toml index 635df10..3c7052d 100644 --- a/config.toml +++ b/config.toml @@ -1,6 +1,7 @@ [server] address = "0.0.0.0" port = 8000 +use_proxy = true [logging] log_level = "info" diff --git a/local.config.toml b/local.config.toml index 90a4889..7b7f83b 100644 --- a/local.config.toml +++ b/local.config.toml @@ -1,6 +1,7 @@ [server] address = "0.0.0.0" port = 8000 +use_proxy = false [logging] log_level = "info" diff --git a/scripts/calculate.py b/scripts/calculate.py index f293c6d..642aefa 100644 --- a/scripts/calculate.py +++ b/scripts/calculate.py @@ -5,7 +5,7 @@ import time # Function to generate IP ranges dynamically from start to end -def generate_ip_ranges(start, end): +def generate_ip_ranges(start, end) -> list: ip_ranges = [] for i in range(start, end + 1): ip_ranges.append(f"{i}.0.0.0/8") @@ -14,13 +14,13 @@ def generate_ip_ranges(start, end): # Function to calculate public IPs excluding private ranges def calculate_public_ips(subnets, private_ranges, output_dir): available_ips_info = {} - + # Convert private ranges to network objects excluded_ips = set() for private in private_ranges: excluded_network = ipaddress.ip_network(private) excluded_ips.update(excluded_network.hosts()) - + # Ensure the output directory exists os.makedirs(output_dir, exist_ok=True) @@ -30,14 +30,14 @@ def calculate_public_ips(subnets, private_ranges, output_dir): total_ips = network.num_addresses - 2 # Exclude network and broadcast available_ips = [ip for ip in network.hosts() if ip not in excluded_ips] available_count = len(available_ips) - + # Save the information available_ips_info[subnet] = { "total_ips": total_ips, "available_count": available_count, "available_ips": available_ips } - + # Print and write the available IPs to a file only if the file doesn't exist print(f"\nAvailable IPs in subnet: {subnet}:") net = f"{subnet}" @@ -50,7 +50,7 @@ def calculate_public_ips(subnets, private_ranges, output_dir): file1.write(str(ip) + "\n") else: print(f"File {file_name} already exists. Skipping write.") - + # return available_ips_info # Define private ranges (will be excluded) @@ -63,6 +63,7 @@ def calculate_public_ips(subnets, private_ranges, output_dir): def main() -> None: # Generate public IP ranges from 1.0.0.0/8 to 223.0.0.0/8 public_subnets = generate_ip_ranges(1, 223) + # public_subnets = ["13.0.0.0/8"] # Directory to store the IP list files output_directory = "/mnt/ssd/ip_list" diff --git a/scripts/do_index.sh b/scripts/do_index.sh index 3aa2ea0..e6162ae 100755 --- a/scripts/do_index.sh +++ b/scripts/do_index.sh @@ -18,5 +18,5 @@ while IFS= read -r ip; do response=$(curl -s "${url}/${ip}") # Print the response echo "Response for $ip: $response" - sleep 2 # Sleep 2 seconds, to avoid being blocked with 429 too many requests + #sleep 2 # Sleep 2 seconds, to avoid being blocked with 429 too many requests done < "$1" diff --git a/src/config.rs b/src/config.rs index c16f011..e2ed82f 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,10 +1,12 @@ use serde::Deserialize; use std::fs; +use tracing::trace; #[derive(Deserialize)] pub struct ServerConfig { pub(crate) address: String, pub(crate) port: u16, + pub(crate) use_proxy: bool, } #[derive(Deserialize)] @@ -27,8 +29,23 @@ pub struct Config { } impl Config { - pub(crate) fn from_file(path: &str) -> Self { + // This function loads the configuration from the file + pub(crate) fn load_config() -> Self { + let path = Config::get_config_path(); // Get the config file path let config_content = fs::read_to_string(path).expect("Failed to read configuration file"); - toml::from_str(&config_content).expect("Failed to parse configuration file") + match toml::from_str(&config_content) { + Ok(config) => { + trace!("Configuration loaded successfully"); + config + } + Err(e) => { + panic!("Failed to parse configuration file: {}", e); + } + } + } + + // Helper function to get the configuration file path + fn get_config_path() -> String { + std::env::var("CONFIG_FILE_PATH").unwrap_or_else(|_| "./config.toml".to_string()) } } diff --git a/src/db.rs b/src/db.rs index 816e444..6067f99 100644 --- a/src/db.rs +++ b/src/db.rs @@ -48,8 +48,8 @@ impl Db { // Send a ping to confirm a successful connection match database.run_command(doc! { "ping": 1 }).await { - Ok(_) => info!("Successfully connected to database"), - Err(e) => error!("Error connecting to database: {}", e), + Ok(_) => info!("successfully connected to database"), + Err(e) => error!("error connecting to database: {}", e), } let collection = database.collection(&collection); diff --git a/src/external.rs b/src/external.rs index 8ceb5f8..072b2ec 100644 --- a/src/external.rs +++ b/src/external.rs @@ -1,30 +1,139 @@ use axum::Json; -use reqwest::Error; -use tracing::{trace, warn}; +use rand::seq::SliceRandom; +use reqwest::{Client, Error, Proxy, StatusCode}; +use serde_json::Value; +use std::time::Duration; +use tracing::{error, trace, warn}; const IP_API_ENDPOINT: &str = "http://ip-api.com/json/"; -const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"; +const MAX_RETRIES: usize = 5; // Maximum number of retries if the request fails -pub async fn get_geolocation(info: &String) -> Result, Error> { - let client = reqwest::Client::new() - .get(format!("{}{}", IP_API_ENDPOINT, info)) - .header("User-Agent", USER_AGENT); +const USER_AGENTS: &[&str] = &[ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:81.0) Gecko/20100101 Firefox/81.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36", + "Mozilla/5.0 (iPhone; CPU iPhone OS 13_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1 Mobile/15E148 Safari/604.1", +]; - let response = client.send().await?; +const PROXIES: &[&str] = &[ + "http://34.81.160.132:80", // Taiwan + "http://34.87.84.105:80", // Singapore + "http://117.54.114.102:80", // Indonesia + "http://47.178.24.220:80", // United States + "http://160.86.242.23:8080", // Japan + "http://20.26.249.29:8080", // United Kingdom + "http://198.49.68.80:80", // United States + "http://154.64.226.138:80", // Japan + "http://89.213.0.29:80", // Hong Kong + "http://51.222.161.115:80", // Canada + "http://195.181.172.220:8080", // Netherlands + "http://41.169.69.91:3128", // South Africa + "http://85.215.64.49:80", // Germany + "http://162.223.90.130:80", // United States + "http://23.247.136.245:80", // Singapore + "http://133.18.234.13:80", // Japan + "http://41.204.53.19:80", // Ghana + "http://41.204.53.30:80", // Ghana +]; - if response.status().is_success() { - trace!( - "Request to external db successfully with status code: {}", - response.status() - ); +// Function to get a random User-Agent +fn get_random_user_agent() -> &'static str { + USER_AGENTS.choose(&mut rand::thread_rng()).unwrap() +} + +// Function to configure the reqwest client, optionally using a proxy +fn configure_client(use_proxy: bool) -> Result { + let mut client_builder = reqwest::Client::builder().timeout(Duration::from_secs(10)); // Set timeout of 10 seconds + + if use_proxy { + // Randomly select a proxy from the list + let proxy_url = PROXIES.choose(&mut rand::thread_rng()).unwrap(); + let proxy = Proxy::all(*proxy_url)?; + client_builder = client_builder.proxy(proxy); + + trace!("Proxy enabled: using proxy {}", proxy_url); } else { - warn!( - "Request to external geolocation db failed with status code: {}", - response.status() - ); + trace!("Proxy disabled: connecting directly"); } - let response_json: serde_json::Value = response.json().await?; + client_builder.build() +} + +// Main function to perform the request, retrying if the request fails +pub async fn get_geolocation(info: &String, use_proxy: bool) -> Result, Error> { + // Log info about proxy status + if use_proxy { + trace!("using proxy to get geolocation data"); + } else { + trace!("getting geolocation data without proxy"); + } + + let mut attempts = 0; // Number of attempts + + while attempts < MAX_RETRIES { + attempts += 1; + + // Get random User-Agent + let user_agent = get_random_user_agent(); + + // Configure the client with or without proxy + let client = match configure_client(use_proxy) { + Ok(c) => c, + Err(e) => { + warn!("Error configuring client: {:?}", e); + continue; // If client configuration fails, retry + } + }; + + // Log the User-Agent being used + trace!( + "Attempting request using User-Agent: '{}' (Attempt {}/{})", + user_agent, + attempts, + MAX_RETRIES + ); + + // Make the request + let response = client + .get(format!("{}{}", IP_API_ENDPOINT, info)) + .header("User-Agent", user_agent) + .send() + .await; + + match response { + Ok(resp) => { + // If the request is successful + if resp.status().is_success() { + trace!("Request succeeded with status: {}", resp.status()); + let response_json: Value = resp.json().await?; + return Ok(Json(response_json)); + } else { + warn!( + "Request failed with status: {} (Attempt {}/{})", + resp.status(), + attempts, + MAX_RETRIES + ); + // If status code indicates too many requests, wait before retrying + if resp.status() == StatusCode::TOO_MANY_REQUESTS { + warn!("Too many requests, retrying after a delay..."); + tokio::time::sleep(Duration::from_secs(2)).await; + } + } + } + Err(e) => { + error!( + "Request error: {:?} (Attempt {}/{})", + e, attempts, MAX_RETRIES + ); + // If a connection error occurs, retry + } + } + } - Ok(Json(response_json)) + // Return a simple JSON error message after max retries + Ok(Json(serde_json::json!({ + "error": "Max retries reached" + }))) } diff --git a/src/handlers.rs b/src/handlers.rs index 7fe601f..05c8c17 100644 --- a/src/handlers.rs +++ b/src/handlers.rs @@ -23,14 +23,14 @@ pub async fn get_ip( match app_state.db.get_ip(ip.clone()).await { Ok(Some(ip_geolocation)) => { // If IP data exists, return it as JSON - info!("Ip {} already registered", &ip); + trace!("ip {} already registered in database", &ip); return Ok(( StatusCode::OK, Json(serde_json::to_value(ip_geolocation).unwrap()), )); } Ok(None) => { - info!("Ip {} not found in database", &ip); + trace!("ip {} not found in database", &ip); } Err(e) => { warn!("Error getting ip data: {}", e); @@ -43,14 +43,14 @@ pub async fn get_ip( } // If IP data does not exist in the database, get it from the external service - match get_geolocation(&ip).await { + match get_geolocation(&ip, app_state.use_proxy).await { Ok(ip_geolocation) => { - info!("Retriveing geolocation data for {}", &ip); + trace!("retriveing geolocation data for {}", &ip); // Serialize the geolocation data to validate its structure match serialize_geolocation_data(&ip_geolocation.to_string()) { Ok(data) => { - trace!("Geolocation data serialized successfully"); + trace!("geolocation data serialized successfully"); // Try to insert the geolocation data into the database match app_state.db.insert_ip(&data).await { diff --git a/src/logger.rs b/src/logger.rs index 369ed62..9592ba8 100644 --- a/src/logger.rs +++ b/src/logger.rs @@ -11,10 +11,14 @@ pub fn setup_logger(log_level: String) { }; tracing_subscriber::fmt() - // .json() - .with_thread_names(true) + .json() + .with_thread_names(false) .with_max_level(log_level) .with_span_events(FmtSpan::FULL) - .with_file(true) + .with_file(false) + .with_target(false) + .with_current_span(true) + .flatten_event(true) + // .with_timer(CustomTimeFormatter) .init(); } diff --git a/src/main.rs b/src/main.rs index 41b5ba3..fc73238 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ use crate::db::Db; use crate::handlers::handler_404; use crate::logger::setup_logger; use crate::router::create_router; +use std::net::SocketAddr; use std::sync::Arc; use tracing::info; @@ -19,23 +20,19 @@ mod utils; pub struct AppState { db: Db, + pub use_proxy: bool, } #[tokio::main] async fn main() -> Result<(), Box> { - // Determine which configuration file to load based on the IS_LOCAL environment variable - let config_file = if std::env::var("IS_LOCAL").unwrap_or_else(|_| "false".to_string()) == "true" - { - "local.config.toml" - } else { - "config.toml" - }; - // Load configuration file - let config = Config::from_file(config_file); + // Load configuration file. Set the CONFIG_FILE_PATH env var. Example: CONFIG_FILE_PATH=./config.toml + let config = Config::load_config(); // Enable logging setup_logger(config.logging.log_level); + info!("starting iproxy server"); + // Init database let db = Db::init( config.database.endpoint, @@ -47,18 +44,25 @@ async fn main() -> Result<(), Box> { // TODO: control CORS // Run server - let app = create_router(Arc::new(AppState { db: db.clone() })); //.layer(cors); + let app = create_router(Arc::new(AppState { + db: db.clone(), + use_proxy: config.server.use_proxy, + })); //.layer(cors); let app = app.fallback(handler_404); // Create index db.create_ips_index().await; - info!("Server started successfully!"); - let addr = format!("{}:{}", config.server.address, config.server.port); - info!("Listening on {}", addr); + info!("listening on {}", addr); + let addr = SocketAddr::from(([0, 0, 0, 0], config.server.port)); let listener = tokio::net::TcpListener::bind(&addr).await.unwrap(); - axum::serve(listener, app).await.unwrap(); - + axum::serve( + listener, + // Don't forget to add `ConnectInfo` if you aren't behind a proxy + app.into_make_service_with_connect_info::(), + ) + .await + .unwrap(); Ok(()) } diff --git a/src/models.rs b/src/models.rs index fecbe01..df60f6c 100644 --- a/src/models.rs +++ b/src/models.rs @@ -16,8 +16,8 @@ pub struct GeoLocation { timezone: String, isp: String, org: String, - #[serde(rename(serialize="as", deserialize="as"))] + #[serde(rename(serialize = "as", deserialize = "as"))] as_field: Option, // Adjusted field name for clarity #[serde(rename(serialize = "ip", deserialize = "query"))] ip: Option, -} \ No newline at end of file +} diff --git a/src/router.rs b/src/router.rs index 32aee6d..31d4740 100644 --- a/src/router.rs +++ b/src/router.rs @@ -1,7 +1,12 @@ -use crate::handlers::{get_ip, health_checker_handler}; +use crate::handlers::{get_ip, handler_404, health_checker_handler}; use crate::AppState; +use axum::extract::ConnectInfo; +use axum::http::Request; use axum::{routing::get, Router}; +use std::net::SocketAddr; use std::sync::Arc; +use tower_http::trace::TraceLayer; +use tracing::info; const API_V1_BASE: &str = "/api/v1"; @@ -13,4 +18,34 @@ pub fn create_router(app_state: Arc) -> Router { ) .route(&(API_V1_BASE.to_string() + "/:ip"), get(get_ip)) .with_state(app_state) + .fallback(handler_404) + .layer(TraceLayer::new_for_http().on_request( + |request: &Request<_>, _span: &tracing::Span| { + let user_agent = request + .headers() + .get(axum::http::header::USER_AGENT) + .and_then(|value| value.to_str().ok()) + .unwrap_or("Unknown"); + + // Log the client IP from ConnectInfo + if let Some(ConnectInfo(addr)) = + request.extensions().get::>() + { + info!( + method = %request.method(), + uri = %request.uri(), + user_agent = %user_agent, + client_ip = %addr.ip(), + "incoming request" + ); + } else { + info!( + method = %request.method(), + uri = %request.uri(), + user_agent = %user_agent, + "incoming request (no client IP)" + ); + } + }, + )) }