Log analysis is an indispensable part of system operations. In this post I'll implement an efficient web server log analyzer that parses common log formats (such as the Apache/Nginx Combined Log Format) and produces useful statistics: the most frequently requested pages, the HTTP status code distribution, peak traffic hours, and so on. I hope this learning journey helps me apply Rust to all kinds of practical, real-world tasks.
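For reference, a single request in the Combined Log Format looks like this (an illustrative line, not taken from a real log):

203.0.113.7 - - [01/Jan/2024:12:00:00 +0800] "GET /index.html HTTP/1.1" 200 2326 "https://example.com/" "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"

The fields are: client IP, identd, user, timestamp, request line, status code, response size in bytes, referer, and user agent. Start by creating a new project: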
cargo new log_analyzer
cd log_analyzer
Cargo.toml

[package]
name = "log_analyzer"
version = "0.1.0"
edition = "2021"
[dependencies]
regex = "1.10"
chrono = "0.4"
clap = { version = "4.5", features = ["derive"] }
rand = "0.8" # used by the sample log generator later in this post
src/main.rs

use chrono::{NaiveDateTime, Timelike};
use clap::Parser;
use regex::Regex;
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;
#[derive(Parser, Debug)]
#[command(author, version, about = "Web server log analyzer", long_about = None)]
struct Args {
/// Path to the log file
#[arg(short, long)]
file: PathBuf,
/// Show the top N most requested paths
#[arg(short = 'n', long, default_value = "10")]
top_urls: usize,
/// Show hourly traffic statistics
#[arg(short = 'H', long)]
hourly_stats: bool,
/// Show the HTTP status code distribution
#[arg(short = 's', long)]
status_codes: bool,
}
#[derive(Debug, Clone)]
struct LogEntry {
ip: String,
timestamp: NaiveDateTime,
method: String,
path: String,
status: u16,
size: u64,
user_agent: String,
}
#[derive(Debug, Default)]
struct LogStats {
total_requests: usize,
path_counts: HashMap<String, usize>,
status_counts: HashMap<u16, usize>,
hourly_requests: HashMap<u8, usize>,
total_bytes: u64,
ip_counts: HashMap<String, usize>,
}
struct LogParser {
// Regular expression for the Combined Log Format
// Format: IP - - [timestamp] "METHOD path HTTP/version" status size "referer" "user-agent"
regex: Regex,
}
impl LogParser {
fn new() -> Result<Self, regex::Error> {
let pattern = r#"^(\S+) \S+ \S+ \[([^\]]+)\] "(\S+) (\S+) \S+" (\d{3}) (\d+|-) "([^"]*)" "([^"]*)""#;
let regex = Regex::new(pattern)?;
Ok(Self { regex })
}
fn parse_line(&self, line: &str) -> Option<LogEntry> {
let captures = self.regex.captures(line)?;
let ip = captures.get(1)?.as_str().to_string();
// Parse the timestamp, e.g. 01/Jan/2024:12:00:00 +0800
let timestamp_str = captures.get(2)?.as_str();
let timestamp = self.parse_timestamp(timestamp_str)?;
let method = captures.get(3)?.as_str().to_string();
let path = captures.get(4)?.as_str().to_string();
let status = captures.get(5)?.as_str().parse().ok()?;
let size_str = captures.get(6)?.as_str();
let size = if size_str == "-" {
0
} else {
size_str.parse().ok()?
};
let user_agent = captures.get(8)?.as_str().to_string();
Some(LogEntry {
ip,
timestamp,
method,
path,
status,
size,
user_agent,
})
}
fn parse_timestamp(&self, s: &str) -> Option<NaiveDateTime> {
// Format: 01/Jan/2024:12:00:00 +0800
// Ignore the timezone offset and parse only the date and time
let datetime_part = s.split_whitespace().next()?;
NaiveDateTime::parse_from_str(datetime_part, "%d/%b/%Y:%H:%M:%S").ok()
}
}
impl LogStats {
fn new() -> Self {
Self::default()
}
fn add_entry(&mut self, entry: &LogEntry) {
self.total_requests += 1;
// Count requests per path
*self.path_counts.entry(entry.path.clone()).or_insert(0) += 1;
// Count occurrences of each status code
*self.status_counts.entry(entry.status).or_insert(0) += 1;
// Count requests per hour of the day
let hour = entry.timestamp.hour() as u8;
*self.hourly_requests.entry(hour).or_insert(0) += 1;
// Accumulate the total bytes transferred
self.total_bytes += entry.size;
// Count requests per client IP
*self.ip_counts.entry(entry.ip.clone()).or_insert(0) += 1;
}
fn get_top_paths(&self, n: usize) -> Vec<(&String, &usize)> {
let mut paths: Vec<_> = self.path_counts.iter().collect();
paths.sort_by(|a, b| b.1.cmp(a.1));
paths.into_iter().take(n).collect()
}
fn get_top_ips(&self, n: usize) -> Vec<(&String, &usize)> {
let mut ips: Vec<_> = self.ip_counts.iter().collect();
ips.sort_by(|a, b| b.1.cmp(a.1));
ips.into_iter().take(n).collect()
}
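/// Convert a raw byte count into a human-readable string,
/// e.g. format_bytes(1536) yields "1.50 KB" (illustrative example).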
fn format_bytes(bytes: u64) -> String {
const UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
let mut size = bytes as f64;
let mut unit_index = 0;
while size >= 1024.0 && unit_index < UNITS.len() - 1 {
size /= 1024.0;
unit_index += 1;
}
format!("{:.2} {}", size, UNITS[unit_index])
}
}
struct ReportGenerator;
impl ReportGenerator {
fn print_summary(stats: &LogStats) {
println!("\n{}", "=".repeat(60));
println!("📊 Log Analysis Report");
println!("{}", "=".repeat(60));
println!("Total requests: {}", stats.total_requests);
println!("Total traffic: {}", LogStats::format_bytes(stats.total_bytes));
println!("Unique IPs: {}", stats.ip_counts.len());
if stats.total_requests > 0 {
let avg_size = stats.total_bytes / stats.total_requests as u64;
println!("Average response size: {}", LogStats::format_bytes(avg_size));
}
println!();
}
fn print_top_urls(stats: &LogStats, n: usize) {
println!("🔥 Top {} most requested paths:", n);
println!("{:-<60}", "");
let top_paths = stats.get_top_paths(n);
for (i, (path, count)) in top_paths.iter().enumerate() {
let percentage = (*count as f64 / stats.total_requests as f64) * 100.0;
println!("{:2}. {:4} hits ({:5.2}%) - {}",
i + 1, count, percentage, path);
}
println!();
}
fn print_status_codes(stats: &LogStats) {
println!("📈 HTTP status code distribution:");
println!("{:-<60}", "");
let mut status_vec: Vec<_> = stats.status_counts.iter().collect();
status_vec.sort_by_key(|&(status, _)| status);
for (status, count) in status_vec {
let percentage = (*count as f64 / stats.total_requests as f64) * 100.0;
let status_desc = Self::status_description(*status);
println!("{} - {:4} hits ({:5.2}%) - {}",
status, count, percentage, status_desc);
}
println!();
}
fn print_hourly_stats(stats: &LogStats) {
println!("⏰ Requests per hour:");
println!("{:-<60}", "");
for hour in 0..24 {
let count = stats.hourly_requests.get(&hour).unwrap_or(&0);
let percentage = (*count as f64 / stats.total_requests as f64) * 100.0;
let bar = Self::generate_bar(*count, stats.total_requests);
println!("{:02}:00 - {:4} hits ({:5.2}%) {}",
hour, count, percentage, bar);
}
println!();
}
fn print_top_ips(stats: &LogStats, n: usize) {
println!("🌐 Top {} most active IP addresses:", n);
println!("{:-<60}", "");
let top_ips = stats.get_top_ips(n);
for (i, (ip, count)) in top_ips.iter().enumerate() {
let percentage = (*count as f64 / stats.total_requests as f64) * 100.0;
println!("{:2}. {:4} hits ({:5.2}%) - {}",
i + 1, count, percentage, ip);
}
println!();
}
fn status_description(status: u16) -> &'static str {
match status {
200 => "OK",
201 => "Created",
204 => "No Content",
301 => "Moved Permanently",
302 => "Found",
304 => "Not Modified",
400 => "Bad Request",
401 => "Unauthorized",
403 => "Forbidden",
404 => "Not Found",
500 => "Internal Server Error",
502 => "Bad Gateway",
503 => "Service Unavailable",
_ => "Unknown",
}
}
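/// Render a fixed-width text bar proportional to count / max,
/// e.g. generate_bar(10, 40) fills a quarter of the 40-character bar (illustrative).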
fn generate_bar(count: usize, max: usize) -> String {
let bar_width = 40;
let filled = ((count as f64 / max as f64) * bar_width as f64) as usize;
let filled = filled.min(bar_width);
format!("[{}{}]", "█".repeat(filled), "░".repeat(bar_width - filled))
}
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
let args = Args::parse();
// Make sure the file exists before trying to open it
if !args.file.exists() {
eprintln!("❌ Error: file does not exist: {:?}", args.file);
std::process::exit(1);
}
println!("📂 Analyzing log file: {:?}", args.file);
let parser = LogParser::new()?;
let mut stats = LogStats::new();
// Read and parse the log file line by line
let file = File::open(&args.file)?;
let reader = BufReader::new(file);
let mut parse_errors = 0;
let mut line_number = 0;
for line in reader.lines() {
line_number += 1;
let line = line?;
if line.trim().is_empty() {
continue;
}
match parser.parse_line(&line) {
Some(entry) => stats.add_entry(&entry),
None => {
parse_errors += 1;
if parse_errors <= 5 {
eprintln!("⚠️ 第 {} 行解析失敗", line_number);
}
}
}
}
if parse_errors > 0 {
eprintln!("\n⚠️ {} lines could not be parsed", parse_errors);
}
// Generate the report
ReportGenerator::print_summary(&stats);
ReportGenerator::print_top_urls(&stats, args.top_urls);
ReportGenerator::print_top_ips(&stats, 5);
if args.status_codes {
ReportGenerator::print_status_codes(&stats);
}
if args.hourly_stats {
ReportGenerator::print_hourly_stats(&stats);
}
Ok(())
}
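Before moving on, a quick sanity check helps: the following unit test can be appended to the end of src/main.rs. This is a minimal sketch; the sample log line and the expected field values are invented purely for the test.

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_a_combined_format_line() {
        let parser = LogParser::new().unwrap();
        // A hand-written line in Combined Log Format (illustrative data only).
        let line = r#"203.0.113.7 - - [01/Jan/2024:12:34:56 +0800] "GET /about HTTP/1.1" 200 1234 "-" "curl/7.68.0""#;
        let entry = parser.parse_line(line).expect("line should parse");
        assert_eq!(entry.ip, "203.0.113.7");
        assert_eq!(entry.method, "GET");
        assert_eq!(entry.path, "/about");
        assert_eq!(entry.status, 200);
        assert_eq!(entry.size, 1234);
        assert_eq!(entry.user_agent, "curl/7.68.0");
    }
}

Running cargo test should then confirm that a well-formed line is split into the expected fields.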
Done!
src/bin/generate_sample_log.rs

Here we generate some fake data so we have a log file to test the analyzer with. Placing the file under src/bin/ lets Cargo build it as a separate binary named generate_sample_log (this is also why Cargo.toml above sets default-run and adds the rand dependency).
use rand::seq::SliceRandom;
use rand::Rng;
use std::fs::File;
use std::io::Write;
fn main() -> std::io::Result<()> {
let mut file = File::create("sample.log")?;
let mut rng = rand::thread_rng();
let paths = vec![
"/", "/about", "/contact", "/products", "/api/users",
"/api/orders", "/login", "/signup", "/dashboard",
"/static/css/style.css", "/static/js/app.js",
];
let user_agents = vec![
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"curl/7.68.0",
];
let statuses = vec![200, 200, 200, 200, 304, 404, 500, 302];
// Generate 10,000 log lines
for _ in 0..10000 {
let ip = format!(
"{}.{}.{}.{}",
rng.gen_range(1..255),
rng.gen_range(0..255),
rng.gen_range(0..255),
rng.gen_range(1..255)
);
let timestamp = format!(
"{:02}/{}/2024:{:02}:{:02}:{:02} +0800",
rng.gen_range(1..29),
["Jan", "Feb", "Mar"].choose(&mut rng).unwrap(),
rng.gen_range(0..24),
rng.gen_range(0..60),
rng.gen_range(0..60)
);
let method = "GET";
let path = paths.choose(&mut rng).unwrap();
let status = statuses.choose(&mut rng).unwrap();
let size = rng.gen_range(100..50000);
let user_agent = user_agents.choose(&mut rng).unwrap();
writeln!(
file,
r#"{} - - [{}] "{} {} HTTP/1.1" {} {} "-" "{}""#,
ip, timestamp, method, path, status, size, user_agent
)?;
}
println!("✅ 已生成 sample.log");
Ok(())
}
# Build the project
cargo build --release
# Generate test data
cargo run --bin generate_sample_log
# Basic analysis
cargo run -- -f sample.log
# Show the top 20 paths
cargo run -- -f sample.log -n 20
# Show all statistics
cargo run -- -f sample.log -n 15 --status-codes --hourly-stats
# Analyze a real Nginx log
cargo run -- -f /var/log/nginx/access.log -s -H