iT邦幫忙

2025 iThome 鐵人賽

DAY 13
0
Rust

Rust 實戰專案集:30 個漸進式專案從工具到服務系列 第 13

網站健康檢查器 - 監控多個網站的可用性

  • 分享至 

  • xImage
  •  

前言

今天要做一個網站健康檢查器(health check),並提供產生健康報告的功能,
用來確保網站的高可用性;這個主題與監控、網站維運的許多環節都有關。
我知道現在已經有 Prometheus、Grafana 這類成熟的大殺器可以用,但以學習為目的時,
還是建議自己動手嘗試並完成一次。作為練習題,網站檢查器也是相當不錯的學習目標。

專案目標

  • 同時監控多個網站的 HTTP 狀態
  • 測量回應時間並統計平均延遲
  • 偵測網站的可用性變化
  • 產生詳細的健康檢查報告
  • 支援定期自動檢查
  • 儲存歷史檢查記錄

這次的專案結構

website_health_checker/
├── Cargo.toml
├── src/
│   ├── main.rs
│   ├── checker.rs
│   ├── config.rs
│   ├── reporter.rs
│   └── storage.rs
└── config/
    └── sites.json

開始專案

cargo new website_health_checker
cd website_health_checker

一樣先設定 Cargo.toml

[package]
name = "website_health_checker"
version = "0.1.0"
edition = "2021"

[dependencies]
# Async runtime: main.rs uses #[tokio::main] and tokio::time::sleep.
tokio = { version = "1.0", features = ["full"] }
# HTTP client used by checker.rs to probe each site.
reqwest = { version = "0.11", features = ["json"] }
# Derive macros for (de)serializing config, results and reports.
serde = { version = "1.0", features = ["derive"] }
# JSON encoding/decoding for the config file, reports and history lines.
serde_json = "1.0"
# Command-line argument parsing in main.rs.
clap = { version = "4.0", features = ["derive"] }
# UTC timestamps; the "serde" feature lets DateTime<Utc> be (de)serialized.
chrono = { version = "0.4", features = ["serde"] }
# Colored console output in reporter.rs.
colored = "2.0"
# Error type used as the Result error throughout the project.
anyhow = "1.0"
# URL parsing used by Config::validate.
url = "2.4"
# join_all, used to await all site checks together.
futures = "0.3"

這裡我們先做 config.rs -> 配置管理 module

use serde::{Deserialize, Serialize};
use std::time::Duration;
use url::Url;

/// Monitoring settings for a single site, as (de)serialized from the config file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SiteConfig {
    /// Display name of the site; `Config::validate` rejects blank names.
    pub name: String,
    /// Address to probe; `Config::validate` requires it to parse as a URL.
    pub url: String,
    /// HTTP status code that counts as healthy; `expected_status()` uses 200 when `None`.
    pub expected_status: Option<u16>,
    /// Per-site request timeout in seconds; `timeout()` uses the caller's default when `None`.
    pub timeout_seconds: Option<u64>,
    /// Per-site check interval in seconds; `interval()` uses the caller's default when `None`.
    pub check_interval_seconds: Option<u64>,
}

/// Top-level configuration: the monitored sites plus global defaults.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// Sites to monitor.
    pub sites: Vec<SiteConfig>,
    /// Default request timeout in seconds (main.rs passes it to `Duration::from_secs`).
    pub default_timeout: u64,
    /// Default pause between check rounds in seconds (main.rs passes it to `Duration::from_secs`).
    pub default_interval: u64,
    /// Retry budget handed to `HealthChecker::new`.
    pub max_retries: usize,
}

impl Default for Config {
    fn default() -> Self {
        Self {
            sites: vec![],
            default_timeout: 10,
            default_interval: 300, // 5 minutes
            max_retries: 3,
        }
    }
}

impl Config {
    pub fn from_file(path: &str) -> anyhow::Result<Self> {
        let content = std::fs::read_to_string(path)?;
        let config: Config = serde_json::from_str(&content)?;
        Ok(config)
    }

    pub fn validate(&self) -> anyhow::Result<()> {
        for site in &self.sites {
            // 驗證 URL 格式
            Url::parse(&site.url)
                .map_err(|e| anyhow::anyhow!("Invalid URL '{}': {}", site.url, e))?;
            
            // 驗證站點名稱不為空
            if site.name.trim().is_empty() {
                return Err(anyhow::anyhow!("Site name cannot be empty"));
            }
        }
        Ok(())
    }
}

impl SiteConfig {
    /// Request timeout for this site; `default` (in seconds) applies when the
    /// site has no `timeout_seconds` of its own.
    pub fn timeout(&self, default: u64) -> Duration {
        let secs = self.timeout_seconds.unwrap_or(default);
        Duration::from_secs(secs)
    }

    /// Check interval for this site; `default` (in seconds) applies when the
    /// site has no `check_interval_seconds` of its own.
    pub fn interval(&self, default: u64) -> Duration {
        let secs = self.check_interval_seconds.unwrap_or(default);
        Duration::from_secs(secs)
    }

    /// HTTP status code treated as healthy; 200 unless the site overrides it.
    pub fn expected_status(&self) -> u16 {
        const DEFAULT_EXPECTED_STATUS: u16 = 200;
        self.expected_status.unwrap_or(DEFAULT_EXPECTED_STATUS)
    }
}

health checker module (健康檢查模組)

src/checker.rs

use crate::config::SiteConfig;
use chrono::{DateTime, Utc};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::time::{Duration, Instant};

/// Outcome of checking one site once (including any retries).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CheckResult {
    /// Name copied from the site's configuration.
    pub site_name: String,
    /// URL copied from the site's configuration.
    pub url: String,
    /// Classification of the outcome.
    pub status: HealthStatus,
    /// Elapsed time in milliseconds, as recorded by `HealthChecker::check_site`.
    pub response_time_ms: u64,
    /// HTTP status code of the response; `None` when no response was obtained.
    pub status_code: Option<u16>,
    /// Text of the last error when every attempt failed; `None` when a response was obtained.
    pub error_message: Option<String>,
    /// UTC time at which this result was created.
    pub timestamp: DateTime<Utc>,
}

/// Classification of a site check.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum HealthStatus {
    /// A response arrived and its status code matched the expected one.
    Healthy,
    /// The request failed and the error was not recognized as a timeout or network problem.
    Unhealthy,
    /// The request failed and `HealthChecker::categorize_error` classified it as a timeout.
    Timeout,
    /// The request failed and `HealthChecker::categorize_error` classified it as a network problem.
    NetworkError,
    /// A response arrived but its status code differed from the expected one.
    UnexpectedStatus,
}

/// Performs HTTP health checks with a retry budget.
pub struct HealthChecker {
    /// HTTP client used for every request.
    client: Client,
    /// Number of additional attempts made after the first one fails.
    max_retries: usize,
}

impl HealthChecker {
    pub fn new(max_retries: usize) -> Self {
        let client = Client::builder()
            .user_agent("Rust-HealthChecker/1.0")
            .build()
            .expect("Failed to create HTTP client");

        Self {
            client,
            max_retries,
        }
    }

    pub async fn check_site(&self, site: &SiteConfig, timeout: Duration) -> CheckResult {
        let start_time = Instant::now();
        let mut last_error = None;

        for attempt in 0..=self.max_retries {
            if attempt > 0 {
                tokio::time::sleep(Duration::from_millis(1000)).await;
            }

            match self.perform_check(site, timeout).await {
                Ok(mut result) => {
                    result.response_time_ms = start_time.elapsed().as_millis() as u64;
                    return result;
                }
                Err(e) => {
                    last_error = Some(e);
                }
            }
        }

        // 所有重試都失敗了
        let error = last_error.unwrap();
        CheckResult {
            site_name: site.name.clone(),
            url: site.url.clone(),
            status: self.categorize_error(&error),
            response_time_ms: start_time.elapsed().as_millis() as u64,
            status_code: None,
            error_message: Some(error.to_string()),
            timestamp: Utc::now(),
        }
    }

    async fn perform_check(&self, site: &SiteConfig, timeout: Duration) -> anyhow::Result<CheckResult> {
        let response = self
            .client
            .get(&site.url)
            .timeout(timeout)
            .send()
            .await?;

        let status_code = response.status().as_u16();
        let expected_status = site.expected_status();

        let health_status = if status_code == expected_status {
            HealthStatus::Healthy
        } else {
            HealthStatus::UnexpectedStatus
        };

        Ok(CheckResult {
            site_name: site.name.clone(),
            url: site.url.clone(),
            status: health_status,
            response_time_ms: 0, // 將在外部設置
            status_code: Some(status_code),
            error_message: None,
            timestamp: Utc::now(),
        })
    }

    fn categorize_error(&self, error: &anyhow::Error) -> HealthStatus {
        let error_str = error.to_string().to_lowercase();

        if error_str.contains("timeout") {
            HealthStatus::Timeout
        } else if error_str.contains("connection") || error_str.contains("dns") {
            HealthStatus::NetworkError
        } else {
            HealthStatus::Unhealthy
        }
    }

    pub async fn check_multiple_sites(
        &self,
        sites: &[SiteConfig],
        default_timeout: Duration,
    ) -> Vec<CheckResult> {
        let tasks: Vec<_> = sites
            .iter()
            .map(|site| {
                let timeout = site.timeout(default_timeout.as_secs());
                async move { self.check_site(site, timeout).await }
            })
            .collect();

        futures::future::join_all(tasks).await
    }
}

impl HealthStatus {
    pub fn is_healthy(&self) -> bool {
        matches!(self, HealthStatus::Healthy)
    }

    pub fn emoji(&self) -> &'static str {
        match self {
            HealthStatus::Healthy => "✅",
            HealthStatus::Unhealthy => "❌",
            HealthStatus::Timeout => "⏰",
            HealthStatus::NetworkError => "🔌",
            HealthStatus::UnexpectedStatus => "⚠️",
        }
    }
}

報告生成

src/reporter.rs

use crate::checker::{CheckResult, HealthStatus};
use chrono::{DateTime, Utc};
use colored::*;
use serde::{Deserialize, Serialize};

/// Aggregated outcome of one round of site checks.
#[derive(Debug, Serialize, Deserialize)]
pub struct HealthReport {
    /// UTC time at which the report was generated.
    pub timestamp: DateTime<Utc>,
    /// Number of results in the report.
    pub total_sites: usize,
    /// Number of results whose status is healthy.
    pub healthy_sites: usize,
    /// `total_sites` minus `healthy_sites`.
    pub unhealthy_sites: usize,
    /// Integer mean of `response_time_ms` over all results; 0 when there are none.
    pub average_response_time: u64,
    /// The individual check results the figures above were computed from.
    pub results: Vec<CheckResult>,
}

/// Stateless namespace for building, printing and saving health reports.
pub struct Reporter;

impl Reporter {
    pub fn generate_report(results: Vec<CheckResult>) -> HealthReport {
        let total_sites = results.len();
        let healthy_sites = results.iter().filter(|r| r.status.is_healthy()).count();
        let unhealthy_sites = total_sites - healthy_sites;

        let average_response_time = if total_sites > 0 {
            results.iter().map(|r| r.response_time_ms).sum::<u64>() / total_sites as u64
        } else {
            0
        };

        HealthReport {
            timestamp: Utc::now(),
            total_sites,
            healthy_sites,
            unhealthy_sites,
            average_response_time,
            results,
        }
    }

    pub fn print_console_report(report: &HealthReport) {
        println!();
        println!("{}", "=== Website Health Check Report ===".bold().blue());
        println!("Timestamp: {}", report.timestamp.format("%Y-%m-%d %H:%M:%S UTC"));
        println!();

        // 總覽統計
        Self::print_summary(report);
        println!();

        // 詳細結果
        Self::print_detailed_results(report);
    }

    fn print_summary(report: &HealthReport) {
        println!("{}", "Summary:".bold());
        println!("  Total Sites: {}", report.total_sites);
        println!(
            "  Healthy: {} {}",
            report.healthy_sites.to_string().green(),
            "✅"
        );
        println!(
            "  Unhealthy: {} {}",
            report.unhealthy_sites.to_string().red(),
            if report.unhealthy_sites > 0 { "❌" } else { "" }
        );
        println!("  Average Response Time: {}ms", report.average_response_time);

        let health_percentage =
            (report.healthy_sites as f64 / report.total_sites as f64 * 100.0) as u8;
        println!(
            "  Overall Health: {}%",
            if health_percentage >= 90 {
                health_percentage.to_string().green()
            } else if health_percentage >= 70 {
                health_percentage.to_string().yellow()
            } else {
                health_percentage.to_string().red()
            }
        );
    }

    fn print_detailed_results(report: &HealthReport) {
        println!("{}", "Detailed Results:".bold());

        for result in &report.results {
            let status_color = match result.status {
                HealthStatus::Healthy => result.status_code.unwrap_or(0).to_string().green(),
                HealthStatus::UnexpectedStatus => result.status_code.unwrap_or(0).to_string().yellow(),
                _ => "ERROR".red(),
            };

            let response_time_color = if result.response_time_ms < 500 {
                format!("{}ms", result.response_time_ms).green()
            } else if result.response_time_ms < 2000 {
                format!("{}ms", result.response_time_ms).yellow()
            } else {
                format!("{}ms", result.response_time_ms).red()
            };

            println!(
                "  {} {} [{}] {} - {}",
                result.status.emoji(),
                result.site_name.bold(),
                status_color,
                response_time_color,
                result.url.dimmed()
            );

            if let Some(error) = &result.error_message {
                println!("    Error: {}", error.red());
            }
        }
    }

    pub fn save_json_report(report: &HealthReport, filename: &str) -> anyhow::Result<()> {
        let json = serde_json::to_string_pretty(report)?;
        std::fs::write(filename, json)?;
        println!("Report saved to: {}", filename);
        Ok(())
    }

    pub fn print_trending_analysis(historical_results: &[HealthReport]) {
        if historical_results.len() < 2 {
            return;
        }

        println!();
        println!("{}", "=== Trending Analysis ===".bold().blue());

        let latest = &historical_results[historical_results.len() - 1];
        let previous = &historical_results[historical_results.len() - 2];

        let health_trend = latest.healthy_sites as i32 - previous.healthy_sites as i32;
        let response_trend = latest.average_response_time as i64 - previous.average_response_time as i64;

        if health_trend > 0 {
            println!("Health Trend: {} sites improved ⬆️", health_trend.to_string().green());
        } else if health_trend < 0 {
            println!("Health Trend: {} sites degraded ⬇️", (-health_trend).to_string().red());
        } else {
            println!("Health Trend: No change ➡️");
        }

        if response_trend > 0 {
            println!("Response Time: {}ms slower ⬆️", response_trend.to_string().red());
        } else if response_trend < 0 {
            println!("Response Time: {}ms faster ⬇️", (-response_trend).to_string().green());
        } else {
            println!("Response Time: No significant change ➡️");
        }
    }
}

存儲

src/storage.rs

use crate::reporter::HealthReport;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::fs::OpenOptions;
use std::io::{BufRead, BufReader, Write};
use std::path::Path;

/// One line of the history file: a report plus the time it was stored.
#[derive(Debug, Serialize, Deserialize)]
struct StorageRecord {
    /// UTC time at which the record was written; used by `cleanup_old_records` as the record's age.
    timestamp: DateTime<Utc>,
    /// The stored report.
    report: HealthReport,
}

/// File-backed history of health reports, stored as one JSON record per line.
pub struct Storage {
    /// Path of the history file.
    file_path: String,
}

impl Storage {
    pub fn new(file_path: String) -> Self {
        Self { file_path }
    }

    pub fn save_report(&self, report: &HealthReport) -> anyhow::Result<()> {
        let record = StorageRecord {
            timestamp: Utc::now(),
            report: report.clone(),
        };

        let json_line = serde_json::to_string(&record)?;

        let mut file = OpenOptions::new()
            .create(true)
            .append(true)
            .open(&self.file_path)?;

        writeln!(file, "{}", json_line)?;
        Ok(())
    }

    pub fn load_recent_reports(&self, limit: usize) -> anyhow::Result<Vec<HealthReport>> {
        if !Path::new(&self.file_path).exists() {
            return Ok(vec![]);
        }

        let file = std::fs::File::open(&self.file_path)?;
        let reader = BufReader::new(file);

        let mut reports = Vec::new();

        for line in reader.lines() {
            if let Ok(line_content) = line {
                if let Ok(record) = serde_json::from_str::<StorageRecord>(&line_content) {
                    reports.push(record.report);
                }
            }
        }

        // 保留最近的記錄
        if reports.len() > limit {
            reports.drain(..reports.len() - limit);
        }

        Ok(reports)
    }

    pub fn cleanup_old_records(&self, days_to_keep: u32) -> anyhow::Result<usize> {
        if !Path::new(&self.file_path).exists() {
            return Ok(0);
        }

        let cutoff_date = Utc::now() - chrono::Duration::days(days_to_keep as i64);
        let file = std::fs::File::open(&self.file_path)?;
        let reader = BufReader::new(file);

        let mut valid_records = Vec::new();
        let mut removed_count = 0;

        for line in reader.lines() {
            if let Ok(line_content) = line {
                if let Ok(record) = serde_json::from_str::<StorageRecord>(&line_content) {
                    if record.timestamp > cutoff_date {
                        valid_records.push(line_content);
                    } else {
                        removed_count += 1;
                    }
                }
            }
        }

        // 重寫文件
        let mut file = std::fs::File::create(&self.file_path)?;
        for record in valid_records {
            writeln!(file, "{}", record)?;
        }

        Ok(removed_count)
    }
}

src/main.rs

mod checker;
mod config;
mod reporter;
mod storage;

use checker::HealthChecker;
use clap::{Arg, Command};
use config::Config;
use reporter::Reporter;
use storage::Storage;
use std::time::Duration;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let matches = Command::new("Website Health Checker")
        .version("1.0")
        .author("Your Name")
        .about("Monitor multiple websites for availability and performance")
        .arg(
            Arg::new("config")
                .short('c')
                .long("config")
                .value_name("FILE")
                .help("Configuration file path")
                .default_value("config/sites.json"),
        )
        .arg(
            Arg::new("once")
                .long("once")
                .help("Run check once and exit")
                .action(clap::ArgAction::SetTrue),
        )
        .arg(
            Arg::new("output")
                .short('o')
                .long("output")
                .value_name("FILE")
                .help("Output report to JSON file"),
        )
        .arg(
            Arg::new("storage")
                .short('s')
                .long("storage")
                .value_name("FILE")
                .help("Storage file for historical data")
                .default_value("health_history.jsonl"),
        )
        .arg(
            Arg::new("trending")
                .long("trending")
                .help("Show trending analysis")
                .action(clap::ArgAction::SetTrue),
        )
        .get_matches();

    let config_path = matches.get_one::<String>("config").unwrap();
    let run_once = matches.get_flag("once");
    let trending = matches.get_flag("trending");
    let storage_file = matches.get_one::<String>("storage").unwrap();

    // 載入配置
    let config = Config::from_file(config_path)?;
    config.validate()?;

    if config.sites.is_empty() {
        eprintln!("No sites configured for monitoring");
        return Ok(());
    }

    println!("Loaded {} sites for monitoring", config.sites.len());

    let checker = HealthChecker::new(config.max_retries);
    let storage = Storage::new(storage_file.clone());

    if run_once {
        // 執行一次檢查
        let results = checker
            .check_multiple_sites(&config.sites, Duration::from_secs(config.default_timeout))
            .await;

        let report = Reporter::generate_report(results);
        Reporter::print_console_report(&report);

        // 儲存結果
        storage.save_report(&report)?;

        // 輸出到 JSON 文件(如果指定)
        if let Some(output_file) = matches.get_one::<String>("output") {
            Reporter::save_json_report(&report, output_file)?;
        }

        // 顯示趨勢分析
        if trending {
            let historical = storage.load_recent_reports(10)?;
            Reporter::print_trending_analysis(&historical);
        }
    } else {
        // 持續監控模式
        println!("Starting continuous monitoring (Press Ctrl+C to stop)...");

        loop {
            let results = checker
                .check_multiple_sites(&config.sites, Duration::from_secs(config.default_timeout))
                .await;

            let report = Reporter::generate_report(results);
            Reporter::print_console_report(&report);

            storage.save_report(&report)?;

            println!(
                "Next check in {} seconds...",
                config.default_interval
            );
            tokio::time::sleep(Duration::from_secs(config.default_interval)).await;
        }
    }

    Ok(())
}

配置文件

config/sites.json

{
  "default_timeout": 10,
  "default_interval": 300,
  "max_retries": 3,
  "sites": [
    {
      "name": "Google",
      "url": "https://www.google.com",
      "expected_status": 200,
      "timeout_seconds": 5
    },
    {
      "name": "GitHub",
      "url": "https://github.com",
      "expected_status": 200,
      "timeout_seconds": 10
    },
    {
      "name": "Example API",
      "url": "https://httpbin.org/status/200",
      "expected_status": 200,
      "check_interval_seconds": 60
    },
    {
      "name": "Local Service",
      "url": "http://localhost:8080/health",
      "expected_status": 200,
      "timeout_seconds": 2
    }
  ]
}

開始使用

# 執行單次檢查
cargo run -- --once

# 持續監控
cargo run

# 指定配置文件
cargo run -- --config my_sites.json

# 輸出 JSON 報告
cargo run -- --once --output report.json

# 顯示趨勢分析
cargo run -- --once --trending

# 自訂存儲文件
cargo run -- --storage custom_history.jsonl


上一篇
RSS 訂閱閱讀器 - 抓取並解析 RSS feeds
系列文
Rust 實戰專案集:30 個漸進式專案從工具到服務13
圖片
  熱門推薦
圖片
{{ item.channelVendor }} | {{ item.webinarstarted }} |
{{ formatDate(item.duration) }}
直播中

尚未有邦友留言

立即登入留言