2025 iThome 鐵人賽, Day 25

Rust Projects in Practice: 30 Progressive Projects from Tools to Services, part 25

Backup Automation Tool: Scheduled File Backup to Cloud Storage

Introduction

Backing up data is essential. Today we will use Rust to build an automated backup tool that periodically copies local files to cloud storage (AWS S3 in this example), with proper error handling, logging, and scheduling. The idea comes from my own backup needs: I have local files, such as some niche research videos, that I want to keep in the cloud so I can pull them back down whenever I need to revisit them, so today I will implement that in Rust.

Learning Objectives

  • Implement directory scanning and backup logic
  • Integrate the AWS S3 API
  • Schedule asynchronous tasks with tokio
  • Handle file compression and archiving
  • Build a configurable backup strategy

As usual, let's get the project set up!

cargo new backup_automation
cd backup_automation

Dependencies

Here is the Cargo.toml. In brief: tokio is the async runtime, aws-config and aws-sdk-s3 talk to S3, walkdir traverses directories, flate2 and tar build compressed archives, clap drives the CLI, tracing handles logging, and sha2/hex are on hand for checksumming.

Cargo.toml

[package]
name = "backup_automation"
version = "0.1.0"
edition = "2021"

[dependencies]
tokio = { version = "1.35", features = ["full"] }
aws-config = "1.1"
aws-sdk-s3 = "1.13"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
chrono = "0.4"
anyhow = "1.0"
thiserror = "1.0"
clap = { version = "4.4", features = ["derive"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
walkdir = "2.4"
flate2 = "1.0"
tar = "0.4"
sha2 = "0.10"
hex = "0.4"

Implementation

First, define the error types.

use thiserror::Error;

#[derive(Error, Debug)]
pub enum BackupError {
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    
    #[error("S3 error: {0}")]
    S3(String),
    
    #[error("Configuration error: {0}")]
    Config(String),
    
    #[error("Compression error: {0}")]
    Compression(String),
    
    #[error("File not found: {0}")]
    FileNotFound(String),
}

pub type Result<T> = std::result::Result<T, BackupError>;
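
Thanks to the #[from] attribute, thiserror derives From<std::io::Error> for BackupError, so the ? operator converts I/O failures automatically. A minimal sketch (read_source is a hypothetical helper, not part of the tool):

use std::path::Path;

/// Any std::io::Error raised here becomes BackupError::Io via the
/// derived From impl, so plain `?` is enough.
fn read_source(path: &Path) -> Result<Vec<u8>> {
    let bytes = std::fs::read(path)?;
    Ok(bytes)
}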

Next, the basic configuration.

use serde::{Deserialize, Serialize};
use std::path::PathBuf;

#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct BackupConfig {
    /// Directories to back up
    pub source_paths: Vec<PathBuf>,
    
    /// S3 bucket name
    pub s3_bucket: String,
    
    /// S3 key prefix
    pub s3_prefix: String,
    
    /// Backup interval in seconds
    pub interval_seconds: u64,
    
    /// Whether to compress before upload
    pub compress: bool,
    
    /// Number of backups to retain
    pub retention_count: usize,
    
    /// File patterns to exclude
    pub exclude_patterns: Vec<String>,
}

impl BackupConfig {
    pub fn from_file(path: &str) -> Result<Self> {
        let content = std::fs::read_to_string(path)
            .map_err(|e| BackupError::Config(format!("Failed to read config: {}", e)))?;
        
        serde_json::from_str(&content)
            .map_err(|e| BackupError::Config(format!("Failed to parse config: {}", e)))
    }
}
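
A quick way to sanity-check the format is to deserialize a JSON literal in a test; a minimal sketch, assuming the config code above lives in the same module:

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_minimal_config() {
        let json = r#"{
            "source_paths": ["/tmp/data"],
            "s3_bucket": "my-bucket",
            "s3_prefix": "backups",
            "interval_seconds": 3600,
            "compress": true,
            "retention_count": 7,
            "exclude_patterns": [".git"]
        }"#;

        let config: BackupConfig = serde_json::from_str(json).unwrap();
        assert_eq!(config.retention_count, 7);
        assert!(config.compress);
    }
}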

The Backup Manager

use aws_sdk_s3::{Client, primitives::ByteStream};
use chrono::{DateTime, Utc};
use std::path::Path;
use tracing::{info, warn, error};

pub struct BackupManager {
    config: BackupConfig,
    s3_client: Client,
}

impl BackupManager {
    pub async fn new(config: BackupConfig) -> Result<Self> {
        // Load region and credentials from the environment and shared config files
        let aws_config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await;
        let s3_client = Client::new(&aws_config);
        
        Ok(Self {
            config,
            s3_client,
        })
    }
    
    /// Run a single backup pass
    pub async fn backup(&self) -> Result<BackupReport> {
        let timestamp = Utc::now();
        info!("Starting backup at {}", timestamp);
        
        let mut report = BackupReport::new(timestamp);
        
        for source_path in &self.config.source_paths {
            match self.backup_directory(source_path, &timestamp).await {
                Ok(stats) => {
                    report.add_success(source_path.clone(), stats);
                }
                Err(e) => {
                    error!("Failed to backup {:?}: {}", source_path, e);
                    report.add_failure(source_path.clone(), e.to_string());
                }
            }
        }
        
        // Prune old backups
        if let Err(e) = self.cleanup_old_backups().await {
            warn!("Failed to cleanup old backups: {}", e);
        }
        
        Ok(report)
    }
    
    /// Back up a single directory
    async fn backup_directory(
        &self,
        path: &Path,
        timestamp: &DateTime<Utc>
    ) -> Result<BackupStats> {
        let mut stats = BackupStats::default();
        
        // Create a temporary archive if compression is enabled
        let temp_archive = if self.config.compress {
            Some(self.create_archive(path, &mut stats).await?)
        } else {
            None
        };
        
        // Upload to S3
        if let Some(archive_path) = temp_archive {
            self.upload_to_s3(&archive_path, path, timestamp).await?;
            
            // Remove the temporary archive
            std::fs::remove_file(&archive_path)?;
        } else {
            self.upload_directory_to_s3(path, timestamp, &mut stats).await?;
        }
        
        Ok(stats)
    }
    
    /// Create a gzip-compressed tar archive of a directory
    async fn create_archive(&self, path: &Path, stats: &mut BackupStats) -> Result<PathBuf> {
        use flate2::Compression;
        use flate2::write::GzEncoder;
        use tar::Builder;
        
        let temp_dir = std::env::temp_dir();
        let archive_name = format!(
            "backup_{}.tar.gz",
            path.file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("unknown")
        );
        let archive_path = temp_dir.join(archive_name);
        
        info!("Creating archive: {:?}", archive_path);
        
        let tar_gz = std::fs::File::create(&archive_path)?;
        let enc = GzEncoder::new(tar_gz, Compression::default());
        let mut tar = Builder::new(enc);
        
        // Recursively add files to the archive
        for entry in walkdir::WalkDir::new(path)
            .follow_links(false)
            .into_iter()
            .filter_entry(|e| !self.should_exclude(e.path()))
        {
            // walkdir::Error converts into std::io::Error, which `?` then
            // maps into BackupError::Io
            let entry = entry.map_err(std::io::Error::from)?;
            let entry_path = entry.path();
            
            if entry_path.is_file() {
                let relative_path = entry_path.strip_prefix(path)
                    .unwrap_or(entry_path);
                
                tar.append_path_with_name(entry_path, relative_path)?;
                
                stats.files_count += 1;
                stats.total_size += entry.metadata()?.len();
            }
        }
        
        tar.finish()?;
        
        let archive_size = std::fs::metadata(&archive_path)?.len();
        stats.compressed_size = Some(archive_size);
        
        info!(
            "Archive created: {} files, {} bytes (compressed: {} bytes)",
            stats.files_count, stats.total_size, archive_size
        );
        
        Ok(archive_path)
    }
    
    /// Upload a single file to S3
    async fn upload_to_s3(
        &self,
        file_path: &Path,
        original_path: &Path,
        timestamp: &DateTime<Utc>
    ) -> Result<()> {
        let file_name = original_path
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("backup");
        
        let s3_key = format!(
            "{}/{}_{}",
            self.config.s3_prefix,
            timestamp.format("%Y%m%d_%H%M%S"),
            file_name
        );
        
        info!("Uploading to s3://{}/{}", self.config.s3_bucket, s3_key);
        
        let body = ByteStream::from_path(file_path)
            .await
            .map_err(|e| BackupError::S3(e.to_string()))?;
        
        self.s3_client
            .put_object()
            .bucket(&self.config.s3_bucket)
            .key(&s3_key)
            .body(body)
            .send()
            .await
            .map_err(|e| BackupError::S3(e.to_string()))?;
        
        info!("Upload completed: {}", s3_key);
        
        Ok(())
    }
    
    /// Upload a directory to S3 file by file (no compression)
    async fn upload_directory_to_s3(
        &self,
        path: &Path,
        timestamp: &DateTime<Utc>,
        stats: &mut BackupStats,
    ) -> Result<()> {
        for entry in walkdir::WalkDir::new(path)
            .follow_links(false)
            .into_iter()
            .filter_entry(|e| !self.should_exclude(e.path()))
        {
            let entry = entry.map_err(std::io::Error::from)?;
            
            if entry.path().is_file() {
                let relative_path = entry.path().strip_prefix(path)
                    .unwrap_or(entry.path());
                
                let s3_key = format!(
                    "{}/{}/{}",
                    self.config.s3_prefix,
                    timestamp.format("%Y%m%d_%H%M%S"),
                    relative_path.display()
                );
                
                let body = ByteStream::from_path(entry.path())
                    .await
                    .map_err(|e| BackupError::S3(e.to_string()))?;
                
                self.s3_client
                    .put_object()
                    .bucket(&self.config.s3_bucket)
                    .key(&s3_key)
                    .body(body)
                    .send()
                    .await
                    .map_err(|e| BackupError::S3(e.to_string()))?;
                
                stats.files_count += 1;
                stats.total_size += entry.metadata()?.len();
            }
        }
        
        Ok(())
    }
    
    /// Delete backups beyond the configured retention count.
    /// Note: list_objects_v2 returns at most 1000 keys per page;
    /// pagination is not handled here.
    async fn cleanup_old_backups(&self) -> Result<()> {
        info!("Cleaning up old backups...");
        
        let list_output = self.s3_client
            .list_objects_v2()
            .bucket(&self.config.s3_bucket)
            .prefix(&self.config.s3_prefix)
            .send()
            .await
            .map_err(|e| BackupError::S3(e.to_string()))?;
        
        let mut objects: Vec<_> = list_output.contents().to_vec();
        objects.sort_by(|a, b| {
            b.last_modified().cmp(&a.last_modified())
        });
        
        // Keep the newest N objects and delete the rest
        for object in objects.iter().skip(self.config.retention_count) {
            if let Some(key) = object.key() {
                info!("Deleting old backup: {}", key);
                
                self.s3_client
                    .delete_object()
                    .bucket(&self.config.s3_bucket)
                    .key(key)
                    .send()
                    .await
                    .map_err(|e| BackupError::S3(e.to_string()))?;
            }
        }
        
        Ok(())
    }
    
    /// Check whether a path should be excluded (plain substring match)
    fn should_exclude(&self, path: &Path) -> bool {
        let path_str = path.to_string_lossy();
        
        for pattern in &self.config.exclude_patterns {
            if path_str.contains(pattern) {
                return true;
            }
        }
        
        false
    }
}
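
Two side notes on the manager. First, should_exclude is a plain substring match, so a pattern like ".git" also excludes a .github directory. If you want real glob semantics, the glob crate is one option; a minimal sketch, assuming glob = "0.3" is added to the dependencies:

use glob::Pattern;
use std::path::Path;

/// Glob-based variant of should_exclude: "*.log" matches any log file,
/// "**/node_modules/**" matches that directory anywhere in the tree.
fn should_exclude_glob(patterns: &[Pattern], path: &Path) -> bool {
    patterns.iter().any(|p| p.matches_path(path))
}

Second, the manifest pulls in sha2 and hex, which the code above never uses. A natural extension is to checksum each archive before upload so backups can be verified later; a minimal sketch of that idea:

use sha2::{Digest, Sha256};
use std::io::Read;
use std::path::Path;

/// Compute the SHA-256 of a file as a lowercase hex string.
/// Streaming in chunks avoids loading a large archive into memory.
fn file_sha256(path: &Path) -> std::io::Result<String> {
    let mut file = std::fs::File::open(path)?;
    let mut hasher = Sha256::new();
    let mut buf = [0u8; 8192];
    loop {
        let n = file.read(&mut buf)?;
        if n == 0 {
            break;
        }
        hasher.update(&buf[..n]);
    }
    Ok(hex::encode(hasher.finalize()))
}

The digest could then be attached to the upload by chaining .metadata("sha256", digest) onto the put_object builder.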

The Backup Report

use std::collections::HashMap;

#[derive(Debug, Default)]
pub struct BackupStats {
    pub files_count: usize,
    pub total_size: u64,
    pub compressed_size: Option<u64>,
}

#[derive(Debug)]
pub struct BackupReport {
    pub timestamp: DateTime<Utc>,
    pub successful: HashMap<PathBuf, BackupStats>,
    pub failed: HashMap<PathBuf, String>,
}

impl BackupReport {
    pub fn new(timestamp: DateTime<Utc>) -> Self {
        Self {
            timestamp,
            successful: HashMap::new(),
            failed: HashMap::new(),
        }
    }
    
    pub fn add_success(&mut self, path: PathBuf, stats: BackupStats) {
        self.successful.insert(path, stats);
    }
    
    pub fn add_failure(&mut self, path: PathBuf, error: String) {
        self.failed.insert(path, error);
    }
    
    pub fn print_summary(&self) {
        println!("\n=== Backup Report ===");
        println!("Time: {}", self.timestamp.format("%Y-%m-%d %H:%M:%S"));
        println!("\nSuccessful backups: {}", self.successful.len());
        
        for (path, stats) in &self.successful {
            println!("  {:?}", path);
            println!("    Files: {}", stats.files_count);
            println!("    Size: {} bytes", stats.total_size);
            if let Some(compressed) = stats.compressed_size {
                // Guard against division by zero for empty backups
                let ratio = if stats.total_size > 0 {
                    (compressed as f64 / stats.total_size as f64) * 100.0
                } else {
                    0.0
                };
                println!("    Compressed: {} bytes ({:.1}%)", compressed, ratio);
            }
        }
        
        if !self.failed.is_empty() {
            println!("\nFailed backups: {}", self.failed.len());
            for (path, error) in &self.failed {
                println!("  {:?}: {}", path, error);
            }
        }
        
        println!("====================\n");
    }
}
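
For reference, a run over one compressed directory prints something like this (the numbers are illustrative):

=== Backup Report ===
Time: 2025-10-01 03:00:00

Successful backups: 1
  "/home/user/documents"
    Files: 128
    Size: 10485760 bytes
    Compressed: 3145728 bytes (30.0%)
====================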

The Scheduler

use tokio::time::{interval, Duration};

pub struct BackupScheduler {
    manager: BackupManager,
    interval_seconds: u64,
}

impl BackupScheduler {
    pub fn new(manager: BackupManager, interval_seconds: u64) -> Self {
        Self {
            manager,
            interval_seconds,
        }
    }
    
    /// Run backups on a fixed interval, forever.
    /// Note: tokio's interval completes its first tick immediately,
    /// so a backup runs as soon as the scheduler starts.
    pub async fn run(self) -> Result<()> {
        info!("Starting backup scheduler with interval: {}s", self.interval_seconds);
        
        let mut interval = interval(Duration::from_secs(self.interval_seconds));
        
        loop {
            interval.tick().await;
            
            info!("Scheduled backup triggered");
            
            match self.manager.backup().await {
                Ok(report) => {
                    report.print_summary();
                }
                Err(e) => {
                    error!("Backup failed: {}", e);
                }
            }
        }
    }
}
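
The loop above runs until the process is killed. If you would rather exit cleanly on Ctrl-C than risk dying mid-upload, tokio::select! can race the timer against a shutdown signal. A minimal sketch of an alternative run loop for the scheduler:

    /// Like run(), but returns cleanly when Ctrl-C is received.
    pub async fn run_until_ctrl_c(self) -> Result<()> {
        let mut interval = interval(Duration::from_secs(self.interval_seconds));

        loop {
            tokio::select! {
                _ = interval.tick() => {
                    match self.manager.backup().await {
                        Ok(report) => report.print_summary(),
                        Err(e) => error!("Backup failed: {}", e),
                    }
                }
                // select! only races while we are waiting, so an
                // in-flight backup still completes before shutdown.
                _ = tokio::signal::ctrl_c() => {
                    info!("Shutdown signal received, stopping scheduler");
                    return Ok(());
                }
            }
        }
    }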

The Main Program

use clap::{Parser, Subcommand};

#[derive(Parser)]
#[command(name = "backup-automation")]
#[command(about = "Automated backup tool with S3 integration")]
struct Cli {
    #[command(subcommand)]
    command: Commands,
}

#[derive(Subcommand)]
enum Commands {
    /// Run a single backup
    Run {
        #[arg(short, long, default_value = "config.json")]
        config: String,
    },
    
    /// Start scheduled backups
    Schedule {
        #[arg(short, long, default_value = "config.json")]
        config: String,
    },
    
    /// Generate a sample configuration file
    InitConfig {
        #[arg(short, long, default_value = "config.json")]
        output: String,
    },
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Initialize logging
    tracing_subscriber::fmt()
        .with_env_filter("backup_automation=info")
        .init();
    
    let cli = Cli::parse();
    
    match cli.command {
        Commands::Run { config } => {
            let config = BackupConfig::from_file(&config)?;
            let manager = BackupManager::new(config).await?;
            
            let report = manager.backup().await?;
            report.print_summary();
        }
        
        Commands::Schedule { config } => {
            let config = BackupConfig::from_file(&config)?;
            let interval = config.interval_seconds;
            let manager = BackupManager::new(config).await?;
            
            let scheduler = BackupScheduler::new(manager, interval);
            scheduler.run().await?;
        }
        
        Commands::InitConfig { output } => {
            let sample_config = BackupConfig {
                source_paths: vec![
                    PathBuf::from("/path/to/backup1"),
                    PathBuf::from("/path/to/backup2"),
                ],
                s3_bucket: "my-backup-bucket".to_string(),
                s3_prefix: "backups".to_string(),
                interval_seconds: 3600,
                compress: true,
                retention_count: 7,
                exclude_patterns: vec![
                    ".git".to_string(),
                    "node_modules".to_string(),
                    "target".to_string(),
                ],
            };
            
            let json = serde_json::to_string_pretty(&sample_config)?;
            std::fs::write(&output, json)?;
            
            println!("Sample configuration written to: {}", output);
        }
    }
    
    Ok(())
}

Usage

First, create the config file, config.json:

{
  "source_paths": [
    "/home/user/documents",
    "/home/user/projects"
  ],
  "s3_bucket": "my-backup-bucket",
  "s3_prefix": "daily-backups",
  "interval_seconds": 86400,
  "compress": true,
  "retention_count": 7,
  "exclude_patterns": [
    ".git",
    "node_modules",
    "target",
    ".DS_Store"
  ]
}

Alternatively, generate a sample configuration with the init-config subcommand and then edit it:

cargo run -- init-config

Set Up AWS Credentials

aws-config resolves credentials through the standard AWS chain, so environment variables, ~/.aws/credentials, and IAM roles all work. With environment variables:

export AWS_ACCESS_KEY_ID=<access key>
export AWS_SECRET_ACCESS_KEY=<secret key>
export AWS_REGION=<your region>

Run a One-Time Backup

cargo run -- run --config config.json

Run Scheduled Backups

cargo run -- schedule --config config.json
