Data backup is a critical part of any setup.
Today we will use Rust to build an automated backup tool that periodically backs up local files to cloud storage (using AWS S3 as the example),
with proper error handling, logging, and scheduling. The idea comes from my own backup needs:
for example, the special research videos on my machine can be archived to the cloud and pulled back down whenever I want to revisit them, so today I plan to implement this in Rust.
cargo new backup_automation
cd backup_automation
Cargo.toml:
[package]
name = "backup_automation"
version = "0.1.0"
edition = "2021"
[dependencies]
tokio = { version = "1.35", features = ["full"] }
aws-config = "1.1"
aws-sdk-s3 = "1.13"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
chrono = "0.4"
anyhow = "1.0"
thiserror = "1.0"
clap = { version = "4.4", features = ["derive"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
walkdir = "2.4"
flate2 = "1.0"
tar = "0.4"
sha2 = "0.10"
hex = "0.4"
First, let's define the error handling:
use thiserror::Error;
#[derive(Error, Debug)]
pub enum BackupError {
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
#[error("S3 error: {0}")]
S3(String),
#[error("Configuration error: {0}")]
Config(String),
#[error("Compression error: {0}")]
Compression(String),
#[error("File not found: {0}")]
FileNotFound(String),
#[error("Walkdir error: {0}")]
Walk(#[from] walkdir::Error),
}
pub type Result<T> = std::result::Result<T, BackupError>;
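Because the variants above use #[from], the ? operator converts the wrapped error types into BackupError automatically, which is what lets the code below call ? directly on std::fs and walkdir operations. A minimal sketch of that conversion (read_marker is purely an illustration, not part of the tool):
// Illustration only: the io::Error from read_to_string is converted
// into BackupError::Io by `?` through the #[from]-generated From impl.
fn read_marker(path: &std::path::Path) -> Result<String> {
    Ok(std::fs::read_to_string(path)?)
}
Next, the configuration: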
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct BackupConfig {
/// Directories to back up
pub source_paths: Vec<PathBuf>,
/// S3 bucket name
pub s3_bucket: String,
/// S3 key prefix
pub s3_prefix: String,
/// Backup interval (seconds)
pub interval_seconds: u64,
/// Whether to compress before uploading
pub compress: bool,
/// Number of backups to retain
pub retention_count: usize,
/// Patterns to exclude (matched as substrings)
pub exclude_patterns: Vec<String>,
}
impl BackupConfig {
pub fn from_file(path: &str) -> Result<Self> {
let content = std::fs::read_to_string(path)
.map_err(|e| BackupError::Config(format!("Failed to read config: {}", e)))?;
serde_json::from_str(&content)
.map_err(|e| BackupError::Config(format!("Failed to parse config: {}", e)))
}
}
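BackupConfig derives Deserialize, so from_file is just serde_json plus error mapping. A quick test of the expected JSON shape (a sketch; the values are arbitrary):
#[cfg(test)]
mod config_tests {
    use super::*;

    #[test]
    fn parses_sample_config() {
        let json = r#"{
            "source_paths": ["/tmp/data"],
            "s3_bucket": "my-backup-bucket",
            "s3_prefix": "backups",
            "interval_seconds": 3600,
            "compress": true,
            "retention_count": 7,
            "exclude_patterns": [".git"]
        }"#;
        let config: BackupConfig = serde_json::from_str(json).expect("config should parse");
        assert_eq!(config.source_paths, vec![PathBuf::from("/tmp/data")]);
        assert_eq!(config.retention_count, 7);
        assert!(config.compress);
    }
}
With the configuration in place, the backup manager handles archiving, uploading, and cleanup: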
use aws_sdk_s3::{Client, primitives::ByteStream};
use chrono::{DateTime, Utc};
use std::path::Path;
use tracing::{info, warn, error};
pub struct BackupManager {
config: BackupConfig,
s3_client: Client,
}
impl BackupManager {
pub async fn new(config: BackupConfig) -> Result<Self> {
let aws_config = aws_config::load_from_env().await;
let s3_client = Client::new(&aws_config);
Ok(Self {
config,
s3_client,
})
}
/// Run a single backup
pub async fn backup(&self) -> Result<BackupReport> {
let timestamp = Utc::now();
info!("Starting backup at {}", timestamp);
let mut report = BackupReport::new(timestamp);
for source_path in &self.config.source_paths {
match self.backup_directory(source_path, &timestamp).await {
Ok(stats) => {
report.add_success(source_path.clone(), stats);
}
Err(e) => {
error!("Failed to backup {:?}: {}", source_path, e);
report.add_failure(source_path.clone(), e.to_string());
}
}
}
// Clean up old backups
if let Err(e) = self.cleanup_old_backups().await {
warn!("Failed to cleanup old backups: {}", e);
}
Ok(report)
}
/// Back up a single directory
async fn backup_directory(
&self,
path: &Path,
timestamp: &DateTime<Utc>
) -> Result<BackupStats> {
let mut stats = BackupStats::default();
// Create a temporary archive if compression is enabled
let temp_archive = if self.config.compress {
Some(self.create_archive(path, &mut stats).await?)
} else {
None
};
// Upload to S3
if let Some(archive_path) = temp_archive {
self.upload_to_s3(&archive_path, path, timestamp).await?;
// Remove the temporary file
std::fs::remove_file(&archive_path)?;
} else {
self.upload_directory_to_s3(path, timestamp, &mut stats).await?;
}
Ok(stats)
}
/// Create a gzip-compressed tar archive
async fn create_archive(&self, path: &Path, stats: &mut BackupStats) -> Result<PathBuf> {
use flate2::Compression;
use flate2::write::GzEncoder;
use tar::Builder;
let temp_dir = std::env::temp_dir();
let archive_name = format!(
"backup_{}.tar.gz",
path.file_name()
.and_then(|n| n.to_str())
.unwrap_or("unknown")
);
let archive_path = temp_dir.join(archive_name);
info!("Creating archive: {:?}", archive_path);
let tar_gz = std::fs::File::create(&archive_path)?;
let enc = GzEncoder::new(tar_gz, Compression::default());
let mut tar = Builder::new(enc);
// Recursively add files
for entry in walkdir::WalkDir::new(path)
.follow_links(false)
.into_iter()
.filter_entry(|e| !self.should_exclude(e.path()))
{
let entry = entry?;
let entry_path = entry.path();
if entry_path.is_file() {
let relative_path = entry_path.strip_prefix(path)
.unwrap_or(entry_path);
tar.append_path_with_name(entry_path, relative_path)?;
stats.files_count += 1;
stats.total_size += entry.metadata()?.len();
}
}
// Finish the tar archive, then finish the gzip encoder so the file is fully flushed to disk
let enc = tar.into_inner()?;
enc.finish()?;
let archive_size = std::fs::metadata(&archive_path)?.len();
stats.compressed_size = Some(archive_size);
info!(
"Archive created: {} files, {} bytes (compressed: {} bytes)",
stats.files_count, stats.total_size, archive_size
);
Ok(archive_path)
}
/// Upload a single file to S3
async fn upload_to_s3(
&self,
file_path: &Path,
original_path: &Path,
timestamp: &DateTime<Utc>
) -> Result<()> {
let file_name = original_path
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("backup");
let s3_key = format!(
"{}/{}_{}",
self.config.s3_prefix,
timestamp.format("%Y%m%d_%H%M%S"),
file_name
);
info!("Uploading to s3://{}/{}", self.config.s3_bucket, s3_key);
let body = ByteStream::from_path(file_path)
.await
.map_err(|e| BackupError::S3(e.to_string()))?;
self.s3_client
.put_object()
.bucket(&self.config.s3_bucket)
.key(&s3_key)
.body(body)
.send()
.await
.map_err(|e| BackupError::S3(e.to_string()))?;
info!("Upload completed: {}", s3_key);
Ok(())
}
/// Upload a directory to S3 file by file (no compression)
async fn upload_directory_to_s3(
&self,
path: &Path,
timestamp: &DateTime<Utc>,
stats: &mut BackupStats,
) -> Result<()> {
for entry in walkdir::WalkDir::new(path)
.follow_links(false)
.into_iter()
.filter_entry(|e| !self.should_exclude(e.path()))
{
let entry = entry?;
if entry.path().is_file() {
let relative_path = entry.path().strip_prefix(path)
.unwrap_or(entry.path());
let s3_key = format!(
"{}/{}/{}",
self.config.s3_prefix,
timestamp.format("%Y%m%d_%H%M%S"),
relative_path.display()
);
let body = ByteStream::from_path(entry.path())
.await
.map_err(|e| BackupError::S3(e.to_string()))?;
self.s3_client
.put_object()
.bucket(&self.config.s3_bucket)
.key(&s3_key)
.body(body)
.send()
.await
.map_err(|e| BackupError::S3(e.to_string()))?;
stats.files_count += 1;
stats.total_size += entry.metadata()?.len();
}
}
Ok(())
}
/// Clean up old backups
async fn cleanup_old_backups(&self) -> Result<()> {
info!("Cleaning up old backups...");
let list_output = self.s3_client
.list_objects_v2()
.bucket(&self.config.s3_bucket)
.prefix(&self.config.s3_prefix)
.send()
.await
.map_err(|e| BackupError::S3(e.to_string()))?;
let mut objects: Vec<_> = list_output.contents().to_vec();
objects.sort_by(|a, b| {
b.last_modified().cmp(&a.last_modified())
});
// Keep only the newest N backups
for object in objects.iter().skip(self.config.retention_count) {
if let Some(key) = object.key() {
info!("Deleting old backup: {}", key);
self.s3_client
.delete_object()
.bucket(&self.config.s3_bucket)
.key(key)
.send()
.await
.map_err(|e| BackupError::S3(e.to_string()))?;
}
}
Ok(())
}
/// Check whether this path should be excluded
fn should_exclude(&self, path: &Path) -> bool {
let path_str = path.to_string_lossy();
for pattern in &self.config.exclude_patterns {
if path_str.contains(pattern) {
return true;
}
}
false
}
}
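Cargo.toml also pulls in sha2 and hex, which the code above never touches. One natural use for them is recording a checksum of each archive before upload, so you can later verify that what sits in S3 matches what was read from disk. A sketch of such a helper (archive_sha256 is hypothetical and not wired into BackupManager; the digest could, for example, be stored as S3 user metadata alongside the object):
use sha2::{Digest, Sha256};
use std::io::Read;

/// Hypothetical helper: compute the hex-encoded SHA-256 of a file on disk.
fn archive_sha256(path: &Path) -> Result<String> {
    let mut file = std::fs::File::open(path)?;
    let mut hasher = Sha256::new();
    let mut buf = [0u8; 8192];
    loop {
        // Stream the file through the hasher in 8 KiB chunks
        let n = file.read(&mut buf)?;
        if n == 0 {
            break;
        }
        hasher.update(&buf[..n]);
    }
    Ok(hex::encode(hasher.finalize()))
}
Next come the stats and report types used above: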
use std::collections::HashMap;
#[derive(Debug, Default)]
pub struct BackupStats {
pub files_count: usize,
pub total_size: u64,
pub compressed_size: Option<u64>,
}
#[derive(Debug)]
pub struct BackupReport {
pub timestamp: DateTime<Utc>,
pub successful: HashMap<PathBuf, BackupStats>,
pub failed: HashMap<PathBuf, String>,
}
impl BackupReport {
pub fn new(timestamp: DateTime<Utc>) -> Self {
Self {
timestamp,
successful: HashMap::new(),
failed: HashMap::new(),
}
}
pub fn add_success(&mut self, path: PathBuf, stats: BackupStats) {
self.successful.insert(path, stats);
}
pub fn add_failure(&mut self, path: PathBuf, error: String) {
self.failed.insert(path, error);
}
pub fn print_summary(&self) {
println!("\n=== Backup Report ===");
println!("Time: {}", self.timestamp.format("%Y-%m-%d %H:%M:%S"));
println!("\nSuccessful backups: {}", self.successful.len());
for (path, stats) in &self.successful {
println!(" {:?}", path);
println!(" Files: {}", stats.files_count);
println!(" Size: {} bytes", stats.total_size);
if let Some(compressed) = stats.compressed_size {
let ratio = (compressed as f64 / stats.total_size as f64) * 100.0;
println!(" Compressed: {} bytes ({:.1}%)", compressed, ratio);
}
}
if !self.failed.is_empty() {
println!("\nFailed backups: {}", self.failed.len());
for (path, error) in &self.failed {
println!(" {:?}: {}", path, error);
}
}
println!("====================\n");
}
}
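The report itself is just two maps keyed by source path, so the bookkeeping is easy to check in isolation (a small sketch; the numbers are made up and would print as a 30.0% compression ratio):
#[cfg(test)]
mod report_tests {
    use super::*;

    #[test]
    fn tracks_successes_and_failures() {
        let mut report = BackupReport::new(Utc::now());
        report.add_success(
            PathBuf::from("/data/a"),
            BackupStats { files_count: 3, total_size: 200, compressed_size: Some(60) },
        );
        report.add_failure(PathBuf::from("/data/b"), "permission denied".to_string());
        assert_eq!(report.successful.len(), 1);
        assert_eq!(report.failed.len(), 1);
    }
}
Then the scheduler that drives periodic backups: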
use tokio::time::{interval, Duration};
pub struct BackupScheduler {
manager: BackupManager,
interval_seconds: u64,
}
impl BackupScheduler {
pub fn new(manager: BackupManager, interval_seconds: u64) -> Self {
Self {
manager,
interval_seconds,
}
}
/// Start the scheduled backup loop
pub async fn run(self) -> Result<()> {
info!("Starting backup scheduler with interval: {}s", self.interval_seconds);
let mut interval = interval(Duration::from_secs(self.interval_seconds));
loop {
interval.tick().await;
info!("Scheduled backup triggered");
match self.manager.backup().await {
Ok(report) => {
report.print_summary();
}
Err(e) => {
error!("Backup failed: {}", e);
}
}
}
}
}
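As written, run() loops forever and only killing the process stops it. If you want the scheduler to shut down cleanly on Ctrl-C, one option is to race the interval tick against a shutdown signal; here is a sketch of an alternative loop (assuming tokio's signal support, which the full feature set already enables):
use tokio::signal;

impl BackupScheduler {
    /// Alternative run loop that exits cleanly on Ctrl-C instead of looping forever.
    pub async fn run_until_ctrl_c(self) -> Result<()> {
        let mut interval = interval(Duration::from_secs(self.interval_seconds));
        loop {
            tokio::select! {
                _ = interval.tick() => {
                    info!("Scheduled backup triggered");
                    match self.manager.backup().await {
                        Ok(report) => report.print_summary(),
                        Err(e) => error!("Backup failed: {}", e),
                    }
                }
                _ = signal::ctrl_c() => {
                    info!("Ctrl-C received, shutting down scheduler");
                    return Ok(());
                }
            }
        }
    }
}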
The main program:
use clap::{Parser, Subcommand};
#[derive(Parser)]
#[command(name = "backup-automation")]
#[command(about = "Automated backup tool with S3 integration")]
struct Cli {
#[command(subcommand)]
command: Commands,
}
#[derive(Subcommand)]
enum Commands {
/// Run a single backup
Run {
#[arg(short, long, default_value = "config.json")]
config: String,
},
/// Start scheduled backups
Schedule {
#[arg(short, long, default_value = "config.json")]
config: String,
},
/// Generate a sample configuration file
InitConfig {
#[arg(short, long, default_value = "config.json")]
output: String,
},
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
// Initialize logging
tracing_subscriber::fmt()
.with_env_filter("backup_automation=info")
.init();
let cli = Cli::parse();
match cli.command {
Commands::Run { config } => {
let config = BackupConfig::from_file(&config)?;
let manager = BackupManager::new(config).await?;
let report = manager.backup().await?;
report.print_summary();
}
Commands::Schedule { config } => {
let config = BackupConfig::from_file(&config)?;
let interval = config.interval_seconds;
let manager = BackupManager::new(config).await?;
let scheduler = BackupScheduler::new(manager, interval);
scheduler.run().await?;
}
Commands::InitConfig { output } => {
let sample_config = BackupConfig {
source_paths: vec![
PathBuf::from("/path/to/backup1"),
PathBuf::from("/path/to/backup2"),
],
s3_bucket: "my-backup-bucket".to_string(),
s3_prefix: "backups".to_string(),
interval_seconds: 3600,
compress: true,
retention_count: 7,
exclude_patterns: vec![
".git".to_string(),
"node_modules".to_string(),
"target".to_string(),
],
};
let json = serde_json::to_string_pretty(&sample_config)?;
std::fs::write(&output, json)?;
println!("Sample configuration written to: {}", output);
}
}
Ok(())
}
Here we create the config file, config.json:
{
"source_paths": [
"/home/user/documents",
"/home/user/projects"
],
"s3_bucket": "my-backup-bucket",
"s3_prefix": "daily-backups",
"interval_seconds": 86400,
"compress": true,
"retention_count": 7,
"exclude_patterns": [
".git",
"node_modules",
"target",
".DS_Store"
]
}
Or generate a sample config with:
cargo run -- init-config
Set up your AWS credentials:
export AWS_ACCESS_KEY_ID=<Access key>
export AWS_SECRET_ACCESS_KEY=<Secret Key>
export AWS_REGION=<your region>
Run a one-off backup:
cargo run -- run --config config.json
Or start scheduled backups:
cargo run -- schedule --config config.json