第三部分的一開始,我們要學習 Rust 中處理資料集合的核心工具 - 集合(Collections)和迭代器(Iterators)。學完之後,有在寫 Python 的人可能會覺得這有點像 Rust 自帶的 Pandas XD。
在實際程式開發中,你經常需要對集合中的資料做篩選與轉換,例如:
// Traditional imperative style: build the result by pushing into a mutable Vec.
let numbers = vec![1, 2, 3, 4, 5];
let mut squared_evens = Vec::new();
// Iterating over `&numbers` yields `&i32`, so `num` is a reference here.
for num in &numbers {
if num % 2 == 0 {
squared_evens.push(num * num);
}
}
// Iterator style: the same "keep evens, then square" pipeline as one expression.
// This `let` shadows the `squared_evens` built by the loop above.
let squared_evens: Vec<i32> = numbers
.iter()
.filter(|&n| n % 2 == 0)
.map(|&n| n * n)
.collect();
/// Tours the everyday `Vec` operations: construction, push, indexed and
/// checked access, in-place mutation, length/capacity and element removal.
/// Everything is reported on stdout.
fn demo_vec() {
    // `vec!` builds a pre-filled vector; `Vec::new()` starts empty.
    let mut fruits = vec!["蘋果", "香蕉", "橘子"];
    let mut numbers = Vec::new();

    // Append 1, 2, 3 one element at a time.
    for n in 1..=3 {
        numbers.push(n);
    }

    // Direct indexing panics when out of bounds; `get` returns an `Option`.
    println!("第一個數字: {}", numbers[0]);
    let second_fruit = fruits.get(1).unwrap_or(&"無");
    println!("第二個水果: {}", second_fruit);

    // Checked access to an index that does not exist.
    if let Some(fruit) = fruits.get(10) {
        println!("第11個水果: {}", fruit);
    } else {
        println!("沒有第11個水果");
    }

    // Overwrite an element through its index.
    fruits[0] = "芒果";

    println!("長度: {}, 容量: {}", numbers.len(), numbers.capacity());

    // `pop` returns an `Option` for the last element; `remove` returns the
    // element at the given index and panics if the index is out of bounds.
    let last = numbers.pop();
    let second = numbers.remove(1);
    println!("移除的元素: {:?}, {}", last, second);
    println!("剩餘數字: {:?}", numbers);
}
/// Advanced `Vec` operations: sorting, reversing, de-duplicating, splitting,
/// binary search and bulk extension. Each step prints the resulting state.
fn advanced_vec_operations() {
    let mut scores = vec![85, 92, 78, 96, 88];

    // Ascending sort.
    scores.sort();
    println!("排序後: {:?}", scores);

    // Reverse in place.
    scores.reverse();
    println!("反轉後: {:?}", scores);

    // `dedup` only removes *consecutive* duplicates, hence the sort first.
    scores.sort();
    scores.dedup();
    println!("去重後: {:?}", scores);

    // Split into the first two elements and the remainder.
    let (left, right) = (&scores[..2], &scores[2..]);
    println!("左半: {:?}, 右半: {:?}", left, right);

    // Binary search requires sorted input, which `scores` is at this point.
    match scores.binary_search(&88) {
        Ok(index) => println!("找到 88 在索引 {}", index),
        Err(_) => {}
    }

    // Append several elements at once.
    scores.extend(&[90, 94, 87]);
    println!("擴展後: {:?}", scores);
}
use std::collections::HashMap;
/// Basic `HashMap` workflow: insert, look up, overwrite, insert-if-absent,
/// iterate, test for a key and remove. Results are printed to stdout.
fn demo_hashmap() {
    let mut scores = HashMap::new();

    // Seed the map with three entries.
    for (name, score) in [("Alice", 95), ("Bob", 87), ("Charlie", 92)] {
        scores.insert(name, score);
    }

    // `get` returns `Option<&V>`.
    if let Some(score) = scores.get("Alice") {
        println!("Alice 的分數: {}", score);
    } else {
        println!("找不到 Alice");
    }

    // Inserting under an existing key replaces the previous value.
    scores.insert("Alice", 98);

    // `entry(..).or_insert(..)` writes only when the key is absent, so
    // "David" is added while Alice's score stays untouched.
    scores.entry("David").or_insert(85);
    scores.entry("Alice").or_insert(80);

    // Iteration order of a `HashMap` is unspecified.
    scores
        .iter()
        .for_each(|(name, score)| println!("{}: {}", name, score));

    if scores.contains_key("Bob") {
        println!("Bob 在名單中");
    }

    // `remove` hands back the removed value, if there was one.
    match scores.remove("Charlie") {
        Some(old_score) => println!("移除了 Charlie,分數是 {}", old_score),
        None => {}
    }
}
/// Advanced `HashMap` usage: word-frequency counting with the entry API and
/// in-place mutation of struct values through `get_mut`.
fn advanced_hashmap_operations() {
    /// Example value type stored in the map.
    #[derive(Debug)]
    struct StudentInfo {
        age: u32,
        grade: char,
    }

    // Word frequencies: each occurrence bumps the counter for that word,
    // inserting a zero first when the word has not been seen yet.
    let text = "hello world hello rust world";
    let mut word_count = HashMap::new();
    text.split_whitespace().for_each(|word| {
        *word_count.entry(word).or_insert(0) += 1;
    });
    println!("詞頻統計: {:?}", word_count);

    // A custom struct as the value type.
    let mut students = HashMap::new();
    students.insert("Alice", StudentInfo { age: 20, grade: 'A' });
    students.insert("Bob", StudentInfo { age: 19, grade: 'B' });

    // Mutate a stored value in place.
    // NOTE(review): Alice's grade is already 'A', so this write leaves the
    // printed output unchanged — pick a different value to make the update visible.
    match students.get_mut("Alice") {
        Some(student) => student.grade = 'A',
        None => {}
    }
    println!("學生資訊: {:?}", students);
}
/// Builds `HashMap`s by collecting vectors of `(key, value)` tuples and
/// prints the resulting maps.
fn create_hashmap_from_data() {
    // Team name -> member list; any iterator of pairs can be collected into a map.
    let team_map = vec![
        ("Red", vec!["Alice", "Bob"]),
        ("Blue", vec!["Charlie", "David"]),
    ]
    .into_iter()
    .collect::<HashMap<_, _>>();

    // Student name -> score, built the same way from a vector of tuples.
    let score_map = vec![("Alice", 95), ("Bob", 87), ("Charlie", 92)]
        .into_iter()
        .collect::<HashMap<_, _>>();

    println!("團隊: {:?}", team_map);
    println!("分數: {:?}", score_map);
}
use std::collections::BTreeMap;
/// Shows what `BTreeMap` adds over `HashMap`: key-ordered iteration and
/// range queries over the keys.
fn demo_btreemap() {
    let mut grades = BTreeMap::new();
    for (subject, grade) in [("Math", 95), ("English", 87), ("Science", 92), ("Art", 88)] {
        grades.insert(subject, grade);
    }

    // Iteration visits the entries in ascending key order.
    grades
        .iter()
        .for_each(|(subject, grade)| println!("{}: {}", subject, grade));

    // Half-open key range: keys >= "A" and < "N".
    println!("\nA-M 科目:");
    for (subject, grade) in grades.range("A".."N") {
        println!("{}: {}", subject, grade);
    }

    // Copy every entry from "Math" (inclusive) onwards into a new map.
    let math_and_after: BTreeMap<_, _> = grades
        .range("Math"..)
        .map(|(&subject, &grade)| (subject, grade))
        .collect();
    println!("\nMath 之後的科目: {:?}", math_and_after);
}
use std::collections::{HashSet, BTreeSet};
/// Demonstrates `HashSet` (membership and set algebra) and `BTreeSet`
/// (ordered iteration and range queries), printing each result.
fn demo_sets() {
    // HashSet: unordered; inserting an existing value is a no-op.
    let mut languages = HashSet::new();
    for name in ["Rust", "Python", "Go", "Rust"] {
        languages.insert(name);
    }
    println!("程式語言數量: {}", languages.len());

    // Membership test.
    if languages.contains("Rust") {
        println!("包含 Rust");
    }

    // Two sets to combine.
    let modern_languages: HashSet<&str> =
        ["Rust", "Go", "Swift", "Kotlin"].iter().copied().collect();
    let system_languages: HashSet<&str> =
        ["Rust", "C", "C++", "Go"].iter().copied().collect();

    // The `&`, `|` and `-` operators on set references build new sets holding
    // the intersection, union and difference respectively.
    let intersection = &modern_languages & &system_languages;
    println!("現代系統語言: {:?}", intersection);

    let union = &modern_languages | &system_languages;
    println!("所有語言: {:?}", union);

    let difference = &modern_languages - &system_languages;
    println!("只在現代語言中: {:?}", difference);

    // BTreeSet: elements are kept in ascending order.
    let ordered_numbers: BTreeSet<i32> = [3, 1, 4, 2].iter().copied().collect();
    println!("排序的數字: {:?}", ordered_numbers);

    // Inclusive range query over the ordered elements.
    let range_2_to_4: BTreeSet<_> = ordered_numbers.range(2..=4).copied().collect();
    println!("2到4的數字: {:?}", range_2_to_4);
}
/// Shows the three ways to iterate a `Vec` (`iter`, `into_iter`, `iter_mut`)
/// plus a few standalone iterator sources from the standard library.
fn create_iterators() {
    // 1. `iter()` borrows: each item is a `&i32`.
    let numbers = vec![1, 2, 3, 4, 5];
    numbers.iter().for_each(|num| println!("借用: {}", num));

    // 2. `into_iter()` takes ownership: each item is an `i32`, and
    //    `numbers2` can no longer be used afterwards.
    let numbers2 = vec![1, 2, 3, 4, 5];
    for num in numbers2.into_iter() {
        println!("擁有: {}", num);
    }

    // 3. `iter_mut()` borrows mutably: each item is a `&mut i32`.
    let mut numbers3 = vec![1, 2, 3, 4, 5];
    numbers3.iter_mut().for_each(|num| *num *= 2);
    println!("修改後: {:?}", numbers3);

    // Other iterator sources: an inclusive range, a bounded repetition and
    // a single-element iterator.
    let range_values: Vec<i32> = (1..=5).collect();
    let repeated: Vec<&str> = std::iter::repeat("hello").take(3).collect();
    let single: Vec<i32> = std::iter::once(42).collect();
    println!("範圍: {:?}", range_values);
    println!("重複: {:?}", repeated);
    println!("單一: {:?}", single);
}
/// Walks through the common lazy iterator adaptors — `map`, `filter`,
/// `enumerate`, `zip`, `take`, `skip`, `step_by` and `chain` — printing the
/// collected result of each.
fn iterator_adaptors() {
    let numbers: Vec<i32> = (1..=10).collect();

    // map: transform every element.
    let squared = numbers.iter().map(|x| x * x).collect::<Vec<i32>>();
    println!("平方: {:?}", squared);

    // filter: keep only the elements matching the predicate.
    let evens = numbers
        .iter()
        .filter(|x| **x % 2 == 0)
        .collect::<Vec<&i32>>();
    println!("偶數: {:?}", evens);

    // enumerate: pair every element with its index; only the first three
    // pairs are printed.
    let indexed: Vec<(usize, &i32)> = numbers.iter().enumerate().collect();
    println!("帶索引: {:?}", &indexed[..3]);

    // zip: walk two sequences in lock-step.
    let names = ["Alice", "Bob", "Charlie"];
    let ages = [25, 30, 35];
    let combined = names.iter().zip(ages.iter()).collect::<Vec<_>>();
    println!("組合: {:?}", combined);

    // take / skip: keep a prefix, or drop one.
    let first_3 = numbers.iter().take(3).collect::<Vec<_>>();
    let skip_5 = numbers.iter().skip(5).collect::<Vec<_>>();
    println!("前3個: {:?}", first_3);
    println!("跳過5個: {:?}", skip_5);

    // step_by: yield every n-th element, starting with the first.
    let every_other = numbers.iter().step_by(2).collect::<Vec<_>>();
    println!("每隔一個: {:?}", every_other);

    // chain: append one iterator after another.
    let first_part = [1, 2, 3];
    let second_part = [4, 5, 6];
    let chained = first_part
        .iter()
        .chain(second_part.iter())
        .collect::<Vec<_>>();
    println!("連接: {:?}", chained);
}
/// Combines several adaptors into longer pipelines: a text-processing chain
/// and a small filter/map/aggregate pipeline over a list of people.
fn complex_iterator_chains() {
    /// Row type for the data-processing example.
    #[derive(Debug)]
    struct Person {
        name: String,
        age: u32,
        salary: u32,
    }

    // Text pipeline: split on whitespace, strip non-alphabetic characters
    // from both ends of each token, drop tokens that end up empty, and keep
    // the byte length of the rest.
    let text = "Hello World! This is a Rust example.";
    let word_lengths: Vec<usize> = text
        .split_whitespace()
        .filter_map(|raw| {
            let word = raw.trim_matches(|c: char| !c.is_alphabetic());
            if word.is_empty() {
                None
            } else {
                Some(word.len())
            }
        })
        .collect();
    println!("單詞長度: {:?}", word_lengths);

    // Sample data.
    let people: Vec<Person> = [
        ("Alice", 25, 50000),
        ("Bob", 30, 60000),
        ("Charlie", 35, 70000),
        ("Diana", 28, 55000),
    ]
    .iter()
    .map(|&(name, age, salary)| Person {
        name: name.to_string(),
        age,
        salary,
    })
    .collect();

    // Names of people earning more than 55000 who are younger than 35.
    let high_earners: Vec<String> = people
        .iter()
        .filter(|person| person.salary > 55000 && person.age < 35)
        .map(|person| person.name.clone())
        .collect();
    println!("年輕高薪者: {:?}", high_earners);

    // Aggregates: total salary and mean age.
    let total_salary = people.iter().map(|person| person.salary).sum::<u32>();
    let age_sum: f64 = people.iter().map(|person| person.age as f64).sum();
    let average_age = age_sum / people.len() as f64;
    println!("總薪資: {}, 平均年齡: {:.1}", total_salary, average_age);
}
fn consuming_adaptors() {
let numbers = vec![1, 2, 3, 4, 5];
// collect:收集到集合
let doubled: Vec<i32> = numbers
.iter()
.map(|&x| x * 2)
.collect();
// reduce/fold:聚合操作
let sum = numbers.iter().fold(0, |acc, &x| acc + x);
let product = numbers.iter().fold(1, |acc, &x| acc * x);
println!("總和: {}, 乘積: {}", sum, product);
// reduce:更簡潔的 fold
let sum2 = numbers.iter().reduce(|acc, x| acc + x);
println!("總和2: {:?}", sum2);
// find:查找第一個符合條件的元素
let first_even = numbers
.iter()
.find(|&&x| x % 2 == 0);
println!("第一個偶數: {:?}", first_even);
// any 和 all:條件檢查
let has_even = numbers.iter().any(|&x| x % 2 == 0);
let all_positive = numbers.iter().all(|&x| x > 0);
println!("有偶數: {}, 都是正數: {}", has_even, all_positive);
// count:計數
let even_count = numbers
.iter()
.filter(|&&x| x % 2 == 0)
.count();
println!("偶數個數: {}", even_count);
// min 和 max
let min_val = numbers.iter().min();
let max_val = numbers.iter().max();
println!("最小值: {:?}, 最大值: {:?}", min_val, max_val);
// partition:分割
let (evens, odds): (Vec<_>, Vec<_>) = numbers
.iter()
.partition(|&&x| x % 2 == 0);
println!("偶數: {:?}, 奇數: {:?}", evens, odds);
}
讓我們建立一個實用的資料分析工具來展示集合和迭代器的威力:
use std::collections::HashMap;
/// A single observation handled by the analyzer.
#[derive(Debug, Clone)]
pub struct DataRecord {
    /// Caller-assigned identifier.
    pub id: u32,
    /// Category label used for grouping.
    pub category: String,
    /// Numeric measurement the statistics are computed over.
    pub value: f64,
    /// Timestamp kept as an opaque string; it is stored but never parsed here.
    pub timestamp: String,
    /// Free-form labels attached to the record.
    pub tags: Vec<String>,
}

impl DataRecord {
    /// Creates a record with no tags.
    pub fn new(id: u32, category: String, value: f64, timestamp: String) -> Self {
        Self {
            id,
            category,
            value,
            timestamp,
            tags: Vec::new(),
        }
    }

    /// Builder-style setter: consumes the record and returns it with `tags`
    /// replacing whatever tags it had before.
    pub fn with_tags(self, tags: Vec<String>) -> Self {
        Self { tags, ..self }
    }
}
pub struct DataAnalyzer {
records: Vec<DataRecord>,
}
impl DataAnalyzer {
pub fn new() -> Self {
DataAnalyzer {
records: Vec::new(),
}
}
pub fn add_record(&mut self, record: DataRecord) {
self.records.push(record);
}
pub fn load_from_slice(&mut self, records: Vec<DataRecord>) {
self.records = records;
}
// 基本統計
pub fn basic_stats(&self) -> BasicStats {
if self.records.is_empty() {
return BasicStats::default();
}
let values: Vec<f64> = self.records
.iter()
.map(|r| r.value)
.collect();
let sum: f64 = values.iter().sum();
let count = values.len();
let mean = sum / count as f64;
let min = values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
let max = values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
// 計算標準差
let variance: f64 = values
.iter()
.map(|&x| (x - mean).powi(2))
.sum::<f64>() / count as f64;
let std_dev = variance.sqrt();
BasicStats {
count,
sum,
mean,
min,
max,
std_dev,
}
}
// 按類別分組統計
pub fn group_by_category(&self) -> HashMap<String, BasicStats> {
let mut groups: HashMap<String, Vec<f64>> = HashMap::new();
// 分組
for record in &self.records {
groups.entry(record.category.clone())
.or_insert_with(Vec::new)
.push(record.value);
}
// 計算每組的統計
groups.into_iter()
.map(|(category, values)| {
let stats = self.calculate_stats(&values);
(category, stats)
})
.collect()
}
// 篩選資料
pub fn filter_by_value_range(&self, min: f64, max: f64) -> Vec<&DataRecord> {
self.records
.iter()
.filter(|record| record.value >= min && record.value <= max)
.collect()
}
pub fn filter_by_category(&self, category: &str) -> Vec<&DataRecord> {
self.records
.iter()
.filter(|record| record.category == category)
.collect()
}
pub fn filter_by_tag(&self, tag: &str) -> Vec<&DataRecord> {
self.records
.iter()
.filter(|record| record.tags.contains(&tag.to_string()))
.collect()
}
// 排名分析
pub fn top_records(&self, n: usize) -> Vec<&DataRecord> {
let mut records = self.records.iter().collect::<Vec<_>>();
records.sort_by(|a, b| b.value.partial_cmp(&a.value).unwrap());
records.into_iter().take(n).collect()
}
pub fn bottom_records(&self, n: usize) -> Vec<&DataRecord> {
let mut records = self.records.iter().collect::<Vec<_>>();
records.sort_by(|a, b| a.value.partial_cmp(&b.value).unwrap());
records.into_iter().take(n).collect()
}
// 趨勢分析
pub fn category_trends(&self) -> Vec<(String, Vec<f64>)> {
let mut category_values: HashMap<String, Vec<f64>> = HashMap::new();
for record in &self.records {
category_values.entry(record.category.clone())
.or_insert_with(Vec::new)
.push(record.value);
}
category_values.into_iter().collect()
}
// 異常值檢測(使用 IQR 方法)
pub fn find_outliers(&self) -> Vec<&DataRecord> {
let mut values: Vec<(f64, usize)> = self.records
.iter()
.enumerate()
.map(|(i, record)| (record.value, i))
.collect();
values.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
let len = values.len();
if len < 4 {
return Vec::new();
}
let q1_idx = len / 4;
let q3_idx = 3 * len / 4;
let q1 = values[q1_idx].0;
let q3 = values[q3_idx].0;
let iqr = q3 - q1;
let lower_bound = q1 - 1.5 * iqr;
let upper_bound = q3 + 1.5 * iqr;
values.into_iter()
.filter(|(value, _)| *value < lower_bound || *value > upper_bound)
.map(|(_, idx)| &self.records[idx])
.collect()
}
// 資料轉換
pub fn transform_values<F>(&self, transform: F) -> Vec<DataRecord>
where
F: Fn(f64) -> f64,
{
self.records
.iter()
.map(|record| {
let mut new_record = record.clone();
new_record.value = transform(record.value);
new_record
})
.collect()
}
// 相關性分析(簡化版)
pub fn category_correlation(&self) -> HashMap<String, f64> {
let stats = self.basic_stats();
let global_mean = stats.mean;
self.records
.iter()
.fold(HashMap::new(), |mut acc, record| {
let entry = acc.entry(record.category.clone()).or_insert(Vec::new());
entry.push(record.value - global_mean);
acc
})
.into_iter()
.map(|(category, deviations)| {
let correlation = deviations.iter().sum::<f64>() / deviations.len() as f64;
(category, correlation)
})
.collect()
}
// 輔助函式
fn calculate_stats(&self, values: &[f64]) -> BasicStats {
if values.is_empty() {
return BasicStats::default();
}
let sum: f64 = values.iter().sum();
let count = values.len();
let mean = sum / count as f64;
let min = values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
let max = values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
let variance: f64 = values
.iter()
.map(|&x| (x - mean).powi(2))
.sum::<f64>() / count as f64;
let std_dev = variance.sqrt();
BasicStats {
count,
sum,
mean,
min,
max,
std_dev,
}
}
}
/// Summary statistics for a set of `f64` values. `Default` yields all zeros.
#[derive(Debug, Default)]
pub struct BasicStats {
    /// Number of values.
    pub count: usize,
    /// Sum of the values.
    pub sum: f64,
    /// Arithmetic mean.
    pub mean: f64,
    /// Smallest value.
    pub min: f64,
    /// Largest value.
    pub max: f64,
    /// Standard deviation.
    pub std_dev: f64,
}

impl std::fmt::Display for BasicStats {
    /// Renders a multi-line, human-readable report: a header line followed by
    /// one line per statistic, floats with two decimals, and no trailing
    /// newline after the last line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "統計資訊:\n 數量: {}\n 總和: {:.2}\n 平均: {:.2}\n 最小: {:.2}\n 最大: {:.2}\n 標準差: {:.2}",
            self.count, self.sum, self.mean, self.min, self.max, self.std_dev
        )
    }
}
/// Demo driver: loads a small fixture into a `DataAnalyzer` and prints the
/// result of each analysis in turn.
fn main() {
    // Local helpers that keep the fixture below compact.
    let tags = |names: &[&str]| -> Vec<String> {
        names.iter().map(|name| name.to_string()).collect()
    };
    let record = |id: u32, category: &str, value: f64, timestamp: &str| {
        DataRecord::new(id, category.to_string(), value, timestamp.to_string())
    };

    // Test data: eight records across three categories, some of them tagged.
    let mut analyzer = DataAnalyzer::new();
    analyzer.load_from_slice(vec![
        record(1, "銷售", 1200.0, "2024-01-01").with_tags(tags(&["重要", "月度"])),
        record(2, "銷售", 980.0, "2024-01-02").with_tags(tags(&["日常"])),
        record(3, "行銷", 450.0, "2024-01-03").with_tags(tags(&["廣告", "重要"])),
        record(4, "行銷", 320.0, "2024-01-04"),
        record(5, "銷售", 1500.0, "2024-01-05").with_tags(tags(&["重要", "異常"])),
        record(6, "研發", 800.0, "2024-01-06"),
        record(7, "研發", 750.0, "2024-01-07"),
        record(8, "行銷", 280.0, "2024-01-08"),
    ]);

    // Overall statistics.
    println!("=== 基本統計 ===");
    println!("{}", analyzer.basic_stats());

    // Per-category statistics.
    println!("\n=== 按類別統計 ===");
    for (category, stats) in analyzer.group_by_category().iter() {
        println!("\n{}:", category);
        println!("{}", stats);
    }

    // Ranking.
    println!("\n=== TOP 3 記錄 ===");
    analyzer.top_records(3).into_iter().for_each(|record| {
        println!(
            "ID: {}, 類別: {}, 值: {:.2}",
            record.id, record.category, record.value
        )
    });

    // Value filter: everything from 800 upwards.
    println!("\n=== 高價值記錄 (>= 800) ===");
    for record in analyzer.filter_by_value_range(800.0, f64::INFINITY) {
        println!(
            "ID: {}, 類別: {}, 值: {:.2}, 標籤: {:?}",
            record.id, record.category, record.value, record.tags
        );
    }

    // Tag filter.
    println!("\n=== 重要記錄 ===");
    for record in analyzer.filter_by_tag("重要") {
        println!(
            "ID: {}, 類別: {}, 值: {:.2}",
            record.id, record.category, record.value
        );
    }

    // Outlier detection.
    println!("\n=== 異常值檢測 ===");
    let outliers = analyzer.find_outliers();
    if !outliers.is_empty() {
        for record in outliers {
            println!("異常值 - ID: {}, 值: {:.2}", record.id, record.value);
        }
    } else {
        println!("沒有檢測到異常值");
    }

    // Value transformation: z-score using the overall mean and std-dev.
    println!("\n=== 資料轉換 (標準化) ===");
    let stats = analyzer.basic_stats();
    let normalized = analyzer.transform_values(|x| (x - stats.mean) / stats.std_dev);
    normalized.iter().take(3).for_each(|record| {
        println!("ID: {}, 標準化值: {:.3}", record.id, record.value)
    });

    // Per-category deviation from the global mean.
    println!("\n=== 類別相關性 ===");
    for (category, correlation) in analyzer.category_correlation() {
        println!("{}: {:.3}", category, correlation);
    }
}
use std::collections::{HashMap, BTreeMap, HashSet, BTreeSet};
/// Side-by-side reminders for choosing between the standard collections.
///
/// Every collection receives at least one element so that its element type
/// can be inferred; a bare `HashSet::new()` / `BTreeSet::new()` that is never
/// used fails to compile with "type annotations needed".
fn choose_collection_types() {
    use std::collections::VecDeque;

    // HashMap vs BTreeMap
    // HashMap: average O(1) access, unordered — suited to lookup-heavy code.
    let mut fast_lookup = HashMap::new();
    fast_lookup.insert("key1", "value1");

    // BTreeMap: O(log n) access, keys kept sorted — suited to ordered
    // traversal and range queries.
    let mut sorted_map = BTreeMap::new();
    sorted_map.insert("key1", "value1");

    // HashSet vs BTreeSet follow the same trade-off as the maps above.
    let mut unique_items = HashSet::new();
    unique_items.insert("item");
    let mut sorted_unique = BTreeSet::new();
    sorted_unique.insert("item");

    // Vec vs VecDeque
    // Vec: the default choice; pushing at the back is amortized O(1).
    let mut list = Vec::new();
    list.push(1);

    // VecDeque: efficient pushes and pops at both ends.
    let mut deque = VecDeque::new();
    deque.push_front(1); // O(1)
    deque.push_back(2); // O(1)
}
/// Illustrates that iterator chains and hand-written loops compute the same
/// thing, and that adaptors stay lazy until consumed.
///
/// The data is `i64` on purpose: the sum of 1..1_000_000 is roughly 5 × 10¹¹,
/// which overflows `i32` (a panic in debug builds, silent wrap-around in
/// release builds).
fn iterator_performance() {
    let data: Vec<i64> = (1..1_000_000).collect();

    // Zero-cost abstraction: the iterator form ...
    let sum1: i64 = data.iter().sum();

    // ... and the equivalent hand-written loop.
    let mut sum2 = 0;
    for &item in &data {
        sum2 += item;
    }
    assert_eq!(sum1, sum2);

    // Chained adaptors are lazy as well; `take(100)` stops the pipeline after
    // the first hundred matches.
    let result: Vec<i64> = data
        .iter()
        .filter(|&&x| x % 2 == 0)
        .map(|&x| x * x)
        .take(100)
        .collect();
    assert_eq!(result.len(), 100);

    // Avoid unnecessary `collect()` calls.
    // Not ideal: materializes an intermediate Vec right away.
    let _bad: Vec<_> = data.iter().map(|&x| x * 2).collect();

    // Better: keep the pipeline lazy ...
    let good = data.iter().map(|&x| x * 2);

    // ... and consume it only where the result is needed.
    let sum: i64 = good.sum();
    println!("優化結果: {}", sum);
}
/// Memory-related habits: pre-allocating, moving instead of copying, keeping
/// large buffers on the heap, and accepting slices in function signatures.
fn memory_efficiency() {
    /// Takes a slice rather than `&Vec<i32>` so that callers can pass a Vec,
    /// an array or any other contiguous `i32` storage.
    fn process_data(data: &[i32]) -> i32 {
        data.iter().sum()
    }

    // Pre-allocate when the final size is known, so filling the vector does
    // not trigger repeated reallocations. The element type is spelled out
    // because an unused `Vec::with_capacity(..)` cannot be inferred.
    let mut buffer: Vec<i32> = Vec::with_capacity(1000);
    buffer.extend(0..1000);
    assert_eq!(buffer.len(), 1000);

    // `into_iter()` moves the elements out of `data` instead of copying them;
    // `data` is consumed by this pipeline.
    let data = vec![1, 2, 3, 4, 5];
    let doubled: Vec<i32> = data.into_iter().map(|x| x * 2).collect();
    assert_eq!(doubled, [2, 4, 6, 8, 10]);

    // Large buffers belong on the heap. Allocating through `vec!` avoids
    // `Box::new([0; N])`, whose array argument may first be materialized on
    // the stack before being moved into the box.
    let large_data: Box<[u8]> = vec![0u8; 1_000_000].into_boxed_slice();
    assert_eq!(large_data.len(), 1_000_000);

    let numbers = vec![1, 2, 3, 4, 5];
    let sum = process_data(&numbers);
    println!("處理結果: {}", sum);
}
/// Exercise: implement a text statistics analyzer.
///
/// The body is intentionally left unimplemented; `todo!()` keeps the skeleton
/// compiling (an empty body does not type-check against `-> TextStats`) and
/// panics with a clear message if called before it is filled in.
fn analyze_text(text: &str) -> TextStats {
    // To compute:
    // - character count, word count, line count
    // - the most common words
    // - the average word length
    // - the distribution of word lengths
    let _ = text;
    todo!("exercise: compute the statistics listed above")
}
/// Result type returned by `analyze_text`.
struct TextStats {
/// Number of characters in the analyzed text.
char_count: usize,
/// Number of words in the analyzed text.
word_count: usize,
/// Number of lines in the analyzed text.
line_count: usize,
/// Most common words — presumably `(word, occurrences)` pairs; confirm when implementing.
most_common_words: Vec<(String, usize)>,
/// Average word length.
average_word_length: f64,
/// Word length distribution — presumably word length -> number of words; confirm when implementing.
word_length_distribution: HashMap<usize, usize>,
}
/// Exercise: build a configurable data-processing pipeline supporting
/// transformations such as map, filter, reduce and group_by.
///
/// The skeleton is kept compilable so the methods can be implemented one at a
/// time: the struct owns its items (a type parameter that is never used is a
/// compile error) and every unfinished method is an explicit `todo!()`.
struct DataPipeline<T> {
    // Backing storage. This is a placeholder choice made so the skeleton
    // compiles — replace it if your design differs.
    items: Vec<T>,
}

impl<T> DataPipeline<T> {
    /// Creates an empty pipeline.
    fn new() -> Self {
        Self { items: Vec::new() }
    }

    /// Exercise: apply `f` to every item, producing a pipeline of the results.
    fn map<U, F>(self, f: F) -> DataPipeline<U>
    where
        F: Fn(T) -> U,
    {
        let _ = (self.items, f);
        todo!("exercise: implement map")
    }

    /// Exercise: keep only the items for which `predicate` returns true.
    fn filter<F>(self, predicate: F) -> DataPipeline<T>
    where
        F: Fn(&T) -> bool,
    {
        let _ = (self.items, predicate);
        todo!("exercise: implement filter")
    }

    /// Exercise: group the items by the key computed by `key_fn`.
    ///
    /// NOTE: inserting into the returned `HashMap` will require
    /// `K: Eq + std::hash::Hash`; add that bound when implementing.
    fn group_by<K, F>(self, key_fn: F) -> HashMap<K, Vec<T>>
    where
        F: Fn(&T) -> K,
    {
        let _ = (self.items, key_fn);
        todo!("exercise: implement group_by")
    }
}
// 建立一個效能分析工具,比較不同集合和迭代器操作的效能
use std::time::Instant;
/// Exercise skeleton: a unit struct whose methods are placeholders for the
/// benchmarks described below. All bodies are intentionally empty.
struct PerformanceBenchmark;
impl PerformanceBenchmark {
// Compare Vec vs VecDeque vs LinkedList
fn benchmark_collections(&self) { /* ... */ }
// Compare the lookup performance of HashMap vs BTreeMap
fn benchmark_maps(&self) { /* ... */ }
// Compare the performance of different iterator chains
fn benchmark_iterators(&self) { /* ... */ }
// Measure memory usage
fn benchmark_memory(&self) { /* ... */ }
}
/// Anti-pattern kept for illustration: returns an owned copy of every string,
/// cloning each one even when the caller only needs to read them.
fn bad_example(data: &[String]) -> Vec<String> {
    // Still one clone per element — that is the point of this "bad" example.
    data.to_vec()
}
/// Preferred form: borrows every string as a `&str` instead of cloning it.
/// The returned slices borrow from `data`.
fn good_example(data: &[String]) -> Vec<&str> {
    data.iter().map(String::as_str).collect()
}
/// Anti-pattern kept for illustration: membership test on a `Vec`, which has
/// to scan the elements one by one (O(n)).
fn find_in_vec(data: &Vec<i32>, target: i32) -> bool {
    data.contains(&target) // linear scan
}
// 好:用 HashSet 做查找
use std::collections::HashSet;
/// Preferred form for frequent lookups: membership test on a `HashSet`.
/// Returns `true` when `target` is present in `data`.
fn find_in_set(data: &HashSet<i32>, target: i32) -> bool {
data.contains(&target) // O(1) on average (hash lookup)
}
// Common misconception: assuming this chain runs as soon as it is written.
// The range is typed `i64` because the largest square produced here
// (999_998 * 999_998) does not fit in the default `i32` and would overflow
// once the pipeline is actually driven.
let iter = (1_i64..1_000_000)
    .filter(|&x| x % 2 == 0)
    .map(|x| x * x); // nothing has been computed yet!
// The pipeline only executes when a consuming adaptor drives it.
let result: Vec<_> = iter.collect(); // the work happens here
今天學習的集合和迭代器將為明天的泛型學習提供很好的基礎,因為它們本身就大量使用了泛型和 trait!