Unsharp mask 的想法是:先把一張圖「模糊」成低頻版,再用原圖減掉低頻,得到的就是高頻細節(邊緣、紋理)。最後把高頻乘上一個比例加回原圖,邊緣就會變銳利。這個方法很適合放進我們的程式碼裡,而且也能沿用 Day 8 的 ping-pong buffer:模糊階段把結果寫到目的緩衝,接著就地把銳化後的結果覆寫目的端,除了模糊內部的暫存 tmp 之外,整段下來不需要任何額外配置。
這次的重點除了 amount 之外,還補了兩個安全閥。第一個是 threshold:差異低於門檻的一律忽略,避免暗部雜訊被誤強化。第二個是 limit:先把高頻差值夾在 ±limit 之內再乘上 amount,藉此限制每像素的最大改變幅度(上限為 limit × amount),防止局部對比暴衝造成 halo 與破色。這兩個開關對小字、UI 截圖、噪點多的照片特別有感,讓銳化看起來是「更清楚」而不是「更刺眼」。
整合點維持和 Day 8 一樣:列舉型別 `Op` 新增一個 `unsharp` 操作,參數包含半徑 r、強度 amount、雜訊門檻 threshold、限幅 limit。為了讓慢速版(配置回傳 Vec)和快速管線(乒乓零配置)都能用,我們各準備一個版本:`unsharp(...) -> Vec<u8>` 與 `unsharp_into(src, dst, ...)`。核心演算法是 into 版:先把低頻模糊寫進 `dst`,再用 `src` 與目前 `dst` 的低頻相減得到高頻,乘上 amount 後加回 `src`,把結果覆寫回 `dst`,alpha 複製即可。
use wasm_bindgen::prelude::*;
use serde::Deserialize;
use serde_wasm_bindgen as swb;
/// Index clamp: restricts `v` to the inclusive range `[lo, hi]`.
///
/// The lower bound is tested first, so `lo` is returned whenever `v < lo`,
/// even for an inverted range where `lo > hi`.
fn clamp_i(v: isize, lo: isize, hi: isize) -> isize {
    match v {
        below if below < lo => lo,
        above if above > hi => hi,
        inside => inside,
    }
}
/// Box-blurs the R, G and B channels of an RGBA8 buffer from `src` into `dst`.
///
/// The blur is separable: a horizontal running-sum pass writes `src` into a
/// scratch buffer, then a vertical running-sum pass writes the scratch buffer
/// into `dst`. Each pass averages a window of `2 * r + 1` samples using
/// truncating integer division; samples that would fall outside the image
/// reuse the nearest edge pixel. Alpha is never filtered: every output pixel
/// takes the alpha of the `src` pixel at the same position.
///
/// * `src` - input pixels, 4 bytes per pixel, row-major.
/// * `dst` - output buffer with the same layout as `src`.
/// * `w`, `h` - image size in pixels.
/// * `r` - blur radius in pixels; `0` copies `src` into `dst` unchanged.
///
/// When `r > 0` and either dimension is zero there is nothing to filter and
/// `dst` is left untouched.
///
/// One scratch buffer of `src.len()` bytes is allocated per call.
///
/// # Panics
///
/// Panics if `r == 0` and the slices differ in length, or (for a non-empty
/// image with `r > 0`) if either slice is shorter than `w * h * 4` bytes.
fn box_blur_rgba_into(src: &[u8], dst: &mut [u8], w: u32, h: u32, r: u32) {
    if r == 0 {
        dst.copy_from_slice(src);
        return;
    }
    // An image without columns or rows has no pixels. Bail out before `w - 1` /
    // `h - 1` is computed below, which would underflow for a zero dimension.
    if w == 0 || h == 0 {
        return;
    }

    let w = w as usize;
    let h = h as usize;
    let win = (2 * r + 1) as usize;
    let div = win as u32;
    let reach = r as isize;
    let max_x = (w - 1) as isize;
    let max_y = (h - 1) as isize;
    let mut tmp = vec![0u8; src.len()];

    // Horizontal pass: src -> tmp.
    for y in 0..h {
        let row = y * w;
        let (mut sr, mut sg, mut sb) = (0u32, 0u32, 0u32);
        // Seed the running sums with the window centred on x = 0.
        for dx in 0..win {
            let x = clamp_i(dx as isize - reach, 0, max_x) as usize;
            let i = (row + x) * 4;
            sr += u32::from(src[i]);
            sg += u32::from(src[i + 1]);
            sb += u32::from(src[i + 2]);
        }
        for x in 0..w {
            if x > 0 {
                // Slide one pixel right: add the entering sample first, then drop
                // the leaving one, so the unsigned sums never dip below zero.
                let x_add = clamp_i(x as isize + reach, 0, max_x) as usize;
                let x_sub = clamp_i(x as isize - 1 - reach, 0, max_x) as usize;
                let i_add = (row + x_add) * 4;
                let i_sub = (row + x_sub) * 4;
                sr = sr + u32::from(src[i_add]) - u32::from(src[i_sub]);
                sg = sg + u32::from(src[i_add + 1]) - u32::from(src[i_sub + 1]);
                sb = sb + u32::from(src[i_add + 2]) - u32::from(src[i_sub + 2]);
            }
            let i = (row + x) * 4;
            tmp[i] = (sr / div) as u8;
            tmp[i + 1] = (sg / div) as u8;
            tmp[i + 2] = (sb / div) as u8;
            tmp[i + 3] = src[i + 3];
        }
    }

    // Vertical pass: tmp -> dst.
    for x in 0..w {
        let (mut sr, mut sg, mut sb) = (0u32, 0u32, 0u32);
        // Seed the running sums with the window centred on y = 0.
        for dy in 0..win {
            let y = clamp_i(dy as isize - reach, 0, max_y) as usize;
            let i = (y * w + x) * 4;
            sr += u32::from(tmp[i]);
            sg += u32::from(tmp[i + 1]);
            sb += u32::from(tmp[i + 2]);
        }
        for y in 0..h {
            if y > 0 {
                // Slide one pixel down, same add-then-subtract order as above.
                let y_add = clamp_i(y as isize + reach, 0, max_y) as usize;
                let y_sub = clamp_i(y as isize - 1 - reach, 0, max_y) as usize;
                let i_add = (y_add * w + x) * 4;
                let i_sub = (y_sub * w + x) * 4;
                sr = sr + u32::from(tmp[i_add]) - u32::from(tmp[i_sub]);
                sg = sg + u32::from(tmp[i_add + 1]) - u32::from(tmp[i_sub + 1]);
                sb = sb + u32::from(tmp[i_add + 2]) - u32::from(tmp[i_sub + 2]);
            }
            let i = (y * w + x) * 4;
            dst[i] = (sr / div) as u8;
            dst[i + 1] = (sg / div) as u8;
            dst[i + 2] = (sb / div) as u8;
            // Alpha comes straight from the source image, not from the scratch buffer.
            dst[i + 3] = src[i + 3];
        }
    }
}
/// Unsharp mask that writes into a caller-provided buffer (the variant meant
/// for the ping-pong pipeline).
///
/// `dst` is first filled with the box-blurred (low-frequency) version of `src`.
/// Then, for every R/G/B sample, `high = src - blurred` is computed and `dst`
/// is overwritten with `src + adj`, rounded and clamped to `0..=255`, where
///
/// * `adj = 0` when `|high| < threshold` (small differences are ignored), and
/// * `adj = clamp(high, -limit, limit) * amount` otherwise.
///
/// The clamp happens before the multiplication, so the largest change applied
/// to a sample is `limit * |amount|`. Alpha is copied from `src`.
///
/// * `src` - input RGBA8 pixels, 4 bytes per pixel.
/// * `dst` - output buffer with the same layout as `src`.
/// * `w`, `h` - image size in pixels, forwarded to the blur.
/// * `r` - blur radius, forwarded to the blur.
/// * `amount` - gain applied to the high-frequency part. A NaN or infinite
///   value is treated as `0.0`, which leaves the image unsharpened.
/// * `threshold` - minimum `|high|` that gets sharpened.
/// * `limit` - bound applied to `high` before scaling.
///
/// Only whole 4-byte pixels present in both slices are processed in the
/// sharpening step; any trailing bytes are left as the blur wrote them.
fn unsharp_into(src: &[u8], dst: &mut [u8], w: u32, h: u32,
                r: u32, amount: f32, threshold: u8, limit: u8) {
    // Step 1: low-frequency image goes into dst.
    box_blur_rgba_into(src, dst, w, h, r);

    let th = i16::from(threshold);
    let cap = i16::from(limit);
    // A non-finite gain would produce NaN samples, and the float-to-u8 cast
    // below maps NaN to 0 (black). Fall back to "no sharpening" instead.
    let gain = if amount.is_finite() { amount } else { 0.0 };

    // Step 2: combine the original with the low-frequency image, pixel by pixel.
    for (orig, out) in src.chunks_exact(4).zip(dst.chunks_exact_mut(4)) {
        for c in 0..3 {
            let s = i16::from(orig[c]);
            // High-frequency part; can be negative, hence the signed type.
            let high = s - i16::from(out[c]);
            let adj = if high.abs() < th {
                0.0
            } else {
                f32::from(high.clamp(-cap, cap)) * gain
            };
            out[c] = (f32::from(s) + adj).round().clamp(0.0, 255.0) as u8;
        }
        out[3] = orig[3];
    }
}
/// Unsharp mask, allocating variant (used by the slow pipeline).
///
/// Creates a zero-filled buffer the same length as `input`, lets
/// `unsharp_into` fill it, and returns it. All parameters are forwarded
/// unchanged.
fn unsharp(input: &[u8], w: u32, h: u32,
           r: u32, amount: f32, threshold: u8, limit: u8) -> Vec<u8> {
    let mut sharpened = vec![0u8; input.len()];
    unsharp_into(input, sharpened.as_mut_slice(), w, h, r, amount, threshold, limit);
    sharpened
}
/// One step of the image pipeline, deserialized from an object whose `kind`
/// field selects the variant.
///
/// Variant tags are the lowercase variant names (`grayscale`, `blur`,
/// `conv3x3`, `unsharp`); `BrightnessContrast` is the exception and uses the
/// short tag `bc`.
#[derive(Deserialize)]
#[serde(tag = "kind", rename_all = "lowercase")]
enum Op {
    /// `{ kind: "grayscale" }`
    Grayscale,
    /// `{ kind: "bc", b, c }`
    #[serde(rename = "bc")]
    BrightnessContrast { b: f64, c: f64 },
    /// `{ kind: "blur", r }`
    Blur { r: u32 },
    /// `{ kind: "conv3x3", k }` with `k` holding the nine kernel values.
    Conv3x3 { k: [f32; 9] },
    /// `{ kind: "unsharp", r, amount, threshold, limit }`
    Unsharp { r: u32, amount: f32, threshold: u8, limit: u8 },
}
/// Slow path: keeps the old allocating API so it can be exercised without
/// touching the front end.
///
/// Validates that `input` holds exactly `w * h * 4` bytes, deserializes `ops`
/// into a list of [`Op`] values, and applies them in order. Every step
/// returns a freshly allocated buffer that becomes the input of the next.
///
/// # Errors
///
/// * `"input length mismatch"` - `input.len()` differs from `w * h * 4`, or
///   that product does not fit in `usize`.
/// * `"bad ops: ..."` - `ops` could not be deserialized.
#[wasm_bindgen]
pub fn apply_pipeline(input: &[u8], w: u32, h: u32, ops: &JsValue) -> Result<Vec<u8>, JsValue> {
    // Checked arithmetic: with a 32-bit `usize` (wasm32) the plain product can
    // wrap around and let a wrong-sized buffer slip through the validation.
    let expected = (w as usize)
        .checked_mul(h as usize)
        .and_then(|pixels| pixels.checked_mul(4));
    if expected != Some(input.len()) {
        return Err(JsValue::from_str("input length mismatch"));
    }
    let ops: Vec<Op> = swb::from_value(ops.clone())
        .map_err(|e| JsValue::from_str(&format!("bad ops: {e}")))?;
    let mut buf = input.to_vec();
    for op in ops {
        buf = match op {
            Op::Grayscale => grayscale(&buf, w, h),
            Op::BrightnessContrast { b, c } => brightness_contrast(&buf, w, h, b, c),
            Op::Blur { r } => box_blur_rgba(&buf, w, h, r),
            Op::Conv3x3 { k } => convolve3x3(&buf, w, h, &k),
            Op::Unsharp { r, amount, threshold, limit } =>
                unsharp(&buf, w, h, r, amount, threshold, limit),
        };
    }
    Ok(buf)
}
// Existing allocating API (kept as-is).
// `apply_pipeline` calls this for `Op::Grayscale`.
// NOTE(review): the body is elided in this excerpt, so its exact behaviour is not documented here.
#[wasm_bindgen]
pub fn grayscale(input: &[u8], w: u32, h: u32) -> Vec<u8> { /* ...omitted; reuse your existing implementation... */ }
// `apply_pipeline` calls this for `Op::BrightnessContrast`, passing `b` as `brightness` and `c` as `contrast`.
// NOTE(review): the body is elided in this excerpt; value ranges and the exact formula are not visible here.
#[wasm_bindgen]
pub fn brightness_contrast(input: &[u8], w: u32, h: u32, brightness: f64, contrast: f64) -> Vec<u8> { /* ... */ }
// Allocating blur used by `apply_pipeline` for `Op::Blur`.
// NOTE(review): body elided in this excerpt; presumably the Vec-returning counterpart of `box_blur_rgba_into` - confirm.
fn box_blur_rgba(input: &[u8], w: u32, h: u32, r: u32) -> Vec<u8> { /* ... */ }
// Used by `apply_pipeline` for `Op::Conv3x3`; `k` carries the nine kernel values.
// NOTE(review): body elided in this excerpt; kernel ordering and edge handling are not visible here.
fn convolve3x3(input: &[u8], w: u32, h: u32, k: &[f32; 9]) -> Vec<u8> { /* ... */ }
`apply_pipeline_fast` 不用改架構,只要在遇到 `Op::Unsharp` 時呼叫 `unsharp_into`,Day 8 版本的乒乓交換就能直接吃這個效果。前後緩衝 A/B 的交換規則維持原樣,不另外建立任何暫存;模糊階段的內部 `tmp` 仍會配置,但只在該 pass 內部使用。
打包流程與 Day 8 相同,把舊的 `pkg` 清掉重產一份。這一步能確保 d.ts 與 JS glue 同步更新,讓前端直接補到新的 `unsharp` 列舉值。
# Remove the previous build output so the regenerated package fully replaces it.
rm -rf pkg
# Rebuild with wasm-pack for the `web` target, writing into `pkg` under the name `rustwasm_test`.
wasm-pack build --target web --out-dir pkg --out-name rustwasm_test
前端不需要改結構,ops 內容多一個 `unsharp` 物件即可。
// Pipeline steps, applied in array order; `kind` selects the matching `Op` variant on the Rust side.
const ops = [
// Brightness/contrast step (`b`, `c`).
{ kind: 'bc', b: 0, c: 18 },
// Unsharp mask: blur radius, gain, noise threshold, high-frequency limit.
{ kind: 'unsharp', r: 2, amount: 0.6, threshold: 8, limit: 32 },
// Second brightness/contrast step after sharpening.
{ kind: 'bc', b: 0, c: 6 },
];
在 4K 圖片上,unsharp 帶來的時間主要花在兩件事:模糊掃兩遍(水平與垂直)與銳化合成掃一遍。但這本質仍然是 memory-bound,每個 pass 都要讀寫整張圖,所以即使使用 ping-pong,整體時間不會劇烈下降;能感受到的提升依舊偏向「穩定性」與「分配壓力下降」。參數之中會影響耗時的只有 r:每一列(與每一行)開頭都要先加總 2r+1 個樣本來建立視窗,r 越大這段起始成本越高;之後視窗每滑動一格只做一次加、一次減,與 r 無關。amount 影響的是視覺,而非速度。
今天一整個來不及,原本想說要把 memory-bound 降低,但試了好久還是沒有所以情急之下只好寫 unsharp 了。