feat: 优化 overlay_pixels

This commit is contained in:
2026-03-07 15:51:13 +08:00
parent 2631fec53c
commit d409c11f50
2 changed files with 50 additions and 15 deletions

View File

@@ -293,24 +293,48 @@ impl MSCanvas {
let width = canvas.width.min(self.width) as usize;
let height = canvas.height.min(self.height) as usize;
// 预计算步长,避免重复计算
let src_stride = self.width as usize * 4;
let dst_stride = width * 4;
let dst_stride = canvas.width as usize * 4; // ⚠️ Bug fix: use canvas.width, not `width`
for y in 0..height {
let src_base = y * src_stride;
let dst_base = y * dst_stride;
let src_row = &self.pixels[y * src_stride..y * src_stride + width * 4];
let dst_row = &mut pixels[y * dst_stride..y * dst_stride + width * 4];
// 使用显式循环,可能被编译器自动向量化
for x in 0..width {
let src_idx = src_base + x * 4;
let dst_idx = dst_base + x * 4;
// Process 4 pixels at a time (16 bytes) for better auto-vectorization
let chunks = width / 4;
let remainder = width % 4;
// 只在非透明时复制
if self.pixels[src_idx + 3] != 0 {
// 一次复制4个字节可能被优化为单个u32操作
pixels[dst_idx..dst_idx + 4]
.copy_from_slice(&self.pixels[src_idx..src_idx + 4]);
for chunk in 0..chunks {
let base = chunk * 16;
// Check alpha bytes for 4 pixels at once
let a0 = src_row[base + 3];
let a1 = src_row[base + 7];
let a2 = src_row[base + 11];
let a3 = src_row[base + 15];
if a0 | a1 | a2 | a3 != 0 {
// At least one pixel is non-transparent
if a0 != 0 && a1 != 0 && a2 != 0 && a3 != 0 {
// All 4 pixels are non-transparent — bulk copy 16 bytes
dst_row[base..base + 16].copy_from_slice(&src_row[base..base + 16]);
} else {
// Mixed — copy individually
for i in 0..4 {
let off = base + i * 4;
if src_row[off + 3] != 0 {
dst_row[off..off + 4].copy_from_slice(&src_row[off..off + 4]);
}
}
}
}
// else: all 4 transparent — skip entirely
}
// Handle remaining pixels
for x in (chunks * 4)..width {
let off = x * 4;
if src_row[off + 3] != 0 {
dst_row[off..off + 4].copy_from_slice(&src_row[off..off + 4]);
}
}
}