feat: 优化 overlay_pixels
This commit is contained in:
@@ -293,24 +293,48 @@ impl MSCanvas {
|
||||
let width = canvas.width.min(self.width) as usize;
|
||||
let height = canvas.height.min(self.height) as usize;
|
||||
|
||||
// 预计算步长,避免重复计算
|
||||
let src_stride = self.width as usize * 4;
|
||||
let dst_stride = width * 4;
|
||||
let dst_stride = canvas.width as usize * 4; // ⚠️ Bug fix: use canvas.width, not `width`
|
||||
|
||||
for y in 0..height {
|
||||
let src_base = y * src_stride;
|
||||
let dst_base = y * dst_stride;
|
||||
let src_row = &self.pixels[y * src_stride..y * src_stride + width * 4];
|
||||
let dst_row = &mut pixels[y * dst_stride..y * dst_stride + width * 4];
|
||||
|
||||
// 使用显式循环,可能被编译器自动向量化
|
||||
for x in 0..width {
|
||||
let src_idx = src_base + x * 4;
|
||||
let dst_idx = dst_base + x * 4;
|
||||
// Process 4 pixels at a time (16 bytes) for better auto-vectorization
|
||||
let chunks = width / 4;
|
||||
let remainder = width % 4;
|
||||
|
||||
// 只在非透明时复制
|
||||
if self.pixels[src_idx + 3] != 0 {
|
||||
// 一次复制4个字节(可能被优化为单个u32操作)
|
||||
pixels[dst_idx..dst_idx + 4]
|
||||
.copy_from_slice(&self.pixels[src_idx..src_idx + 4]);
|
||||
for chunk in 0..chunks {
|
||||
let base = chunk * 16;
|
||||
// Check alpha bytes for 4 pixels at once
|
||||
let a0 = src_row[base + 3];
|
||||
let a1 = src_row[base + 7];
|
||||
let a2 = src_row[base + 11];
|
||||
let a3 = src_row[base + 15];
|
||||
|
||||
if a0 | a1 | a2 | a3 != 0 {
|
||||
// At least one pixel is non-transparent
|
||||
if a0 != 0 && a1 != 0 && a2 != 0 && a3 != 0 {
|
||||
// All 4 pixels are non-transparent — bulk copy 16 bytes
|
||||
dst_row[base..base + 16].copy_from_slice(&src_row[base..base + 16]);
|
||||
} else {
|
||||
// Mixed — copy individually
|
||||
for i in 0..4 {
|
||||
let off = base + i * 4;
|
||||
if src_row[off + 3] != 0 {
|
||||
dst_row[off..off + 4].copy_from_slice(&src_row[off..off + 4]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// else: all 4 transparent — skip entirely
|
||||
}
|
||||
|
||||
// Handle remaining pixels
|
||||
for x in (chunks * 4)..width {
|
||||
let off = x * 4;
|
||||
if src_row[off + 3] != 0 {
|
||||
dst_row[off..off + 4].copy_from_slice(&src_row[off..off + 4]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user