use std::mem; use std::fmt::{Debug, Formatter, Result}; use std::cmp::min; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[derive(Copy, Clone)] pub struct Point { pub x: f32, pub y: f32, } impl Point { pub fn new>(x: T, y: T) -> Self { Self { x: x.into(), y: y.into(), } } pub fn lerp(t: f32, p0: &Self, p1: &Self) -> Self { Self { x: p0.x + t * (p1.x - p0.x), y: p0.y + t * (p1.y - p0.y), } } } impl Debug for Point { fn fmt(&self, f: &mut Formatter) -> Result { write!(f, "({}, {})", self.x, self.y) } } #[derive(Debug)] pub struct Affine { a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, } impl Affine { pub fn new(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32) -> Self { Self { a, b, c, d, e, f } } /// Concatenate two affine transforms. pub fn concat(t1: &Self, t2: &Self) -> Self { Self { a: t1.a * t2.a + t1.c * t2.b, b: t1.b * t2.a + t1.d * t2.b, c: t1.a * t2.c + t1.c * t2.d, d: t1.b * t2.c + t1.d * t2.d, e: t1.a * t2.e + t1.c * t2.f + t1.e, f: t1.b * t2.e + t1.d * t2.f + t1.f, } } pub fn apply(&self, p: &Point) -> Point { Point { x: self.a * p.x + self.c * p.y + self.e, y: self.b * p.x + self.d * p.y + self.f, } } } #[doc(hidden)] macro _mm_shuffle($z:expr, $y:expr, $x:expr, $w:expr) { ($z << 6) | ($y << 4) | ($x << 2) | $w } #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] pub fn accumulate_sse(src: &[f32]) -> Vec { // SIMD instructions force us to align data since we iterate each 4 elements // So: // n (0) => 0 // n (1 or 2 or 3 or 4) => 4, // n (5) => 8 // and so on let len = src.len(); let n = (len + 3) & !3; // align data let mut dst: Vec = vec![0; n]; unsafe { let mut offset = _mm_setzero_ps(); let sign_mask = _mm_set1_ps(-0.); let mask = _mm_set1_epi32(0x0c080400); for i in (0..n).step_by(4) { let mut x = _mm_loadu_ps(&src[i]); x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4))); x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40)); x = _mm_add_ps(x, offset); let mut y = _mm_andnot_ps(sign_mask, x); // fabs(x) y = _mm_min_ps(y, _mm_set1_ps(1.0)); y = _mm_mul_ps(y, _mm_set1_ps(255.0)); let mut z = _mm_cvttps_epi32(y); z = _mm_shuffle_epi8(z, mask); _mm_store_ss(mem::transmute(&dst[i]), _mm_castsi128_ps(z)); offset = _mm_shuffle_ps(x, x, _mm_shuffle!(3, 3, 3, 3)); } dst.set_len(len); // we must return vec of the same length as src.len() } dst } pub fn accumulate_simple(src: &[f32]) -> Vec { let mut acc = 0.0; src.iter() .map(|c| { // This would translate really well to SIMD acc += c; let y = acc.abs(); let y = if y < 1.0 { y } else { 1.0 }; (255.0 * y) as u8 }) .collect() } #[cfg(test)] mod tests { use super::*; fn test_accumulate(src: Vec) { assert_eq!(accumulate_simple(&src), accumulate(&src)); } #[test] fn max_255_from_1_0() { // 1.0 * 255.0 = 255.0 (max value) test_accumulate(vec![1.0]); } #[test] fn max_255_from_0_5() { // 0.5 * 2 = 1.0 // 1.0 * 255.0 = 255.0 (max value) test_accumulate(vec![0.5; 2]); } #[test] fn max_255_from_0_25() { // 0.25 * 4 = 1.0 // 1.0 * 255.0 = 255.0 (max value) test_accumulate(vec![0.25; 4]); } #[test] fn max_255_from_0_125() { // 0.125 * 8 = 1.0 // 1.0 * 255.0 = 255.0 (max value) test_accumulate(vec![0.125; 8]); } #[test] fn max_255_from_0_0625() { // 0.0625 * 16 = 1.0 // 1.0 * 255.0 = 255.0 (max value) test_accumulate(vec![0.0625; 16]); } #[test] fn max_255_from_0_03125() { // 0.03125 * 32 = 1.0 // 1.0 * 255.0 = 255.0 (max value) test_accumulate(vec![0.03125; 32]); } #[test] fn max_255_from_0_015625() { // 0.015625 * 64 = 1.0 // 1.0 * 255.0 = 255.0 (max value) test_accumulate(vec![0.015625; 64]); } #[test] fn max_255_from_0_0078125() { // 0.0078125 * 128 = 1.0 // 1.0 * 255.0 = 255.0 (max value) test_accumulate(vec![0.0078125; 128]); } #[test] fn simple_0() { test_accumulate(vec![]); } #[test] fn simple_1() { test_accumulate(vec![0.1]); } #[test] fn simple_2() { test_accumulate(vec![0.1, 0.2]); } #[test] fn simple_3() { test_accumulate(vec![0.1, 0.2, 0.3]); } #[test] fn simple_4() { test_accumulate(vec![0.1, 0.2, 0.3, 0.4]); } #[test] fn simple_5() { test_accumulate(vec![0.1, 0.2, 0.3, 0.4, 0.5]); } #[test] fn simple_6() { test_accumulate(vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6]); } #[test] fn simple_7() { test_accumulate(vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]); } } pub struct Raster { w: usize, h: usize, a: Vec, } // TODO: is there a faster way? (investigate whether approx recip is good enough) fn recip(x: f32) -> f32 { x.recip() } impl Raster { pub fn new(w: usize, h: usize) -> Self { Self { w, h, a: vec![0.0; w * h + 4], } } pub fn draw_line(&mut self, p0: &Point, p1: &Point) { //println!("draw_line {} {}", p0, p1); if p0.y == p1.y { return; } let (dir, p0, p1) = if p0.y < p1.y { (1.0, p0, p1) } else { (-1.0, p1, p0) }; let dxdy = (p1.x - p0.x) / (p1.y - p0.y); let mut x = p0.x; let y0 = p0.y as usize; // note: implicit max of 0 because usize (TODO: really true?) if p0.y < 0.0 { x -= p0.y * dxdy; } for y in y0..min(self.h, p1.y.ceil() as usize) { let linestart = y * self.w; let dy = ((y + 1) as f32).min(p1.y) - (y as f32).max(p0.y); let xnext = x + dxdy * dy; let d = dy * dir; let (x0, x1) = if x < xnext { (x, xnext) } else { (xnext, x) }; let x0floor = x0.floor(); let x0i = x0floor as i32; let x1ceil = x1.ceil(); let x1i = x1ceil as i32; if x1i <= x0i + 1 { let xmf = 0.5 * (x + xnext) - x0floor; self.a[linestart + x0i as usize] += d - d * xmf; self.a[linestart + (x0i + 1) as usize] += d * xmf; } else { let s = recip(x1 - x0); let x0f = x0 - x0floor; let a0 = 0.5 * s * (1.0 - x0f) * (1.0 - x0f); let x1f = x1 - x1ceil + 1.0; let am = 0.5 * s * x1f * x1f; self.a[linestart + x0i as usize] += d * a0; if x1i == x0i + 2 { self.a[linestart + (x0i + 1) as usize] += d * (1.0 - a0 - am); } else { let a1 = s * (1.5 - x0f); self.a[linestart + (x0i + 1) as usize] += d * (a1 - a0); for xi in x0i + 2..x1i - 1 { self.a[linestart + xi as usize] += d * s; } let a2 = a1 + (x1i - x0i - 3) as f32 * s; self.a[linestart + (x1i - 1) as usize] += d * (1.0 - a2 - am); } self.a[linestart + x1i as usize] += d * am; } x = xnext; } } pub fn draw_quad(&mut self, p0: &Point, p1: &Point, p2: &Point) { //println!("draw_quad {} {} {}", p0, p1, p2); let devx = p0.x - 2.0 * p1.x + p2.x; let devy = p0.y - 2.0 * p1.y + p2.y; let devsq = devx * devx + devy * devy; if devsq < 0.333 { self.draw_line(p0, p2); return; } let tol = 3.0; let n = 1 + (tol * (devx * devx + devy * devy)).sqrt().sqrt().floor() as usize; //println!("n = {}", n); let mut p = *p0; let nrecip = recip(n as f32); let mut t = 0.0; for _i in 0..n - 1 { t += nrecip; let pn = Point::lerp(t, &Point::lerp(t, p0, p1), &Point::lerp(t, p1, p2)); self.draw_line(&p, &pn); p = pn; } self.draw_line(&p, p2); } /* fn get_bitmap_fancy(&self) -> Vec { let mut acc = 0.0; // This would translate really well to SIMD self.a[0..self.w * self.h].iter().map(|&a| { acc += a; (255.0 * acc.abs().min(1.0)) as u8 //(255.5 * (0.5 + 0.4 * acc)) as u8 }).collect() } */ pub fn get_bitmap(&self) -> Vec { accumulate(&self.a[0..self.w * self.h]) } }