// Copyright (c) the JPEG XL Project Authors. All rights reserved. // // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #ifndef LIB_JXL_LINALG_H_ #define LIB_JXL_LINALG_H_ // Linear algebra. #include #include #include #include #include "lib/jxl/base/compiler_specific.h" #include "lib/jxl/base/status.h" #include "lib/jxl/common.h" #include "lib/jxl/image.h" #include "lib/jxl/image_ops.h" namespace jxl { using ImageD = Plane; template inline T DotProduct(const size_t N, const T* const JXL_RESTRICT a, const T* const JXL_RESTRICT b) { T sum = 0.0; for (size_t k = 0; k < N; ++k) { sum += a[k] * b[k]; } return sum; } template inline T L2NormSquared(const size_t N, const T* const JXL_RESTRICT a) { return DotProduct(N, a, a); } template inline T L1Norm(const size_t N, const T* const JXL_RESTRICT a) { T sum = 0; for (size_t k = 0; k < N; ++k) { sum += a[k] >= 0 ? a[k] : -a[k]; } return sum; } inline double DotProduct(const ImageD& a, const ImageD& b) { JXL_ASSERT(a.ysize() == 1); JXL_ASSERT(b.ysize() == 1); JXL_ASSERT(a.xsize() == b.xsize()); const double* const JXL_RESTRICT row_a = a.Row(0); const double* const JXL_RESTRICT row_b = b.Row(0); return DotProduct(a.xsize(), row_a, row_b); } inline ImageD Transpose(const ImageD& A) { ImageD out(A.ysize(), A.xsize()); for (size_t x = 0; x < A.xsize(); ++x) { double* const JXL_RESTRICT row_out = out.Row(x); for (size_t y = 0; y < A.ysize(); ++y) { row_out[y] = A.Row(y)[x]; } } return out; } template Plane MatMul(const Plane& A, const Plane& B) { JXL_ASSERT(A.ysize() == B.xsize()); Plane out(A.xsize(), B.ysize()); for (size_t y = 0; y < B.ysize(); ++y) { const Tin2* const JXL_RESTRICT row_b = B.Row(y); Tout* const JXL_RESTRICT row_out = out.Row(y); for (size_t x = 0; x < A.xsize(); ++x) { row_out[x] = 0.0; for (size_t k = 0; k < B.xsize(); ++k) { row_out[x] += A.Row(k)[x] * row_b[k]; } } } return out; } template ImageD MatMul(const Plane& A, const Plane& B) { return MatMul(A, B); } template ImageI MatMulI(const Plane& A, const Plane& B) { return MatMul(A, B); } // Computes A = B * C, with sizes rows*cols: A=ha*wa, B=wa*wb, C=ha*wb template void MatMul(const T* a, const T* b, int ha, int wa, int wb, T* c) { std::vector temp(wa); // Make better use of cache lines for (int x = 0; x < wb; x++) { for (int z = 0; z < wa; z++) { temp[z] = b[z * wb + x]; } for (int y = 0; y < ha; y++) { double e = 0; for (int z = 0; z < wa; z++) { e += a[y * wa + z] * temp[z]; } c[y * wb + x] = e; } } } // Computes C = A + factor * B template void MatAdd(const T* a, const T* b, F factor, int h, int w, T* c) { for (int i = 0; i < w * h; i++) { c[i] = a[i] + b[i] * factor; } } template inline Plane Identity(const size_t N) { Plane out(N, N); for (size_t i = 0; i < N; ++i) { T* JXL_RESTRICT row = out.Row(i); std::fill(row, row + N, 0); row[i] = static_cast(1.0); } return out; } inline ImageD Diagonal(const ImageD& d) { JXL_ASSERT(d.ysize() == 1); ImageD out(d.xsize(), d.xsize()); const double* JXL_RESTRICT row_diag = d.Row(0); for (size_t k = 0; k < d.xsize(); ++k) { double* JXL_RESTRICT row_out = out.Row(k); std::fill(row_out, row_out + d.xsize(), 0.0); row_out[k] = row_diag[k]; } return out; } // Computes c, s such that c^2 + s^2 = 1 and // [c -s] [x] = [ * ] // [s c] [y] [ 0 ] void GivensRotation(double x, double y, double* c, double* s); // U = U * Givens(i, j, c, s) void RotateMatrixCols(ImageD* JXL_RESTRICT U, int i, int j, double c, double s); // A is symmetric, U is orthogonal, T is tri-diagonal and // A = U * T * Transpose(U). void ConvertToTridiagonal(const ImageD& A, ImageD* JXL_RESTRICT T, ImageD* JXL_RESTRICT U); // A is symmetric, U is orthogonal, and A = U * Diagonal(diag) * Transpose(U). void ConvertToDiagonal(const ImageD& A, ImageD* JXL_RESTRICT diag, ImageD* JXL_RESTRICT U); // A is square matrix, Q is orthogonal, R is upper triangular and A = Q * R; void ComputeQRFactorization(const ImageD& A, ImageD* JXL_RESTRICT Q, ImageD* JXL_RESTRICT R); // Inverts a 3x3 matrix in place template Status Inv3x3Matrix(T* matrix) { // Intermediate computation is done in double precision. double temp[9]; temp[0] = static_cast(matrix[4]) * matrix[8] - static_cast(matrix[5]) * matrix[7]; temp[1] = static_cast(matrix[2]) * matrix[7] - static_cast(matrix[1]) * matrix[8]; temp[2] = static_cast(matrix[1]) * matrix[5] - static_cast(matrix[2]) * matrix[4]; temp[3] = static_cast(matrix[5]) * matrix[6] - static_cast(matrix[3]) * matrix[8]; temp[4] = static_cast(matrix[0]) * matrix[8] - static_cast(matrix[2]) * matrix[6]; temp[5] = static_cast(matrix[2]) * matrix[3] - static_cast(matrix[0]) * matrix[5]; temp[6] = static_cast(matrix[3]) * matrix[7] - static_cast(matrix[4]) * matrix[6]; temp[7] = static_cast(matrix[1]) * matrix[6] - static_cast(matrix[0]) * matrix[7]; temp[8] = static_cast(matrix[0]) * matrix[4] - static_cast(matrix[1]) * matrix[3]; double det = matrix[0] * temp[0] + matrix[1] * temp[3] + matrix[2] * temp[6]; if (std::abs(det) < 1e-10) { return JXL_FAILURE("Matrix determinant is too close to 0"); } double idet = 1.0 / det; for (int i = 0; i < 9; i++) { matrix[i] = temp[i] * idet; } return true; } // Solves system of linear equations A * X = B using the conjugate gradient // method. Matrix a must be a n*n, symmetric and positive definite. // Vectors b and x must have n elements template void ConjugateGradient(const T* a, int n, const T* b, T* x) { std::vector r(n); MatMul(a, x, n, n, 1, r.data()); MatAdd(b, r.data(), -1, n, 1, r.data()); std::vector p = r; T rr; MatMul(r.data(), r.data(), 1, n, 1, &rr); // inner product if (rr == 0) return; // The initial values were already optimal for (int i = 0; i < n; i++) { std::vector ap(n); MatMul(a, p.data(), n, n, 1, ap.data()); T alpha; MatMul(r.data(), ap.data(), 1, n, 1, &alpha); // Normally alpha couldn't be zero here but if numerical issues caused it, // return assuming the solution is close. if (alpha == 0) return; alpha = rr / alpha; MatAdd(x, p.data(), alpha, n, 1, x); MatAdd(r.data(), ap.data(), -alpha, n, 1, r.data()); T rr2; MatMul(r.data(), r.data(), 1, n, 1, &rr2); // inner product if (rr2 < 1e-20) break; T beta = rr2 / rr; MatAdd(r.data(), p.data(), beta, 1, n, p.data()); rr = rr2; } } // Computes optimal coefficients r to approximate points p with linear // combination of functions f. The matrix f has h rows and w columns, r has h // values, p has w values. h is the amount of functions, w the amount of points. // Uses the finite element method and minimizes mean square error. template void FEM(const T* f, int h, int w, const T* p, T* r) { // Compute "Gramian" matrix G = F * F^T // Speed up multiplication by using non-zero intervals in sparse F. std::vector start(h); std::vector end(h); for (int y = 0; y < h; y++) { start[y] = end[y] = 0; for (int x = 0; x < w; x++) { if (f[y * w + x] != 0) { start[y] = x; break; } } for (int x = w - 1; x >= 0; x--) { if (f[y * w + x] != 0) { end[y] = x + 1; break; } } } std::vector g(h * h); for (int y = 0; y < h; y++) { for (int x = 0; x <= y; x++) { T v = 0; // Intersection of the two sparse intervals. int s = std::max(start[x], start[y]); int e = std::min(end[x], end[y]); for (int z = s; z < e; z++) { v += f[x * w + z] * f[y * w + z]; } // Symmetric, so two values output at once g[y * h + x] = v; g[x * h + y] = v; } } // B vector: sum of each column of F multiplied by corresponding p std::vector b(h, 0); for (int y = 0; y < h; y++) { T v = 0; for (int x = 0; x < w; x++) { v += f[y * w + x] * p[x]; } b[y] = v; } ConjugateGradient(g.data(), h, b.data(), r); } } // namespace jxl #endif // LIB_JXL_LINALG_H_