#include #include #include #define CAT_(A, B) A##B #define CAT(A, B) CAT_(A, B) #define MAKE_LAPACK(prefix, func) \ inline static constexpr auto func = CAT(CAT(prefix, func), _); using namespace diol; template T const *addr(T const &ptr) { return &ptr; } template T *addr(T &&ptr) { return &ptr; } MKL_INT max(MKL_INT a, MKL_INT b) { return a > b ? a : b; } using f32 = float; using f64 = double; using c32 = std::complex; using c64 = std::complex; template using Mat = Eigen::Matrix; #define MAKE_MKL(ty, mkl_ty, prefix, sym) \ template <> struct Mkl { \ using T = mkl_ty; \ MAKE_LAPACK(prefix, potrf); \ MAKE_LAPACK(prefix, geqrf); \ MAKE_LAPACK(prefix, geqp3); \ MAKE_LAPACK(prefix, getrf); \ MAKE_LAPACK(prefix, getc2); \ MAKE_LAPACK(prefix, gesdd); \ MAKE_LAPACK(CAT(prefix, sym), evd); \ MAKE_LAPACK(prefix, geev); \ }; template struct Mkl; MAKE_MKL(f32, f32, s, sy); MAKE_MKL(f64, f64, d, sy); MAKE_MKL(c32, MKL_Complex8, c, he); MAKE_MKL(c64, MKL_Complex16, z, he); template using Data = typename Mkl::T *; template MKL_INT stride(Mat const &mat) { return mat.outerStride(); } template auto data(Mat &mat) { return Data(mat.data()); } template Mat rand(MKL_INT m, MKL_INT n) { Mat A(m, n); A.setRandom(); return A; } template Mat rand_pos_def(MKL_INT n) { Mat A(n, n); A.setRandom(); Mat H = T(0.5) * (A + A.adjoint()); H += T(n) * Mat::Identity(n, n); return H; } template Mat randh(MKL_INT n) { Mat A(n, n); A.setRandom(); Mat H = T(0.5) * (A + A.adjoint()); return H; } template void cholesky(Bencher bencher, PlotArg arg) { MKL_INT n = arg.n; std::srand(0); Mat H_orig = rand_pos_def(n); Mat H = H_orig; std::move(bencher).bench([&] { H = H_orig; Mkl::potrf("L", addr(n), data(H), addr(stride(H)), addr((MKL_INT)0)); }); } template void qr(Bencher bencher, PlotArg arg) { MKL_INT n = arg.n; std::srand(0); Mat H_orig = rand(n, n); Mat H = H_orig; Mat tau(n, 1); T lwork_; Mkl::geqrf(addr(n), addr(n), data(H), addr(stride(H)), data(tau), Data(&lwork_), addr((MKL_INT)-1), addr((MKL_INT)0)); MKL_INT lwork = 2 * (MKL_INT)std::real(lwork_); Mat work(lwork, 1); std::move(bencher).bench([&] { H = H_orig; Mkl::geqrf(addr(n), addr(n), data(H), addr(stride(H)), data(tau), data(work), addr(lwork), addr((MKL_INT)0)); }); } template void piv_qr(Bencher bencher, PlotArg arg) { MKL_INT n = arg.n; std::srand(0); Mat H_orig = rand(n, n); Mat H = H_orig; Mat tau(n, 1); Mat p(n, 1); Mat::RealScalar> rwork(2 * n, 1); T lwork_; if constexpr (std::same_as::T>) { Mkl::geqp3(addr(n), addr(n), data(H), addr(stride(H)), p.data(), data(tau), Data(&lwork_), addr((MKL_INT)-1), addr((MKL_INT)0)); } else { Mkl::geqp3(addr(n), addr(n), data(H), addr(stride(H)), p.data(), data(tau), Data(&lwork_), addr((MKL_INT)-1), data(rwork), addr((MKL_INT)0)); } auto lwork = 2 * (MKL_INT)std::real(lwork_); Mat work(lwork, 1); std::move(bencher).bench([&] { H = H_orig; if constexpr (std::same_as::T>) { Mkl::geqp3(addr(n), addr(n), data(H), addr(stride(H)), p.data(), data(tau), data(work), addr(lwork), addr((MKL_INT)0)); } else { Mkl::geqp3(addr(n), addr(n), data(H), addr(stride(H)), p.data(), data(tau), data(work), addr(lwork), data(rwork), addr((MKL_INT)0)); } }); } template void lu(Bencher bencher, PlotArg arg) { MKL_INT n = arg.n; std::srand(0); Mat H_orig = rand(n, n); Mat H = H_orig; Mat p(n, 1); Mat work(n, max(n, 16)); std::move(bencher).bench([&] { H = H_orig; Mkl::getrf(addr(n), addr(n), data(H), addr(stride(H)), p.data(), addr((MKL_INT)0)); }); } template void piv_lu(Bencher bencher, PlotArg arg) { MKL_INT n = arg.n; std::srand(0); Mat H_orig = rand(n, n); Mat H = H_orig; Mat p(n, 1); Mat q(n, 1); Mat work(n, max(n, 16)); std::move(bencher).bench([&] { H = H_orig; Mkl::getc2(addr(n), data(H), addr(stride(H)), p.data(), q.data(), addr((MKL_INT)0)); }); } template void svd(Bencher bencher, PlotArg arg) { MKL_INT m = arg.n; MKL_INT n = arg.n; MKL_INT mn = m * n; MKL_INT mx = max(m, n); std::srand(0); Mat H_orig = rand(m, n); Mat H = H_orig; Mat U(m, m); Mat V(n, n); Mat::RealScalar> S(m, 1); Mat::RealScalar> rwork( max(5 * mn * mn + 5 * mn, 2 * mx * mn + 2 * mn * mn + mn), 1); Mat iwork(8 * max(m, n), 1); T lwork_; if constexpr (std::same_as::T>) { Mkl::gesdd("A", addr(m), addr(n), data(H), addr(stride(H)), data(S), data(U), addr(stride(U)), data(V), addr(stride(V)), Data(&lwork_), addr((MKL_INT)-1), iwork.data(), addr((MKL_INT)0)); } else { Mkl::gesdd("A", addr(m), addr(n), data(H), addr(stride(H)), data(S), data(U), addr(stride(U)), data(V), addr(stride(V)), Data(&lwork_), addr((MKL_INT)-1), data(rwork), iwork.data(), addr((MKL_INT)0)); } auto lwork = 2 * (MKL_INT)std::real(lwork_); Mat work(lwork, 1); std::move(bencher).bench([&] { H = H_orig; if constexpr (std::same_as::T>) { Mkl::gesdd("A", addr(m), addr(n), data(H), addr(stride(H)), data(S), data(U), addr(stride(U)), data(V), addr(stride(V)), data(work), addr(lwork), iwork.data(), addr((MKL_INT)0)); } else { Mkl::gesdd("A", addr(m), addr(n), data(H), addr(stride(H)), data(S), data(U), addr(stride(U)), data(V), addr(stride(V)), data(work), addr(lwork), data(rwork), iwork.data(), addr((MKL_INT)0)); } }); } template void thin_svd(Bencher bencher, PlotArg arg) { MKL_INT m = 4096; MKL_INT n = arg.n; MKL_INT mn = m * n; MKL_INT mx = max(m, n); std::srand(0); Mat H_orig = rand(m, n); Mat H = H_orig; Mat U(m, m); Mat V(n, n); Mat::RealScalar> S(m, 1); Mat::RealScalar> rwork( max(5 * mn * mn + 5 * mn, 2 * mx * mn + 2 * mn * mn + mn), 1); Mat iwork(8 * max(m, n), 1); T lwork_; if constexpr (std::same_as::T>) { Mkl::gesdd("S", addr(m), addr(n), data(H), addr(stride(H)), data(S), data(U), addr(stride(U)), data(V), addr(stride(V)), Data(&lwork_), addr((MKL_INT)-1), iwork.data(), addr((MKL_INT)0)); } else { Mkl::gesdd("S", addr(m), addr(n), data(H), addr(stride(H)), data(S), data(U), addr(stride(U)), data(V), addr(stride(V)), Data(&lwork_), addr((MKL_INT)-1), data(rwork), iwork.data(), addr((MKL_INT)0)); } auto lwork = 2 * (MKL_INT)std::real(lwork_); Mat work(lwork, 1); std::move(bencher).bench([&] { H = H_orig; if constexpr (std::same_as::T>) { Mkl::gesdd("S", addr(m), addr(n), data(H), addr(stride(H)), data(S), data(U), addr(stride(U)), data(V), addr(stride(V)), data(work), addr(lwork), iwork.data(), addr((MKL_INT)0)); } else { Mkl::gesdd("S", addr(m), addr(n), data(H), addr(stride(H)), data(S), data(U), addr(stride(U)), data(V), addr(stride(V)), data(work), addr(lwork), data(rwork), iwork.data(), addr((MKL_INT)0)); } }); } template void eigh(Bencher bencher, PlotArg arg) { MKL_INT n = arg.n; std::srand(0); Mat H_orig = randh(n); Mat H = H_orig; Mat U(n, n); Mat::RealScalar> W(n, 1); T lwork_ = 0; typename Mat::RealScalar lrwork_ = 0; MKL_INT liwork_ = 0; if constexpr (std::same_as::T>) { Mkl::evd("V", "L", addr(n), data(H), addr(stride(H)), data(W), Data(&lwork_), addr((MKL_INT)-1), &liwork_, addr((MKL_INT)-1), addr((MKL_INT)0)); } else { Mkl::evd("V", "L", addr(n), data(H), addr(stride(H)), data(W), Data(&lwork_), addr((MKL_INT)-1), &lrwork_, addr((MKL_INT)-1), &liwork_, addr((MKL_INT)-1), addr((MKL_INT)0)); } MKL_INT lwork = 2 * (MKL_INT)(std::real(lwork_)); MKL_INT lrwork = 2 * (MKL_INT)(lrwork_); MKL_INT liwork = 2 * (MKL_INT)(liwork_); Mat work(lwork, 1); Mat::RealScalar> rwork(lrwork, 1); Mat iwork(liwork, 1); std::move(bencher).bench([&] { H = H_orig; if constexpr (std::same_as::T>) { Mkl::evd("V", "L", addr(n), data(H), addr(stride(H)), data(W), data(work), addr(lwork), iwork.data(), addr(liwork), addr((MKL_INT)0)); } else { Mkl::evd("V", "L", addr(n), data(H), addr(stride(H)), data(W), data(work), addr(lwork), data(rwork), addr(lrwork), iwork.data(), addr(liwork), addr((MKL_INT)0)); } }); } template void eig(Bencher bencher, PlotArg arg) { MKL_INT n = arg.n; std::srand(0); Mat H_orig = rand(n, n); Mat H = H_orig; Mat U(n, n); Mat V(n, n); Mat W(n, 1); Mat::RealScalar> W_im(n, 1); Mat::RealScalar> rwork(2 * n, 1); T lwork_; if constexpr (std::same_as::T>) { Mkl::geev("V", "N", addr(n), data(H), addr(stride(H)), data(W), data(W_im), data(U), addr(stride(U)), data(V), addr(stride(V)), Data(&lwork_), addr((MKL_INT)-1), addr((MKL_INT)0)); } else { Mkl::geev("V", "N", addr(n), data(H), addr(stride(H)), data(W), data(U), addr(stride(U)), data(V), addr(stride(V)), Data(&lwork_), addr((MKL_INT)-1), data(rwork), addr((MKL_INT)0)); } MKL_INT lwork = 2 * (MKL_INT)std::real(lwork_); Mat work(lwork, 1); std::move(bencher).bench([&] { H = H_orig; if constexpr (std::same_as::T>) { Mkl::geev("V", "N", addr(n), data(H), addr(stride(H)), data(W), data(W_im), data(U), addr(stride(U)), data(V), addr(stride(V)), data(work), addr(lwork), addr((MKL_INT)0)); } else { Mkl::geev("V", "N", addr(n), data(H), addr(stride(H)), data(W), data(U), addr(stride(U)), data(V), addr(stride(V)), data(work), addr(lwork), data(rwork), addr((MKL_INT)0)); } }); } std::string glue_name(std::string prefix, std::string name, std::string type) { return prefix + "_" + name + "<" + type + ">"; } template void register_funcs(std::string prefix, std::string ty, Bench &bench) { PlotArg args[] = { 4, 8, 12, 16, 24, 32, 48, 64, 128, 256, 512, 1024, 1536, 2048, 2560, 3072, 3584, 4096, }; auto do_it = [&](std::string name, FnPtr fn) { bench.register_funcs({{{glue_name(prefix, name, ty), fn}}}, args); }; do_it("cholesky", cholesky); do_it("qr", qr); do_it("piv_qr", piv_qr); do_it("lu", lu); do_it("piv_lu", piv_lu); do_it("svd", svd); do_it("thin_svd", thin_svd); do_it("eigh", eigh); do_it("eig", eig); } f64 flops_per_sec(size_t n, f64 time) { return f64(n) * f64(n) * f64(n) / time; } int main() { auto config = BenchConfig::from_args(); config.set_metric("n³/s", Monotonicity::HigherIsBetter, flops_per_sec); auto bench = Bench::from_config(config); #ifdef BENCH_MKL std::string prefix = "mkl"; #else std::string prefix = "openblas"; #endif register_funcs(prefix, "f32", bench); register_funcs(prefix, "f64", bench); register_funcs(prefix, "c32", bench); register_funcs(prefix, "c64", bench); bench.run(); }