/*
Copyright 2019 Daniel Schultz
This file is part of FLINT.
FLINT is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <https://www.gnu.org/licenses/>.
*/
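/* usage:
likwid-setFrequencies -g performance
make profile MOD=nmod_mpoly && ./build/nmod_mpoly/profile/p-divides 4 sparse 12 12
p-divides nthreads {dense|sparse} m n:
run the chosen benchmark on 1, 2, ..., nthreads threads with powers (m, n);
nthreads is clamped to [1, 64] and m, n are clamped to [5, 30];
any name other than "dense" selects the sparse benchmark
sparse:
divides((1+x+y+2*z^2+3*t^3+5*u^5)^m*(1+u+t+2*z^2+3*y^3+5*x^5)^n,
(1+u+t+2*z^2+3*y^3+5*x^5)^n)
dense:
divides((1+x+y+z+t)^m*(1+x+y+z+t)^n,
(1+x+y+z+t)^n)
*/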
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "profiler.h"
#include "nmod_mpoly.h"
/*
    Set to 1 to compile in the extra "machine" measurement that
    profile_divides prints next to the parallel efficiency.
*/
#define CALCULATE_MACHINE_EFFICIENCY 0

/* table passed to flint_set_thread_affinity; allocated and filled in main */
int * cpu_affinities;

#if CALCULATE_MACHINE_EFFICIENCY

/* one job for worker_divides: an output quotient and pointers to the inputs */
typedef struct _worker_arg_struct
{
    nmod_mpoly_t Q;
    const nmod_mpoly_struct * A, * B;
    const nmod_mpoly_ctx_struct * ctx;
} worker_arg_struct;

typedef worker_arg_struct worker_arg_t[1];

/*
    Thread pool entry point: varg points to a worker_arg_struct; the quotient
    of A by B is written into its Q member.
*/
static void worker_divides(void * varg)
{
    worker_arg_struct * job = varg;

    nmod_mpoly_divides_threaded(job->Q, job->A, job->B, job->ctx, 1);
}

#endif
/*
    Time nmod_mpoly_divides(Q, A, B, ctx) with one thread and then with
    2, 3, ..., max_threads threads.

    For every run the wall time reported by the timer is printed (readings
    below 1 are raised to 1), and for the multi-threaded runs the efficiency
    serial_time/parallel_time/num_threads is printed as well.  Each computed
    quotient is compared against realQ; on a mismatch a message is printed
    and flint_abort() is called.

    When CALCULATE_MACHINE_EFFICIENCY is nonzero, each thread count
    additionally runs worker_divides once per acquired pool worker plus once
    on the calling thread, all started inside one timed region, and
    serial_time divided by that wall time is printed as the "machine" value.
*/
void profile_divides(
    const nmod_mpoly_t realQ,
    const nmod_mpoly_t A,
    const nmod_mpoly_t B,
    const nmod_mpoly_ctx_t ctx,
    slong max_threads)
{
    nmod_mpoly_t quot;
    timeit_t watch;
    slong nthreads;
    slong t_serial;

    /* baseline measurement on a single thread */
    flint_set_num_threads(1);
    flint_set_thread_affinity(cpu_affinities, 1);

    nmod_mpoly_init(quot, ctx);

    timeit_start(watch);
    nmod_mpoly_divides(quot, A, B, ctx);
    timeit_stop(watch);

    /* use at least 1 as the recorded time */
    t_serial = watch->wall;
    if (t_serial < WORD(1))
        t_serial = WORD(1);

    flint_printf("serial time: %wd\n", t_serial);

    if (!nmod_mpoly_equal(quot, realQ, ctx))
    {
        printf("quotient wrong!!!\n");
        flint_abort();
    }

    for (nthreads = 2; nthreads <= max_threads; nthreads++)
    {
        slong t_parallel;
        double efficiency;
#if CALCULATE_MACHINE_EFFICIENCY
        thread_pool_handle * pool;
        worker_arg_struct * jobs;
        slong k;
        double machine;
        slong nworkers;
#endif

        flint_set_num_threads(nthreads);
        flint_set_thread_affinity(cpu_affinities, nthreads);

#if CALCULATE_MACHINE_EFFICIENCY
        /* the pool may hand out fewer workers than the nthreads - 1 asked for */
        pool = flint_malloc((nthreads - 1)*sizeof(thread_pool_handle));
        nworkers = thread_pool_request(global_thread_pool, pool, nthreads - 1);
        jobs = flint_malloc((nworkers + 1)*sizeof(worker_arg_struct));

        timeit_start(watch);

        /* jobs 0 .. nworkers-1 go to the pool, the last one runs right here */
        for (k = 0; k <= nworkers; k++)
        {
            worker_arg_struct * job = jobs + k;

            nmod_mpoly_init(job->Q, ctx);
            job->A = A;
            job->B = B;
            job->ctx = ctx;

            if (k == nworkers)
                worker_divides(job);
            else
                thread_pool_wake(global_thread_pool, pool[k], 0,
                                                         worker_divides, job);
        }

        for (k = 0; k < nworkers; k++)
            thread_pool_wait(global_thread_pool, pool[k]);

        timeit_stop(watch);

        t_parallel = watch->wall;
        if (t_parallel < WORD(1))
            t_parallel = WORD(1);

        /* verify and release every job, returning the pool workers */
        for (k = 0; k <= nworkers; k++)
        {
            if (!nmod_mpoly_equal(jobs[k].Q, realQ, ctx))
            {
                printf("quotient wrong\n");
                flint_abort();
            }

            nmod_mpoly_clear(jobs[k].Q, ctx);

            if (k != nworkers)
                thread_pool_give_back(global_thread_pool, pool[k]);
        }

        flint_free(jobs);
        flint_free(pool);

        machine = (double) t_serial / (double) t_parallel;
#endif

        /* each timed run starts from a freshly initialised quotient */
        nmod_mpoly_clear(quot, ctx);
        nmod_mpoly_init(quot, ctx);

        timeit_start(watch);
        nmod_mpoly_divides(quot, A, B, ctx);
        timeit_stop(watch);

        /* at least 1, so the division below never has a zero divisor */
        t_parallel = watch->wall;
        if (t_parallel < WORD(1))
            t_parallel = WORD(1);

        if (!nmod_mpoly_equal(quot, realQ, ctx))
        {
            printf("quotient wrong!!!\n");
            flint_abort();
        }

        efficiency = (double) t_serial / (double) t_parallel / (double) nthreads;

#if CALCULATE_MACHINE_EFFICIENCY
        flint_printf("parallel %wd time: %wd, efficiency %f (machine %f)\n",
                                   nthreads, t_parallel, efficiency, machine);
#else
        flint_printf("parallel %wd time: %wd, efficiency %f\n",
                                            nthreads, t_parallel, efficiency);
#endif
    }

    nmod_mpoly_clear(quot, ctx);
}
/*
    Read a base-10 integer from str and clamp it to [lo, hi].

    strtol is used instead of atoi because atoi has undefined behaviour when
    the value does not fit in an int, whereas strtol saturates at
    LONG_MIN/LONG_MAX, which the clamp then maps to lo/hi.  A string with no
    leading number parses as 0 (as it did with atoi) and is clamped like any
    other value.
*/
static slong parse_clamped(const char * str, slong lo, slong hi)
{
    long value = strtol(str, NULL, 10);

    if (value < lo)
        return lo;

    if (value > hi)
        return hi;

    return (slong) value;
}

/*
    Build the test case Q = a^m, B = b^n, A = Q*B from the polynomial strings
    a_str and b_str (written in the variables vars of ctx), announce the run
    using label, and hand (Q, A, B) to profile_divides with max_threads.
    All temporaries are cleared before returning; ctx stays owned by the
    caller.
*/
static void run_divides_benchmark(
    const char * label,
    const char * a_str,
    const char * b_str,
    const char ** vars,
    slong m,
    slong n,
    const nmod_mpoly_ctx_t ctx,
    slong max_threads)
{
    nmod_mpoly_t a, b, A, B, Q;

    nmod_mpoly_init(a, ctx);
    nmod_mpoly_init(b, ctx);
    nmod_mpoly_init(A, ctx);
    nmod_mpoly_init(B, ctx);
    nmod_mpoly_init(Q, ctx);

    nmod_mpoly_set_str_pretty(a, a_str, vars, ctx);
    nmod_mpoly_set_str_pretty(b, b_str, vars, ctx);

    nmod_mpoly_pow_ui(Q, a, m, ctx);
    nmod_mpoly_pow_ui(B, b, n, ctx);
    nmod_mpoly_mul(A, Q, B, ctx);

    /* m and n are signed (slong), so both are printed with %wd */
    flint_printf("starting %s divides (%wd, %wd):\n", label, m, n);
    profile_divides(Q, A, B, ctx, max_threads);

    nmod_mpoly_clear(Q, ctx);
    nmod_mpoly_clear(B, ctx);
    nmod_mpoly_clear(A, ctx);
    nmod_mpoly_clear(b, ctx);
    nmod_mpoly_clear(a, ctx);
}

/*
    p-divides nthreads {dense|sparse} m n

    With exactly four arguments, nthreads is clamped to [1, 64] and m, n to
    [5, 30].  With any other argument count the usage text is printed and the
    defaults "4 sparse 12 12" are run.  Any name other than "dense" selects
    the sparse benchmark.
*/
int main(int argc, char *argv[])
{
    slong i, m, n, max_threads;
    const slong thread_limit = 64;
    const char * name;

    /* identity table: entry i holds i */
    cpu_affinities = flint_malloc(thread_limit*sizeof(int));
    for (i = 0; i < thread_limit; i++)
        cpu_affinities[i] = i;

    if (argc == 5)
    {
        max_threads = parse_clamped(argv[1], WORD(1), thread_limit);
        name = argv[2];
        m = parse_clamped(argv[3], WORD(5), WORD(30));
        n = parse_clamped(argv[4], WORD(5), WORD(30));
    }
    else
    {
        printf("  usage: p-divides nthreads {dense|sparse} m n\n");
        printf("running: p-divides 4 sparse 12 12\n");
        max_threads = 4;
        name = "sparse";
        m = 12;
        n = 12;
    }

    flint_printf("setting up nmod_mpoly %s divides ... ", name);

    if (strcmp(name, "dense") == 0)
    {
        nmod_mpoly_ctx_t ctx;
        const char * vars[] = {"x", "y", "z", "t"};

        nmod_mpoly_ctx_init(ctx, 4, ORD_DEGLEX, 536870909);
        run_divides_benchmark("dense",
                              "1 + x + y + z + t",
                              "1 + x + y + z + t",
                              vars, m, n, ctx, max_threads);
        nmod_mpoly_ctx_clear(ctx);
    }
    else /* sparse */
    {
        nmod_mpoly_ctx_t ctx;
        const char * vars[] = {"x", "y", "z", "t", "u"};

        nmod_mpoly_ctx_init(ctx, 5, ORD_LEX, 536870909);
        run_divides_benchmark("sparse",
                              "1 + x + y + 2*z^2 + 3*t^3 + 5*u^5",
                              "1 + u + t + 2*z^2 + 3*y^3 + 5*x^5",
                              vars, m, n, ctx, max_threads);
        nmod_mpoly_ctx_clear(ctx);
    }

    flint_free(cpu_affinities);
    flint_cleanup_master();
    return 0;
}