/*
Copyright (C) 2009, 2011, 2020 William Hart
This file is part of FLINT.
FLINT is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See .
*/
#include "gmp.h"
#include "flint.h"
#include "fft.h"
#include "thread_support.h"
typedef struct
{
volatile mp_size_t * i;
slong num;
mp_size_t coeff_limbs;
mp_size_t output_limbs;
mp_srcptr limbs;
mp_limb_t ** poly;
#if FLINT_USES_PTHREAD
pthread_mutex_t * mutex;
#endif
}
split_limbs_arg_t;
void
_split_limbs_worker(void * arg_ptr)
{
split_limbs_arg_t arg = *((split_limbs_arg_t *) arg_ptr);
slong num = arg.num;
mp_size_t skip;
mp_size_t coeff_limbs = arg.coeff_limbs;
mp_size_t output_limbs = arg.output_limbs;
mp_srcptr limbs = arg.limbs;
mp_limb_t ** poly = arg.poly;
mp_size_t i, end;
while (1)
{
#if FLINT_USES_PTHREAD
pthread_mutex_lock(arg.mutex);
#endif
i = *arg.i;
end = *arg.i = FLINT_MIN(i + 16, num);
#if FLINT_USES_PTHREAD
pthread_mutex_unlock(arg.mutex);
#endif
if (i >= num)
return;
for ( ; i < end; i++)
{
skip = i*coeff_limbs;
flint_mpn_zero(poly[i], output_limbs + 1);
flint_mpn_copyi(poly[i], limbs + skip, coeff_limbs);
}
}
}
mp_size_t fft_split_limbs(mp_limb_t ** poly, mp_srcptr limbs,
mp_size_t total_limbs, mp_size_t coeff_limbs, mp_size_t output_limbs)
{
mp_size_t i, shared_i = 0, skip, length = (total_limbs - 1)/coeff_limbs + 1;
mp_size_t num = total_limbs/coeff_limbs;
#if FLINT_USES_PTHREAD
pthread_mutex_t mutex;
#endif
slong num_threads;
thread_pool_handle * threads;
split_limbs_arg_t * args;
#if FLINT_USES_PTHREAD
pthread_mutex_init(&mutex, NULL);
#endif
num_threads = flint_request_threads(&threads,
FLINT_MIN(flint_get_num_threads(), (num + 15)/16));
args = (split_limbs_arg_t *)
flint_malloc(sizeof(split_limbs_arg_t)*(num_threads + 1));
for (i = 0; i < num_threads + 1; i++)
{
args[i].i = &shared_i;
args[i].num = num;
args[i].coeff_limbs = coeff_limbs;
args[i].output_limbs = output_limbs;
args[i].limbs = limbs;
args[i].poly = poly;
#if FLINT_USES_PTHREAD
args[i].mutex = &mutex;
#endif
}
for (i = 0; i < num_threads; i++)
thread_pool_wake(global_thread_pool, threads[i], 0,
_split_limbs_worker, &args[i]);
_split_limbs_worker(&args[num_threads]);
for (i = 0; i < num_threads; i++)
thread_pool_wait(global_thread_pool, threads[i]);
flint_give_back_threads(threads, num_threads);
flint_free(args);
#if FLINT_USES_PTHREAD
pthread_mutex_destroy(&mutex);
#endif
i = num;
skip = i*coeff_limbs;
if (i < length)
flint_mpn_zero(poly[i], output_limbs + 1);
if (total_limbs > skip)
flint_mpn_copyi(poly[i], limbs + skip, total_limbs - skip);
return length;
}
typedef struct
{
volatile mp_size_t * i;
slong length;
mp_size_t coeff_limbs;
mp_size_t output_limbs;
mp_srcptr limbs;
flint_bitcnt_t top_bits;
mp_limb_t mask;
mp_limb_t ** poly;
#if FLINT_USES_PTHREAD
pthread_mutex_t * mutex;
#endif
}
split_bits_arg_t;
void
_split_bits_worker(void * arg_ptr)
{
split_bits_arg_t arg = *((split_bits_arg_t *) arg_ptr);
slong length = arg.length;
mp_size_t coeff_limbs = arg.coeff_limbs;
mp_size_t output_limbs = arg.output_limbs;
mp_srcptr limbs = arg.limbs;
flint_bitcnt_t top_bits = arg.top_bits;
mp_limb_t mask = arg.mask;
mp_limb_t ** poly = arg.poly;
flint_bitcnt_t shift_bits;
mp_srcptr limb_ptr;
mp_size_t i, end;
while (1)
{
#if FLINT_USES_PTHREAD
pthread_mutex_lock(arg.mutex);
#endif
i = *arg.i;
end = *arg.i = FLINT_MIN(i + 16, length - 1);
#if FLINT_USES_PTHREAD
pthread_mutex_unlock(arg.mutex);
#endif
if (i >= length - 1)
return;
for ( ; i < end; i++)
{
flint_mpn_zero(poly[i], output_limbs + 1);
limb_ptr = limbs + i*(coeff_limbs - 1) + (i*top_bits)/FLINT_BITS;
shift_bits = (i*top_bits) % FLINT_BITS;
if (!shift_bits)
{
flint_mpn_copyi(poly[i], limb_ptr, coeff_limbs);
poly[i][coeff_limbs - 1] &= mask;
limb_ptr += (coeff_limbs - 1);
shift_bits += top_bits;
} else
{
mpn_rshift(poly[i], limb_ptr, coeff_limbs, shift_bits);
limb_ptr += (coeff_limbs - 1);
shift_bits += top_bits;
if (shift_bits >= FLINT_BITS)
{
limb_ptr++;
poly[i][coeff_limbs - 1] +=
(limb_ptr[0] << (FLINT_BITS - (shift_bits - top_bits)));
shift_bits -= FLINT_BITS;
}
poly[i][coeff_limbs - 1] &= mask;
}
}
}
}
mp_size_t fft_split_bits(mp_limb_t ** poly, mp_srcptr limbs,
mp_size_t total_limbs, flint_bitcnt_t bits, mp_size_t output_limbs)
{
mp_size_t i, shared_i = 0, coeff_limbs, limbs_left;
mp_size_t length = (FLINT_BITS*total_limbs - 1)/bits + 1;
flint_bitcnt_t shift_bits, top_bits = ((FLINT_BITS - 1) & bits);
mp_srcptr limb_ptr;
mp_limb_t mask;
#if FLINT_USES_PTHREAD
pthread_mutex_t mutex;
#endif
slong num_threads;
thread_pool_handle * threads;
split_bits_arg_t * args;
if (top_bits == 0)
return fft_split_limbs(poly, limbs, total_limbs, bits/FLINT_BITS, output_limbs);
coeff_limbs = (bits/FLINT_BITS) + 1;
mask = (WORD(1)<