/*
Copyright (C) 2013-2014 Fredrik Johansson
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See .
*/
#include
#include "partitions.h"
/* defined in flint*/
#define NUMBER_OF_SMALL_PARTITIONS 128
FLINT_DLL extern const unsigned int partitions_lookup[NUMBER_OF_SMALL_PARTITIONS];
slong partitions_hrr_needed_terms(double n);
typedef struct
{
arb_ptr x;
fmpz * n;
ulong N0;
ulong N;
int use_doubles;
}
worker_arg_t;
static void *
worker(void * arg_ptr)
{
worker_arg_t arg = *((worker_arg_t *) arg_ptr);
partitions_hrr_sum_arb(arg.x, arg.n, arg.N0, arg.N, arg.use_doubles);
flint_cleanup();
return NULL;
}
/* TODO: set number of threads in child threads, for future
multithreaded evaluation of single terms */
static void
hrr_sum_threaded(arb_t x, const fmpz_t n, slong N, int use_doubles)
{
arb_t y;
pthread_t threads[2];
worker_arg_t args[2];
arb_init(y);
args[0].x = x;
args[0].n = (fmpz *) n;
args[0].N0 = 1;
args[0].N = 16;
args[0].use_doubles = use_doubles;
args[1].x = y;
args[1].n = (fmpz *) n;
args[1].N0 = 17;
args[1].N = N;
args[1].use_doubles = use_doubles;
pthread_create(&threads[0], NULL, worker, &args[0]);
pthread_create(&threads[1], NULL, worker, &args[1]);
pthread_join(threads[0], NULL);
pthread_join(threads[1], NULL);
arb_add(x, x, y, ARF_PREC_EXACT);
arb_clear(y);
}
void
partitions_fmpz_fmpz_hrr(fmpz_t p, const fmpz_t n, int use_doubles)
{
arb_t x;
arf_t bound;
slong N;
arb_init(x);
arf_init(bound);
N = partitions_hrr_needed_terms(fmpz_get_d(n));
if (fmpz_cmp_ui(n, 4e8) >= 0 && flint_get_num_threads() > 1)
{
hrr_sum_threaded(x, n, N, use_doubles);
}
else
{
partitions_hrr_sum_arb(x, n, 1, N, use_doubles);
}
partitions_rademacher_bound(bound, n, N);
arb_add_error_arf(x, bound);
if (!arb_get_unique_fmpz(p, x))
{
flint_printf("not unique!\n");
arb_printd(x, 50);
flint_printf("\n");
flint_abort();
}
arb_clear(x);
arf_clear(bound);
}
/* To compute p(n) mod 2^64. */
static void
partitions_vec(mp_ptr v, slong len)
{
slong i, j, n;
mp_limb_t p;
for (n = 0; n < FLINT_MIN(len, NUMBER_OF_SMALL_PARTITIONS); n++)
v[n] = partitions_lookup[n];
for (n = NUMBER_OF_SMALL_PARTITIONS; n < len; n++)
{
p = 0;
for (i = 1, j = 1; j <= n; j += 3 * i + 1, i++)
p = v[n - j] - p;
if ((i & 1) == 0)
p = -p;
for (i = 1, j = 2; j <= n; j += 3 * i + 2, i++)
p = v[n - j] - p;
if ((i & 1) != 0)
p = -p;
v[n] = p;
}
}
/* The floor+vec method *requires* n <= 1498 for floor(p(n)/2^64)
to be equal to floor(T/2^64). It is faster up to n ~= 1200.
With doubles, it is faster up to n ~= 500. */
void
_partitions_fmpz_ui(fmpz_t res, ulong n, int use_doubles)
{
if (n < NUMBER_OF_SMALL_PARTITIONS)
{
fmpz_set_ui(res, partitions_lookup[n]);
}
else if (FLINT_BITS == 64 && (n < 500 || (!use_doubles && n < 1200)))
{
mp_ptr tmp = flint_malloc((n + 1) * sizeof(mp_limb_t));
if (n < 417) /* p(n) < 2^64 */
{
partitions_vec(tmp, n + 1);
fmpz_set_ui(res, tmp[n]);
}
else
{
arb_t x;
arb_init(x);
fmpz_set_ui(res, n);
partitions_leading_fmpz(x, res, 4 * sqrt(n) - 50);
arb_mul_2exp_si(x, x, -64);
arb_floor(x, x, 4 * sqrt(n) - 50);
if (arb_get_unique_fmpz(res, x))
{
fmpz_mul_2exp(res, res, 64);
partitions_vec(tmp, n + 1);
fmpz_add_ui(res, res, tmp[n]);
}
else
{
flint_printf("warning: failed at %wu\n", n);
fmpz_set_ui(res, n);
partitions_fmpz_fmpz_hrr(res, res, use_doubles);
}
arb_clear(x);
}
flint_free(tmp);
}
else
{
fmpz_set_ui(res, n);
partitions_fmpz_fmpz_hrr(res, res, use_doubles);
}
}
void
partitions_fmpz_fmpz(fmpz_t res, const fmpz_t n, int use_doubles)
{
if (fmpz_cmp_ui(n, 2000) < 0)
{
if (fmpz_sgn(n) < 0)
fmpz_zero(res);
else
_partitions_fmpz_ui(res, *n, use_doubles);
}
else
{
partitions_fmpz_fmpz_hrr(res, n, use_doubles);
}
}
void
partitions_fmpz_ui(fmpz_t res, ulong n)
{
_partitions_fmpz_ui(res, n, 0);
}
void
partitions_fmpz_ui_using_doubles(fmpz_t res, ulong n)
{
_partitions_fmpz_ui(res, n, 1);
}