/* Copyright (C) 2012, 2016 Fredrik Johansson Copyright (C) 2020 William Hart This file is part of FLINT. FLINT is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License (LGPL) as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. See . */ #include #include #include #include "flint.h" #include "fmpz.h" #include "fmpz_poly.h" #include "nmod_poly.h" typedef struct { fmpz * poly; const fmpz * c; slong len; } worker_t; static void _fmpz_poly_taylor_shift_divconquer_worker(void * arg_ptr) { worker_t * data = (worker_t *) arg_ptr; _fmpz_poly_taylor_shift_divconquer(data->poly, data->c, data->len); } void _fmpz_poly_taylor_shift_divconquer(fmpz * poly, const fmpz_t c, slong len) { fmpz *tmp, *tmp2; slong k, len1, len2; slong bits, cutoff; slong nt, nw; thread_pool_handle * threads; worker_t args[2]; if (len < 50 || fmpz_is_zero(c)) { _fmpz_poly_taylor_shift_horner(poly, c, len); return; } bits = _fmpz_vec_max_bits(poly, len); bits = FLINT_ABS(bits); nt = flint_get_num_threads(); cutoff = 100 + 10 * n_sqrt(FLINT_MAX(bits - FLINT_BITS, 0)); /* Parallel cutoff is set lower since shift_horner is serial. */ if (nt == 1) cutoff = FLINT_MIN(cutoff, 1000); else cutoff = FLINT_MIN(cutoff, 300); if (len < cutoff) { _fmpz_poly_taylor_shift_horner(poly, c, len); return; } len1 = len / 2; len2 = len - len1; nw = flint_request_threads(&threads, 2); /* request one extra worker */ if (len < 200 || len + bits < 2000 || nw == 0) { _fmpz_poly_taylor_shift_divconquer(poly, c, len1); _fmpz_poly_taylor_shift_divconquer(poly + len1, c, len2); } else { slong nw_save; args[0].poly = poly; args[0].c = c; args[0].len = len1; args[1].poly = poly + len1; args[1].c = c; args[1].len = len2; nw_save = flint_set_num_workers(nt - nt/2 - 1); thread_pool_wake(global_thread_pool, threads[0], nt/2 - 1, _fmpz_poly_taylor_shift_divconquer_worker, &args[1]); _fmpz_poly_taylor_shift_divconquer_worker(&args[0]); flint_reset_num_workers(nw_save); thread_pool_wait(global_thread_pool, threads[0]); } flint_give_back_threads(threads, nw); tmp = _fmpz_vec_init(len1 + 1); tmp2 = _fmpz_vec_init(len); /* Now we generate (x+c)^len1 using binomial expansion. It's redundant to do this in all branches of the tree, but since it's just O(d), it's going to be cheap compared to the actual multiplications anyway. */ fmpz_one(tmp); for (k = 1; k <= len1; k++) { if (k > len1 - k) { fmpz_set(tmp + k, tmp + len1 - k); } else { fmpz_mul_ui(tmp + k, tmp + k - 1, len1 + 1 - k); fmpz_divexact_ui(tmp + k, tmp + k, k); } } if (!fmpz_is_one(c)) { if (fmpz_cmp_si(c, -1) == 0) { for (k = len1 - 1; k >= 0; k -= 2) fmpz_neg(tmp + k, tmp + k); } else { fmpz_set(tmp2, c); for (k = len1 - 1; k >= 0; k--) { fmpz_mul(tmp + k, tmp + k, tmp2); fmpz_mul(tmp2, tmp2, c); } } } _fmpz_poly_mul(tmp2, tmp, len1 + 1, poly + len1, len2); _fmpz_vec_add(poly, poly, tmp2, len1); _fmpz_vec_set(poly + len1, tmp2 + len1, len2); _fmpz_vec_clear(tmp, len1 + 1); _fmpz_vec_clear(tmp2, len); } void fmpz_poly_taylor_shift_divconquer(fmpz_poly_t g, const fmpz_poly_t f, const fmpz_t c) { if (f != g) fmpz_poly_set(g, f); _fmpz_poly_taylor_shift_divconquer(g->coeffs, c, g->length); }