/* Copyright (C) 2020 William Hart This file is part of FLINT. FLINT is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License (LGPL) as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. See . */ #include #include #include "flint.h" #include "fmpz_vec.h" #include "fmpz_mod_poly.h" #include "ulong_extras.h" typedef struct { volatile slong * j; slong k; slong n; slong glen; slong ginvlen; const fmpz * g; const fmpz * ginv; fmpz ** res; const fmpz * p; #if FLINT_USES_PTHREAD pthread_mutex_t * mutex; #endif } fmpz_powers_preinv_arg_t; void _fmpz_mod_poly_powers_mod_preinv_worker(void * arg_ptr) { fmpz_powers_preinv_arg_t arg = *((fmpz_powers_preinv_arg_t *) arg_ptr); slong i, j, k = arg.k, n = arg.n; slong glen = arg.glen, ginvlen = arg.ginvlen; fmpz ** res = arg.res; const fmpz * g = arg.g, * ginv = arg.ginv; const fmpz * p = arg.p; while (1) { #if FLINT_USES_PTHREAD pthread_mutex_lock(arg.mutex); #endif j = *arg.j + k; *arg.j = j; #if FLINT_USES_PTHREAD pthread_mutex_unlock(arg.mutex); #endif if (j >= n) return; if (glen == 2) /* special case, constant polynomials */ { for (i = j + 1; i < j + k && i < n; i++) { fmpz_mul(res[i] + 0, res[j] + 0, res[i - j] + 0); fmpz_mod(res[i] + 0, res[i] + 0, p); } } else { for (i = j + 1; i < j + k && i < n; i++) _fmpz_mod_poly_mulmod_preinv(res[i], res[j], glen - 1, res[i - j], glen - 1, g, glen, ginv, ginvlen, p); } } } /* compute f^0, f^1, ..., f^(n-1) mod g, where g has length glen and f is reduced mod g and has length flen (possibly zero spaced) assumes res is an array of n arrays each with space for at least glen - 1 coefficients and that flen > 0 {ginv, ginvlen} must be set to the power series inverse of the reverse of g */ void _fmpz_mod_poly_powers_mod_preinv_threaded_pool(fmpz ** res, const fmpz * f, slong flen, slong n, const fmpz * g, slong glen, const fmpz * ginv, slong ginvlen, const fmpz_t p, thread_pool_handle * threads, slong num_threads) { slong i, k, shared_j = 0; fmpz_powers_preinv_arg_t * args; #if FLINT_USES_PTHREAD pthread_mutex_t mutex; #endif if (n == 0) return; if (n == 1) { if (glen > 1) fmpz_set_ui(res[0] + 0, 1); if (glen > 2) { for (i = 1; i < glen - 1; i++) fmpz_zero(res[0] + i); } return; } k = n_sqrt(n); /* compute baby steps */ _fmpz_mod_poly_powers_mod_preinv_naive(res, f, flen, k + 1, g, glen, ginv, ginvlen, p); /* compute giant steps */ /* f^(k*i) = f^(k*(i - 1))*f^k */ if (glen == 2) /* special case, constant polys */ { for (i = 2*k; i < n; i += k) { fmpz_mul(res[i] + 0, res[i - k] + 0, res[k] + 0); fmpz_mod(res[i] + 0, res[i] + 0, p); } } else { for (i = 2*k; i < n; i += k) _fmpz_mod_poly_mulmod_preinv(res[i], res[i - k], glen - 1, res[k], glen - 1, g, glen, ginv, ginvlen, p); } args = (fmpz_powers_preinv_arg_t *) flint_malloc(sizeof(fmpz_powers_preinv_arg_t) * (num_threads + 1)); for (i = 0; i < num_threads + 1; i++) { args[i].j = &shared_j; args[i].k = k; args[i].n = n; args[i].glen = glen; args[i].ginvlen = ginvlen; args[i].g = g; args[i].ginv = ginv; args[i].res = res; args[i].p = p; #if FLINT_USES_PTHREAD args[i].mutex = &mutex; #endif } #if FLINT_USES_PTHREAD pthread_mutex_init(&mutex, NULL); #endif for (i = 0; i < num_threads; i++) thread_pool_wake(global_thread_pool, threads[i], 0, _fmpz_mod_poly_powers_mod_preinv_worker, &args[i]); _fmpz_mod_poly_powers_mod_preinv_worker(&args[num_threads]); for (i = 0; i < num_threads; i++) thread_pool_wait(global_thread_pool, threads[i]); #if FLINT_USES_PTHREAD pthread_mutex_destroy(&mutex); #endif flint_free(args); } void fmpz_mod_poly_powers_mod_bsgs(fmpz_mod_poly_struct * res, const fmpz_mod_poly_t f, slong n, const fmpz_mod_poly_t g, const fmpz_mod_ctx_t ctx) { slong i; fmpz_mod_poly_t ginv; fmpz ** res_arr; thread_pool_handle * threads; slong num_threads; if (fmpz_mod_poly_length(g, ctx) == 0) { flint_printf("Exception (fmpz_mod_poly_powers_mod_bsgs). Divide by zero.\n"); flint_abort(); } if (fmpz_mod_poly_length(f, ctx) == 0 || fmpz_mod_poly_length(g, ctx) == 1) { if (n > 0) fmpz_mod_poly_one(res + 0, ctx); for (i = 1; i < n; i++) fmpz_mod_poly_zero(res + i, ctx); return; } if (fmpz_mod_poly_length(f, ctx) >= fmpz_mod_poly_length(g, ctx)) { fmpz_mod_poly_t q, r; fmpz_mod_poly_init(q, ctx); fmpz_mod_poly_init(r, ctx); fmpz_mod_poly_divrem(q, r, f, g, ctx); fmpz_mod_poly_powers_mod_naive(res, r, n, g, ctx); fmpz_mod_poly_clear(q, ctx); fmpz_mod_poly_clear(r, ctx); return; } res_arr = (fmpz **) flint_malloc(n*sizeof(fmpz *)); fmpz_mod_poly_init(ginv, ctx); for (i = 0; i < n; i++) { fmpz_mod_poly_fit_length(res + i, fmpz_mod_poly_length(g, ctx) - 1, ctx); res_arr[i] = res[i].coeffs; _fmpz_mod_poly_set_length(res + i, fmpz_mod_poly_length(g, ctx) - 1); } fmpz_mod_poly_reverse(ginv, g, fmpz_mod_poly_length(g, ctx), ctx); fmpz_mod_poly_inv_series(ginv, ginv, fmpz_mod_poly_length(g, ctx), ctx); num_threads = flint_request_threads(&threads, flint_get_num_threads()); _fmpz_mod_poly_powers_mod_preinv_threaded_pool(res_arr, f->coeffs, f->length, n, g->coeffs, g->length, ginv->coeffs, ginv->length, fmpz_mod_ctx_modulus(ctx), threads, num_threads); flint_give_back_threads(threads, num_threads); for (i = 0; i < n; i++) _fmpz_mod_poly_normalise(res + i); fmpz_mod_poly_clear(ginv, ctx); flint_free(res_arr); }