/* Copyright (C) 2011 Fredrik Johansson Copyright (C) 2012 Lina Kulakova Copyright (C) 2013 Martin Lee Copyright (C) 2020 William Hart This file is part of FLINT. FLINT is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License (LGPL) as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. See . */ #undef ulong #define ulong ulongxx/* interferes with system includes */ #include #include #undef ulong #include #include #define ulong mp_limb_t #include "flint.h" #include "fmpz_mod_poly.h" #include "ulong_extras.h" #include "thread_support.h" int main(void) { #if FLINT_USES_PTHREAD && (FLINT_USES_TLS || FLINT_REENTRANT) int i; fmpz_mod_ctx_t ctx; #endif FLINT_TEST_INIT(state); flint_printf("compose_mod_brent_kung_precomp_preinv_threaded...."); fflush(stdout); #if FLINT_USES_PTHREAD && (FLINT_USES_TLS || FLINT_REENTRANT) fmpz_mod_ctx_init_ui(ctx, 2); /* check precomputation */ for (i = 0; i < 200 * flint_test_multiplier(); i++) { fmpz_mod_poly_t a, b, c, cinv; fmpz_mod_poly_struct * tmp; fmpz_t p; fmpz_mat_t B; fmpz_mat_struct * C; slong j, num_threads; fmpz_mod_poly_matrix_precompute_arg_t * args1; thread_pool_handle * threads; flint_set_num_threads(1 + n_randint(state, 3)); num_threads = flint_request_threads(&threads, FLINT_DEFAULT_THREAD_LIMIT); tmp = (fmpz_mod_poly_struct *) flint_malloc(sizeof(fmpz_mod_poly_struct)*(num_threads + 1)); fmpz_init(p); fmpz_set_ui(p, n_randtest_prime(state, 0)); fmpz_mod_ctx_set_modulus(ctx, p); fmpz_mod_poly_init(a, ctx); fmpz_mod_poly_init(b, ctx); fmpz_mod_poly_init(c, ctx); fmpz_mod_poly_init(cinv, ctx); for (j = 0; j < num_threads + 1; j++) fmpz_mod_poly_init(tmp + j, ctx); fmpz_mod_poly_randtest_not_zero(a, state, n_randint(state, 20) + 1, ctx); fmpz_mod_poly_randtest_not_zero(b, state, n_randint(state, 20) + 1, ctx); do { fmpz_mod_poly_randtest_not_zero(c, state, n_randint(state, 20) + 1, ctx); } while (c->length < 2); fmpz_mod_poly_reverse(cinv, c, c->length, ctx); fmpz_mod_poly_inv_series_newton(cinv, cinv, c->length, ctx); fmpz_mat_init(B, n_sqrt(c->length - 1) + 1, c->length - 1); fmpz_mod_poly_precompute_matrix(B, b, c, cinv, ctx); args1 = (fmpz_mod_poly_matrix_precompute_arg_t *) flint_malloc(sizeof(fmpz_mod_poly_matrix_precompute_arg_t) *(num_threads + 1)); C = (fmpz_mat_struct* ) flint_malloc(sizeof(fmpz_mat_struct)*(num_threads + 1)); for (j = 0; j < num_threads + 1; j++) { fmpz_mat_init(C + j, n_sqrt(c->length - 1) + 1, c->length - 1); fmpz_mod_poly_set(tmp + j, b, ctx); fmpz_mod_poly_rem(tmp + j, tmp + j, c, ctx); if (tmp[j].length < c->length - 1) { fmpz_mod_poly_fit_length(tmp + j, c->length - 1, ctx); _fmpz_vec_zero(tmp[j].coeffs + tmp[j].length, c->length - 1 - b->length); } args1[j].A = C + j; args1[j].poly1 = tmp + j; args1[j].poly2 = c; args1[j].poly2inv = cinv; args1[j].ctx = ctx; } for (j = 1; j < num_threads + 1; j++) thread_pool_wake(global_thread_pool, threads[j - 1], 0, _fmpz_mod_poly_precompute_matrix_worker, &args1[j]); _fmpz_mod_poly_precompute_matrix_worker(&args1[0]); for (j = 0; j < num_threads; j++) { thread_pool_wait(global_thread_pool, threads[j]); } for (j = 0; j < num_threads + 1; j++) { if (!fmpz_mat_equal(B, C + j)) { flint_printf("FAIL (precomputation):\n"); flint_printf("B:\n"); fmpz_mat_print(B); flint_printf("\n"); flint_printf("C[j]:\n"); fmpz_mat_print(C + j); flint_printf("\n"); flint_printf("a:\n"); fmpz_mod_poly_print(a, ctx); flint_printf("\n"); flint_printf("b:\n"); fmpz_mod_poly_print(b, ctx); flint_printf("\n"); flint_printf("c:\n"); fmpz_mod_poly_print(c, ctx); flint_printf("\n"); abort(); } } flint_give_back_threads(threads, num_threads); fmpz_clear(p); fmpz_mod_poly_clear(a, ctx); fmpz_mod_poly_clear(b, ctx); fmpz_mat_clear(B); fmpz_mod_poly_clear(c, ctx); fmpz_mod_poly_clear(cinv, ctx); for (j = 0; j < num_threads + 1; j++) { fmpz_mod_poly_clear(tmp + j, ctx); fmpz_mat_clear(C + j); } flint_free(C); flint_free(tmp); flint_free(args1); } /* check composition */ for (i = 0; i < 200 * flint_test_multiplier(); i++) { fmpz_mod_poly_t a, b, c, cinv, d; fmpz_mod_poly_struct * res; fmpz_t p; fmpz_mat_t B; slong j, num_threads; fmpz_mod_poly_compose_mod_precomp_preinv_arg_t * args1; thread_pool_handle * threads; flint_set_num_threads(1 + n_randint(state, 3)); num_threads = flint_request_threads(&threads, FLINT_DEFAULT_THREAD_LIMIT); res = (fmpz_mod_poly_struct* ) flint_malloc(sizeof(fmpz_mod_poly_struct)*(num_threads + 1)); fmpz_init(p); fmpz_set_ui(p, n_randtest_prime(state, 0)); fmpz_mod_ctx_set_modulus(ctx, p); fmpz_mod_poly_init(a, ctx); fmpz_mod_poly_init(b, ctx); fmpz_mod_poly_init(c, ctx); fmpz_mod_poly_init(cinv, ctx); fmpz_mod_poly_init(d, ctx); for (j = 0; j < num_threads + 1; j++) fmpz_mod_poly_init(res + j, ctx); fmpz_mod_poly_randtest_not_zero(a, state, n_randint(state, 20) + 1, ctx); fmpz_mod_poly_randtest_not_zero(b, state, n_randint(state, 20) + 1, ctx); do { fmpz_mod_poly_randtest_not_zero(c, state, n_randint(state, 20) + 1, ctx); } while (c->length < 2); fmpz_mod_poly_reverse(cinv, c, c->length, ctx); fmpz_mod_poly_inv_series_newton(cinv, cinv, c->length, ctx); fmpz_mat_init(B, n_sqrt(c->length - 1) + 1, c->length - 1); fmpz_mod_poly_precompute_matrix(B, b, c, cinv, ctx); fmpz_mod_poly_rem(a, a, c, ctx); fmpz_mod_poly_compose_mod(d, a, b, c, ctx); args1 = (fmpz_mod_poly_compose_mod_precomp_preinv_arg_t *) flint_malloc((num_threads + 1)* sizeof(fmpz_mod_poly_compose_mod_precomp_preinv_arg_t)); for (j = 0; j < num_threads + 1; j++) { fmpz_mod_poly_fit_length(res + j, c->length - 1, ctx); _fmpz_mod_poly_set_length(res + j, c->length - 1); args1[j].A = B; args1[j].res = res + j; args1[j].poly1 = a; args1[j].poly3 = c; args1[j].poly3inv = cinv; args1[j].ctx = ctx; } for (j = 1; j < num_threads + 1; j++) thread_pool_wake(global_thread_pool, threads[j - 1], 0, _fmpz_mod_poly_compose_mod_brent_kung_precomp_preinv_worker, &args1[j]); _fmpz_mod_poly_compose_mod_brent_kung_precomp_preinv_worker(&args1[0]); _fmpz_mod_poly_normalise(res + 0); for (j = 1; j < num_threads + 1; j++) { thread_pool_wait(global_thread_pool, threads[j - 1]); _fmpz_mod_poly_normalise(res + j); } for (j = 0; j < num_threads + 1; j++) { if (!fmpz_mod_poly_equal(d, res + j, ctx)) { flint_printf("FAIL (composition):\n"); flint_printf("res[j]:\n"); fmpz_mod_poly_print(res + j, ctx); flint_printf("\n"); flint_printf("d:\n"); fmpz_mod_poly_print(d, ctx); flint_printf("\n"); flint_printf("a:\n"); fmpz_mod_poly_print(a, ctx); flint_printf("\n"); flint_printf("b:\n"); fmpz_mod_poly_print(b, ctx); flint_printf("\n"); flint_printf("c:\n"); fmpz_mod_poly_print(c, ctx); flint_printf("\n"); abort(); } } flint_give_back_threads(threads, num_threads); fmpz_clear(p); fmpz_mod_poly_clear(a, ctx); fmpz_mod_poly_clear(b, ctx); fmpz_mat_clear(B); fmpz_mod_poly_clear(c, ctx); fmpz_mod_poly_clear(cinv, ctx); fmpz_mod_poly_clear(d, ctx); for (j = 0; j < num_threads + 1; j++) fmpz_mod_poly_clear(res + j, ctx); flint_free(res); flint_free(args1); } fmpz_mod_ctx_clear(ctx); FLINT_TEST_CLEANUP(state); flint_printf("PASS\n"); return 0; #else FLINT_TEST_CLEANUP(state); flint_printf("SKIPPED\n"); return 0; #endif }