31 #include "../fpt/fpt.h" 37 #define DEFAULT_NFFT_CUTOFF 6 38 #define FPT_THRESHOLD 1000 40 #define NFSOFT_INDEX_TWO(m,n,l,B) ((B+1)*(B+1)+(B+1)*(B+1)*(m+B)-((m-1)*m*(2*m-1)+(B+1)*(B+2)*(2*B+3))/6)+(posN(n,m,B))+(l-MAX(ABS(m),ABS(n))) 42 static fpt_set* SO3_fpt_init(
int l,
unsigned int flags,
int kappa,
int nthreads);
43 static int posN(
int n,
int m,
int B);
47 nfsoft_init_advanced(plan, N, M, NFSOFT_MALLOC_X | NFSOFT_MALLOC_F
48 | NFSOFT_MALLOC_F_HAT);
51 void nfsoft_init_advanced(
nfsoft_plan *plan,
int N,
int M,
52 unsigned int nfsoft_flags)
54 nfsoft_init_guru(plan, N, M, nfsoft_flags, PRE_PHI_HUT | PRE_PSI | MALLOC_X | NFFT_OMP_BLOCKWISE_ADJOINT
55 | MALLOC_F_HAT | MALLOC_F | FFTW_INIT,
56 DEFAULT_NFFT_CUTOFF, FPT_THRESHOLD);
59 void nfsoft_init_guru(
nfsoft_plan *plan,
int B,
int M,
60 unsigned int nfsoft_flags,
unsigned int nfft_flags,
int nfft_cutoff,
63 nfsoft_init_guru_advanced(plan, B, M, nfsoft_flags, nfft_flags, nfft_cutoff, fpt_kappa, 8* B);
66 void nfsoft_init_guru_advanced(
nfsoft_plan *plan,
int B,
int M,
67 unsigned int nfsoft_flags,
unsigned int nfft_flags,
int nfft_cutoff,
68 int fpt_kappa,
int nn_oversampled)
77 n[0] = nn_oversampled ;
78 n[1] = nn_oversampled ;
79 n[2] = nn_oversampled ;
81 nfft_init_guru(&plan->
p_nfft, 3, N, M, n, nfft_cutoff, nfft_flags,
82 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
84 if ((plan->
p_nfft).flags & PRE_LIN_PSI)
86 nfft_precompute_lin_psi(&(plan->
p_nfft));
91 plan->
flags = nfsoft_flags;
93 if (plan->
flags & NFSOFT_MALLOC_F_HAT)
95 plan->
f_hat = (C*) nfft_malloc((B + 1) * (4* (B +1)*(B+1)-1)/3*
sizeof(C));
96 if (plan->
f_hat == NULL ) printf(
"Allocation failed!\n");
99 if (plan->
flags & NFSOFT_MALLOC_X)
101 plan->
x = (R*) nfft_malloc(plan->
M_total*3*
sizeof(R));
102 if (plan->
x == NULL ) printf(
"Allocation failed!\n");
104 if (plan->
flags & NFSOFT_MALLOC_F)
106 plan->
f = (C*) nfft_malloc(plan->
M_total*
sizeof(C));
107 if (plan->
f == NULL ) printf(
"Allocation failed!\n");
114 plan->
mv_trafo = (void (*) (
void* ))nfsoft_trafo;
115 plan->
mv_adjoint = (void (*) (
void* ))nfsoft_adjoint;
117 plan->
nthreads = Y(get_num_threads)();
123 static void c2e(
nfsoft_plan *my_plan,
int even, C* wig_coeffs,
int k,
int m)
131 C cheby[2*my_plan->
N_total + 2];
132 cheby[my_plan->
N_total+1] = wig_coeffs[0];
135 for (j=1;j<my_plan->
N_total+1;j++)
137 cheby[my_plan->
N_total+1+j]=0.5* wig_coeffs[j];
138 cheby[my_plan->
N_total+1-j]=0.5* wig_coeffs[j];
151 cheby[0]=(C) (-1.)/(2.*_Complex_I) * aux[1];
154 cheby[j]=(1./(2.*_Complex_I)*(aux[j+1]-aux[j-1]));
159 for (
int i = 1; i <= 2* my_plan ->
N_total + 2; i++)
168 static fpt_set* SO3_fpt_init(
int l,
unsigned int flags,
int kappa,
int nthreads)
171 int N, t, k_start, k, m;
174 if (flags & NFSOFT_USE_DPT)
181 t = (int) log2(
X(next_power_of_2)(N));
190 N =
X(next_power_of_2)(l);
196 unsigned int fptflags = 0U
197 | IF(flags & NFSOFT_USE_DPT,FPT_NO_FAST_ALGORITHM,IF(t > 1,FPT_NO_DIRECT_ALGORITHM,0U))
198 | IF(flags & NFSOFT_NO_STABILIZATION,FPT_NO_STABILIZATION,0U);
216 set[0] = fpt_init((2* N + 1) * (2* N + 1), t, fptflags);
217 for (
int i=1; i<nthreads; i++)
219 set[i] = fpt_init((2* N + 1) * (2* N + 1), t, fptflags | FPT_NO_INIT_FPT_DATA);
220 set[i]->dpt =
set[0]->dpt;
225 for (k = -N; k <= N; k++)
226 for (m = -N; m <= N; m++)
229 k_start = (ABS(k) >= ABS(m)) ? ABS(k) : ABS(m);
231 fpt_precompute_1(
set[0], (k+N)*(2*N+1) + m+N,k_start);
233 #pragma omp parallel for default(shared) private(k,m,k_start) schedule(dynamic) num_threads(nthreads) 235 for (k = -N; k <= N; k++)
236 for (m = -N; m <= N; m++)
239 k_start = (ABS(k) >= ABS(m)) ? ABS(k) : ABS(m);
248 fpt_precompute_2(
set[omp_get_thread_num()], (k+N)*(2*N+1) + m+N, alpha, beta, gamma, k_start, kappa);
250 fpt_precompute(
set[0], (k+N)*(2*N+1) + m+N, alpha, beta, gamma, k_start, kappa);
257 static void SO3_fpt(C *coeffs,
fpt_set set,
int l,
int k,
int m,
unsigned int flags)
261 int k_start, k_end, j;
262 int function_values = 0;
265 if (flags & NFSOFT_USE_DPT)
276 N =
X(next_power_of_2)(l);
280 k_start = (ABS(k) >= ABS(m)) ? ABS(k) : ABS(m);
282 trafo_nr = (N + k) * (2* N + 1) + (m + N);
287 for (j = 0; j <= k_end; j++)
291 for (j = 0; j <= l - k_start; j++)
293 x[j + k_start] = coeffs[j];
295 for (j = l - k_start + 1; j <= k_end - k_start; j++)
297 x[j + k_start] = K(0.0);
303 if (flags & NFSOFT_USE_DPT)
305 fpt_trafo_direct(
set, trafo_nr, &x[k_start], y, k_end, 0U
306 | (function_values ? FPT_FUNCTION_VALUES : 0U));
310 fpt_trafo(
set, trafo_nr, &x[k_start], y, k_end, 0U
311 | (function_values ? FPT_FUNCTION_VALUES : 0U));
315 for (j = 0; j <= l; j++)
322 static void SO3_fpt_transposed(C *coeffs,
fpt_set set,
int l,
int k,
int m,
325 int N, k_start, k_end, j;
327 int function_values = 0;
331 if (flags & NFSOFT_USE_DPT)
342 N =
X(next_power_of_2)(l);
346 k_start = (ABS(k) >= ABS(m)) ? ABS(k) : ABS(m);
348 trafo_nr = (N + k) * (2* N + 1) + (m + N);
355 for (j = 0; j <= l; j++)
359 for (j = l + 1; j <= k_end; j++)
364 if (flags & NFSOFT_USE_DPT)
366 fpt_transposed_direct(
set, trafo_nr, &x[k_start], y, k_end, 0U
367 | (function_values ? FPT_FUNCTION_VALUES : 0U));
371 fpt_transposed(
set, trafo_nr, &x[k_start], y, k_end, 0U
372 | (function_values ? FPT_FUNCTION_VALUES : 0U));
375 for (j = 0; j <= l; j++)
391 for (j = 0; j < M; j++)
393 plan3D->
p_nfft.
x[3* j ] = plan3D->
x[3* j + 2];
394 plan3D->
p_nfft.
x[3* j + 1] = plan3D->
x[3* j ];
395 plan3D->
p_nfft.
x[3* j + 2] = plan3D->
x[3* j + 1];
405 if ((plan3D->
p_nfft).flags & FG_PSI)
407 nfft_precompute_one_psi(&(plan3D->
p_nfft));
409 if ((plan3D->
p_nfft).flags & PRE_PSI)
411 nfft_precompute_one_psi(&(plan3D->
p_nfft));
426 for (
int j = 0; j < M; j++)
427 plan3D->
f[j] = plan3D->
f_hat[0];
435 #pragma omp parallel
for default(shared) num_threads(plan3D->
nthreads)
437 for (
int k = -N; k <= N; k++)
439 C wig_coeffs[(
X(next_power_of_2)(N)+1)];
441 int threadid = omp_get_thread_num();
446 for (
int m = -N; m <= N; m++)
449 int max = (ABS(m) > ABS(k) ? ABS(m) : ABS(k));
451 int glo0 = NFSOFT_INDEX_TWO(k,m,max,N);
453 for (
int j = 0; j <= N - max; j++)
455 wig_coeffs[j] = plan3D->
f_hat[glo0 + j];
457 if ((plan3D->
flags & NFSOFT_NORMALIZED))
459 wig_coeffs[j] = wig_coeffs[j] * (1. / (2. * KPI))
460 * SQRT(0.5 * (2. * (max + j) + 1.));
463 if ((plan3D->
flags & NFSOFT_REPRESENT))
465 if ((k < 0) && (k % 2))
467 wig_coeffs[j] = wig_coeffs[j] * (-1);
469 if ((m < 0) && (m % 2))
470 wig_coeffs[j] = wig_coeffs[j] * (-1);
473 wig_coeffs[j] = wig_coeffs[j] * (-1);
480 for (
int j = N - max + 1; j <
X(next_power_of_2)(N) + 1; j++)
485 c2e(plan3D, ABS((k + m) % 2), wig_coeffs, k, m);
490 if (plan3D->
flags & NFSOFT_USE_NDFT)
492 nfft_trafo_direct(&(plan3D->
p_nfft));
496 nfft_trafo(&(plan3D->
p_nfft));
500 for (
int j = 0; j < plan3D->
M_total; j++)
505 static void e2c(
nfsoft_plan *my_plan,
int even, C* wig_coeffs, C* cheby)
519 aux[0]= 1/(2*_Complex_I)*cheby[1];
523 aux[j]=1/(2*_Complex_I)*(cheby[j+1]-cheby[j-1]);
525 aux[N-1]=1/(2*_Complex_I)*(-cheby[j-1]);
534 wig_coeffs[0]=cheby[my_plan->
N_total+1];
536 for(j=1;j<=my_plan->
N_total;j++)
538 wig_coeffs[j]=0.5*(cheby[my_plan->
N_total+j+1]+cheby[my_plan->
N_total+1-j]);
556 for (
int j = 0; j < M; j++)
557 plan3D->
f_hat[0] += plan3D->
f[j];
562 for (
int j = 0; j < M; j++)
567 if (plan3D->
flags & NFSOFT_USE_NDFT)
569 nfft_adjoint_direct(&(plan3D->
p_nfft));
573 nfft_adjoint(&(plan3D->
p_nfft));
579 #pragma omp parallel for default(shared) num_threads(plan3D->nthreads) 581 for (
int k = -N; k <= N; k++)
584 int threadid = omp_get_thread_num();
588 for (
int m = -N; m <= N; m++)
590 C wig_coeffs[(
X(next_power_of_2)(N)+1)];
591 C cheby[2*plan3D->
N_total + 2];
593 int max = (ABS(m) > ABS(k) ? ABS(m) : ABS(k));
595 for (
int i = 1; i < 2* plan3D ->
N_total + 3; i++)
597 cheby[i - 1] = plan3D->
p_nfft.
f_hat[NFSOFT_INDEX(k, m, i - N
604 e2c(plan3D, ABS((k + m) % 2), wig_coeffs, cheby);
612 int glo0 = NFSOFT_INDEX_TWO(k,m,0,N);
614 for (
int j = max; j <= N; j++)
616 if ((plan3D->
flags & NFSOFT_REPRESENT))
618 if ((k < 0) && (k % 2))
620 wig_coeffs[j] = -wig_coeffs[j];
622 if ((m < 0) && (m % 2))
623 wig_coeffs[j] = -wig_coeffs[j];
626 wig_coeffs[j] = wig_coeffs[j] * (-1);
630 plan3D->
f_hat[glo0+j] = wig_coeffs[j];
632 if ((plan3D->
flags & NFSOFT_NORMALIZED))
634 plan3D->
f_hat[glo0+j] = plan3D->
f_hat[glo0+j] * (1 / (2. * KPI)) * SQRT(
635 0.5 * (2. * (j) + 1.));
648 nfft_finalize(&plan->
p_nfft);
650 for (
int i=0; i<plan->
nthreads; i++)
655 if (plan->
flags & NFSOFT_MALLOC_F_HAT)
658 nfft_free(plan->
f_hat);
662 if (plan->
flags & NFSOFT_MALLOC_F)
669 if (plan->
flags & NFSOFT_MALLOC_X)
676 static int posN(
int n,
int m,
int B)
681 pos = posN(n - 1, m, B) + B + 1 - MAX(ABS(m), ABS(n - 1));
static void c2e(nfsft_plan *plan)
Converts coefficients with , from a linear combination of Chebyshev polynomials to coefficients m...
double * gamma
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
fftw_complex * f_hat
Fourier coefficients.
fftw_complex * aux
deprecated variable
fftw_complex * f_hat
Fourier coefficients.
void SO3_gamma_row(double *gamma, int N, int m, int n)
Compute three-term-recurrence coefficients of Wigner-d functions for all degrees ...
int nthreads
the number of threads
fftw_complex * wig_coeffs
deprecated variable
void SO3_beta_row(double *beta, int N, int m, int n)
Compute three-term-recurrence coefficients of Wigner-d functions for all degrees ...
NFFT_INT M_total
Total number of samples.
Holds data for a set of cascade summations.
void(* mv_adjoint)(void *)
Adjoint transform.
void SO3_alpha_row(double *alpha, int N, int m, int n)
Compute three-term-recurrence coefficients of Wigner-d functions for all degrees ...
NFFT_INT N_total
Total number of Fourier coefficients.
fftw_complex * cheby
deprecated variable
NFFT_INT N_total
Total number of Fourier coefficients.
NFFT_INT M_total
Total number of samples.
#define X(name)
Include header for C99 complex datatype.
double * alpha
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
double * beta
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
Header file for functions related to Wigner-d/D functions.
void(* mv_trafo)(void *)
Transform.
fpt_set * internal_fpt_set
the internal FPT plan
double * x
Nodes in time/spatial domain, size is doubles.
nfft_plan p_nfft
the internal NFFT plan
unsigned int flags
the planner flags