/* File-scope declarations: FFT backend include, complex shorthand typedefs,
   and tuning constants for chunked processing.
   NOTE(review): this text is a garbled extraction -- the leading integers on
   many lines are original source line numbers fused into the content, and
   single declarations are split across physical lines. */
34 #include "pocketfft/pocketfft.h" 44 typedef complex
double dcmplx;
45 typedef complex
float fcmplx;
/* sqrt(1/2) and sqrt(2), written to full double precision. */
47 static const double sqrt_one_half = 0.707106781186547572737310929369;
48 static const double sqrt_two = 1.414213562373095145474621858739;
/* Defaults for get_chunk_info(); adjustable via the sharp_set_* setters. */
50 static int chunksize_min=500, nchunks_max=10;
/* Split `ndata` work items into chunks.  First tries at most nchunks_max
   chunks; if the resulting chunk is at least chunksize_min it is rounded up
   to a multiple of `nmult`.  Otherwise the chunk count is derived from
   chunksize_min instead.  Outputs via *nchunks and *chunksize.
   NOTE(review): garbled extraction -- braces and the else branch separating
   the two strategies (original lines 53/57-58/61/63/65) are missing here. */
52 static void get_chunk_info (
int ndata,
int nmult,
int *nchunks,
int *chunksize)
54 *chunksize = (ndata+nchunks_max-1)/nchunks_max;
55 if (*chunksize>=chunksize_min)
56 *chunksize = ((*chunksize+nmult-1)/nmult)*nmult;
59 *nchunks = (ndata+chunksize_min-1)/chunksize_min;
60 *chunksize = (ndata+(*nchunks)-1)/(*nchunks);
62 *chunksize = ((*chunksize+nmult-1)/nmult)*nmult;
64 *nchunks = (ndata+(*chunksize)-1)/(*chunksize);
/* Return a conservative upper bound on the azimuthal quantum number m that
   contributes significantly on a ring at colatitude theta (sth = sin(theta),
   cth = cos(theta)), by solving the quadratic m^2 + b*m + c <= 0 whose
   coefficients are built below.  Result is clamped to lmax.
   NOTE(review): the declaration/initialization of `ofs` (original line 69,
   presumably an lmax-dependent safety margin) is missing from this view --
   confirm against the original source. */
67 NOINLINE
int sharp_get_mlim (
int lmax,
int spin,
double sth,
double cth)
70 if (ofs<100.) ofs=100.;
71 double b = -2*spin*fabs(cth);
72 double t1 = lmax*sth+ofs;
73 double c = (double)spin*spin-t1*t1;
74 double discr = b*b-4*c;
/* No real root: every m up to lmax may contribute. */
75 if (discr<=0)
return lmax;
/* Larger root of the quadratic, rounded to nearest integer, capped at lmax. */
76 double res=(-b+sqrt(discr))/2.;
77 if (res>lmax) res=lmax;
78 return (
int)(res+0.5);
/* Reset a ringhelper to an all-null template (no plan, no shift array).
   NOTE(review): the assignment `*self = rh_null;` (original line ~94) is
   missing from this extraction. */
91 static void ringhelper_init (ringhelper *
self)
93 static ringhelper rh_null = { 0, NULL, 0, NULL, 0, 0 };
/* Release a ringhelper's FFT plan and reset it to the pristine state.
   NOTE(review): deallocation of self->shiftarr (original line ~100) appears
   to be missing from this extraction -- confirm against the original. */
97 static void ringhelper_destroy (ringhelper *
self)
99 if (self->plan) destroy_rfft_plan(self->plan);
101 ringhelper_init(
self);
/* Lazily (re)build the helper's cached state for a ring with nph samples,
   maximum m `mmax` and azimuth offset phi0:
   - `norot` marks rings that need no phase rotation (phi0 ~ 0);
   - `shiftarr[m] = exp(i*m*phi0)` is rebuilt when mmax or phi0 changed;
   - the real FFT plan is (re)created when the ring length changed.
   NOTE(review): garbled extraction -- braces and the phi0_/length cache
   updates (original lines 109, 112, 115-117, 120, 123) are missing here. */
104 NOINLINE
static void ringhelper_update (ringhelper *
self,
int nph,
int mmax,
double phi0)
106 self->norot = (fabs(phi0)<1e-14);
108 if ((mmax!=self->s_shift-1) || (!FAPPROX(phi0,self->phi0_,1e-12)))
110 RESIZE (self->shiftarr,dcmplx,mmax+1);
111 self->s_shift = mmax+1;
113 for (
int m=0; m<=mmax; ++m)
114 self->shiftarr[m] = cos(m*phi0) + _Complex_I*sin(m*phi0);
118 if (!self->plan)
self->plan=make_rfft_plan(nph);
119 if (nph!=(
int)self->length)
121 destroy_rfft_plan(self->plan);
122 self->plan=make_rfft_plan(nph);
127 static int ringinfo_compare (
const void *xa,
const void *xb)
129 const sharp_ringinfo *a=xa, *b=xb;
130 return (a->sth < b->sth) ? -1 : (a->sth > b->sth) ? 1 : 0;
132 static int ringpair_compare (
const void *xa,
const void *xb)
134 const sharp_ringpair *a=xa, *b=xb;
136 if (a->r1.nph==b->r1.nph)
137 return (a->r1.phi0 < b->r1.phi0) ? -1 :
138 ((a->r1.phi0 > b->r1.phi0) ? 1 :
139 (a->r1.cth>b->r1.cth ? -1 : 1));
140 return (a->r1.nph<b->r1.nph) ? -1 : 1;
/* Build a sharp_alm_info describing an arbitrary set of m values: copies the
   caller's mval[] and mstart[] arrays and records the l-stride.
   NOTE(review): garbled extraction -- the function head (name, lmax/nm/stride
   parameters), the lmax/nm/flags assignments and the final *alm_info store
   (original lines 143, 147-148, 152, 157-158) are missing from this view. */
144 const ptrdiff_t *mstart,
int flags, sharp_alm_info **alm_info)
146 sharp_alm_info *info =
RALLOC(sharp_alm_info,1);
149 info->mval =
RALLOC(
int,nm);
150 info->mvstart =
RALLOC(ptrdiff_t,nm);
151 info->stride = stride;
153 for (
int mi=0; mi<nm; ++mi)
155 info->mval[mi] = mval[mi];
156 info->mvstart[mi] = mstart[mi];
/* Fragments of two routines (garbled extraction, bodies largely missing):
   - sharp_make_alm_info: convenience wrapper that builds the contiguous
     m = 0..mmax value list and presumably forwards to
     sharp_make_general_alm_info (the loop body, forwarding call and cleanup
     between original lines 165 and 174 are missing);
   - sharp_alm_index: returns the array index of coefficient (l, mi); the
     assertion text on line 174 belongs to a UTIL_ASSERT rejecting
     SHARP_PACKED layouts whose opening is missing here. */
162 const ptrdiff_t *mstart, sharp_alm_info **alm_info)
164 int *mval=
RALLOC(
int,mmax+1);
165 for (
int i=0; i<=mmax; ++i)
174 "sharp_alm_index not applicable with SHARP_PACKED alms");
175 return self->mvstart[mi]+
self->stride*l;
/* Body fragment of sharp_alm_count: accumulates the number of stored
   coefficients over all m values; for m != 0 with SHARP_PACKED set, both the
   real and imaginary parts count (2*x).
   NOTE(review): the function head, the `result` declaration, the non-packed
   accumulation branch and the return are missing from this extraction. */
181 for (
int im=0; im!=
self->nm; ++im)
183 int m=
self->mval[im];
/* Number of l values for this m: l = m .. lmax. */
184 ptrdiff_t x=(
self->lmax + 1 - m);
185 if ((m!=0)&&((
self->flags&
SHARP_PACKED)!=0)) result+=2*x;
/* Build a sharp_geom_info from per-ring arrays (theta, phi0, nph, ofs,
   stride, optional weights): fills one sharp_ringinfo per ring, sorts the
   rings by sth, pairs rings that are mirror images about the equator
   (cth ~ -cth of the neighbor), and finally sorts the pairs.
   NOTE(review): garbled extraction -- the function head, npairs bookkeeping,
   the pos-loop over sorted rings, the unpaired-ring branch and the final
   *geom_info store are missing from this view.  r2.nph = -1 marks a ring
   without a southern partner. */
199 const int *stride,
const double *phi0,
const double *theta,
200 const double *wgt, sharp_geom_info **geom_info)
202 sharp_geom_info *info =
RALLOC(sharp_geom_info,1);
203 sharp_ringinfo *infos =
RALLOC(sharp_ringinfo,nrings);
206 info->pair=
RALLOC(sharp_ringpair,nrings);
211 for (
int m=0; m<nrings; ++m)
213 infos[m].theta = theta[m];
214 infos[m].cth = cos(theta[m]);
215 infos[m].sth = sin(theta[m]);
/* Missing weights default to 1. */
216 infos[m].weight = (wgt != NULL) ? wgt[m] : 1.;
217 infos[m].phi0 = phi0[m];
218 infos[m].ofs = ofs[m];
219 infos[m].stride = stride[m];
220 infos[m].nph = nph[m];
221 if (info->nphmax<nph[m]) info->nphmax=nph[m];
223 qsort(infos,nrings,
sizeof(sharp_ringinfo),ringinfo_compare);
226 info->pair[info->npairs].r1=infos[pos];
/* Mirror ring detected: put the northern ring (cth > 0) in r1. */
227 if ((pos<nrings-1) && FAPPROX(infos[pos].cth,-infos[pos+1].cth,1e-12))
229 if (infos[pos].cth>0)
230 info->pair[info->npairs].r2=infos[pos+1];
233 info->pair[info->npairs].r1=infos[pos+1];
234 info->pair[info->npairs].r2=infos[pos];
239 info->pair[info->npairs].r2.nph=-1;
245 qsort(info->pair,info->npairs,
sizeof(sharp_ringpair),ringpair_compare);
/* Body fragment of sharp_map_size: total pixel count is the sum of nph over
   all rings; an r2.nph < 0 marks an unpaired ring and contributes nothing.
   NOTE(review): the function head and the `return result;` are missing from
   this extraction. */
250 ptrdiff_t result = 0;
251 for (
int m=0; m<info->npairs; ++m)
253 result+=info->pair[m].r1.nph;
254 result+=(info->pair[m].r2.nph>=0) ? (info->pair[m].r2.nph) : 0;
/* Derive mmax from an m-value list while validating it: every m in
   0..nm-1 must appear exactly once (checked via the mcheck scratch array).
   NOTE(review): garbled extraction -- the mcheck initialization/marking, the
   deallocation and the `return nm-1;` are missing from this view. */
268 static int sharp_get_mmax (
int *mval,
int nm)
271 int *mcheck=
RALLOC(
int,nm);
273 for (
int i=0; i<nm; ++i)
276 UTIL_ASSERT((m_cur>=0) && (m_cur<nm),
"not all m values are present");
277 UTIL_ASSERT(mcheck[m_cur]==0,
"duplicate m value");
/* Convert the phase array (Fourier coefficients over m) for one ring into
   real-space samples via an inverse real FFT.  Two regimes:
   - nph >= 2*mmax+2: coefficients are copied (optionally phi0-rotated via
     shiftarr) into the half-complex layout, the tail is zeroed;
   - shorter rings: coefficients are aliased/folded into the available bins
     (idx1 forward, idx2 backward with conjugation).
   The optional quadrature weight and the SHARP_REAL_HARMONICS sqrt(1/2)
   factor are folded into `wgt`.
   NOTE(review): garbled extraction -- the `nph` declaration, several branch
   headers/braces and closing statements are missing from this view. */
284 NOINLINE
static void ringhelper_phase2ring (ringhelper *
self,
285 const sharp_ringinfo *info,
double *data,
int mmax,
const dcmplx *phase,
286 int pstride,
int flags)
290 ringhelper_update (
self, nph, mmax, info->phi0);
292 double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1.;
294 wgt *= sqrt_one_half;
/* Fast path, no rotation: copy phases directly into the FFT buffer. */
299 for (
int m=0; m<=mmax; ++m)
301 data[2*m]=creal(phase[m*pstride])*wgt;
302 data[2*m+1]=cimag(phase[m*pstride])*wgt;
/* Fast path with rotation: apply exp(i*m*phi0) from shiftarr first. */
305 for (
int m=0; m<=mmax; ++m)
307 dcmplx tmp = phase[m*pstride]*
self->shiftarr[m];
308 data[2*m]=creal(tmp)*wgt;
309 data[2*m+1]=cimag(tmp)*wgt;
/* Zero the unused upper part of the half-complex buffer. */
311 for (
int m=2*(mmax+1); m<nph+2; ++m)
316 data[0]=creal(phase[0])*wgt;
/* Folding path for short rings: alias m and nph-m into the same bins. */
319 int idx1=1, idx2=nph-1;
320 for (
int m=1; m<=mmax; ++m)
322 dcmplx tmp = phase[m*pstride]*wgt;
323 if(!self->norot) tmp*=
self->shiftarr[m];
326 data[2*idx1]+=creal(tmp);
327 data[2*idx1+1]+=cimag(tmp);
/* Mirror bin receives the conjugate contribution. */
331 data[2*idx2]+=creal(tmp);
332 data[2*idx2+1]-=cimag(tmp);
334 if (++idx1>=nph) idx1=0;
335 if (--idx2<0) idx2=nph-1;
339 rfft_backward (self->plan, &(data[1]), 1.);
/* Inverse of ringhelper_phase2ring: forward real FFT of one ring's samples,
   then extraction of the phase coefficients for m = 0..mmax.  Rotation by
   -phi0 is applied via shiftarr; for mmax >= nph the coefficients are read
   from the aliased bins (idx and nph-idx with conjugation), and phases above
   nph-1 (loop at the end) are presumably zeroed.
   NOTE(review): garbled extraction -- the `nph` declaration, branch
   headers/braces, the `idx` computation in the folding loop and several
   assignment targets are missing from this view. */
342 NOINLINE
static void ringhelper_ring2phase (ringhelper *
self,
343 const sharp_ringinfo *info,
double *data,
int mmax, dcmplx *phase,
344 int pstride,
int flags)
350 int maxidx = IMIN(nph-1,mmax);
/* Note the sign: ring->phase uses the inverse rotation -phi0. */
353 ringhelper_update (
self, nph, mmax, -info->phi0);
354 double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1;
358 rfft_forward (self->plan, &(data[1]), 1.);
/* Zero the imaginary parts of the DC and Nyquist bins. */
360 data[1]=data[nph+1]=0.;
365 for (
int m=0; m<=maxidx; ++m)
366 phase[m*pstride] = (data[2*m] + _Complex_I*data[2*m+1]) * wgt;
368 for (
int m=0; m<=maxidx; ++m)
370 (data[2*m] + _Complex_I*data[2*m+1]) *
self->shiftarr[m] * wgt;
/* Folding path: read aliased bins, conjugating the mirrored ones. */
374 for (
int m=0; m<=maxidx; ++m)
379 val = (data[2*idx] + _Complex_I*data[2*idx+1]) * wgt;
381 val = (data[2*(nph-idx)] - _Complex_I*data[2*(nph-idx)+1]) * wgt;
383 val *=
self->shiftarr[m];
384 phase[m*pstride]=val;
388 for (
int m=maxidx+1;m<=mmax; ++m)
/* Zero all pixels of a map, honoring the job flags:
   - SHARP_NO_FFT: the map stores complex phases directly (dcmplx for
     SHARP_DP, fcmplx otherwise);
   - otherwise: real-valued map (double for SHARP_DP, float otherwise),
     with a memset fast path for unit-stride rings.
   NOTE(review): garbled extraction -- the `flags` parameter declaration,
   SHARP_DP branch headers, else keywords and braces are missing from this
   view; the precision dispatch is inferred from the surviving casts. */
392 NOINLINE
static void clear_map (
const sharp_geom_info *ginfo,
void *map,
395 if (flags & SHARP_NO_FFT)
397 for (
int j=0;j<ginfo->npairs;++j)
401 for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
402 ((dcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
403 for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
404 ((dcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
408 for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
409 ((fcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
410 for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
411 ((fcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
/* Real double-precision map: memset contiguous rings, loop otherwise. */
419 for (
int j=0;j<ginfo->npairs;++j)
421 double *dmap=(
double *)map;
422 if (ginfo->pair[j].r1.stride==1)
423 memset(&dmap[ginfo->pair[j].r1.ofs],0,
424 ginfo->pair[j].r1.nph*
sizeof(
double));
426 for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
427 dmap[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
428 if ((ginfo->pair[j].r2.nph>0)&&(ginfo->pair[j].r2.stride==1))
429 memset(&dmap[ginfo->pair[j].r2.ofs],0,
430 ginfo->pair[j].r2.nph*
sizeof(
double));
432 for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
433 dmap[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
/* Real single-precision map. */
438 for (
int j=0;j<ginfo->npairs;++j)
440 for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
441 ((
float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
442 for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
443 ((
float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
/* Zero all a_lm coefficients, dispatching on precision (double/float) and on
   SHARP_PACKED layout (real+imaginary interleaved, hence the paired
   assignments at stride offsets +0/+1).  The CLEARLOOP macro iterates
   l = m .. lmax for the current m.
   NOTE(review): garbled extraction -- the `flags` parameter, the macro's
   do/while braces, and the branch headers selecting among the four CLEARLOOP
   instantiations are missing from this view. */
449 NOINLINE
static void clear_alm (
const sharp_alm_info *ainfo,
void *alm,
452 #define CLEARLOOP(real_t,body) \ 454 real_t *talm = (real_t *)alm; \ 455 for (int l=m;l<=ainfo->lmax;++l) \ 459 for (
int mi=0;mi<ainfo->nm;++mi)
461 int m=ainfo->mval[mi];
462 ptrdiff_t mvstart = ainfo->mvstart[mi];
463 ptrdiff_t stride = ainfo->stride;
469 CLEARLOOP(
double, talm[mvstart+l*stride] = 0.;)
471 CLEARLOOP(
float, talm[mvstart+l*stride] = 0.;)
477 CLEARLOOP(
double,talm[mvstart+l*stride]=talm[mvstart+l*stride+1]=0.;)
479 CLEARLOOP(
float,talm[mvstart+l*stride]=talm[mvstart+l*stride+1]=0.;)
/* Prepare the job's output buffers: clear all a_lm sets or all maps,
   depending on the transform direction.
   NOTE(review): garbled extraction -- the branch selecting alm vs. map
   clearing (and presumably an early return when accumulation is requested;
   original lines 487-489, 492) is missing from this view. */
486 NOINLINE
static void init_output (sharp_job *job)
490 for (
int i=0; i<job->ntrans*job->nalm; ++i)
491 clear_alm (job->ainfo,job->alm[i],job->flags);
493 for (
int i=0; i<job->ntrans*job->nmaps; ++i)
494 clear_map (job->ginfo,job->map[i],job->flags);
/* Allocate the phase matrix (nm x ntheta, 2*ntrans*nmaps dcmplx each) and
   choose the stride layout (s_m / s_th).  The `& 1023` checks pad a
   dimension by 3 when the row size in bytes would be a multiple of 1024, to
   avoid cache-line aliasing.
   NOTE(review): garbled extraction -- the branch header choosing between the
   m-major and theta-major layouts (original lines 498-500, 504-506) is
   missing from this view. */
497 NOINLINE
static void alloc_phase (sharp_job *job,
int nm,
int ntheta)
501 job->s_m=2*job->ntrans*job->nmaps;
502 if (((job->s_m*16*nm)&1023)==0) nm+=3;
503 job->s_th=job->s_m*nm;
507 job->s_th=2*job->ntrans*job->nmaps;
508 if (((job->s_th*16*ntheta)&1023)==0) ntheta+=3;
509 job->s_m=job->s_th*ntheta;
511 job->phase=
RALLOC(dcmplx,2*job->ntrans*job->nmaps*nm*ntheta);
/* Free the job's phase buffer (body missing from this extraction;
   presumably DEALLOC(job->phase) -- confirm against the original). */
514 static void dealloc_phase (sharp_job *job)
517 static void alloc_almtmp (sharp_job *job,
int lmax)
518 { job->almtmp=
RALLOC(dcmplx,job->ntrans*job->nalm*(lmax+1)); }
/* Free the job's temporary a_lm workspace (body missing from this
   extraction; presumably DEALLOC(job->almtmp) -- confirm). */
520 static void dealloc_almtmp (sharp_job *job)
/* Gather the a_lm coefficients for one m (index mi) from the user arrays
   into the contiguous almtmp workspace.  The COPY_LOOP macro zero-pads
   l = m..lmin-1 (below the spin threshold) and copies/transforms
   l = lmin..lmax.  The eight instantiations cover the cross product of
   {double,float} precision x {packed real, complex} storage x
   {unnormalized, norm_l-scaled} -- the final memset handles the
   no-coefficients case.
   NOTE(review): garbled extraction -- the macro's do/while braces, the
   norm_m0 declaration and the branch headers selecting among the
   COPY_LOOP instantiations are missing from this view. */
523 NOINLINE
static void alm2almtmp (sharp_job *job,
int lmax,
int mi)
526 #define COPY_LOOP(real_t, source_t, expr_of_x) \ 528 for (int l=m; l<lmin; ++l) \ 529 for (int i=0; i<job->ntrans*job->nalm; ++i) \ 530 job->almtmp[job->ntrans*job->nalm*l+i] = 0; \ 531 for (int l=lmin; l<=lmax; ++l) \ 532 for (int i=0; i<job->ntrans*job->nalm; ++i) \ 534 source_t x = *(source_t *)(((real_t *)job->alm[i])+ofs+l*stride); \ 535 job->almtmp[job->ntrans*job->nalm*l+i] = expr_of_x; \ 541 ptrdiff_t ofs=job->ainfo->mvstart[mi];
542 int stride=job->ainfo->stride;
543 int m=job->ainfo->mval[mi];
/* Coefficients with l < spin do not contribute. */
544 int lmin=(m<job->spin) ? job->spin : m;
558 COPY_LOOP(
double,
double, x*norm_m0)
560 COPY_LOOP(
float,
float, x*norm_m0)
565 COPY_LOOP(
double, dcmplx, x)
567 COPY_LOOP(
float, fcmplx, x)
575 COPY_LOOP(
double,
double, x*job->norm_l[l]*norm_m0)
577 COPY_LOOP(
float,
float, x*job->norm_l[l]*norm_m0)
582 COPY_LOOP(
double, dcmplx, x*job->norm_l[l])
584 COPY_LOOP(
float, fcmplx, x*job->norm_l[l])
589 memset (job->almtmp+job->ntrans*job->nalm*job->ainfo->mval[mi], 0,
590 job->ntrans*job->nalm*(lmax+1-job->ainfo->mval[mi])*
sizeof(dcmplx));
/* Scatter the almtmp workspace back into the user a_lm arrays for one m,
   accumulating (+=) rather than overwriting.  Mirror image of alm2almtmp:
   the COPY_LOOP instantiations cover {double,float} precision x
   {packed real via creal/crealf, complex} x {unnormalized, norm_l-scaled}.
   NOTE(review): garbled extraction -- the macro's do/while braces, the
   norm_m0 declaration and the dispatching branch headers are missing from
   this view. */
595 NOINLINE
static void almtmp2alm (sharp_job *job,
int lmax,
int mi)
598 #define COPY_LOOP(real_t, target_t, expr_of_x) \ 599 for (int l=lmin; l<=lmax; ++l) \ 600 for (int i=0; i<job->ntrans*job->nalm; ++i) \ 602 dcmplx x = job->almtmp[job->ntrans*job->nalm*l+i]; \ 603 *(target_t *)(((real_t *)job->alm[i])+ofs+l*stride) += expr_of_x; \ 607 ptrdiff_t ofs=job->ainfo->mvstart[mi];
608 int stride=job->ainfo->stride;
609 int m=job->ainfo->mval[mi];
610 int lmin=(m<job->spin) ? job->spin : m;
624 COPY_LOOP(
double,
double, creal(x)*norm_m0)
626 COPY_LOOP(
float,
float, crealf(x)*norm_m0)
631 COPY_LOOP(
double, dcmplx, x)
633 COPY_LOOP(
float, fcmplx, (fcmplx)x)
641 COPY_LOOP(
double,
double, creal(x)*job->norm_l[l]*norm_m0)
643 COPY_LOOP(
float, fcmplx, (
float)(creal(x)*job->norm_l[l]*norm_m0))
648 COPY_LOOP(
double, dcmplx, x*job->norm_l[l])
650 COPY_LOOP(
float, fcmplx, (fcmplx)(x*job->norm_l[l]))
/* Add one transformed ring (held in ringtmp, offset by +1 per the FFT
   layout) into the output map(s).  Double precision path uses restrict
   pointers and a memcpy/accumulate split; single precision accumulates
   element-wise with a cast.
   NOTE(review): garbled extraction -- the SHARP_DP branch header, the
   overwrite-vs-accumulate condition around the memcpy, and several braces
   are missing from this view. */
657 NOINLINE
static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
658 const double *ringtmp,
int rstride)
662 double **dmap = (
double **)job->map;
663 for (
int i=0; i<job->ntrans*job->nmaps; ++i)
665 double *restrict p1=&dmap[i][ri->ofs];
666 const double *restrict p2=&ringtmp[i*rstride+1];
670 for (
int m=0; m<ri->nph; ++m)
673 memcpy(p1,p2,ri->nph*
sizeof(
double));
676 for (
int m=0; m<ri->nph; ++m)
677 p1[m*ri->stride] += p2[m];
682 float **fmap = (
float **)job->map;
683 for (
int i=0; i<job->ntrans*job->nmaps; ++i)
684 for (
int m=0; m<ri->nph; ++m)
685 fmap[i][ri->ofs+m*ri->stride] += (
float)ringtmp[i*rstride+m+1];
/* Copy one ring out of the input map(s) into the ringtmp scratch buffer
   (offset +1 per the FFT layout).  Double precision uses memcpy for
   unit-stride rings and a strided loop otherwise; single precision converts
   element-wise.
   NOTE(review): garbled extraction -- the SHARP_DP branch header and the
   stride==1 condition around the memcpy are missing from this view. */
689 NOINLINE
static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri,
690 double *ringtmp,
int rstride)
693 for (
int i=0; i<job->ntrans*job->nmaps; ++i)
695 double *restrict p1=&ringtmp[i*rstride+1],
696 *restrict p2=&(((
double *)(job->map[i]))[ri->ofs]);
698 memcpy(p1,p2,ri->nph*
sizeof(
double));
700 for (
int m=0; m<ri->nph; ++m)
701 p1[m] = p2[m*ri->stride];
704 for (
int i=0; i<job->ntrans*job->nmaps; ++i)
705 for (
int m=0; m<ri->nph; ++m)
706 ringtmp[i*rstride+m+1] = ((
float *)(job->map[i]))[ri->ofs+m*ri->stride];
/* SHARP_NO_FFT path: the "map" already stores phase coefficients, so copy
   them straight into the phase array, scaled by nph*weight when quadrature
   weights are active.  Unpaired rings (the zeroing loop) contribute zeros.
   NOTE(review): garbled extraction -- the `phase` parameter declaration and
   the branch header separating the unpaired-ring case from the copy loop
   are missing from this view. */
709 static void ring2phase_direct (sharp_job *job, sharp_ringinfo *ri,
int mmax,
714 for (
int i=0; i<job->ntrans*job->nmaps; ++i)
715 for (
int m=0; m<=mmax; ++m)
716 phase[2*i+job->s_m*m]=0.;
721 double wgt = (job->flags&SHARP_USE_WEIGHTS) ? (ri->nph*ri->weight) : 1.;
724 for (
int i=0; i<job->ntrans*job->nmaps; ++i)
725 for (
int m=0; m<=mmax; ++m)
726 phase[2*i+job->s_m*m]= (job->flags &
SHARP_DP) ?
727 ((dcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*wgt :
728 ((fcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*wgt;
/* SHARP_NO_FFT path, inverse direction: accumulate phase coefficients into
   the complex "map" storage, dcmplx or fcmplx depending on SHARP_DP.
   Unpaired rings (nph < 0) are skipped.
   NOTE(review): garbled extraction -- the `phase` parameter declaration, the
   SHARP_REAL_HARMONICS condition around the sqrt_one_half scaling and the
   DP-vs-SP branch header are missing from this view. */
731 static void phase2ring_direct (sharp_job *job, sharp_ringinfo *ri,
int mmax,
734 if (ri->nph<0)
return;
736 dcmplx **dmap = (dcmplx **)job->map;
737 fcmplx **fmap = (fcmplx **)job->map;
738 double wgt = (job->flags&SHARP_USE_WEIGHTS) ? (ri->nph*ri->weight) : 1.;
740 wgt *= sqrt_one_half;
741 for (
int i=0; i<job->ntrans*job->nmaps; ++i)
742 for (
int m=0; m<=mmax; ++m)
744 dmap[i][ri->ofs+m*ri->stride] += wgt*phase[2*i+job->s_m*m];
746 fmap[i][ri->ofs+m*ri->stride] += (fcmplx)(wgt*phase[2*i+job->s_m*m]);
/* Transform the map rings in pair range [llim, ulim) into phase
   coefficients.  SHARP_NO_FFT delegates to ring2phase_direct per ring;
   otherwise each OpenMP thread owns a ringhelper and a ringtmp scratch
   buffer and runs ring2ringtmp + ringhelper_ring2phase for the northern
   ring (r1) and, when present, the southern ring (r2, phase offset +1).
   NOTE(review): garbled extraction -- braces, the `else`, the ringhelper
   declaration and the DEALLOC of ringtmp are missing; two `#pragma omp`
   lines have the following statement fused onto them. */
750 NOINLINE
static void map2phase (sharp_job *job,
int mmax,
int llim,
int ulim)
753 int pstride = job->s_m;
754 if (job->flags & SHARP_NO_FFT)
756 for (
int ith=llim; ith<ulim; ++ith)
758 int dim2 = job->s_th*(ith-llim);
759 ring2phase_direct(job,&(job->ginfo->pair[ith].r1),mmax,
760 &(job->phase[dim2]));
761 ring2phase_direct(job,&(job->ginfo->pair[ith].r2),mmax,
762 &(job->phase[dim2+1]));
767 #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0) 770 ringhelper_init(&helper);
771 int rstride=job->ginfo->nphmax+2;
772 double *ringtmp=
RALLOC(
double,job->ntrans*job->nmaps*rstride);
773 #pragma omp for schedule(dynamic,1) 774 for (
int ith=llim; ith<ulim; ++ith)
776 int dim2 = job->s_th*(ith-llim);
777 ring2ringtmp(job,&(job->ginfo->pair[ith].r1),ringtmp,rstride);
778 for (
int i=0; i<job->ntrans*job->nmaps; ++i)
779 ringhelper_ring2phase (&helper,&(job->ginfo->pair[ith].r1),
780 &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i],pstride,job->flags);
781 if (job->ginfo->pair[ith].r2.nph>0)
783 ring2ringtmp(job,&(job->ginfo->pair[ith].r2),ringtmp,rstride);
784 for (
int i=0; i<job->ntrans*job->nmaps; ++i)
785 ringhelper_ring2phase (&helper,&(job->ginfo->pair[ith].r2),
786 &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i+1],pstride,job->flags);
790 ringhelper_destroy(&helper);
/* Inverse of map2phase: synthesize map rings in pair range [llim, ulim)
   from the phase coefficients.  SHARP_NO_FFT delegates to
   phase2ring_direct; otherwise each OpenMP thread runs
   ringhelper_phase2ring + ringtmp2ring for r1 and, when present, r2.
   NOTE(review): garbled extraction -- braces, the `else`, the ringhelper
   declaration and the DEALLOC of ringtmp are missing; two `#pragma omp`
   lines have the following statement fused onto them. */
795 NOINLINE
static void phase2map (sharp_job *job,
int mmax,
int llim,
int ulim)
798 int pstride = job->s_m;
799 if (job->flags & SHARP_NO_FFT)
801 for (
int ith=llim; ith<ulim; ++ith)
803 int dim2 = job->s_th*(ith-llim);
804 phase2ring_direct(job,&(job->ginfo->pair[ith].r1),mmax,
805 &(job->phase[dim2]));
806 phase2ring_direct(job,&(job->ginfo->pair[ith].r2),mmax,
807 &(job->phase[dim2+1]));
812 #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0) 815 ringhelper_init(&helper);
816 int rstride=job->ginfo->nphmax+2;
817 double *ringtmp=
RALLOC(
double,job->ntrans*job->nmaps*rstride);
818 #pragma omp for schedule(dynamic,1) 819 for (
int ith=llim; ith<ulim; ++ith)
821 int dim2 = job->s_th*(ith-llim);
822 for (
int i=0; i<job->ntrans*job->nmaps; ++i)
823 ringhelper_phase2ring (&helper,&(job->ginfo->pair[ith].r1),
824 &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i],pstride,job->flags);
825 ringtmp2ring(job,&(job->ginfo->pair[ith].r1),ringtmp,rstride);
826 if (job->ginfo->pair[ith].r2.nph>0)
828 for (
int i=0; i<job->ntrans*job->nmaps; ++i)
829 ringhelper_phase2ring (&helper,&(job->ginfo->pair[ith].r2),
830 &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i+1],pstride,job->flags);
831 ringtmp2ring(job,&(job->ginfo->pair[ith].r2),ringtmp,rstride);
835 ringhelper_destroy(&helper);
/* Top-level driver for one transform job.  Processes ring pairs in chunks:
   for each chunk it precomputes per-pair geometry (ispair, cth, sth) and the
   m-limit, converts map->phase when needed, then runs the parallel loop over
   m indices: alm2almtmp -> inner_loop (the compute kernel) -> almtmp2alm.
   Finally phase->map for the synthesis direction.  Per-thread job copies
   (ljob) accumulate opcnt back into the parent job.
   NOTE(review): garbled extraction -- timing calls, init_output, direction
   branches, scratch deallocation, dealloc_phase and several braces are
   missing from this view; two `#pragma omp` lines have the following
   statement fused onto them. */
840 NOINLINE
static void sharp_execute_job (sharp_job *job)
844 int lmax = job->ainfo->lmax,
845 mmax=sharp_get_mmax(job->ainfo->mval, job->ainfo->nm);
854 int nchunks, chunksize;
/* Chunk granularity is a multiple of the vector length (SHARP_NVMAX*VLEN). */
855 get_chunk_info(job->ginfo->npairs,(job->flags&SHARP_NVMAX)*VLEN,&nchunks,
858 alloc_phase (job,mmax+1,chunksize);
861 for (
int chunk=0; chunk<nchunks; ++chunk)
863 int llim=chunk*chunksize, ulim=IMIN(llim+chunksize,job->ginfo->npairs);
864 int *ispair =
RALLOC(
int,ulim-llim);
865 int *mlim =
RALLOC(
int,ulim-llim);
866 double *cth =
RALLOC(
double,ulim-llim), *sth =
RALLOC(
double,ulim-llim);
867 for (
int i=0; i<ulim-llim; ++i)
869 ispair[i] = job->ginfo->pair[i+llim].r2.nph>0;
870 cth[i] = job->ginfo->pair[i+llim].r1.cth;
871 sth[i] = job->ginfo->pair[i+llim].r1.sth;
872 mlim[i] = sharp_get_mlim(lmax, job->spin, sth[i], cth[i]);
876 map2phase (job, mmax, llim, ulim);
878 #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0) 880 sharp_job ljob = *job;
882 sharp_Ylmgen_C generator;
884 alloc_almtmp(&ljob,lmax);
886 #pragma omp for schedule(dynamic,1) 887 for (
int mi=0; mi<job->ainfo->nm; ++mi)
890 alm2almtmp (&ljob, lmax, mi);
892 inner_loop (&ljob, ispair, cth, sth, llim, ulim, &generator, mi, mlim);
895 almtmp2alm (&ljob, lmax, mi);
899 dealloc_almtmp(&ljob);
902 job->opcnt+=ljob.opcnt;
906 phase2map (job, mmax, llim, ulim);
/* Populate a sharp_job from user parameters, after validating ntrans and
   spin.  If the caller did not fix a vector length via the SHARP_NVMAX flag
   bits, sharp_nv_oracle picks one.
   NOTE(review): garbled extraction -- the first UTIL_ASSERT's condition
   (only its message on line 924 survives), the spin remapping for
   DERIV1/real-harmonics jobs, and the type/spin/norm_l/alm/map field
   assignments are missing from this view. */
919 static void sharp_build_job_common (sharp_job *job,
sharp_jobtype type,
920 int spin,
void *alm,
void *map,
const sharp_geom_info *geom_info,
921 const sharp_alm_info *alm_info,
int ntrans,
int flags)
924 "bad number of simultaneous transforms");
930 UTIL_ASSERT((spin>=0)&&(spin<=alm_info->lmax),
"bad spin");
936 job->ginfo = geom_info;
937 job->ainfo = alm_info;
939 if ((job->flags&SHARP_NVMAX)==0)
940 job->flags|=sharp_nv_oracle (type, spin, ntrans);
945 job->ntrans = ntrans;
/* Public entry point: build a job from the arguments, execute it, and
   report wall time and the floating-point operation count through the
   optional out-parameters.
   NOTE(review): garbled extraction -- the function head (type/spin/alm/map
   parameters), the local `sharp_job job;` declaration and the tail of the
   sharp_build_job_common argument list are missing from this view. */
951 const sharp_geom_info *geom_info,
const sharp_alm_info *alm_info,
int ntrans,
952 int flags,
double *time,
unsigned long long *opcnt)
955 sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info,
958 sharp_execute_job (&job);
959 if (time!=NULL) *time = job.time;
960 if (opcnt!=NULL) *opcnt = job.opcnt;
963 void sharp_set_chunksize_min(
int new_chunksize_min)
964 { chunksize_min=new_chunksize_min; }
965 void sharp_set_nchunks_max(
int new_nchunks_max)
966 { nchunks_max=new_nchunks_max; }
/* Report the largest supported vector-blocking factor (body missing from
   this extraction; used as the upper bound of the nv scan in
   sharp_oracle below). */
968 int sharp_get_nv_max (
void)
/* Empirically pick the fastest vector-blocking factor nv for a given job
   type/spin/ntrans: build a small synthetic problem (Gauss-type geometry,
   maps and a_lm buffers), time a transform for each nv in
   1..sharp_get_nv_max(), repeating until at least 0.02 s of total runtime
   or 2 tries, and return the nv with the best time.
   NOTE(review): garbled extraction -- lmax/mmax/ppring setup, the geometry
   and alm-info construction, the timed sharp_execute call (only its flag
   argument on lines 607-608 survives), the accumulation of time_acc/ntries,
   cleanup and the return are missing from this view. */
971 static int sharp_oracle (
sharp_jobtype type,
int spin,
int ntrans)
975 int nrings=(lmax+1)/4;
/* Any nonzero spin is benchmarked as spin 2. */
978 spin = (spin!=0) ? 2 : 0;
980 ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
981 sharp_geom_info *tinfo;
984 ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
985 int ncomp = ntrans*((spin==0) ? 1 : 2);
988 ALLOC2D(map,
double,ncomp,npix);
991 sharp_alm_info *alms;
995 ALLOC2D(alm,dcmplx,ncomp,nalms);
1001 for (
int nv=1; nv<=sharp_get_nv_max(); ++nv)
1009 nv|
SHARP_DP|SHARP_NO_OPENMP,&jtime,NULL);
1011 if (jtime<time) { time=jtime; nvbest=nv; }
1015 while ((time_acc<0.02)&&(ntries<2));
/* Memoizing wrapper around sharp_oracle: caches the best nv per
   (ntrans clamped to maxtr, spin!=0, job type) in a static table; a zero
   entry means "not yet measured".  Not thread-safe: the cache is plain
   static state.
   NOTE(review): garbled extraction -- a presumed remapping of type/spin for
   derivative jobs and UTIL_ASSERT range checks on spin/type (original lines
   1036-1038, 1040, 1042) are missing from this view. */
1026 int sharp_nv_oracle (
sharp_jobtype type,
int spin,
int ntrans)
1028 static const int maxtr = 6;
1029 static int nv_opt[6][2][5] = {
1030 {{0,0,0,0,0},{0,0,0,0,0}},
1031 {{0,0,0,0,0},{0,0,0,0,0}},
1032 {{0,0,0,0,0},{0,0,0,0,0}},
1033 {{0,0,0,0,0},{0,0,0,0,0}},
1034 {{0,0,0,0,0},{0,0,0,0,0}},
1035 {{0,0,0,0,0},{0,0,0,0,0}} };
1039 UTIL_ASSERT((ntrans>0),
"bad number of simultaneous transforms");
1041 ntrans=IMIN(ntrans,maxtr);
1043 if (nv_opt[ntrans-1][spin!=0][type]==0)
1044 nv_opt[ntrans-1][spin!=0][type]=sharp_oracle(type,spin,ntrans);
1045 return nv_opt[ntrans-1][spin!=0][type];
/* NOTE(review): the lines below are NOT compilable code.  They are an
   auto-generated symbol/declaration index (macro names without bodies,
   prototypes without semicolons) appended by the extraction tool that
   produced this file.  They are preserved verbatim for reference; remove
   them when restoring the file from the original source. */
#define RALLOC(type, num)
void sharp_Ylmgen_destroy(sharp_Ylmgen_C *gen)
double * sharp_Ylmgen_get_norm(int lmax, int spin)
void sharp_execute(sharp_jobtype type, int spin, void *alm, void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans, int flags, double *time, unsigned long long *opcnt)
void sharp_destroy_geom_info(sharp_geom_info *geom_info)
void sharp_make_general_alm_info(int lmax, int nm, int stride, const int *mval, const ptrdiff_t *mstart, int flags, sharp_alm_info **alm_info)
#define SET_ARRAY(ptr, i1, i2, val)
double * sharp_Ylmgen_get_d1norm(int lmax)
void sharp_make_triangular_alm_info(int lmax, int mmax, int stride, sharp_alm_info **alm_info)
void sharp_destroy_alm_info(sharp_alm_info *info)
ptrdiff_t sharp_map_size(const sharp_geom_info *info)
void sharp_make_geom_info(int nrings, const int *nph, const ptrdiff_t *ofs, const int *stride, const double *phi0, const double *theta, const double *wgt, sharp_geom_info **geom_info)
void sharp_make_gauss_geom_info(int nrings, int nphi, double phi0, int stride_lon, int stride_lat, sharp_geom_info **geom_info)
ptrdiff_t sharp_alm_index(const sharp_alm_info *self, int l, int mi)
#define UTIL_ASSERT(cond, msg)
void sharp_make_alm_info(int lmax, int mmax, int stride, const ptrdiff_t *mstart, sharp_alm_info **alm_info)
void sharp_Ylmgen_init(sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
ptrdiff_t sharp_alm_count(const sharp_alm_info *self)