LevelS C support library  3.50
sse_utils.h
Go to the documentation of this file.
1 /*
2  * This file is part of libc_utils.
3  *
4  * libc_utils is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * libc_utils is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with libc_utils; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 /*
20  * libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
21  * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
22  * (DLR).
23  */
24 
25 /*! \file sse_utils.h
26  * SSE/SSE2/SSE3-related functionality
27  *
28  * Copyright (C) 2010,2011 Max-Planck-Society
29  * \author Martin Reinecke
30  */
31 
32 #ifndef PLANCK_SSE_UTILS_H
33 #define PLANCK_SSE_UTILS_H
34 
35 #if (defined(__SSE__))
36 
37 #include <xmmintrin.h>
38 
39 #ifdef __cplusplus
40 extern "C" {
41 #endif
42 
/*! Packed vector of four single-precision floats (SSE1 register type). */
typedef __m128 v4sf;
44 
45 typedef union {
46  float f[4];
47  v4sf v;
48 } V4SF;
49 
50 static inline v4sf build_v4sf (float a, float b, float c, float d)
51  { return _mm_set_ps(d,c,b,a); }
52 static inline void read_v4sf (v4sf v, float *a, float *b, float *c, float *d)
53  {
54  V4SF tmp;
55  tmp.v = v;
56  if (a) *a=tmp.f[0];
57  if (b) *b=tmp.f[1];
58  if (c) *c=tmp.f[2];
59  if (d) *d=tmp.f[3];
60  }
61 
62 #ifdef __cplusplus
63 }
64 #endif
65 
66 #endif
67 
68 #if (defined(__SSE2__))
69 
70 #include <emmintrin.h>
71 
72 #ifdef __cplusplus
73 extern "C" {
74 #endif
75 
/*! Packed vector of two double-precision floats (SSE2 register type). */
typedef __m128d v2df;
77 
78 typedef union {
79  double d[2];
80  v2df v;
81 } V2DF;
82 
83 typedef struct {
84  v2df a,b;
85 } v2df2;
86 typedef struct {
87  V2DF a,b;
88 } V2DF2;
89 
/*! Vector with -0.0 in both lanes, i.e. only the sign bit of each double
    is set. Used with _mm_andnot_pd to clear sign bits. */
#define V2DF_SIGNMASK (_mm_set1_pd(-0.0))
91 
92 static inline v2df build_v2df (double a, double b)
93  { return _mm_set_pd(b,a); }
94 static inline void read_v2df (v2df v, double *a, double *b)
95  { _mm_store_sd(a,v); _mm_storeh_pd(b,v); }
96 
97 static inline int v2df_any_gt (v2df a, v2df b)
98  {
99  return (_mm_movemask_pd(_mm_cmpgt_pd(_mm_andnot_pd(V2DF_SIGNMASK,a),b))!=0);
100  }
101 static inline int v2df_all_ge (v2df a, v2df b)
102  {
103  return (_mm_movemask_pd(_mm_cmplt_pd(_mm_andnot_pd(V2DF_SIGNMASK,a),b))==0);
104  }
105 static inline V2DF to_V2DF (v2df x)
106  { V2DF X; X.v=x; return X; }
107 static inline V2DF2 to_V2DF2 (v2df2 x)
108  { V2DF2 X; X.a.v=x.a; X.b.v=x.b; return X; }
109 static inline v2df2 to_v2df2 (V2DF2 X)
110  { v2df2 x; x.a=X.a.v; x.b=X.b.v; return x; }
111 static inline v2df2 zero_v2df2(void)
112  { v2df2 x; x.a=x.b=_mm_setzero_pd(); return x; }
113 
114 #ifdef __cplusplus
115 }
116 #endif
117 
118 #endif
119 
120 #if (defined(__SSE3__))
121 
122 #include <pmmintrin.h>
123 
124 #endif
125 
126 #endif

Generated on Mon Dec 10 2018 10:24:19 for LevelS C support library