LCOV - coverage.info - Vc/src/const.cpp

LCOV - code coverage report

Current view:	top level - Vc/src - const.cpp (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	2	5	40.0 %
Date:	2016-06-14 17:26:59	Functions:	1	2	50.0 %

          Line data    Source code

       1             : /*  This file is part of the Vc library.
       2             : 
       3             :     Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
       4             : 
       5             :     Vc is free software: you can redistribute it and/or modify
       6             :     it under the terms of the GNU Lesser General Public License as
       7             :     published by the Free Software Foundation, either version 3 of
       8             :     the License, or (at your option) any later version.
       9             : 
      10             :     Vc is distributed in the hope that it will be useful, but
      11             :     WITHOUT ANY WARRANTY; without even the implied warranty of
      12             :     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13             :     GNU Lesser General Public License for more details.
      14             : 
      15             :     You should have received a copy of the GNU Lesser General Public
      16             :     License along with Vc.  If not, see <http://www.gnu.org/licenses/>.
      17             : 
      18             : */
      19             : 
      20             : #ifndef V_ALIGN // fallback: only define the alignment macro when nothing earlier has provided it
      21             : # ifdef __GNUC__
      22             : #  define V_ALIGN(n) __attribute__((aligned(n))) // GNU attribute spelling: align the declared object to n bytes
      23             : # else
      24             : #  define V_ALIGN(n) __declspec(align(n)) // every non-__GNUC__ compiler gets the __declspec spelling
      25             : # endif
      26             : #endif
      27             : 
      28             : #include "Vc/avx/const_data.h"
      29             : #include "Vc/sse/const_data.h"
      30             : #include <Vc/version.h>
      31             : 
      32             : #include <cstdio>
      33             : #include <cstdlib>
      34             : #include <cstring>
      35             : 
      36             : #include "Vc/common/macros.h"
      37             : 
      38             : namespace AliRoot {
      39             : namespace Vc
      40             : {
      41             : namespace AVX
      42             : {
      43             :     // cacheline 1: ascending index tables (0, 1, 2, ...); `extern` plus an initializer gives these const arrays external linkage
      44             :     V_ALIGN(64) extern const unsigned int   _IndexesFromZero32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; // 8 unsigned ints, 64-byte aligned
      45             :     V_ALIGN(16) extern const unsigned short _IndexesFromZero16[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; // 8 unsigned shorts, 16-byte aligned
      46             :     V_ALIGN(16) extern const unsigned char  _IndexesFromZero8 [16]= { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; // 16 unsigned chars, 16-byte aligned
      47             : 
      48             :     template<> const double c_trig<double>::data[] = { // 60 doubles, one per constant; order and padding mirror c_trig<float>::data. Vc_buildDouble(sign, mantissa, exponent) entries appear to encode sign * 1.mantissa * 2^exponent (π/4, π/2 and π share one mantissa and differ only in the exponent)
      49             :     // cacheline 4
      50             :         Vc_buildDouble(1, 0x921fb54442d18ull, -1), // π/4
      51             :         Vc_buildDouble(1, 0x921fb40000000ull, -1), // π/4 - 30bits precision
      52             :         Vc_buildDouble(1, 0x4442d00000000ull, -25), // π/4 remainder1 - 32bits precision
      53             :         Vc_buildDouble(1, 0x8469898cc5170ull, -49), // π/4 remainder2
      54             :         0.0625,
      55             :         16.,
      56             :         0., // padding
      57             :         0., // padding
      58             :     // cacheline 5
      59             :         Vc_buildDouble( 1, 0x555555555554bull,  -5), // ~ 1/4!
      60             :         Vc_buildDouble(-1, 0x6c16c16c14f91ull, -10), // ~-1/6!
      61             :         Vc_buildDouble( 1, 0xa01a019c844f5ull, -16), // ~ 1/8!
      62             :         Vc_buildDouble(-1, 0x27e4f7eac4bc6ull, -22), // ~-1/10!
      63             :         Vc_buildDouble( 1, 0x1ee9d7b4e3f05ull, -29), // ~ 1/12!
      64             :         Vc_buildDouble(-1, 0x8fa49a0861a9bull, -37), // ~-1/14!
      65             :         Vc_buildDouble(-1, 0x5555555555548ull,  -3), // ~-1/3!
      66             :         Vc_buildDouble( 1, 0x111111110f7d0ull,  -7), // ~ 1/5!
      67             :     // cacheline 8
      68             :         Vc_buildDouble(-1, 0xa01a019bfdf03ull, -13), // ~-1/7!
      69             :         Vc_buildDouble( 1, 0x71de3567d48a1ull, -19), // ~ 1/9!
      70             :         Vc_buildDouble(-1, 0xae5e5a9291f5dull, -26), // ~-1/11!
      71             :         Vc_buildDouble( 1, 0x5d8fd1fd19ccdull, -33), // ~ 1/13!
      72             :         0., // padding (for alignment with float)
      73             :         Vc_buildDouble(1, 0x45F306DC9C883ull,  0), // 4/π = 1.2732395447351628 (bit pattern 0x3FF45F306DC9C883); the former mantissa 0x8BE60DB939105 was this value shifted left by one bit and evaluated to ~1.5465
      74             :         Vc_buildDouble(1, 0x921fb54442d18ull,  0), // π/2
      75             :         Vc_buildDouble(1, 0x921fb54442d18ull,  1), // π
      76             :     // cacheline 10
      77             :         Vc_buildDouble(-1, 0xc007fa1f72594ull, -1), // atan P coefficients
      78             :         Vc_buildDouble(-1, 0x028545b6b807aull,  4), // atan P coefficients
      79             :         Vc_buildDouble(-1, 0x2c08c36880273ull,  6), // atan P coefficients
      80             :         Vc_buildDouble(-1, 0xeb8bf2d05ba25ull,  6), // atan P coefficients
      81             :         Vc_buildDouble(-1, 0x03669fd28ec8eull,  6), // atan P coefficients
      82             :         Vc_buildDouble( 1, 0x8dbc45b14603cull,  4), // atan Q coefficients
      83             :         Vc_buildDouble( 1, 0x4a0dd43b8fa25ull,  7), // atan Q coefficients
      84             :         Vc_buildDouble( 1, 0xb0e18d2e2be3bull,  8), // atan Q coefficients
      85             :     // cacheline 12
      86             :         Vc_buildDouble( 1, 0xe563f13b049eaull,  8), // atan Q coefficients
      87             :         Vc_buildDouble( 1, 0x8519efbbd62ecull,  7), // atan Q coefficients
      88             :         Vc_buildDouble( 1, 0x3504f333f9de6ull,  1), // tan( 3/8 π )
      89             :         0.66,                                    // lower threshold for special casing in atan
      90             :         Vc_buildDouble(1, 0x1A62633145C07ull, -54), // remainder of pi/2
      91             :         1.e-8, // small asin input threshold
      92             :         0.625, // large asin input threshold
      93             :         0., // padding
      94             :     // cacheline 14
      95             :         Vc_buildDouble( 1, 0x84fc3988e9f08ull, -9), // asinCoeff0
      96             :         Vc_buildDouble(-1, 0x2079259f9290full, -1), // asinCoeff0
      97             :         Vc_buildDouble( 1, 0xbdff5baf33e6aull,  2), // asinCoeff0
      98             :         Vc_buildDouble(-1, 0x991aaac01ab68ull,  4), // asinCoeff0
      99             :         Vc_buildDouble( 1, 0xc896240f3081dull,  4), // asinCoeff0
     100             :         Vc_buildDouble(-1, 0x5f2a2b6bf5d8cull,  4), // asinCoeff1
     101             :         Vc_buildDouble( 1, 0x26219af6a7f42ull,  7), // asinCoeff1
     102             :         Vc_buildDouble(-1, 0x7fe08959063eeull,  8), // asinCoeff1
     103             :     // cacheline 16
     104             :         Vc_buildDouble( 1, 0x56709b0b644beull,  8), // asinCoeff1
     105             :         Vc_buildDouble( 1, 0x16b9b0bd48ad3ull, -8), // asinCoeff2
     106             :         Vc_buildDouble(-1, 0x34341333e5c16ull, -1), // asinCoeff2
     107             :         Vc_buildDouble( 1, 0x5c74b178a2dd9ull,  2), // asinCoeff2
     108             :         Vc_buildDouble(-1, 0x04331de27907bull,  4), // asinCoeff2
     109             :         Vc_buildDouble( 1, 0x39007da779259ull,  4), // asinCoeff2
     110             :         Vc_buildDouble(-1, 0x0656c06ceafd5ull,  3), // asinCoeff2
     111             :         Vc_buildDouble(-1, 0xd7b590b5e0eabull,  3), // asinCoeff3
     112             :     // cacheline 18
     113             :         Vc_buildDouble( 1, 0x19fc025fe9054ull,  6), // asinCoeff3
     114             :         Vc_buildDouble(-1, 0x265bb6d3576d7ull,  7), // asinCoeff3
     115             :         Vc_buildDouble( 1, 0x1705684ffbf9dull,  7), // asinCoeff3
     116             :         Vc_buildDouble(-1, 0x898220a3607acull,  5), // asinCoeff3
     117             :     };
     118             : #define _4(x) x
     119             :     template<> const float c_trig<float>::data[] = { // 60 floats, one per constant; slot order matches c_trig<double>::data, with 0.f filling slots only the double table uses
     120             :     // cacheline (in this namespace _4(x) expands to a single x, so every line below contributes exactly one float)
     121             :         _4(Vc_buildFloat( 1, 0x490FDB,  -1)), // π/4
     122             :         _4(Vc_buildFloat( 1, 0x491000,  -1)), // π/4 - 12 bits precision
     123             :         _4(Vc_buildFloat(-1, 0x157000, -19)), // π/4 remainder1 - 12 bits precision
     124             :         _4(Vc_buildFloat(-1, 0x6F4B9F, -32)), // π/4 remainder2
     125             :         _4(0.0625f),
     126             :         _4(16.f),
     127             :         _4(0.f), // padding
     128             :         _4(0.f), // padding
     129             :         _4(4.166664568298827e-2f),  // ~ 1/4!
     130             :         _4(-1.388731625493765e-3f), // ~-1/6!
     131             :         _4(2.443315711809948e-5f),  // ~ 1/8!
     132             :         _4(0.f), // padding (for alignment with double)
     133             :         _4(0.f), // padding (for alignment with double)
     134             :         _4(0.f), // padding (for alignment with double)
     135             :         _4(-1.6666654611e-1f), // ~-1/3!
     136             :         _4(8.3321608736e-3f),  // ~ 1/5!
     137             :     // cacheline
     138             :         _4(-1.9515295891e-4f), // ~-1/7!
     139             :         _4(0.f), // padding (for alignment with double)
     140             :         _4(0.f), // padding (for alignment with double)
     141             :         _4(0.f), // padding (for alignment with double)
     142             :         _4(8192.f), // loss threshold
     143             :         _4(Vc_buildFloat(1, 0x22F983, 0)), // 1.27323949337005615234375 = 4/π
     144             :         _4(Vc_buildFloat(1, 0x490FDB, 0)), // π/2
     145             :         _4(Vc_buildFloat(1, 0x490FDB, 1)), // π
     146             :         _4(8.05374449538e-2f), // atan P coefficients
     147             :         _4(1.38776856032e-1f), // atan P coefficients
     148             :         _4(1.99777106478e-1f), // atan P coefficients
     149             :         _4(3.33329491539e-1f), // atan P coefficients
     150             :         _4(0.f), // padding (for alignment with double)
     151             :         _4(0.f), // padding (for alignment with double)
     152             :         _4(0.f), // padding (for alignment with double)
     153             :         _4(0.f), // padding (for alignment with double)
     154             :     // cacheline
     155             :         _4(0.f), // padding (for alignment with double)
     156             :         _4(0.f), // padding (for alignment with double)
     157             :         _4(2.414213562373095f), // tan( 3/8 π )
     158             :         _4(0.414213562373095f), // tan( 1/8 π ) lower threshold for special casing in atan
     159             :         _4(Vc_buildFloat(-1, 0x3BBD2E, -25)), // remainder of pi/2
     160             :         _4(1.e-4f), // small asin input threshold
     161             :         _4(0.f), // padding (for alignment with double)
     162             :         _4(0.f), // padding (for alignment with double)
     163             :         _4(4.2163199048e-2f), // asinCoeff0
     164             :         _4(2.4181311049e-2f), // asinCoeff0
     165             :         _4(4.5470025998e-2f), // asinCoeff0
     166             :         _4(7.4953002686e-2f), // asinCoeff0
     167             :         _4(1.6666752422e-1f), // asinCoeff0
     168             :         _4(0.f), // padding (for alignment with double)
     169             :         _4(0.f), // padding (for alignment with double)
     170             :         _4(0.f), // padding (for alignment with double)
     171             :     // cacheline
     172             :         _4(0.f), // padding (for alignment with double)
     173             :         _4(0.f), // padding (for alignment with double)
     174             :         _4(0.f), // padding (for alignment with double)
     175             :         _4(0.f), // padding (for alignment with double)
     176             :         _4(0.f), // padding (for alignment with double)
     177             :         _4(0.f), // padding (for alignment with double)
     178             :         _4(0.f), // padding (for alignment with double)
     179             :         _4(0.f), // padding (for alignment with double)
     180             :         _4(0.f), // padding (for alignment with double)
     181             :         _4(0.f), // padding (for alignment with double)
     182             :         _4(0.f), // padding (for alignment with double)
     183             :         _4(0.f), // padding (for alignment with double)
     184             :     };
     185             : #undef _4
     186             : 
     187             :     const unsigned       int c_general::absMaskFloat[2] = { 0xffffffffu, 0x7fffffffu }; // second word has only bit 31 cleared
     188             :     const unsigned       int c_general::signMaskFloat[2] = { 0x0u, 0x80000000u }; // second word has only bit 31 set
     189             :     const unsigned       int c_general::highMaskFloat = 0xfffff000u; // low 12 bits cleared
     190             :     const              float c_general::oneFloat = 1.f;
     191             :     const unsigned     short c_general::minShort[2] = { 0x8000u, 0x8000u }; // bit pattern of the most negative 16-bit value, stored twice
     192             :     const unsigned     short c_general::one16[2] = { 1, 1 };
     193             :     const              float c_general::_2power31 = 1u << 31; // 2^31: the unsigned shift result is converted to float
     194             : 
     195             :     // cacheline 4: 64-bit wide constants
     196             :     const unsigned long long c_general::highMaskDouble = 0xfffffffff8000000ull; // low 27 bits cleared
     197             :     const             double c_general::oneDouble = 1.;
     198             :     const unsigned long long c_general::frexpMask = 0xbfefffffffffffffull; // all bits set except bit 62 and bit 52
     199             : 
     200             :     const unsigned long long c_log<double>::data[21] = { // 21 raw 64-bit patterns; the labelled values match IEEE-754 binary64 encodings (e.g. 0x3fe0000000000000 is 0.5)
     201             :         0x000003ff000003ffull // bias: 0x3ff (1023) in both 32-bit halves; TODO: remove
     202             :       , 0x7ff0000000000000ull // exponentMask (+inf): the 11 exponent bits set, everything else clear
     203             : 
     204             :       , 0x3f1ab4c293c31bb0ull // P[0]
     205             :       , 0x3fdfd6f53f5652f2ull // P[1]
     206             :       , 0x4012d2baed926911ull // P[2]
     207             :       , 0x402cff72c63eeb2eull // P[3]
     208             :       , 0x4031efd6924bc84dull // P[4]
     209             :       , 0x401ed5637d7edcf8ull // P[5]
     210             : 
     211             :       , 0x40269320ae97ef8eull // Q[0]
     212             :       , 0x40469d2c4e19c033ull // Q[1]
     213             :       , 0x4054bf33a326bdbdull // Q[2]
     214             :       , 0x4051c9e2eb5eae21ull // Q[3]
     215             :       , 0x4037200a9e1f25b2ull // Q[4]
     216             : 
     217             :       , 0xfff0000000000000ull // -inf
     218             :       , 0x0010000000000000ull // min(): smallest normal double, 2^-1022
     219             :       , 0x3fe6a09e667f3bcdull // 1/sqrt(2)
     220             :       , 0x3fe6300000000000ull // round(ln(2) * 512) / 512, i.e. 355/512
     221             :       , 0xbf2bd0105c610ca8ull // ln(2) - round(ln(2) * 512) / 512
     222             :       , 0x3fe0000000000000ull // 0.5
     223             :       , 0x3fdbcb7b1526e50eull // log10(e)
     224             :       , 0x3ff71547652b82feull // log2(e)
     225             :     };
     226             : 
     227             :     template<> const unsigned int c_log<float>::data[21] = { // 21 raw 32-bit patterns laid out slot-for-slot like c_log<double>::data; labelled values match IEEE-754 binary32 encodings
     228             :         0x0000007fu // bias: 0x7f (127); TODO: remove
     229             :       , 0x7f800000u // exponentMask (+inf): the 8 exponent bits set, everything else clear
     230             : 
     231             :       , 0x3d9021bbu //  7.0376836292e-2f // P[0]
     232             :       , 0xbdebd1b8u // -1.1514610310e-1f // P[1]
     233             :       , 0x3def251au //  1.1676998740e-1f // P[2]
     234             :       , 0xbdfe5d4fu // -1.2420140846e-1f // P[3]
     235             :       , 0x3e11e9bfu //  1.4249322787e-1f // P[4]
     236             :       , 0xbe2aae50u // -1.6668057665e-1f // P[5]
     237             :       , 0x3e4cceacu //  2.0000714765e-1f // P[6]
     238             :       , 0xbe7ffffcu // -2.4999993993e-1f // P[7]
     239             :       , 0x3eaaaaaau //  3.3333331174e-1f // P[8]
     240             :       , 0           // padding because of c_log<double> (keeps the entries below at the same indexes as in the double table)
     241             :       , 0           // padding because of c_log<double>
     242             : 
     243             :       , 0xff800000u // -inf
     244             :       , 0x00800000u // min(): smallest normal float, 2^-126
     245             :       , 0x3f3504f3u // 1/sqrt(2)
     246             :       , 0x3f318000u // round(ln(2) * 512) / 512, i.e. 355/512
     247             :       , 0xb95e8083u // ln(2) - round(ln(2) * 512) / 512
     248             :       , 0x3f000000u // 0.5
     249             :       , 0x3ede5bd9u // log10(e)
     250             :       , 0x3fb8aa3bu // log2(e)
     251             :     };
     252             : } // namespace AVX
     253             : 
     254             : namespace SSE
     255             : {
     256             :     // cacheline 1: each constant is replicated into every element of its array
     257             :     V_ALIGN(64) const int c_general::absMaskFloat[4] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; // every bit except bit 31; this first array is 64-byte aligned
     258             :     V_ALIGN(16) const unsigned int c_general::signMaskFloat[4] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; // only bit 31
     259             :     V_ALIGN(16) const unsigned int c_general::highMaskFloat[4] = { 0xfffff000u, 0xfffff000u, 0xfffff000u, 0xfffff000u }; // low 12 bits cleared
     260             :     V_ALIGN(16) const short c_general::minShort[8] = { -0x8000, -0x8000, -0x8000, -0x8000, -0x8000, -0x8000, -0x8000, -0x8000 }; // -32768 in every lane
     261             :     V_ALIGN(16) extern const unsigned short _IndexesFromZero8[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; // 8 unsigned shorts: in this namespace the numeric suffix is the element count
     262             : 
     263             :     // cacheline 2
     264             :     V_ALIGN(16) extern const unsigned int   _IndexesFromZero4[4] = { 0, 1, 2, 3 }; // 4 unsigned ints
     265             :     V_ALIGN(16) const unsigned short c_general::one16[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
     266             :     V_ALIGN(16) const unsigned int c_general::one32[4] = { 1, 1, 1, 1 };
     267             :     V_ALIGN(16) const float c_general::oneFloat[4] = { 1.f, 1.f, 1.f, 1.f };
     268             : 
     269             :     // cacheline 3: 64-bit wide constants, two copies each
     270             :     V_ALIGN(16) const unsigned long long c_general::highMaskDouble[2] = { 0xfffffffff8000000ull, 0xfffffffff8000000ull }; // low 27 bits cleared
     271             :     V_ALIGN(16) const double c_general::oneDouble[2] = { 1., 1. };
     272             :     V_ALIGN(16) const long long c_general::absMaskDouble[2] = { 0x7fffffffffffffffll, 0x7fffffffffffffffll }; // every bit except bit 63
     273             :     V_ALIGN(16) const unsigned long long c_general::signMaskDouble[2] = { 0x8000000000000000ull, 0x8000000000000000ull }; // only bit 63
     274             :     V_ALIGN(16) const unsigned long long c_general::frexpMask[2] = { 0xbfefffffffffffffull, 0xbfefffffffffffffull }; // all bits set except bit 62 and bit 52
     275             : 
     276             : #define _2(x) x, x
     277             :     template<> const double c_trig<double>::data[] = { // every constant is stored twice via _2(x). Vc_buildDouble(sign, mantissa, exponent) entries appear to encode sign * 1.mantissa * 2^exponent (π/4, π/2 and π share one mantissa and differ only in the exponent)
     278             :     // cacheline 4
     279             :         _2(Vc_buildDouble(1, 0x921fb54442d18ull, -1)), // π/4
     280             :         _2(Vc_buildDouble(1, 0x921fb40000000ull, -1)), // π/4 - 30bits precision
     281             :         _2(Vc_buildDouble(1, 0x4442d00000000ull, -25)), // π/4 remainder1 - 32bits precision
     282             :         _2(Vc_buildDouble(1, 0x8469898cc5170ull, -49)), // π/4 remainder2
     283             :     // cacheline 5
     284             :         _2(0.0625),
     285             :         _2(16.),
     286             :         _2(0.), // padding
     287             :         _2(0.), // padding
     288             :     // cacheline 6
     289             :         _2(Vc_buildDouble( 1, 0x555555555554bull,  -5)), // ~ 1/4!
     290             :         _2(Vc_buildDouble(-1, 0x6c16c16c14f91ull, -10)), // ~-1/6!
     291             :         _2(Vc_buildDouble( 1, 0xa01a019c844f5ull, -16)), // ~ 1/8!
     292             :         _2(Vc_buildDouble(-1, 0x27e4f7eac4bc6ull, -22)), // ~-1/10!
     293             :     // cacheline 7
     294             :         _2(Vc_buildDouble( 1, 0x1ee9d7b4e3f05ull, -29)), // ~ 1/12!
     295             :         _2(Vc_buildDouble(-1, 0x8fa49a0861a9bull, -37)), // ~-1/14!
     296             :         _2(Vc_buildDouble(-1, 0x5555555555548ull,  -3)), // ~-1/3!
     297             :         _2(Vc_buildDouble( 1, 0x111111110f7d0ull,  -7)), // ~ 1/5!
     298             :     // cacheline 8
     299             :         _2(Vc_buildDouble(-1, 0xa01a019bfdf03ull, -13)), // ~-1/7!
     300             :         _2(Vc_buildDouble( 1, 0x71de3567d48a1ull, -19)), // ~ 1/9!
     301             :         _2(Vc_buildDouble(-1, 0xae5e5a9291f5dull, -26)), // ~-1/11!
     302             :         _2(Vc_buildDouble( 1, 0x5d8fd1fd19ccdull, -33)), // ~ 1/13!
     303             :     // cacheline 9
     304             :         _2(0.), // padding (for alignment with float)
     305             :         _2(Vc_buildDouble(1, 0x45F306DC9C883ull,  0)), // 4/π = 1.2732395447351628 (bit pattern 0x3FF45F306DC9C883); the former mantissa 0x8BE60DB939105 was this value shifted left by one bit and evaluated to ~1.5465
     306             :         _2(Vc_buildDouble(1, 0x921fb54442d18ull,  0)), // π/2
     307             :         _2(Vc_buildDouble(1, 0x921fb54442d18ull,  1)), // π
     308             :     // cacheline 10
     309             :         _2(Vc_buildDouble(-1, 0xc007fa1f72594ull, -1)), // atan P coefficients
     310             :         _2(Vc_buildDouble(-1, 0x028545b6b807aull,  4)), // atan P coefficients
     311             :         _2(Vc_buildDouble(-1, 0x2c08c36880273ull,  6)), // atan P coefficients
     312             :         _2(Vc_buildDouble(-1, 0xeb8bf2d05ba25ull,  6)), // atan P coefficients
     313             :     // cacheline 11
     314             :         _2(Vc_buildDouble(-1, 0x03669fd28ec8eull,  6)), // atan P coefficients
     315             :         _2(Vc_buildDouble( 1, 0x8dbc45b14603cull,  4)), // atan Q coefficients
     316             :         _2(Vc_buildDouble( 1, 0x4a0dd43b8fa25ull,  7)), // atan Q coefficients
     317             :         _2(Vc_buildDouble( 1, 0xb0e18d2e2be3bull,  8)), // atan Q coefficients
     318             :     // cacheline 12
     319             :         _2(Vc_buildDouble( 1, 0xe563f13b049eaull,  8)), // atan Q coefficients
     320             :         _2(Vc_buildDouble( 1, 0x8519efbbd62ecull,  7)), // atan Q coefficients
     321             :         _2(Vc_buildDouble( 1, 0x3504f333f9de6ull,  1)), // tan( 3/8 π )
     322             :         _2(0.66),                                    // lower threshold for special casing in atan
     323             :     // cacheline 13
     324             :         _2(Vc_buildDouble(1, 0x1A62633145C07ull, -54)), // remainder of pi/2
     325             :         _2(1.e-8), // small asin input threshold
     326             :         _2(0.625), // large asin input threshold
     327             :         _2(0.), // padding
     328             :     // cacheline 14
     329             :         _2(Vc_buildDouble( 1, 0x84fc3988e9f08ull, -9)), // asinCoeff0
     330             :         _2(Vc_buildDouble(-1, 0x2079259f9290full, -1)), // asinCoeff0
     331             :         _2(Vc_buildDouble( 1, 0xbdff5baf33e6aull,  2)), // asinCoeff0
     332             :         _2(Vc_buildDouble(-1, 0x991aaac01ab68ull,  4)), // asinCoeff0
     333             :     // cacheline 15
     334             :         _2(Vc_buildDouble( 1, 0xc896240f3081dull,  4)), // asinCoeff0
     335             :         _2(Vc_buildDouble(-1, 0x5f2a2b6bf5d8cull,  4)), // asinCoeff1
     336             :         _2(Vc_buildDouble( 1, 0x26219af6a7f42ull,  7)), // asinCoeff1
     337             :         _2(Vc_buildDouble(-1, 0x7fe08959063eeull,  8)), // asinCoeff1
     338             :     // cacheline 16
     339             :         _2(Vc_buildDouble( 1, 0x56709b0b644beull,  8)), // asinCoeff1
     340             :         _2(Vc_buildDouble( 1, 0x16b9b0bd48ad3ull, -8)), // asinCoeff2
     341             :         _2(Vc_buildDouble(-1, 0x34341333e5c16ull, -1)), // asinCoeff2
     342             :         _2(Vc_buildDouble( 1, 0x5c74b178a2dd9ull,  2)), // asinCoeff2
     343             :     // cacheline 17
     344             :         _2(Vc_buildDouble(-1, 0x04331de27907bull,  4)), // asinCoeff2
     345             :         _2(Vc_buildDouble( 1, 0x39007da779259ull,  4)), // asinCoeff2
     346             :         _2(Vc_buildDouble(-1, 0x0656c06ceafd5ull,  3)), // asinCoeff2
     347             :         _2(Vc_buildDouble(-1, 0xd7b590b5e0eabull,  3)), // asinCoeff3
     348             :     // cacheline 18
     349             :         _2(Vc_buildDouble( 1, 0x19fc025fe9054ull,  6)), // asinCoeff3
     350             :         _2(Vc_buildDouble(-1, 0x265bb6d3576d7ull,  7)), // asinCoeff3
     351             :         _2(Vc_buildDouble( 1, 0x1705684ffbf9dull,  7)), // asinCoeff3
     352             :         _2(Vc_buildDouble(-1, 0x898220a3607acull,  5)), // asinCoeff3
     353             :     };
     354             : #undef _2
     355             : #define _4(x) x, x, x, x
     356             :     template<> const float c_trig<float>::data[] = { // every constant is stored four times via _4(x); slot order matches c_trig<double>::data, with 0.f filling slots only the double table uses
     357             :     // cacheline (four _4 entries = 16 floats per group)
     358             :         _4(Vc_buildFloat( 1, 0x490FDB,  -1)), // π/4
     359             :         _4(Vc_buildFloat( 1, 0x491000,  -1)), // π/4 - 12 bits precision
     360             :         _4(Vc_buildFloat(-1, 0x157000, -19)), // π/4 remainder1 - 12 bits precision
     361             :         _4(Vc_buildFloat(-1, 0x6F4B9F, -32)), // π/4 remainder2
     362             :     // cacheline
     363             :         _4(0.0625f),
     364             :         _4(16.f),
     365             :         _4(0.f), // padding
     366             :         _4(0.f), // padding
     367             :     // cacheline
     368             :         _4(4.166664568298827e-2f),  // ~ 1/4!
     369             :         _4(-1.388731625493765e-3f), // ~-1/6!
     370             :         _4(2.443315711809948e-5f),  // ~ 1/8!
     371             :         _4(0.f), // padding (for alignment with double)
     372             :     // cacheline
     373             :         _4(0.f), // padding (for alignment with double)
     374             :         _4(0.f), // padding (for alignment with double)
     375             :         _4(-1.6666654611e-1f), // ~-1/3!
     376             :         _4(8.3321608736e-3f),  // ~ 1/5!
     377             :     // cacheline
     378             :         _4(-1.9515295891e-4f), // ~-1/7!
     379             :         _4(0.f), // padding (for alignment with double)
     380             :         _4(0.f), // padding (for alignment with double)
     381             :         _4(0.f), // padding (for alignment with double)
     382             :     // cacheline
     383             :         _4(8192.f), // loss threshold
     384             :         _4(Vc_buildFloat(1, 0x22F983, 0)), // 1.27323949337005615234375 = 4/π
     385             :         _4(Vc_buildFloat(1, 0x490FDB, 0)), // π/2
     386             :         _4(Vc_buildFloat(1, 0x490FDB, 1)), // π
     387             :     // cacheline
     388             :         _4(8.05374449538e-2f), // atan P coefficients
     389             :         _4(1.38776856032e-1f), // atan P coefficients
     390             :         _4(1.99777106478e-1f), // atan P coefficients
     391             :         _4(3.33329491539e-1f), // atan P coefficients
     392             :     // cacheline
     393             :         _4(0.f), // padding (for alignment with double)
     394             :         _4(0.f), // padding (for alignment with double)
     395             :         _4(0.f), // padding (for alignment with double)
     396             :         _4(0.f), // padding (for alignment with double)
     397             :     // cacheline
     398             :         _4(0.f), // padding (for alignment with double)
     399             :         _4(0.f), // padding (for alignment with double)
     400             :         _4(2.414213562373095f), // tan( 3/8 π )
     401             :         _4(0.414213562373095f), // tan( 1/8 π ) lower threshold for special casing in atan
     402             :     // cacheline
     403             :         _4(Vc_buildFloat(-1, 0x3BBD2E, -25)), // remainder of pi/2
     404             :         _4(1.e-4f), // small asin input threshold
     405             :         _4(0.f), // padding (for alignment with double)
     406             :         _4(0.f), // padding (for alignment with double)
     407             :     // cacheline
     408             :         _4(4.2163199048e-2f), // asinCoeff0
     409             :         _4(2.4181311049e-2f), // asinCoeff0
     410             :         _4(4.5470025998e-2f), // asinCoeff0
     411             :         _4(7.4953002686e-2f), // asinCoeff0
     412             :     // cacheline
     413             :         _4(1.6666752422e-1f), // asinCoeff0
     414             :         _4(0.f), // padding (for alignment with double)
     415             :         _4(0.f), // padding (for alignment with double)
     416             :         _4(0.f), // padding (for alignment with double)
     417             :     // cacheline
     418             :         _4(0.f), // padding (for alignment with double)
     419             :         _4(0.f), // padding (for alignment with double)
     420             :         _4(0.f), // padding (for alignment with double)
     421             :         _4(0.f), // padding (for alignment with double)
     422             :     // cacheline
     423             :         _4(0.f), // padding (for alignment with double)
     424             :         _4(0.f), // padding (for alignment with double)
     425             :         _4(0.f), // padding (for alignment with double)
     426             :         _4(0.f), // padding (for alignment with double)
     427             :     // cacheline
     428             :         _4(0.f), // padding (for alignment with double)
     429             :         _4(0.f), // padding (for alignment with double)
     430             :         _4(0.f), // padding (for alignment with double)
     431             :         _4(0.f), // padding (for alignment with double)
     432             :     };
     433             : #undef _4
     434             : 
     435             :     // cacheline 8 (NOTE(review): this number does not follow from the tables above and is presumably stale; confirm before relying on it)
     436             :     V_ALIGN(16) extern const unsigned char _IndexesFromZero16[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; // 16 unsigned chars holding 0..15, 16-byte aligned
     437             : 
     438             :     V_ALIGN(64) const unsigned long long c_log<double>::data[21 * 2] = { // 21 constants, each raw 64-bit pattern stored twice; the /*n*/ markers give the constant's index, labelled values match IEEE-754 binary64 encodings
     439             :       /* 0*/   0x000003ff000003ffull, 0x000003ff000003ffull // bias: 0x3ff (1023) in both 32-bit halves; TODO: remove
     440             :       /* 1*/ , 0x7ff0000000000000ull, 0x7ff0000000000000ull // exponentMask (+inf): the 11 exponent bits set, everything else clear
     441             : 
     442             :       /* 2*/ , 0x3f1ab4c293c31bb0ull, 0x3f1ab4c293c31bb0ull // P[0]
     443             :       /* 3*/ , 0x3fdfd6f53f5652f2ull, 0x3fdfd6f53f5652f2ull // P[1]
     444             :       /* 4*/ , 0x4012d2baed926911ull, 0x4012d2baed926911ull // P[2]
     445             :       /* 5*/ , 0x402cff72c63eeb2eull, 0x402cff72c63eeb2eull // P[3]
     446             :       /* 6*/ , 0x4031efd6924bc84dull, 0x4031efd6924bc84dull // P[4]
     447             :       /* 7*/ , 0x401ed5637d7edcf8ull, 0x401ed5637d7edcf8ull // P[5]
     448             : 
     449             :       /* 8*/ , 0x40269320ae97ef8eull, 0x40269320ae97ef8eull // Q[0]
     450             :       /* 9*/ , 0x40469d2c4e19c033ull, 0x40469d2c4e19c033ull // Q[1]
     451             :       /*10*/ , 0x4054bf33a326bdbdull, 0x4054bf33a326bdbdull // Q[2]
     452             :       /*11*/ , 0x4051c9e2eb5eae21ull, 0x4051c9e2eb5eae21ull // Q[3]
     453             :       /*12*/ , 0x4037200a9e1f25b2ull, 0x4037200a9e1f25b2ull // Q[4]
     454             : 
     455             :       /*13*/ , 0xfff0000000000000ull, 0xfff0000000000000ull // -inf
     456             :       /*14*/ , 0x0010000000000000ull, 0x0010000000000000ull // min(): smallest normal double, 2^-1022
     457             :       /*15*/ , 0x3fe6a09e667f3bcdull, 0x3fe6a09e667f3bcdull // 1/sqrt(2)
     458             :       /*16*/ , 0x3fe6300000000000ull, 0x3fe6300000000000ull // round(ln(2) * 512) / 512, i.e. 355/512
     459             :       /*17*/ , 0xbf2bd0105c610ca8ull, 0xbf2bd0105c610ca8ull // ln(2) - round(ln(2) * 512) / 512
     460             :       /*18*/ , 0x3fe0000000000000ull, 0x3fe0000000000000ull // 0.5
     461             :       /*19*/ , 0x3fdbcb7b1526e50eull, 0x3fdbcb7b1526e50eull // log10(e)
     462             :       /*20*/ , 0x3ff71547652b82feull, 0x3ff71547652b82feull // log2(e)
     463             :     };
     464             : 
     465             :     template<> V_ALIGN(64) const unsigned int c_log<float>::data[21 * 4] = { // 21 rows, each one 32-bit pattern repeated four times
     466             :         0x0000007fu, 0x0000007fu, 0x0000007fu, 0x0000007fu, // bias (0x7f = 127) TODO: remove
     467             :         0x7f800000u, 0x7f800000u, 0x7f800000u, 0x7f800000u, // exponentMask (+inf)
     468             : 
     469             :         0x3d9021bbu, 0x3d9021bbu, 0x3d9021bbu, 0x3d9021bbu, //  7.0376836292e-2f // P[0]
     470             :         0xbdebd1b8u, 0xbdebd1b8u, 0xbdebd1b8u, 0xbdebd1b8u, // -1.1514610310e-1f // P[1]
     471             :         0x3def251au, 0x3def251au, 0x3def251au, 0x3def251au, //  1.1676998740e-1f // P[2]
     472             :         0xbdfe5d4fu, 0xbdfe5d4fu, 0xbdfe5d4fu, 0xbdfe5d4fu, // -1.2420140846e-1f // P[3]
     473             :         0x3e11e9bfu, 0x3e11e9bfu, 0x3e11e9bfu, 0x3e11e9bfu, //  1.4249322787e-1f // P[4]
     474             :         0xbe2aae50u, 0xbe2aae50u, 0xbe2aae50u, 0xbe2aae50u, // -1.6668057665e-1f // P[5]
     475             :         0x3e4cceacu, 0x3e4cceacu, 0x3e4cceacu, 0x3e4cceacu, //  2.0000714765e-1f // P[6]
     476             :         0xbe7ffffcu, 0xbe7ffffcu, 0xbe7ffffcu, 0xbe7ffffcu, // -2.4999993993e-1f // P[7]
     477             :         0x3eaaaaaau, 0x3eaaaaaau, 0x3eaaaaaau, 0x3eaaaaaau, //  3.3333331174e-1f // P[8]
     478             :         0,           0,           0,           0,           // padding because of c_log<double> (keeps -inf at row 13 in both tables)
     479             :         0,           0,           0,           0,           // padding because of c_log<double>
     480             : 
     481             :         0xff800000u, 0xff800000u, 0xff800000u, 0xff800000u, // -inf
     482             :         0x00800000u, 0x00800000u, 0x00800000u, 0x00800000u, // min() (exponent field 1, zero mantissa: smallest normalized float)
     483             :         0x3f3504f3u, 0x3f3504f3u, 0x3f3504f3u, 0x3f3504f3u, // 1/sqrt(2)
     484             :         // ln(2) = 0x3fe62e42fefa39ef
     485             :         // ln(2) = Vc_buildDouble( 1, 0x00062e42fefa39ef, -1)
     486             :         //       = Vc_buildFloat( 1, 0x00317217(f7d), -1) + Vc_buildFloat( 1, 0x0077d1cd, -25)
     487             :         //       = Vc_buildFloat( 1, 0x00318000(000), -1) + Vc_buildFloat(-1, 0x005e8083, -13)
     488             :         0x3f318000u, 0x3f318000u, 0x3f318000u, 0x3f318000u, // round(ln(2) * 512) / 512
     489             :         0xb95e8083u, 0xb95e8083u, 0xb95e8083u, 0xb95e8083u, // ln(2) - round(ln(2) * 512) / 512
     490             :         0x3f000000u, 0x3f000000u, 0x3f000000u, 0x3f000000u, // 0.5
     491             :         0x3ede5bd9u, 0x3ede5bd9u, 0x3ede5bd9u, 0x3ede5bd9u, // log10(e)
     492             :         0x3fb8aa3bu, 0x3fb8aa3bu, 0x3fb8aa3bu, 0x3fb8aa3bu, // log2(e)
     493             :         // log10(2) = 0x3fd34413509f79ff
     494             :         //          = Vc_buildDouble( 1, 0x00034413509f79ff, -2)
     495             :         //          = Vc_buildFloat( 1, 0x001a209a(84fbcff8), -2) + Vc_buildFloat( 1, 0x0004fbcff(8), -26)
     496             :         //Vc_buildFloat( 1, 0x001a209a, -2), // log10(2)
     497             :         //Vc_buildFloat( 1, 0x001a209a, -2), // log10(2)
     498             :         //Vc_buildFloat( 1, 0x001a209a, -2), // log10(2)
     499             :         //Vc_buildFloat( 1, 0x001a209a, -2), // log10(2)
     500             :     };
     501             : } // namespace SSE
     502             : 
     503             : V_ALIGN(64) unsigned int RandomState[16] = { // mutable (non-const) table of 16 words, 64-byte aligned; presumably the initial random-number state - confirm against its users
     504             :     0x5a383a4fu, 0xc68bd45eu, 0x691d6d86u, 0xb367e14fu,
     505             :     0xd689dbaau, 0xfde442aau, 0x3d265423u, 0x1a77885cu,
     506             :     0x36ed2684u, 0xfb1f049du, 0x19e52f31u, 0x821e4dd7u,
     507             :     0x23996d25u, 0x5962725au, 0x6aced4ceu, 0xd4c610f3u
     508             : };
     509             : 
     510             : // dummy symbol to emit warnings with GCC 4.3
     511             : namespace Warnings {
     512           0 :     void _operator_bracket_warning() {} // intentionally empty: only the symbol itself is needed
     513             : } // namespace Warnings
     514             : 
     515             : const char LIBRARY_VERSION[] = VC_VERSION_STRING; // version string of the compiled library; printed by checkLibraryAbi on a mismatch
     516             : const unsigned int LIBRARY_VERSION_NUMBER = VC_VERSION_NUMBER; // checkLibraryAbi requires this to be >= the version number it is given
     517             : const unsigned int LIBRARY_ABI_VERSION = VC_LIBRARY_ABI_VERSION; // checkLibraryAbi requires this to equal the ABI version it is given
     518             : 
     519             : void checkLibraryAbi(unsigned int compileTimeAbi, unsigned int versionNumber, const char *compileTimeVersion) { // terminates the process when the values the caller was compiled with do not fit this compiled library; returns normally otherwise
     520           6 :     if (LIBRARY_ABI_VERSION != compileTimeAbi || LIBRARY_VERSION_NUMBER < versionNumber) { // ABI version must match exactly; the library may be newer than the caller's version number, but not older
     521           0 :         fprintf(stderr, "The versions of libVc.a (%s) and Vc/version.h (%s) are incompatible. Aborting.\n", LIBRARY_VERSION, compileTimeVersion); // written to stderr: abort() is not required to flush buffered stdout, so the message could otherwise be lost
     522           0 :         abort();
     523             :     }
     524           3 : }
     525             : 
     526             : } // namespace Vc
     527             : } // namespace AliRoot
     528             : 
     529             : #undef V_ALIGN

Generated by: LCOV version 1.11