Line data Source code
1 : // **************************************************************************
2 : // * This file is property of and copyright by the ALICE HLT Project *
3 : // * All rights reserved. *
4 : // * *
5 : // * Primary Authors: *
6 : // * Copyright 2009 Matthias Kretz <kretz@kde.org> *
7 : // * *
8 : // * Permission to use, copy, modify and distribute this software and its *
9 : // * documentation strictly for non-commercial purposes is hereby granted *
10 : // * without fee, provided that the above copyright notice appears in all *
11 : // * copies and that both the copyright notice and this permission notice *
12 : // * appear in the supporting documentation. The authors make no claims *
13 : // * about the suitability of this software for any purpose. It is *
14 : // * provided "as is" without express or implied warranty. *
15 : // **************************************************************************
16 :
17 : #include "AliHLTTPCCASliceData.h"
18 : #include "AliHLTTPCCAClusterData.h"
19 : #include "AliHLTTPCCAMath.h"
20 : #include "AliHLTArray.h"
21 : #include "AliHLTTPCCAHit.h"
22 : #include "AliHLTTPCCAParam.h"
23 : #include "AliHLTTPCCAGPUConfig.h"
24 : #include "AliHLTTPCCAGPUTracker.h"
25 : #include "MemoryAssignmentHelpers.h"
26 : #include <iostream>
27 : #include <string.h>
28 :
29 : // calculates an approximation for 1/sqrt(x)
30 : // Google for 0x5f3759df :)
31 : static inline float fastInvSqrt( float _x )
32 : {
33 : // the function calculates fast inverse sqrt
34 :
35 0 : union { float f; int i; } x = { _x };
36 0 : const float xhalf = 0.5f * x.f;
37 0 : x.i = 0x5f3759df - ( x.i >> 1 );
38 0 : x.f = x.f * ( 1.5f - xhalf * x.f * x.f );
39 0 : return x.f;
40 : }
41 :
42 : inline void AliHLTTPCCASliceData::CreateGrid( AliHLTTPCCARow *row, const float2* data, int ClusterDataHitNumberOffset )
43 : {
44 : // grid creation
45 :
46 0 : if ( row->NHits() <= 0 ) { // no hits or invalid data
47 : // grid coordinates don't matter, since there are no hits
48 0 : row->fGrid.CreateEmpty();
49 0 : return;
50 : }
51 :
52 : float yMin = 1.e3f;
53 : float yMax = -1.e3f;
54 : float zMin = 1.e3f;
55 : float zMax = -1.e3f;
56 0 : for ( int i = ClusterDataHitNumberOffset; i < ClusterDataHitNumberOffset + row->fNHits; ++i ) {
57 0 : const float y = data[i].x;
58 0 : const float z = data[i].y;
59 0 : if ( yMax < y ) yMax = y;
60 0 : if ( yMin > y ) yMin = y;
61 0 : if ( zMax < z ) zMax = z;
62 0 : if ( zMin > z ) zMin = z;
63 : }
64 :
65 0 : const float norm = fastInvSqrt( row->fNHits );
66 0 : row->fGrid.Create( yMin, yMax, zMin, zMax,
67 0 : CAMath::Max( ( yMax - yMin ) * norm, 2.f ),
68 0 : CAMath::Max( ( zMax - zMin ) * norm, 2.f ) );
69 0 : }
70 :
71 : inline void AliHLTTPCCASliceData::PackHitData( AliHLTTPCCARow* const row, const AliHLTArray<AliHLTTPCCAHit> &binSortedHits )
72 : {
73 : // hit data packing
74 :
75 : static const float shortPackingConstant = 1.f / 65535.f;
76 0 : const float y0 = row->fGrid.YMin();
77 0 : const float z0 = row->fGrid.ZMin();
78 0 : const float stepY = ( row->fGrid.YMax() - y0 ) * shortPackingConstant;
79 0 : const float stepZ = ( row->fGrid.ZMax() - z0 ) * shortPackingConstant;
80 0 : const float stepYi = 1.f / stepY;
81 0 : const float stepZi = 1.f / stepZ;
82 :
83 0 : row->fHy0 = y0;
84 0 : row->fHz0 = z0;
85 0 : row->fHstepY = stepY;
86 0 : row->fHstepZ = stepZ;
87 0 : row->fHstepYi = stepYi;
88 0 : row->fHstepZi = stepZi;
89 :
90 0 : for ( int hitIndex = 0; hitIndex < row->fNHits; ++hitIndex ) {
91 : // bin sorted index!
92 0 : const int globalHitIndex = row->fHitNumberOffset + hitIndex;
93 0 : const AliHLTTPCCAHit &hh = binSortedHits[hitIndex];
94 0 : const float xx = ( ( hh.Y() - y0 ) * stepYi ) + .5 ;
95 0 : const float yy = ( ( hh.Z() - z0 ) * stepZi ) + .5 ;
96 0 : if ( xx < 0 || yy < 0 || xx >= 65536 || yy >= 65536 ) {
97 0 : std::cout << "!!!! hit packing error!!! " << xx << " " << yy << " " << std::endl;
98 0 : }
99 : // HitData is bin sorted
100 0 : fHitData[globalHitIndex].x = (unsigned short) xx;
101 0 : fHitData[globalHitIndex].y = (unsigned short) yy;
102 : }
103 0 : }
104 :
105 : void AliHLTTPCCASliceData::Clear()
106 : {
107 0 : fNumberOfHits = 0;
108 0 : }
109 :
110 : void AliHLTTPCCASliceData::InitializeRows( const AliHLTTPCCAParam &p )
111 : {
112 : // initialisation of rows
113 0 : if (!fRows) fRows = new AliHLTTPCCARow[HLTCA_ROW_COUNT + 1];
114 0 : for ( int i = 0; i < p.NRows(); ++i ) {
115 0 : fRows[i].fX = p.RowX( i );
116 0 : fRows[i].fMaxY = CAMath::Tan( p.DAlpha() / 2. ) * fRows[i].fX;
117 : }
118 0 : }
119 :
120 : #ifndef HLTCA_GPUCODE
121 : AliHLTTPCCASliceData::~AliHLTTPCCASliceData()
122 0 : {
123 : //Standard Destrcutor
124 0 : if (fRows)
125 : {
126 0 : if (!fIsGpuSliceData) delete[] fRows;
127 0 : fRows = NULL;
128 0 : }
129 0 : if (fMemory)
130 : {
131 0 : if (!fIsGpuSliceData) delete[] fMemory;
132 0 : fMemory = NULL;
133 0 : }
134 :
135 0 : }
136 : #endif
137 :
138 : GPUh() void AliHLTTPCCASliceData::SetGPUSliceDataMemory(void* const pSliceMemory, void* const pRowMemory)
139 : {
140 : //Set Pointer to slice data memory to external memory
141 0 : fMemory = (char*) pSliceMemory;
142 0 : fRows = (AliHLTTPCCARow*) pRowMemory;
143 0 : }
144 :
145 : size_t AliHLTTPCCASliceData::SetPointers(const AliHLTTPCCAClusterData *data, bool allocate)
146 : {
147 : //Set slice data internal pointers
148 :
149 0 : int hitMemCount = HLTCA_ROW_COUNT * (sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v) - 1) + data->NumberOfClusters();
150 : //Calculate Memory needed to store hits in rows
151 :
152 : const unsigned int kVectorAlignment = 256 /*sizeof( uint4 )*/ ;
153 0 : fNumberOfHitsPlusAlign = NextMultipleOf < ( kVectorAlignment > sizeof(HLTCA_GPU_ROWALIGNMENT) ? kVectorAlignment : sizeof(HLTCA_GPU_ROWALIGNMENT)) / sizeof( int ) > ( hitMemCount );
154 0 : fNumberOfHits = data->NumberOfClusters();
155 0 : const int firstHitInBinSize = (23 + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(int)) * HLTCA_ROW_COUNT + 4 * fNumberOfHits + 3;
156 : //FIXME: sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(int) * HLTCA_ROW_COUNT is way to big and only to ensure to reserve enough memory for GPU Alignment.
157 : //Might be replaced by correct value
158 :
159 : const int memorySize =
160 : // LinkData, HitData
161 0 : fNumberOfHitsPlusAlign * 4 * sizeof( short ) +
162 : // FirstHitInBin
163 0 : NextMultipleOf<kVectorAlignment>( ( firstHitInBinSize ) * sizeof( int ) ) +
164 : // HitWeights, ClusterDataIndex
165 0 : fNumberOfHitsPlusAlign * 2 * sizeof( int );
166 :
167 : if ( 1 )// fMemorySize < memorySize ) { // release the memory on CPU
168 : {
169 0 : fMemorySize = memorySize + 4;
170 0 : if (allocate)
171 : {
172 0 : if (!fIsGpuSliceData)
173 : {
174 0 : if (fMemory)
175 : {
176 0 : delete[] fMemory;
177 : }
178 0 : fMemory = new char[fMemorySize];// kVectorAlignment];
179 0 : }
180 : else
181 : {
182 0 : if (fMemorySize > HLTCA_GPU_SLICE_DATA_MEMORY)
183 : {
184 0 : return(0);
185 : }
186 : }
187 : }
188 : }
189 :
190 0 : char *mem = fMemory;
191 0 : AssignMemory( fLinkUpData, mem, fNumberOfHitsPlusAlign );
192 0 : AssignMemory( fLinkDownData, mem, fNumberOfHitsPlusAlign );
193 0 : AssignMemory( fHitData, mem, fNumberOfHitsPlusAlign );
194 0 : AssignMemory( fFirstHitInBin, mem, firstHitInBinSize );
195 0 : fGpuMemorySize = mem - fMemory;
196 :
197 : //Memory Allocated below will not be copied to GPU but instead be initialized on the gpu itself. Therefore it must not be copied to GPU!
198 0 : AssignMemory( fHitWeights, mem, fNumberOfHitsPlusAlign );
199 0 : AssignMemory( fClusterDataIndex, mem, fNumberOfHitsPlusAlign );
200 0 : return(mem - fMemory);
201 0 : }
202 :
203 : void AliHLTTPCCASliceData::InitFromClusterData( const AliHLTTPCCAClusterData &data )
204 : {
205 : // initialisation from cluster data
206 :
207 : ////////////////////////////////////
208 : // 0. sort rows
209 : ////////////////////////////////////
210 :
211 0 : fNumberOfHits = data.NumberOfClusters();
212 :
213 0 : float2* YZData = new float2[fNumberOfHits];
214 0 : int* tmpHitIndex = new int[fNumberOfHits];
215 :
216 0 : int RowOffset[HLTCA_ROW_COUNT];
217 0 : int NumberOfClustersInRow[HLTCA_ROW_COUNT];
218 0 : memset(NumberOfClustersInRow, 0, HLTCA_ROW_COUNT * sizeof(int));
219 0 : fFirstRow = HLTCA_ROW_COUNT;
220 0 : fLastRow = 0;
221 :
222 0 : for (int i = 0;i < fNumberOfHits;i++)
223 : {
224 0 : const int tmpRow = data.RowNumber(i);
225 0 : NumberOfClustersInRow[tmpRow]++;
226 0 : if (tmpRow > fLastRow) fLastRow = tmpRow;
227 0 : if (tmpRow < fFirstRow) fFirstRow = tmpRow;
228 : }
229 : int tmpOffset = 0;
230 0 : for (int i = fFirstRow;i <= fLastRow;i++)
231 : {
232 0 : RowOffset[i] = tmpOffset;
233 0 : tmpOffset += NumberOfClustersInRow[i];
234 : }
235 : {
236 0 : int RowsFilled[HLTCA_ROW_COUNT];
237 0 : memset(RowsFilled, 0, HLTCA_ROW_COUNT * sizeof(int));
238 0 : for (int i = 0;i < fNumberOfHits;i++)
239 : {
240 : float2 tmp;
241 0 : tmp.x = data.Y(i);
242 0 : tmp.y = data.Z(i);
243 0 : int tmpRow = data.RowNumber(i);
244 0 : int newIndex = RowOffset[tmpRow] + (RowsFilled[tmpRow])++;
245 0 : YZData[newIndex] = tmp;
246 0 : tmpHitIndex[newIndex] = i;
247 : }
248 0 : }
249 0 : if (fFirstRow == HLTCA_ROW_COUNT) fFirstRow = 0;
250 :
251 : ////////////////////////////////////
252 : // 1. prepare arrays
253 : ////////////////////////////////////
254 :
255 0 : const int numberOfRows = fLastRow - fFirstRow + 1;
256 :
257 0 : if (SetPointers(&data, true) == 0)
258 : {
259 0 : delete[] YZData;
260 0 : delete[] tmpHitIndex;
261 0 : return;
262 : }
263 :
264 : ////////////////////////////////////
265 : // 2. fill HitData and FirstHitInBin
266 : ////////////////////////////////////
267 :
268 0 : for ( int rowIndex = 0; rowIndex < fFirstRow; ++rowIndex ) {
269 0 : AliHLTTPCCARow &row = fRows[rowIndex];
270 0 : row.fGrid.CreateEmpty();
271 0 : row.fNHits = 0;
272 0 : row.fFullSize = 0;
273 0 : row.fHitNumberOffset = 0;
274 0 : row.fFirstHitInBinOffset = 0;
275 :
276 0 : row.fHy0 = 0.f;
277 0 : row.fHz0 = 0.f;
278 0 : row.fHstepY = 1.f;
279 0 : row.fHstepZ = 1.f;
280 0 : row.fHstepYi = 1.f;
281 0 : row.fHstepZi = 1.f;
282 : }
283 0 : for ( int rowIndex = fLastRow + 1; rowIndex < HLTCA_ROW_COUNT + 1; ++rowIndex ) {
284 0 : AliHLTTPCCARow &row = fRows[rowIndex];
285 0 : row.fGrid.CreateEmpty();
286 0 : row.fNHits = 0;
287 0 : row.fFullSize = 0;
288 0 : row.fHitNumberOffset = 0;
289 0 : row.fFirstHitInBinOffset = 0;
290 :
291 0 : row.fHy0 = 0.f;
292 0 : row.fHz0 = 0.f;
293 0 : row.fHstepY = 1.f;
294 0 : row.fHstepZ = 1.f;
295 0 : row.fHstepYi = 1.f;
296 0 : row.fHstepZi = 1.f;
297 : }
298 :
299 :
300 0 : AliHLTResizableArray<AliHLTTPCCAHit> binSortedHits( fNumberOfHits + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v));
301 :
302 : int gridContentOffset = 0;
303 : int hitOffset = 0;
304 :
305 0 : int binCreationMemorySize = 103 * 2 + fNumberOfHits;
306 0 : AliHLTResizableArray<unsigned short> binCreationMemory( binCreationMemorySize );
307 :
308 0 : fGPUSharedDataReq = 0;
309 :
310 0 : for ( int rowIndex = fFirstRow; rowIndex <= fLastRow; ++rowIndex ) {
311 0 : AliHLTTPCCARow &row = fRows[rowIndex];
312 0 : row.fNHits = NumberOfClustersInRow[rowIndex];
313 0 : assert( row.fNHits < ( 1 << sizeof( unsigned short ) * 8 ) );
314 0 : row.fHitNumberOffset = hitOffset;
315 0 : hitOffset += NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(NumberOfClustersInRow[rowIndex]);
316 :
317 0 : row.fFirstHitInBinOffset = gridContentOffset;
318 :
319 0 : CreateGrid( &row, YZData, RowOffset[rowIndex] );
320 0 : const AliHLTTPCCAGrid &grid = row.fGrid;
321 0 : const int numberOfBins = grid.N();
322 :
323 : int binCreationMemorySizeNew;
324 0 : if ( ( binCreationMemorySizeNew = numberOfBins * 2 + 6 + row.fNHits + sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(unsigned short) * numberOfRows + 1) > binCreationMemorySize ) {
325 : binCreationMemorySize = binCreationMemorySizeNew;
326 0 : binCreationMemory.Resize( binCreationMemorySize );
327 : }
328 :
329 0 : AliHLTArray<unsigned short> c = binCreationMemory; // number of hits in all previous bins
330 0 : AliHLTArray<unsigned short> bins = c + ( numberOfBins + 3 ); // cache for the bin index for every hit in this row
331 0 : AliHLTArray<unsigned short> filled = bins + row.fNHits; // counts how many hits there are per bin
332 :
333 0 : for ( unsigned int bin = 0; bin < row.fGrid.N() + 3; ++bin ) {
334 0 : filled[bin] = 0; // initialize filled[] to 0
335 : }
336 :
337 0 : for ( int hitIndex = 0; hitIndex < row.fNHits; ++hitIndex ) {
338 0 : const int globalHitIndex = RowOffset[rowIndex] + hitIndex;
339 0 : const unsigned short bin = row.fGrid.GetBin( YZData[globalHitIndex].x, YZData[globalHitIndex].y );
340 :
341 0 : bins[hitIndex] = bin;
342 0 : ++filled[bin];
343 : }
344 :
345 : unsigned short n = 0;
346 0 : for ( int bin = 0; bin < numberOfBins + 3; ++bin ) {
347 0 : c[bin] = n;
348 0 : n += filled[bin];
349 : }
350 :
351 0 : for ( int hitIndex = 0; hitIndex < row.fNHits; ++hitIndex ) {
352 0 : const unsigned short bin = bins[hitIndex];
353 0 : --filled[bin];
354 0 : const unsigned short ind = c[bin] + filled[bin]; // generate an index for this hit that is >= c[bin] and < c[bin + 1]
355 0 : const int globalBinsortedIndex = row.fHitNumberOffset + ind;
356 0 : const int globalHitIndex = RowOffset[rowIndex] + hitIndex;
357 :
358 : // allows to find the global hit index / coordinates from a global bin sorted hit index
359 0 : fClusterDataIndex[globalBinsortedIndex] = tmpHitIndex[globalHitIndex];
360 0 : binSortedHits[ind].SetY( YZData[globalHitIndex].x );
361 0 : binSortedHits[ind].SetZ( YZData[globalHitIndex].y );
362 : }
363 :
364 0 : PackHitData( &row, binSortedHits );
365 :
366 0 : for ( int i = 0; i < numberOfBins; ++i ) {
367 0 : fFirstHitInBin[row.fFirstHitInBinOffset + i] = c[i]; // global bin-sorted hit index
368 : }
369 0 : const unsigned short a = c[numberOfBins];
370 : // grid.N is <= row.fNHits
371 0 : const int nn = numberOfBins + grid.Ny() + 3;
372 0 : for ( int i = numberOfBins; i < nn; ++i ) {
373 0 : assert( (signed) row.fFirstHitInBinOffset + i < 23 * numberOfRows + 4 * fNumberOfHits + 3 );
374 0 : fFirstHitInBin[row.fFirstHitInBinOffset + i] = a;
375 : }
376 :
377 0 : row.fFullSize = nn;
378 0 : gridContentOffset += nn;
379 :
380 0 : if (NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.fNHits) + nn > (unsigned) fGPUSharedDataReq)
381 0 : fGPUSharedDataReq = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(row.fNHits) + nn;
382 :
383 : //Make pointer aligned
384 0 : gridContentOffset = NextMultipleOf<sizeof(HLTCA_GPU_ROWALIGNMENT) / sizeof(ushort_v)>(gridContentOffset);
385 0 : }
386 :
387 0 : delete[] YZData;
388 0 : delete[] tmpHitIndex;
389 :
390 : #if 0
391 : //SG cell finder - test code
392 :
393 : if ( fTmpHitInputIDs ) delete[] fTmpHitInputIDs;
394 : fTmpHitInputIDs = new int [NHits];
395 : const float areaY = .5;
396 : const float areaZ = .5;
397 : int newRowNHitsTotal = 0;
398 : bool *usedHits = new bool [NHits];
399 : for ( int iHit = 0; iHit < NHits; iHit++ ) usedHits[iHit] = 0;
400 : for ( int iRow = 0; iRow < fParam.NRows(); iRow++ ) {
401 : rowHeaders[iRow*2 ] = newRowNHitsTotal; // new first hit
402 : rowHeaders[iRow*2+1] = 0; // new N hits
403 : int newRowNHits = 0;
404 : int oldRowFirstHit = RowFirstHit[iRow];
405 : int oldRowLastHit = oldRowFirstHit + RowNHits[iRow];
406 : for ( int iHit = oldRowFirstHit; iHit < oldRowLastHit; iHit++ ) {
407 : if ( usedHits[iHit] ) continue;
408 : float x0 = X[iHit];
409 : float y0 = Y[iHit];
410 : float z0 = Z[iHit];
411 : float cx = x0;
412 : float cy = y0;
413 : float cz = z0;
414 : int nclu = 1;
415 : usedHits[iHit] = 1;
416 : if ( 0 ) for ( int jHit = iHit + 1; jHit < oldRowLastHit; jHit++ ) {//SG!!!
417 : //if( usedHits[jHit] ) continue;
418 : float dy = Y[jHit] - y0;
419 : float dz = Z[jHit] - z0;
420 : if ( CAMath::Abs( dy ) < areaY && CAMath::Abs( dz ) < areaZ ) {
421 : cx += X[jHit];
422 : cy += Y[jHit];
423 : cz += Z[jHit];
424 : nclu++;
425 : usedHits[jHit] = 1;
426 : }
427 : }
428 : int id = newRowNHitsTotal + newRowNHits;
429 : hitsXYZ[id*3+0 ] = cx / nclu;
430 : hitsXYZ[id*3+1 ] = cy / nclu;
431 : hitsXYZ[id*3+2 ] = cz / nclu;
432 : fTmpHitInputIDs[id] = iHit;
433 : newRowNHits++;
434 : }
435 : rowHeaders[iRow*2+1] = newRowNHits;
436 : newRowNHitsTotal += newRowNHits;
437 : }
438 : NHitsTotal() = newRowNHitsTotal;
439 : reinterpret_cast<int*>( fInputEvent )[1+fParam.NRows()*2] = newRowNHitsTotal;
440 :
441 : delete[] usedHits;
442 : #endif
443 0 : }
444 :
445 : void AliHLTTPCCASliceData::ClearHitWeights()
446 : {
447 : // clear hit weights
448 :
449 : #ifdef ENABLE_VECTORIZATION
450 : const int_v v0( Zero );
451 : const int *const end = fHitWeights + fNumberOfHits;
452 : for ( int *mem = fHitWeights; mem < end; mem += v0.Size ) {
453 : v0.store( mem );
454 : }
455 : #else
456 0 : for ( int i = 0; i < fNumberOfHitsPlusAlign; ++i ) {
457 0 : fHitWeights[i] = 0;
458 : }
459 : #endif
460 0 : }
461 :
462 : void AliHLTTPCCASliceData::ClearLinks()
463 : {
464 : // link cleaning
465 :
466 : #ifdef ENABLE_VECTORIZATION
467 : const short_v v0( -1 );
468 : const short *const end1 = fLinkUpData + fNumberOfHits;
469 : for ( short *mem = fLinkUpData; mem < end; mem += v0.Size ) {
470 : v0.store( mem );
471 : }
472 : const short *const end2 = fLinkDownData + fNumberOfHits;
473 : for ( short *mem = fLinkDownData; mem < end; mem += v0.Size ) {
474 : v0.store( mem );
475 : }
476 : #else
477 0 : for ( int i = 0; i < fNumberOfHits; ++i ) {
478 0 : fLinkUpData[i] = -1;
479 : }
480 0 : for ( int i = 0; i < fNumberOfHits; ++i ) {
481 0 : fLinkDownData[i] = -1;
482 : }
483 : #endif
484 0 : }
485 :
|