The Gaudi Framework  master (82fdf313)
Loading...
Searching...
No Matches
GPUCruncher.cpp
Go to the documentation of this file.
1/***********************************************************************************\
2* (c) Copyright 2023-2024 CERN for the benefit of the LHCb and ATLAS collaborations *
3* *
4* This software is distributed under the terms of the Apache version 2 licence, *
5* copied verbatim in the file "LICENSE". *
6* *
7* In applying this licence, CERN does not waive the privileges and immunities *
8* granted to it by virtue of its status as an Intergovernmental Organization *
9* or submit itself to any jurisdiction. *
10\***********************************************************************************/
11
12#include "GPUCruncher.h"
13#include <algorithm>
14#include <chrono>
15#include <ctime>
16#include <sys/resource.h>
17#include <sys/times.h>
18#include <tbb/tick_count.h>
19
21
23
24#define ON_DEBUG if ( msgLevel( MSG::DEBUG ) )
25#define DEBUG_MSG ON_DEBUG debug()
26
27#define ON_VERBOSE if ( msgLevel( MSG::VERBOSE ) )
28#define VERBOSE_MSG ON_VERBOSE verbose()
29
30//------------------------------------------------------------------------------
31
32GPUCruncher::GPUCruncher( const std::string& name, // the algorithm instance name
33 ISvcLocator* pSvc )
34 : AsynchronousAlgorithm( name, pSvc ) {
35
36 // Register the algo in the static concurrent hash map in order to
37 // monitor the # of copies
38 CHM::accessor name_ninstances;
39 m_name_ncopies_map.insert( name_ninstances, name );
40 name_ninstances->second += 1;
41}
42
44 for ( uint i = 0; i < m_inputHandles.size(); ++i ) delete m_inputHandles[i];
45
46 for ( uint i = 0; i < m_outputHandles.size(); ++i ) delete m_outputHandles[i];
47}
48
50 auto sc = Algorithm::initialize();
51 if ( !sc ) return sc;
52
53 // This is a bit ugly. There is no way to declare a vector of DataObjectHandles, so
54 // we need to wait until initialize when we've read in the input and output key
55 // properties, and know their size, and then turn them
56 // into Handles and register them with the framework by calling declareProperty. We
57 // could call declareInput/declareOutput on them too.
58
59 int i = 0;
60 for ( auto k : m_inpKeys ) {
61 DEBUG_MSG << "adding input key " << k << endmsg;
63 declareProperty( "dummy_in_" + std::to_string( i ), *( m_inputHandles.back() ) );
64 i++;
65 }
66
67 i = 0;
68 for ( auto k : m_outKeys ) {
69 DEBUG_MSG << "adding output key " << k << endmsg;
71 declareProperty( "dummy_out_" + std::to_string( i ), *( m_outputHandles.back() ) );
72 i++;
73 }
74
75 return sc;
76}
77
78//------------------------------------------------------------------------------
79
/// Process one event: build a random input sample, read the declared inputs,
/// run the GPU workload, cross-check its result and publish the outputs.
///
/// The result of gpuExecute() is validated against values computed on the host:
/// out[0] must equal min(input), out[1] must equal 256 * max(input) and the sum
/// of out[2..] must equal 256 * input.size(). A mismatch is reported as a
/// warning, it does not fail the event.
///
/// @param ctx  event context; only used to label the log messages
/// @return StatusCode::SUCCESS. A GaudiException raised by a data handle is
///         logged and rethrown; the status of gpuExecute() is checked with
///         orThrow(); out.at() throws if the GPU result has fewer than 2 entries.
StatusCode GPUCruncher::execute( const EventContext& ctx ) const // the execution of the algorithm
{
  /* This will disappear with a thread safe random number generator service.
   * Use basic Box-Muller to generate Gaussian random numbers.
   * The quality is not good for in depth study given that the generator is a
   * linear congruential one.
   * The second Box-Muller number comes basically for free and is thrown away:
   * we are in a GPU cruncher after all.
   * The seed is taken from the clock, but we could assign a seed per module to
   * ensure reproducibility.
   *
   * This is not an overkill but rather an exercise towards a thread safe
   * random number generation.
   */
  auto getGausRandom = []( double mean, double sigma ) -> double {
    unsigned int seed = std::clock();

    auto getUnifRandom = []( unsigned int& seed ) -> double {
      // Linear congruential generator from "Numerical Recipes". Its modulus is
      // 2^32 (for the usual 32-bit unsigned int), which unsigned wrap-around
      // provides implicitly. The previous explicit "% 232" was a garbled 2^32
      // and limited the generator to 232 distinct values.
      constexpr unsigned int a = 1664525;
      constexpr unsigned int c = 1013904223;
      constexpr double       m = static_cast<double>( std::numeric_limits<unsigned int>::max() ) + 1.;
      seed                     = a * seed + c;
      return double( seed ) / m; // uniform in [0, 1)
    };

    double unif1, unif2;
    do {
      unif1 = getUnifRandom( seed );
      unif2 = getUnifRandom( seed );
    } while ( unif1 < std::numeric_limits<double>::epsilon() ); // log( 0 ) must be avoided

    const double normal = sqrt( -2. * log( unif1 ) ) * cos( 2 * M_PI * unif2 );

    return normal * sigma + mean;
  };

  const double crunchtime = fabs( getGausRandom( m_avg_runtime, m_var_runtime ) );

  // Generate input vector: 50000 samples per second of requested crunch time
  const double        n_samples = 50000 * crunchtime;
  std::vector<double> input{};
  input.reserve( static_cast<std::size_t>( n_samples ) + 1 );
  for ( int i = 0; i < n_samples; ++i ) { input.push_back( getGausRandom( 20.0, 1.0 ) ); }
  // A crunch time of exactly zero would leave the sample empty, and
  // std::ranges::min/max are undefined on an empty range.
  if ( input.empty() ) input.push_back( getGausRandom( 20.0, 1.0 ) );
  unsigned int crunchtime_ms = 1000 * crunchtime;

  // First figure out what output should be
  const double lower_bound = std::ranges::min( input );
  const double upper_bound = std::ranges::max( input ) * 256;
  DEBUG_MSG << "Crunching time will be: " << crunchtime_ms << " ms" << endmsg;
  DEBUG_MSG << "Start event " << ctx.evt() << " in slot " << ctx.slot() << " on pthreadID " << std::hex
            << pthread_self() << std::dec << endmsg;

  // start timer
  tbb::tick_count starttbb = tbb::tick_count::now();

  VERBOSE_MSG << "inputs number: " << m_inputHandles.size() << endmsg;
  for ( auto& inputHandle : m_inputHandles ) {
    if ( !inputHandle->isValid() ) continue;

    VERBOSE_MSG << "get from TS: " << inputHandle->objKey() << endmsg;
    DataObject* obj = nullptr;
    try {
      obj = inputHandle->get();
    } catch ( const GaudiException& e ) {
      error() << "Caught exception with message " << e.what() << " in evt " << ctx.evt() << endmsg;
      throw;
    }
    if ( obj == nullptr ) error() << "A read object was a null pointer." << endmsg;
  }

  info() << "Crunching..." << endmsg;
  auto                startcrunch = std::chrono::steady_clock::now();
  std::vector<double> out{};
  gpuExecute( input, out ).orThrow( "GPU_EXECUTE" );
  auto endcrunch = std::chrono::steady_clock::now();

  // Sum the entries that follow the two bounds. The sum is taken in double and
  // only formed when those entries exist, so a short result reaches the
  // bounds-checked at() calls below instead of running an iterator past end().
  const double    entries_sum      = out.size() > 2 ? std::accumulate( out.begin() + 2, out.end(), 0. ) : 0.;
  const long long total_entries    = static_cast<long long>( entries_sum );
  const long long expected_entries = 256LL * static_cast<long long>( input.size() );

  // Exact floating point comparison is intended: the GPU is expected to
  // reproduce the host values bit for bit.
  const bool lower_ok   = out.at( 0 ) == lower_bound;
  const bool upper_ok   = out.at( 1 ) == upper_bound;
  const bool entries_ok = total_entries == expected_entries;
  const bool match      = lower_ok && upper_ok && entries_ok;
  info() << "Crunched." << endmsg;
  // Signed difference: a surplus of entries shows up as a negative "missing"
  // count instead of wrapping around.
  ( match ? info() : warning() )
      << std::format(
             "GPU Crunch time: {} s. Input length {}, total entries {}. Pass: Lower {}, Upper {}, Entries {} ({} "
             "missing)",
             std::chrono::duration_cast<std::chrono::milliseconds>( endcrunch - startcrunch ).count() / 1e3,
             input.size(), total_entries, lower_ok, upper_ok, entries_ok, expected_entries - total_entries )
      << endmsg;

  VERBOSE_MSG << "outputs number: " << m_outputHandles.size() << endmsg;
  for ( auto& outputHandle : m_outputHandles ) {
    if ( !outputHandle->isValid() ) continue;

    VERBOSE_MSG << "put to TS: " << outputHandle->objKey() << endmsg;
    try {
      outputHandle->put( std::make_unique<DataObject>() );
    } catch ( const GaudiException& e ) {
      error() << "Caught exception with message " << e.what() << " in evt " << ctx.evt() << endmsg;
      throw;
    }
  }

  tbb::tick_count endtbb        = tbb::tick_count::now();
  const double    actualRuntime = ( endtbb - starttbb ).seconds();

  DEBUG_MSG << "Finish event " << ctx.evt() << " in " << int( 1000 * actualRuntime ) << " ms" << endmsg;

  DEBUG_MSG << "Timing: ExpectedCrunchtime= " << crunchtime_ms
            << " ms. ActualTotalRuntime= " << int( 1000 * actualRuntime )
            << " ms. Ratio= " << crunchtime / actualRuntime << endmsg;

  return StatusCode::SUCCESS;
}
194
195//------------------------------------------------------------------------------
196
197StatusCode GPUCruncher::finalize() // the finalization of the algorithm
198{
199 MsgStream log( msgSvc(), name() );
200
201 unsigned int ninstances;
202
203 {
204 CHM::const_accessor const_name_ninstances;
205 m_name_ncopies_map.find( const_name_ninstances, name() );
206 ninstances = const_name_ninstances->second;
207 }
208
209 constexpr double s2ms = 1000.;
210 // do not show repetitions
211 if ( ninstances != 0 ) {
212 info() << "Summary: name= " << name() << "\t avg_runtime= " << m_avg_runtime * s2ms << "\t n_clones= " << ninstances
213 << endmsg;
214
215 CHM::accessor name_ninstances;
216 m_name_ncopies_map.find( name_ninstances, name() );
217 name_ninstances->second = 0;
218 }
219
220 return Algorithm::finalize();
221}
222
223//------------------------------------------------------------------------------
#define DEBUG_MSG
#define VERBOSE_MSG
HepRndm::Engine< DRand48Engine > e3
MsgStream & endmsg(MsgStream &s)
MsgStream Modifier: endmsg. Calls the output method of the MsgStream.
Definition MsgStream.h:198
#define DECLARE_COMPONENT(type)
MsgStream & error() const
shortcut for the method msgStream(MSG::ERROR)
MsgStream & warning() const
shortcut for the method msgStream(MSG::WARNING)
const SmartIF< IMessageSvc > & msgSvc() const
The standard message service.
MsgStream & info() const
shortcut for the method msgStream(MSG::INFO)
DataObjectHandle.h GaudiKernel/DataObjectHandle.h.
A DataObject is the base class of any identifiable object on any data store.
Definition DataObject.h:37
This class represents an entry point to all the event specific data.
A test asynchronous algorithm on the GPU.
Definition GPUCruncher.h:26
GPUCruncher()
the default constructor is disabled
StatusCode finalize() override
the finalization of the algorithm
StatusCode gpuExecute(const std::vector< double > &in, std::vector< double > &out) const
The GPU intensive function.
std::vector< DataObjectHandle< DataObject > * > m_inputHandles
Definition GPUCruncher.h:68
static CHM m_name_ncopies_map
Definition GPUCruncher.h:71
Gaudi::Property< double > m_var_runtime
Definition GPUCruncher.h:62
Gaudi::Property< double > m_avg_runtime
Definition GPUCruncher.h:61
tbb::concurrent_hash_map< std::string, unsigned int > CHM
Definition GPUCruncher.h:29
virtual ~GPUCruncher()
virtual & protected destructor
StatusCode execute(const EventContext &ctx) const override
the execution of the algorithm
Gaudi::Property< std::vector< std::string > > m_inpKeys
Definition GPUCruncher.h:58
std::vector< DataObjectHandle< DataObject > * > m_outputHandles
Definition GPUCruncher.h:69
StatusCode initialize() override
Its initialization.
Gaudi::Property< std::vector< std::string > > m_outKeys
Definition GPUCruncher.h:59
Gaudi::Details::PropertyBase * declareProperty(const std::string &name, ToolHandle< T > &hndl, const std::string &doc="none")
Definition Algorithm.h:286
StatusCode initialize() override
the default (empty) implementation of IStateful::initialize() method
Definition Algorithm.h:175
StatusCode finalize() override
the default (empty) implementation of IStateful::finalize() method
Definition Algorithm.h:181
const std::string & name() const override
The identifying name of the algorithm object.
Define general base for Gaudi exception.
const char * what() const override
method from std::exception
The ISvcLocator is the interface implemented by the Service Factory in the Application Manager to loc...
Definition ISvcLocator.h:42
Definition of the MsgStream class used to transmit messages.
Definition MsgStream.h:29
This class is used for returning status codes from appropriate routines.
Definition StatusCode.h:64
constexpr static const auto SUCCESS
Definition StatusCode.h:99