#include </builds/gaudi/Gaudi/GaudiHive/src/GPUCruncher.h>

Inheritance diagram for GPUCruncher:

Collaboration diagram for GPUCruncher:

[legend]

Public Types
typedef tbb::concurrent_hash_map< std::string, unsigned int >	CHM

Public Member Functions
bool	isClonable () const override

StatusCode	execute (const EventContext &ctx) const override
	the execution of the algorithm More...

StatusCode	initialize () override
	Its initialization. More...

StatusCode	finalize () override
	the finalization of the algorithm More...

double	get_runtime () const

	GPUCruncher (const std::string &name, ISvcLocator *pSvc)

virtual	~GPUCruncher ()
	virtual & protected destructor More...

Private Member Functions
	GPUCruncher ()
	the default constructor is disabled More...

	GPUCruncher (const GPUCruncher &)
	the copy constructor is disabled More...

GPUCruncher &	operator= (const GPUCruncher &)
	the assignment operator is disabled More...

StatusCode	gpuExecute (const std::pmr::vector< double > &in, std::vector< double > &out) const
	The GPU intensive function. More...

Private Attributes
Gaudi::Property< std::vector< std::string > >	m_inpKeys { this, "inpKeys", {}, "" }

Gaudi::Property< std::vector< std::string > >	m_outKeys { this, "outKeys", {}, "" }

Gaudi::Property< double >	m_avg_runtime { this, "avgRuntime", 1., "Average runtime of the module." }

Gaudi::Property< double >	m_var_runtime { this, "varRuntime", 0.01, "Variance of the runtime of the module." }

Gaudi::Property< bool >	m_local_rndm_gen

std::pmr::memory_resource *	pinned

const uint	MAX_INPUTS = 40

const uint	MAX_OUTPUTS = 10

std::vector< DataObjectHandle< DataObject > * >	m_inputHandles

std::vector< DataObjectHandle< DataObject > * >	m_outputHandles

Static Private Attributes
static CHM	m_name_ncopies_map

Detailed Description

A test asynchronous algorithm. Might eventually run computations on a GPU but for now it just sleeps for a few seconds.

Definition at line 26 of file GPUCruncher.h.

Member Typedef Documentation

◆ CHM

typedef tbb::concurrent_hash_map<std::string, unsigned int> GPUCruncher::CHM

Definition at line 29 of file GPUCruncher.h.

Constructor & Destructor Documentation

◆ GPUCruncher() [1/3]

GPUCruncher::GPUCruncher	(	const std::string &	name,
		ISvcLocator *	pSvc
	)

Definition at line 34 of file GPUCruncher.cpp.

     : AsynchronousAlgorithm( name, pSvc ) {
  
   // Register the algo in the static concurrent hash map in order to
   // monitor the # of copies
   CHM::accessor name_ninstances;
   m_name_ncopies_map.insert( name_ninstances, name );
   name_ninstances->second += 1;
 }

◆ ~GPUCruncher()

GPUCruncher::~GPUCruncher ( )

virtual

virtual & protected destructor

Definition at line 45 of file GPUCruncher.cpp.

                           {
   for ( uint i = 0; i < m_inputHandles.size(); ++i ) delete m_inputHandles[i];
  
   for ( uint i = 0; i < m_outputHandles.size(); ++i ) delete m_outputHandles[i];
 }

◆ GPUCruncher() [2/3]

GPUCruncher::GPUCruncher ( )

private

the default constructor is disabled

◆ GPUCruncher() [3/3]

GPUCruncher::GPUCruncher ( const GPUCruncher & )

private

the copy constructor is disabled

Member Function Documentation

◆ execute()

StatusCode GPUCruncher::execute ( const EventContext & ctx ) const

override

the execution of the algorithm

Definition at line 84 of file GPUCruncher.cpp.

 {
  
   double                   crunchtime;
   std::pmr::vector<double> input( pinned );
   if ( m_local_rndm_gen ) {
     /* This will disappear with a thread safe random number generator service.
      * Use basic Box-Muller to generate Gaussian random numbers.
      * The quality is not good for in depth study given that the generator is a
      * linear congruent.
      * Throw away basically a free number: we are in a ~~cpu~~ /gpu/ cruncher after all.
      * The seed is taken from the clock, but we could assign a seed per module to
      * ensure reproducibility.
      *
      * This is not an overkill but rather an exercise towards a thread safe
      * random number generation.
      */
  
     auto getGausRandom = []( double mean, double sigma ) -> double {
       unsigned int seed = std::clock();
  
       auto getUnifRandom = []( unsigned int& seed ) -> double {
         // from "Numerical Recipes"
         constexpr unsigned int m = 232;
         constexpr unsigned int a = 1664525;
         constexpr unsigned int c = 1013904223;
         seed                     = ( a * seed + c ) % m;
         const double unif        = double( seed ) / m;
         return unif;
       };
  
       double unif1, unif2;
       do {
         unif1 = getUnifRandom( seed );
         unif2 = getUnifRandom( seed );
       } while ( unif1 < std::numeric_limits<double>::epsilon() );
  
       const double normal = sqrt( -2. * log( unif1 ) ) * cos( 2 * M_PI * unif2 );
  
       return normal * sigma + mean;
     };
  
     crunchtime = fabs( getGausRandom( m_avg_runtime, m_var_runtime ) );
     // Generate input vector
     input.reserve( 40000 * crunchtime );
     for ( int i = 0; i < 40000 * crunchtime; ++i ) { input.push_back( getGausRandom( 10.0, 1.0 ) ); }
     // End Of temp block
   } else {
     // Should be a member.
     HiveRndm::HiveNumbers rndmgaus( randSvc(), Rndm::Gauss( m_avg_runtime, m_var_runtime ) );
     crunchtime = std::fabs( rndmgaus() );
     // Generate input vector
     for ( int i = 0; i < 2000 * crunchtime; ++i ) { input.push_back( rndmgaus() ); }
   }
   unsigned int crunchtime_ms = 1000 * crunchtime;
  
   DEBUG_MSG << "Crunching time will be: " << crunchtime_ms << " ms" << endmsg;
   DEBUG_MSG << "Start event " << ctx.evt() << " in slot " << ctx.slot() << " on pthreadID " << std::hex
             << pthread_self() << std::dec << endmsg;
  
   // start timer
   tbb::tick_count starttbb = tbb::tick_count::now();
  
   VERBOSE_MSG << "inputs number: " << m_inputHandles.size() << endmsg;
   for ( auto& inputHandle : m_inputHandles ) {
     if ( !inputHandle->isValid() ) continue;
  
     VERBOSE_MSG << "get from TS: " << inputHandle->objKey() << endmsg;
     DataObject* obj = nullptr;
     obj             = inputHandle->get();
     if ( obj == nullptr ) error() << "A read object was a null pointer." << endmsg;
   }
  
   // Use fiber sleep, should eventually be a GPU computation
   info() << "Crunching..." << endmsg;
   auto                startcrunch = std::chrono::steady_clock::now();
   std::vector<double> out{ 3.0, 5.0 };
   gpuExecute( input, out ).orThrow( "GPU_EXECUTE" );
   auto endcrunch = std::chrono::steady_clock::now();
   info() << "Crunched." << endmsg;
   fmt::print( "{}   GPU Crunch time: {}. Input length {}, output length {}.\n", name(),
               Gaudi::CUDA::SI(
                   std::chrono::duration_cast<std::chrono::milliseconds>( endcrunch - startcrunch ).count() / 1e3, "s" ),
               input.size(), out.size() );
  
   VERBOSE_MSG << "outputs number: " << m_outputHandles.size() << endmsg;
   for ( auto& outputHandle : m_outputHandles ) {
     if ( !outputHandle->isValid() ) continue;
  
     VERBOSE_MSG << "put to TS: " << outputHandle->objKey() << endmsg;
     outputHandle->put( std::make_unique<DataObject>() );
   }
  
   tbb::tick_count endtbb        = tbb::tick_count::now();
   const double    actualRuntime = ( endtbb - starttbb ).seconds();
  
   DEBUG_MSG << "Finish event " << ctx.evt() << " in " << int( 1000 * actualRuntime ) << " ms" << endmsg;
  
   DEBUG_MSG << "Timing: ExpectedCrunchtime= " << crunchtime_ms
             << " ms. ActualTotalRuntime= " << int( 1000 * actualRuntime )
             << " ms. Ratio= " << crunchtime / actualRuntime << endmsg;
  
   return StatusCode::SUCCESS;
 }

◆ finalize()

StatusCode GPUCruncher::finalize ( )

override

the finalization of the algorithm

Definition at line 191 of file GPUCruncher.cpp.

 {
   MsgStream log( msgSvc(), name() );
  
   unsigned int ninstances;
  
   {
     CHM::const_accessor const_name_ninstances;
     m_name_ncopies_map.find( const_name_ninstances, name() );
     ninstances = const_name_ninstances->second;
   }
  
   constexpr double s2ms = 1000.;
   // do not show repetitions
   if ( ninstances != 0 ) {
     info() << "Summary: name= " << name() << "\t avg_runtime= " << m_avg_runtime * s2ms << "\t n_clones= " << ninstances
            << endmsg;
  
     CHM::accessor name_ninstances;
     m_name_ncopies_map.find( name_ninstances, name() );
     name_ninstances->second = 0;
   }
  
   return Algorithm::finalize();
 }

◆ get_runtime()

double GPUCruncher::get_runtime ( ) const

inline

Definition at line 40 of file GPUCruncher.h.

40 { return m_avg_runtime; }

◆ gpuExecute()

StatusCode GPUCruncher::gpuExecute	(	const std::pmr::vector< double > &	in,
		std::vector< double > &	out
	)		const

private

The GPU intensive function.

◆ initialize()

StatusCode GPUCruncher::initialize ( )

override

Its initialization.

Definition at line 51 of file GPUCruncher.cpp.

                                    {
   auto sc = Algorithm::initialize();
   if ( !sc ) return sc;
  
   pinned = Gaudi::CUDA::get_pinned_memory_resource();
  
   // This is a bit ugly. There is no way to declare a vector of DataObjectHandles, so
   // we need to wait until initialize when we've read in the input and output key
   // properties, and know their size, and then turn them
   // into Handles and register them with the framework by calling declareProperty. We
   // could call declareInput/declareOutput on them too.
  
   int i = 0;
   for ( auto k : m_inpKeys ) {
     DEBUG_MSG << "adding input key " << k << endmsg;
     m_inputHandles.push_back( new DataObjectHandle<DataObject>( k, Gaudi::DataHandle::Reader, this ) );
     declareProperty( "dummy_in_" + std::to_string( i ), *( m_inputHandles.back() ) );
     i++;
   }
  
   i = 0;
   for ( auto k : m_outKeys ) {
     DEBUG_MSG << "adding output key " << k << endmsg;
     m_outputHandles.push_back( new DataObjectHandle<DataObject>( k, Gaudi::DataHandle::Writer, this ) );
     declareProperty( "dummy_out_" + std::to_string( i ), *( m_outputHandles.back() ) );
     i++;
   }
  
   return sc;
 }

◆ isClonable()

bool GPUCruncher::isClonable ( ) const

inline override

Definition at line 31 of file GPUCruncher.h.

31 { return true; }

◆ operator=()

GPUCruncher& GPUCruncher::operator= ( const GPUCruncher & )

private

the assignment operator is disabled

Member Data Documentation

◆ m_avg_runtime

Gaudi::Property<double> GPUCruncher::m_avg_runtime { this, "avgRuntime", 1., "Average runtime of the module." }

private

Definition at line 61 of file GPUCruncher.h.

◆ m_inpKeys

Gaudi::Property<std::vector<std::string> > GPUCruncher::m_inpKeys { this, "inpKeys", {}, "" }

private

Definition at line 58 of file GPUCruncher.h.

◆ m_inputHandles

std::vector<DataObjectHandle<DataObject>*> GPUCruncher::m_inputHandles

private

Definition at line 73 of file GPUCruncher.h.

◆ m_local_rndm_gen

Gaudi::Property<bool> GPUCruncher::m_local_rndm_gen

private

Initial value:

{ this, "localRndm", true,

"Decide if the local random generator is to be used" }

Definition at line 63 of file GPUCruncher.h.

◆ m_name_ncopies_map

GPUCruncher::CHM GPUCruncher::m_name_ncopies_map

static private

Definition at line 76 of file GPUCruncher.h.

◆ m_outKeys

Gaudi::Property<std::vector<std::string> > GPUCruncher::m_outKeys { this, "outKeys", {}, "" }

private

Definition at line 59 of file GPUCruncher.h.

◆ m_outputHandles

std::vector<DataObjectHandle<DataObject>*> GPUCruncher::m_outputHandles

private

Definition at line 74 of file GPUCruncher.h.

◆ m_var_runtime

Gaudi::Property<double> GPUCruncher::m_var_runtime { this, "varRuntime", 0.01, "Variance of the runtime of the module." }

private

Definition at line 62 of file GPUCruncher.h.

◆ MAX_INPUTS

const uint GPUCruncher::MAX_INPUTS = 40

private

Definition at line 70 of file GPUCruncher.h.

◆ MAX_OUTPUTS

const uint GPUCruncher::MAX_OUTPUTS = 10

private

Definition at line 71 of file GPUCruncher.h.

◆ pinned

std::pmr::memory_resource* GPUCruncher::pinned

private

Definition at line 67 of file GPUCruncher.h.

The documentation for this class was generated from the following files:

GaudiHive/src/GPUCruncher.h
GaudiHive/src/GPUCruncher.cpp

Public Types

Public Member Functions

Private Member Functions

Private Attributes

Static Private Attributes

Detailed Description

Member Typedef Documentation

◆ CHM

Constructor & Destructor Documentation

◆ GPUCruncher() [1/3]

◆ ~GPUCruncher()

◆ GPUCruncher() [2/3]

◆ GPUCruncher() [3/3]

Member Function Documentation

◆ execute()

◆ finalize()

◆ get_runtime()

◆ gpuExecute()

◆ initialize()

◆ isClonable()

◆ operator=()

Member Data Documentation

◆ m_avg_runtime

◆ m_inpKeys

◆ m_inputHandles

◆ m_local_rndm_gen

◆ m_name_ncopies_map

◆ m_outKeys

◆ m_outputHandles

◆ m_var_runtime

◆ MAX_INPUTS

◆ MAX_OUTPUTS

◆ pinned