ForwardSchedulerSvc.cpp
Go to the documentation of this file.
1 // Local
2 #include "AlgResourcePool.h"
3 #include "AlgoExecutionTask.h"
4 #include "ForwardSchedulerSvc.h"
5 
6 // Framework includes
8 #include "GaudiKernel/Algorithm.h" // will be IAlgorithm if context getter promoted to interface
10 #include "GaudiKernel/IAlgorithm.h"
12 #include "GaudiKernel/SvcFactory.h"
15 
16 // C++
17 #include <algorithm>
18 #include <map>
19 #include <queue>
20 #include <sstream>
21 #include <unordered_set>
22 
23 // External libs
24 #include "boost/thread.hpp"
25 #include "boost/tokenizer.hpp"
26 #include "boost/algorithm/string.hpp"
27 // DP waiting for the TBB service
28 #include "tbb/task_scheduler_init.h"
29 
32 
33 // Instantiation of a static factory class used by clients to create instances of this service
35 
36 //===========================================================================
37 // Infrastructure methods
38 
39 
45 
46  // Initialise mother class (read properties, ...)
48  if ( !sc.isSuccess() ) warning() << "Base class could not be initialized" << endmsg;
49 
50  // Get hold of the ThreadPoolSvc. This should initialize the thread pool
51  m_threadPoolSvc = serviceLocator()->service( "ThreadPoolSvc" );
52  if ( !m_threadPoolSvc.isValid() ) {
53  fatal() << "Error retrieving ThreadPoolSvc" << endmsg;
54  return StatusCode::FAILURE;
55  }
56 
57  // Activate the scheduler in another thread.
58  info() << "Activating scheduler in a separate thread" << endmsg;
59  m_thread = std::thread( std::bind( &ForwardSchedulerSvc::activate, this ) );
60 
61  while ( m_isActive != ACTIVE ) {
62  if ( m_isActive == FAILURE ) {
63  fatal() << "Terminating initialization" << endmsg;
64  return StatusCode::FAILURE;
65  } else {
66  info() << "Waiting for ForwardSchedulerSvc to activate" << endmsg;
67  sleep( 1 );
68  }
69  }
70 
71  // Get the algo resource pool
72  m_algResourcePool = serviceLocator()->service( "AlgResourcePool" );
73  if ( !m_algResourcePool.isValid() ) {
74  fatal() << "Error retrieving AlgoResourcePool" << endmsg;
75  return StatusCode::FAILURE;
76  }
77 
78  m_algExecStateSvc = serviceLocator()->service("AlgExecStateSvc");
79  if (!m_algExecStateSvc.isValid()) {
80  fatal() << "Error retrieving AlgExecStateSvc" << endmsg;
81  return StatusCode::FAILURE;
82  }
83 
84  // Get Whiteboard
85  m_whiteboard = serviceLocator()->service( m_whiteboardSvcName );
86  if ( !m_whiteboard.isValid() ) {
87  fatal() << "Error retrieving EventDataSvc interface IHiveWhiteBoard." << endmsg;
88  return StatusCode::FAILURE;
89  }
90 
91  // Check the MaxEventsInFlight parameters and react
92  // Deprecated for the moment
93  size_t numberOfWBSlots = m_whiteboard->getNumberOfStores();
94  if ( m_maxEventsInFlight != 0 ) {
95  warning() << "Property MaxEventsInFlight was set. This works but it's deprecated. "
96  << "Please migrate your code options files." << endmsg;
97 
98  if ( m_maxEventsInFlight != (int)numberOfWBSlots ) {
99  warning() << "In addition, the number of events in flight (" << m_maxEventsInFlight
100  << ") differs from the slots in the whiteboard (" << numberOfWBSlots
101  << "). Setting the number of events in flight to " << numberOfWBSlots << endmsg;
102  }
103  }
104 
105  // set global concurrency flags
107 
108  // Align the two quantities
109  m_maxEventsInFlight = numberOfWBSlots;
110 
111  // Set the number of free slots
112  m_freeSlots = m_maxEventsInFlight;
113 
114  if ( m_algosDependencies.size() != 0 ) {
115  warning() << " ##### Property AlgosDependencies is deprecated and ignored."
116  << " FIX your job options #####" << endmsg;
117  }
118 
119  // Get the list of algorithms
120  const std::list<IAlgorithm*>& algos = m_algResourcePool->getFlatAlgList();
121  const unsigned int algsNumber = algos.size();
122  info() << "Found " << algsNumber << " algorithms" << endmsg;
123 
124  /* Dependencies
125  1) Look for handles in algo, if none
126  2) Assume none are required
127  */
128 
129  DataObjIDColl globalInp, globalOutp;
130 
131  // figure out all outputs
132  for (IAlgorithm* ialgoPtr : algos) {
133  Algorithm* algoPtr = dynamic_cast<Algorithm*>(ialgoPtr);
134  if (!algoPtr) {
135  fatal() << "Could not convert IAlgorithm into Algorithm: this will result in a crash." << endmsg;
136  }
137  for (auto id : algoPtr->outputDataObjs()) {
138  auto r = globalOutp.insert(id);
139  if (!r.second) {
140  warning() << "multiple algorithms declare " << id << " as output! could be a single instance in multiple paths though, or control flow may guarantee only one runs...!" << endmsg;
141  }
142  }
143  }
144  info() << "outputs:\n" ;
145  for (const auto& i : globalOutp ) {
146  info() << i << '\n' ;
147  }
148  info() << endmsg;
149 
150 
151 
152  info() << "Data Dependencies for Algorithms:";
153 
154  std::vector<DataObjIDColl> m_algosDependencies;
155  for ( IAlgorithm* ialgoPtr : algos ) {
156  Algorithm* algoPtr = dynamic_cast<Algorithm*>( ialgoPtr );
157  if ( nullptr == algoPtr )
158  fatal() << "Could not convert IAlgorithm into Algorithm: this will result in a crash." << endmsg;
159 
160  info() << "\n " << algoPtr->name();
161 
162  // FIXME
163  DataObjIDColl algoDependencies;
164  if ( !algoPtr->inputDataObjs().empty() || !algoPtr->outputDataObjs().empty() ) {
165  for ( auto id : algoPtr->inputDataObjs() ) {
166  info() << "\n o INPUT " << id;
167  if (id.key().find(":")!=std::string::npos) {
168  info() << " contains alternatives which require resolution... " << endmsg;
169  auto tokens = boost::tokenizer<boost::char_separator<char>>{id.key(),boost::char_separator<char>{":"}};
170  auto itok = std::find_if( tokens.begin(), tokens.end(),
171  [&](const std::string& t) {
172  return globalOutp.find( DataObjID{t} ) != globalOutp.end();
173  } );
174  if (itok!=tokens.end()) {
175  info() << "found matching output for " << *itok << " -- updating scheduler info" << endmsg;
176  id.updateKey(*itok);
177  } else {
178  error() << "failed to find alternate in global output list" << endmsg;
179  }
180  }
181  algoDependencies.insert( id );
182  globalInp.insert( id );
183  }
184  for ( auto id : algoPtr->outputDataObjs() ) {
185  info() << "\n o OUTPUT " << id;
186  if (id.key().find(":")!=std::string::npos) {
187  info() << " alternatives are NOT allowed for outputs..." << endmsg;
188  }
189  }
190  } else {
191  info() << "\n none";
192  }
193  m_algosDependencies.emplace_back( algoDependencies );
194  }
195  info() << endmsg;
196 
197  // Fill the containers to convert algo names to index
198  m_algname_vect.reserve( algsNumber );
199  unsigned int index = 0;
200  IAlgorithm* dataLoaderAlg( nullptr );
201  for ( IAlgorithm* algo : algos ) {
202  const std::string& name = algo->name();
203  m_algname_index_map[name] = index;
204  m_algname_vect.emplace_back( name );
205  if (algo->name() == m_useDataLoader) {
206  dataLoaderAlg = algo;
207  }
208  index++;
209  }
210 
211  // Check if we have unmet global input dependencies
212  if ( m_checkDeps ) {
213  DataObjIDColl unmetDep;
214  for ( auto o : globalInp ) {
215  if ( globalOutp.find( o ) == globalOutp.end() ) {
216  unmetDep.insert( o );
217  }
218  }
219 
220  if ( unmetDep.size() > 0 ) {
221 
222  std::ostringstream ost;
223  for ( auto& o : unmetDep ) {
224  ost << "\n o " << o << " required by Algorithm: ";
225  for ( size_t i = 0; i < m_algosDependencies.size(); ++i ) {
226  if ( m_algosDependencies[i].find( o ) != m_algosDependencies[i].end() ) {
227  ost << "\n * " << m_algname_vect[i];
228  }
229  }
230  }
231 
232  if ( m_useDataLoader != "" ) {
233  // Find the DataLoader Alg
234  if (dataLoaderAlg == nullptr) {
235  fatal() << "No DataLoader Algorithm \"" << m_useDataLoader.value()
236  << "\" found, and unmet INPUT dependencies "
237  << "detected:\n" << ost.str() << endmsg;
238  return StatusCode::FAILURE;
239  }
240 
241  info() << "Will attribute the following unmet INPUT dependencies to \""
242  << dataLoaderAlg->type() << "/" << dataLoaderAlg->name()
243  << "\" Algorithm"
244  << ost.str() << endmsg;
245 
246  // Set the property Load of DataLoader Alg
247  Algorithm *dataAlg = dynamic_cast<Algorithm*>(dataLoaderAlg);
248  if ( !dataAlg ) {
249  fatal() << "Unable to dcast DataLoader \"" << m_useDataLoader.value()
250  << "\" IAlg to Algorithm" << endmsg;
251  return StatusCode::FAILURE;
252  }
253 
254  for (auto& id : unmetDep) {
255  debug() << "adding OUTPUT dep \"" << id << "\" to "
256  << dataLoaderAlg->type() << "/" << dataLoaderAlg->name()
257  << endmsg;
259  }
260 
261  } else {
262  fatal() << "Auto DataLoading not requested, "
263  << "and the following unmet INPUT dependencies were found:"
264  << ost.str() << endmsg;
265  return StatusCode::FAILURE;
266  }
267 
268  } else {
269  info() << "No unmet INPUT data dependencies were found" << endmsg;
270  }
271  }
272 
273  const AlgResourcePool* algPool = dynamic_cast<const AlgResourcePool*>( m_algResourcePool.get() );
274  sc = m_efManager.initialize( algPool->getPRGraph(), m_algname_index_map);
275  unsigned int controlFlowNodeNumber = m_efManager.getPrecedenceRulesGraph()->getControlFlowNodeCounter();
276 
277  // Shortcut for the message service
278  SmartIF<IMessageSvc> messageSvc( serviceLocator() );
279  if ( !messageSvc.isValid() ) error() << "Error retrieving MessageSvc interface IMessageSvc." << endmsg;
280 
281  m_eventSlots.assign( m_maxEventsInFlight,
282  EventSlot( m_algosDependencies, algsNumber, controlFlowNodeNumber, messageSvc ) );
283  std::for_each( m_eventSlots.begin(), m_eventSlots.end(), []( EventSlot& slot ) { slot.complete = true; } );
284 
285  // Clearly inform about the level of concurrency
286  info() << "Concurrency level information:" << endmsg;
287  info() << " o Number of events in flight: " << m_maxEventsInFlight << endmsg;
288  info() << " o Number of algorithms in flight: " << m_maxAlgosInFlight << endmsg;
289  info() << " o TBB thread pool size: " << m_threadPoolSize << endmsg;
290 
291  return sc;
292 }
293 //---------------------------------------------------------------------------
294 
299 
301  if ( !sc.isSuccess() ) warning() << "Base class could not be finalized" << endmsg;
302 
303  sc = deactivate();
304  if ( !sc.isSuccess() ) warning() << "Scheduler could not be deactivated" << endmsg;
305 
306  info() << "Joining Scheduler thread" << endmsg;
307  m_thread.join();
308 
309  // Final error check after thread pool termination
310  if ( m_isActive == FAILURE ) {
311  error() << "problems in scheduler thread" << endmsg;
312  return StatusCode::FAILURE;
313  }
314 
315  // m_efManager.getPrecedenceRulesGraph()->dumpExecutionPlan();
316 
317  return sc;
318 }
319 //---------------------------------------------------------------------------
331 
332  if (msgLevel(MSG::DEBUG))
333  debug() << "ForwardSchedulerSvc::activate()" << endmsg;
334 
336  error() << "problems initializing ThreadPoolSvc" << endmsg;
338  return;
339  }
340 
341  // Wait for actions pushed into the queue by finishing tasks.
342  action thisAction;
344 
345  m_isActive = ACTIVE;
346 
347  // Continue to wait if the scheduler is running or there is something to do
348  info() << "Start checking the actionsQueue" << endmsg;
349  while ( m_isActive == ACTIVE or m_actionsQueue.size() != 0 ) {
350  m_actionsQueue.pop( thisAction );
351  sc = thisAction();
352  if ( sc != StatusCode::SUCCESS )
353  verbose() << "Action did not succeed (which is not bad per se)." << endmsg;
354  else
355  verbose() << "Action succeeded." << endmsg;
356  }
357 
358  info() << "Terminating thread-pool resources" << endmsg;
360  error() << "Problems terminating thread pool" << endmsg;
362  }
363 }
364 
365 //---------------------------------------------------------------------------
366 
374 
375  if ( m_isActive == ACTIVE ) {
376  // Drain the scheduler
378  // This would be the last action
379  m_actionsQueue.push( [this]() -> StatusCode {
381  return StatusCode::SUCCESS;
382  } );
383  }
384 
385  return StatusCode::SUCCESS;
386 }
387 
388 //===========================================================================
389 
390 //===========================================================================
391 // Utils and shortcuts
392 
393 inline const std::string& ForwardSchedulerSvc::index2algname( unsigned int index ) {
394  return m_algname_vect[index];
395 }
396 
397 //---------------------------------------------------------------------------
398 
399 inline unsigned int ForwardSchedulerSvc::algname2index( const std::string& algoname ) {
400  unsigned int index = m_algname_index_map[algoname];
401  return index;
402 }
403 
404 //===========================================================================
405 // EventSlot management
413 
414  if ( m_first ) {
415  m_first = false;
416  }
417 
418  if ( !eventContext ) {
419  fatal() << "Event context is nullptr" << endmsg;
420  return StatusCode::FAILURE;
421  }
422 
423  if ( m_freeSlots.load() == 0 ) {
424  if ( msgLevel( MSG::DEBUG ) ) debug() << "A free processing slot could not be found." << endmsg;
425  return StatusCode::FAILURE;
426  }
427 
428  // no problem as push new event is only called from one thread (event loop manager)
429  m_freeSlots--;
430 
431  auto action = [this, eventContext]() -> StatusCode {
432  // Event processing slot forced to be the same as the wb slot
433  const unsigned int thisSlotNum = eventContext->slot();
434  EventSlot& thisSlot = m_eventSlots[thisSlotNum];
435  if ( !thisSlot.complete ) {
436  fatal() << "The slot " << thisSlotNum << " is supposed to be a finished event but it's not" << endmsg;
437  return StatusCode::FAILURE;
438  }
439 
440  info() << "Executing event " << eventContext->evt() << " on slot " << thisSlotNum << endmsg;
441  thisSlot.reset( eventContext );
442 
443  return this->updateStates( thisSlotNum );
444  }; // end of lambda
445 
446  // Kick off the scheduling!
447  if ( msgLevel( MSG::VERBOSE ) ) {
448  verbose() << "Pushing the action to update the scheduler for slot " << eventContext->slot() << endmsg;
449  verbose() << "Free slots available " << m_freeSlots.load() << endmsg;
450  }
451  m_actionsQueue.push( action );
452 
453  return StatusCode::SUCCESS;
454 }
455 
456 //---------------------------------------------------------------------------
458  StatusCode sc;
459  for ( auto context : eventContexts ) {
460  sc = pushNewEvent( context );
461  if ( sc != StatusCode::SUCCESS ) return sc;
462  }
463  return sc;
464 }
465 
466 //---------------------------------------------------------------------------
468  return std::max( m_freeSlots.load(), 0 );
469 }
470 
471 //---------------------------------------------------------------------------
476  unsigned int slotNum = 0;
477  for ( auto& thisSlot : m_eventSlots ) {
478  if ( not thisSlot.algsStates.allAlgsExecuted() and not thisSlot.complete ) {
479  updateStates( slotNum );
480  }
481  slotNum++;
482  }
483  return StatusCode::SUCCESS;
484 }
485 
486 //---------------------------------------------------------------------------
491  // debug() << "popFinishedEvent: queue size: " << m_finishedEvents.size() << endmsg;
492  if ( m_freeSlots.load() == m_maxEventsInFlight or m_isActive == INACTIVE ) {
493  // debug() << "freeslots: " << m_freeSlots << "/" << m_maxEventsInFlight
494  // << " active: " << m_isActive << endmsg;
495  return StatusCode::FAILURE;
496  } else {
497  // debug() << "freeslots: " << m_freeSlots << "/" << m_maxEventsInFlight
498  // << " active: " << m_isActive << endmsg;
499  m_finishedEvents.pop( eventContext );
500  m_freeSlots++;
501  if (msgLevel(MSG::DEBUG))
502  debug() << "Popped slot " << eventContext->slot() << "(event "
503  << eventContext->evt() << ")" << endmsg;
504  return StatusCode::SUCCESS;
505  }
506 }
507 
508 //---------------------------------------------------------------------------
513  if ( m_finishedEvents.try_pop( eventContext ) ) {
514  if ( msgLevel( MSG::DEBUG ) )
515  debug() << "Try Pop successful slot " << eventContext->slot() << "(event " << eventContext->evt() << ")"
516  << endmsg;
517  m_freeSlots++;
518  return StatusCode::SUCCESS;
519  }
520  return StatusCode::FAILURE;
521 }
522 
523 //---------------------------------------------------------------------------
530 
531  // Set the number of free slots to 0, so that no free slot is reported any more
532  m_freeSlots.store( 0 );
533 
534  fatal() << "*** Event " << eventContext->evt() << " on slot "
535  << eventContext->slot() << " failed! ***" << endmsg;
536 
537  std::ostringstream ost;
538  m_algExecStateSvc->dump(ost, *eventContext);
539 
540  info() << "Dumping Alg Exec State for slot " << eventContext->slot()
541  << ":\n" << ost.str() << endmsg;
542 
543  dumpSchedulerState(-1);
544 
545  // Empty queue and deactivate the service
546  action thisAction;
547  while ( m_actionsQueue.try_pop( thisAction ) ) {
548  };
549  deactivate();
550 
551  // Push into the finished events queue the failed context
552  EventContext* thisEvtContext;
553  while ( m_finishedEvents.try_pop( thisEvtContext ) ) {
554  m_finishedEvents.push( thisEvtContext );
555  };
556  m_finishedEvents.push( eventContext );
557 
558  return StatusCode::FAILURE;
559 }
560 
561 //===========================================================================
562 
563 //===========================================================================
564 // States Management
565 
576 
577  m_updateNeeded = true;
578 
579  // Fill a map of initial state / action using closures.
580  // done to update the states w/o several if/elses
581  // Posterchild for constexpr with gcc4.7 onwards!
582  /*const std::map<AlgsExecutionStates::State, std::function<StatusCode(unsigned int iAlgo, int si)>>
583  statesTransitions = {
584  {AlgsExecutionStates::CONTROLREADY, std::bind(&ForwardSchedulerSvc::promoteToDataReady,
585  this,
586  std::placeholders::_1,
587  std::placeholders::_2)},
588  {AlgsExecutionStates::DATAREADY, std::bind(&ForwardSchedulerSvc::promoteToScheduled,
589  this,
590  std::placeholders::_1,
591  std::placeholders::_2)}
592  };*/
593 
594  StatusCode global_sc( StatusCode::FAILURE, true );
595 
596  // Sort from the oldest to the newest event
597  // Prepare a vector of pointers to the slots to avoid copies
598  std::vector<EventSlot*> eventSlotsPtrs;
599 
600  // Consider all slots if si <0 or just one otherwise
601  if ( si < 0 ) {
602  const int eventsSlotsSize( m_eventSlots.size() );
603  eventSlotsPtrs.reserve( eventsSlotsSize );
604  for ( auto slotIt = m_eventSlots.begin(); slotIt != m_eventSlots.end(); slotIt++ ) {
605  if ( !slotIt->complete ) eventSlotsPtrs.push_back( &( *slotIt ) );
606  }
607  std::sort( eventSlotsPtrs.begin(), eventSlotsPtrs.end(),
608  []( EventSlot* a, EventSlot* b ) { return a->eventContext->evt() < b->eventContext->evt(); } );
609  } else {
610  eventSlotsPtrs.push_back( &m_eventSlots[si] );
611  }
612 
613  for ( EventSlot* thisSlotPtr : eventSlotsPtrs ) {
614  int iSlot = thisSlotPtr->eventContext->slot();
615 
616  // Cache the states of the algos to improve readability and performance
617  auto& thisSlot = m_eventSlots[iSlot];
618  AlgsExecutionStates& thisAlgsStates = thisSlot.algsStates;
619 
620  // Take care of the control ready update
621  m_efManager.updateEventState( thisAlgsStates, thisSlot.controlFlowState );
622 
623  // DF note: all of this is a loop over all algs that applies the CR->DR and DR->SCHD transitions
624  /*for (unsigned int iAlgo=0;iAlgo<m_algname_vect.size();++iAlgo){
625  const AlgsExecutionStates::State& algState = thisAlgsStates[iAlgo];
626  if (algState==AlgsExecutionStates::ERROR)
627  error() << " Algo " << index2algname(iAlgo) << " is in ERROR state." << endmsg;
628  // Loop on state transitions from the one suited to algo state up to the one for SCHEDULED.
629  partial_sc=StatusCode::SUCCESS;
630  for (auto state_transition = statesTransitions.find(algState);
631  state_transition!=statesTransitions.end() && partial_sc.isSuccess();
632  state_transition++){
633  partial_sc = state_transition->second(iAlgo,iSlot);
634  if (partial_sc.isFailure()){
635  verbose() << "Could not apply transition from "
636  << AlgsExecutionStates::stateNames[thisAlgsStates[iAlgo]]
637  << " for algorithm " << index2algname(iAlgo)
638  << " on processing slot " << iSlot << endmsg;
639  }
640  else{global_sc=partial_sc;}
641  } // end loop on transitions
642  }*/ // end loop on algos
643 
644  StatusCode partial_sc( StatusCode::FAILURE, true );
645  // first update CONTROLREADY to DATAREADY
646  for ( auto it = thisAlgsStates.begin( AlgsExecutionStates::State::CONTROLREADY );
647  it != thisAlgsStates.end( AlgsExecutionStates::State::CONTROLREADY ); ++it ) {
648 
649  uint algIndex = *it;
650  partial_sc = promoteToDataReady(algIndex, iSlot);
651  if (partial_sc.isFailure())
652  if (msgLevel(MSG::VERBOSE))
653  verbose() << "Could not apply transition from "
654  << AlgsExecutionStates::stateNames[AlgsExecutionStates::State::CONTROLREADY]
655  << " for algorithm " << index2algname(algIndex) << " on processing slot " << iSlot << endmsg;
656  }
657 
658  // now update DATAREADY to SCHEDULED
659  for ( auto it = thisAlgsStates.begin( AlgsExecutionStates::State::DATAREADY );
660  it != thisAlgsStates.end( AlgsExecutionStates::State::DATAREADY ); ++it ) {
661  uint algIndex = *it;
662 
663  partial_sc = promoteToScheduled( algIndex, iSlot );
664 
665  if (msgLevel(MSG::VERBOSE))
666  if (partial_sc.isFailure())
667  verbose() << "Could not apply transition from "
668  << AlgsExecutionStates::stateNames[AlgsExecutionStates::State::DATAREADY]
669  << " for algorithm " << index2algname(algIndex) << " on processing slot " << iSlot << endmsg;
670  }
671 
672  // Not complete because this would mean that the slot is already free!
673  if ( !thisSlot.complete && m_efManager.rootDecisionResolved( thisSlot.controlFlowState ) &&
674  !thisSlot.algsStates.algsPresent( AlgsExecutionStates::CONTROLREADY ) &&
675  !thisSlot.algsStates.algsPresent( AlgsExecutionStates::DATAREADY ) &&
676  !thisSlot.algsStates.algsPresent( AlgsExecutionStates::SCHEDULED ) ) {
677 
678  thisSlot.complete = true;
679  // if the event did not fail, add it to the finished events
680  // otherwise it is taken care of in the error handling already
681  if(m_algExecStateSvc->eventStatus(*thisSlot.eventContext) == EventStatus::Success) {
682  m_finishedEvents.push(thisSlot.eventContext);
683  if (msgLevel(MSG::DEBUG))
684  debug() << "Event " << thisSlot.eventContext->evt() << " finished (slot "
685  << thisSlot.eventContext->slot() << ")." << endmsg;
686  }
687  // now let's return the fully evaluated result of the control flow
688  if ( msgLevel( MSG::DEBUG ) ) {
690  m_efManager.printEventState( ss, thisSlot.algsStates, thisSlot.controlFlowState, 0 );
691  debug() << ss.str() << endmsg;
692  }
693 
694  thisSlot.eventContext = nullptr;
695  } else {
696  StatusCode eventStalledSC = isStalled(iSlot);
697  if (! eventStalledSC.isSuccess()) {
698  m_algExecStateSvc->setEventStatus(EventStatus::AlgStall, *thisSlot.eventContext);
699  eventFailed(thisSlot.eventContext).ignore();
700  }
701  }
702  } // end loop on slots
703 
704  verbose() << "States Updated." << endmsg;
705 
706  return global_sc;
707 }
708 
709 //---------------------------------------------------------------------------
710 
718  // Get the slot
719  EventSlot& thisSlot = m_eventSlots[iSlot];
720 
721  if ( m_actionsQueue.empty() && m_algosInFlight == 0 &&
723 
724  info() << "About to declare a stall" << endmsg;
725  fatal() << "*** Stall detected! ***\n" << endmsg;
726  dumpSchedulerState( iSlot );
727  // throw GaudiException ("Stall detected",name(),StatusCode::FAILURE);
728 
729  return StatusCode::FAILURE;
730  }
731  return StatusCode::SUCCESS;
732 }
733 
734 //---------------------------------------------------------------------------
735 
742 
743  // To have just one big message
744  std::ostringstream outputMessageStream;
745 
746  outputMessageStream << "============================== Execution Task State ============================="
747  << std::endl;
748  dumpState( outputMessageStream );
749 
750  outputMessageStream << std::endl
751  << "============================== Scheduler State ================================="
752  << std::endl;
753 
754  int slotCount = -1;
755  for ( auto thisSlot : m_eventSlots ) {
756  slotCount++;
757  if ( thisSlot.complete ) continue;
758 
759  outputMessageStream << "----------- slot: " << thisSlot.eventContext->slot()
760  << " event: " << thisSlot.eventContext->evt() << " -----------" << std::endl;
761 
762  if ( 0 > iSlot or iSlot == slotCount ) {
763  outputMessageStream << "Algorithms states:" << std::endl;
764 
765  const DataObjIDColl& wbSlotContent( thisSlot.dataFlowMgr.content() );
766  for ( unsigned int algoIdx = 0; algoIdx < thisSlot.algsStates.size(); ++algoIdx ) {
767  outputMessageStream << " o " << index2algname( algoIdx ) << " ["
768  << AlgsExecutionStates::stateNames[thisSlot.algsStates[algoIdx]] << "] Data deps: ";
769  DataObjIDColl deps( thisSlot.dataFlowMgr.dataDependencies( algoIdx ) );
770  const int depsSize = deps.size();
771  if ( depsSize == 0 ) outputMessageStream << " none";
772 
773  DataObjIDColl missing;
774  for ( auto d : deps ) {
775  outputMessageStream << d << " ";
776  if ( wbSlotContent.find( d ) == wbSlotContent.end() ) {
777  // outputMessageStream << "[missing] ";
778  missing.insert( d );
779  }
780  }
781 
782  if ( !missing.empty() ) {
783  outputMessageStream << ". The following are missing: ";
784  for ( auto d : missing ) {
785  outputMessageStream << d << " ";
786  }
787  }
788 
789  outputMessageStream << std::endl;
790  }
791 
792  // Snapshot of the WhiteBoard
793  outputMessageStream << "\nWhiteboard contents: " << std::endl;
794  for ( auto& product : wbSlotContent ) outputMessageStream << " o " << product << std::endl;
795 
796  // Snapshot of the ControlFlow
797  outputMessageStream << "\nControl Flow:" << std::endl;
798  std::stringstream cFlowStateStringStream;
799  m_efManager.printEventState( cFlowStateStringStream, thisSlot.algsStates, thisSlot.controlFlowState, 0 );
800 
801  outputMessageStream << cFlowStateStringStream.str() << std::endl;
802  }
803  }
804 
805  outputMessageStream << "=================================== END ======================================" << std::endl;
806 
807  info() << "Dumping Scheduler State " << std::endl << outputMessageStream.str() << endmsg;
808 }
809 
810 //---------------------------------------------------------------------------
811 
813 
814  // Do the control flow
815  StatusCode sc = m_eventSlots[si].algsStates.updateState(iAlgo,AlgsExecutionStates::CONTROLREADY);
816  if (sc.isSuccess())
817  if (msgLevel(MSG::VERBOSE))
818  verbose() << "Promoting " << index2algname(iAlgo) << " to CONTROLREADY on slot "
819  << si << endmsg;
820 
821  return sc;
822 }
823 
824 //---------------------------------------------------------------------------
825 
827 
828  StatusCode sc = m_eventSlots[si].dataFlowMgr.canAlgorithmRun( iAlgo );
829 
830  StatusCode updateSc( StatusCode::FAILURE );
831  if ( sc == StatusCode::SUCCESS )
832  updateSc = m_eventSlots[si].algsStates.updateState( iAlgo, AlgsExecutionStates::DATAREADY );
833 
834  if (updateSc.isSuccess())
835  if (msgLevel(MSG::VERBOSE))
836  verbose() << "Promoting " << index2algname(iAlgo) << " to DATAREADY on slot "
837  << si<< endmsg;
838 
839  return updateSc;
840 }
841 
842 //---------------------------------------------------------------------------
843 
845 
847 
848  const std::string& algName( index2algname( iAlgo ) );
849  IAlgorithm* ialgoPtr = nullptr;
850  StatusCode sc( m_algResourcePool->acquireAlgorithm( algName, ialgoPtr ) );
851 
852  if ( sc.isSuccess() ) { // if we managed to get an algorithm instance try to schedule it
853  EventContext* eventContext( m_eventSlots[si].eventContext );
854  if ( !eventContext )
855  fatal() << "Event context for algorithm " << algName << " is a nullptr (slot " << si << ")" << endmsg;
856 
857  ++m_algosInFlight;
858  // prepare a scheduler action to run once the algorithm is executed
859  auto promote2ExecutedClosure = std::bind(&ForwardSchedulerSvc::promoteToExecuted,
860  this,
861  iAlgo,
862  eventContext->slot(),
863  ialgoPtr,
864  eventContext);
865  // Avoid using TBB and run the algorithm in this thread when the pool size is set to -100
866  if (-100 != m_threadPoolSize) {
867 
868  // this parent task is needed to promote an Algorithm as EXECUTED,
869  // it will be started as soon as the child task (see below) is completed
870  tbb::task* triggerAlgoStateUpdate = new(tbb::task::allocate_root())
871  enqueueSchedulerActionTask(this, promote2ExecutedClosure);
872  // the parent's refcount is set to 1 here only for consistency
873  // (the parent is not scheduled explicitly and it has only one child task)
874  triggerAlgoStateUpdate->set_ref_count(1);
875  // the child task that executes an Algorithm
876  tbb::task* algoTask = new(triggerAlgoStateUpdate->allocate_child())
877  AlgoExecutionTask(ialgoPtr, iAlgo, eventContext, serviceLocator(), m_algExecStateSvc);
878  // schedule the algoTask
879  tbb::task::enqueue( *algoTask);
880 
881  } else {
882  AlgoExecutionTask theTask(ialgoPtr, iAlgo, eventContext, serviceLocator(), m_algExecStateSvc);
883  theTask.execute();
884  promote2ExecutedClosure();
885  }
886 
887  if ( msgLevel( MSG::DEBUG ) )
888  debug() << "Algorithm " << algName << " was submitted on event " << eventContext->evt() << " in slot " << si
889  << ". Algorithms scheduled are " << m_algosInFlight << endmsg;
890 
891  StatusCode updateSc( m_eventSlots[si].algsStates.updateState( iAlgo, AlgsExecutionStates::SCHEDULED ) );
892 
893  if ( msgLevel( MSG::VERBOSE ) ) dumpSchedulerState( -1 );
894 
895  if (updateSc.isSuccess())
896  if (msgLevel(MSG::VERBOSE))
897  verbose() << "Promoting " << index2algname(iAlgo) << " to SCHEDULED on slot "
898  << si << endmsg;
899  return updateSc;
900  } else {
901  if ( msgLevel( MSG::DEBUG ) )
902  debug() << "Could not acquire instance for algorithm " << index2algname( iAlgo ) << " on slot " << si << endmsg;
903  return sc;
904  }
905 }
906 
907 //---------------------------------------------------------------------------
908 
910  EventContext* eventContext ) {
911 
912  // Put back the instance
913  Algorithm* castedAlgo = dynamic_cast<Algorithm*>( algo ); // DP: expose context getter in IAlgo?
914  if ( !castedAlgo ) fatal() << "The casting did not succeed!" << endmsg;
915  // EventContext* eventContext = castedAlgo->getContext();
916 
917  // Check if the execution failed
918  if (m_algExecStateSvc->eventStatus(*eventContext) != EventStatus::Success)
919  eventFailed(eventContext).ignore();
920 
921  Gaudi::Hive::setCurrentContext(eventContext);
922  StatusCode sc = m_algResourcePool->releaseAlgorithm( algo->name(), algo );
923 
924  if ( !sc.isSuccess() ) {
925  error() << "[Event " << eventContext->evt() << ", Slot " << eventContext->slot() << "] "
926  << "Instance of algorithm " << algo->name() << " could not be properly put back." << endmsg;
927  return StatusCode::FAILURE;
928  }
929 
930  m_algosInFlight--;
931 
932  EventSlot& thisSlot = m_eventSlots[si];
933 
934  // Update the catalog: some new products may be there
935  m_whiteboard->selectStore( eventContext->slot() ).ignore();
936 
937  // update prods in the dataflow
938  // DP: Handles could be used. Just update what the algo wrote
939  DataObjIDColl new_products;
940  m_whiteboard->getNewDataObjects( new_products ).ignore();
941  for ( const auto& new_product : new_products )
942  if ( msgLevel( MSG::DEBUG ) ) debug() << "Found in WB [" << si << "]: " << new_product << endmsg;
943  thisSlot.dataFlowMgr.updateDataObjectsCatalog( new_products );
944 
945  if ( msgLevel( MSG::DEBUG ) )
946  debug() << "Algorithm " << algo->name() << " executed in slot " << si << ". Algorithms scheduled are "
947  << m_algosInFlight << endmsg;
948 
949  // Limit number of updates
950  if ( m_updateNeeded ) {
951  // Schedule an update of the status of the algorithms
952  auto updateAction = std::bind( &ForwardSchedulerSvc::updateStates, this, -1);
953  m_actionsQueue.push( updateAction );
954  m_updateNeeded = false;
955  }
956 
957  if ( msgLevel( MSG::DEBUG ) )
958  debug() << "Trying to handle execution result of " << index2algname( iAlgo ) << " on slot " << si << endmsg;
959  State state;
960  if ( algo->filterPassed() ) {
961  state = State::EVTACCEPTED;
962  } else {
963  state = State::EVTREJECTED;
964  }
965 
966  sc = thisSlot.algsStates.updateState( iAlgo, state );
967 
968  if (sc.isSuccess())
969  if (msgLevel(MSG::VERBOSE))
970  verbose() << "Promoting " << index2algname(iAlgo) << " on slot " << si << " to "
972 
973  return sc;
974 }
975 
976 //===========================================================================
978 
980  m_sState.push_back( SchedulerState( a, e, t ) );
981 }
982 
983 //===========================================================================
985 
987 
988  for ( std::list<SchedulerState>::iterator itr = m_sState.begin(); itr != m_sState.end(); ++itr ) {
989  if ( *itr == a ) {
990  m_sState.erase( itr );
991  return true;
992  }
993  }
994 
995  error() << "could not find Alg " << a->name() << " in Scheduler!" << endmsg;
996  return false;
997 }
998 
999 //===========================================================================
1001 
1003 
1004  for ( auto it : m_sState ) {
1005  ost << " " << it << std::endl;
1006  }
1007 }
1008 
1009 //===========================================================================
1011 
1013 
1014  std::ostringstream ost;
1015  ost << "dumping Executing Threads: [" << m_sState.size() << "]" << std::endl;
1016  dumpState( ost );
1017 
1018  info() << ost.str() << endmsg;
1019 }
virtual StatusCode initPool(const int &poolSize)=0
Initializes the thread pool.
StatusCode deactivate()
Deactivate scheduler.
bool algsPresent(State state) const
StatusCode initialize() override
Definition: Service.cpp:64
virtual StatusCode acquireAlgorithm(const std::string &name, IAlgorithm *&algo, bool blocking=false)=0
Acquire a certain algorithm using its name.
T empty(T...args)
StatusCode eventFailed(EventContext *eventContext)
Method to check if an event failed and take appropriate actions.
void updateEventState(AlgsExecutionStates &algo_states, std::vector< int > &node_decisions) const
Update the state of algorithms to controlready, where possible.
void updateDataObjectsCatalog(const DataObjIDColl &newProducts)
Update the catalog of available products in the slot.
void printEventState(std::stringstream &ss, AlgsExecutionStates &states, const std::vector< int > &node_decisions, const unsigned int &recursionLevel) const
Print the state of the control flow for a given event.
SmartIF< IAlgResourcePool > m_algResourcePool
Cache for the algorithm resource pool.
const std::string & name() const override
The identifying name of the algorithm object.
Definition: Algorithm.cpp:715
virtual concurrency::PrecedenceRulesGraph * getPRGraph() const
StatusCode finalize() override
Definition: Service.cpp:174
ContextID_t slot() const
Definition: EventContext.h:41
MsgStream & info() const
shortcut for the method msgStream(MSG::INFO)
AlgsExecutionStates algsStates
Vector of algorithms states.
Definition: EventSlot.h:37
virtual void dump(std::ostringstream &ost, const EventContext &ctx) const =0
const DataObjIDColl & outputDataObjs() const override
bool isSuccess() const
Test for a status code of SUCCESS.
Definition: StatusCode.h:74
StatusCode isStalled(int si)
Check if the scheduling is in a stall.
EventContext * eventContext
Cache for the eventContext.
Definition: EventSlot.h:32
Header file for class GaudiAlgorithm.
StatusCode updateStates(int si=-1)
Loop over the algorithms in the slots and promote them to successive states (-1 means all slots...)
StatusCode finalize() override
Finalise.
MsgStream & verbose() const
shortcut for the method msgStream(MSG::VERBOSE)
T endl(T...args)
virtual bool filterPassed() const =0
Did this algorithm pass or fail its filter criterion for the last event?
SmartIF< IThreadPoolSvc > m_threadPoolSvc
T end(T...args)
The SchedulerSvc implements the IScheduler interface.
StatusCode pushNewEvent(EventContext *eventContext) override
Make an event available to the scheduler.
The AlgResourcePool is a concrete implementation of the IAlgResourcePool interface.
This class represents an entry point to all the event specific data.
Definition: EventContext.h:25
bool isFailure() const
Test for a status code of FAILURE.
Definition: StatusCode.h:84
SmartIF< IHiveWhiteBoard > m_whiteboard
A shortcut to the whiteboard.
StatusCode m_drain()
Drain the actions present in the queue.
Gaudi::Property< unsigned int > m_maxAlgosInFlight
virtual const std::string & type() const =0
The type of the algorithm.
tbb::task * execute() override
ContextEvt_t evt() const
Definition: EventContext.h:40
STL class.
std::atomic_int m_freeSlots
Atomic to account for asynchronous updates by the scheduler with respect to the rest.
virtual StatusCode terminatePool()=0
Finalize the thread pool.
void addAlg(Algorithm *, EventContext *, pthread_t)
T push_back(T...args)
MsgStream & error() const
shortcut for the method msgStream(MSG::ERROR)
bool rootDecisionResolved(const std::vector< int > &node_decisions) const
Check whether root decision was resolved.
DataFlowManager dataFlowMgr
DataFlowManager of this slot.
Definition: EventSlot.h:41
virtual StatusCode selectStore(size_t partitionIndex)=0
Activate a given 'slot' for all subsequent calls within the same thread id.
The AlgsExecutionStates encodes the state machine for the execution of algorithms within a single eve...
unsigned int m_algosInFlight
Number of algorithms presently in flight.
MsgStream & warning() const
shortcut for the method msgStream(MSG::WARNING)
tbb::concurrent_bounded_queue< EventContext * > m_finishedEvents
Queue of finished events.
std::thread m_thread
The thread in which the activate function runs.
virtual StatusCode getNewDataObjects(DataObjIDColl &products)=0
Get the latest new data objects registered in the store.
std::vector< std::string > m_algname_vect
Vector to bookkeep the information necessary to the index2name conversion.
T join(T...args)
static std::list< SchedulerState > m_sState
std::vector< EventSlot > m_eventSlots
Vector of events slots.
StatusCode promoteToScheduled(unsigned int iAlgo, int si)
This class is used for returning status codes from appropriate routines.
Definition: StatusCode.h:26
const DataObjIDColl & inputDataObjs() const override
unsigned int algname2index(const std::string &algoname)
Convert a name to an integer.
StatusCode promoteToExecuted(unsigned int iAlgo, int si, IAlgorithm *algo, EventContext *)
virtual void setEventStatus(const EventStatus::Status &sc, const EventContext &ctx)=0
unsigned int freeSlots() override
Get free slots number.
T bind(T...args)
StatusCode pushNewEvents(std::vector< EventContext * > &eventContexts) override
#define DECLARE_SERVICE_FACTORY(x)
Definition: Service.h:242
bool complete
Flags completion of the event.
Definition: EventSlot.h:39
Gaudi::Property< int > m_maxEventsInFlight
T max(T...args)
The IAlgorithm is the interface implemented by the Algorithm base class.
Definition: IAlgorithm.h:27
GAUDI_API void setCurrentContext(const EventContext *ctx)
tbb::concurrent_bounded_queue< action > m_actionsQueue
Queue where closures are stored and picked for execution.
bool m_updateNeeded
Keep track of update actions scheduled.
T insert(T...args)
void addDependency(const DataObjID &id, const Gaudi::DataHandle::Mode &mode) override
StatusCode tryPopFinishedEvent(EventContext *&eventContext) override
Try to fetch an event from the scheduler.
Base class from which all concrete algorithm classes should be derived.
Definition: Algorithm.h:78
T find_if(T...args)
T size(T...args)
STL class.
std::unordered_map< std::string, unsigned int > m_algname_index_map
Map to bookkeep the information necessary to the name2index conversion.
void activate()
Activate scheduler.
void reset(EventContext *theeventContext)
Reset all resources in order to reuse the slot.
Definition: EventSlot.h:26
MsgStream & debug() const
shortcut for the method msgStream(MSG::DEBUG)
bool isValid() const
Allows checking whether the smart pointer is valid.
Definition: SmartIF.h:62
T begin(T...args)
SmartIF< IAlgExecStateSvc > m_algExecStateSvc
Algorithm execution state manager.
Iterator begin(State kind)
StatusCode promoteToControlReady(unsigned int iAlgo, int si)
Algorithm promotion: Accepted by the control flow.
concurrency::ExecutionFlowManager m_efManager
Member to take care of the control flow.
virtual const EventStatus::Status & eventStatus(const EventContext &ctx) const =0
StatusCode popFinishedEvent(EventContext *&eventContext) override
Blocks until an event is available.
void dumpSchedulerState(int iSlot)
Dump the state of the scheduler.
Class representing the event slot.
Definition: EventSlot.h:11
static std::mutex m_ssMut
const std::string & index2algname(unsigned int index)
Convert an integer to a name.
bool delAlg(Algorithm *)
T sort(T...args)
std::atomic< ActivationState > m_isActive
Flag to track if the scheduler is active or not.
void ignore() const
Definition: StatusCode.h:106
MsgStream & fatal() const
shortcut for the method msgStream(MSG::FATAL)
MSG::Level msgLevel() const
get the output level from the embedded MsgStream
State
Execution states of the algorithms.
T for_each(T...args)
virtual StatusCode releaseAlgorithm(const std::string &name, IAlgorithm *&algo)=0
Release a certain algorithm.
Gaudi::Property< int > m_threadPoolSize
SmartIF< ISvcLocator > & serviceLocator() const override
Retrieve pointer to service locator.
Definition: Service.cpp:292
STL class.
MsgStream & endmsg(MsgStream &s)
MsgStream Modifier: endmsg. Calls the output method of the MsgStream.
Definition: MsgStream.h:244
StatusCode promoteToDataReady(unsigned int iAlgo, int si)
static GAUDI_API void setNumConcEvents(const std::size_t &nE)
T reserve(T...args)
static std::map< State, std::string > stateNames
T emplace_back(T...args)
Iterator end(State kind)
StatusCode updateState(unsigned int iAlgo, State newState)