// df/d49/_avalanche_scheduler_svc_8cpp_source.html

/***********************************************************************************\

* (c) Copyright 1998-2025 CERN for the benefit of the LHCb and ATLAS collaborations *

*                                                                                   *

* This software is distributed under the terms of the Apache version 2 licence,     *

* copied verbatim in the file "LICENSE".                                            *

*                                                                                   *

* In applying this licence, CERN does not waive the privileges and immunities       *

* granted to it by virtue of its status as an Intergovernmental Organization        *

* or submit itself to any jurisdiction.                                             *

\***********************************************************************************/

#include "AvalancheSchedulerSvc.h"

#include "AlgTask.h"

#include "FiberManager.h"

#include "GraphDumper.h"

#include "ThreadPoolSvc.h"


// Framework includes

#include <Gaudi/Algorithm.h> // can be removed ASA dynamic casts to Algorithm are removed

#include <GaudiKernel/ConcurrencyFlags.h>

#include <GaudiKernel/DataHandleHolderVisitor.h>

#include <GaudiKernel/IAlgorithm.h>

#include <GaudiKernel/IDataManagerSvc.h>

#include <GaudiKernel/SerializeSTL.h>

#include <GaudiKernel/ThreadLocalContext.h>


// C++

#include <algorithm>

#include <fstream>

#include <map>

#include <queue>

#include <regex>

#include <semaphore>

#include <sstream>

#include <string_view>

#include <thread>

#include <unordered_set>


// External libs

#include <boost/algorithm/string.hpp>

#include <boost/thread.hpp>

#include <boost/tokenizer.hpp>


// Instantiation of a static factory class used by clients to create instances of this service

DECLARE_COMPONENT( AvalancheSchedulerSvc )


// Prefix for a statement that should run only when DEBUG output is enabled.
// NOTE: this expands to a bare `if`, so an `else` written right after the guarded
// statement binds to it (this file relies on that, see `ON_VERBOSE { ... } else sc.ignore();`).
#define ON_DEBUG if ( msgLevel( MSG::DEBUG ) )

// Same as ON_DEBUG, but for the VERBOSE output level.
#define ON_VERBOSE if ( msgLevel( MSG::VERBOSE ) )


namespace {
  /// Orders DataObjID pointers by the fullKey() of the objects they point to.
  /// Both pointers must be non-null.
  struct DataObjIDSorter {
    // const-qualified so the comparator can also be invoked through a const object
    bool operator()( const DataObjID* a, const DataObjID* b ) const { return a->fullKey() < b->fullKey(); }
  };

  /// Sort a DataObjIDColl in a well-defined, reproducible manner.
  /// Used for making debugging dumps.
  ///
  /// @param coll collection to be listed
  /// @return pointers to the elements of @p coll, ordered by fullKey(). The pointers
  ///         refer to the elements stored in @p coll (no copies are made), so they
  ///         must not be used after @p coll is modified or destroyed.
  std::vector<const DataObjID*> sortedDataObjIDColl( const DataObjIDColl& coll ) {
    std::vector<const DataObjID*> v;
    v.reserve( coll.size() );
    for ( const DataObjID& id : coll ) v.push_back( &id );
    std::sort( v.begin(), v.end(), DataObjIDSorter() );
    return v;
  }

  /// Tell whether any sub-slot of @p slot has at least one algorithm in one of @p testStates.
  ///
  /// @return true as soon as one sub-slot matches, false otherwise (also when there are no sub-slots)
  bool subSlotAlgsInStates( const EventSlot& slot, std::initializer_list<AlgsExecutionStates::State> testStates ) {
    return std::any_of( slot.allSubSlots.begin(), slot.allSubSlots.end(),
                        [testStates]( const EventSlot& ss ) { return ss.algsStates.containsAny( testStates ); } );
  }
} // namespace


//---------------------------------------------------------------------------


/**
 * Initialise the scheduler.
 *
 * Steps, in order:
 *  - initialise the base Service, retrieve the ThreadPoolSvc and its TBB task arena;
 *  - start the scheduler thread (it creates the FiberManager and runs activate()) and
 *    wait until it reports ACTIVE (or FAILURE);
 *  - retrieve CondSvc (optional), AlgResourcePool, AlgExecStateSvc and the whiteboard;
 *  - collect the data dependencies of all algorithms, optionally print/dump them, and
 *    check for unmet inputs (optionally attributed to a DataLoader) and unused outputs;
 *  - retrieve the PrecedenceSvc, fill the algorithm name <-> index containers and
 *    create one event slot per whiteboard store;
 *  - report the concurrency settings and optionally dump/simulate the control/data flow.
 *
 * @return StatusCode::FAILURE as soon as a mandatory step fails; otherwise the status of
 *         the base-class initialisation, or that of the simulation when
 *         m_simulateExecution is set.
 */
StatusCode AvalancheSchedulerSvc::initialize() {

  // Initialise mother class (read properties, ...)
  StatusCode sc( Service::initialize() );
  if ( sc.isFailure() ) warning() << "Base class could not be initialized" << endmsg;

  // Get hold of the ThreadPoolSvc. This should initialize the thread pool
  m_threadPoolSvc = serviceLocator()->service( "ThreadPoolSvc" );
  if ( !m_threadPoolSvc.isValid() ) {
    fatal() << "Error retrieving ThreadPoolSvc" << endmsg;
    return StatusCode::FAILURE;
  }
  auto castTPS = dynamic_cast<ThreadPoolSvc*>( m_threadPoolSvc.get() );
  if ( !castTPS ) {
    fatal() << "Cannot cast ThreadPoolSvc" << endmsg;
    return StatusCode::FAILURE;
  }
  m_arena = castTPS->getArena();
  if ( !m_arena ) {
    fatal() << "Cannot find valid TBB task_arena" << endmsg;
    return StatusCode::FAILURE;
  }

  // Activate the scheduler in another thread.
  info() << "Activating scheduler in a separate thread" << endmsg;
  std::binary_semaphore fiberManagerInitialized{ 0 };
  m_thread = std::thread( [this, &fiberManagerInitialized]() {
    // The FiberManager is created on the scheduler thread itself
    this->m_fiberManager = std::make_unique<FiberManager>( this->m_numOffloadThreads.value() );
    fiberManagerInitialized.release();
    this->activate();
  } );
  // Wait for the FiberManager to exist; the semaphore is a local captured by reference,
  // so it must not go out of scope before the thread has released it
  fiberManagerInitialized.acquire();

  // Poll until activate() reports that the scheduler is running (or failed)
  while ( m_isActive != ACTIVE ) {
    if ( m_isActive == FAILURE ) {
      fatal() << "Terminating initialization" << endmsg;
      return StatusCode::FAILURE;
    } else {
      ON_DEBUG debug() << "Waiting for AvalancheSchedulerSvc to activate" << endmsg;
      sleep( 1 );
    }
  }

  if ( m_enableCondSvc ) {
    // Get hold of the CondSvc
    m_condSvc = serviceLocator()->service( "CondSvc" );
    if ( !m_condSvc.isValid() ) {
      warning() << "No CondSvc found, or not enabled. "
                << "Will not manage CondAlgorithms" << endmsg;
      m_enableCondSvc = false;
    }
  }

  // Get the algo resource pool
  m_algResourcePool = serviceLocator()->service( "AlgResourcePool" );
  if ( !m_algResourcePool.isValid() ) {
    fatal() << "Error retrieving AlgoResourcePool" << endmsg;
    return StatusCode::FAILURE;
  }

  m_algExecStateSvc = serviceLocator()->service( "AlgExecStateSvc" );
  if ( !m_algExecStateSvc.isValid() ) {
    fatal() << "Error retrieving AlgExecStateSvc" << endmsg;
    return StatusCode::FAILURE;
  }

  // Get Whiteboard
  m_whiteboard = serviceLocator()->service( m_whiteboardSvcName );
  if ( !m_whiteboard.isValid() ) {
    fatal() << "Error retrieving EventDataSvc interface IHiveWhiteBoard." << endmsg;
    return StatusCode::FAILURE;
  }

  // Set the MaxEventsInFlight parameters from the number of WB stores
  m_maxEventsInFlight = m_whiteboard->getNumberOfStores();

  // Set the number of free slots
  m_freeSlots = m_maxEventsInFlight;

  // Get the list of algorithms
  const std::list<IAlgorithm*>& algos      = m_algResourcePool->getFlatAlgList();
  const unsigned int            algsNumber = algos.size();
  if ( algsNumber != 0 ) {
    info() << "Found " << algsNumber << " algorithms" << endmsg;
  } else {
    error() << "No algorithms found" << endmsg;
    return StatusCode::FAILURE;
  }

  /* Dependencies
   1) Look for handles in algo, if none
   2) Assume none are required
  */

  DataObjIDColl globalInp, globalOutp;

  // figure out all outputs
  std::map<std::string, DataObjIDColl> algosOutputDependenciesMap;
  for ( IAlgorithm* ialgoPtr : algos ) {
    Gaudi::Algorithm* algoPtr = dynamic_cast<Gaudi::Algorithm*>( ialgoPtr );
    if ( !algoPtr ) {
      fatal() << "Could not convert IAlgorithm into Gaudi::Algorithm: this will result in a crash." << endmsg;
      return StatusCode::FAILURE;
    }

    DataObjIDColl algoOutputs;
    // iterate by reference: each id is copied only once per target collection
    for ( const auto& id : algoPtr->outputDataObjs() ) {
      globalOutp.insert( id );
      algoOutputs.insert( id );
    }
    algosOutputDependenciesMap[algoPtr->name()] = std::move( algoOutputs );
  }

  // Human-readable dependency listing, printed below if m_showDataDeps is set
  std::ostringstream ostdd;
  ostdd << "Data Dependencies for Algorithms:";

  std::map<std::string, DataObjIDColl> algosInputDependenciesMap;
  for ( IAlgorithm* ialgoPtr : algos ) {
    Gaudi::Algorithm* algoPtr = dynamic_cast<Gaudi::Algorithm*>( ialgoPtr );
    if ( nullptr == algoPtr ) {
      fatal() << "Could not convert IAlgorithm into Gaudi::Algorithm for " << ialgoPtr->name()
              << ": this will result in a crash." << endmsg;
      return StatusCode::FAILURE;
    }

    DataObjIDColl i1, i2;
    DHHVisitor    avis( i1, i2 );
    algoPtr->acceptDHVisitor( &avis );

    ostdd << "\n  " << algoPtr->name();

    // Append the names of the owners of a data object, as reported by the visitor
    auto write_owners = [&avis, &ostdd]( const DataObjID& id ) {
      auto owners = avis.owners_names_of( id );
      if ( !owners.empty() ) { GaudiUtils::operator<<( ostdd << ' ', owners ); }
    };

    DataObjIDColl algoDependencies;
    if ( !algoPtr->inputDataObjs().empty() || !algoPtr->outputDataObjs().empty() ) {
      for ( const DataObjID* idp : sortedDataObjIDColl( algoPtr->inputDataObjs() ) ) {
        const DataObjID& id = *idp;
        ostdd << "\n    o INPUT  " << id;
        write_owners( id );
        algoDependencies.insert( id );
        globalInp.insert( id );
      }
      for ( const DataObjID* id : sortedDataObjIDColl( algoPtr->outputDataObjs() ) ) {
        ostdd << "\n    o OUTPUT " << *id;
        write_owners( *id );
        if ( id->key().find( ":" ) != std::string::npos ) {
          error() << " in Alg " << algoPtr->name() << " alternatives are NOT allowed for outputs! id: " << *id
                  << endmsg;
          // force the dependency listing to be printed so that the problem can be located
          m_showDataDeps = true;
        }
      }
    } else {
      ostdd << "\n      none";
    }
    algosInputDependenciesMap[algoPtr->name()] = std::move( algoDependencies );
  }

  if ( m_showDataDeps ) { info() << ostdd.str() << endmsg; }

  // If requested, dump a graph of the data dependencies in a .dot or .md file
  if ( not m_dataDepsGraphFile.empty() ) {
    if ( dumpGraphFile( algosInputDependenciesMap, algosOutputDependenciesMap ).isFailure() ) {
      return StatusCode::FAILURE;
    }
  }

  // Check if we have unmet global input dependencies, and, optionally, heal them
  // WARNING: this step must be done BEFORE the Precedence Service is initialized
  DataObjIDColl unmetDepInp, unusedOutp;
  if ( m_checkDeps || m_checkOutput ) {
    std::set<std::string> requiredInputKeys;
    for ( const auto& o : globalInp ) {
      // track aliases
      // (assuming there should be no items with different class and same key corresponding to different objects)
      requiredInputKeys.insert( o.key() );
      if ( globalOutp.find( o ) == globalOutp.end() ) unmetDepInp.insert( o );
    }
    if ( m_checkOutput ) {
      for ( const auto& o : globalOutp ) {
        if ( globalInp.find( o ) == globalInp.end() && requiredInputKeys.find( o.key() ) == requiredInputKeys.end() ) {
          // check ignores: an output is tolerated if produced by an algorithm of the ignore list
          bool ignored{};
          for ( const std::string& algoName : m_checkOutputIgnoreList ) {
            auto it = algosOutputDependenciesMap.find( algoName );
            if ( it != algosOutputDependenciesMap.end() ) {
              if ( it->second.find( o ) != it->second.end() ) {
                ignored = true;
                break;
              }
            }
          }
          if ( !ignored ) { unusedOutp.insert( o ); }
        }
      }
    }
  }

  if ( m_checkDeps ) {
    if ( unmetDepInp.size() > 0 ) {

      // List every unmet input together with the algorithms that require it
      auto printUnmet = [&]( auto msg ) {
        for ( const DataObjID* o : sortedDataObjIDColl( unmetDepInp ) ) {
          msg << "   o " << *o << "    required by Algorithm: " << endmsg;

          for ( const auto& p : algosInputDependenciesMap )
            if ( p.second.find( *o ) != p.second.end() ) msg << "       * " << p.first << endmsg;
        }
      };

      if ( !m_useDataLoader.empty() ) {

        // Find the DataLoader Alg
        IAlgorithm* dataLoaderAlg( nullptr );
        for ( IAlgorithm* algo : algos )
          if ( m_useDataLoader == algo->name() ) {
            dataLoaderAlg = algo;
            break;
          }

        if ( dataLoaderAlg == nullptr ) {
          fatal() << "No DataLoader Algorithm \"" << m_useDataLoader.value()
                  << "\" found, and unmet INPUT dependencies "
                  << "detected:" << endmsg;
          printUnmet( fatal() );
          return StatusCode::FAILURE;
        }

        info() << "Will attribute the following unmet INPUT dependencies to \"" << dataLoaderAlg->type() << "/"
               << dataLoaderAlg->name() << "\" Algorithm" << endmsg;
        printUnmet( info() );

        // Set the property Load of DataLoader Alg
        Gaudi::Algorithm* dataAlg = dynamic_cast<Gaudi::Algorithm*>( dataLoaderAlg );
        if ( !dataAlg ) {
          fatal() << "Unable to dcast DataLoader \"" << m_useDataLoader.value() << "\" IAlg to Gaudi::Algorithm"
                  << endmsg;
          return StatusCode::FAILURE;
        }

        for ( auto& id : unmetDepInp ) {
          ON_DEBUG debug() << "adding OUTPUT dep \"" << id << "\" to " << dataLoaderAlg->type() << "/"
                           << dataLoaderAlg->name() << endmsg;
          dataAlg->addDependency( id, Gaudi::DataHandle::Writer );
        }

      } else {
        fatal() << "Auto DataLoading not requested, "
                << "and the following unmet INPUT dependencies were found:" << endmsg;
        printUnmet( fatal() );
        return StatusCode::FAILURE;
      }

    } else {
      info() << "No unmet INPUT data dependencies were found" << endmsg;
    }
  }

  if ( m_checkOutput ) {
    if ( unusedOutp.size() > 0 ) {

      // List every unused output together with the algorithms that produce it
      auto printUnusedOutp = [&]( auto msg ) {
        for ( const DataObjID* o : sortedDataObjIDColl( unusedOutp ) ) {
          msg << "   o " << *o << "    produced by Algorithm: " << endmsg;

          for ( const auto& p : algosOutputDependenciesMap )
            if ( p.second.find( *o ) != p.second.end() ) msg << "       * " << p.first << endmsg;
        }
      };

      fatal() << "The following unused OUTPUT items were found:" << endmsg;
      printUnusedOutp( fatal() );
      return StatusCode::FAILURE;
    } else {
      info() << "No unused OUTPUT items were found" << endmsg;
    }
  }

  // Get the precedence service
  m_precSvc = serviceLocator()->service( "PrecedenceSvc" );
  if ( !m_precSvc.isValid() ) {
    fatal() << "Error retrieving PrecedenceSvc" << endmsg;
    return StatusCode::FAILURE;
  }
  const PrecedenceSvc* precSvc = dynamic_cast<const PrecedenceSvc*>( m_precSvc.get() );
  if ( !precSvc ) {
    fatal() << "Unable to dcast PrecedenceSvc" << endmsg;
    return StatusCode::FAILURE;
  }

  // Fill the containers to convert algo names to index
  m_algname_vect.resize( algsNumber );
  for ( IAlgorithm* algo : algos ) {
    const std::string& name = algo->name();
    // Guard against an algorithm without a node in the precedence rules graph,
    // which would otherwise be dereferenced as a null pointer
    auto* node = precSvc->getRules()->getAlgorithmNode( name );
    if ( !node ) {
      fatal() << "No precedence rules graph node found for algorithm " << name << endmsg;
      return StatusCode::FAILURE;
    }
    auto index                 = node->getAlgoIndex();
    m_algname_index_map[name]  = index;
    m_algname_vect.at( index ) = name;
  }

  // Shortcut for the message service
  SmartIF<IMessageSvc> messageSvc( serviceLocator() );
  if ( !messageSvc.isValid() ) error() << "Error retrieving MessageSvc interface IMessageSvc." << endmsg;

  // One event slot per whiteboard store; slots start out "complete", i.e. free
  m_eventSlots.reserve( m_maxEventsInFlight );
  for ( size_t i = 0; i < m_maxEventsInFlight; ++i ) {
    m_eventSlots.emplace_back( algsNumber, precSvc->getRules()->getControlFlowNodeCounter(), messageSvc );
    m_eventSlots.back().complete = true;
  }

  if ( m_threadPoolSize > 1 ) { m_maxAlgosInFlight = static_cast<size_t>( m_threadPoolSize ); }

  // Clearly inform about the level of concurrency
  info() << "Concurrency level information:" << endmsg;
  info() << " o Number of events in flight: " << m_maxEventsInFlight << endmsg;
  info() << " o TBB thread pool size: " << m_threadPoolSize << endmsg;
  info() << " o Fiber thread pool size: " << m_numOffloadThreads << endmsg;

  // Inform about task scheduling prescriptions
  info() << "Task scheduling settings:" << endmsg;
  info() << " o Avalanche generation mode: "
         << ( m_optimizationMode.empty() ? "disabled" : m_optimizationMode.toString() ) << endmsg;
  info() << " o Preemptive scheduling of CPU-blocking tasks: "
         << ( m_enablePreemptiveBlockingTasks
                  ? ( "enabled (max. " + std::to_string( m_maxBlockingAlgosInFlight ) + " concurrent tasks)" )
                  : "disabled" )
         << endmsg;
  info() << " o Scheduling of condition tasks: " << ( m_enableCondSvc ? "enabled" : "disabled" ) << endmsg;

  if ( m_showControlFlow ) m_precSvc->dumpControlFlow();

  if ( m_showDataFlow ) m_precSvc->dumpDataFlow();

  // Simulate execution flow
  if ( m_simulateExecution ) sc = m_precSvc->simulate( m_eventSlots[0] );

  return sc;
}


//---------------------------------------------------------------------------


/**
 * Finalise the scheduler: finalise the base Service, ask the scheduler loop to stop,
 * delete the FiberManager and join the scheduler thread.
 *
 * @return StatusCode::FAILURE if the scheduler thread ended in the FAILURE state,
 *         otherwise the status returned by deactivate().
 */
StatusCode AvalancheSchedulerSvc::finalize() {

  StatusCode sc( Service::finalize() );
  if ( sc.isFailure() ) warning() << "Base class could not be finalized" << endmsg;

  sc = deactivate();
  if ( sc.isFailure() ) warning() << "Scheduler could not be deactivated" << endmsg;

  ON_DEBUG debug() << "Deleting FiberManager" << endmsg;
  m_fiberManager.reset();

  // std::thread::join() throws std::system_error on a non-joinable thread, which is
  // the case if the thread was never started or if it has already been joined
  if ( m_thread.joinable() ) {
    info() << "Joining Scheduler thread" << endmsg;
    m_thread.join();
  }

  // Final error check after thread pool termination
  if ( m_isActive == FAILURE ) {
    error() << "problems in scheduler thread" << endmsg;
    return StatusCode::FAILURE;
  }

  return sc;
}


//---------------------------------------------------------------------------


void AvalancheSchedulerSvc::activate() {


  ON_DEBUG debug() << "AvalancheSchedulerSvc::activate()" << endmsg;


  if ( m_threadPoolSvc->initPool( m_threadPoolSize, m_maxParallelismExtra ).isFailure() ) {

    error() << "problems initializing ThreadPoolSvc" << endmsg;

    m_isActive = FAILURE;

    return;

  }


  // Wait for actions pushed into the queue by finishing tasks.

  action     thisAction;

  StatusCode sc( StatusCode::SUCCESS );


  m_isActive = ACTIVE;


  // Continue to wait if the scheduler is running or there is something to do

  ON_DEBUG debug() << "Start checking the actionsQueue" << endmsg;

  while ( m_isActive == ACTIVE || m_actionsQueue.size() != 0 ) {

    m_actionsQueue.pop( thisAction );

    sc = thisAction();

    ON_VERBOSE {

      if ( sc.isFailure() )

        verbose() << "Action did not succeed (which is not bad per se)." << endmsg;

      else

        verbose() << "Action succeeded." << endmsg;

    }

    else sc.ignore();


    // If all queued actions have been processed, update the slot states

    if ( m_needsUpdate.load() && m_actionsQueue.empty() ) {

      sc = iterate();

      ON_VERBOSE {

        if ( sc.isFailure() )

          verbose() << "Iteration did not succeed (which is not bad per se)." << endmsg;

        else

          verbose() << "Iteration succeeded." << endmsg;

      }

      else sc.ignore();

    }

  }


  ON_DEBUG debug() << "Terminating thread-pool resources" << endmsg;

  if ( m_threadPoolSvc->terminatePool().isFailure() ) {

    error() << "Problems terminating thread pool" << endmsg;

    m_isActive = FAILURE;

  }

}


//---------------------------------------------------------------------------


/**
 * Ask the scheduler loop to stop.
 *
 * If the scheduler is ACTIVE: no more free slots are advertised, all pending actions
 * are discarded and a final action is queued that flags the scheduler as INACTIVE.
 * Does nothing in any other state.
 *
 * @return always StatusCode::SUCCESS
 */
StatusCode AvalancheSchedulerSvc::deactivate() {

  // Nothing to do unless the scheduler is running
  if ( m_isActive != ACTIVE ) return StatusCode::SUCCESS;

  // Advertise no free slots from now on
  m_freeSlots.store( 0 );

  // Throw away whatever is still queued
  action discarded;
  while ( m_actionsQueue.try_pop( discarded ) ) {}

  // The state change is done by the scheduler thread itself, as its last action
  auto switchOff = [this]() -> StatusCode {
    ON_VERBOSE verbose() << "Deactivating scheduler" << endmsg;
    m_isActive = INACTIVE;
    return StatusCode::SUCCESS;
  };
  m_actionsQueue.push( switchOff );

  return StatusCode::SUCCESS;
}


//---------------------------------------------------------------------------


// EventSlot management


/**
 * Submit a new event to the scheduler.
 *
 * Takes one free slot and queues an action, executed later by the scheduler thread,
 * that resets the event slot matching the slot number of the context and starts the
 * scheduling for it.
 *
 * @param eventContext context of the event to process; its slot() selects the event slot
 * @return StatusCode::FAILURE if @p eventContext is null or no free slot is available,
 *         StatusCode::SUCCESS once the action has been queued. Problems found while
 *         the queued action runs are reported through the status of that action only.
 */
StatusCode AvalancheSchedulerSvc::pushNewEvent( EventContext* eventContext ) {

  if ( !eventContext ) {
    fatal() << "Event context is nullptr" << endmsg;
    return StatusCode::FAILURE;
  }

  if ( m_freeSlots.load() == 0 ) {
    ON_DEBUG debug() << "A free processing slot could not be found." << endmsg;
    return StatusCode::FAILURE;
  }

  // no problem as push new event is only called from one thread (event loop manager)
  --m_freeSlots;

  auto newEventAction = [this, eventContext]() -> StatusCode {
    // Event processing slot forced to be the same as the wb slot
    const unsigned int thisSlotNum = eventContext->slot();
    // An out-of-range slot number would index past the end of m_eventSlots
    if ( thisSlotNum >= m_eventSlots.size() ) {
      fatal() << "The slot " << thisSlotNum << " is out of range: only " << m_eventSlots.size()
              << " event slots are available" << endmsg;
      return StatusCode::FAILURE;
    }
    EventSlot& thisSlot = m_eventSlots[thisSlotNum];
    if ( !thisSlot.complete ) {
      fatal() << "The slot " << thisSlotNum << " is supposed to be a finished event but it's not" << endmsg;
      return StatusCode::FAILURE;
    }

    ON_DEBUG debug() << "Executing event " << eventContext->evt() << " on slot " << thisSlotNum << endmsg;
    thisSlot.reset( eventContext );

    // Result status code:
    StatusCode result = StatusCode::SUCCESS;

    // promote to CR and DR the initial set of algorithms
    Cause cs = { Cause::source::Root, "RootDecisionHub" };
    if ( m_precSvc->iterate( thisSlot, cs ).isFailure() ) {
      error() << "Failed to call IPrecedenceSvc::iterate for slot " << thisSlotNum << endmsg;
      result = StatusCode::FAILURE;
    }

    if ( this->iterate().isFailure() ) {
      error() << "Failed to call AvalancheSchedulerSvc::iterate for slot " << thisSlotNum << endmsg;
      result = StatusCode::FAILURE;
    }

    return result;
  }; // end of lambda

  // Kick off scheduling
  ON_VERBOSE {
    verbose() << "Pushing the action to update the scheduler for slot " << eventContext->slot() << endmsg;
    verbose() << "Free slots available " << m_freeSlots.load() << endmsg;
  }

  m_actionsQueue.push( newEventAction );

  return StatusCode::SUCCESS;
}


//---------------------------------------------------------------------------


/**
 * Submit several events, in the order given, stopping at the first one that cannot
 * be pushed.
 *
 * @param eventContexts contexts of the events to submit
 * @return the status of the first pushNewEvent() call that differs from SUCCESS, or
 *         the status of the last call if all succeeded (a default-constructed
 *         StatusCode if @p eventContexts is empty)
 */
StatusCode AvalancheSchedulerSvc::pushNewEvents( std::vector<EventContext*>& eventContexts ) {
  StatusCode lastStatus;
  for ( EventContext* ctx : eventContexts ) {
    lastStatus = pushNewEvent( ctx );
    if ( lastStatus != StatusCode::SUCCESS ) break;
  }
  return lastStatus;
}


//---------------------------------------------------------------------------


/// Number of free event slots; a negative counter value is reported as 0.
unsigned int AvalancheSchedulerSvc::freeSlots() {
  const int available = m_freeSlots.load();
  return available > 0 ? available : 0;
}


//---------------------------------------------------------------------------


/// Dump the scheduler state by calling dumpSchedulerState() with slot argument -1.
void AvalancheSchedulerSvc::dumpState() {
  // NOTE(review): -1 presumably selects all slots - confirm against dumpSchedulerState()
  constexpr int slotSelector = -1;
  dumpSchedulerState( slotSelector );
}


//---------------------------------------------------------------------------


/**
 * Retrieve a finished event through a call to pop() on the finished-events queue.
 *
 * @param[out] eventContext set to the context of the popped event
 * @return StatusCode::FAILURE, without touching the queue, if all slots are free or
 *         the scheduler is INACTIVE; StatusCode::SUCCESS after an event was popped
 *         and its slot counted as free again
 */
StatusCode AvalancheSchedulerSvc::popFinishedEvent( EventContext*& eventContext ) {

  // With every slot free there is no event in flight, hence nothing to wait for
  const bool allSlotsFree = ( m_freeSlots.load() == static_cast<int>( m_maxEventsInFlight ) );
  if ( allSlotsFree || m_isActive == INACTIVE ) return StatusCode::FAILURE;

  m_finishedEvents.pop( eventContext );
  ++m_freeSlots;
  ON_DEBUG debug() << "Popped slot " << eventContext->slot() << " (event " << eventContext->evt() << ")" << endmsg;
  return StatusCode::SUCCESS;
}


//---------------------------------------------------------------------------


/**
 * Retrieve a finished event through a call to try_pop() on the finished-events queue.
 *
 * @param[out] eventContext set to the context of the popped event, if any
 * @return StatusCode::SUCCESS if an event was popped (its slot is counted as free
 *         again), StatusCode::FAILURE if try_pop() did not deliver one
 */
StatusCode AvalancheSchedulerSvc::tryPopFinishedEvent( EventContext*& eventContext ) {

  const bool popped = m_finishedEvents.try_pop( eventContext );
  if ( !popped ) return StatusCode::FAILURE;

  ON_DEBUG debug() << "Try Pop successful slot " << eventContext->slot() << "(event " << eventContext->evt() << ")"
                   << endmsg;
  ++m_freeSlots;
  return StatusCode::SUCCESS;
}


//--------------------------------------------------------------------------


/**
 * Perform one scheduling pass over all event slots.
 *
 * First re-submits the tasks waiting in the retry queue. Then, for every slot holding
 * an event context: optionally records the per-state algorithm counts for an occupancy
 * snapshot, schedules all algorithms found in the DATAREADY state (in the slot and in
 * its sub-slots), optionally appends a line to the intra-event dynamics CSV file, and
 * finally detects whether the event is complete or stalled.
 *
 * @return the status of the last re-submitted retry task, StatusCode::SUCCESS if the
 *         retry queue was empty. The outcome of the other schedule() calls is not
 *         propagated to the caller.
 */
StatusCode AvalancheSchedulerSvc::iterate() {


  StatusCode global_sc( StatusCode::SUCCESS );


  // Retry algorithms

  // The queue size is sampled once before the loop, so the loop runs that many times
  // even if the queue is refilled meanwhile
  const size_t retries = m_retryQueue.size();

  for ( unsigned int retryIndex = 0; retryIndex < retries; ++retryIndex ) {

    TaskSpec retryTS = std::move( m_retryQueue.front() );

    m_retryQueue.pop();

    global_sc = schedule( std::move( retryTS ) );

  }


  // Loop over all slots

  OccupancySnapshot nextSnap;

  auto              now = std::chrono::system_clock::now();

  for ( EventSlot& thisSlot : m_eventSlots ) {


    // Ignore slots without a valid context (relevant when populating scheduler for first time)

    if ( !thisSlot.eventContext ) continue;


    int iSlot = thisSlot.eventContext->slot();


    // Cache the states of the algorithms to improve readability and performance

    AlgsExecutionStates& thisAlgsStates = thisSlot.algsStates;


    // Holds the outcome of the schedule() calls of this slot; explicitly ignored at the end of the loop body
    StatusCode partial_sc = StatusCode::FAILURE;


    // Make an occupancy snapshot

    // NOTE(review): the minimum duration value presumably flags snapshots as disabled - confirm
    if ( m_snapshotInterval != std::chrono::duration<int64_t, std::milli>::min() &&

         now - m_lastSnapshot >= m_snapshotInterval ) {


      // Initialise snapshot

      if ( nextSnap.states.empty() ) {

        nextSnap.time = now;

        nextSnap.states.resize( m_eventSlots.size() );

      }


      // Store alg states

      std::vector<int>& slotStateTotals = nextSnap.states[iSlot];

      slotStateTotals.resize( AState::MAXVALUE );

      for ( uint8_t state = 0; state < AState::MAXVALUE; ++state ) {

        slotStateTotals[state] = thisSlot.algsStates.sizeOfSubset( AState( state ) );

      }


      // Add subslot alg states

      for ( auto& subslot : thisSlot.allSubSlots ) {

        for ( uint8_t state = 0; state < AState::MAXVALUE; ++state ) {

          slotStateTotals[state] += subslot.algsStates.sizeOfSubset( AState( state ) );

        }

      }

    }


    // Perform DR->SCHEDULED

    const auto& drAlgs = thisAlgsStates.algsInState( AState::DATAREADY );

    for ( uint algIndex : drAlgs ) {

      const std::string& algName{ index2algname( algIndex ) };

      // The priority is looked up only when an optimization mode is set, otherwise rank 0 is used
      unsigned int       rank{ m_optimizationMode.empty() ? 0 : m_precSvc->getPriority( algName ) };

      bool               asynchronous{ m_precSvc->isAsynchronous( algName ) };


      partial_sc =

          schedule( TaskSpec( nullptr, algIndex, algName, rank, asynchronous, iSlot, thisSlot.eventContext.get() ) );


      ON_VERBOSE if ( partial_sc.isFailure() ) verbose()

          << "Could not apply transition from " << AState::DATAREADY << " for algorithm " << algName

          << " on processing slot " << iSlot << endmsg;

    }


    // Check for algorithms ready in sub-slots

    for ( auto& subslot : thisSlot.allSubSlots ) {

      const auto& drAlgsSubSlot = subslot.algsStates.algsInState( AState::DATAREADY );

      for ( uint algIndex : drAlgsSubSlot ) {

        const std::string& algName{ index2algname( algIndex ) };

        unsigned int       rank{ m_optimizationMode.empty() ? 0 : m_precSvc->getPriority( algName ) };

        bool               asynchronous{ m_precSvc->isAsynchronous( algName ) };

        // Same slot number as the parent slot, but with the event context of the sub-slot
        partial_sc =

            schedule( TaskSpec( nullptr, algIndex, algName, rank, asynchronous, iSlot, subslot.eventContext.get() ) );

      }

    }


    // Append one CSV line with the CONTROLREADY / DATAREADY / SCHEDULED counts and a timestamp
    if ( m_dumpIntraEventDynamics ) {

      std::stringstream s;

      s << "START, " << thisAlgsStates.sizeOfSubset( AState::CONTROLREADY ) << ", "

        << thisAlgsStates.sizeOfSubset( AState::DATAREADY ) << ", " << thisAlgsStates.sizeOfSubset( AState::SCHEDULED )

        << ", " << std::chrono::high_resolution_clock::now().time_since_epoch().count() << "\n";

      // The file name carries the thread pool size, or the hardware concurrency if the size is -1
      auto          threads = ( m_threadPoolSize != -1 ) ? std::to_string( m_threadPoolSize )

                                                         : std::to_string( std::thread::hardware_concurrency() );

      std::ofstream myfile;

      myfile.open( "IntraEventFSMOccupancy_" + threads + "T.csv", std::ios::app );

      myfile << s.str();

      myfile.close();

    }


    // Not complete because this would mean that the slot is already free!

    // The event is done when the control flow rules are resolved and no algorithm, in the
    // slot or in its sub-slots, is CONTROLREADY, DATAREADY, SCHEDULED or RESOURCELESS
    if ( m_precSvc->CFRulesResolved( thisSlot ) &&

         !thisSlot.algsStates.containsAny(

             { AState::CONTROLREADY, AState::DATAREADY, AState::SCHEDULED, AState::RESOURCELESS } ) &&

         !subSlotAlgsInStates( thisSlot,

                               { AState::CONTROLREADY, AState::DATAREADY, AState::SCHEDULED, AState::RESOURCELESS } ) &&

         !thisSlot.complete ) {


      thisSlot.complete = true;

      // if the event did not fail, add it to the finished events

      // otherwise it is taken care of in the error handling

      if ( m_algExecStateSvc->eventStatus( *thisSlot.eventContext ) == EventStatus::Success ) {

        ON_DEBUG debug() << "Event " << thisSlot.eventContext->evt() << " finished (slot "

                         << thisSlot.eventContext->slot() << ")." << endmsg;

        // The slot gives up the context; the raw pointer is handed to the finished-events queue
        m_finishedEvents.push( thisSlot.eventContext.release() );

      }


      // now let's return the fully evaluated result of the control flow

      ON_DEBUG debug() << m_precSvc->printState( thisSlot ) << endmsg;


      // Leaves the slot without a context, so that it is skipped by the next passes
      thisSlot.eventContext.reset( nullptr );


    } else if ( isStalled( thisSlot ) ) {

      m_algExecStateSvc->setEventStatus( EventStatus::AlgStall, *thisSlot.eventContext );

      eventFailed( thisSlot.eventContext.get() ); // can't release yet

    }

    partial_sc.ignore();

  } // end loop on slots


  // Process snapshot

  if ( !nextSnap.states.empty() ) {

    m_lastSnapshot = nextSnap.time;

    m_snapshotCallback( std::move( nextSnap ) );

  }


  ON_VERBOSE verbose() << "Iteration done." << endmsg;

  m_needsUpdate.store( false );

  return global_sc;

}


//---------------------------------------------------------------------------

// Update algorithm state and, optionally, revise states of other downstream algorithms


/**
 * Set the state of an algorithm and, optionally, let the precedence service revise the
 * states of the algorithms downstream of it.
 *
 * The state is changed in the sub-slot designated by the context if the context uses a
 * sub-slot, in the event slot otherwise.
 *
 * @param iAlgo      index of the algorithm
 * @param contextPtr event context selecting the slot (and possibly the sub-slot)
 * @param state      state to assign to the algorithm
 * @param iterate    if true, call IPrecedenceSvc::iterate after a successful state change
 * @return the status of the state change or, if that succeeded and @p iterate is set,
 *         the status of the precedence service iteration
 */
StatusCode AvalancheSchedulerSvc::revise( unsigned int iAlgo, EventContext* contextPtr, AState state, bool iterate ) {
  auto       slotIndex = contextPtr->slot();
  EventSlot& eventSlot = m_eventSlots[slotIndex];
  Cause      cause     = { Cause::source::Task, index2algname( iAlgo ) };

  // Select where the algorithm state lives: sub-slot or event level (standard behaviour)
  const bool inSubSlot = contextPtr->usesSubSlot();
  EventSlot& target    = inSubSlot ? eventSlot.allSubSlots[contextPtr->subSlot()] : eventSlot;

  StatusCode status = target.algsStates.set( iAlgo, state );
  if ( !status.isSuccess() ) return status;

  ON_VERBOSE {
    auto& out = verbose();
    out << "Promoted " << index2algname( iAlgo ) << " to " << state << " [slot:" << slotIndex;
    if ( inSubSlot ) out << ", subslot:" << contextPtr->subSlot();
    out << ", event:" << contextPtr->evt() << "]" << endmsg;
  }

  // Revise states of algorithms downstream the precedence graph
  if ( iterate ) status = m_precSvc->iterate( target, cause );

  return status;
}


//---------------------------------------------------------------------------


/**
 * Tell whether a slot is stalled, i.e. neither the slot nor any of its sub-slots has
 * an algorithm in the DATAREADY, SCHEDULED or RESOURCELESS state.
 * A detected stall is reported with an error message.
 *
 * @param slot event slot to inspect
 * @return true if a stall was detected, false otherwise
 */
bool AvalancheSchedulerSvc::isStalled( const EventSlot& slot ) const {

  // Something can still happen at event level
  if ( slot.algsStates.containsAny( { AState::DATAREADY, AState::SCHEDULED, AState::RESOURCELESS } ) ) return false;

  // Something can still happen in a sub-slot
  if ( subSlotAlgsInStates( slot, { AState::DATAREADY, AState::SCHEDULED, AState::RESOURCELESS } ) ) return false;

  error() << "*** Stall detected, event context: " << slot.eventContext.get() << endmsg;
  return true;
}


//---------------------------------------------------------------------------


/**
 * Handle a failed event: report it, dump the scheduler state and the precedence rules
 * for its slot, flag the slot as complete and hand the event context held by the slot
 * over to the finished-events queue.
 *
 * @param eventContext context of the failed event (must not be null); its slot()
 *                     selects the event slot
 */
void AvalancheSchedulerSvc::eventFailed( EventContext* eventContext ) {
  const uint failedIdx  = eventContext->slot();
  EventSlot& failedSlot = m_eventSlots[failedIdx];

  error() << "Event " << eventContext->evt() << " on slot " << failedIdx << " failed" << endmsg;

  // With VERBOSE output -1 is passed instead of the index of the failed slot
  dumpSchedulerState( msgLevel( MSG::VERBOSE ) ? -1 : failedIdx );

  // dump temporal and topological precedence analysis (if enabled in the PrecedenceSvc)
  m_precSvc->dumpPrecedenceRules( failedSlot );

  // The failed context goes to the finished events queue as well
  failedSlot.complete = true;
  m_finishedEvents.push( failedSlot.eventContext.release() );
}


//---------------------------------------------------------------------------


// Dump the state of the scheduler as a single INFO message.
//
// The dump is assembled in a string stream and consists of up to three sections:
//   1. "Last schedule": one line per algorithm in SCHEDULED state over all event
//      slots (task name, event/slot, thread id looked up in the TimelineSvc, and
//      the state reported by the AlgExecStateSvc). If the TimelineSvc is missing
//      or not enabled, only a warning line is written.
//   2. "Task/CF/FSM Mapping": a header line for every slot not flagged complete;
//      for the selected slot(s) either the list of algorithms in ERROR state or
//      the PrecedenceSvc state printout, optionally followed by the same for each
//      sub-slot (when m_verboseSubSlots is set).
//   3. "Algorithm Execution States": only for a specific target slot without
//      algorithm errors.
//
// iSlot: negative to detail all slots, otherwise the index (into m_eventSlots) of
//        the single slot to detail.
//        NOTE(review): a non-negative iSlot is used to index m_eventSlots without
//        a range check.
void AvalancheSchedulerSvc::dumpSchedulerState( int iSlot ) {

  // To have just one big message
  std::ostringstream outputMS;

  outputMS << "Dumping scheduler state\n"
           << "=========================================================================================\n"
           << "++++++++++++++++++++++++++++++++++++ SCHEDULER STATE ++++++++++++++++++++++++++++++++++++\n"
           << "=========================================================================================\n\n";

  //===========================================================================

  outputMS << "------------------ Last schedule: Task/Event/Slot/Thread/State Mapping "
           << "------------------\n\n";

  // Figure if TimelineSvc is available (used below to detect threads IDs)
  // The second argument (false) is the createIf flag: the service is not created on demand
  auto timelineSvc = serviceLocator()->service<ITimelineSvc>( "TimelineSvc", false );
  if ( !timelineSvc.isValid() || !timelineSvc->isEnabled() ) {
    outputMS << "WARNING Enable TimelineSvc in record mode (RecordTimeline = True) to trace the mapping\n";
  } else {

    // Figure optimal printout layout
    // (indt ends up as the length of the longest name among SCHEDULED algorithms)
    size_t indt( 0 );
    for ( auto& slot : m_eventSlots ) {

      const auto& schedAlgs = slot.algsStates.algsInState( AState::SCHEDULED );
      for ( uint algIndex : schedAlgs ) {
        if ( index2algname( algIndex ).length() > indt ) indt = index2algname( algIndex ).length();
      }
    }

    // Figure the last running schedule across all slots
    // NOTE(review): slot.eventContext is dereferenced here without checking slot.complete
    // or for null — presumably slots without a context have no SCHEDULED algorithms; confirm
    for ( auto& slot : m_eventSlots ) {

      const auto& schedAlgs = slot.algsStates.algsInState( AState::SCHEDULED );
      for ( uint algIndex : schedAlgs ) {

        const std::string& algoName{ index2algname( algIndex ) };

        outputMS << "  task: " << std::setw( indt ) << algoName << " evt/slot: " << slot.eventContext->evt() << "/"
                 << slot.eventContext->slot();

        // Try to get POSIX threads IDs the currently running tasks are scheduled to
        // NOTE(review): this validity check is always true inside this else-branch
        if ( timelineSvc.isValid() ) {
          TimelineEvent te{};
          te.algorithm = algoName;
          te.slot      = slot.eventContext->slot();
          te.event     = slot.eventContext->evt();

          if ( timelineSvc->getTimelineEvent( te ) )
            outputMS << " thread.id: 0x" << std::hex << te.thread << std::dec;
          else
            outputMS << " thread.id: [unknown]"; // this means a task has just
                                                 // been signed off as SCHEDULED,
                                                 // but has not been assigned to a thread yet
                                                 // (i.e., not running yet)
        }
        outputMS << " state: [" << m_algExecStateSvc->algExecState( algoName, *( slot.eventContext ) ) << "]\n";
      }
    }
  }

  //===========================================================================

  outputMS << "\n---------------------------- Task/CF/FSM Mapping "
           << ( 0 > iSlot ? "[all slots] --" : "[target slot] " ) << "--------------------------\n\n";

  // slotCount tracks the index of the slot being visited (starts at -1, incremented first thing in the loop)
  int  slotCount   = -1;
  // Evaluated for the target slot only; when all slots are requested (iSlot < 0) it is always
  // false, so the CF/FSM snapshot is printed for every slot
  bool wasAlgError = ( iSlot >= 0 ) ? m_eventSlots[iSlot].algsStates.containsAny( { AState::ERROR } ) ||
                                          subSlotAlgsInStates( m_eventSlots[iSlot], { AState::ERROR } )
                                    : false;

  for ( auto& slot : m_eventSlots ) {
    ++slotCount;
    // Slots flagged complete are left out of this section entirely
    if ( slot.complete ) continue;

    // The header line is written for every remaining slot, selected or not
    outputMS << "[ slot: "
             << ( slot.eventContext->valid() ? std::to_string( slot.eventContext->slot() ) : "[ctx invalid]" )
             << ", event: "
             << ( slot.eventContext->valid() ? std::to_string( slot.eventContext->evt() ) : "[ctx invalid]" );

    if ( slot.eventContext->eventID().isValid() ) { outputMS << ", eventID: " << slot.eventContext->eventID(); }
    outputMS << " ]:\n\n";

    // Details only for the selected slot, or for all of them when iSlot is negative
    if ( 0 > iSlot || iSlot == slotCount ) {

      // If an alg has thrown an error then it's not a failure of the CF/DF graph
      if ( wasAlgError ) {
        outputMS << "ERROR alg(s):";
        int         errorCount = 0;
        const auto& errorAlgs  = slot.algsStates.algsInState( AState::ERROR );
        for ( uint algIndex : errorAlgs ) {
          outputMS << " " << index2algname( algIndex );
          ++errorCount;
        }
        // No ERROR algorithm at event level although wasAlgError is set: point at the sub-slots
        if ( errorCount == 0 ) outputMS << " in subslot(s)";
        outputMS << "\n\n";
      } else {
        // Snapshot of the Control Flow and FSM states
        outputMS << m_precSvc->printState( slot ) << "\n";
      }

      // Mention sub slots (this is expensive if the number of sub-slots is high)
      if ( m_verboseSubSlots && !slot.allSubSlots.empty() ) {
        outputMS << "\nNumber of sub-slots: " << slot.allSubSlots.size() << "\n\n";
        auto slotID = slot.eventContext->valid() ? std::to_string( slot.eventContext->slot() ) : "[ctx invalid]";
        for ( auto& ss : slot.allSubSlots ) {
          outputMS << "[ slot: " << slotID << ", sub-slot: "
                   << ( ss.eventContext->valid() ? std::to_string( ss.eventContext->subSlot() ) : "[ctx invalid]" )
                   << ", entry: " << ss.entryPoint << ", event: "
                   << ( ss.eventContext->valid() ? std::to_string( ss.eventContext->evt() ) : "[ctx invalid]" )
                   << " ]:\n\n";
          if ( wasAlgError ) {
            outputMS << "ERROR alg(s):";
            const auto& errorAlgs = ss.algsStates.algsInState( AState::ERROR );
            for ( uint algIndex : errorAlgs ) { outputMS << " " << index2algname( algIndex ); }
            outputMS << "\n\n";
          } else {
            // Snapshot of the Control Flow and FSM states in sub slot
            outputMS << m_precSvc->printState( ss ) << "\n";
          }
        }
      }
    }
  }

  //===========================================================================

  // Execution states are dumped only for a specific slot, and only when no algorithm error was found in it
  if ( 0 <= iSlot && !wasAlgError ) {
    outputMS << "\n------------------------------ Algorithm Execution States -----------------------------\n\n";
    m_algExecStateSvc->dump( outputMS, *( m_eventSlots[iSlot].eventContext ) );
  }

  outputMS << "\n=========================================================================================\n"
           << "++++++++++++++++++++++++++++++++++++++ END OF DUMP ++++++++++++++++++++++++++++++++++++++\n"
           << "=========================================================================================\n\n";

  // Emit the whole dump as one message
  info() << outputMS.str() << endmsg;
}


//---------------------------------------------------------------------------


// Schedule one task.
// Asks the algorithm resource pool for an instance of the task's algorithm:
//  - none available: the algorithm is revised to RESOURCELESS and the task is
//    parked in the retry queue;
//  - available: the task is queued and handed to the fiber manager (asynchronous
//    tasks) or to the TBB arena (all other tasks), and the algorithm is revised
//    to SCHEDULED. With a thread pool size of -100 the AlgTask is instead invoked
//    right here, in the calling thread.
// Returns the StatusCode of the state revision.
StatusCode AvalancheSchedulerSvc::schedule( TaskSpec&& ts ) {

  // Check if a free Algorithm instance is available
  const StatusCode acquireSC( m_algResourcePool->acquireAlgorithm( ts.algName, ts.algPtr ) );

  StatusCode sc;

  if ( !acquireSC.isSuccess() ) {

    // No Algorithm instance available: flag it and retry later
    sc = revise( ts.algIndex, ts.contextPtr, AState::RESOURCELESS );
    m_retryQueue.push( std::move( ts ) );

  } else if ( -100 != m_threadPoolSize ) {

    // The TaskSpec is about to be moved into a queue: keep copies of what is needed afterwards
    const unsigned int     taskIndex{ ts.algIndex };
    const std::string_view taskName( ts.algName );
    const unsigned int     taskRank{ ts.algRank };
    const bool             isAsync{ ts.asynchronous };
    const int              taskSlot{ ts.slotIndex };
    EventContext* const    taskContext{ ts.contextPtr };

    if ( isAsync ) {
      // Asynchronous tasks have their own queue and are run through the fiber manager
      m_scheduledAsynchronousQueue.push( std::move( ts ) );
      m_fiberManager->schedule( AlgTask( this, serviceLocator(), m_algExecStateSvc, isAsync ) );
    } else {
      // All other tasks are queued and picked up by an AlgTask enqueued in the TBB arena;
      // only these are counted in m_algosInFlight
      m_scheduledQueue.push( std::move( ts ) );
      m_arena->enqueue( AlgTask( this, serviceLocator(), m_algExecStateSvc, isAsync ) );
      ++m_algosInFlight;
    }

    sc = revise( taskIndex, taskContext, AState::SCHEDULED );

    ON_DEBUG debug() << "Scheduled " << taskName << " [slot:" << taskSlot << ", event:" << taskContext->evt()
                     << ", rank:" << taskRank << ", asynchronous:" << ( isAsync ? "yes" : "no" )
                     << "]. Scheduled algorithms: " << m_algosInFlight + m_blockingAlgosInFlight
                     << ( m_enablePreemptiveBlockingTasks
                              ? " (including " + std::to_string( m_blockingAlgosInFlight ) + " - off TBB runtime)"
                              : "" )
                     << endmsg;

  } else {

    // Pool size of -100: bypass TBB and run the task here, in the scheduler's control thread.
    // Beojan: I don't think this bit works. ts hasn't been pushed into any queue so AlgTask won't retrieve it
    ++m_algosInFlight;
    sc = revise( ts.algIndex, ts.contextPtr, AState::SCHEDULED );
    AlgTask( this, serviceLocator(), m_algExecStateSvc, ts.asynchronous )();
    --m_algosInFlight;
  }

  ON_VERBOSE dumpSchedulerState( -1 );

  return sc;
}


//---------------------------------------------------------------------------


// Sign off a finished task.
// Makes the task's context the current one, updates the in-flight counter,
// translates the execution state of the algorithm into a scheduler state
// (EVTACCEPTED / EVTREJECTED on success, depending on the filter decision,
// ERROR otherwise), revises the algorithm and the states downstream of it, and
// finally requests a state update through m_needsUpdate.
// Returns the StatusCode of the state revision.
StatusCode AvalancheSchedulerSvc::signoff( const TaskSpec& ts ) {

  Gaudi::Hive::setCurrentContext( ts.contextPtr );

  // schedule() increments m_algosInFlight only for non-asynchronous tasks, so only
  // those may be subtracted here: the counter is unsigned and an unmatched decrement
  // for an asynchronous task would make it wrap around.
  if ( !ts.asynchronous ) --m_algosInFlight;

  const AlgExecStateRef algstate = m_algExecStateSvc->algExecState( ts.algPtr, *( ts.contextPtr ) );
  AState                state    = algstate.execStatus().isSuccess()
                                       ? ( algstate.filterPassed() ? AState::EVTACCEPTED : AState::EVTREJECTED )
                                       : AState::ERROR;

  // Update algorithm state and revise the downstream states
  auto sc = revise( ts.algIndex, ts.contextPtr, state, true );

  ON_DEBUG debug() << "Executed " << ts.algName << " [slot:" << ts.slotIndex << ", event:" << ts.contextPtr->evt()
                   << ", rank:" << ts.algRank << ", asynchronous:" << ( ts.asynchronous ? "yes" : "no" )
                   << "]. Scheduled algorithms: " << m_algosInFlight + m_blockingAlgosInFlight
                   << ( m_enablePreemptiveBlockingTasks
                            ? " (including " + std::to_string( m_blockingAlgosInFlight ) + " - off TBB runtime)"
                            : "" )
                   << endmsg;

  // Prompt a call to updateStates
  m_needsUpdate.store( true );
  return sc;
}


//---------------------------------------------------------------------------


// Method to inform the scheduler about event views.
// Queues an action that either attaches the given view context as a sub-slot of
// the source event's slot, entering at node nodeName, or - when viewContext is
// null - disables the sub-slots of that node.
//
// sourceContext: context of the parent event; must not itself use a sub-slot
//                (nested views are rejected with FAILURE).
// nodeName:      name of the node the view is attached to.
// viewContext:   context of the view, ownership is taken; may be null.
// Returns FAILURE on an attempt to nest views, SUCCESS once the action is queued.
StatusCode AvalancheSchedulerSvc::scheduleEventView( const EventContext* sourceContext, const std::string& nodeName,
                                                     std::unique_ptr<EventContext> viewContext ) {
  //  Prevent view nesting
  if ( sourceContext->usesSubSlot() ) {
    fatal() << "Attempted to nest EventViews at node " << nodeName << ": this is not supported" << endmsg;
    return StatusCode::FAILURE;
  }

  ON_VERBOSE verbose() << "Queuing a view for [" << viewContext.get() << "]" << endmsg;

  // It's not possible to create an std::function from a move-capturing lambda,
  // so the unique pointer is released here and re-created inside the action.
  // The node name is captured by value: the action runs later, from the actions
  // queue, when the caller's string may no longer exist.
  auto action = [this, slotIndex = sourceContext->slot(), viewContextPtr = viewContext.release(),
                 node = nodeName]() -> StatusCode {
    EventSlot& topSlot = this->m_eventSlots[slotIndex];

    if ( viewContextPtr ) {
      // Take ownership back and attach the sub-slot to the top-level slot
      topSlot.addSubSlot( std::unique_ptr<EventContext>( viewContextPtr ), node );
    } else {
      // Disable the view node if there are no views
      topSlot.disableSubSlots( node );
    }
    return StatusCode::SUCCESS;
  };

  m_actionsQueue.push( std::move( action ) );

  return StatusCode::SUCCESS;
}


//---------------------------------------------------------------------------


// Sample occupancy at fixed interval (ms)
// Negative value to deactivate, 0 to snapshot every change
// Each sample, apply the callback function to the result
//
// The new settings are not applied immediately: an action storing them in
// m_snapshotInterval / m_snapshotCallback is pushed to the actions queue.
// With a negative samplePeriod only the interval is changed (set to the minimum
// representable duration); the stored callback is left as it is.
void AvalancheSchedulerSvc::recordOccupancy( int samplePeriod, std::function<void( OccupancySnapshot )> callback ) {

  // The lambda is mutable so that the captured callback can really be moved into
  // the member: in a non-mutable lambda the capture is const and std::move would
  // silently fall back to a copy.
  auto action = [this, samplePeriod, callback = std::move( callback )]() mutable -> StatusCode {
    if ( samplePeriod < 0 ) {
      m_snapshotInterval = std::chrono::duration<int64_t, std::milli>::min();
    } else {
      m_snapshotInterval = std::chrono::duration<int64_t, std::milli>( samplePeriod );
      m_snapshotCallback = std::move( callback );
    }
    return StatusCode::SUCCESS;
  };

  m_actionsQueue.push( std::move( action ) );
}


// Dump the data dependencies graph to the file named by m_dataDepsGraphFile.
// For every algorithm whose name matches m_dataDepsGraphAlgoPattern a node is
// added, plus one node per data object (first use only) whose full key matches
// m_dataDepsGraphObjectPattern, with edges object -> algorithm for inputs and
// algorithm -> object for outputs.
//
// inDeps:  input data dependencies, keyed by algorithm name.
// outDeps: output data dependencies, keyed by algorithm name; expected to hold
//          the same algorithm entries as inDeps.
// Returns SUCCESS, or FAILURE if a selected algorithm has no entry in outDeps.
StatusCode AvalancheSchedulerSvc::dumpGraphFile( const std::map<std::string, DataObjIDColl>& inDeps,
                                                 const std::map<std::string, DataObjIDColl>& outDeps ) const {
  // Both maps should have the same algorithm entries
  assert( inDeps.size() == outDeps.size() );

  Gaudi::Hive::Graph g{ m_dataDepsGraphFile.value() };
  info() << "Dumping data dependencies graph to file: " << g.fileName() << endmsg;

  // Hashes of the data objects that already have a node in the graph
  std::set<std::size_t> definedObjects;

  // Regex for selection of algs and objects
  const std::regex algNameRegex( m_dataDepsGraphAlgoPattern.value() );
  const std::regex objNameRegex( m_dataDepsGraphObjectPattern.value() );

  // Return the node id of a data object, adding its node the first time it is seen
  const auto objectNodeId = [&definedObjects, &g]( const auto& dep ) {
    const auto  hash     = dep.hash();
    std::string objIndex = "obj_" + std::to_string( hash );
    if ( definedObjects.insert( hash ).second ) g.addNode( dep.key(), objIndex );
    return objIndex;
  };

  // Algorithm node ids are numbered over the selected algorithms only
  std::size_t algoIndex = 0ul;
  for ( const auto& [algName, ideps] : inDeps ) {
    if ( !std::regex_search( algName, algNameRegex ) ) continue;

    // The size check above is compiled out in release builds and does not guarantee
    // matching keys: look the entry up explicitly instead of letting at() throw
    const auto outIt = outDeps.find( algName );
    if ( outIt == outDeps.end() ) {
      error() << "No output data dependencies entry for algorithm " << algName
              << ": cannot complete the data dependencies graph" << endmsg;
      return StatusCode::FAILURE;
    }

    const std::string algIndex = "Alg_" + std::to_string( algoIndex );
    g.addNode( algName, algIndex );

    // inputs: object -> algorithm
    for ( const auto& dep : ideps ) {
      if ( !std::regex_search( dep.fullKey(), objNameRegex ) ) continue;
      g.addEdge( dep.key(), objectNodeId( dep ), algName, algIndex );
    } // loop on ideps

    // outputs: algorithm -> object
    for ( const auto& dep : outIt->second ) {
      if ( !std::regex_search( dep.fullKey(), objNameRegex ) ) continue;
      g.addEdge( algName, algIndex, dep.key(), objectNodeId( dep ) );
    } // loop on odeps

    ++algoIndex;
  } // loop on inDeps

  return StatusCode::SUCCESS;
}


AlgTask.h

ON_VERBOSE
#define ON_VERBOSE
Definition ApplicationMgr.cpp:40

AvalancheSchedulerSvc.h

ConcurrencyFlags.h

DataHandleHolderVisitor.h

DataObjIDColl
std::unordered_set< DataObjID, DataObjID_Hasher > DataObjIDColl
Definition DataObjID.h:121

FiberManager.h

Algorithm.h

GraphDumper.h

IAlgorithm.h

IDataManagerSvc.h

endmsg
MsgStream & endmsg(MsgStream &s)
MsgStream Modifier: endmsg. Calls the output method of the MsgStream.
Definition MsgStream.h:198

ON_DEBUG
#define ON_DEBUG
Definition OutputStream.cpp:37

DECLARE_COMPONENT
#define DECLARE_COMPONENT(type)
Definition PluginServiceV1.h:45

SerializeSTL.h
Provide serialization function (output only) for some common STL classes (vectors,...

ThreadLocalContext.h

ThreadPoolSvc.h

AlgExecStateRef
wrapper on an Algorithm state.
Definition IAlgExecStateSvc.h:32

AlgExecStateRef::execStatus
const StatusCode & execStatus() const
Definition IAlgExecStateSvc.h:155

AlgExecStateRef::filterPassed
bool filterPassed() const
Definition IAlgExecStateSvc.h:148

AlgsExecutionStates
The AlgsExecutionStates encodes the state machine for the execution of algorithms within a single eve...
Definition AlgsExecutionStates.h:37

AlgsExecutionStates::algsInState
const boost::container::flat_set< int > algsInState(State state) const
Definition AlgsExecutionStates.h:82

AlgsExecutionStates::sizeOfSubset
size_t sizeOfSubset(State state) const
Definition AlgsExecutionStates.h:88

AlgsExecutionStates::containsAny
bool containsAny(std::initializer_list< State > l) const
check if the collection contains at least one state of any listed types
Definition AlgsExecutionStates.h:74

AlgsExecutionStates::set
StatusCode set(unsigned int iAlgo, State newState)
Definition AlgsExecutionStates.cpp:23

AvalancheSchedulerSvc
Definition AvalancheSchedulerSvc.h:113

AvalancheSchedulerSvc::pushNewEvent
StatusCode pushNewEvent(EventContext *eventContext) override
Make an event available to the scheduler.
Definition AvalancheSchedulerSvc.cpp:546

AvalancheSchedulerSvc::ACTIVE
@ ACTIVE
Definition AvalancheSchedulerSvc.h:162

AvalancheSchedulerSvc::FAILURE
@ FAILURE
Definition AvalancheSchedulerSvc.h:162

AvalancheSchedulerSvc::INACTIVE
@ INACTIVE
Definition AvalancheSchedulerSvc.h:162

AvalancheSchedulerSvc::m_checkOutputIgnoreList
Gaudi::Property< std::vector< std::string > > m_checkOutputIgnoreList
Definition AvalancheSchedulerSvc.h:199

AvalancheSchedulerSvc::m_threadPoolSvc
SmartIF< IThreadPoolSvc > m_threadPoolSvc
Definition AvalancheSchedulerSvc.h:367

AvalancheSchedulerSvc::m_useDataLoader
Gaudi::Property< std::string > m_useDataLoader
Definition AvalancheSchedulerSvc.h:206

AvalancheSchedulerSvc::dumpState
void dumpState() override
Dump scheduler state for all slots.
Definition AvalancheSchedulerSvc.cpp:619

AvalancheSchedulerSvc::activate
void activate()
Activate scheduler.
Definition AvalancheSchedulerSvc.cpp:458

AvalancheSchedulerSvc::m_optimizationMode
Gaudi::Property< std::string > m_optimizationMode
Definition AvalancheSchedulerSvc.h:184

AvalancheSchedulerSvc::popFinishedEvent
StatusCode popFinishedEvent(EventContext *&eventContext) override
Blocks until an event is available.
Definition AvalancheSchedulerSvc.cpp:625

AvalancheSchedulerSvc::m_maxAlgosInFlight
size_t m_maxAlgosInFlight
Definition AvalancheSchedulerSvc.h:372

AvalancheSchedulerSvc::m_dumpIntraEventDynamics
Gaudi::Property< bool > m_dumpIntraEventDynamics
Definition AvalancheSchedulerSvc.h:186

AvalancheSchedulerSvc::m_lastSnapshot
std::chrono::system_clock::time_point m_lastSnapshot
Definition AvalancheSchedulerSvc.h:166

AvalancheSchedulerSvc::m_algname_vect
std::vector< std::string > m_algname_vect
Vector to bookkeep the information necessary to the index2name conversion.
Definition AvalancheSchedulerSvc.h:260

AvalancheSchedulerSvc::m_threadPoolSize
Gaudi::Property< int > m_threadPoolSize
Definition AvalancheSchedulerSvc.h:169

AvalancheSchedulerSvc::finalize
StatusCode finalize() override
Finalise.
Definition AvalancheSchedulerSvc.cpp:424

AvalancheSchedulerSvc::m_scheduledQueue
tbb::concurrent_priority_queue< TaskSpec, AlgQueueSort > m_scheduledQueue
Queues for scheduled algorithms.
Definition AvalancheSchedulerSvc.h:357

AvalancheSchedulerSvc::m_snapshotCallback
std::function< void(OccupancySnapshot)> m_snapshotCallback
Definition AvalancheSchedulerSvc.h:167

AvalancheSchedulerSvc::m_retryQueue
std::queue< TaskSpec > m_retryQueue
Definition AvalancheSchedulerSvc.h:359

AvalancheSchedulerSvc::m_verboseSubSlots
Gaudi::Property< bool > m_verboseSubSlots
Definition AvalancheSchedulerSvc.h:220

AvalancheSchedulerSvc::m_actionsQueue
tbb::concurrent_bounded_queue< action > m_actionsQueue
Queue where closures are stored and picked for execution.
Definition AvalancheSchedulerSvc.h:318

AvalancheSchedulerSvc::m_condSvc
SmartIF< ICondSvc > m_condSvc
A shortcut to service for Conditions handling.
Definition AvalancheSchedulerSvc.h:281

AvalancheSchedulerSvc::AState
AlgsExecutionStates::State AState
Definition AvalancheSchedulerSvc.h:159

AvalancheSchedulerSvc::isStalled
bool isStalled(const EventSlot &) const
Check if scheduling in a particular slot is in a stall.
Definition AvalancheSchedulerSvc.cpp:841

AvalancheSchedulerSvc::m_algExecStateSvc
SmartIF< IAlgExecStateSvc > m_algExecStateSvc
Algorithm execution state manager.
Definition AvalancheSchedulerSvc.h:278

AvalancheSchedulerSvc::pushNewEvents
StatusCode pushNewEvents(std::vector< EventContext * > &eventContexts) override
Definition AvalancheSchedulerSvc.cpp:604

AvalancheSchedulerSvc::revise
StatusCode revise(unsigned int iAlgo, EventContext *contextPtr, AState state, bool iterate=false)
Definition AvalancheSchedulerSvc.cpp:800

AvalancheSchedulerSvc::m_finishedEvents
tbb::concurrent_bounded_queue< EventContext * > m_finishedEvents
Queue of finished events.
Definition AvalancheSchedulerSvc.h:275

AvalancheSchedulerSvc::deactivate
StatusCode deactivate()
Deactivate scheduler.
Definition AvalancheSchedulerSvc.cpp:515

AvalancheSchedulerSvc::m_maxEventsInFlight
size_t m_maxEventsInFlight
Definition AvalancheSchedulerSvc.h:371

AvalancheSchedulerSvc::m_maxBlockingAlgosInFlight
Gaudi::Property< unsigned int > m_maxBlockingAlgosInFlight
Definition AvalancheSchedulerSvc.h:179

AvalancheSchedulerSvc::m_algosInFlight
unsigned int m_algosInFlight
Number of algorithms presently in flight.
Definition AvalancheSchedulerSvc.h:284

AvalancheSchedulerSvc::m_blockingAlgosInFlight
unsigned int m_blockingAlgosInFlight
Number of algorithms presently in flight.
Definition AvalancheSchedulerSvc.h:287

AvalancheSchedulerSvc::m_dataDepsGraphObjectPattern
Gaudi::Property< std::string > m_dataDepsGraphObjectPattern
Definition AvalancheSchedulerSvc.h:232

AvalancheSchedulerSvc::m_showDataFlow
Gaudi::Property< bool > m_showDataFlow
Definition AvalancheSchedulerSvc.h:214

AvalancheSchedulerSvc::schedule
StatusCode schedule(TaskSpec &&)
Definition AvalancheSchedulerSvc.cpp:1022

AvalancheSchedulerSvc::m_precSvc
SmartIF< IPrecedenceSvc > m_precSvc
A shortcut to the Precedence Service.
Definition AvalancheSchedulerSvc.h:263

AvalancheSchedulerSvc::m_checkDeps
Gaudi::Property< bool > m_checkDeps
Definition AvalancheSchedulerSvc.h:195

AvalancheSchedulerSvc::m_snapshotInterval
std::chrono::duration< int64_t, std::milli > m_snapshotInterval
Definition AvalancheSchedulerSvc.h:165

AvalancheSchedulerSvc::m_dataDepsGraphFile
Gaudi::Property< std::string > m_dataDepsGraphFile
Definition AvalancheSchedulerSvc.h:222

AvalancheSchedulerSvc::m_algResourcePool
SmartIF< IAlgResourcePool > m_algResourcePool
Cache for the algorithm resource pool.
Definition AvalancheSchedulerSvc.h:313

AvalancheSchedulerSvc::m_showControlFlow
Gaudi::Property< bool > m_showControlFlow
Definition AvalancheSchedulerSvc.h:217

AvalancheSchedulerSvc::m_simulateExecution
Gaudi::Property< bool > m_simulateExecution
Definition AvalancheSchedulerSvc.h:181

AvalancheSchedulerSvc::dumpGraphFile
StatusCode dumpGraphFile(const std::map< std::string, DataObjIDColl > &inDeps, const std::map< std::string, DataObjIDColl > &outDeps) const
Definition AvalancheSchedulerSvc.cpp:1178

AvalancheSchedulerSvc::m_whiteboardSvcName
Gaudi::Property< std::string > m_whiteboardSvcName
Definition AvalancheSchedulerSvc.h:178

AvalancheSchedulerSvc::tryPopFinishedEvent
StatusCode tryPopFinishedEvent(EventContext *&eventContext) override
Try to fetch an event from the scheduler.
Definition AvalancheSchedulerSvc.cpp:646

AvalancheSchedulerSvc::AlgTask
friend class AlgTask
Definition AvalancheSchedulerSvc.h:115

AvalancheSchedulerSvc::m_algname_index_map
std::unordered_map< std::string, unsigned int > m_algname_index_map
Map to bookkeep the information necessary to the name2index conversion.
Definition AvalancheSchedulerSvc.h:254

AvalancheSchedulerSvc::m_needsUpdate
std::atomic< bool > m_needsUpdate
Definition AvalancheSchedulerSvc.h:362

AvalancheSchedulerSvc::scheduleEventView
virtual StatusCode scheduleEventView(const EventContext *sourceContext, const std::string &nodeName, std::unique_ptr< EventContext > viewContext) override
Method to inform the scheduler about event views.
Definition AvalancheSchedulerSvc.cpp:1123

AvalancheSchedulerSvc::m_maxParallelismExtra
Gaudi::Property< int > m_maxParallelismExtra
Definition AvalancheSchedulerSvc.h:174

AvalancheSchedulerSvc::signoff
StatusCode signoff(const TaskSpec &)
The call to this method is triggered only from within the AlgTask.
Definition AvalancheSchedulerSvc.cpp:1092

AvalancheSchedulerSvc::action
std::function< StatusCode()> action
Definition AvalancheSchedulerSvc.h:160

AvalancheSchedulerSvc::m_dataDepsGraphAlgoPattern
Gaudi::Property< std::string > m_dataDepsGraphAlgoPattern
Definition AvalancheSchedulerSvc.h:227

AvalancheSchedulerSvc::m_numOffloadThreads
Gaudi::Property< int > m_numOffloadThreads
Definition AvalancheSchedulerSvc.h:191

AvalancheSchedulerSvc::m_checkOutput
Gaudi::Property< bool > m_checkOutput
Definition AvalancheSchedulerSvc.h:197

AvalancheSchedulerSvc::m_isActive
std::atomic< ActivationState > m_isActive
Flag to track if the scheduler is active or not.
Definition AvalancheSchedulerSvc.h:245

AvalancheSchedulerSvc::initialize
StatusCode initialize() override
Initialise.
Definition AvalancheSchedulerSvc.cpp:78

AvalancheSchedulerSvc::m_enableCondSvc
Gaudi::Property< bool > m_enableCondSvc
Definition AvalancheSchedulerSvc.h:209

AvalancheSchedulerSvc::recordOccupancy
virtual void recordOccupancy(int samplePeriod, std::function< void(OccupancySnapshot)> callback) override
Sample occupancy at fixed interval (ms) Negative value to deactivate, 0 to snapshot every change Each...
Definition AvalancheSchedulerSvc.cpp:1163

AvalancheSchedulerSvc::eventFailed
void eventFailed(EventContext *eventContext)
Method to execute if an event failed.
Definition AvalancheSchedulerSvc.cpp:859

AvalancheSchedulerSvc::m_enablePreemptiveBlockingTasks
Gaudi::Property< bool > m_enablePreemptiveBlockingTasks
Definition AvalancheSchedulerSvc.h:188

AvalancheSchedulerSvc::m_freeSlots
std::atomic_int m_freeSlots
Atomic to account for asynchronous updates by the scheduler wrt the rest.
Definition AvalancheSchedulerSvc.h:272

AvalancheSchedulerSvc::m_arena
tbb::task_arena * m_arena
Definition AvalancheSchedulerSvc.h:368

AvalancheSchedulerSvc::freeSlots
unsigned int freeSlots() override
Get free slots number.
Definition AvalancheSchedulerSvc.cpp:615

AvalancheSchedulerSvc::dumpSchedulerState
void dumpSchedulerState(int iSlot)
Dump the state of the scheduler.
Definition AvalancheSchedulerSvc.cpp:880

AvalancheSchedulerSvc::m_fiberManager
std::unique_ptr< FiberManager > m_fiberManager
Definition AvalancheSchedulerSvc.h:369

AvalancheSchedulerSvc::m_whiteboard
SmartIF< IHiveWhiteBoard > m_whiteboard
A shortcut to the whiteboard.
Definition AvalancheSchedulerSvc.h:266

AvalancheSchedulerSvc::m_scheduledAsynchronousQueue
tbb::concurrent_priority_queue< TaskSpec, AlgQueueSort > m_scheduledAsynchronousQueue
Definition AvalancheSchedulerSvc.h:358

AvalancheSchedulerSvc::iterate
StatusCode iterate()
Loop on all slots to schedule DATAREADY algorithms and sign off ready events.
Definition AvalancheSchedulerSvc.cpp:666

AvalancheSchedulerSvc::m_showDataDeps
Gaudi::Property< bool > m_showDataDeps
Definition AvalancheSchedulerSvc.h:211

AvalancheSchedulerSvc::m_thread
std::thread m_thread
The thread in which the activate function runs.
Definition AvalancheSchedulerSvc.h:248

AvalancheSchedulerSvc::index2algname
const std::string & index2algname(unsigned int index)
Convert an integer to a name.
Definition AvalancheSchedulerSvc.h:257

AvalancheSchedulerSvc::m_eventSlots
std::vector< EventSlot > m_eventSlots
Vector of events slots.
Definition AvalancheSchedulerSvc.h:269

CommonMessagingBase::error
MsgStream & error() const
shortcut for the method msgStream(MSG::ERROR)
Definition CommonMessaging.h:107

CommonMessagingBase::verbose
MsgStream & verbose() const
shortcut for the method msgStream(MSG::VERBOSE)
Definition CommonMessaging.h:119

CommonMessagingBase::warning
MsgStream & warning() const
shortcut for the method msgStream(MSG::WARNING)
Definition CommonMessaging.h:110

CommonMessagingBase::fatal
MsgStream & fatal() const
shortcut for the method msgStream(MSG::FATAL)
Definition CommonMessaging.h:101

CommonMessagingBase::debug
MsgStream & debug() const
shortcut for the method msgStream(MSG::DEBUG)
Definition CommonMessaging.h:116

CommonMessagingBase::msg
MsgStream & msg() const
shortcut for the method msgStream(MSG::INFO)
Definition CommonMessaging.h:122

CommonMessagingBase::info
MsgStream & info() const
shortcut for the method msgStream(MSG::INFO)
Definition CommonMessaging.h:113

CommonMessaging< implements< IService, IProperty, IStateful > >::msgLevel
MSG::Level msgLevel() const
Definition CommonMessaging.h:147

DHHVisitor
Definition DataHandleHolderVisitor.h:20

DHHVisitor::owners_names_of
std::vector< std::string > owners_names_of(const DataObjID &id, bool with_main=false) const
Definition DataHandleHolderVisitor.cpp:82

DataHandleHolderBase::outputDataObjs
const DataObjIDColl & outputDataObjs() const override
Definition DataHandleHolderBase.h:83

DataHandleHolderBase::addDependency
void addDependency(const DataObjID &id, const Gaudi::DataHandle::Mode &mode) override
Definition DataHandleHolderBase.h:85

DataHandleHolderBase::inputDataObjs
const DataObjIDColl & inputDataObjs() const override
Definition DataHandleHolderBase.h:82

DataObjID
Definition DataObjID.h:47

DataObjID::fullKey
std::string fullKey() const
combination of the key and the ClassName, mostly for debugging
Definition DataObjID.cpp:103

EventContext
This class represents an entry point to all the event specific data.
Definition EventContext.h:35

EventContext::evt
ContextEvt_t evt() const
Definition EventContext.h:51

EventContext::subSlot
ContextID_t subSlot() const
Definition EventContext.h:53

EventContext::slot
ContextID_t slot() const
Definition EventContext.h:52

EventContext::usesSubSlot
bool usesSubSlot() const
Definition EventContext.h:54

Gaudi::Algorithm
Base class from which all concrete algorithm classes should be derived.
Definition Algorithm.h:87

Gaudi::Algorithm::acceptDHVisitor
void acceptDHVisitor(IDataHandleVisitor *) const override
Definition Algorithm.cpp:183

Gaudi::Algorithm::name
const std::string & name() const override
The identifying name of the algorithm object.
Definition Algorithm.cpp:529

Gaudi::DataHandle::Writer
@ Writer
Definition DataHandle.h:39

Gaudi::Hive::Graph
utilities to dump graphs in different formats
Definition GraphDumper.h:30

IAlgorithm
The IAlgorithm is the interface implemented by the Algorithm base class.
Definition IAlgorithm.h:36

IAlgorithm::type
virtual const std::string & type() const =0
The type of the algorithm.

ISvcLocator::service
virtual SmartIF< IService > & service(const Gaudi::Utils::TypeNameString &typeName, const bool createIf=true)=0
Returns a smart pointer to a service.

ITimelineSvc
Definition ITimelineSvc.h:20

PrecedenceSvc
A service to resolve the task execution precedence.
Definition PrecedenceSvc.h:30

PrecedenceSvc::getRules
const concurrency::PrecedenceRulesGraph * getRules() const
Precedence rules accessor.
Definition PrecedenceSvc.h:74

Service::serviceLocator
SmartIF< ISvcLocator > & serviceLocator() const override
Retrieve pointer to service locator.
Definition Service.cpp:336

Service::finalize
StatusCode finalize() override
Definition Service.cpp:223

Service::name
const std::string & name() const override
Retrieve name of the service.
Definition Service.cpp:333

Service::initialize
StatusCode initialize() override
Definition Service.cpp:118

SmartIF
Small smart pointer class with automatic reference counting for IInterface.
Definition SmartIF.h:28

SmartIF::isValid
bool isValid() const
Allow for check if smart pointer is valid.
Definition SmartIF.h:69

StatusCode
This class is used for returning status codes from appropriate routines.
Definition StatusCode.h:64

StatusCode::isFailure
bool isFailure() const
Definition StatusCode.h:129

StatusCode::ignore
const StatusCode & ignore() const
Allow discarding a StatusCode without warning.
Definition StatusCode.h:139

StatusCode::isSuccess
bool isSuccess() const
Definition StatusCode.h:314

StatusCode::SUCCESS
constexpr static const auto SUCCESS
Definition StatusCode.h:99

StatusCode::FAILURE
constexpr static const auto FAILURE
Definition StatusCode.h:100

ThreadPoolSvc
A service which initializes a TBB thread pool.
Definition ThreadPoolSvc.h:37

concurrency::AlgorithmNode::getAlgoIndex
unsigned int getAlgoIndex() const
Get algorithm index.
Definition PrecedenceRulesGraph.h:521

concurrency::PrecedenceRulesGraph::getControlFlowNodeCounter
unsigned int getControlFlowNodeCounter() const
Get total number of control flow graph nodes.
Definition PrecedenceRulesGraph.h:660

concurrency::PrecedenceRulesGraph::getAlgorithmNode
AlgorithmNode * getAlgorithmNode(const std::string &algoName) const
Get the AlgorithmNode by algorithm name using the graph index.
Definition PrecedenceRulesGraph.h:652

EventStatus::AlgStall
@ AlgStall
Definition IAlgExecStateSvc.h:75

EventStatus::Success
@ Success
Definition IAlgExecStateSvc.h:75

Gaudi::Hive::setCurrentContext
GAUDI_API void setCurrentContext(const EventContext *ctx)
Definition ThreadLocalContext.cpp:41

GaudiUtils::operator<<
std::ostream & operator<<(std::ostream &s, const std::pair< T1, T2 > &p)
Serialize a std::pair in a Python-like format, e.g. "(1, 2)".
Definition SerializeSTL.h:91

MSG::VERBOSE
@ VERBOSE
Definition IMessageSvc.h:22

AvalancheSchedulerSvc::TaskSpec
Struct to hold entries in the alg queues.
Definition AvalancheSchedulerSvc.h:321

Cause
Definition PrecedenceRulesGraph.h:397

Cause::source::Task
@ Task
Definition PrecedenceRulesGraph.h:398

Cause::source::Root
@ Root
Definition PrecedenceRulesGraph.h:398

EventSlot
Class representing an event slot.
Definition EventSlot.h:23

EventSlot::eventContext
std::unique_ptr< EventContext > eventContext
Cache for the eventContext.
Definition EventSlot.h:82

EventSlot::allSubSlots
std::vector< EventSlot > allSubSlots
Actual sub-slot instances.
Definition EventSlot.h:99

EventSlot::complete
bool complete
Flags completion of the event.
Definition EventSlot.h:88

EventSlot::addSubSlot
void addSubSlot(std::unique_ptr< EventContext > viewContext, const std::string &nodeName)
Add a subslot to the slot (this constructs a new slot and registers it with the parent one)
Definition EventSlot.h:60

EventSlot::reset
void reset(EventContext *theeventContext)
Reset all resources in order to reuse the slot (thread-unsafe)
Definition EventSlot.h:48

EventSlot::algsStates
AlgsExecutionStates algsStates
Vector of algorithms states.
Definition EventSlot.h:84

EventSlot::disableSubSlots
void disableSubSlots(const std::string &nodeName)
Disable event views for a given CF view node by registering an empty container.
Definition EventSlot.h:77

TimelineEvent
Definition TimelineEvent.h:17

TimelineEvent::algorithm
std::string algorithm
Definition TimelineEvent.h:25