// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
// =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
//
// rminternal.h
//
// Main internal header file for ConcRT's Resource Manager.
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#pragma once
namespace Concurrency
{
namespace details
{
// The lowest two bits are used to determine the type of the pointer stored in the
// execution resource TLS slot (created in the RM).
// Mask isolating the two tag bits of the TLS slot value.
static const size_t TlsResourceBitMask = 0x3;
// Tag value: the slot holds a pointer tagged as an execution resource.
static const size_t TlsResourceInResource = 0x0;
// Tag value: the slot holds a pointer tagged as a proxy.
static const size_t TlsResourceInProxy = 0x1;
// The RM has an array of processor nodes and cores representing the hardware topology on the machine.
// Every scheduler that asks for an allocation gets its own copy of this array of nodes/cores once they
// have been granted an allocation of cores by the RM, that is stored in the corresponding scheduler proxy.
///
/// An abstraction of a hardware affinity that understands how to deal with affinity on both Win7 and pre-Win7
/// platforms. Wraps a GROUP_AFFINITY {group, mask} pair; pre-Win7 affinities always live in group 0.
///
struct HardwareAffinity
{
public:
    ///
    /// Construct an empty affinity (group 0, empty mask).
    ///
    HardwareAffinity()
    {
        // Zero the entire structure, including any reserved fields.
        memset(&m_affinity, 0, sizeof(m_affinity));
    }

    ///
    /// Construct a hardware affinity from a given thread.
    ///
    HardwareAffinity(HANDLE hThread);

    ///
    /// Construct a Win7 understood affinity from an explicit {group, mask} pair.
    ///
    HardwareAffinity(USHORT processorGroup, ULONG_PTR affinityMask)
    {
        memset(&m_affinity, 0, sizeof(m_affinity));
        m_affinity.Group = processorGroup;
        m_affinity.Mask = affinityMask;
    }

    ///
    /// Construct a pre-Win7 understood affinity. The processor group is implicitly 0.
    ///
    HardwareAffinity(DWORD_PTR affinityMask)
    {
        memset(&m_affinity, 0, sizeof(m_affinity));
        m_affinity.Group = 0;
        m_affinity.Mask = affinityMask;
    }

    ///
    /// Copy construct an affinity. Copies the entire GROUP_AFFINITY, including reserved fields.
    ///
    HardwareAffinity(const HardwareAffinity &src)
    {
        m_affinity = src.m_affinity;
    }

    ///
    /// Copy an affinity. Copies the entire GROUP_AFFINITY (including reserved fields) so that
    /// assignment and copy construction behave identically.
    ///
    HardwareAffinity& operator=(const HardwareAffinity &rhs)
    {
        m_affinity = rhs.m_affinity;
        return *this;
    }

    ///
    /// Compare two affinities for equality. Only the group and mask participate in the comparison;
    /// reserved fields are ignored. Const so that const affinities can be compared.
    ///
    bool operator==(const HardwareAffinity &rhs) const
    {
        return (rhs.m_affinity.Group == m_affinity.Group && rhs.m_affinity.Mask == m_affinity.Mask);
    }

    ///
    /// Compare two affinities for inequality.
    ///
    bool operator!=(const HardwareAffinity &rhs) const
    {
        return !operator==(rhs);
    }

    ///
    /// Return the group number.
    ///
    USHORT GetGroup() const
    {
        return m_affinity.Group;
    }

    ///
    /// Return the group mask.
    ///
    KAFFINITY GetMask() const
    {
        return m_affinity.Mask;
    }

    ///
    /// Modify the affinity to be the intersection of the existing affinity and the provided affinity.
    /// Only the mask is intersected; the group is unchanged.
    ///
    void IntersectWith(KAFFINITY affinity)
    {
        m_affinity.Mask &= affinity;
    }

    ///
    /// Applies this hardware affinity to a thread.
    ///
    ///
    /// The thread handle to which to apply this affinity.
    ///
    void ApplyTo(HANDLE hThread);

    GROUP_AFFINITY m_affinity;
};
///
/// Base class for description of a core or hardware thread.
///
/// NOTE: Derived types (GlobalCore/SchedulerCore) are initialized and copied with memset/memcpy
/// elsewhere in the RM (see the TRANSITION comments below), so this must remain a plain data
/// type with no virtual methods.
///
struct ProcessorCore
{
enum CoreState
{
// Initial state for all cores.
Unknown = 0,
// A core that is not assigned to a scheduler proxy, either because the scheduler proxy did
// not ask for it (it already has what it desires), or because it is assigned to some other scheduler.
// This state is only used for core in a scheduler's local view of resources.
Unassigned,
// The core is available and may be reserved for or allocated to a scheduler.
Available,
// The core is reserved for a scheduler by the Resource Manager. This state is only used for core
// in a scheduler's local view of resources.
Reserved,
// The core is allocated to a scheduler. Cores transition from Reserved to Allocated once the
// scheduler proxy has allocated execution resources or virtual processors for its scheduler.
Allocated,
// When a scheduler releases a core for a different scheduler, it sets its core state to Stolen.
// This enables it to track the cores it has relinquished. If the core is not allocated to the
// receiving scheduler, it will revert back to Allocated for the scheduler proxy it came from.
Stolen,
// A core is considered idle during dynamic core migration if the scheduler(s) that core is assigned
// to, have all vprocs de-activated. This state is only used for cores in the global map.
Idle
};
// Current allocation state of the core (see CoreState above).
// 'Available' means available for assignment to a scheduler during the allocation calculation.
CoreState m_coreState;
// The Resource Manager assigned identifier for the processor core.
unsigned int m_id;
// The processor number in Win7 {group, processor number} id scheme.
BYTE m_processorNumber;
};
// Forward declaration.
struct GlobalNode;
class ResourceManager;
///
/// Representation of a processor core within the RM's global map of execution resources. Information in this struct
/// represents a systemwide view of the underlying hardware thread.
///
struct GlobalCore : public ProcessorCore
{
// Back pointer to the node for topological enumeration.
GlobalNode *m_pOwningNode;
// The number of schedulers that this core is assigned to.
unsigned int m_useCount;
// Used to send notifications to qualifying schedulers regarding external subscription level changes.
LONG m_currentSubscriptionLevel;
LONG m_previousSubscriptionLevel;
// This field is used during core migration to represent the number of schedulers that this core has been allocated
// to, that have also deactivated all virtual processors on the core, i.e. the number of schedulers that are 'idle'
// with respect to this core. When this is equal to the use count, the core is considered 'idle'.
unsigned int m_idleSchedulers;
///
/// Initializes a processor core. Allocates the heap-side TopologyObject (see TRANSITION note
/// below) and records the owning node, RM-assigned id, and Win7 processor number.
///
void Initialize(GlobalNode *pOwningNode, unsigned int id, BYTE processorNumber)
{
m_pTopologyObject = _concrt_new TopologyObject(this);
m_pOwningNode = pOwningNode;
m_id = id;
m_processorNumber = processorNumber;
}
// Frees the side TopologyObject allocated by Initialize.
// NOTE(review): assumes m_pTopologyObject is either null (the owning node memsets its core
// array before use — see GlobalNode::Initialize) or was set by Initialize; confirm all
// construction paths zero the struct before destruction.
~GlobalCore()
{
delete m_pTopologyObject;
}
//**************************************************
// Topology walking:
//
//
// TRANSITION: This needs to be eliminated. There are a slew of memcpy and memset operations on GlobalCore and GlobalNode which
// assume this is how they initialize and copy the base class. We cannot add virtual methods on GlobalCore without cleaning ALL of that
// up. Hence, we heap alloc a side object until such time as it can be cleaned up.
//
// Side object implementing the public ITopologyExecutionResource interface for this core.
struct TopologyObject : public ITopologyExecutionResource
{
// Back pointer to the core this topology object describes.
GlobalCore *m_pCore;
TopologyObject(GlobalCore *pCore) : m_pCore(pCore)
{
}
///
/// Returns an interface to the next execution resource in enumeration order.
///
///
/// An interface to the next execution resource in enumeration order. If there are no more nodes in enumeration order of the node to which
/// this execution resource belongs, this method will return NULL.
///
///
///
/**/
virtual ITopologyExecutionResource *GetNext() const;
///
/// Returns the resource manager's unique identifier for this execution resource.
///
///
/// The resource manager's unique identifier for this execution resource.
///
/**/
virtual unsigned int GetId() const
{
return m_pCore->m_id;
}
} *m_pTopologyObject;
};
///
/// Representation of a processor core within a scheduler proxy's local map of execution resources. Information in this struct
/// represents the schedulers utilization of the underlying hardware thread.
///
struct SchedulerCore : public ProcessorCore
{
    // When virtual processor roots are created for a scheduler proxy, or external threads are subscribed, the corresponding
    // execution resources are inserted into this list.
    List m_resources;

    // Each scheduler core has a pointer to the corresponding global core's use count.
    unsigned int* m_pGlobalUseCountPtr{};

    // This field represents the number of activated virtual processors and subscribed threads that a scheduler has
    // on this core at any time. When a virtual processor root is deactivated, or when a thread subscription is released
    // the count is decremented. The core is considered to be 'idle' in the scheduler it belongs to, if this value is 0.
    volatile LONG m_subscriptionLevel{};

    // Used to send notifications to qualifying schedulers regarding external subscription level changes.
    LONG m_currentSubscriptionLevel{};
    LONG m_previousSubscriptionLevel{};

    // The number of threads that were assigned to this core through initial allocation or core migration.
    // Note that this is not necessarily equal to the number of roots in the m_resources list, since the list
    // includes oversubscribed vproc roots as well.
    unsigned int m_numAssignedThreads{};

    // The total number of threads (running on vprocs and external) that require this core to be fixed.
    unsigned int m_numFixedThreads{};

    // The number of external threads that run on this core.
    unsigned int m_numExternalThreads{};

    // This is set to true for a scheduler proxy's core during static allocation or core migration if the subscription
    // level on the core is found to be 0 when the Dynamic RM worker is executing. The subscription value can change
    // as soon as it is captured, but the captured value is what is used for successive computation.
    bool m_fIdleDuringDRM{};

    // This is set to true for a scheduler proxy's core during core migration, if this is a borrowed core.
    // A borrowed core is a core that is assigned to one or more different schedulers, but was found to be idle.
    // The RM temporarily assigns idle resources to schedulers that need them.
    bool m_fBorrowed{};

    // This variable is set to true when a borrowed core is converted to a fixed core. When the core is unfixed,
    // it is marked borrowed again.
    bool m_fPreviouslyBorrowed{};

    ///
    /// Returns whether this core is fixed, i.e., cannot be removed by the RM.
    ///
    bool IsFixed() const
    {
        return m_numFixedThreads > 0;
    }

    ///
    /// Returns whether this core is idle, i.e., its subscription level was 0 at the time it was retrieved by the RM.
    /// Note that this state could change, but once we capture it, we consider it idle until the next time it is captured.
    ///
    bool IsIdle() const
    {
        return m_fIdleDuringDRM;
    }

    ///
    /// Returns whether this core is borrowed, i.e., it was temporarily lent to this scheduler due to the owning
    /// scheduler being idle on this core.
    ///
    bool IsBorrowed() const
    {
        return m_fBorrowed;
    }
};
///
/// Base class for the description of a processor package or NUMA node.
///
/// NOTE: Derived types (GlobalNode/SchedulerNode) are initialized and copied with memset/memcpy
/// elsewhere in the RM (see the TRANSITION comments on GlobalNode), so this must remain a plain
/// data type with no virtual methods.
///
struct ProcessorNode
{
// The affinity mask of the node.
ULONG_PTR m_nodeAffinity;
// The total number of cores in the node.
unsigned int m_coreCount;
// The number cores that are available for allocation (cores whose state is ProcessorCore::Available)
unsigned int m_availableCores;
// The group number in the Win7 {group, mask} id format.
unsigned int m_processorGroup;
// The node id which maps to a scheduler node id.
unsigned int m_id;
// The NUMA node to which this node belongs.
DWORD m_numaNodeNumber;
};
///
/// Representation of a processor node within a scheduler proxy's local map of execution resources. Information in this struct
/// represents the schedulers utilization of the underlying node.
///
struct SchedulerNode : public ProcessorNode
{
    // A scratch field used during allocation. The allocation routine works by looking at cores with m_useCount=0,
    // grabs all it can, then looks at m_useCount=1, then m_useCount=2, etc... During an allocation attempt at a particular
    // use count, cores that were reserved at previous use counts are stored in m_reservedCores, and cores available at the
    // current use count are stored in m_availableCores. A subset of the available cores may be reserved for the scheduler
    // proxy.
    unsigned int m_reservedCores;

    // Number of cores allocated to the scheduler proxy (this field is only applicable to a scheduler proxy's nodes).
    unsigned int m_allocatedCores;

    // The number of allocated cores that are borrowed. A borrowed core is a core that is assigned to
    // one or more different schedulers, but was found to be idle. The RM temporarily assigns idle resources to
    // schedulers that need them.
    unsigned int m_numBorrowedCores;

    // The number of cores on this node that are considered fixed. Fixed cores cannot be removed by the RM during static/dynamic allocation.
    unsigned int m_numFixedCores;

    // The number of cores in this node for the scheduler in question that were found to be idle during the dynamic RM phase. This is
    // a scratch field, and the value is stale outside of dynamic RM phases.
    unsigned int m_numDRMIdle;

    // The number of borrowed cores in this node for the scheduler in question that were found to be idle during the dynamic RM phase.
    // This is a scratch field, and the value is stale outside of dynamic RM phases.
    unsigned int m_numDRMBorrowedIdle;

    // The array of cores in this node. Owned by this node; released via Cleanup().
    SchedulerCore * m_pCores;

    ///
    /// Returns the number of cores that were found to be idle during the dynamic RM phase.
    /// Stale outside of dynamic RM phases.
    ///
    unsigned int GetNumIdleCores() const
    {
        return m_numDRMIdle;
    }

    ///
    /// Returns the number of allocated cores in this node that are fixed - cannot be removed by dynamic RM.
    ///
    unsigned int GetNumFixedCores() const
    {
        return m_numFixedCores;
    }

    ///
    /// Returns the number of movable cores within this node (allocated cores that are not fixed).
    ///
    unsigned int GetNumMigratableCores() const
    {
        return m_allocatedCores - m_numFixedCores;
    }

    ///
    /// Returns the number of owned cores - cores that are not borrowed from a different scheduler.
    ///
    unsigned int GetNumOwnedCores() const
    {
        return m_allocatedCores - m_numBorrowedCores;
    }

    ///
    /// Returns the number of non-borrowed, non-fixed cores.
    ///
    unsigned int GetNumOwnedMigratableCores() const
    {
        return m_allocatedCores - m_numBorrowedCores - m_numFixedCores;
    }

    ///
    /// Returns the number of borrowed cores - cores that were temporarily lent to this scheduler since the scheduler(s) they
    /// were assigned to, were not using them.
    ///
    unsigned int GetNumBorrowedCores() const
    {
        return m_numBorrowedCores;
    }

    ///
    /// Returns the number of borrowed cores that are idle. Stale outside of dynamic RM phases.
    ///
    unsigned int GetNumBorrowedIdleCores() const
    {
        return m_numDRMBorrowedIdle;
    }

    ///
    /// Returns the number of borrowed cores that are not idle.
    ///
    unsigned int GetNumBorrowedInUseCores() const
    {
        ASSERT(m_numBorrowedCores >= m_numDRMBorrowedIdle);
        return (m_numBorrowedCores - m_numDRMBorrowedIdle);
    }

    ///
    /// Deallocates memory allocated by the node. Nulls the core array pointer so that a stale
    /// pointer cannot be dereferenced or double-deleted after cleanup.
    ///
    void Cleanup(void)
    {
        delete [] m_pCores;
        m_pCores = NULL;
    }
};
///
/// Representation of a processor node within the RM's global map of execution resources. Information in this struct
/// represents a systemwide view of the underlying node.
///
struct GlobalNode : public ProcessorNode
{
// Back pointer to the RM for topological enumeration
ResourceManager *m_pRM;
// A scratch field used during dynamic RM allocation, on the RM's global copy of nodes. Idle cores represents the number
// of cores on this node that are idle and can temporarily be assigned to another scheduler that needs cores.
unsigned int m_idleCores;
// The array of cores in this node.
GlobalCore * m_pCores;
///
/// Initializes a processor node. Allocates the heap-side TopologyObject (see TRANSITION note below),
/// records the node's identity and affinity, then allocates, zeroes and initializes the core array.
/// NOTE(review): m_numaNodeNumber and m_availableCores (beyond being zeroed here) are presumably
/// populated elsewhere - confirm against callers.
///
//
// coreCount == 0 / baseProcNum == 0 is for regular topologies
// coreCount != 0 / baseProcNum != 0 is for fake / created topologies
//
void Initialize(ResourceManager *pRM, USHORT id, USHORT processorGroup, ULONG_PTR affinityMask, unsigned int coreCount = 0, unsigned int baseProcNum = 0)
{
m_pTopologyObject = _concrt_new TopologyObject(this);
m_pRM = pRM;
m_id = id;
m_processorGroup = processorGroup;
m_nodeAffinity = affinityMask;
// For regular topologies the core count is implied by the population count of the affinity mask.
if (coreCount == 0)
m_coreCount = NumberOfBitsSet(affinityMask);
else
m_coreCount = coreCount;
m_availableCores = 0;
// Zero-fill the array before per-core Initialize; GlobalCore relies on memset-style
// initialization (see the TRANSITION comments) and its destructor deletes m_pTopologyObject.
m_pCores = _concrt_new GlobalCore[m_coreCount];
memset(m_pCores, 0, m_coreCount * sizeof(GlobalCore));
// i walks processor-number positions (bits of the affinity mask); j counts cores initialized.
// The loop ends once all m_coreCount cores have been initialized.
for (unsigned int i = 0, j = 0; j < m_coreCount; ++i)
{
ASSERT(i < sizeof(ULONG_PTR) * 8);
// Check if the LSB of the affinity mask is set. For fake topologies (coreCount != 0) every
// position yields a core regardless of the mask.
if (coreCount != 0 || ((affinityMask & 1) != 0))
{
// Bit 0 of the affinity mask corresponds to processor number 0, bit 1 to processor number 1, etc...
// Regular topologies encode the RM core id as (group << 8) + processor number; fake topologies
// encode it as (group << 16) + (node id << 8) + processor number, offset by baseProcNum.
if (coreCount == 0)
m_pCores[j++].Initialize(this, (m_processorGroup << 8) + i, (BYTE)i);
else
#pragma warning(suppress: 6385) // False positive warning, VSO-1806798
m_pCores[j++].Initialize(this, (m_processorGroup << 16) + (id << 8) + i, (BYTE)i + (BYTE)baseProcNum);
}
// Right shift the affinity by 1.
affinityMask >>= 1;
}
}
///
/// Returns the next core from pCore within the node, or NULL when pCore is the last core.
/// pCore must point into this node's m_pCores array.
///
GlobalCore *GetNextGlobalCore(const GlobalCore *pCore)
{
unsigned int idx = (unsigned int)((pCore - m_pCores) + 1);
return ((idx < m_coreCount) ? &m_pCores[idx] : NULL);
}
// Frees the side TopologyObject allocated by Initialize.
// NOTE(review): m_pCores is not freed here - confirm its lifetime is managed elsewhere.
~GlobalNode()
{
delete m_pTopologyObject;
}
//**************************************************
// Topology Walking:
//
//
// TRANSITION: This needs to be eliminated. There are a slew of memcpy and memset operations on GlobalCore and GlobalNode which
// assume this is how they initialize and copy the base class. We cannot add virtual methods on GlobalCore without cleaning ALL of that
// up. Hence, we heap alloc a side object until such time as it can be cleaned up.
//
// Side object implementing the public ITopologyNode interface for this node.
struct TopologyObject : public ITopologyNode
{
// Back pointer to the node this topology object describes.
GlobalNode *m_pNode;
TopologyObject(GlobalNode *pNode) : m_pNode(pNode)
{
}
///
/// Returns an interface to the next node in enumeration order.
///
///
/// An interface to the next node in enumeration order. If there are no more nodes in enumeration order of the system topology, this method
/// will return NULL.
///
///
///
/**/
virtual ITopologyNode *GetNext() const;
///
/// Returns the resource manager's unique identifier for this node.
///
///
/// The resource manager's unique identifier for this node.
///
///
/// The Concurrency Runtime represents hardware threads on the system in groups of processor nodes. Nodes are usually derived from
/// the hardware topology of the system. For example, all processors on a specific socket or a specific NUMA node may belong to the
/// same processor node. The Resource Manager assigns unique identifiers to these nodes starting with 0 up to and including
/// nodeCount - 1, where nodeCount represents the total number of processor nodes on the system.
/// The count of nodes can be obtained from the function GetProcessorNodeCount.
///
/**/
virtual unsigned int GetId() const
{
return m_pNode->m_id;
}
///
/// Returns the Windows assigned NUMA node number to which this Resource Manager node belongs.
///
///
/// The Windows assigned NUMA node number to which this Resource Manager node belongs.
///
///
/// A thread proxy running on a virtual processor root belonging to this node will have affinity to at least the NUMA node level for the NUMA
/// node returned by this method.
///
/**/
virtual DWORD GetNumaNode() const
{
return m_pNode->m_numaNodeNumber;
}
///
/// Returns the number of execution resources grouped together under this node.
///
///
/// The number of execution resources grouped together under this node.
///
///
/**/
virtual unsigned int GetExecutionResourceCount() const
{
return m_pNode->m_coreCount;
}
///
/// Returns the first execution resource grouped under this node in enumeration order.
///
///
/// The first execution resource grouped under this node in enumeration order.
///
///
/**/
virtual ITopologyExecutionResource *GetFirstExecutionResource() const
{
// Equivalent to m_pCores[0]: the side topology object of the first core in the array.
return m_pNode->m_pCores->m_pTopologyObject;
}
} *m_pTopologyObject;
};
///
/// Used to store information during static and dynamic allocation.
/// Base class for StaticAllocationData and DynamicAllocationData below; one instance
/// describes the allocation state of a single scheduler proxy during an RM pass.
///
struct AllocationData
{
// Index into an array of schedulers - used for sorting, etc.
unsigned int m_index;
// Additional allocation to give to a scheduler after proportional allocation decisions are made.
unsigned int m_allocation;
// Used to hold a scaled allocation value during proportional allocation.
double m_scaledAllocation;
// The scheduler proxy this allocation data is for.
SchedulerProxy *m_pProxy;
// Number of idle cores in a scheduler proxy during static allocation or dynamic core migration.
unsigned int m_numIdleCores;
// Number of idle cores in a scheduler proxy during static allocation or dynamic core migration that are also borrowed. During core
// migration these cores are the first to go.
unsigned int m_numBorrowedIdleCores;
};
// Per-scheduler-proxy scratch data used during static (initial) allocation.
struct StaticAllocationData : public AllocationData
{
// A field used during static allocation to decide on an allocation proportional to each scheduler's desired value.
double m_adjustedDesired;
// Tells if a thread subscription is a part of this static allocation request.
bool m_fNeedsExternalThreadAllocation;
// Keeps track of stolen cores during static allocation.
unsigned int m_numCoresStolen;
};
// Per-scheduler-proxy scratch data used during dynamic core migration.
struct DynamicAllocationData : public AllocationData
{
// This variable is toggled back and forth during dynamic migration to instruct the RM whether or not
// an exact fit allocation should be attempted - i.e. if a node has 3 available cores, but this scheduler proxy
// needs only 2, keep searching in case a later node is found with 2 available cores.
bool m_fExactFitAllocation;
// Fully loaded is set to true when a scheduler is using all the cores that are allocated to it (no cores are idle)
// AND it has less than its desired number of cores.
bool m_fFullyLoaded;
// Number suggested as an appropriate allocation for the scheduler proxy, by the hill climbing instance.
unsigned int m_suggestedAllocation;
#if defined(CONCRT_TRACING)
// Suggested allocation as originally produced by hill climbing, kept for tracing before any adjustment.
unsigned int m_originalSuggestedAllocation;
#endif
// The two anonymous structs overlay each other: during a migration pass a proxy is treated
// either as a receiver of cores or as a giver of cores, so only one set of fields is in use.
union
{
#pragma warning(push)
#pragma warning(disable: 4201) // nonstandard extension used: nameless struct/union
// Struct used for a receiving proxy.
struct
{
// Number of nodes in the scheduler proxy that are partially allocated.
unsigned int m_numPartiallyFilledNodes;
// As we go through dynamic allocation, the starting node index moves along the array of sorted nodes,
// in a scheduling proxy that is receiving cores.
unsigned int m_startingNodeIndex;
};
// Struct used for a giving proxy.
struct
{
// Maximum number of borrowed idle cores this scheduler can give up.
unsigned int m_borrowedIdleCoresToMigrate;
// Maximum number of borrowed in-use cores this scheduler can give up.
unsigned int m_borrowedInUseCoresToMigrate;
// Maximum number of owned cores this scheduler can give up.
unsigned int m_ownedCoresToMigrate;
};
#pragma warning(pop)
};
};
} // namespace details
}; // namespace Concurrency