CUDNN Frontend API  8.3.0
cudnn_frontend Namespace Reference

Classes

class  BackendDescriptor
 
class  ConditionalStreamer
 
class  ConvDesc_v8
 
class  ConvDescBuilder_v8
 
class  cudnnException
 
class  Engine_v8
 
class  EngineBuilder_v8
 
class  EngineConfig_v8
 
class  EngineConfigBuilder_v8
 
class  EngineConfigGenerator
 
class  EngineFallbackList_v8
 
class  EngineFallbackListBuilder_v8
 
class  EngineHeuristics_v8
 
class  EngineHeuristicsBuilder_v8
 
class  ExecutionPlan_v8
 
class  ExecutionPlanBuilder_v8
 
class  ExecutionPlanCache_v1
 Plan Cache structure for the above table. More...
 
class  ExecutionPlanCache_v2
 
class  MatMulDesc_v8
 
class  MatMulDescBuilder_v8
 
class  OpaqueBackendPointer
 
class  Operation_v8
 
class  OperationBuilder_v8
 
class  OperationGraph_v8
 
class  OperationGraphBuilder_v8
 
class  PointWiseDesc_v8
 
class  PointWiseDescBuilder_v8
 
class  ReductionDesc_v8
 
class  ReductionDescBuilder_v8
 
class  Tensor_v8
 
class  TensorBuilder_v8
 
class  VariantPack_v8
 
class  VariantPackBuilder_v8
 

Typedefs

using ManagedOpaqueDescriptor = std::shared_ptr< OpaqueBackendPointer >
 
using Tensor = Tensor_v8
 
using TensorBuilder = TensorBuilder_v8
 
using ConvDesc = ConvDesc_v8
 
using ConvDescBuilder = ConvDescBuilder_v8
 
using PointWiseDescBuilder = PointWiseDescBuilder_v8
 
using PointWiseDesc = PointWiseDesc_v8
 
using MatMulDesc = MatMulDesc_v8
 
using MatMulDescBuilder = MatMulDescBuilder_v8
 
using ReductionDesc = ReductionDesc_v8
 
using ReductionDescBuilder = ReductionDescBuilder_v8
 
using Operation = Operation_v8
 
using OperationBuilder = OperationBuilder_v8
 
using EngineHeuristicsBuilder = EngineHeuristicsBuilder_v8
 
using EngineHeuristics = EngineHeuristics_v8
 
using EngineBuilder = EngineBuilder_v8
 
using Engine = Engine_v8
 
using EngineConfig = EngineConfig_v8
 
using EngineConfigBuilder = EngineConfigBuilder_v8
 
using VariantPack = VariantPack_v8
 
using VariantPackBuilder = VariantPackBuilder_v8
 
using EngineFallbackList = EngineFallbackList_v8
 
using EngineFallbackListBuilder = EngineFallbackListBuilder_v8
 
using EngineConfigList = std::vector< ManagedOpaqueDescriptor >
 
using executionPlans_t = std::vector< cudnn_frontend::ExecutionPlan >
 Variety of renames. More...
 
using Predicate = std::function< bool(cudnn_frontend::ExecutionPlan const &plan)>
 
using GeneratorSource = std::function< cudnn_frontend::EngineConfigList(cudnn_frontend::OperationGraph &)>
 
using ExecutionPlan = ExecutionPlan_v8
 
using ExecutionPlanBuilder = ExecutionPlanBuilder_v8
 
using ExecutionPlanCache = ExecutionPlanCache_v2
 
using OperationGraph = OperationGraph_v8
 
using OperationGraphBuilder = OperationGraphBuilder_v8
 
using feature_vector_t = std::vector< int64_t >
 Detailed feature_vector. Generally the Tensor and Operation properties. More...
 

Enumerations

enum  CudnnFindSamplingTechnique {
  CudnnFindSamplingTechnique::CUDNN_FIND_SAMPLE_ONCE,
  CudnnFindSamplingTechnique::CUDNN_FIND_SAMPLE_MEDIAN_OF_THREE,
  CudnnFindSamplingTechnique::CUDNN_FIND_SAMPLE_TILL_STABLE
}
 

Functions

static ManagedOpaqueDescriptor make_shared_backend_pointer (cudnnBackendDescriptorType_t type)
 
static auto filter (Predicate pred, executionPlans_t &plans) -> executionPlans_t
 
static auto get_fallback_engine_list (cudnnBackendDescriptorType_t mode, const std::string &opGraphTag) -> std::vector< int >
 
static bool load_from_config (json &json_handle, const std::string &errata_json)
 
template<typename T >
static bool check_rule (const json &json_handle, const std::string &executionPlanTag, cudnnHandle_t handle, T fn)
 
template<typename T >
static bool check_errata (const json &json_handle, const std::string &executionPlanTag, cudnnHandle_t handle, T fn)
 
static void filter (EngineConfigList &from, EngineConfigList &to, std::function< bool(cudnnBackendDescriptor_t)> filter_fn)
 
template<cudnnBackendNumericalNote_t NUMERIC_NOTE>
bool hasNumericalNote (cudnnBackendDescriptor_t engine_config)
 
template<CudnnFindSamplingTechnique samplingTechnique>
auto time_sorted_plan (cudnnHandle_t handle, executionPlans_t plans, VariantPack const &variantPack) -> executionPlans_t
 
template<std::size_t SIZE>
EngineConfigList get_heuristics_list (std::array< cudnnBackendHeurMode_t, SIZE > modes, OperationGraph_v8 &opGraph, std::function< bool(cudnnBackendDescriptor_t)> filter_fn)
 
bool & isLoggingEnabled ()
 
std::ostream & getStream ()
 
ConditionalStreamer & getLogger ()
 
static std::ostream & operator<< (std::ostream &os, const BackendDescriptor &desc)
 
static cudnnStatus_t cudnnReorderFilterAndBiasInt8x32 (cudnnHandle_t handle, const Tensor_v8 &tensor, const ConvDesc_v8 &conv_desc, void *dev_filter_ptr, void *reordered_filter_ptr, void *dev_bias_ptr, void *reordered_bias_ptr)
 
static void throw_if (std::function< bool()> expr, const char *message, cudnnStatus_t status)
 
static void throw_if (bool expr, const char *message, cudnnStatus_t status)
 
static std::string to_string (cudnnDataType_t type)
 
static std::string to_string (cudnnStatus_t status)
 
static void set_error_and_throw_exception (BackendDescriptor const *desc, cudnnStatus_t status, const char *message)
 

Detailed Description

Execution Plan Caching: The goal is to auto-tune once and then save the best auto-tuned result for a problem for later use. For every unique Operation Graph (denoted by a string) we have a set of plans identified by a feature vector. The feature vector could be Tensor dimension/data_type and so on. Multiple Operation Graphs can share a feature vector type but may have different Execution Plan(s). The v1 cache has the following format. It is the responsibility of the user to query the correct cache for the given device/operation graph combination.

Typedef Documentation

◆ ConvDesc

Definition at line 121 of file cudnn_frontend.h.

◆ ConvDescBuilder

Definition at line 122 of file cudnn_frontend.h.

◆ Engine

Definition at line 134 of file cudnn_frontend.h.

◆ EngineBuilder

Definition at line 133 of file cudnn_frontend.h.

◆ EngineConfig

Definition at line 135 of file cudnn_frontend.h.

◆ EngineConfigBuilder

◆ EngineConfigList

EngineConfigList class. This is a RAII type class that holds naked EngineConfig backendDescriptors. The purpose of this class is to provide an easy interface to store the EngineConfigs generated from various sources and apply a filter.

Definition at line 248 of file cudnn_frontend_EngineConfig.h.

◆ EngineFallbackList

◆ EngineFallbackListBuilder

◆ EngineHeuristics

Definition at line 132 of file cudnn_frontend.h.

◆ EngineHeuristicsBuilder

◆ ExecutionPlan

◆ ExecutionPlanBuilder

◆ ExecutionPlanCache

◆ executionPlans_t

Variety of renames.

Definition at line 30 of file cudnn_frontend_EngineConfigGenerator.h.

◆ feature_vector_t

using cudnn_frontend::feature_vector_t = typedef std::vector<int64_t>

Detailed feature_vector. Generally the Tensor and Operation properties.

Definition at line 34 of file cudnn_frontend_utils.h.

◆ GeneratorSource

◆ ManagedOpaqueDescriptor

Definition at line 84 of file cudnn_backend_base.h.

◆ MatMulDesc

Definition at line 125 of file cudnn_frontend.h.

◆ MatMulDescBuilder

Definition at line 126 of file cudnn_frontend.h.

◆ Operation

Definition at line 129 of file cudnn_frontend.h.

◆ OperationBuilder

Definition at line 130 of file cudnn_frontend.h.

◆ OperationGraph

◆ OperationGraphBuilder

◆ PointWiseDesc

Definition at line 124 of file cudnn_frontend.h.

◆ PointWiseDescBuilder

◆ Predicate

using cudnn_frontend::Predicate = typedef std::function<bool(cudnn_frontend::ExecutionPlan const &plan)>

Definition at line 31 of file cudnn_frontend_EngineConfigGenerator.h.

◆ ReductionDesc

Definition at line 127 of file cudnn_frontend.h.

◆ ReductionDescBuilder

◆ Tensor

Definition at line 119 of file cudnn_frontend.h.

◆ TensorBuilder

Definition at line 120 of file cudnn_frontend.h.

◆ VariantPack

Definition at line 137 of file cudnn_frontend.h.

◆ VariantPackBuilder

Enumeration Type Documentation

◆ CudnnFindSamplingTechnique

Enumerator
CUDNN_FIND_SAMPLE_ONCE 

Sample once: quick, but may have unstable values.

CUDNN_FIND_SAMPLE_MEDIAN_OF_THREE 

Sample 3 times and take median.

CUDNN_FIND_SAMPLE_TILL_STABLE 

Sample multiple times till stable.

Definition at line 34 of file cudnn_frontend_EngineConfigGenerator.h.

Function Documentation

◆ check_errata()

template<typename T >
static bool cudnn_frontend::check_errata ( const json &  json_handle,
const std::string &  executionPlanTag,
cudnnHandle_t  handle,
T  fn 
)
static

Definition at line 90 of file cudnn_frontend_Errata.h.

References getLogger().

◆ check_rule()

template<typename T >
static bool cudnn_frontend::check_rule ( const json &  json_handle,
const std::string &  executionPlanTag,
cudnnHandle_t  handle,
T  fn 
)
static

◆ cudnnReorderFilterAndBiasInt8x32()

static cudnnStatus_t cudnn_frontend::cudnnReorderFilterAndBiasInt8x32 ( cudnnHandle_t  handle,
const Tensor_v8 &  tensor,
const ConvDesc_v8 &  conv_desc,
void *  dev_filter_ptr,
void *  reordered_filter_ptr,
void *  dev_bias_ptr,
void *  reordered_bias_ptr 
)
static

◆ filter() [1/2]

static void cudnn_frontend::filter ( EngineConfigList &  from,
EngineConfigList &  to,
std::function< bool(cudnnBackendDescriptor_t)>  filter_fn 
)
static

Definition at line 33 of file cudnn_frontend_Filters.h.

◆ filter() [2/2]

static auto cudnn_frontend::filter ( Predicate  pred,
executionPlans_t &  plans 
) -> executionPlans_t
static

Filters the execution plans based on the prerequisite conditions. Goes through the vector of execution plans and, if the predicate returns false (i.e. do not block) for a plan, inserts that plan into the filtered plans.

Definition at line 106 of file cudnn_frontend_EngineConfigGenerator.h.

References getLogger().

Referenced by cudnn_frontend::EngineConfigGenerator::cudnnGetPlan(), and get_heuristics_list().

◆ get_fallback_engine_list()

static auto cudnn_frontend::get_fallback_engine_list ( cudnnBackendDescriptorType_t  mode,
const std::string &  opGraphTag 
) -> std::vector<int>
static

Here we are using the term "bias" in the operationGraph as a proxy for the conv*bias* operation graph. We are not strictly checking the order of the operations in the graph. We propose this as a temporary workaround until the backend API supports querying the fallback list directly from cuDNN.

Definition at line 30 of file cudnn_frontend_EngineFallbackList.h.

Referenced by cudnn_frontend::EngineFallbackListBuilder_v8::build().

◆ get_heuristics_list()

template<std::size_t SIZE>
EngineConfigList cudnn_frontend::get_heuristics_list ( std::array< cudnnBackendHeurMode_t, SIZE >  modes,
OperationGraph_v8 &  opGraph,
std::function< bool(cudnnBackendDescriptor_t)>  filter_fn 
)

◆ getLogger()

◆ getStream()

std::ostream& cudnn_frontend::getStream ( )
inline

Definition at line 39 of file cudnn_frontend_Logging.h.

References isLoggingEnabled().

Referenced by getLogger().

◆ hasNumericalNote()

template<cudnnBackendNumericalNote_t NUMERIC_NOTE>
bool cudnn_frontend::hasNumericalNote ( cudnnBackendDescriptor_t  engine_config)

Definition at line 45 of file cudnn_frontend_Filters.h.

References make_shared_backend_pointer().

◆ isLoggingEnabled()

bool& cudnn_frontend::isLoggingEnabled ( )
inline

◆ load_from_config()

static bool cudnn_frontend::load_from_config ( json &  json_handle,
const std::string &  errata_json 
)
static

Definition at line 40 of file cudnn_frontend_Errata.h.

◆ make_shared_backend_pointer()

◆ operator<<()

static std::ostream& cudnn_frontend::operator<< ( std::ostream &  os,
const BackendDescriptor &  desc 
)
static

◆ set_error_and_throw_exception()

static void cudnn_frontend::set_error_and_throw_exception ( BackendDescriptor const *  desc,
cudnnStatus_t  status,
const char *  message 
)
inline static

Definition at line 138 of file cudnn_frontend_utils.h.

References cudnn_frontend::cudnnException::cudnnException(), cudnn_frontend::BackendDescriptor::set_error(), cudnn_frontend::BackendDescriptor::set_status(), and to_string().

Referenced by cudnn_frontend::MatMulDescBuilder_v8::build(), cudnn_frontend::ReductionDescBuilder_v8::build(), cudnn_frontend::VariantPackBuilder_v8::build(), cudnn_frontend::EngineFallbackListBuilder_v8::build(), cudnn_frontend::OperationGraphBuilder_v8::build(), cudnn_frontend::EngineHeuristicsBuilder_v8::build(), cudnn_frontend::EngineConfigBuilder_v8::build(), cudnn_frontend::ConvDescBuilder_v8::build(), cudnn_frontend::TensorBuilder_v8::build(), cudnn_frontend::PointWiseDescBuilder_v8::build(), cudnn_frontend::EngineBuilder_v8::build(), cudnn_frontend::ExecutionPlanBuilder_v8::build(), cudnn_frontend::OperationBuilder_v8::build(), cudnn_frontend::OperationBuilder_v8::build_conv_backward_data(), cudnn_frontend::OperationBuilder_v8::build_conv_backward_filter(), cudnn_frontend::OperationBuilder_v8::build_conv_forward(), cudnn_frontend::OperationBuilder_v8::build_matmul_op(), cudnn_frontend::OperationBuilder_v8::build_pointwise_op(), cudnn_frontend::OperationBuilder_v8::build_reduction_op(), cudnn_frontend::Engine_v8::buildKnobs(), cudnn_frontend::ExecutionPlan_v8::buildTag(), cudnn_frontend::ExecutionPlan_v8::computeWorkSpaceSize(), cudnn_frontend::Engine_v8::Engine_v8(), cudnn_frontend::EngineConfig_v8::EngineConfig_v8(), cudnn_frontend::ExecutionPlan_v8::fetchNotes(), cudnn_frontend::EngineHeuristics_v8::getEngineConfig(), cudnn_frontend::EngineHeuristics_v8::getEngineConfigCount(), cudnn_frontend::OperationGraph_v8::getEngineCount(), cudnn_frontend::OperationBuilder_v8::setaMatDesc(), cudnn_frontend::OperationBuilder_v8::setbDesc(), cudnn_frontend::OperationBuilder_v8::setbMatDesc(), cudnn_frontend::OperationBuilder_v8::setcDesc(), cudnn_frontend::OperationBuilder_v8::setcMatDesc(), cudnn_frontend::EngineConfigBuilder_v8::setEngine(), cudnn_frontend::OperationBuilder_v8::setmatmulDesc(), cudnn_frontend::OperationBuilder_v8::setpwDesc(), cudnn_frontend::OperationBuilder_v8::setreductionDesc(), and cudnn_frontend::OperationBuilder_v8::setwDesc().

◆ throw_if() [1/2]

static void cudnn_frontend::throw_if ( std::function< bool()>  expr,
const char *  message,
cudnnStatus_t  status 
)
inline static

◆ throw_if() [2/2]

static void cudnn_frontend::throw_if ( bool  expr,
const char *  message,
cudnnStatus_t  status 
)
inline static

◆ time_sorted_plan()

template<CudnnFindSamplingTechnique samplingTechnique>
auto cudnn_frontend::time_sorted_plan ( cudnnHandle_t  handle,
executionPlans_t  plans,
VariantPack const &  variantPack 
) -> executionPlans_t

Sorts the execution plans by their run time. The run time of a plan may not be trivial and hence we run it multiple times till we get a stable value. We have an additional dry-run which helps stabilize the time further.

Definition at line 38 of file cudnn_frontend_find_plan.h.

References CUDNN_FIND_SAMPLE_MEDIAN_OF_THREE, CUDNN_FIND_SAMPLE_ONCE, CUDNN_FIND_SAMPLE_TILL_STABLE, cudnn_frontend::ExecutionPlan_v8::getExecutionTime(), getLogger(), and to_string().

◆ to_string() [1/2]

static std::string cudnn_frontend::to_string ( cudnnDataType_t  type)
inline static

◆ to_string() [2/2]

static std::string cudnn_frontend::to_string ( cudnnStatus_t  status)
inline static

Definition at line 101 of file cudnn_frontend_utils.h.