CUDNN Frontend API  8.3.0
cudnn_frontend_find_plan.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #pragma once
24 
26 #include <iomanip>
27 #include <set>
28 
29 namespace cudnn_frontend {
30 
36 template <CudnnFindSamplingTechnique samplingTechnique>
37 auto
38 time_sorted_plan(cudnnHandle_t handle, executionPlans_t plans, VariantPack const &variantPack) -> executionPlans_t {
39  executionPlans_t time_sorted_plans;
40 
41  auto plan_cmp = [](const ExecutionPlan& a, const ExecutionPlan& b) {return a.getExecutionTime() < b.getExecutionTime();};
42  std::set<std::reference_wrapper<ExecutionPlan>, decltype(plan_cmp)> timed_execution_plans(plan_cmp);
43 
44  const int maxIterCount =
46  ? 1
47  : (samplingTechnique == CudnnFindSamplingTechnique::CUDNN_FIND_SAMPLE_MEDIAN_OF_THREE) ? 3 : 100;
48  const float threshhold = 0.95f;
49 
50  cudaEvent_t start, stop;
51  cudaEventCreate(&start);
52  cudaEventCreate(&stop);
53  cudaDeviceSynchronize();
54 
55  for (auto &plan : plans) {
56  float time_ms = 0.0f;
57  float final_time_ms = 0.0f;
58  float min_time_ms = std::numeric_limits<float>::max();
59 
60  // Warm-up run
61  auto warmup_status = ::cudnnBackendExecute(handle, plan.get_raw_desc(), variantPack.get_raw_desc());
62  if (warmup_status != CUDNN_STATUS_SUCCESS) {
63  getLogger() << "[cudnn_frontend] Plan " << plan.getTag() << " failed with " << to_string(warmup_status) << std::endl;
64  continue;
65  }
66  cudaDeviceSynchronize();
67 
68  for (int i = 0; i < maxIterCount; i++) {
69  cudaEventRecord(start);
70 
71  ::cudnnBackendExecute(handle, plan.get_raw_desc(), variantPack.get_raw_desc());
72 
73  cudaEventRecord(stop);
74  cudaEventSynchronize(stop);
75  cudaEventElapsedTime(&time_ms, start, stop);
76 
78  final_time_ms = std::min(min_time_ms, time_ms);
79  if (time_ms / min_time_ms < threshhold) {
80  min_time_ms = final_time_ms;
81  } else {
82  break;
83  }
84  } else {
85  final_time_ms = i == (maxIterCount / 2) ? time_ms : final_time_ms;
86  }
87  }
88  getLogger() << "[cudnn_frontend] Plan " << plan.getTag() << " took " << std::setw(10) << final_time_ms << std::endl;
89  plan.setExecutionTime(final_time_ms);
90  timed_execution_plans.insert(plan);
91  }
92 
93  for (ExecutionPlan &plan : timed_execution_plans) {
94  time_sorted_plans.emplace_back(std::move(plan));
95  }
96 
97  cudaEventDestroy(start);
98  cudaEventDestroy(stop);
99 
100  getLogger() << "[cudnn_frontend] Auto-tuning returns " << time_sorted_plans.size() << " plans." << std::endl;
101 
102  return time_sorted_plans;
103 }
104 
105 template <CudnnFindSamplingTechnique samplingTechnique>
106 auto
109  cudnn_frontend::VariantPack const &variantPack) -> executionPlans_t {
111  executionPlans_t plans = cudnnGetPlan(handle, opGraph);
112  return time_sorted_plan<samplingTechnique>(handle, std::move(plans), variantPack);
113 }
114 
115 template <CudnnFindSamplingTechnique samplingTechnique>
116 auto
119  cudnn_frontend::VariantPack const &variantPack,
120  Predicate pred) -> executionPlans_t {
122  executionPlans_t plans = cudnnGetPlan(handle, opGraph, pred);
123  return time_sorted_plan<samplingTechnique>(handle, std::move(plans), variantPack);
124 }
125 
126 template <CudnnFindSamplingTechnique samplingTechnique>
127 auto
130  cudnn_frontend::VariantPack const &variantPack,
134  auto sorted_plans = cudnnFindPlan<samplingTechnique>(handle, opGraph, variantPack, pred);
136  if (cache.is_fastest_plan_stable(opGraph, sorted_plans.front().getTag())) {
137  cache.add_plan_to_cache(opGraph, sorted_plans.front());
138  }
139  return sorted_plans.front();
140 }
141 
142 }
ConditionalStreamer & getLogger()
std::function< bool(cudnn_frontend::ExecutionPlan const &plan)> Predicate
std::vector< cudnn_frontend::ExecutionPlan > executionPlans_t
Variety of renames.
static std::string to_string(cudnnDataType_t type)
Sample once; fast, but the measured value may be unstable.
auto cudnnFindPlanAndCache(cudnnHandle_t handle, cudnn_frontend::OperationGraph &opGraph, cudnn_frontend::VariantPack const &variantPack, cudnn_frontend::ExecutionPlanCache &cache, Predicate pred=[](const cudnn_frontend::ExecutionPlan &) {return false;}) -> cudnn_frontend::ExecutionPlan
auto cudnnFindPlan(cudnnHandle_t handle, cudnn_frontend::OperationGraph &opGraph, cudnn_frontend::VariantPack const &variantPack, Predicate pred) -> executionPlans_t
auto time_sorted_plan(cudnnHandle_t handle, executionPlans_t plans, VariantPack const &variantPack) -> executionPlans_t