Skip to content

Commit d39d64b

Browse files
Qualcomm AI Engine Direct - gpu support part1 (#12165)
### Summary - rename folders in backends/qualcomm/runtime/backends - add gpu infra ### Test plan ```bash python backends/qualcomm/tests/test_qnn_delegate.py TestQNNFloatingPointOperator.test_qnn_backend_conv2d -b build-android/ -m SM8750 -s 5f396958 --online_prepare --backend gpu ``` --------- Co-authored-by: chenweng-quic <168707118+chenweng-quic@users.noreply.github.com> Co-authored-by: Cheng-Hsin Weng <chenweng@qti.qualcomm.com>
1 parent 38e2e8a commit d39d64b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+887
-76
lines changed

backends/qualcomm/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ backends/qualcomm
5656
| | # Meanwhile, this is also the runtime responsible for executing compiled
5757
| | # models on a device.
5858
| └── backends # Backends supported by QNN.
59-
| └── htpbackend
59+
| └── gpu / htp
6060
| ├── aarch64 # Configuration required to run on device. (Device Part).
6161
| └── x86_64 # Configuration required to compile graph on host. (AoT Part).
6262
├── scripts # Misc supporting scripts, not related to core functionality.

backends/qualcomm/runtime/backends/CMakeLists.txt

Lines changed: 43 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -43,58 +43,70 @@ target_sources(
4343
${CMAKE_CURRENT_LIST_DIR}/QnnProfiler.cpp
4444
)
4545

46-
# qnn_device
47-
set(HOST_ARCHITECTURE
48-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/${CMAKE_SYSTEM_PROCESSOR}
46+
set(HOST_ARCHITECTURE_GPU
47+
${CMAKE_CURRENT_LIST_DIR}/gpu/${CMAKE_SYSTEM_PROCESSOR}
48+
)
49+
set(HOST_ARCHITECTURE_HTP
50+
${CMAKE_CURRENT_LIST_DIR}/htp/${CMAKE_SYSTEM_PROCESSOR}
4951
)
52+
set(HOST_ARCHITECTURE_IR ${CMAKE_CURRENT_LIST_DIR}/ir/${CMAKE_SYSTEM_PROCESSOR})
5053

54+
# qnn_device
5155
target_sources(
5256
qnn_device
5357
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnDeviceCommon.h
54-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevice.h
58+
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuDevice.h
59+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevice.h
5560
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnDeviceCommon.cpp
56-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevice.cpp
57-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevicePlatformInfoConfig.h
58-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDeviceCustomConfig.h
61+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevice.cpp
62+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevicePlatformInfoConfig.h
63+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpDeviceCustomConfig.h
5964
# When offline prepare context cache in x86 host we have to provide
6065
# platform information and SocModel to Qnn
61-
${HOST_ARCHITECTURE}/HtpDevicePlatformInfoConfig.cpp
62-
${HOST_ARCHITECTURE}/HtpDeviceCustomConfig.cpp
66+
${HOST_ARCHITECTURE_HTP}/HtpDevicePlatformInfoConfig.cpp
67+
${HOST_ARCHITECTURE_HTP}/HtpDeviceCustomConfig.cpp
6368
)
6469

6570
# qnn_context
6671
target_sources(
6772
qnn_context
6873
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.h
69-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.h
70-
${CMAKE_CURRENT_LIST_DIR}/irbackend/IrContext.h
71-
PRIVATE
72-
${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.cpp
73-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.cpp
74-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContextCustomConfig.h
75-
${HOST_ARCHITECTURE}/HtpContextCustomConfig.cpp
76-
${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/IrContext.cpp
74+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpContext.h
75+
${CMAKE_CURRENT_LIST_DIR}/ir/IrContext.h
76+
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContext.h
77+
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.cpp
78+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpContext.cpp
79+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpContextCustomConfig.h
80+
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContext.cpp
81+
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContextCustomConfig.h
82+
${HOST_ARCHITECTURE_GPU}/GpuContextCustomConfig.cpp
83+
${HOST_ARCHITECTURE_HTP}/HtpContextCustomConfig.cpp
84+
${HOST_ARCHITECTURE_IR}/IrContext.cpp
7785
)
7886

7987
# qnn_backend_cache
8088
target_sources(
8189
qnn_backend_cache
8290
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCache.h
83-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackendCache.h
91+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackendCache.h
8492
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCache.cpp
85-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackendCache.cpp
93+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackendCache.cpp
8694
)
8795

8896
# qnn_graph
8997
target_sources(
9098
qnn_graph
9199
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnGraphCommon.h
92-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraph.h
100+
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraph.h
101+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraph.h
93102
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnGraphCommon.cpp
94-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraph.cpp
95-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraphCustomConfig.h
96-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraphCustomConfig.cpp
97-
${HOST_ARCHITECTURE}/HtpGraphCustomConfig.cpp
103+
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraph.cpp
104+
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraphCustomConfig.h
105+
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraphCustomConfig.cpp
106+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraph.cpp
107+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraphCustomConfig.h
108+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraphCustomConfig.cpp
109+
${HOST_ARCHITECTURE_HTP}/HtpGraphCustomConfig.cpp
98110
)
99111

100112
# qnn_op_package_manager
@@ -108,9 +120,13 @@ target_sources(
108120
target_sources(
109121
qnn_backend
110122
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.h
111-
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackend.h
112-
${CMAKE_CURRENT_LIST_DIR}/irbackend/IrBackend.h
123+
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackend.h
124+
${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackend.h
125+
${CMAKE_CURRENT_LIST_DIR}/ir/IrBackend.h
113126
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.cpp
127+
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackend.cpp
128+
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackendCustomConfig.h
129+
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackendCustomConfig.cpp
114130
)
115131

116132
# qnn_mem_manager
@@ -138,6 +154,5 @@ target_sources(
138154
target_sources(
139155
qnn_dlc_manager
140156
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnDlcManager.h
141-
PRIVATE
142-
${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/QnnDlcManager.cpp
157+
PRIVATE ${HOST_ARCHITECTURE_IR}/QnnDlcManager.cpp
143158
)

backends/qualcomm/runtime/backends/QnnBackendFactory.cpp

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,16 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
5353
EnumNameQnnExecuTorchHtpPdSession(htp_options->pd_session()));
5454
QNN_EXECUTORCH_LOG_INFO(
5555
"use_conv_hmx in htp_options: %d", htp_options->use_conv_hmx());
56+
QNN_EXECUTORCH_LOG_INFO(
57+
"use_dlbc in htp_options: %d", htp_options->use_dlbc());
5658
QNN_EXECUTORCH_LOG_INFO(
5759
"use_fold_relu in htp_options: %d", htp_options->use_fold_relu());
60+
QNN_EXECUTORCH_LOG_INFO(
61+
"use_multi_contexts in htp_options: %d",
62+
htp_options->use_multi_contexts());
63+
QNN_EXECUTORCH_LOG_INFO(
64+
"use_weight_sharing in htp_options: %d",
65+
htp_options->use_weight_sharing());
5866
}
5967
backend_params->qnn_backend_ptr_ =
6068
std::make_unique<HtpBackend>(implementation, logger);
@@ -86,13 +94,66 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
8694
get_option(options->log_level()));
8795
backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED;
8896
} break;
89-
case QnnExecuTorchBackendType::kGpuBackend:
97+
case QnnExecuTorchBackendType::kGpuBackend: {
98+
auto gpu_options = options->backend_options()->gpu_options();
99+
if (options->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo) {
100+
QNN_EXECUTORCH_LOG_INFO(
101+
"performance_mode in gpu_options: %s",
102+
EnumNameQnnExecuTorchGpuPerformanceMode(
103+
gpu_options->performance_mode()));
104+
QNN_EXECUTORCH_LOG_INFO(
105+
"precision in gpu_options: %s",
106+
EnumNameQnnExecuTorchGpuPrecision(gpu_options->precision()));
107+
QNN_EXECUTORCH_LOG_INFO(
108+
"use_memory_optimizations in gpu_options: %d",
109+
gpu_options->use_memory_optimizations());
110+
QNN_EXECUTORCH_LOG_INFO(
111+
"use_node_optimizations in gpu_options: %d",
112+
gpu_options->use_node_optimizations());
113+
QNN_EXECUTORCH_LOG_INFO(
114+
"use_queue_recording in gpu_options: %d",
115+
gpu_options->use_queue_recording());
116+
QNN_EXECUTORCH_LOG_INFO(
117+
"use_weight_sharing in gpu_options: %d",
118+
gpu_options->use_weight_sharing());
119+
}
120+
backend_params->qnn_backend_ptr_ =
121+
std::make_unique<GpuBackend>(implementation, logger, gpu_options);
122+
123+
backend_params->qnn_device_ptr_ =
124+
std::make_unique<GpuDevice>(implementation, logger);
125+
126+
backend_params->qnn_backend_cache_ptr_ =
127+
std::make_unique<QnnBackendCache>(qnn_context_blob);
128+
129+
backend_params->qnn_context_ptr_ = std::make_unique<GpuContext>(
130+
implementation,
131+
backend_params->qnn_backend_ptr_.get(),
132+
backend_params->qnn_device_ptr_.get(),
133+
backend_params->qnn_backend_cache_ptr_.get(),
134+
qnn_dlc_manager,
135+
gpu_options);
136+
137+
backend_params->qnn_graph_ptr_ = std::make_unique<GpuGraph>(
138+
implementation,
139+
backend_params->qnn_backend_ptr_.get(),
140+
backend_params->qnn_context_ptr_.get(),
141+
options->profile_level(),
142+
gpu_options);
143+
} break;
90144
case QnnExecuTorchBackendType::kDspBackend:
91145
case QnnExecuTorchBackendType::kUndefinedBackend:
92146
default:
93147
return nullptr;
94148
}
95149

150+
backend_params->qnn_mem_manager_ptr_ = std::make_unique<QnnMemManager>(
151+
implementation,
152+
backend_params->qnn_context_ptr_.get(),
153+
options->log_level());
154+
155+
backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED;
156+
96157
if (backend_params->qnn_backend_ptr_->VerifyQNNSDKVersion() == Error::Ok) {
97158
return backend_params;
98159
}

backends/qualcomm/runtime/backends/QnnBackendFactory.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,15 @@
1717
#include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
1818
#include <executorch/backends/qualcomm/runtime/backends/QnnLogger.h>
1919
#include <executorch/backends/qualcomm/runtime/backends/QnnMemManager.h>
20-
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h>
21-
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.h>
22-
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h>
23-
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h>
24-
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h>
20+
#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuBackend.h>
21+
#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuContext.h>
22+
#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuDevice.h>
23+
#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuGraph.h>
24+
#include <executorch/backends/qualcomm/runtime/backends/htp/HtpBackend.h>
25+
#include <executorch/backends/qualcomm/runtime/backends/htp/HtpBackendCache.h>
26+
#include <executorch/backends/qualcomm/runtime/backends/htp/HtpContext.h>
27+
#include <executorch/backends/qualcomm/runtime/backends/htp/HtpDevice.h>
28+
#include <executorch/backends/qualcomm/runtime/backends/htp/HtpGraph.h>
2529

2630
#include <memory>
2731
namespace executorch {

backends/qualcomm/runtime/backends/QnnDeviceCommon.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class QnnDevice {
2929
return handle_;
3030
}
3131

32-
executorch::runtime::Error Configure();
32+
virtual executorch::runtime::Error Configure();
3333

3434
protected:
3535
virtual executorch::runtime::Error MakeConfig(

backends/qualcomm/runtime/backends/QnnDlcManager.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
#include <QnnTypes.h>
1212
#include <executorch/backends/qualcomm/runtime/backends/QnnBackendFactory.h>
13-
#include <executorch/backends/qualcomm/runtime/backends/irbackend/IrContext.h>
13+
#include <executorch/backends/qualcomm/runtime/backends/ir/IrContext.h>
1414

1515
#include "QnnWrapperUtils.hpp"
1616
namespace executorch {
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Copyright (c) Qualcomm Innovation Center, Inc.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuBackend.h>
10+
11+
#include "GPU/QnnGpuCommon.h"
12+
13+
namespace executorch {
14+
namespace backends {
15+
namespace qnn {
16+
17+
using executorch::runtime::Error;
18+
19+
// Constructs the GPU backend.
// `implementation` and `logger` are forwarded unchanged to the QnnBackend
// base class; `gpu_options` is handed to a newly created
// GpuBackendCustomConfig, which is stored in gpu_backend_custom_config_.
// NOTE(review): `gpu_options` is not null-checked here - confirm whether
// GpuBackendCustomConfig tolerates a null pointer.
GpuBackend::GpuBackend(
20+
const QnnImplementation& implementation,
21+
QnnLogger* logger,
22+
const QnnExecuTorchGpuBackendOptions* gpu_options)
23+
: QnnBackend(implementation, logger) {
24+
gpu_backend_custom_config_ =
25+
std::make_unique<GpuBackendCustomConfig>(gpu_options);
26+
}
27+
28+
// Returns the backend API version this code was compiled against, built from
// the QNN_GPU_API_VERSION_{MAJOR,MINOR,PATCH} macros (presumably provided by
// GPU/QnnGpuCommon.h - verify against the SDK headers).
// Only the major/minor/patch fields are assigned; the struct is not otherwise
// initialized before being returned.
Qnn_Version_t GpuBackend::GetExpectedBackendVersion() const {
29+
Qnn_Version_t backend_version;
30+
backend_version.major = QNN_GPU_API_VERSION_MAJOR;
31+
backend_version.minor = QNN_GPU_API_VERSION_MINOR;
32+
backend_version.patch = QNN_GPU_API_VERSION_PATCH;
33+
return backend_version;
34+
}
35+
36+
// Returns true only when `event_type` equals QNN_PROFILE_EVENTTYPE_EXECUTE;
// every other event type yields false.
bool GpuBackend::IsProfileEventTypeParentOfNodeTime(
37+
QnnProfile_EventType_t event_type) {
38+
return (event_type == QNN_PROFILE_EVENTTYPE_EXECUTE);
39+
}
40+
41+
// Fills `config` with pointers to backend configuration entries followed by a
// terminating nullptr, then returns Error::Ok.
//
// One QnnBackend_Config_t is produced per custom config returned by
// gpu_backend_custom_config_->CreateBackendCustomConfig(). The entries live in
// the member backend_config_, so the pointers pushed into `config` refer to
// storage owned by this object. Entries are appended to `config`; it is not
// cleared first.
Error GpuBackend::MakeConfig(std::vector<const QnnBackend_Config_t*>& config) {
42+
const std::vector<QnnBackend_CustomConfig_t>& backend_custom_config =
43+
gpu_backend_custom_config_->CreateBackendCustomConfig();
44+
45+
// The size is stored as uint32_t, so the count is narrowed from size_t here.
uint32_t num_custom_configs = backend_custom_config.size();
46+
// Size the backing storage up front so the element addresses taken in the
// loop below are not invalidated by later growth within this function.
backend_config_.resize(num_custom_configs);
47+
// +1 for null terminated
48+
config.reserve(num_custom_configs + 1);
49+
50+
for (std::size_t i = 0; i < num_custom_configs; ++i) {
51+
// Mark each entry as a custom option and attach the corresponding payload.
backend_config_[i].option = QNN_BACKEND_CONFIG_OPTION_CUSTOM;
52+
backend_config_[i].customConfig = backend_custom_config[i];
53+
config.push_back(&backend_config_[i]);
54+
}
55+
56+
// Terminate the pointer list with nullptr.
config.push_back(nullptr);
57+
return Error::Ok;
58+
}
59+
60+
} // namespace qnn
61+
} // namespace backends
62+
} // namespace executorch
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright (c) Qualcomm Innovation Center, Inc.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h>
12+
13+
namespace executorch {
14+
namespace backends {
15+
namespace qnn {
16+
17+
// GPU-specific specialization of QnnBackend.
// Overrides the expected-version query, the profiling event classification
// and the backend config construction, and owns the GPU custom config helper.
class GpuBackend : public QnnBackend {
18+
public:
19+
// Builds the backend from the QNN implementation, a logger, and the GPU
// backend options.
GpuBackend(
20+
const QnnImplementation& implementation,
21+
QnnLogger* logger,
22+
const QnnExecuTorchGpuBackendOptions* gpu_options);
23+
24+
// Returns the backend API version this class expects.
Qnn_Version_t GetExpectedBackendVersion() const override;
25+
26+
// Reports whether `event_type` is the profile event type treated as the
// parent of per-node timing events.
bool IsProfileEventTypeParentOfNodeTime(
27+
QnnProfile_EventType_t event_type) override;
28+
29+
protected:
30+
// Populates `config` with pointers to backend config entries.
executorch::runtime::Error MakeConfig(
31+
std::vector<const QnnBackend_Config_t*>& config) override;
32+
33+
private:
34+
// Backing storage for the config entries.
std::vector<QnnBackend_Config_t> backend_config_;
35+
// Helper object for the GPU custom backend configuration.
std::unique_ptr<GpuBackendCustomConfig> gpu_backend_custom_config_;
36+
};
37+
38+
} // namespace qnn
39+
} // namespace backends
40+
} // namespace executorch

0 commit comments

Comments
 (0)