Commit 270f3e5

hsharma35 authored and meta-codesync[bot] committed
Migrate generic quantized cadence operators to oss. (#16025)

Summary:
Pull Request resolved: #16025

Migrates operators from internal repo to cadence/generic.

Reviewed By: DrJessop

Differential Revision: D87900844
1 parent ee236cb · commit 270f3e5


41 files changed (+4400, -2358 lines)

backends/cadence/aot/TARGETS

Lines changed: 11 additions & 11 deletions

@@ -146,17 +146,17 @@ executorch_generated_lib(
     deps = [
         "//executorch/backends/cadence/generic/kernels:cadence_kernels",
         "//executorch/backends/cadence/generic/operators:op_requantize",
-        "//executorch/backends/cadence/generic/operators:im2row_out",
-        "//executorch/backends/cadence/generic/operators:dequantize_per_tensor",
-        "//executorch/backends/cadence/generic/operators:quantize_per_tensor",
-        "//executorch/backends/cadence/generic/operators:quantized_add_out",
-        "//executorch/backends/cadence/generic/operators:quantized_conv2d_nchw_out",
-        "//executorch/backends/cadence/generic/operators:quantized_conv2d_nhwc_out",
-        "//executorch/backends/cadence/generic/operators:quantized_fully_connected_out",
-        "//executorch/backends/cadence/generic/operators:quantized_layer_norm",
-        "//executorch/backends/cadence/generic/operators:quantized_linear_out",
-        "//executorch/backends/cadence/generic/operators:quantized_matmul_out",
-        "//executorch/backends/cadence/generic/operators:quantized_relu_out",
+        "//executorch/backends/cadence/generic/operators:op_im2row",
+        "//executorch/backends/cadence/generic/operators:op_dequantize_per_tensor",
+        "//executorch/backends/cadence/generic/operators:op_quantize_per_tensor",
+        "//executorch/backends/cadence/generic/operators:op_quantized_add",
+        "//executorch/backends/cadence/generic/operators:op_quantized_conv2d",
+        "//executorch/backends/cadence/generic/operators:op_quantized_conv1d",
+        "//executorch/backends/cadence/generic/operators:op_quantized_fully_connected",
+        "//executorch/backends/cadence/generic/operators:op_quantized_layer_norm",
+        "//executorch/backends/cadence/generic/operators:op_quantized_linear",
+        "//executorch/backends/cadence/generic/operators:op_quantized_matmul",
+        "//executorch/backends/cadence/generic/operators:op_quantized_relu",
         "//executorch/kernels/portable:executorch_all_ops",
         "//executorch/kernels/portable:operators",
     ],

backends/cadence/aot/functions.yaml

Lines changed: 2 additions & 2 deletions

@@ -349,12 +349,12 @@
     - arg_meta: null
       kernel_name: impl::generic::im2row_per_tensor_out

-- func: cadence::quantized_conv2d_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
+- func: cadence::quantized_conv2d_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
       kernel_name: impl::generic::quantized_conv2d_nchw_per_tensor_out

-- func: cadence::quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
+- func: cadence::quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
   kernels:
     - arg_meta: null
      kernel_name: impl::generic::quantized_conv2d_nhwc_per_tensor_out
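
A note on the schema: out_multiplier and out_shift typically carry the output requantization scale in fixed point, so the kernel can avoid floating-point math on DSPs. The sketch below shows one common convention (a Q31 multiplier plus a power-of-two shift); the helper names and the exact convention are assumptions for illustration, not taken from this commit:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Sketch: represent a float scale as a Q31 fixed-point multiplier and a
// power-of-two shift, one common convention behind out_multiplier/out_shift.
void encode_scale(float scale, int32_t& multiplier, int32_t& shift) {
  int exp = 0;
  float m = std::frexp(scale, &exp);  // scale = m * 2^exp, m in [0.5, 1)
  multiplier = static_cast<int32_t>(std::lround(m * 2147483648.0));  // m * 2^31
  shift = exp;  // demo only: assumes shift < 31, no saturation handling
}

// acc * scale ~= (acc * multiplier) * 2^(shift - 31), with round-to-nearest.
int32_t requantize(int32_t acc, int32_t multiplier, int32_t shift) {
  const int64_t prod = static_cast<int64_t>(acc) * multiplier;
  const int total_shift = 31 - shift;
  const int64_t rounding = int64_t{1} << (total_shift - 1);
  return static_cast<int32_t>((prod + rounding) >> total_shift);
}

int main() {
  int32_t mult = 0, shift = 0;
  encode_scale(1.0f / 256.0f, mult, shift);
  std::printf("%d\n", static_cast<int>(requantize(2560, mult, shift)));  // prints 10
}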

backends/cadence/generic/kernels/kernels.cpp

Lines changed: 1 addition & 0 deletions

@@ -7,6 +7,7 @@
  */

 #include <executorch/backends/cadence/generic/kernels/kernels.h>
+
 #include <algorithm>
 #include <cmath>
 #include <cstring>

backends/cadence/generic/kernels/kernels.h

Lines changed: 3 additions & 2 deletions

@@ -6,8 +6,9 @@
  * LICENSE file in the root directory of this source tree.
  */

-#include "inttypes.h"
-#include "stddef.h"
+#include <stddef.h>
+
+#include <cstdint>

 namespace impl {
 namespace generic {
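
For context, the quantize/dequantize templates declared in this header back the operator implementations below. Their real definitions live in kernels.cpp (not shown in full here); the following is only a minimal affine-quantization sketch, consistent with the call sites in op_quantized_add.cpp where quantize receives the inverse of the output scale:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

namespace sketch {

// Dequantize: recover a float from a quantized value (assumed semantics).
template <typename T>
float dequantize(T q, float scale, int32_t zero_point) {
  return scale * static_cast<float>(static_cast<int32_t>(q) - zero_point);
}

// Quantize: callers pass the *inverse* scale (1.0f / out_scale), matching
// the quantize<T>(z, inv_out_scale, out_zero_point) call sites below.
template <typename T>
T quantize(float x, float inv_scale, int32_t zero_point) {
  float v = std::round(x * inv_scale) + static_cast<float>(zero_point);
  v = std::max(v, static_cast<float>(std::numeric_limits<T>::min()));
  v = std::min(v, static_cast<float>(std::numeric_limits<T>::max()));
  return static_cast<T>(v);
}

} // namespace sketch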

backends/cadence/generic/operators/TARGETS

Lines changed: 0 additions & 5 deletions
This file was deleted.

backends/cadence/generic/operators/cadence_type_util.h (new file)

Lines changed: 63 additions & 0 deletions

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

/**
 * @file cadence_type_util.h
 * @brief Common type macros for Cadence quantized operators
 *
 * This header provides utility macros for iterating over supported quantized
 * data types in Cadence operators. These macros are used with switch
 * statements to dispatch to type-specific implementations.
 */

/**
 * Macro to iterate over standard Cadence quantized types (uint8_t, int8_t)
 *
 * Usage:
 *   ET_FORALL_CADENCE_QUANTIZED_TYPES(MACRO)
 *
 * Where MACRO is defined as: #define MACRO(ctype, name) ...
 *   - ctype: C++ type (uint8_t or int8_t)
 *   - name: ExecuTorch ScalarType name suffix (Byte or Char)
 *
 * Example:
 *   #define HANDLE_TYPE(ctype, name) \
 *     case ScalarType::name:         \
 *       return process<ctype>(tensor);
 *
 *   ScalarType dtype = tensor.scalar_type();
 *   switch (dtype) {
 *     ET_FORALL_CADENCE_QUANTIZED_TYPES(HANDLE_TYPE)
 *     default:
 *       ET_CHECK_MSG(false, "Unsupported dtype");
 *   }
 */
#define ET_FORALL_CADENCE_QUANTIZED_TYPES(_) \
  _(uint8_t, Byte)                           \
  _(int8_t, Char)

/**
 * Macro to iterate over extended Cadence quantized types including int16_t
 *
 * Usage:
 *   ET_FORALL_CADENCE_QUANTIZED_TYPES_WITH_INT16(MACRO)
 *
 * Where MACRO is defined as: #define MACRO(ctype, name) ...
 *   - ctype: C++ type (uint8_t, int8_t, or int16_t)
 *   - name: ExecuTorch ScalarType name suffix (Byte, Char, or Short)
 *
 * This macro includes int16_t support for operators that can handle 16-bit
 * quantized values (e.g., quantized_linear, quantized_fully_connected).
 */
#define ET_FORALL_CADENCE_QUANTIZED_TYPES_WITH_INT16(_) \
  _(uint8_t, Byte)                                      \
  _(int8_t, Char)                                       \
  _(int16_t, Short)
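
As a usage illustration (not part of the commit), here is a minimal, self-contained sketch of the X-macro dispatch pattern this header enables; the ScalarType enum and process function below are stand-ins for the real ExecuTorch types:

#include <cstdint>
#include <cstdio>

// Stand-in for executorch::aten::ScalarType (assumption for the demo).
enum class ScalarType { Byte, Char, Short };

#define ET_FORALL_CADENCE_QUANTIZED_TYPES(_) \
  _(uint8_t, Byte)                           \
  _(int8_t, Char)

template <typename T>
void process() {
  std::printf("dispatched to a %zu-byte integer type\n", sizeof(T));
}

void dispatch(ScalarType dtype) {
  // Each expansion of HANDLE_TYPE becomes one case label in the switch.
#define HANDLE_TYPE(ctype, name) \
  case ScalarType::name:         \
    process<ctype>();            \
    break;
  switch (dtype) {
    ET_FORALL_CADENCE_QUANTIZED_TYPES(HANDLE_TYPE)
    default:
      std::printf("unsupported dtype\n");
  }
#undef HANDLE_TYPE
}

int main() {
  dispatch(ScalarType::Char);  // prints: dispatched to a 1-byte integer type
}
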
backends/cadence/generic/operators/op_quantized_add.cpp (new file)

Lines changed: 216 additions & 0 deletions

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/backends/cadence/generic/operators/op_quantized_add.h>

#include <executorch/backends/cadence/generic/kernels/kernels.h>
#include <executorch/backends/cadence/generic/operators/quantized_op_macros.h>
#include <executorch/kernels/portable/cpu/scalar_utils.h>
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>

namespace impl::generic::native {

using ::executorch::aten::Scalar;
using ::executorch::aten::ScalarType;
using ::executorch::aten::Tensor;
using ::executorch::runtime::KernelRuntimeContext;
using ::impl::generic::kernels::dequantize;
using ::impl::generic::kernels::quantize;

DECLARE_POINTWISE_TENSOR_QUANTIZED_BINARY_OP(quantized_add_, +);

#define DECLARE_POINTWISE_SCALAR_QUANTIZED_BINARY_OP(BINARY_FUNC_NAME, OP) \
  template <typename T>                                                    \
  void BINARY_FUNC_NAME(                                                   \
      const Tensor& X,                                                     \
      float X_scale,                                                       \
      int32_t X_zero_point,                                                \
      const float Y,                                                       \
      float out_scale,                                                     \
      int32_t out_zero_point,                                              \
      Tensor& out) {                                                       \
    const T* __restrict__ X_data = X.const_data_ptr<T>();                  \
    T* __restrict__ out_data = out.mutable_data_ptr<T>();                  \
    float inv_out_scale = 1.0f / out_scale;                                \
    for (size_t i = 0, e = X.numel(); i < e; ++i) {                        \
      float x = dequantize<T>(X_data[i], X_scale, X_zero_point);           \
      float z = x OP Y;                                                    \
      out_data[i] = quantize<T>(z, inv_out_scale, out_zero_point);         \
    }                                                                      \
  }

DECLARE_POINTWISE_SCALAR_QUANTIZED_BINARY_OP(quantized_add_Scalar_, +);

Tensor& quantized_add_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    const Tensor& X_scale_t,
    const Tensor& X_zero_point_t,
    const Tensor& Y,
    const Tensor& Y_scale_t,
    const Tensor& Y_zero_point_t,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
  float X_scale = X_scale_t.const_data_ptr<float>()[0];
  int32_t X_zero_point = X_zero_point_t.const_data_ptr<int32_t>()[0];
  float Y_scale = Y_scale_t.const_data_ptr<float>()[0];
  int32_t Y_zero_point = Y_zero_point_t.const_data_ptr<int32_t>()[0];

#define typed_quantized_add(ctype, dtype)       \
  case ScalarType::dtype: {                     \
    quantized_add_<ctype>(                      \
        X,                                      \
        X_scale,                                \
        X_zero_point,                           \
        Y,                                      \
        Y_scale,                                \
        Y_zero_point,                           \
        static_cast<float>(out_scale),          \
        static_cast<int32_t>(out_zero_point),   \
        out);                                   \
    break;                                      \
  }

  ScalarType dtype = out.scalar_type();
  switch (dtype) {
    ET_FORALL_CADENCE_QUANTIZED_TYPES(typed_quantized_add);
    default:
      ET_DCHECK_MSG(
          false, "Unhandled dtype %s", torch::executor::toString(dtype));
  }
#undef typed_quantized_add

  return out;
}

Tensor& quantized_add_per_tensor_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    double X_scale,
    int64_t X_zero_point,
    const Tensor& Y,
    double Y_scale,
    int64_t Y_zero_point,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
#define typed_quantized_add(ctype, dtype)       \
  case ScalarType::dtype: {                     \
    quantized_add_<ctype>(                      \
        X,                                      \
        static_cast<float>(X_scale),            \
        static_cast<int32_t>(X_zero_point),     \
        Y,                                      \
        static_cast<float>(Y_scale),            \
        static_cast<int32_t>(Y_zero_point),     \
        static_cast<float>(out_scale),          \
        static_cast<int32_t>(out_zero_point),   \
        out);                                   \
    break;                                      \
  }

  ScalarType dtype = out.scalar_type();
  switch (dtype) {
    ET_FORALL_CADENCE_QUANTIZED_TYPES(typed_quantized_add);
    default:
      ET_DCHECK_MSG(
          false, "Unhandled dtype %s", torch::executor::toString(dtype));
  }
#undef typed_quantized_add
  return out;
}

Tensor& quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    double X_scale,
    int64_t X_zero_point,
    const Tensor& Y,
    double Y_scale,
    int64_t Y_zero_point,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
  quantized_add_<int8_t>(
      X,
      static_cast<float>(X_scale),
      static_cast<int32_t>(X_zero_point),
      Y,
      static_cast<float>(Y_scale),
      static_cast<int32_t>(Y_zero_point),
      static_cast<float>(out_scale),
      static_cast<int32_t>(out_zero_point),
      out);
  return out;
}

Tensor& quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    double X_scale,
    int64_t X_zero_point,
    const Tensor& Y,
    double Y_scale,
    int64_t Y_zero_point,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
  quantized_add_<uint8_t>(
      X,
      static_cast<float>(X_scale),
      static_cast<int32_t>(X_zero_point),
      Y,
      static_cast<float>(Y_scale),
      static_cast<int32_t>(Y_zero_point),
      static_cast<float>(out_scale),
      static_cast<int32_t>(out_zero_point),
      out);
  return out;
}

Tensor& quantized_add_Scalar_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    const Tensor& X_scale_t,
    const Tensor& X_zero_point_t,
    const Scalar& Y_scalar,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
  float X_scale = X_scale_t.const_data_ptr<float>()[0];
  int32_t X_zero_point = X_zero_point_t.const_data_ptr<int32_t>()[0];
  float Y = static_cast<float>(
      ::torch::executor::native::utils::scalar_to<double>(Y_scalar));
#define typed_quantized_add_Scalar(ctype, dtype) \
  case ScalarType::dtype: {                      \
    quantized_add_Scalar_<ctype>(                \
        X,                                       \
        X_scale,                                 \
        X_zero_point,                            \
        Y,                                       \
        static_cast<float>(out_scale),           \
        static_cast<int32_t>(out_zero_point),    \
        out);                                    \
    break;                                       \
  }

  ScalarType dtype = out.scalar_type();
  switch (dtype) {
    ET_FORALL_CADENCE_QUANTIZED_TYPES(typed_quantized_add_Scalar)
    default:
      ET_DCHECK_MSG(
          false, "Unhandled dtype %s", torch::executor::toString(dtype));
  }
#undef typed_quantized_add_Scalar
  return out;
}

#undef DECLARE_POINTWISE_SCALAR_QUANTIZED_BINARY_OP

} // namespace impl::generic::native
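
For intuition, the tensor variant computes out[i] = quantize(dequantize(X[i]) + dequantize(Y[i])). A small standalone sketch of that flow with concrete numbers (the helpers are stand-ins for impl::generic::kernels, under the same assumptions as the sketch after kernels.h above):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Stand-in affine (de)quantization helpers; assumptions, not the real kernels.
static float dequantize(int8_t q, float scale, int32_t zp) {
  return scale * static_cast<float>(q - zp);
}
static int8_t quantize(float x, float inv_scale, int32_t zp) {
  float v = std::round(x * inv_scale) + static_cast<float>(zp);
  v = std::fmin(std::fmax(v, -128.0f), 127.0f);  // clamp to the int8 range
  return static_cast<int8_t>(v);
}

int main() {
  // X holds 0.5 at scale 0.05 (q = 10); Y holds 0.25 at scale 0.025 (q = 10).
  const int8_t xq = 10, yq = 10;
  // Dequantize both operands, add in float, then requantize at out_scale.
  const float z = dequantize(xq, 0.05f, 0) + dequantize(yq, 0.025f, 0);  // 0.75
  const int8_t zq = quantize(z, 1.0f / 0.01f, 0);  // out_scale = 0.01 -> q = 75
  std::printf("zq = %d (~%.2f)\n", zq, zq * 0.01f);  // zq = 75 (~0.75)
}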
