
Commit d56b68c

hsharma35 authored and meta-codesync[bot] committed

Migrate generic cadence operators to oss. (pytorch#16025)

Summary: Pull Request resolved: pytorch#16025. Migrates operators from the internal repo to cadence/generic. Differential Revision: D87900844

1 parent e4ac457 · commit d56b68c

37 files changed: +4,249 −2,177 lines

backends/cadence/aot/TARGETS

Lines changed: 4 additions & 4 deletions

@@ -145,10 +145,10 @@ executorch_generated_lib(
     visibility = ["PUBLIC"],
     deps = [
         "//executorch/backends/cadence/generic/kernels:cadence_kernels",
-        "//executorch/backends/cadence/generic/operators:op_requantize_out",
-        "//executorch/backends/cadence/generic/operators:im2row_out",
-        "//executorch/backends/cadence/generic/operators:dequantize_per_tensor",
-        "//executorch/backends/cadence/generic/operators:quantize_per_tensor",
+        "//executorch/backends/cadence/generic/operators:op_requantize",
+        "//executorch/backends/cadence/generic/operators:op_im2row",
+        "//executorch/backends/cadence/generic/operators:op_dequantize_per_tensor",
+        "//executorch/backends/cadence/generic/operators:op_quantize_per_tensor",
         "//executorch/backends/cadence/generic/operators:quantized_add_out",
         "//executorch/backends/cadence/generic/operators:quantized_conv2d_nchw_out",
         "//executorch/backends/cadence/generic/operators:quantized_conv2d_nhwc_out",

backends/cadence/generic/operators/TARGETS

Lines changed: 0 additions & 5 deletions
This file was deleted.
Lines changed: 63 additions & 0 deletions

New file: cadence_type_util.h (path not shown in this view; the name is taken from the file's @file tag)

@@ -0,0 +1,63 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

/**
 * @file cadence_type_util.h
 * @brief Common type macros for Cadence quantized operators
 *
 * This header provides utility macros for iterating over supported quantized
 * data types in Cadence operators. These macros are used with switch statements
 * to dispatch to type-specific implementations.
 */

/**
 * Macro to iterate over standard Cadence quantized types (uint8_t, int8_t)
 *
 * Usage:
 *   ET_FORALL_CADENCE_QUANTIZED_TYPES(MACRO)
 *
 * Where MACRO is defined as: #define MACRO(ctype, name) ...
 *   - ctype: C++ type (uint8_t or int8_t)
 *   - name: ExecuTorch ScalarType name suffix (Byte or Char)
 *
 * Example:
 *   #define HANDLE_TYPE(ctype, name)   \
 *     case ScalarType::name:           \
 *       return process<ctype>(tensor); \
 *       break;
 *
 *   ScalarType dtype = tensor.scalar_type();
 *   switch (dtype) {
 *     ET_FORALL_CADENCE_QUANTIZED_TYPES(HANDLE_TYPE)
 *     default:
 *       ET_CHECK_MSG(false, "Unsupported dtype");
 *   }
 */
#define ET_FORALL_CADENCE_QUANTIZED_TYPES(_) \
  _(uint8_t, Byte)                           \
  _(int8_t, Char)

/**
 * Macro to iterate over extended Cadence quantized types including int16_t
 *
 * Usage:
 *   ET_FORALL_CADENCE_QUANTIZED_TYPES_WITH_INT16(MACRO)
 *
 * Where MACRO is defined as: #define MACRO(ctype, name) ...
 *   - ctype: C++ type (uint8_t, int8_t, or int16_t)
 *   - name: ExecuTorch ScalarType name suffix (Byte, Char, or Short)
 *
 * This macro includes int16_t support for operators that can handle 16-bit
 * quantized values (e.g., quantized_linear, quantized_fully_connected).
 */
#define ET_FORALL_CADENCE_QUANTIZED_TYPES_WITH_INT16(_) \
  _(uint8_t, Byte)                                      \
  _(int8_t, Char)                                       \
  _(int16_t, Short)
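
The doc block above only walks through the base macro. For reference, a minimal dispatch sketch using the int16-enabled variant (not part of this commit; HANDLE_DTYPE and process_linear are hypothetical names, and the include path for cadence_type_util.h is assumed) might look like:

// Hypothetical usage sketch. process_linear<T>() stands in for any
// type-templated kernel, e.g. a quantized_linear implementation.
#include <executorch/backends/cadence/generic/operators/cadence_type_util.h>  // path assumed
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/platform/assert.h>

using ::executorch::aten::ScalarType;
using ::executorch::aten::Tensor;

template <typename T>
void process_linear(const Tensor& input, Tensor& out);  // placeholder kernel

void dispatch_linear(const Tensor& input, Tensor& out) {
#define HANDLE_DTYPE(ctype, name)      \
  case ScalarType::name:               \
    process_linear<ctype>(input, out); \
    break;

  switch (out.scalar_type()) {
    // Expands to cases for Byte (uint8_t), Char (int8_t), and Short (int16_t).
    ET_FORALL_CADENCE_QUANTIZED_TYPES_WITH_INT16(HANDLE_DTYPE)
    default:
      ET_CHECK_MSG(false, "Unsupported dtype");
  }
#undef HANDLE_DTYPE
}

The X-macro pattern keeps every per-dtype case body in one place, so adding support for a new quantized type only means extending the type list.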
Lines changed: 220 additions & 0 deletions

New file: op_quantized_add.cpp (name inferred from the self-header include below)

@@ -0,0 +1,220 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/backends/cadence/generic/operators/op_quantized_add.h>

#include <executorch/backends/cadence/generic/kernels/kernels.h>
#include <executorch/backends/cadence/generic/operators/quantized_op_macros.h>
#include <executorch/kernels/portable/cpu/scalar_utils.h>
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>

namespace impl {
namespace generic {
namespace native {

using ::executorch::aten::Scalar;
using ::executorch::aten::ScalarType;
using ::executorch::aten::Tensor;
using ::executorch::runtime::KernelRuntimeContext;
using ::impl::generic::kernels::dequantize;
using ::impl::generic::kernels::quantize;

DECLARE_POINTWISE_TENSOR_QUANTIZED_BINARY_OP(quantized_add_, +);

#define DECLARE_POINTWISE_SCALAR_QUANTIZED_BINARY_OP(BINARY_FUNC_NAME, OP) \
  template <typename T>                                                    \
  void BINARY_FUNC_NAME(                                                   \
      const Tensor& X,                                                     \
      float X_scale,                                                       \
      int32_t X_zero_point,                                                \
      const float Y,                                                       \
      float out_scale,                                                     \
      int32_t out_zero_point,                                              \
      Tensor& out) {                                                       \
    const T* __restrict__ X_data = X.const_data_ptr<T>();                  \
    T* __restrict__ out_data = out.mutable_data_ptr<T>();                  \
    float inv_out_scale = 1.0f / out_scale;                                \
    for (size_t i = 0, e = X.numel(); i < e; ++i) {                        \
      float x = dequantize<T>(X_data[i], X_scale, X_zero_point);           \
      float z = x OP Y;                                                    \
      out_data[i] = quantize<T>(z, inv_out_scale, out_zero_point);         \
    }                                                                      \
  }

DECLARE_POINTWISE_SCALAR_QUANTIZED_BINARY_OP(quantized_add_Scalar_, +);

Tensor& quantized_add_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    const Tensor& X_scale_t,
    const Tensor& X_zero_point_t,
    const Tensor& Y,
    const Tensor& Y_scale_t,
    const Tensor& Y_zero_point_t,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
  float X_scale = X_scale_t.const_data_ptr<float>()[0];
  int32_t X_zero_point = X_zero_point_t.const_data_ptr<int32_t>()[0];
  float Y_scale = Y_scale_t.const_data_ptr<float>()[0];
  int32_t Y_zero_point = Y_zero_point_t.const_data_ptr<int32_t>()[0];

#define typed_quantized_add(ctype, dtype)     \
  case ScalarType::dtype: {                   \
    quantized_add_<ctype>(                    \
        X,                                    \
        X_scale,                              \
        X_zero_point,                         \
        Y,                                    \
        Y_scale,                              \
        Y_zero_point,                         \
        static_cast<float>(out_scale),        \
        static_cast<int32_t>(out_zero_point), \
        out);                                 \
    break;                                    \
  }

  ScalarType dtype = out.scalar_type();
  switch (dtype) {
    ET_FORALL_CADENCE_QUANTIZED_TYPES(typed_quantized_add);
    default:
      ET_DCHECK_MSG(
          false, "Unhandled dtype %s", torch::executor::toString(dtype));
  }
#undef typed_quantized_add

  return out;
}

Tensor& quantized_add_per_tensor_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    double X_scale,
    int64_t X_zero_point,
    const Tensor& Y,
    double Y_scale,
    int64_t Y_zero_point,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
#define typed_quantized_add(ctype, dtype)     \
  case ScalarType::dtype: {                   \
    quantized_add_<ctype>(                    \
        X,                                    \
        static_cast<float>(X_scale),          \
        static_cast<int32_t>(X_zero_point),   \
        Y,                                    \
        static_cast<float>(Y_scale),          \
        static_cast<int32_t>(Y_zero_point),   \
        static_cast<float>(out_scale),        \
        static_cast<int32_t>(out_zero_point), \
        out);                                 \
    break;                                    \
  }

  ScalarType dtype = out.scalar_type();
  switch (dtype) {
    ET_FORALL_CADENCE_QUANTIZED_TYPES(typed_quantized_add);
    default:
      ET_DCHECK_MSG(
          false, "Unhandled dtype %s", torch::executor::toString(dtype));
  }
#undef typed_quantized_add
  return out;
}

Tensor& quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    double X_scale,
    int64_t X_zero_point,
    const Tensor& Y,
    double Y_scale,
    int64_t Y_zero_point,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
  quantized_add_<int8_t>(
      X,
      static_cast<float>(X_scale),
      static_cast<int32_t>(X_zero_point),
      Y,
      static_cast<float>(Y_scale),
      static_cast<int32_t>(Y_zero_point),
      static_cast<float>(out_scale),
      static_cast<int32_t>(out_zero_point),
      out);
  return out;
}

Tensor& quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    double X_scale,
    int64_t X_zero_point,
    const Tensor& Y,
    double Y_scale,
    int64_t Y_zero_point,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
  quantized_add_<uint8_t>(
      X,
      static_cast<float>(X_scale),
      static_cast<int32_t>(X_zero_point),
      Y,
      static_cast<float>(Y_scale),
      static_cast<int32_t>(Y_zero_point),
      static_cast<float>(out_scale),
      static_cast<int32_t>(out_zero_point),
      out);
  return out;
}

Tensor& quantized_add_Scalar_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    const Tensor& X_scale_t,
    const Tensor& X_zero_point_t,
    const Scalar& Y_scalar,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
  float X_scale = X_scale_t.const_data_ptr<float>()[0];
  int32_t X_zero_point = X_zero_point_t.const_data_ptr<int32_t>()[0];
  float Y = static_cast<float>(
      ::torch::executor::native::utils::scalar_to<double>(Y_scalar));
#define typed_quantized_add_Scalar(ctype, dtype) \
  case ScalarType::dtype: {                      \
    quantized_add_Scalar_<ctype>(                \
        X,                                       \
        X_scale,                                 \
        X_zero_point,                            \
        Y,                                       \
        static_cast<float>(out_scale),           \
        static_cast<int32_t>(out_zero_point),    \
        out);                                    \
    break;                                       \
  }

  ScalarType dtype = out.scalar_type();
  switch (dtype) {
    ET_FORALL_CADENCE_QUANTIZED_TYPES(typed_quantized_add_Scalar)
    default:
      ET_DCHECK_MSG(
          false, "Unhandled dtype %s", torch::executor::toString(dtype));
  }
#undef typed_quantized_add_Scalar
  return out;
}

#undef DECLARE_POINTWISE_SCALAR_QUANTIZED_BINARY_OP

} // namespace native
} // namespace generic
} // namespace impl
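
DECLARE_POINTWISE_TENSOR_QUANTIZED_BINARY_OP itself is defined in quantized_op_macros.h, which is not part of this diff. A plausible expansion, sketched here by analogy with the scalar macro above and the quantized_add_<T> call sites (the real definition may differ, for example by handling broadcasting), is:

// Sketch only: inferred shape of the tensor-tensor macro, not the actual
// definition from quantized_op_macros.h.
#define DECLARE_POINTWISE_TENSOR_QUANTIZED_BINARY_OP(BINARY_FUNC_NAME, OP) \
  template <typename T>                                                    \
  void BINARY_FUNC_NAME(                                                   \
      const Tensor& X,                                                     \
      float X_scale,                                                       \
      int32_t X_zero_point,                                                \
      const Tensor& Y,                                                     \
      float Y_scale,                                                       \
      int32_t Y_zero_point,                                                \
      float out_scale,                                                     \
      int32_t out_zero_point,                                              \
      Tensor& out) {                                                       \
    const T* __restrict__ X_data = X.const_data_ptr<T>();                  \
    const T* __restrict__ Y_data = Y.const_data_ptr<T>();                  \
    T* __restrict__ out_data = out.mutable_data_ptr<T>();                  \
    float inv_out_scale = 1.0f / out_scale;                                \
    for (size_t i = 0, e = out.numel(); i < e; ++i) {                      \
      /* Dequantize both operands, apply OP in float, requantize. */       \
      float x = dequantize<T>(X_data[i], X_scale, X_zero_point);           \
      float y = dequantize<T>(Y_data[i], Y_scale, Y_zero_point);           \
      out_data[i] = quantize<T>(x OP y, inv_out_scale, out_zero_point);    \
    }                                                                      \
  }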
Lines changed: 78 additions & 0 deletions

New file: backends/cadence/generic/operators/op_quantized_add.h (the interface included by op_quantized_add.cpp above)

@@ -0,0 +1,78 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/kernel/kernel_runtime_context.h>

namespace impl {
namespace generic {
namespace native {

::executorch::aten::Tensor& quantized_add_out(
    ::executorch::runtime::KernelRuntimeContext& ctx,
    const ::executorch::aten::Tensor& X,
    const ::executorch::aten::Tensor& X_scale,
    const ::executorch::aten::Tensor& X_zero_point,
    const ::executorch::aten::Tensor& Y,
    const ::executorch::aten::Tensor& Y_scale,
    const ::executorch::aten::Tensor& Y_zero_point,
    double out_scale,
    int64_t out_zero_point,
    ::executorch::aten::Tensor& out);

::executorch::aten::Tensor& quantized_add_per_tensor_out(
    ::executorch::runtime::KernelRuntimeContext& ctx,
    const ::executorch::aten::Tensor& X,
    double X_scale,
    int64_t X_zero_point,
    const ::executorch::aten::Tensor& Y,
    double Y_scale,
    int64_t Y_zero_point,
    double out_scale,
    int64_t out_zero_point,
    ::executorch::aten::Tensor& out);

::executorch::aten::Tensor& quantized_add_Scalar_out(
    ::executorch::runtime::KernelRuntimeContext& ctx,
    const ::executorch::aten::Tensor& X,
    const ::executorch::aten::Tensor& X_scale,
    const ::executorch::aten::Tensor& X_zero_point,
    const ::executorch::aten::Scalar& Y,
    double out_scale,
    int64_t out_zero_point,
    ::executorch::aten::Tensor& out);

::executorch::aten::Tensor& quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
    ::executorch::runtime::KernelRuntimeContext& ctx,
    const ::executorch::aten::Tensor& X,
    double X_scale,
    int64_t X_zero_point,
    const ::executorch::aten::Tensor& Y,
    double Y_scale,
    int64_t Y_zero_point,
    double out_scale,
    int64_t out_zero_point,
    ::executorch::aten::Tensor& out);

::executorch::aten::Tensor& quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
    ::executorch::runtime::KernelRuntimeContext& ctx,
    const ::executorch::aten::Tensor& X,
    double X_scale,
    int64_t X_zero_point,
    const ::executorch::aten::Tensor& Y,
    double Y_scale,
    int64_t Y_zero_point,
    double out_scale,
    int64_t out_zero_point,
    ::executorch::aten::Tensor& out);

} // namespace native
} // namespace generic
} // namespace impl
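
For orientation, a hypothetical call into the per-tensor entry point, assuming the TensorFactory helper from ExecuTorch's testing utilities (this snippet is illustrative and not part of the commit):

// Usage sketch only. TensorFactory and its include path are assumptions about
// the ExecuTorch test helpers, not something added by this change.
#include <executorch/backends/cadence/generic/operators/op_quantized_add.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>

using ::executorch::aten::ScalarType;
using ::executorch::aten::Tensor;
using ::executorch::runtime::KernelRuntimeContext;
using ::torch::executor::testing::TensorFactory;

void example() {
  TensorFactory<ScalarType::Char> tf;
  // Two int8 inputs, both quantized with scale 0.1 and zero point 0.
  Tensor x = tf.make({4}, {10, 20, 30, 40});
  Tensor y = tf.make({4}, {1, 2, 3, 4});
  Tensor out = tf.zeros({4});

  KernelRuntimeContext ctx;
  // Per-tensor variant: scales and zero points are passed as plain scalars.
  impl::generic::native::quantized_add_per_tensor_out(
      ctx,
      x, /*X_scale=*/0.1, /*X_zero_point=*/0,
      y, /*Y_scale=*/0.1, /*Y_zero_point=*/0,
      /*out_scale=*/0.1, /*out_zero_point=*/0,
      out);
}

The asym8s/asym8u variants skip the dtype switch entirely and call the int8_t or uint8_t kernel directly, which is useful when the output dtype is already known at graph-compilation time.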
