Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions backends/cadence/aot/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -146,17 +146,17 @@ executorch_generated_lib(
deps = [
"//executorch/backends/cadence/generic/kernels:cadence_kernels",
"//executorch/backends/cadence/generic/operators:op_requantize",
"//executorch/backends/cadence/generic/operators:im2row_out",
"//executorch/backends/cadence/generic/operators:dequantize_per_tensor",
"//executorch/backends/cadence/generic/operators:quantize_per_tensor",
"//executorch/backends/cadence/generic/operators:quantized_add_out",
"//executorch/backends/cadence/generic/operators:quantized_conv2d_nchw_out",
"//executorch/backends/cadence/generic/operators:quantized_conv2d_nhwc_out",
"//executorch/backends/cadence/generic/operators:quantized_fully_connected_out",
"//executorch/backends/cadence/generic/operators:quantized_layer_norm",
"//executorch/backends/cadence/generic/operators:quantized_linear_out",
"//executorch/backends/cadence/generic/operators:quantized_matmul_out",
"//executorch/backends/cadence/generic/operators:quantized_relu_out",
"//executorch/backends/cadence/generic/operators:op_im2row",
"//executorch/backends/cadence/generic/operators:op_dequantize_per_tensor",
"//executorch/backends/cadence/generic/operators:op_quantize_per_tensor",
"//executorch/backends/cadence/generic/operators:op_quantized_add",
"//executorch/backends/cadence/generic/operators:op_quantized_conv2d",
"//executorch/backends/cadence/generic/operators:op_quantized_conv1d",
"//executorch/backends/cadence/generic/operators:op_quantized_fully_connected",
"//executorch/backends/cadence/generic/operators:op_quantized_layer_norm",
"//executorch/backends/cadence/generic/operators:op_quantized_linear",
"//executorch/backends/cadence/generic/operators:op_quantized_matmul",
"//executorch/backends/cadence/generic/operators:op_quantized_relu",
"//executorch/kernels/portable:executorch_all_ops",
"//executorch/kernels/portable:operators",
],
Expand Down
4 changes: 2 additions & 2 deletions backends/cadence/aot/functions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -349,12 +349,12 @@
- arg_meta: null
kernel_name: impl::generic::im2row_per_tensor_out

- func: cadence::quantized_conv2d_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
- func: cadence::quantized_conv2d_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
kernel_name: impl::generic::quantized_conv2d_nchw_per_tensor_out

- func: cadence::quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
- func: cadence::quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
kernel_name: impl::generic::quantized_conv2d_nhwc_per_tensor_out
Expand Down
1 change: 1 addition & 0 deletions backends/cadence/generic/kernels/kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/

#include <executorch/backends/cadence/generic/kernels/kernels.h>

#include <algorithm>
#include <cmath>
#include <cstring>
Expand Down
5 changes: 3 additions & 2 deletions backends/cadence/generic/kernels/kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
* LICENSE file in the root directory of this source tree.
*/

#include "inttypes.h"
#include "stddef.h"
#include <stddef.h>

#include <cstdint>

namespace impl {
namespace generic {
Expand Down
5 changes: 0 additions & 5 deletions backends/cadence/generic/operators/TARGETS

This file was deleted.

63 changes: 63 additions & 0 deletions backends/cadence/generic/operators/cadence_type_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

/**
 * @file cadence_type_util.h
 * @brief Common type macros for Cadence quantized operators
 *
 * This header provides utility macros for iterating over supported quantized
 * data types in Cadence operators. These macros are used with switch statements
 * to dispatch to type-specific implementations.
 */

/**
 * Macro to iterate over standard Cadence quantized types (uint8_t, int8_t)
 *
 * Usage:
 * ET_FORALL_CADENCE_QUANTIZED_TYPES(MACRO)
 *
 * Where MACRO is defined as: #define MACRO(ctype, name) ...
 * - ctype: C++ type (uint8_t or int8_t)
 * - name: ExecuTorch ScalarType name suffix (Byte or Char)
 *
 * Example:
 * #define HANDLE_TYPE(ctype, name) \
 * case ScalarType::name: \
 * return process<ctype>(tensor); \
 * break;
 *
 * ScalarType dtype = tensor.scalar_type();
 * switch (dtype) {
 * ET_FORALL_CADENCE_QUANTIZED_TYPES(HANDLE_TYPE)
 * default:
 * ET_CHECK_MSG(false, "Unsupported dtype");
 * }
 */
#define ET_FORALL_CADENCE_QUANTIZED_TYPES(_) \
_(uint8_t, Byte) \
_(int8_t, Char)

/**
 * Macro to iterate over extended Cadence quantized types including int16_t
 *
 * Usage:
 * ET_FORALL_CADENCE_QUANTIZED_TYPES_WITH_INT16(MACRO)
 *
 * Where MACRO is defined as: #define MACRO(ctype, name) ...
 * - ctype: C++ type (uint8_t, int8_t, or int16_t)
 * - name: ExecuTorch ScalarType name suffix (Byte, Char, or Short)
 *
 * This macro includes int16_t support for operators that can handle 16-bit
 * quantized values (e.g., quantized_linear, quantized_fully_connected).
 */
#define ET_FORALL_CADENCE_QUANTIZED_TYPES_WITH_INT16(_) \
_(uint8_t, Byte) \
_(int8_t, Char) \
_(int16_t, Short)
216 changes: 216 additions & 0 deletions backends/cadence/generic/operators/op_quantized_add.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/backends/cadence/generic/operators/op_quantized_add.h>

#include <executorch/backends/cadence/generic/kernels/kernels.h>
#include <executorch/backends/cadence/generic/operators/quantized_op_macros.h>
#include <executorch/kernels/portable/cpu/scalar_utils.h>
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>

namespace impl::generic::native {

using ::executorch::aten::Scalar;
using ::executorch::aten::ScalarType;
using ::executorch::aten::Tensor;
using ::executorch::runtime::KernelRuntimeContext;
using ::impl::generic::kernels::dequantize;
using ::impl::generic::kernels::quantize;

// Instantiates the elementwise tensor-(+)-tensor quantized kernel: this
// expands to `template <typename T> void quantized_add_(...)`, which
// dequantizes both operands, adds in float, and requantizes the result.
// The macro itself is defined in quantized_op_macros.h.
DECLARE_POINTWISE_TENSOR_QUANTIZED_BINARY_OP(quantized_add_, +);

// Generates a pointwise tensor-OP-scalar quantized kernel. For each element
// of X: dequantize, apply `OP` with the float scalar Y, then requantize into
// `out` using 1/out_scale (multiply is cheaper than divide in the loop).
// NOTE: comments cannot be placed inside the macro body because of the line
// continuations, so the full contract is documented here.
#define DECLARE_POINTWISE_SCALAR_QUANTIZED_BINARY_OP(BINARY_FUNC_NAME, OP) \
template <typename T> \
void BINARY_FUNC_NAME( \
const Tensor& X, \
float X_scale, \
int32_t X_zero_point, \
const float Y, \
float out_scale, \
int32_t out_zero_point, \
Tensor& out) { \
const T* __restrict__ X_data = X.const_data_ptr<T>(); \
T* __restrict__ out_data = out.mutable_data_ptr<T>(); \
float inv_out_scale = 1.0f / out_scale; \
for (size_t i = 0, e = X.numel(); i < e; ++i) { \
float x = dequantize<T>(X_data[i], X_scale, X_zero_point); \
float z = x OP Y; \
out_data[i] = quantize<T>(z, inv_out_scale, out_zero_point); \
} \
}

// Instantiates `quantized_add_Scalar_<T>` (tensor + float scalar) from the
// macro above; used by quantized_add_Scalar_out below.
DECLARE_POINTWISE_SCALAR_QUANTIZED_BINARY_OP(quantized_add_Scalar_, +);

/**
 * Quantized elementwise add where the quantization parameters for the two
 * inputs are supplied as single-element tensors (float scale, int32 zero
 * point). Dispatches on the dtype of `out` to the uint8/int8 kernel.
 *
 * Returns `out` for chaining. Unhandled dtypes trip a debug-only check.
 */
Tensor& quantized_add_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    const Tensor& X_scale_t,
    const Tensor& X_zero_point_t,
    const Tensor& Y,
    const Tensor& Y_scale_t,
    const Tensor& Y_zero_point_t,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
  // Per-tensor quant params arrive boxed in one-element tensors; unbox them.
  const float x_scale = X_scale_t.const_data_ptr<float>()[0];
  const int32_t x_zero_point = X_zero_point_t.const_data_ptr<int32_t>()[0];
  const float y_scale = Y_scale_t.const_data_ptr<float>()[0];
  const int32_t y_zero_point = Y_zero_point_t.const_data_ptr<int32_t>()[0];
  const float o_scale = static_cast<float>(out_scale);
  const int32_t o_zero_point = static_cast<int32_t>(out_zero_point);

  // Explicit dispatch over the supported quantized dtypes (Byte, Char).
  const ScalarType dtype = out.scalar_type();
  switch (dtype) {
    case ScalarType::Byte:
      quantized_add_<uint8_t>(
          X, x_scale, x_zero_point, Y, y_scale, y_zero_point, o_scale,
          o_zero_point, out);
      break;
    case ScalarType::Char:
      quantized_add_<int8_t>(
          X, x_scale, x_zero_point, Y, y_scale, y_zero_point, o_scale,
          o_zero_point, out);
      break;
    default:
      ET_DCHECK_MSG(
          false, "Unhandled dtype %s", torch::executor::toString(dtype));
  }

  return out;
}

/**
 * Quantized elementwise add with per-tensor quantization parameters passed
 * directly as scalars (double scale, int64 zero point). Dispatches on the
 * dtype of `out` to the uint8/int8 kernel.
 *
 * Returns `out` for chaining. Unhandled dtypes trip a debug-only check.
 */
Tensor& quantized_add_per_tensor_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    double X_scale,
    int64_t X_zero_point,
    const Tensor& Y,
    double Y_scale,
    int64_t Y_zero_point,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
  // Narrow the operator-schema types (double/int64) to the kernel's types.
  const float x_scale = static_cast<float>(X_scale);
  const int32_t x_zero_point = static_cast<int32_t>(X_zero_point);
  const float y_scale = static_cast<float>(Y_scale);
  const int32_t y_zero_point = static_cast<int32_t>(Y_zero_point);
  const float o_scale = static_cast<float>(out_scale);
  const int32_t o_zero_point = static_cast<int32_t>(out_zero_point);

  // Explicit dispatch over the supported quantized dtypes (Byte, Char).
  const ScalarType dtype = out.scalar_type();
  switch (dtype) {
    case ScalarType::Byte:
      quantized_add_<uint8_t>(
          X, x_scale, x_zero_point, Y, y_scale, y_zero_point, o_scale,
          o_zero_point, out);
      break;
    case ScalarType::Char:
      quantized_add_<int8_t>(
          X, x_scale, x_zero_point, Y, y_scale, y_zero_point, o_scale,
          o_zero_point, out);
      break;
    default:
      ET_DCHECK_MSG(
          false, "Unhandled dtype %s", torch::executor::toString(dtype));
  }
  return out;
}

/**
 * Dtype-specialized variant of quantized_add_per_tensor_out for int8
 * (asym8s) inputs and output: skips runtime dtype dispatch and calls the
 * int8_t kernel directly.
 */
Tensor& quantized_add_asym8sxasym8s_asym8s_per_tensor_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    double X_scale,
    int64_t X_zero_point,
    const Tensor& Y,
    double Y_scale,
    int64_t Y_zero_point,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
  // Narrow schema-level double/int64 params to the kernel's float/int32.
  const float x_scale = static_cast<float>(X_scale);
  const int32_t x_zero_point = static_cast<int32_t>(X_zero_point);
  const float y_scale = static_cast<float>(Y_scale);
  const int32_t y_zero_point = static_cast<int32_t>(Y_zero_point);
  quantized_add_<int8_t>(
      X,
      x_scale,
      x_zero_point,
      Y,
      y_scale,
      y_zero_point,
      static_cast<float>(out_scale),
      static_cast<int32_t>(out_zero_point),
      out);
  return out;
}

/**
 * Dtype-specialized variant of quantized_add_per_tensor_out for uint8
 * (asym8u) inputs and output: skips runtime dtype dispatch and calls the
 * uint8_t kernel directly.
 */
Tensor& quantized_add_asym8uxasym8u_asym8u_per_tensor_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    double X_scale,
    int64_t X_zero_point,
    const Tensor& Y,
    double Y_scale,
    int64_t Y_zero_point,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
  // Narrow schema-level double/int64 params to the kernel's float/int32.
  const float x_scale = static_cast<float>(X_scale);
  const int32_t x_zero_point = static_cast<int32_t>(X_zero_point);
  const float y_scale = static_cast<float>(Y_scale);
  const int32_t y_zero_point = static_cast<int32_t>(Y_zero_point);
  quantized_add_<uint8_t>(
      X,
      x_scale,
      x_zero_point,
      Y,
      y_scale,
      y_zero_point,
      static_cast<float>(out_scale),
      static_cast<int32_t>(out_zero_point),
      out);
  return out;
}

/**
 * Quantized tensor-plus-scalar add. X's quantization parameters arrive as
 * single-element tensors; the scalar addend Y is converted to float and
 * added to each dequantized element before requantizing into `out`.
 *
 * Returns `out` for chaining. Unhandled dtypes trip a debug-only check.
 */
Tensor& quantized_add_Scalar_out(
    ET_UNUSED KernelRuntimeContext& ctx,
    const Tensor& X,
    const Tensor& X_scale_t,
    const Tensor& X_zero_point_t,
    const Scalar& Y_scalar,
    double out_scale,
    int64_t out_zero_point,
    Tensor& out) {
  // Unbox X's per-tensor quant params from their one-element tensors.
  const float x_scale = X_scale_t.const_data_ptr<float>()[0];
  const int32_t x_zero_point = X_zero_point_t.const_data_ptr<int32_t>()[0];
  // Extract the scalar addend as a float (via double to avoid precision
  // surprises from integral Scalars).
  const float y_value = static_cast<float>(
      ::torch::executor::native::utils::scalar_to<double>(Y_scalar));
  const float o_scale = static_cast<float>(out_scale);
  const int32_t o_zero_point = static_cast<int32_t>(out_zero_point);

  // Explicit dispatch over the supported quantized dtypes (Byte, Char).
  const ScalarType dtype = out.scalar_type();
  switch (dtype) {
    case ScalarType::Byte:
      quantized_add_Scalar_<uint8_t>(
          X, x_scale, x_zero_point, y_value, o_scale, o_zero_point, out);
      break;
    case ScalarType::Char:
      quantized_add_Scalar_<int8_t>(
          X, x_scale, x_zero_point, y_value, o_scale, o_zero_point, out);
      break;
    default:
      ET_DCHECK_MSG(
          false, "Unhandled dtype %s", torch::executor::toString(dtype));
  }
  return out;
}

#undef DECLARE_POINTWISE_SCALAR_QUANTIZED_BINARY_OP

} // namespace impl::generic::native
Loading
Loading