Skip to content

Commit a7a9130

Browse files
authored
Adding operators: bitwise_and, bitwise_or, bitwise_xor, embedding, eq, fmod, ge, gt, hardtanh, le, lt, masked_fill, ne, select_copy and quantized_matmul. Link all kernels to internal compiler (#10840)
Differential Revision: D76058902 Pull Request resolved: #11402
1 parent fad25ad commit a7a9130

25 files changed

+5356
-15
lines changed

backends/cadence/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ elseif(EXECUTORCH_FUSION_G3_OPT)
8282
${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
8383
else()
8484
set(TARGET_DIR reference)
85+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
8586
endif()
8687

8788

backends/cadence/aot/functions_hifi.yaml

Lines changed: 104 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,36 @@
3232
- arg_meta: null
3333
kernel_name: cadence::impl::HiFi::add_out
3434

35+
- op: bitwise_and.Scalar_out
36+
kernels:
37+
- arg_meta: null
38+
kernel_name: cadence::impl::HiFi::bitwise_and_Scalar_out
39+
40+
- op: bitwise_and.Tensor_out
41+
kernels:
42+
- arg_meta: null
43+
kernel_name: cadence::impl::HiFi::bitwise_and_Tensor_out
44+
45+
- op: bitwise_or.Scalar_out
46+
kernels:
47+
- arg_meta: null
48+
kernel_name: cadence::impl::HiFi::bitwise_or_Scalar_out
49+
50+
- op: bitwise_or.Tensor_out
51+
kernels:
52+
- arg_meta: null
53+
kernel_name: cadence::impl::HiFi::bitwise_or_Tensor_out
54+
55+
- op: bitwise_xor.Scalar_out
56+
kernels:
57+
- arg_meta: null
58+
kernel_name: cadence::impl::HiFi::bitwise_xor_Scalar_out
59+
60+
- op: bitwise_xor.Tensor_out
61+
kernels:
62+
- arg_meta: null
63+
kernel_name: cadence::impl::HiFi::bitwise_xor_Tensor_out
64+
3565
- op: bmm.out
3666
kernels:
3767
- arg_meta: null
@@ -65,27 +95,82 @@
6595
- op: embedding.out
6696
kernels:
6797
- arg_meta: null
68-
kernel_name: torch::executor::embedding_out
98+
kernel_name: cadence::impl::HiFi::embedding_out
99+
100+
- op: eq.Tensor_out
101+
kernels:
102+
- arg_meta: null
103+
kernel_name: cadence::impl::HiFi::eq_tensor_out
104+
105+
- op: fmod.Tensor_out
106+
kernels:
107+
- arg_meta: null
108+
kernel_name: cadence::impl::HiFi::fmod_Tensor_out
109+
110+
- op: fmod.Scalar_out
111+
kernels:
112+
- arg_meta: null
113+
kernel_name: cadence::impl::HiFi::fmod_Scalar_out
69114

70115
- op: full.out
71116
kernels:
72117
- arg_meta: null
73118
kernel_name: cadence::impl::HiFi::full_out
74119

75-
- op: gt.Scalar_out
120+
- op: ge.Scalar_out
121+
kernels:
122+
- arg_meta: null
123+
kernel_name: cadence::impl::HiFi::ge_scalar_out
124+
125+
- op: ge.Tensor_out
76126
kernels:
77127
- arg_meta: null
78-
kernel_name: torch::executor::gt_scalar_out
128+
kernel_name: cadence::impl::HiFi::ge_tensor_out
79129

80130
- op: gelu.out
81131
kernels:
82132
- arg_meta: null
83133
kernel_name: torch::executor::gelu_out
84134

135+
- op: gt.Scalar_out
136+
kernels:
137+
- arg_meta: null
138+
kernel_name: cadence::impl::HiFi::gt_scalar_out
139+
140+
- op: gt.Tensor_out
141+
kernels:
142+
- arg_meta: null
143+
kernel_name: cadence::impl::HiFi::gt_tensor_out
144+
85145
- op: hardtanh.out
86146
kernels:
87147
- arg_meta: null
88-
kernel_name: torch::executor::hardtanh_out
148+
kernel_name: cadence::impl::HiFi::hardtanh_out
149+
150+
- op: le.Scalar_out
151+
kernels:
152+
- arg_meta: null
153+
kernel_name: cadence::impl::HiFi::le_scalar_out
154+
155+
- op: le.Tensor_out
156+
kernels:
157+
- arg_meta: null
158+
kernel_name: cadence::impl::HiFi::le_tensor_out
159+
160+
- op: lt.Scalar_out
161+
kernels:
162+
- arg_meta: null
163+
kernel_name: cadence::impl::HiFi::lt_scalar_out
164+
165+
- op: lt.Tensor_out
166+
kernels:
167+
- arg_meta: null
168+
kernel_name: cadence::impl::HiFi::lt_tensor_out
169+
170+
- op: masked_fill.Scalar_out
171+
kernels:
172+
- arg_meta: null
173+
kernel_name: cadence::impl::HiFi::masked_fill_scalar_out
89174

90175
- op: max_pool2d_with_indices.out
91176
kernels:
@@ -117,6 +202,11 @@
117202
- arg_meta: null
118203
kernel_name: cadence::impl::HiFi::mul_out
119204

205+
- op: ne.Tensor_out
206+
kernels:
207+
- arg_meta: null
208+
kernel_name: cadence::impl::HiFi::ne_tensor_out
209+
120210
- op: permute_copy.out
121211
kernels:
122212
- arg_meta: null
@@ -147,6 +237,11 @@
147237
- arg_meta: null
148238
kernel_name: cadence::impl::HiFi::rsqrt_out
149239

240+
- op: select_copy.int_out
241+
kernels:
242+
- arg_meta: null
243+
kernel_name: cadence::impl::HiFi::select_copy_int_out
244+
150245
- op: sigmoid.out
151246
kernels:
152247
- arg_meta: null
@@ -239,6 +334,11 @@
239334
- arg_meta: null
240335
kernel_name: cadence::impl::HiFi::quantized_fully_connected_out
241336

337+
- func: cadence::quantized_matmul.out(Tensor X, int X_zero_point, Tensor Y, int Y_zero_point, Tensor? bias, int out_multiplier, int out_shift, int out_zero_point, bool transposed, *, Tensor(a!) out) -> Tensor(a!)
338+
kernels:
339+
- arg_meta: null
340+
kernel_name: cadence::impl::HiFi::quantized_matmul_out
341+
242342
- func: cadence::quantized_fully_connected.per_tensor_out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, int weight_zero_point, int out_multiplier, int out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
243343
kernels:
244344
- arg_meta: null

backends/cadence/hifi/kernels/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ add_library(
1616
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_clamp_f32_broadcast.c
1717
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_div_f32_broadcast.c
1818
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_div_mode_f32_broadcast.c
19+
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_fmod_broadcast_f32.c
20+
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_greater_lesser_equal_f32.c
21+
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_logicalxor_bool_bool.c
1922
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_minimum_maximum_f32.c
2023
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_mul_f32_broadcast.c
2124
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_pow_f32.c

backends/cadence/hifi/kernels/kernels.h

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
#pragma once
1010
#include <executorch/runtime/kernel/kernel_includes.h>
11-
#include <inttypes.h>
1211
#include <stddef.h>
1312
#include <xa_type_def.h>
1413
/* For NNLIB APIs */
@@ -88,6 +87,42 @@ extern "C" WORD32 xa_nn_elm_div_mode_broadcast_4D_f32xf32_f32(
8887
const WORD32* const p_inp2_shape,
8988
WORD32 mode);
9089

90+
extern "C" WORD32 xa_nn_elm_greater_lesser_equal_f32xf32_f32(
91+
WORD8* __restrict__ p_out,
92+
const FLOAT32* __restrict__ p_inp1,
93+
const FLOAT32* __restrict__ p_inp2,
94+
WORD32 num_elm,
95+
WORD32 kernel_type);
96+
97+
extern "C" WORD32 xa_nn_elm_greater_lesser_equal_broadcast_4D_f32xf32_f32(
98+
WORD8* __restrict__ p_out,
99+
const WORD32* const p_out_shape,
100+
const FLOAT32* __restrict__ p_inp1,
101+
const WORD32* const p_inp1_shape,
102+
const FLOAT32* __restrict__ p_inp2,
103+
const WORD32* const p_inp2_shape,
104+
WORD32 kernel_type);
105+
106+
extern "C" WORD32 xa_nn_elm_fmod_f32xf32_f32(
107+
FLOAT32* __restrict__ p_out,
108+
const FLOAT32* __restrict__ p_inp1,
109+
const FLOAT32* __restrict__ p_inp2,
110+
WORD32 num_elm);
111+
112+
extern "C" WORD32 xa_nn_elm_fmod_broadcast_4D_f32xf32_f32(
113+
FLOAT32* __restrict__ p_out,
114+
const WORD32* const p_out_shape,
115+
const FLOAT32* __restrict__ p_inp1,
116+
const WORD32* const p_inp1_shape,
117+
const FLOAT32* __restrict__ p_inp2,
118+
const WORD32* const p_inp2_shape);
119+
120+
extern "C" WORD32 xa_nn_elm_logicalxor_boolxbool_bool(
121+
WORD8* __restrict__ p_out,
122+
const WORD8* __restrict__ p_inp1,
123+
const WORD8* __restrict__ p_inp2,
124+
WORD32 num_elm);
125+
91126
extern "C" WORD32 xa_nn_elm_maximum_f32xf32_f32(
92127
FLOAT32* __restrict__ p_out,
93128
const FLOAT32* __restrict__ p_inp1,

backends/cadence/hifi/operators/CMakeLists.txt

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,34 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Codegen.cmake)
1818
set(_aten_ops__srcs
1919
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_add.cpp"
2020
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_atan2.cpp"
21+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_bitwise_and.cpp"
22+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_bitwise_or.cpp"
23+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_bitwise_xor.cpp"
2124
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_bmm.cpp"
2225
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_cat.cpp"
2326
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_clamp.cpp"
2427
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_div.cpp"
28+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_embedding.cpp"
29+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_eq.cpp"
30+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_fmod.cpp"
2531
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_full.cpp"
32+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_ge.cpp"
33+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_gt.cpp"
34+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_hardtanh.cpp"
35+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_le.cpp"
36+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_lt.cpp"
37+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_masked_fill.cpp"
2638
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_maximum.cpp"
2739
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mean.cpp"
2840
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_minimum.cpp"
2941
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mm.cpp"
3042
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mul.cpp"
43+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_ne.cpp"
3144
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_permute_copy.cpp"
3245
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_pow.cpp"
3346
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_remainder.cpp"
3447
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_rsqrt.cpp"
48+
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_select_copy.cpp"
3549
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_slice_copy.cpp"
3650
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_softmax.cpp"
3751
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_split_with_sizes_copy.cpp"
@@ -41,22 +55,21 @@ set(_aten_ops__srcs
4155
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_view_copy.cpp"
4256
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_where.cpp"
4357
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp"
44-
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_embedding.cpp"
45-
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_gt.cpp"
4658
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_gelu.cpp"
47-
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_hardtanh.cpp"
4859
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_max_pool2d_with_indices.cpp"
4960
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_to_copy.cpp"
5061
"${EXECUTORCH_ROOT}/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp"
5162
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp"
5263
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp"
5364
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp"
5465
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/dtype_util.cpp"
66+
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/delinearize_index.cpp"
5567
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/index_util.cpp"
5668
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/kernel_ops_util.cpp"
5769
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/matmul_ops_util.cpp"
5870
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/reduce_util.cpp"
5971
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/repeat_util.cpp"
72+
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/select_copy_util.cpp"
6073
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/slice_util.cpp"
6174
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/delinearize_index.cpp"
6275
)
@@ -75,7 +88,7 @@ target_include_directories(
7588

7689
# Custom ops that are needed to run the test model.
7790
add_library(
78-
custom_ops "op_quantized_linear_out.cpp" "op_quantized_layer_norm.cpp"
91+
custom_ops "op_quantized_linear_out.cpp" "op_quantized_layer_norm.cpp" "quantized_matmul_out.cpp"
7992
"op_quantize_per_tensor.cpp" "op_quantized_relu_out.cpp" "op_dequantize_per_tensor.cpp"
8093
"op_quantized_conv_out.cpp" "op_quantized_fully_connected_out"
8194
)

0 commit comments

Comments
 (0)