/***
* arm_sve.h - declarations/definitions for ARM64 SVE specific intrinsics
*
* Copyright (c) Microsoft Corporation. All rights reserved.
*
*Purpose:
* This include file contains the declarations for ARM64 SVE intrinsic
* functions.
*
****/

#pragma once

#if !defined (_M_ARM64) && !defined(_M_ARM64EC)
# error "This header is specific to ARM64 target"
#endif // !_M_ARM64 && !_M_ARM64EC

#ifdef __cplusplus
# include <cstdint>
#else
# include <stdint.h>
# include <stdbool.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

typedef float float32_t;
typedef double float64_t;

typedef __SVBFloat16_t svbfloat16_t;
typedef __SVBool_t svbool_t;
typedef __SVFloat16_t svfloat16_t;
typedef __SVFloat32_t svfloat32_t;
typedef __SVFloat64_t svfloat64_t;
typedef __SVInt8_t svint8_t;
typedef __SVInt16_t svint16_t;
typedef __SVInt32_t svint32_t;
typedef __SVInt64_t svint64_t;
typedef __SVUInt8_t svuint8_t;
typedef __SVUInt16_t svuint16_t;
typedef __SVUInt32_t svuint32_t;
typedef __SVUInt64_t svuint64_t;

// svBaseType x 2
typedef __SVBFloat16x2_t svbfloat16x2_t;
typedef __SVFloat16x2_t svfloat16x2_t;
typedef __SVFloat32x2_t svfloat32x2_t;
typedef __SVFloat64x2_t svfloat64x2_t;
typedef __SVInt16x2_t svint16x2_t;
typedef __SVInt32x2_t svint32x2_t;
typedef __SVInt64x2_t svint64x2_t;
typedef __SVInt8x2_t svint8x2_t;
typedef __SVUInt16x2_t svuint16x2_t;
typedef __SVUInt32x2_t svuint32x2_t;
typedef __SVUInt64x2_t svuint64x2_t;
typedef __SVUInt8x2_t svuint8x2_t;

// svBaseType x 3
typedef __SVBFloat16x3_t svbfloat16x3_t;
typedef __SVFloat16x3_t svfloat16x3_t;
typedef __SVFloat32x3_t svfloat32x3_t;
typedef __SVFloat64x3_t svfloat64x3_t;
typedef __SVInt16x3_t svint16x3_t;
typedef __SVInt32x3_t svint32x3_t;
typedef __SVInt64x3_t svint64x3_t;
typedef __SVInt8x3_t svint8x3_t;
typedef __SVUInt16x3_t svuint16x3_t;
typedef __SVUInt32x3_t svuint32x3_t;
typedef __SVUInt64x3_t svuint64x3_t;
typedef __SVUInt8x3_t svuint8x3_t;

// svBaseType x 4
typedef __SVBFloat16x4_t svbfloat16x4_t;
typedef __SVFloat16x4_t svfloat16x4_t;
typedef __SVFloat32x4_t svfloat32x4_t;
typedef __SVFloat64x4_t svfloat64x4_t;
typedef __SVInt16x4_t svint16x4_t;
typedef __SVInt32x4_t svint32x4_t;
typedef __SVInt64x4_t svint64x4_t;
typedef __SVInt8x4_t svint8x4_t;
typedef __SVUInt16x4_t svuint16x4_t;
typedef __SVUInt32x4_t svuint32x4_t;
typedef __SVUInt64x4_t svuint64x4_t;
typedef __SVUInt8x4_t svuint8x4_t;

enum svpattern
{
    SV_POW2 = 0,
    SV_VL1 = 1,
    SV_VL2 = 2,
    SV_VL3 = 3,
    SV_VL4 = 4,
    SV_VL5 = 5,
    SV_VL6 = 6,
    SV_VL7 = 7,
    SV_VL8 = 8,
    SV_VL16 = 9,
    SV_VL32 = 10,
    SV_VL64 = 11,
    SV_VL128 = 12,
    SV_VL256 = 13,
    SV_MUL4 = 29,
    SV_MUL3 = 30,
    SV_ALL = 31
};

enum svprfop
{
    SV_PLDL1KEEP = 0,
    SV_PLDL1STRM = 1,
    SV_PLDL2KEEP = 2,
    SV_PLDL2STRM = 3,
    SV_PLDL3KEEP = 4,
    SV_PLDL3STRM = 5,
    SV_PSTL1KEEP = 8,
    SV_PSTL1STRM = 9,
    SV_PSTL2KEEP = 10,
    SV_PSTL2STRM = 11,
    SV_PSTL3KEEP = 12,
    SV_PSTL3STRM = 13
};

// sve: Bit manipulation / Count leading sign bits: Count leading sign bits
svuint16_t svcls_s16_z(svbool_t pg, svint16_t op);
svuint8_t svcls_s8_z(svbool_t pg, svint8_t op);
svuint8_t svcls_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op);
svuint64_t svcls_s64_x(svbool_t pg, svint64_t op);
svuint32_t svcls_s32_x(svbool_t pg, svint32_t op);
svuint16_t svcls_s16_x(svbool_t pg, svint16_t op);
svuint64_t svcls_s64_z(svbool_t pg, svint64_t op);
svuint8_t svcls_s8_x(svbool_t pg, svint8_t op);
svuint32_t svcls_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op);
svuint32_t svcls_s32_z(svbool_t pg, svint32_t op);
svuint64_t svcls_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op);
svuint16_t svcls_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svcls_z(pg, op) _Generic((op), \
    svint16_t: svcls_s16_z, \
    svint8_t: svcls_s8_z, \
    svint64_t: svcls_s64_z, \
    svint32_t: svcls_s32_z, \
    default: __assume(0) \
)(pg, op)
#define svcls_m(inactive, pg, op) _Generic((op), \
    svint8_t: svcls_s8_m, \
    svint32_t: svcls_s32_m, \
    svint64_t: svcls_s64_m, \
    svint16_t: svcls_s16_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svcls_x(pg, op) _Generic((op), \
    svint64_t: svcls_s64_x, \
    svint32_t: svcls_s32_x, \
    svint16_t: svcls_s16_x, \
    svint8_t: svcls_s8_x, \
    default: __assume(0) \
)(pg, op)
#endif

// sve: Bit manipulation / Count leading zeros: Count leading zero bits
svuint8_t svclz_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op);
svuint16_t svclz_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op);
svuint16_t svclz_s16_z(svbool_t pg, svint16_t op);
svuint8_t svclz_s8_z(svbool_t pg, svint8_t op);
svuint32_t svclz_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op);
svuint64_t svclz_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op);
svuint16_t svclz_s16_x(svbool_t pg, svint16_t op);
svuint16_t svclz_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op);
svuint64_t svclz_u64_x(svbool_t pg, svuint64_t op);
svuint32_t svclz_u32_x(svbool_t pg, svuint32_t op);
svuint16_t svclz_u16_x(svbool_t pg, svuint16_t op);
svuint64_t svclz_u64_z(svbool_t pg, svuint64_t op);
svuint32_t svclz_u32_z(svbool_t pg, svuint32_t op);
svuint8_t svclz_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op);
svuint8_t svclz_u8_z(svbool_t pg, svuint8_t op);
svuint16_t svclz_u16_z(svbool_t pg, svuint16_t op);
svuint8_t svclz_u8_x(svbool_t pg, svuint8_t op);
svuint64_t svclz_s64_x(svbool_t pg, svint64_t op);
svuint32_t svclz_s32_x(svbool_t pg, svint32_t op);
svuint32_t svclz_s32_z(svbool_t pg, svint32_t op);
svuint8_t svclz_s8_x(svbool_t pg, svint8_t op);
svuint64_t svclz_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op);
svuint32_t svclz_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op);
svuint64_t svclz_s64_z(svbool_t pg, svint64_t op);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svclz_m(inactive, pg, op) _Generic((op), \
    svint8_t: svclz_s8_m, \
    svint16_t: svclz_s16_m, \
    svint32_t: svclz_s32_m, \
    svint64_t: svclz_s64_m, \
    svuint16_t: svclz_u16_m, \
    svuint8_t: svclz_u8_m, \
    svuint64_t: svclz_u64_m, \
    svuint32_t: svclz_u32_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svclz_z(pg, op) _Generic((op), \
    svint16_t: svclz_s16_z, \
    svint8_t: svclz_s8_z, \
    svuint64_t: svclz_u64_z, \
    svuint32_t: svclz_u32_z, \
    svuint8_t: svclz_u8_z, \
    svuint16_t: svclz_u16_z, \
    svint32_t: svclz_s32_z, \
    svint64_t: svclz_s64_z, \
    default: __assume(0) \
)(pg, op)
#define svclz_x(pg, op) _Generic((op), \
    svint16_t: svclz_s16_x, \
    svuint64_t: svclz_u64_x, \
    svuint32_t: svclz_u32_x, \
    svuint16_t: svclz_u16_x, \
    svuint8_t: svclz_u8_x, \
    svint64_t: svclz_s64_x, \
    svint32_t: svclz_s32_x, \
    svint8_t: svclz_s8_x, \
    default: __assume(0) \
)(pg, op)
#endif
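/*
 * Usage sketch (illustrative addition, not part of the original header;
 * assumes a C11 compiler and an SVE-enabled ARM64 target): the type-generic
 * forms above dispatch on the operand's vector type, e.g.
 *
 *     svuint8_t active_lane_clz(svbool_t pg, svuint8_t v)
 *     {
 *         return svclz_x(pg, v);   // resolves to svclz_u8_x
 *     }
 */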
// sve: Bit manipulation / Extend from low bits: Sign-extend the low 16 bits
svint32_t svexth_s32_m(svint32_t inactive, svbool_t pg, svint32_t op);
svint64_t svexth_s64_z(svbool_t pg, svint64_t op);
svint32_t svexth_s32_z(svbool_t pg, svint32_t op);
svint64_t svexth_s64_x(svbool_t pg, svint64_t op);
svint32_t svexth_s32_x(svbool_t pg, svint32_t op);
svint64_t svexth_s64_m(svint64_t inactive, svbool_t pg, svint64_t op);

// sve: Bit manipulation / Extend from low bits: Zero-extend the low 16 bits
svuint32_t svexth_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op);
svuint64_t svexth_u64_z(svbool_t pg, svuint64_t op);
svuint32_t svexth_u32_z(svbool_t pg, svuint32_t op);
svuint64_t svexth_u64_x(svbool_t pg, svuint64_t op);
svuint32_t svexth_u32_x(svbool_t pg, svuint32_t op);
svuint64_t svexth_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svexth_m(inactive, pg, op) _Generic((op), \
    svint32_t: svexth_s32_m, \
    svint64_t: svexth_s64_m, \
    svuint32_t: svexth_u32_m, \
    svuint64_t: svexth_u64_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svexth_z(pg, op) _Generic((op), \
    svint64_t: svexth_s64_z, \
    svint32_t: svexth_s32_z, \
    svuint64_t: svexth_u64_z, \
    svuint32_t: svexth_u32_z, \
    default: __assume(0) \
)(pg, op)
#define svexth_x(pg, op) _Generic((op), \
    svint64_t: svexth_s64_x, \
    svint32_t: svexth_s32_x, \
    svuint64_t: svexth_u64_x, \
    svuint32_t: svexth_u32_x, \
    default: __assume(0) \
)(pg, op)
#endif

// sve: Bit manipulation / Extend from low bits: Sign-extend the low 32 bits
svint64_t svextw_s64_z(svbool_t pg, svint64_t op);
svint64_t svextw_s64_m(svint64_t inactive, svbool_t pg, svint64_t op);
svint64_t svextw_s64_x(svbool_t pg, svint64_t op);

// sve: Bit manipulation / Extend from low bits: Zero-extend the low 32 bits
svuint64_t svextw_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op);
svuint64_t svextw_u64_z(svbool_t pg, svuint64_t op);
svuint64_t svextw_u64_x(svbool_t pg, svuint64_t op);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svextw_z(pg, op) _Generic((op), \
    svint64_t: svextw_s64_z, \
    svuint64_t: svextw_u64_z, \
    default: __assume(0) \
)(pg, op)
#define svextw_m(inactive, pg, op) _Generic((op), \
    svint64_t: svextw_s64_m, \
    svuint64_t: svextw_u64_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svextw_x(pg, op) _Generic((op), \
    svint64_t: svextw_s64_x, \
    svuint64_t: svextw_u64_x, \
    default: __assume(0) \
)(pg, op)
#endif

// sve: Bit manipulation / Extend from low bits: Sign-extend the low 8 bits
svint64_t svextb_s64_x(svbool_t pg, svint64_t op);
svint16_t svextb_s16_m(svint16_t inactive, svbool_t pg, svint16_t op);
svint16_t svextb_s16_x(svbool_t pg, svint16_t op);
svint32_t svextb_s32_z(svbool_t pg, svint32_t op);
svint64_t svextb_s64_z(svbool_t pg, svint64_t op);
svint64_t svextb_s64_m(svint64_t inactive, svbool_t pg, svint64_t op);
svint32_t svextb_s32_x(svbool_t pg, svint32_t op);
svint16_t svextb_s16_z(svbool_t pg, svint16_t op);
svint32_t svextb_s32_m(svint32_t inactive, svbool_t pg, svint32_t op);

// sve: Bit manipulation / Extend from low bits: Zero-extend the low 8 bits
svuint32_t svextb_u32_z(svbool_t pg, svuint32_t op);
svuint16_t svextb_u16_z(svbool_t pg, svuint16_t op);
svuint64_t svextb_u64_x(svbool_t pg, svuint64_t op);
svuint32_t svextb_u32_x(svbool_t pg, svuint32_t op);
svuint16_t svextb_u16_x(svbool_t pg, svuint16_t op);
svuint64_t svextb_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op);
svuint64_t svextb_u64_z(svbool_t pg, svuint64_t op);
svuint32_t svextb_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op);
svuint16_t svextb_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svextb_x(pg, op) _Generic((op), \
    svint64_t: svextb_s64_x, \
    svint16_t: svextb_s16_x, \
    svint32_t: svextb_s32_x, \
    svuint64_t: svextb_u64_x, \
    svuint32_t: svextb_u32_x, \
    svuint16_t: svextb_u16_x, \
    default: __assume(0) \
)(pg, op)
#define svextb_m(inactive, pg, op) _Generic((op), \
    svint16_t: svextb_s16_m, \
    svint64_t: svextb_s64_m, \
    svint32_t: svextb_s32_m, \
    svuint64_t: svextb_u64_m, \
    svuint32_t: svextb_u32_m, \
    svuint16_t: svextb_u16_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svextb_z(pg, op) _Generic((op), \
    svint32_t: svextb_s32_z, \
    svint64_t: svextb_s64_z, \
    svint16_t: svextb_s16_z, \
    svuint32_t: svextb_u32_z, \
    svuint16_t: svextb_u16_z, \
    svuint64_t: svextb_u64_z, \
    default: __assume(0) \
)(pg, op)
#endif

// sve: Bit manipulation / Population count: Count nonzero bits
svuint64_t svcnt_u64_z(svbool_t pg, svuint64_t op);
svuint32_t svcnt_u32_z(svbool_t pg, svuint32_t op);
svuint16_t svcnt_u16_z(svbool_t pg, svuint16_t op);
svuint64_t svcnt_s64_z(svbool_t pg, svint64_t op);
svuint64_t svcnt_f64_z(svbool_t pg, svfloat64_t op);
svuint16_t svcnt_s16_z(svbool_t pg, svint16_t op);
svuint16_t svcnt_s16_x(svbool_t pg, svint16_t op);
svuint8_t svcnt_s8_x(svbool_t pg, svint8_t op);
svuint64_t svcnt_f64_x(svbool_t pg, svfloat64_t op);
svuint32_t svcnt_f32_x(svbool_t pg, svfloat32_t op);
svuint16_t svcnt_f16_x(svbool_t pg, svfloat16_t op);
svuint16_t svcnt_bf16_x(svbool_t pg, svbfloat16_t op);
svuint64_t svcnt_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op);
svuint32_t svcnt_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op);
svuint32_t svcnt_s32_z(svbool_t pg, svint32_t op);
svuint16_t svcnt_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op);
svuint64_t svcnt_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op);
svuint32_t svcnt_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op);
svuint16_t svcnt_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op);
svuint8_t svcnt_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op);
svuint64_t svcnt_f64_m(svuint64_t inactive, svbool_t pg, svfloat64_t op);
svuint32_t svcnt_f32_m(svuint32_t inactive, svbool_t pg, svfloat32_t op);
svuint16_t svcnt_f16_m(svuint16_t inactive, svbool_t pg, svfloat16_t op);
svuint16_t svcnt_bf16_m(svuint16_t inactive, svbool_t pg, svbfloat16_t op);
svuint8_t svcnt_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op);
svuint64_t svcnt_s64_x(svbool_t pg, svint64_t op);
svuint32_t svcnt_s32_x(svbool_t pg, svint32_t op);
svuint16_t svcnt_u16_x(svbool_t pg, svuint16_t op);
svuint8_t svcnt_s8_z(svbool_t pg, svint8_t op);
svuint32_t svcnt_f32_z(svbool_t pg, svfloat32_t op);
svuint16_t svcnt_f16_z(svbool_t pg, svfloat16_t op);
svuint16_t svcnt_bf16_z(svbool_t pg, svbfloat16_t op);
svuint8_t svcnt_u8_x(svbool_t pg, svuint8_t op);
svuint8_t svcnt_u8_z(svbool_t pg, svuint8_t op);
svuint32_t svcnt_u32_x(svbool_t pg, svuint32_t op);
svuint64_t svcnt_u64_x(svbool_t pg, svuint64_t op);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svcnt_z(pg, op) _Generic((op), \
    svuint64_t: svcnt_u64_z, \
    svuint32_t: svcnt_u32_z, \
    svuint16_t: svcnt_u16_z, \
    svint64_t: svcnt_s64_z, \
    svfloat64_t: svcnt_f64_z, \
    svint16_t: svcnt_s16_z, \
    svint32_t: svcnt_s32_z, \
    svint8_t: svcnt_s8_z, \
    svfloat32_t: svcnt_f32_z, \
    svfloat16_t: svcnt_f16_z, \
    svbfloat16_t: svcnt_bf16_z, \
    svuint8_t: svcnt_u8_z, \
    default: __assume(0) \
)(pg, op)
#define svcnt_x(pg, op) _Generic((op), \
    svint16_t: svcnt_s16_x, \
    svint8_t: svcnt_s8_x, \
    svfloat64_t: svcnt_f64_x, \
    svfloat32_t: svcnt_f32_x, \
    svfloat16_t: svcnt_f16_x, \
    svbfloat16_t: svcnt_bf16_x, \
    svint64_t: svcnt_s64_x, \
    svint32_t: svcnt_s32_x, \
    svuint16_t: svcnt_u16_x, \
    svuint8_t: svcnt_u8_x, \
    svuint32_t: svcnt_u32_x, \
    svuint64_t: svcnt_u64_x, \
    default: __assume(0) \
)(pg, op)
#define svcnt_m(inactive, pg, op) _Generic((op), \
    svuint64_t: svcnt_u64_m, \
    svuint32_t: svcnt_u32_m, \
    svuint16_t: svcnt_u16_m, \
    svint64_t: svcnt_s64_m, \
    svint32_t: svcnt_s32_m, \
    svint16_t: svcnt_s16_m, \
    svint8_t: svcnt_s8_m, \
    svfloat64_t: svcnt_f64_m, \
    svfloat32_t: svcnt_f32_m, \
    svfloat16_t: svcnt_f16_m, \
    svbfloat16_t: svcnt_bf16_m, \
    svuint8_t: svcnt_u8_m, \
    default: __assume(0) \
)(inactive, pg, op)
#endif

// sve: Compare / Absolute greater than or equal to: Absolute compare greater than or equal to
svbool_t svacge_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2);
svbool_t svacge_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2);
svbool_t svacge_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2);
svbool_t svacge_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2);
svbool_t svacge_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svacge(pg, op1, op2) _Generic((op2), \
    svfloat32_t: svacge_f32, \
    float32_t: svacge_n_f32, \
    svfloat64_t: svacge_f64, \
    svfloat16_t: svacge_f16, \
    float64_t: svacge_n_f64, \
    default: __assume(0) \
)(pg, op1, op2)
#endif

// sve: Compare / Absolute greater than: Absolute compare greater than
svbool_t svacgt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2);
svbool_t svacgt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2);
svbool_t svacgt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2);
svbool_t svacgt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2);
svbool_t svacgt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svacgt(pg, op1, op2) _Generic((op2), \
    svfloat32_t: svacgt_f32, \
    svfloat64_t: svacgt_f64, \
    float32_t: svacgt_n_f32, \
    float64_t: svacgt_n_f64, \
    svfloat16_t: svacgt_f16, \
    default: __assume(0) \
)(pg, op1, op2)
#endif

// sve: Compare / Absolute less than or equal to: Absolute compare less than or equal to
svbool_t svacle_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2);
svbool_t svacle_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2);
svbool_t svacle_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2);
svbool_t svacle_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2);
svbool_t svacle_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svacle(pg, op1, op2) _Generic((op2), \
    svfloat16_t: svacle_f16, \
    svfloat64_t: svacle_f64, \
    float32_t: svacle_n_f32, \
    float64_t: svacle_n_f64, \
    svfloat32_t: svacle_f32, \
    default: __assume(0) \
)(pg, op1, op2)
#endif

// sve: Compare / Absolute less than: Absolute compare less than
svbool_t svaclt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2);
svbool_t svaclt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2);
svbool_t svaclt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2);
svbool_t svaclt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2);
svbool_t svaclt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svaclt(pg, op1, op2) _Generic((op2), \
    svfloat16_t: svaclt_f16, \
    svfloat32_t: svaclt_f32, \
    svfloat64_t: svaclt_f64, \
    float32_t: svaclt_n_f32, \
    float64_t: svaclt_n_f64, \
    default: __assume(0) \
)(pg, op1, op2)
#endif
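/*
 * Usage sketch (illustrative addition, not part of the original header;
 * assumes a C11 compiler and an SVE-enabled ARM64 target): the absolute
 * compares return a predicate of the lanes where |op1| relates to |op2|, e.g.
 *
 *     svbool_t magnitude_below_one(svbool_t pg, svfloat32_t v)
 *     {
 *         return svaclt(pg, v, 1.0f);   // resolves to svaclt_n_f32
 *     }
 */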
// sve: Compare / Equal to: Compare equal to
svbool_t svcmpeq_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2);
svbool_t svcmpeq_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2);
svbool_t svcmpeq_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2);
svbool_t svcmpeq_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2);
svbool_t svcmpeq_n_s8(svbool_t pg, svint8_t op1, int8_t op2);
svbool_t svcmpeq_n_s16(svbool_t pg, svint16_t op1, int16_t op2);
svbool_t svcmpeq_n_s32(svbool_t pg, svint32_t op1, int32_t op2);
svbool_t svcmpeq_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2);
svbool_t svcmpeq_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2);
svbool_t svcmpeq_n_s64(svbool_t pg, svint64_t op1, int64_t op2);
svbool_t svcmpeq_s32(svbool_t pg, svint32_t op1, svint32_t op2);
svbool_t svcmpeq_u64(svbool_t pg, svuint64_t op1, svuint64_t op2);
svbool_t svcmpeq_u32(svbool_t pg, svuint32_t op1, svuint32_t op2);
svbool_t svcmpeq_u16(svbool_t pg, svuint16_t op1, svuint16_t op2);
svbool_t svcmpeq_u8(svbool_t pg, svuint8_t op1, svuint8_t op2);
svbool_t svcmpeq_s64(svbool_t pg, svint64_t op1, svint64_t op2);
svbool_t svcmpeq_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2);
svbool_t svcmpeq_s16(svbool_t pg, svint16_t op1, svint16_t op2);
svbool_t svcmpeq_s8(svbool_t pg, svint8_t op1, svint8_t op2);
svbool_t svcmpeq_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2);
svbool_t svcmpeq_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2);
svbool_t svcmpeq_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2);
svbool_t svcmpeq_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2);
svbool_t svcmpeq_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2);
svbool_t svcmpeq_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2);
svbool_t svcmpeq_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2);
svbool_t svcmpeq_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svcmpeq(pg, op1, op2) _Generic((op2), \
    uint16_t: svcmpeq_n_u16, \
    float32_t: svcmpeq_n_f32, \
    float64_t: svcmpeq_n_f64, \
    int8_t: svcmpeq_n_s8, \
    int16_t: svcmpeq_n_s16, \
    int32_t: svcmpeq_n_s32, \
    uint32_t: svcmpeq_n_u32, \
    uint8_t: svcmpeq_n_u8, \
    int64_t: svcmpeq_n_s64, \
    svint32_t: svcmpeq_s32, \
    svuint64_t: svcmpeq_u64, \
    svuint32_t: svcmpeq_u32, \
    svuint16_t: svcmpeq_u16, \
    svuint8_t: svcmpeq_u8, \
    svint64_t: svcmpeq_s64, \
    svint16_t: svcmpeq_s16, \
    svint8_t: svcmpeq_s8, \
    svfloat64_t: svcmpeq_f64, \
    svfloat32_t: svcmpeq_f32, \
    svfloat16_t: svcmpeq_f16, \
    uint64_t: svcmpeq_n_u64, \
    default: __assume(0) \
)(pg, op1, op2)
#define svcmpeq_wide(pg, op1, op2) _Generic((op2), \
    svint64_t: _Generic((op1), \
        svint8_t: svcmpeq_wide_s8, \
        svint16_t: svcmpeq_wide_s16, \
        svint32_t: svcmpeq_wide_s32, \
        default: __assume(0)), \
    int64_t: _Generic((op1), \
        svint16_t: svcmpeq_wide_n_s16, \
        svint8_t: svcmpeq_wide_n_s8, \
        svint32_t: svcmpeq_wide_n_s32, \
        default: __assume(0)), \
    default: __assume(0) \
)(pg, op1, op2)
#endif

// sve: Compare / Greater than or equal to: Compare greater than or equal to
svbool_t svcmpge_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2);
svbool_t svcmpge_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2);
svbool_t svcmpge_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2);
svbool_t svcmpge_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2);
svbool_t svcmpge_s8(svbool_t pg, svint8_t op1, svint8_t op2);
svbool_t svcmpge_s16(svbool_t pg, svint16_t op1, svint16_t op2);
svbool_t svcmpge_s32(svbool_t pg, svint32_t op1, svint32_t op2);
svbool_t svcmpge_s64(svbool_t pg, svint64_t op1, svint64_t op2);
svbool_t svcmpge_u8(svbool_t pg, svuint8_t op1, svuint8_t op2);
svbool_t svcmpge_u16(svbool_t pg, svuint16_t op1, svuint16_t op2);
svbool_t svcmpge_u32(svbool_t pg, svuint32_t op1, svuint32_t op2);
svbool_t svcmpge_u64(svbool_t pg, svuint64_t op1, svuint64_t op2);
svbool_t svcmpge_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2);
svbool_t svcmpge_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2);
svbool_t svcmpge_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2);
svbool_t svcmpge_n_s16(svbool_t pg, svint16_t op1, int16_t op2);
svbool_t svcmpge_n_s32(svbool_t pg, svint32_t op1, int32_t op2);
svbool_t svcmpge_n_s64(svbool_t pg, svint64_t op1, int64_t op2);
svbool_t svcmpge_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2);
svbool_t svcmpge_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2);
svbool_t svcmpge_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2);
svbool_t svcmpge_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2);
svbool_t svcmpge_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2);
svbool_t svcmpge_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2);
svbool_t svcmpge_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2);
svbool_t svcmpge_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2);
svbool_t svcmpge_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2);
svbool_t svcmpge_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2);
svbool_t svcmpge_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2);
svbool_t svcmpge_n_s8(svbool_t pg, svint8_t op1, int8_t op2);
svbool_t svcmpge_wide_n_u32(svbool_t pg, svuint32_t op1, uint64_t op2);
svbool_t svcmpge_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2);
svbool_t svcmpge_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svcmpge_wide(pg, op1, op2) _Generic((op2), \
    uint64_t: _Generic((op1), \
        svuint8_t: svcmpge_wide_n_u8, \
        svuint16_t: svcmpge_wide_n_u16, \
        svuint32_t: svcmpge_wide_n_u32, \
        default: __assume(0)), \
    svint64_t: _Generic((op1), \
        svint32_t: svcmpge_wide_s32, \
        svint16_t: svcmpge_wide_s16, \
        svint8_t: svcmpge_wide_s8, \
        default: __assume(0)), \
    svuint64_t: _Generic((op1), \
        svuint8_t: svcmpge_wide_u8, \
        svuint16_t: svcmpge_wide_u16, \
        svuint32_t: svcmpge_wide_u32, \
        default: __assume(0)), \
    int64_t: _Generic((op1), \
        svint8_t: svcmpge_wide_n_s8, \
        svint16_t: svcmpge_wide_n_s16, \
        svint32_t: svcmpge_wide_n_s32, \
        default: __assume(0)), \
    default: __assume(0) \
)(pg, op1, op2)
#define svcmpge(pg, op1, op2) _Generic((op2), \
    svfloat16_t: svcmpge_f16, \
    svfloat32_t: svcmpge_f32, \
    svfloat64_t: svcmpge_f64, \
    svint8_t: svcmpge_s8, \
    svint16_t: svcmpge_s16, \
    svint32_t: svcmpge_s32, \
    svint64_t: svcmpge_s64, \
    svuint8_t: svcmpge_u8, \
    svuint16_t: svcmpge_u16, \
    svuint32_t: svcmpge_u32, \
    svuint64_t: svcmpge_u64, \
    float32_t: svcmpge_n_f32, \
    float64_t: svcmpge_n_f64, \
    int16_t: svcmpge_n_s16, \
    int32_t: svcmpge_n_s32, \
    int64_t: svcmpge_n_s64, \
    uint8_t: svcmpge_n_u8, \
    uint16_t: svcmpge_n_u16, \
    uint32_t: svcmpge_n_u32, \
    uint64_t: svcmpge_n_u64, \
    int8_t: svcmpge_n_s8, \
    default: __assume(0) \
)(pg, op1, op2)
#endif
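/*
 * Usage sketch (illustrative addition, not part of the original header;
 * assumes a C11 compiler and an SVE-enabled ARM64 target): the _wide forms
 * dispatch on both operands, comparing narrow lanes against 64-bit data, e.g.
 *
 *     svbool_t at_least(svbool_t pg, svint8_t v, int64_t bound)
 *     {
 *         return svcmpge_wide(pg, v, bound);   // resolves to svcmpge_wide_n_s8
 *     }
 */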
// sve: Compare / Greater than: Compare greater than
svbool_t svcmpgt_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2);
svbool_t svcmpgt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2);
svbool_t svcmpgt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2);
svbool_t svcmpgt_s8(svbool_t pg, svint8_t op1, svint8_t op2);
svbool_t svcmpgt_s16(svbool_t pg, svint16_t op1, svint16_t op2);
svbool_t svcmpgt_s32(svbool_t pg, svint32_t op1, svint32_t op2);
svbool_t svcmpgt_s64(svbool_t pg, svint64_t op1, svint64_t op2);
svbool_t svcmpgt_u8(svbool_t pg, svuint8_t op1, svuint8_t op2);
svbool_t svcmpgt_u16(svbool_t pg, svuint16_t op1, svuint16_t op2);
svbool_t svcmpgt_u32(svbool_t pg, svuint32_t op1, svuint32_t op2);
svbool_t svcmpgt_u64(svbool_t pg, svuint64_t op1, svuint64_t op2);
svbool_t svcmpgt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2);
svbool_t svcmpgt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2);
svbool_t svcmpgt_n_s8(svbool_t pg, svint8_t op1, int8_t op2);
svbool_t svcmpgt_n_s16(svbool_t pg, svint16_t op1, int16_t op2);
svbool_t svcmpgt_n_s64(svbool_t pg, svint64_t op1, int64_t op2);
svbool_t svcmpgt_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2);
svbool_t svcmpgt_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2);
svbool_t svcmpgt_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2);
svbool_t svcmpgt_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2);
svbool_t svcmpgt_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2);
svbool_t svcmpgt_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2);
svbool_t svcmpgt_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2);
svbool_t svcmpgt_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2);
svbool_t svcmpgt_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2);
svbool_t svcmpgt_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2);
svbool_t svcmpgt_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2);
svbool_t svcmpgt_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2);
svbool_t svcmpgt_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2);
svbool_t svcmpgt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2);
svbool_t svcmpgt_n_s32(svbool_t pg, svint32_t op1, int32_t op2);
svbool_t svcmpgt_wide_n_u32(svbool_t pg, svuint32_t op1, uint64_t op2);
svbool_t svcmpgt_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svcmpgt_wide(pg, op1, op2) _Generic((op2), \
    uint64_t: _Generic((op1), \
        svuint8_t: svcmpgt_wide_n_u8, \
        svuint32_t: svcmpgt_wide_n_u32, \
        svuint16_t: svcmpgt_wide_n_u16, \
        default: __assume(0)), \
    svint64_t: _Generic((op1), \
        svint8_t: svcmpgt_wide_s8, \
        svint16_t: svcmpgt_wide_s16, \
        svint32_t: svcmpgt_wide_s32, \
        default: __assume(0)), \
    svuint64_t: _Generic((op1), \
        svuint8_t: svcmpgt_wide_u8, \
        svuint16_t: svcmpgt_wide_u16, \
        svuint32_t: svcmpgt_wide_u32, \
        default: __assume(0)), \
    int64_t: _Generic((op1), \
        svint8_t: svcmpgt_wide_n_s8, \
        svint16_t: svcmpgt_wide_n_s16, \
        svint32_t: svcmpgt_wide_n_s32, \
        default: __assume(0)), \
    default: __assume(0) \
)(pg, op1, op2)
#define svcmpgt(pg, op1, op2) _Generic((op2), \
    svfloat32_t: svcmpgt_f32, \
    svfloat64_t: svcmpgt_f64, \
    svint8_t: svcmpgt_s8, \
    svint16_t: svcmpgt_s16, \
    svint32_t: svcmpgt_s32, \
    svint64_t: svcmpgt_s64, \
    svuint8_t: svcmpgt_u8, \
    svuint16_t: svcmpgt_u16, \
    svuint32_t: svcmpgt_u32, \
    svuint64_t: svcmpgt_u64, \
    float32_t: svcmpgt_n_f32, \
    float64_t: svcmpgt_n_f64, \
    int8_t: svcmpgt_n_s8, \
    int16_t: svcmpgt_n_s16, \
    int64_t: svcmpgt_n_s64, \
    uint8_t: svcmpgt_n_u8, \
    uint16_t: svcmpgt_n_u16, \
    uint32_t: svcmpgt_n_u32, \
    uint64_t: svcmpgt_n_u64, \
    svfloat16_t: svcmpgt_f16, \
    int32_t: svcmpgt_n_s32, \
    default: __assume(0) \
)(pg, op1, op2)
#endif

// sve: Compare / Less than or equal to: Compare less than or equal to
svbool_t svcmple_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2);
svbool_t svcmple_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2);
svbool_t svcmple_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2);
svbool_t svcmple_wide_n_u32(svbool_t pg, svuint32_t op1, uint64_t op2);
svbool_t svcmple_s8(svbool_t pg, svint8_t op1, svint8_t op2);
svbool_t svcmple_s16(svbool_t pg, svint16_t op1, svint16_t op2);
svbool_t svcmple_s32(svbool_t pg, svint32_t op1, svint32_t op2);
svbool_t svcmple_s64(svbool_t pg, svint64_t op1, svint64_t op2);
svbool_t svcmple_u8(svbool_t pg, svuint8_t op1, svuint8_t op2);
svbool_t svcmple_u16(svbool_t pg, svuint16_t op1, svuint16_t op2);
svbool_t svcmple_u32(svbool_t pg, svuint32_t op1, svuint32_t op2);
svbool_t svcmple_u64(svbool_t pg, svuint64_t op1, svuint64_t op2);
svbool_t svcmple_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2);
svbool_t svcmple_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2);
svbool_t svcmple_n_s8(svbool_t pg, svint8_t op1, int8_t op2);
svbool_t svcmple_n_s16(svbool_t pg, svint16_t op1, int16_t op2);
svbool_t svcmple_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2);
svbool_t svcmple_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2);
svbool_t svcmple_n_s32(svbool_t pg, svint32_t op1, int32_t op2);
svbool_t svcmple_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2);
svbool_t svcmple_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2);
svbool_t svcmple_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2);
svbool_t svcmple_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2);
svbool_t svcmple_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2);
svbool_t svcmple_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2);
svbool_t svcmple_n_s64(svbool_t pg, svint64_t op1, int64_t op2);
svbool_t svcmple_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2);
svbool_t svcmple_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2);
svbool_t svcmple_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2);
svbool_t svcmple_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2);
svbool_t svcmple_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2);
svbool_t svcmple_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2);
svbool_t svcmple_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svcmple(pg, op1, op2) _Generic((op2), \
    svfloat16_t: svcmple_f16, \
    svfloat32_t: svcmple_f32, \
    svint8_t: svcmple_s8, \
    svint16_t: svcmple_s16, \
    svint32_t: svcmple_s32, \
    svint64_t: svcmple_s64, \
    svuint8_t: svcmple_u8, \
    svuint16_t: svcmple_u16, \
    svuint32_t: svcmple_u32, \
    svuint64_t: svcmple_u64, \
    float32_t: svcmple_n_f32, \
    float64_t: svcmple_n_f64, \
    int8_t: svcmple_n_s8, \
    int16_t: svcmple_n_s16, \
    svfloat64_t: svcmple_f64, \
    uint8_t: svcmple_n_u8, \
    int32_t: svcmple_n_s32, \
    int64_t: svcmple_n_s64, \
    uint64_t: svcmple_n_u64, \
    uint32_t: svcmple_n_u32, \
    uint16_t: svcmple_n_u16, \
    default: __assume(0) \
)(pg, op1, op2)
#define svcmple_wide(pg, op1, op2) _Generic((op2), \
    uint64_t: _Generic((op1), \
        svuint16_t: svcmple_wide_n_u16, \
        svuint32_t: svcmple_wide_n_u32, \
        svuint8_t: svcmple_wide_n_u8, \
        default: __assume(0)), \
    int64_t: _Generic((op1), \
        svint32_t: svcmple_wide_n_s32, \
        svint16_t: svcmple_wide_n_s16, \
        svint8_t: svcmple_wide_n_s8, \
        default: __assume(0)), \
    svuint64_t: _Generic((op1), \
        svuint32_t: svcmple_wide_u32, \
        svuint16_t: svcmple_wide_u16, \
        svuint8_t: svcmple_wide_u8, \
        default: __assume(0)), \
    svint64_t: _Generic((op1), \
        svint32_t: svcmple_wide_s32, \
        svint16_t: svcmple_wide_s16, \
        svint8_t: svcmple_wide_s8, \
        default: __assume(0)), \
    default: __assume(0) \
)(pg, op1, op2)
#endif

// sve: Compare / Less than: Compare less than
svbool_t svcmplt_n_s32(svbool_t pg, svint32_t op1, int32_t op2);
svbool_t svcmplt_n_s16(svbool_t pg, svint16_t op1, int16_t op2);
svbool_t svcmplt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2);
svbool_t svcmplt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2);
svbool_t svcmplt_u64(svbool_t pg, svuint64_t op1, svuint64_t op2);
svbool_t svcmplt_n_s64(svbool_t pg, svint64_t op1, int64_t op2);
svbool_t svcmplt_u32(svbool_t pg, svuint32_t op1, svuint32_t op2);
svbool_t svcmplt_u8(svbool_t pg, svuint8_t op1, svuint8_t op2);
svbool_t svcmplt_s64(svbool_t pg, svint64_t op1, svint64_t op2);
svbool_t svcmplt_s32(svbool_t pg, svint32_t op1, svint32_t op2);
svbool_t svcmplt_s16(svbool_t pg, svint16_t op1, svint16_t op2);
svbool_t svcmplt_s8(svbool_t pg, svint8_t op1, svint8_t op2);
svbool_t svcmplt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2);
svbool_t svcmplt_u16(svbool_t pg, svuint16_t op1, svuint16_t op2);
svbool_t svcmplt_n_s8(svbool_t pg, svint8_t op1, int8_t op2);
svbool_t svcmplt_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2);
svbool_t svcmplt_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2);
svbool_t svcmplt_wide_n_u32(svbool_t pg, svuint32_t op1, uint64_t op2);
svbool_t svcmplt_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2);
svbool_t svcmplt_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2);
svbool_t svcmplt_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2);
svbool_t svcmplt_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2);
svbool_t svcmplt_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2);
svbool_t svcmplt_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2);
svbool_t svcmplt_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2);
svbool_t svcmplt_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2);
svbool_t svcmplt_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2);
svbool_t svcmplt_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2);
svbool_t svcmplt_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2);
svbool_t svcmplt_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2);
svbool_t svcmplt_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2);
svbool_t svcmplt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2);
svbool_t svcmplt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svcmplt(pg, op1, op2) _Generic((op2), \
    int32_t: svcmplt_n_s32, \
    int16_t: svcmplt_n_s16, \
    float64_t: svcmplt_n_f64, \
    float32_t: svcmplt_n_f32, \
    svuint64_t: svcmplt_u64, \
    int64_t: svcmplt_n_s64, \
    svuint32_t: svcmplt_u32, \
    svuint8_t: svcmplt_u8, \
    svint64_t: svcmplt_s64, \
    svint32_t: svcmplt_s32, \
    svint16_t: svcmplt_s16, \
    svint8_t: svcmplt_s8, \
    svfloat16_t: svcmplt_f16, \
    svuint16_t: svcmplt_u16, \
    int8_t: svcmplt_n_s8, \
    uint8_t: svcmplt_n_u8, \
    uint16_t: svcmplt_n_u16, \
    uint64_t: svcmplt_n_u64, \
    uint32_t: svcmplt_n_u32, \
    svfloat32_t: svcmplt_f32, \
    svfloat64_t: svcmplt_f64, \
    default: __assume(0) \
)(pg, op1, op2)
#define svcmplt_wide(pg, op1, op2) _Generic((op2), \
    uint64_t: _Generic((op1), \
        svuint32_t: svcmplt_wide_n_u32, \
        svuint16_t: svcmplt_wide_n_u16, \
        svuint8_t: svcmplt_wide_n_u8, \
        default: __assume(0)), \
    int64_t: _Generic((op1), \
        svint32_t: svcmplt_wide_n_s32, \
        svint16_t: svcmplt_wide_n_s16, \
        svint8_t: svcmplt_wide_n_s8, \
        default: __assume(0)), \
    svuint64_t: _Generic((op1), \
        svuint32_t: svcmplt_wide_u32, \
        svuint16_t: svcmplt_wide_u16, \
        svuint8_t: svcmplt_wide_u8, \
        default: __assume(0)), \
    svint64_t: _Generic((op1), \
        svint32_t: svcmplt_wide_s32, \
        svint16_t: svcmplt_wide_s16, \
        svint8_t: svcmplt_wide_s8, \
        default: __assume(0)), \
    default: __assume(0) \
)(pg, op1, op2)
#endif

// sve: Compare / Not equal to: Compare not equal to
svbool_t svcmpne_n_s64(svbool_t pg, svint64_t op1, int64_t op2);
svbool_t svcmpne_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2);
svbool_t svcmpne_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2);
svbool_t svcmpne_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2);
svbool_t svcmpne_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2);
svbool_t svcmpne_n_s32(svbool_t pg, svint32_t op1, int32_t op2);
svbool_t svcmpne_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2);
svbool_t svcmpne_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2);
svbool_t svcmpne_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2);
svbool_t svcmpne_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2);
svbool_t svcmpne_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2);
svbool_t svcmpne_n_s16(svbool_t pg, svint16_t op1, int16_t op2);
svbool_t svcmpne_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2);
svbool_t svcmpne_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2);
svbool_t svcmpne_u64(svbool_t pg, svuint64_t op1, svuint64_t op2);
svbool_t svcmpne_u32(svbool_t pg, svuint32_t op1, svuint32_t op2);
svbool_t svcmpne_u16(svbool_t pg, svuint16_t op1, svuint16_t op2);
svbool_t svcmpne_s64(svbool_t pg, svint64_t op1, svint64_t op2);
svbool_t svcmpne_s32(svbool_t pg, svint32_t op1, svint32_t op2);
svbool_t svcmpne_s16(svbool_t pg, svint16_t op1, svint16_t op2);
svbool_t svcmpne_s8(svbool_t pg, svint8_t op1, svint8_t op2);
svbool_t svcmpne_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2);
svbool_t svcmpne_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2);
svbool_t svcmpne_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2);
svbool_t svcmpne_n_s8(svbool_t pg, svint8_t op1, int8_t op2);
svbool_t svcmpne_u8(svbool_t pg, svuint8_t op1, svuint8_t op2);
svbool_t svcmpne_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svcmpne(pg, op1, op2) _Generic((op2), \
    int64_t: svcmpne_n_s64, \
    uint8_t: svcmpne_n_u8, \
    uint16_t: svcmpne_n_u16, \
    uint32_t: svcmpne_n_u32, \
    int32_t: svcmpne_n_s32, \
    uint64_t: svcmpne_n_u64, \
    int16_t: svcmpne_n_s16, \
    float64_t: svcmpne_n_f64, \
    float32_t: svcmpne_n_f32, \
    svuint64_t: svcmpne_u64, \
    svuint32_t: svcmpne_u32, \
    svuint16_t: svcmpne_u16, \
    svint64_t: svcmpne_s64, \
    svint32_t: svcmpne_s32, \
    svint16_t: svcmpne_s16, \
    svint8_t: svcmpne_s8, \
    svfloat64_t: svcmpne_f64, \
    svfloat32_t: svcmpne_f32, \
    svfloat16_t: svcmpne_f16, \
    int8_t: svcmpne_n_s8, \
    svuint8_t: svcmpne_u8, \
    default: __assume(0) \
)(pg, op1, op2)
#define svcmpne_wide(pg, op1, op2) _Generic((op2), \
    svint64_t: _Generic((op1), \
        svint8_t: svcmpne_wide_s8, \
        svint32_t: svcmpne_wide_s32, \
        svint16_t: svcmpne_wide_s16, \
        default: __assume(0)), \
    int64_t: _Generic((op1), \
        svint8_t: svcmpne_wide_n_s8, \
        svint16_t: svcmpne_wide_n_s16, \
        svint32_t: svcmpne_wide_n_s32, \
        default: __assume(0)), \
    default: __assume(0) \
)(pg, op1, op2)
#endif

// sve: Compare / Unordered with: Compare unordered with
svbool_t svcmpuo_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2);
svbool_t svcmpuo_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2);
svbool_t svcmpuo_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2);
svbool_t svcmpuo_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2);
svbool_t svcmpuo_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svcmpuo(pg, op1, op2) _Generic((op2), \
    svfloat64_t: svcmpuo_f64, \
    svfloat32_t: svcmpuo_f32, \
    svfloat16_t: svcmpuo_f16, \
    float64_t: svcmpuo_n_f64, \
    float32_t: svcmpuo_n_f32, \
    default: __assume(0) \
)(pg, op1, op2)
#endif
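/*
 * Usage sketch (illustrative addition, not part of the original header;
 * assumes a C11 compiler and an SVE-enabled ARM64 target): an unordered
 * compare of a vector against itself flags exactly the NaN lanes, e.g.
 *
 *     svbool_t nan_lanes(svbool_t pg, svfloat64_t v)
 *     {
 *         return svcmpuo(pg, v, v);   // resolves to svcmpuo_f64
 *     }
 */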
// sve: Complex arithmetic / Complex addition: Complex add with rotate
svfloat16_t svcadd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, uint64_t imm_rotation);
svfloat32_t svcadd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation);
svfloat64_t svcadd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation);
svfloat32_t svcadd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation);
svfloat64_t svcadd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation);
svfloat16_t svcadd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, uint64_t imm_rotation);
svfloat32_t svcadd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation);
svfloat16_t svcadd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, uint64_t imm_rotation);
svfloat64_t svcadd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svcadd_z(pg, op1, op2, imm_rotation) _Generic((op2), \
    svfloat16_t: svcadd_f16_z, \
    svfloat32_t: svcadd_f32_z, \
    svfloat64_t: svcadd_f64_z, \
    default: __assume(0) \
)(pg, op1, op2, imm_rotation)
#define svcadd_x(pg, op1, op2, imm_rotation) _Generic((op2), \
    svfloat32_t: svcadd_f32_x, \
    svfloat64_t: svcadd_f64_x, \
    svfloat16_t: svcadd_f16_x, \
    default: __assume(0) \
)(pg, op1, op2, imm_rotation)
#define svcadd_m(pg, op1, op2, imm_rotation) _Generic((op2), \
    svfloat64_t: svcadd_f64_m, \
    svfloat32_t: svcadd_f32_m, \
    svfloat16_t: svcadd_f16_m, \
    default: __assume(0) \
)(pg, op1, op2, imm_rotation)
#endif
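/*
 * Usage sketch (illustrative addition, not part of the original header;
 * assumes a C11 compiler and an SVE-enabled ARM64 target): imm_rotation
 * selects the rotation applied to the second operand before the add, e.g.
 *
 *     svfloat32_t rotate_add(svbool_t pg, svfloat32_t a, svfloat32_t b)
 *     {
 *         return svcadd_x(pg, a, b, 90);   // resolves to svcadd_f32_x
 *     }
 */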
// sve: Complex arithmetic / Complex multiply-accumulate: Complex multiply-add with rotate
svfloat16_t svcmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3, uint64_t imm_rotation);
svfloat32_t svcmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation);
svfloat64_t svcmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation);
svfloat16_t svcmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3, uint64_t imm_rotation);
svfloat64_t svcmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation);
svfloat32_t svcmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation);
svfloat16_t svcmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3, uint64_t imm_rotation);
svfloat32_t svcmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation);
svfloat64_t svcmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation);
svfloat16_t svcmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3, uint64_t imm_index, uint64_t imm_rotation);
svfloat32_t svcmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_index, uint64_t imm_rotation);

// sve2: Complex arithmetic / Complex multiply-accumulate: Complex multiply-add with rotate
svuint32_t svcmla_lane_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3, uint64_t imm_index, uint64_t imm_rotation);
svuint16_t svcmla_lane_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3, uint64_t imm_index, uint64_t imm_rotation);
svint32_t svcmla_lane_s32(svint32_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index, uint64_t imm_rotation);
svint16_t svcmla_lane_s16(svint16_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index, uint64_t imm_rotation);
svuint64_t svcmla_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3, uint64_t imm_rotation);
svuint8_t svcmla_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3, uint64_t imm_rotation);
svuint16_t svcmla_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3, uint64_t imm_rotation);
svint64_t svcmla_s64(svint64_t op1, svint64_t op2, svint64_t op3, uint64_t imm_rotation);
svint32_t svcmla_s32(svint32_t op1, svint32_t op2, svint32_t op3, uint64_t imm_rotation);
svint16_t svcmla_s16(svint16_t op1, svint16_t op2, svint16_t op3, uint64_t imm_rotation);
svint8_t svcmla_s8(svint8_t op1, svint8_t op2, svint8_t op3, uint64_t imm_rotation);
svuint32_t svcmla_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3, uint64_t imm_rotation);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svcmla_m(pg, op1, op2, op3, imm_rotation) _Generic((op2), \
    svfloat16_t: svcmla_f16_m, \
    svfloat32_t: svcmla_f32_m, \
    svfloat64_t: svcmla_f64_m, \
    default: __assume(0) \
)(pg, op1, op2, op3, imm_rotation)
#define svcmla_x(pg, op1, op2, op3, imm_rotation) _Generic((op2), \
    svfloat16_t: svcmla_f16_x, \
    svfloat64_t: svcmla_f64_x, \
    svfloat32_t: svcmla_f32_x, \
    default: __assume(0) \
)(pg, op1, op2, op3, imm_rotation)
#define svcmla_z(pg, op1, op2, op3, imm_rotation) _Generic((op2), \
    svfloat16_t: svcmla_f16_z, \
    svfloat32_t: svcmla_f32_z, \
    svfloat64_t: svcmla_f64_z, \
    default: __assume(0) \
)(pg, op1, op2, op3, imm_rotation)
#define svcmla_lane(op1, op2, op3, imm_index, imm_rotation) _Generic((op3), \
    svfloat16_t: svcmla_lane_f16, \
    svfloat32_t: svcmla_lane_f32, \
    svuint32_t: svcmla_lane_u32, \
    svuint16_t: svcmla_lane_u16, \
    svint32_t: svcmla_lane_s32, \
    svint16_t: svcmla_lane_s16, \
    default: __assume(0) \
)(op1, op2, op3, imm_index, imm_rotation)
#define svcmla(op1, op2, op3, imm_rotation) _Generic((op3), \
    svuint64_t: svcmla_u64, \
    svuint8_t: svcmla_u8, \
    svuint16_t: svcmla_u16, \
    svint64_t: svcmla_s64, \
    svint32_t: svcmla_s32, \
    svint16_t: svcmla_s16, \
    svint8_t: svcmla_s8, \
    svuint32_t: svcmla_u32, \
    default: __assume(0) \
)(op1, op2, op3, imm_rotation)
#endif

// sve: Data type conversion / Conversions: Down convert and narrow (top)
svbfloat16_t svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t op);
svbfloat16_t svcvtnt_bf16_f32_m(svbfloat16_t even, svbool_t pg, svfloat32_t op);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svcvtnt_bf16_x(even, pg, op) _Generic((op), \
    svfloat32_t: svcvtnt_bf16_f32_x, \
    default: __assume(0) \
)(even, pg, op)
#define svcvtnt_bf16_m(even, pg, op) _Generic((op), \
    svfloat32_t: svcvtnt_bf16_f32_m, \
    default: __assume(0) \
)(even, pg, op)
#endif

// sve: Data type conversion / Conversions: Floating-point convert
svint32_t svcvt_s32_f16_m(svint32_t inactive, svbool_t pg, svfloat16_t op);
svint32_t svcvt_s32_f32_m(svint32_t inactive, svbool_t pg, svfloat32_t op);
svint32_t svcvt_s32_f64_m(svint32_t inactive, svbool_t pg, svfloat64_t op);
svint64_t svcvt_s64_f16_m(svint64_t inactive, svbool_t pg, svfloat16_t op);
svint64_t svcvt_s64_f32_m(svint64_t inactive, svbool_t pg, svfloat32_t op);
svint64_t svcvt_s64_f64_m(svint64_t inactive, svbool_t pg, svfloat64_t op);
svuint16_t svcvt_u16_f16_m(svuint16_t inactive, svbool_t pg, svfloat16_t op);
svint32_t svcvt_s32_f32_x(svbool_t pg, svfloat32_t op);
svuint32_t svcvt_u32_f16_m(svuint32_t inactive, svbool_t pg, svfloat16_t op);
svuint32_t svcvt_u32_f64_m(svuint32_t inactive, svbool_t pg, svfloat64_t op);
svuint64_t svcvt_u64_f16_m(svuint64_t inactive, svbool_t pg, svfloat16_t op);
svuint64_t svcvt_u64_f32_m(svuint64_t inactive, svbool_t pg, svfloat32_t op);
svuint64_t svcvt_u64_f64_m(svuint64_t inactive, svbool_t pg, svfloat64_t op);
svint16_t svcvt_s16_f16_x(svbool_t pg, svfloat16_t op);
svint32_t svcvt_s32_f16_x(svbool_t pg, svfloat16_t op);
svint16_t svcvt_s16_f16_m(svint16_t inactive, svbool_t pg, svfloat16_t op);
svuint32_t svcvt_u32_f32_m(svuint32_t inactive, svbool_t pg, svfloat32_t op);
svfloat64_t svcvt_f64_f32_z(svbool_t pg, svfloat32_t op);
svfloat32_t svcvt_f32_f64_m(svfloat32_t inactive, svbool_t pg, svfloat64_t op);
svfloat32_t svcvt_f32_f64_z(svbool_t pg, svfloat64_t op);
svbfloat16_t svcvt_bf16_f32_z(svbool_t pg, svfloat32_t op);
svfloat16_t svcvt_f16_f32_m(svfloat16_t inactive, svbool_t pg, svfloat32_t op);
svfloat16_t svcvt_f16_f64_m(svfloat16_t inactive, svbool_t pg, svfloat64_t op);
svfloat32_t svcvt_f32_f16_m(svfloat32_t inactive, svbool_t pg, svfloat16_t op);
svint32_t svcvt_s32_f64_x(svbool_t pg, svfloat64_t op);
svfloat64_t svcvt_f64_f16_m(svfloat64_t inactive, svbool_t pg, svfloat16_t op);
svfloat64_t svcvt_f64_f32_m(svfloat64_t inactive, svbool_t pg, svfloat32_t op);
svfloat64_t svcvt_f64_f16_z(svbool_t pg, svfloat16_t op);
svfloat16_t svcvt_f16_f32_x(svbool_t pg, svfloat32_t op);
svfloat32_t svcvt_f32_f16_x(svbool_t pg, svfloat16_t op);
svfloat32_t svcvt_f32_f64_x(svbool_t pg, svfloat64_t op);
svfloat64_t svcvt_f64_f16_x(svbool_t pg, svfloat16_t op);
svfloat64_t svcvt_f64_f32_x(svbool_t pg, svfloat32_t op);
svfloat16_t svcvt_f16_f32_z(svbool_t pg, svfloat32_t op);
svfloat16_t svcvt_f16_f64_z(svbool_t pg, svfloat64_t op);
svfloat32_t svcvt_f32_f16_z(svbool_t pg, svfloat16_t op);
svfloat16_t svcvt_f16_f64_x(svbool_t pg, svfloat64_t op);
svint64_t svcvt_s64_f16_x(svbool_t pg, svfloat16_t op);
svbfloat16_t svcvt_bf16_f32_x(svbool_t pg, svfloat32_t op);
svint64_t svcvt_s64_f64_x(svbool_t pg, svfloat64_t op);
svfloat64_t svcvt_f64_u64_m(svfloat64_t inactive, svbool_t pg, svuint64_t op);
svfloat16_t svcvt_f16_s16_x(svbool_t pg, svint16_t op);
svfloat16_t svcvt_f16_s32_x(svbool_t pg, svint32_t op);
svfloat16_t svcvt_f16_s64_x(svbool_t pg, svint64_t op);
svfloat16_t svcvt_f16_u16_x(svbool_t pg, svuint16_t op);
svfloat16_t svcvt_f16_u32_x(svbool_t pg, svuint32_t op);
svfloat16_t svcvt_f16_u64_x(svbool_t pg, svuint64_t op);
svint64_t svcvt_s64_f32_x(svbool_t pg, svfloat32_t op);
svfloat32_t svcvt_f32_s32_x(svbool_t pg, svint32_t op);
svfloat32_t svcvt_f32_s64_x(svbool_t pg, svint64_t op);
svfloat32_t svcvt_f32_u32_x(svbool_t pg, svuint32_t op);
svfloat32_t svcvt_f32_u64_x(svbool_t pg, svuint64_t op);
svfloat64_t svcvt_f64_s32_x(svbool_t pg, svint32_t op);
svfloat64_t svcvt_f64_s64_x(svbool_t pg, svint64_t op);
svfloat64_t svcvt_f64_u32_m(svfloat64_t inactive, svbool_t pg, svuint32_t op);
svfloat64_t svcvt_f64_u32_x(svbool_t pg, svuint32_t op);
svfloat16_t svcvt_f16_s16_z(svbool_t pg, svint16_t op);
svfloat16_t svcvt_f16_s32_z(svbool_t pg, svint32_t op);
svfloat16_t svcvt_f16_s64_z(svbool_t pg, svint64_t op);
svfloat16_t svcvt_f16_u16_z(svbool_t pg, svuint16_t op);
svfloat16_t svcvt_f16_u32_z(svbool_t pg, svuint32_t op);
svfloat16_t svcvt_f16_u64_z(svbool_t pg, svuint64_t op);
svfloat32_t svcvt_f32_s32_z(svbool_t pg, svint32_t op);
svfloat32_t svcvt_f32_s64_z(svbool_t pg, svint64_t op);
svfloat32_t svcvt_f32_u32_z(svbool_t pg, svuint32_t op);
svfloat32_t svcvt_f32_u64_z(svbool_t pg, svuint64_t op);
svfloat64_t svcvt_f64_s32_z(svbool_t pg, svint32_t op);
svfloat64_t svcvt_f64_s64_z(svbool_t pg, svint64_t op);
svfloat64_t svcvt_f64_u32_z(svbool_t pg, svuint32_t op);
svfloat64_t svcvt_f64_u64_z(svbool_t pg, svuint64_t op);
svfloat64_t svcvt_f64_u64_x(svbool_t pg, svuint64_t op);
svfloat64_t svcvt_f64_s64_m(svfloat64_t inactive, svbool_t pg, svint64_t op);
svbfloat16_t svcvt_bf16_f32_m(svbfloat16_t inactive, svbool_t pg, svfloat32_t op);
svfloat32_t svcvt_f32_u64_m(svfloat32_t inactive, svbool_t pg, svuint64_t op);
svuint32_t svcvt_u32_f16_x(svbool_t pg, svfloat16_t op);
svuint32_t svcvt_u32_f32_x(svbool_t pg, svfloat32_t op);
svuint64_t svcvt_u64_f16_x(svbool_t pg, svfloat16_t op);
svuint64_t svcvt_u64_f32_x(svbool_t pg, svfloat32_t op);
svuint64_t svcvt_u64_f64_x(svbool_t pg, svfloat64_t op);
svint16_t svcvt_s16_f16_z(svbool_t pg, svfloat16_t op);
svint32_t svcvt_s32_f16_z(svbool_t pg, svfloat16_t op);
svint32_t svcvt_s32_f32_z(svbool_t pg, svfloat32_t op);
svfloat64_t svcvt_f64_s32_m(svfloat64_t inactive, svbool_t pg, svint32_t op);
svfloat16_t svcvt_f16_s16_m(svfloat16_t inactive, svbool_t pg, svint16_t op);
svuint64_t svcvt_u64_f64_z(svbool_t pg, svfloat64_t op);
svuint64_t svcvt_u64_f32_z(svbool_t pg, svfloat32_t op);
svuint64_t svcvt_u64_f16_z(svbool_t pg, svfloat16_t op);
svuint32_t svcvt_u32_f64_z(svbool_t pg, svfloat64_t op);
svuint32_t svcvt_u32_f32_z(svbool_t pg, svfloat32_t op);
svuint32_t svcvt_u32_f16_z(svbool_t pg, svfloat16_t op);
svuint16_t svcvt_u16_f16_z(svbool_t pg, svfloat16_t op);
svint64_t svcvt_s64_f64_z(svbool_t pg, svfloat64_t op);
svint64_t svcvt_s64_f32_z(svbool_t pg, svfloat32_t op);
svint64_t svcvt_s64_f16_z(svbool_t pg, svfloat16_t op);
svint32_t svcvt_s32_f64_z(svbool_t pg, svfloat64_t op);
svfloat16_t svcvt_f16_s32_m(svfloat16_t inactive, svbool_t pg, svint32_t op);
svuint16_t svcvt_u16_f16_x(svbool_t pg, svfloat16_t op);
svuint32_t svcvt_u32_f64_x(svbool_t pg, svfloat64_t op);
svfloat16_t svcvt_f16_u64_m(svfloat16_t inactive, svbool_t pg, svuint64_t op);
svfloat16_t svcvt_f16_u32_m(svfloat16_t inactive, svbool_t pg, svuint32_t op);
svfloat16_t svcvt_f16_u16_m(svfloat16_t inactive, svbool_t pg, svuint16_t op);
svfloat16_t svcvt_f16_s64_m(svfloat16_t inactive, svbool_t pg, svint64_t op);
svfloat32_t svcvt_f32_u32_m(svfloat32_t inactive, svbool_t pg, svuint32_t op);
svfloat32_t svcvt_f32_s64_m(svfloat32_t inactive, svbool_t pg, svint64_t op);
svfloat32_t svcvt_f32_s32_m(svfloat32_t inactive, svbool_t pg, svint32_t op);

#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define svcvt_s32_m(inactive, pg, op) _Generic((op), \
    svfloat16_t: svcvt_s32_f16_m, \
    svfloat32_t: svcvt_s32_f32_m, \
    svfloat64_t: svcvt_s32_f64_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svcvt_s64_m(inactive, pg, op) _Generic((op), \
    svfloat16_t: svcvt_s64_f16_m, \
    svfloat32_t: svcvt_s64_f32_m, \
    svfloat64_t: svcvt_s64_f64_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svcvt_u16_m(inactive, pg, op) _Generic((op), \
    svfloat16_t: svcvt_u16_f16_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svcvt_s32_x(pg, op) _Generic((op), \
    svfloat32_t: svcvt_s32_f32_x, \
    svfloat16_t: svcvt_s32_f16_x, \
    svfloat64_t: svcvt_s32_f64_x, \
    default: __assume(0) \
)(pg, op)
#define svcvt_u32_m(inactive, pg, op) _Generic((op), \
    svfloat16_t: svcvt_u32_f16_m, \
    svfloat64_t: svcvt_u32_f64_m, \
    svfloat32_t: svcvt_u32_f32_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svcvt_u64_m(inactive, pg, op) _Generic((op), \
    svfloat16_t: svcvt_u64_f16_m, \
    svfloat32_t: svcvt_u64_f32_m, \
    svfloat64_t: svcvt_u64_f64_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svcvt_s16_x(pg, op) _Generic((op), \
    svfloat16_t: svcvt_s16_f16_x, \
    default: __assume(0) \
)(pg, op)
#define svcvt_s16_m(inactive, pg, op) _Generic((op), \
    svfloat16_t: svcvt_s16_f16_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svcvt_f64_z(pg, op) _Generic((op), \
    svfloat32_t: svcvt_f64_f32_z, \
    svfloat16_t: svcvt_f64_f16_z, \
    svint32_t: svcvt_f64_s32_z, \
    svint64_t: svcvt_f64_s64_z, \
    svuint32_t: svcvt_f64_u32_z, \
    svuint64_t: svcvt_f64_u64_z, \
    default: __assume(0) \
)(pg, op)
#define svcvt_f32_m(inactive, pg, op) _Generic((op), \
    svfloat64_t: svcvt_f32_f64_m, \
    svfloat16_t: svcvt_f32_f16_m, \
    svuint64_t: svcvt_f32_u64_m, \
    svuint32_t: svcvt_f32_u32_m, \
    svint64_t: svcvt_f32_s64_m, \
    svint32_t: svcvt_f32_s32_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svcvt_f32_z(pg, op) _Generic((op), \
    svfloat64_t: svcvt_f32_f64_z, \
    svfloat16_t: svcvt_f32_f16_z, \
    svint32_t: svcvt_f32_s32_z, \
    svint64_t: svcvt_f32_s64_z, \
    svuint32_t: svcvt_f32_u32_z, \
    svuint64_t: svcvt_f32_u64_z, \
    default: __assume(0) \
)(pg, op)
#define svcvt_bf16_z(pg, op) _Generic((op), \
    svfloat32_t: svcvt_bf16_f32_z, \
    default: __assume(0) \
)(pg, op)
#define svcvt_f16_m(inactive, pg, op) _Generic((op), \
    svfloat32_t: svcvt_f16_f32_m, \
    svfloat64_t: svcvt_f16_f64_m, \
    svint16_t: svcvt_f16_s16_m, \
    svint32_t: svcvt_f16_s32_m, \
    svuint64_t: svcvt_f16_u64_m, \
    svuint32_t: svcvt_f16_u32_m, \
    svuint16_t: svcvt_f16_u16_m, \
    svint64_t: svcvt_f16_s64_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svcvt_f64_m(inactive, pg, op) _Generic((op), \
    svfloat16_t: svcvt_f64_f16_m, \
    svfloat32_t: svcvt_f64_f32_m, \
    svuint64_t: svcvt_f64_u64_m, \
    svuint32_t: svcvt_f64_u32_m, \
    svint64_t: svcvt_f64_s64_m, \
    svint32_t: svcvt_f64_s32_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svcvt_f16_x(pg, op) _Generic((op), \
    svfloat32_t: svcvt_f16_f32_x, \
    svfloat64_t: svcvt_f16_f64_x, \
    svint16_t: svcvt_f16_s16_x, \
    svint32_t: svcvt_f16_s32_x, \
    svint64_t: svcvt_f16_s64_x, \
    svuint16_t: svcvt_f16_u16_x, \
    svuint32_t: svcvt_f16_u32_x, \
    svuint64_t: svcvt_f16_u64_x, \
    default: __assume(0) \
)(pg, op)
#define svcvt_f32_x(pg, op) _Generic((op), \
    svfloat16_t: svcvt_f32_f16_x, \
    svfloat64_t: svcvt_f32_f64_x, \
    svint32_t: svcvt_f32_s32_x, \
    svint64_t: svcvt_f32_s64_x, \
    svuint32_t: svcvt_f32_u32_x, \
    svuint64_t: svcvt_f32_u64_x, \
    default: __assume(0) \
)(pg, op)
#define svcvt_f64_x(pg, op) _Generic((op), \
    svfloat16_t: svcvt_f64_f16_x, \
    svfloat32_t: svcvt_f64_f32_x, \
    svint32_t: svcvt_f64_s32_x, \
    svint64_t: svcvt_f64_s64_x, \
    svuint32_t: svcvt_f64_u32_x, \
    svuint64_t: svcvt_f64_u64_x, \
    default: __assume(0) \
)(pg, op)
#define svcvt_f16_z(pg, op) _Generic((op), \
    svfloat32_t: svcvt_f16_f32_z, \
    svfloat64_t: svcvt_f16_f64_z, \
    svint16_t: svcvt_f16_s16_z, \
    svint32_t: svcvt_f16_s32_z, \
    svint64_t: svcvt_f16_s64_z, \
    svuint16_t: svcvt_f16_u16_z, \
    svuint32_t: svcvt_f16_u32_z, \
    svuint64_t: svcvt_f16_u64_z, \
    default: __assume(0) \
)(pg, op)
#define svcvt_s64_x(pg, op) _Generic((op), \
    svfloat16_t: svcvt_s64_f16_x, \
    svfloat64_t: svcvt_s64_f64_x, \
    svfloat32_t: svcvt_s64_f32_x, \
    default: __assume(0) \
)(pg, op)
#define svcvt_bf16_x(pg, op) _Generic((op), \
    svfloat32_t: svcvt_bf16_f32_x, \
    default: __assume(0) \
)(pg, op)
#define svcvt_bf16_m(inactive, pg, op) _Generic((op), \
    svfloat32_t: svcvt_bf16_f32_m, \
    default: __assume(0) \
)(inactive, pg, op)
#define svcvt_u32_x(pg, op) _Generic((op), \
    svfloat16_t: svcvt_u32_f16_x, \
    svfloat32_t: svcvt_u32_f32_x, \
    svfloat64_t: svcvt_u32_f64_x, \
    default: __assume(0) \
)(pg, op)
#define svcvt_u64_x(pg, op) _Generic((op), \
    svfloat16_t: svcvt_u64_f16_x, \
    svfloat32_t: svcvt_u64_f32_x, \
    svfloat64_t: svcvt_u64_f64_x, \
    default: __assume(0) \
)(pg, op)
#define svcvt_s16_z(pg, op) _Generic((op), \
    svfloat16_t: svcvt_s16_f16_z, \
    default: __assume(0) \
)(pg, op)
#define svcvt_s32_z(pg, op) _Generic((op), \
    svfloat16_t: svcvt_s32_f16_z, \
    svfloat32_t: svcvt_s32_f32_z, \
    svfloat64_t: svcvt_s32_f64_z, \
    default: __assume(0) \
)(pg, op)
#define svcvt_u64_z(pg, op) _Generic((op), \
    svfloat64_t: svcvt_u64_f64_z, \
    svfloat32_t: svcvt_u64_f32_z, \
    svfloat16_t: svcvt_u64_f16_z, \
    default: __assume(0) \
)(pg, op)
#define svcvt_u32_z(pg, op) _Generic((op), \
    svfloat64_t: svcvt_u32_f64_z, \
    svfloat32_t: svcvt_u32_f32_z, \
    svfloat16_t: svcvt_u32_f16_z, \
    default: __assume(0) \
)(pg, op)
#define svcvt_u16_z(pg, op) _Generic((op), \
    svfloat16_t: svcvt_u16_f16_z, \
    default: __assume(0) \
)(pg, op)
#define svcvt_s64_z(pg, op) _Generic((op), \
    svfloat64_t: svcvt_s64_f64_z, \
    svfloat32_t: svcvt_s64_f32_z, \
    svfloat16_t: svcvt_s64_f16_z, \
    default: __assume(0) \
)(pg, op)
#define svcvt_u16_x(pg, op) _Generic((op), \
    svfloat16_t: svcvt_u16_f16_x, \
    default: __assume(0) \
)(pg, op)
#endif
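/*
 * Usage sketch (illustrative addition, not part of the original header;
 * assumes a C11 compiler and an SVE-enabled ARM64 target): the conversion
 * macros are named for the destination element type and dispatch on the
 * source vector, e.g.
 *
 *     svint32_t truncate_to_int(svbool_t pg, svfloat32_t v)
 *     {
 *         return svcvt_s32_x(pg, v);   // resolves to svcvt_s32_f32_x
 *     }
 */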
svint8_t svreinterpret_s8_s8(svint8_t op); svint8_t svreinterpret_s8_s16(svint16_t op); svint8_t svreinterpret_s8_s32(svint32_t op); svint8_t svreinterpret_s8_s64(svint64_t op); svint8_t svreinterpret_s8_u8(svuint8_t op); svint8_t svreinterpret_s8_u16(svuint16_t op); svint8_t svreinterpret_s8_u32(svuint32_t op); svint8_t svreinterpret_s8_u64(svuint64_t op); svint16_t svreinterpret_s16_bf16(svbfloat16_t op); svint16_t svreinterpret_s16_f16(svfloat16_t op); svint16_t svreinterpret_s16_f32(svfloat32_t op); svint16_t svreinterpret_s16_f64(svfloat64_t op); svint16_t svreinterpret_s16_s8(svint8_t op); svint16_t svreinterpret_s16_s16(svint16_t op); svint16_t svreinterpret_s16_s32(svint32_t op); svint16_t svreinterpret_s16_s64(svint64_t op); svint16_t svreinterpret_s16_u8(svuint8_t op); svint16_t svreinterpret_s16_u16(svuint16_t op); svint16_t svreinterpret_s16_u32(svuint32_t op); svint16_t svreinterpret_s16_u64(svuint64_t op); svint32_t svreinterpret_s32_bf16(svbfloat16_t op); svint32_t svreinterpret_s32_f16(svfloat16_t op); svint32_t svreinterpret_s32_f32(svfloat32_t op); svint32_t svreinterpret_s32_f64(svfloat64_t op); svint32_t svreinterpret_s32_s8(svint8_t op); svint32_t svreinterpret_s32_s16(svint16_t op); svint32_t svreinterpret_s32_s32(svint32_t op); svint32_t svreinterpret_s32_s64(svint64_t op); svint32_t svreinterpret_s32_u8(svuint8_t op); svint32_t svreinterpret_s32_u16(svuint16_t op); svint32_t svreinterpret_s32_u32(svuint32_t op); svint32_t svreinterpret_s32_u64(svuint64_t op); svint64_t svreinterpret_s64_bf16(svbfloat16_t op); svint64_t svreinterpret_s64_f16(svfloat16_t op); svint64_t svreinterpret_s64_f32(svfloat32_t op); svint64_t svreinterpret_s64_f64(svfloat64_t op); svint64_t svreinterpret_s64_s8(svint8_t op); svint64_t svreinterpret_s64_s16(svint16_t op); svint64_t svreinterpret_s64_s32(svint32_t op); svint64_t svreinterpret_s64_s64(svint64_t op); svint64_t svreinterpret_s64_u8(svuint8_t op); svint64_t svreinterpret_s64_u16(svuint16_t op); svint64_t svreinterpret_s64_u32(svuint32_t op); svint64_t svreinterpret_s64_u64(svuint64_t op); svuint8_t svreinterpret_u8_bf16(svbfloat16_t op); svuint8_t svreinterpret_u8_f16(svfloat16_t op); svuint8_t svreinterpret_u8_f32(svfloat32_t op); svuint8_t svreinterpret_u8_f64(svfloat64_t op); svuint8_t svreinterpret_u8_s8(svint8_t op); svuint8_t svreinterpret_u8_s16(svint16_t op); svuint8_t svreinterpret_u8_s32(svint32_t op); svuint8_t svreinterpret_u8_s64(svint64_t op); svuint8_t svreinterpret_u8_u8(svuint8_t op); svuint8_t svreinterpret_u8_u16(svuint16_t op); svuint8_t svreinterpret_u8_u32(svuint32_t op); svuint8_t svreinterpret_u8_u64(svuint64_t op); svuint16_t svreinterpret_u16_bf16(svbfloat16_t op); svuint16_t svreinterpret_u16_f16(svfloat16_t op); svuint16_t svreinterpret_u16_f32(svfloat32_t op); svuint16_t svreinterpret_u16_f64(svfloat64_t op); svuint16_t svreinterpret_u16_s8(svint8_t op); svuint16_t svreinterpret_u16_s16(svint16_t op); svuint16_t svreinterpret_u16_s32(svint32_t op); svuint16_t svreinterpret_u16_s64(svint64_t op); svuint16_t svreinterpret_u16_u8(svuint8_t op); svuint16_t svreinterpret_u16_u16(svuint16_t op); svuint16_t svreinterpret_u16_u32(svuint32_t op); svuint16_t svreinterpret_u16_u64(svuint64_t op); svuint32_t svreinterpret_u32_bf16(svbfloat16_t op); svuint32_t svreinterpret_u32_f16(svfloat16_t op); svuint32_t svreinterpret_u32_f32(svfloat32_t op); svuint32_t svreinterpret_u32_f64(svfloat64_t op); svuint32_t svreinterpret_u32_s8(svint8_t op); svuint32_t svreinterpret_u32_s16(svint16_t op); svuint32_t 
svreinterpret_u32_s32(svint32_t op); svuint32_t svreinterpret_u32_s64(svint64_t op); svuint32_t svreinterpret_u32_u8(svuint8_t op); svuint32_t svreinterpret_u32_u16(svuint16_t op); svuint32_t svreinterpret_u32_u32(svuint32_t op); svuint32_t svreinterpret_u32_u64(svuint64_t op); svuint64_t svreinterpret_u64_bf16(svbfloat16_t op); svuint64_t svreinterpret_u64_f16(svfloat16_t op); svuint64_t svreinterpret_u64_f32(svfloat32_t op); svuint64_t svreinterpret_u64_f64(svfloat64_t op); svuint64_t svreinterpret_u64_s8(svint8_t op); svuint64_t svreinterpret_u64_s16(svint16_t op); svuint64_t svreinterpret_u64_s32(svint32_t op); svuint64_t svreinterpret_u64_s64(svint64_t op); svuint64_t svreinterpret_u64_u8(svuint8_t op); svuint64_t svreinterpret_u64_u16(svuint16_t op); svuint64_t svreinterpret_u64_u32(svuint32_t op); svuint64_t svreinterpret_u64_u64(svuint64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svreinterpret_bf16(op) _Generic((op), \ svbfloat16_t: svreinterpret_bf16_bf16, \ svfloat16_t: svreinterpret_bf16_f16, \ svfloat32_t: svreinterpret_bf16_f32, \ svfloat64_t: svreinterpret_bf16_f64, \ svint8_t: svreinterpret_bf16_s8, \ svint16_t: svreinterpret_bf16_s16, \ svint32_t: svreinterpret_bf16_s32, \ svint64_t: svreinterpret_bf16_s64, \ svuint8_t: svreinterpret_bf16_u8, \ svuint16_t: svreinterpret_bf16_u16, \ svuint32_t: svreinterpret_bf16_u32, \ svuint64_t: svreinterpret_bf16_u64, \ default: __assume(0) \ )(op) #define svreinterpret_f16(op) _Generic((op), \ svbfloat16_t: svreinterpret_f16_bf16, \ svfloat16_t: svreinterpret_f16_f16, \ svfloat32_t: svreinterpret_f16_f32, \ svfloat64_t: svreinterpret_f16_f64, \ svint8_t: svreinterpret_f16_s8, \ svint16_t: svreinterpret_f16_s16, \ svint64_t: svreinterpret_f16_s64, \ svint32_t: svreinterpret_f16_s32, \ svuint8_t: svreinterpret_f16_u8, \ svuint16_t: svreinterpret_f16_u16, \ svuint32_t: svreinterpret_f16_u32, \ svuint64_t: svreinterpret_f16_u64, \ default: __assume(0) \ )(op) #define svreinterpret_f32(op) _Generic((op), \ svbfloat16_t: svreinterpret_f32_bf16, \ svfloat16_t: svreinterpret_f32_f16, \ svfloat32_t: svreinterpret_f32_f32, \ svfloat64_t: svreinterpret_f32_f64, \ svint8_t: svreinterpret_f32_s8, \ svint16_t: svreinterpret_f32_s16, \ svint32_t: svreinterpret_f32_s32, \ svint64_t: svreinterpret_f32_s64, \ svuint8_t: svreinterpret_f32_u8, \ svuint16_t: svreinterpret_f32_u16, \ svuint32_t: svreinterpret_f32_u32, \ svuint64_t: svreinterpret_f32_u64, \ default: __assume(0) \ )(op) #define svreinterpret_f64(op) _Generic((op), \ svbfloat16_t: svreinterpret_f64_bf16, \ svfloat16_t: svreinterpret_f64_f16, \ svfloat32_t: svreinterpret_f64_f32, \ svfloat64_t: svreinterpret_f64_f64, \ svint8_t: svreinterpret_f64_s8, \ svint16_t: svreinterpret_f64_s16, \ svint32_t: svreinterpret_f64_s32, \ svint64_t: svreinterpret_f64_s64, \ svuint8_t: svreinterpret_f64_u8, \ svuint16_t: svreinterpret_f64_u16, \ svuint32_t: svreinterpret_f64_u32, \ svuint64_t: svreinterpret_f64_u64, \ default: __assume(0) \ )(op) #define svreinterpret_s8(op) _Generic((op), \ svbfloat16_t: svreinterpret_s8_bf16, \ svfloat16_t: svreinterpret_s8_f16, \ svfloat32_t: svreinterpret_s8_f32, \ svfloat64_t: svreinterpret_s8_f64, \ svint8_t: svreinterpret_s8_s8, \ svint16_t: svreinterpret_s8_s16, \ svint32_t: svreinterpret_s8_s32, \ svint64_t: svreinterpret_s8_s64, \ svuint8_t: svreinterpret_s8_u8, \ svuint16_t: svreinterpret_s8_u16, \ svuint32_t: svreinterpret_s8_u32, \ svuint64_t: svreinterpret_s8_u64, \ default: __assume(0) \ )(op) #define svreinterpret_s16(op) 
_Generic((op), \ svbfloat16_t: svreinterpret_s16_bf16, \ svfloat16_t: svreinterpret_s16_f16, \ svfloat32_t: svreinterpret_s16_f32, \ svfloat64_t: svreinterpret_s16_f64, \ svint8_t: svreinterpret_s16_s8, \ svint16_t: svreinterpret_s16_s16, \ svint32_t: svreinterpret_s16_s32, \ svint64_t: svreinterpret_s16_s64, \ svuint8_t: svreinterpret_s16_u8, \ svuint16_t: svreinterpret_s16_u16, \ svuint32_t: svreinterpret_s16_u32, \ svuint64_t: svreinterpret_s16_u64, \ default: __assume(0) \ )(op) #define svreinterpret_s32(op) _Generic((op), \ svbfloat16_t: svreinterpret_s32_bf16, \ svfloat16_t: svreinterpret_s32_f16, \ svfloat32_t: svreinterpret_s32_f32, \ svfloat64_t: svreinterpret_s32_f64, \ svint8_t: svreinterpret_s32_s8, \ svint16_t: svreinterpret_s32_s16, \ svint32_t: svreinterpret_s32_s32, \ svint64_t: svreinterpret_s32_s64, \ svuint8_t: svreinterpret_s32_u8, \ svuint16_t: svreinterpret_s32_u16, \ svuint32_t: svreinterpret_s32_u32, \ svuint64_t: svreinterpret_s32_u64, \ default: __assume(0) \ )(op) #define svreinterpret_s64(op) _Generic((op), \ svbfloat16_t: svreinterpret_s64_bf16, \ svfloat16_t: svreinterpret_s64_f16, \ svfloat32_t: svreinterpret_s64_f32, \ svfloat64_t: svreinterpret_s64_f64, \ svint8_t: svreinterpret_s64_s8, \ svint16_t: svreinterpret_s64_s16, \ svint32_t: svreinterpret_s64_s32, \ svint64_t: svreinterpret_s64_s64, \ svuint8_t: svreinterpret_s64_u8, \ svuint16_t: svreinterpret_s64_u16, \ svuint32_t: svreinterpret_s64_u32, \ svuint64_t: svreinterpret_s64_u64, \ default: __assume(0) \ )(op) #define svreinterpret_u8(op) _Generic((op), \ svbfloat16_t: svreinterpret_u8_bf16, \ svfloat16_t: svreinterpret_u8_f16, \ svfloat32_t: svreinterpret_u8_f32, \ svfloat64_t: svreinterpret_u8_f64, \ svint8_t: svreinterpret_u8_s8, \ svint16_t: svreinterpret_u8_s16, \ svint32_t: svreinterpret_u8_s32, \ svint64_t: svreinterpret_u8_s64, \ svuint8_t: svreinterpret_u8_u8, \ svuint16_t: svreinterpret_u8_u16, \ svuint32_t: svreinterpret_u8_u32, \ svuint64_t: svreinterpret_u8_u64, \ default: __assume(0) \ )(op) #define svreinterpret_u16(op) _Generic((op), \ svbfloat16_t: svreinterpret_u16_bf16, \ svfloat16_t: svreinterpret_u16_f16, \ svfloat32_t: svreinterpret_u16_f32, \ svfloat64_t: svreinterpret_u16_f64, \ svint8_t: svreinterpret_u16_s8, \ svint16_t: svreinterpret_u16_s16, \ svint32_t: svreinterpret_u16_s32, \ svint64_t: svreinterpret_u16_s64, \ svuint8_t: svreinterpret_u16_u8, \ svuint16_t: svreinterpret_u16_u16, \ svuint32_t: svreinterpret_u16_u32, \ svuint64_t: svreinterpret_u16_u64, \ default: __assume(0) \ )(op) #define svreinterpret_u32(op) _Generic((op), \ svbfloat16_t: svreinterpret_u32_bf16, \ svfloat16_t: svreinterpret_u32_f16, \ svfloat32_t: svreinterpret_u32_f32, \ svfloat64_t: svreinterpret_u32_f64, \ svint8_t: svreinterpret_u32_s8, \ svint16_t: svreinterpret_u32_s16, \ svint32_t: svreinterpret_u32_s32, \ svint64_t: svreinterpret_u32_s64, \ svuint8_t: svreinterpret_u32_u8, \ svuint16_t: svreinterpret_u32_u16, \ svuint32_t: svreinterpret_u32_u32, \ svuint64_t: svreinterpret_u32_u64, \ default: __assume(0) \ )(op) #define svreinterpret_u64(op) _Generic((op), \ svbfloat16_t: svreinterpret_u64_bf16, \ svfloat16_t: svreinterpret_u64_f16, \ svfloat32_t: svreinterpret_u64_f32, \ svfloat64_t: svreinterpret_u64_f64, \ svint8_t: svreinterpret_u64_s8, \ svint16_t: svreinterpret_u64_s16, \ svint32_t: svreinterpret_u64_s32, \ svint64_t: svreinterpret_u64_s64, \ svuint8_t: svreinterpret_u64_u8, \ svuint16_t: svreinterpret_u64_u16, \ svuint32_t: svreinterpret_u64_u32, \ svuint64_t: svreinterpret_u64_u64, \ 
default: __assume(0) \ )(op) #endif // sve: Fault suppression / FFR manipulation: Initialize the first-fault register to all-true void svsetffr(void); // sve: Fault suppression / FFR manipulation: Read FFR, returning predicate of successfully loaded elements svbool_t svrdffr(void); svbool_t svrdffr_z(svbool_t pg); // sve: Fault suppression / FFR manipulation: Write to the first-fault register void svwrffr(svbool_t op); // sve: Fault suppression / First-faulting loads / Consecutive: Load 16-bit data and sign-extend, first-faulting svint64_t svldff1sh_s64(svbool_t pg, const int16_t *base); svuint64_t svldff1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum); svuint32_t svldff1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum); svint64_t svldff1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum); svint32_t svldff1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum); svuint64_t svldff1sh_u64(svbool_t pg, const int16_t *base); svuint32_t svldff1sh_u32(svbool_t pg, const int16_t *base); svint32_t svldff1sh_s32(svbool_t pg, const int16_t *base); // sve: Fault suppression / First-faulting loads / Consecutive: Load 16-bit data and zero-extend, first-faulting svuint32_t svldff1uh_u32(svbool_t pg, const uint16_t *base); svint64_t svldff1uh_s64(svbool_t pg, const uint16_t *base); svint32_t svldff1uh_s32(svbool_t pg, const uint16_t *base); svint32_t svldff1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum); svuint64_t svldff1uh_u64(svbool_t pg, const uint16_t *base); svint64_t svldff1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum); svuint32_t svldff1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum); svuint64_t svldff1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum); // sve: Fault suppression / First-faulting loads / Consecutive: Load 32-bit data and sign-extend, first-faulting svint64_t svldff1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum); svuint64_t svldff1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum); svuint64_t svldff1sw_u64(svbool_t pg, const int32_t *base); svint64_t svldff1sw_s64(svbool_t pg, const int32_t *base); // sve: Fault suppression / First-faulting loads / Consecutive: Load 32-bit data and zero-extend, first-faulting svuint64_t svldff1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum); svint64_t svldff1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum); svint64_t svldff1uw_s64(svbool_t pg, const uint32_t *base); svuint64_t svldff1uw_u64(svbool_t pg, const uint32_t *base); // sve: Fault suppression / First-faulting loads / Consecutive: Load 8-bit data and sign-extend, first-faulting svint32_t svldff1sb_s32(svbool_t pg, const int8_t *base); svint64_t svldff1sb_s64(svbool_t pg, const int8_t *base); svint16_t svldff1sb_s16(svbool_t pg, const int8_t *base); svuint32_t svldff1sb_u32(svbool_t pg, const int8_t *base); svint16_t svldff1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum); svint32_t svldff1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum); svint64_t svldff1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum); svuint32_t svldff1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum); svuint16_t svldff1sb_u16(svbool_t pg, const int8_t *base); svuint64_t svldff1sb_u64(svbool_t pg, const int8_t *base); svuint16_t svldff1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum); svuint64_t svldff1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum); // sve: Fault suppression / First-faulting loads / Consecutive: Load 8-bit data and 
zero-extend, first-faulting svint16_t svldff1ub_s16(svbool_t pg, const uint8_t *base); svint32_t svldff1ub_s32(svbool_t pg, const uint8_t *base); svint64_t svldff1ub_s64(svbool_t pg, const uint8_t *base); svuint16_t svldff1ub_u16(svbool_t pg, const uint8_t *base); svuint32_t svldff1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum); svuint64_t svldff1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum); svuint16_t svldff1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum); svint32_t svldff1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum); svint16_t svldff1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum); svuint64_t svldff1ub_u64(svbool_t pg, const uint8_t *base); svuint32_t svldff1ub_u32(svbool_t pg, const uint8_t *base); svint64_t svldff1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum); // sve: Fault suppression / First-faulting loads / Consecutive: Unextended load, first-faulting svuint64_t svldff1_u64(svbool_t pg, const uint64_t *base); svuint64_t svldff1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum); svuint32_t svldff1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum); svuint16_t svldff1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum); svuint8_t svldff1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum); svint64_t svldff1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum); svint32_t svldff1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum); svint16_t svldff1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum); svint8_t svldff1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum); svfloat64_t svldff1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum); svfloat32_t svldff1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum); svuint32_t svldff1_u32(svbool_t pg, const uint32_t *base); svuint16_t svldff1_u16(svbool_t pg, const uint16_t *base); svfloat32_t svldff1_f32(svbool_t pg, const float32_t *base); svfloat64_t svldff1_f64(svbool_t pg, const float64_t *base); svint8_t svldff1_s8(svbool_t pg, const int8_t *base); svint16_t svldff1_s16(svbool_t pg, const int16_t *base); svint32_t svldff1_s32(svbool_t pg, const int32_t *base); svint64_t svldff1_s64(svbool_t pg, const int64_t *base); svuint8_t svldff1_u8(svbool_t pg, const uint8_t *base); // sve: Fault suppression / First-faulting loads / Gather: Load 16-bit data and sign-extend, first-faulting svint64_t svldff1sh_gather_u64base_s64(svbool_t pg, svuint64_t bases); svint64_t svldff1sh_gather_s64offset_s64(svbool_t pg, const int16_t *base, svint64_t offsets); svuint64_t svldff1sh_gather_s64offset_u64(svbool_t pg, const int16_t *base, svint64_t offsets); svuint32_t svldff1sh_gather_u32offset_u32(svbool_t pg, const int16_t *base, svuint32_t offsets); svint32_t svldff1sh_gather_u32offset_s32(svbool_t pg, const int16_t *base, svuint32_t offsets); svuint32_t svldff1sh_gather_s32offset_u32(svbool_t pg, const int16_t *base, svint32_t offsets); svint32_t svldff1sh_gather_s32offset_s32(svbool_t pg, const int16_t *base, svint32_t offsets); svuint64_t svldff1sh_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint64_t svldff1sh_gather_u64offset_s64(svbool_t pg, const int16_t *base, svuint64_t offsets); svuint32_t svldff1sh_gather_u32base_u32(svbool_t pg, svuint32_t bases); svuint64_t svldff1sh_gather_u64offset_u64(svbool_t pg, const int16_t *base, svuint64_t offsets); svint32_t svldff1sh_gather_u32base_s32(svbool_t pg, svuint32_t bases); svuint32_t svldff1sh_gather_s32index_u32(svbool_t pg, const int16_t *base, svint32_t 
indices); svint32_t svldff1sh_gather_s32index_s32(svbool_t pg, const int16_t *base, svint32_t indices); svuint64_t svldff1sh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svint64_t svldff1sh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); svuint32_t svldff1sh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index); svint32_t svldff1sh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index); svuint64_t svldff1sh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svuint32_t svldff1sh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svint64_t svldff1sh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svuint64_t svldff1sh_gather_u64index_u64(svbool_t pg, const int16_t *base, svuint64_t indices); svint64_t svldff1sh_gather_u64index_s64(svbool_t pg, const int16_t *base, svuint64_t indices); svuint64_t svldff1sh_gather_s64index_u64(svbool_t pg, const int16_t *base, svint64_t indices); svint64_t svldff1sh_gather_s64index_s64(svbool_t pg, const int16_t *base, svint64_t indices); svuint32_t svldff1sh_gather_u32index_u32(svbool_t pg, const int16_t *base, svuint32_t indices); svint32_t svldff1sh_gather_u32index_s32(svbool_t pg, const int16_t *base, svuint32_t indices); svint32_t svldff1sh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldff1sh_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svldff1sh_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svldff1sh_gather_offset_s64(pg, base, offsets) _Generic((offsets), \ svint64_t: svldff1sh_gather_s64offset_s64, \ svuint64_t: svldff1sh_gather_u64offset_s64, \ int64_t: svldff1sh_gather_u64base_offset_s64, \ default: __assume(0) \ )(pg, base, offsets) #define svldff1sh_gather_offset_u64(pg, base, offsets) _Generic((offsets), \ svint64_t: svldff1sh_gather_s64offset_u64, \ svuint64_t: svldff1sh_gather_u64offset_u64, \ int64_t: svldff1sh_gather_u64base_offset_u64, \ default: __assume(0) \ )(pg, base, offsets) #define svldff1sh_gather_offset_u32(pg, base, offsets) _Generic((offsets), \ svuint32_t: svldff1sh_gather_u32offset_u32, \ svint32_t: svldff1sh_gather_s32offset_u32, \ int64_t: svldff1sh_gather_u32base_offset_u32, \ default: __assume(0) \ )(pg, base, offsets) #define svldff1sh_gather_offset_s32(pg, base, offsets) _Generic((offsets), \ svuint32_t: svldff1sh_gather_u32offset_s32, \ svint32_t: svldff1sh_gather_s32offset_s32, \ int64_t: svldff1sh_gather_u32base_offset_s32, \ default: __assume(0) \ )(pg, base, offsets) #define svldff1sh_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldff1sh_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svldff1sh_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svldff1sh_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #define svldff1sh_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svldff1sh_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #define svldff1sh_gather_index_u32(pg, base, indices) _Generic((indices), \ svint32_t: svldff1sh_gather_s32index_u32, \ int64_t: svldff1sh_gather_u32base_index_u32, \ svuint32_t: svldff1sh_gather_u32index_u32, \ default: __assume(0) \ )(pg, base, indices) #define svldff1sh_gather_index_s32(pg, base, indices) _Generic((indices), \ svint32_t: svldff1sh_gather_s32index_s32, \ int64_t: svldff1sh_gather_u32base_index_s32, \ svuint32_t: svldff1sh_gather_u32index_s32, \ 
default: __assume(0) \ )(pg, base, indices) #define svldff1sh_gather_index_u64(pg, bases, index) _Generic((index), \ int64_t: svldff1sh_gather_u64base_index_u64, \ svuint64_t: svldff1sh_gather_u64index_u64, \ svint64_t: svldff1sh_gather_s64index_u64, \ default: __assume(0) \ )(pg, bases, index) #define svldff1sh_gather_index_s64(pg, bases, index) _Generic((index), \ int64_t: svldff1sh_gather_u64base_index_s64, \ svuint64_t: svldff1sh_gather_u64index_s64, \ svint64_t: svldff1sh_gather_s64index_s64, \ default: __assume(0) \ )(pg, bases, index) #endif // sve: Fault suppression / First-faulting loads / Gather: Load 16-bit data and zero-extend, first-faulting svuint64_t svldff1uh_gather_u64index_u64(svbool_t pg, const uint16_t *base, svuint64_t indices); svint32_t svldff1uh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); svuint32_t svldff1uh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svint64_t svldff1uh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svuint64_t svldff1uh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svuint32_t svldff1uh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index); svint64_t svldff1uh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); svuint64_t svldff1uh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svint32_t svldff1uh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index); svuint64_t svldff1uh_gather_s64index_u64(svbool_t pg, const uint16_t *base, svint64_t indices); svint64_t svldff1uh_gather_u64index_s64(svbool_t pg, const uint16_t *base, svuint64_t indices); svuint32_t svldff1uh_gather_u32index_u32(svbool_t pg, const uint16_t *base, svuint32_t indices); svint32_t svldff1uh_gather_u32base_s32(svbool_t pg, svuint32_t bases); svuint32_t svldff1uh_gather_u32base_u32(svbool_t pg, svuint32_t bases); svint64_t svldff1uh_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint64_t svldff1uh_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint32_t svldff1uh_gather_s32offset_s32(svbool_t pg, const uint16_t *base, svint32_t offsets); svint64_t svldff1uh_gather_s64index_s64(svbool_t pg, const uint16_t *base, svint64_t indices); svint32_t svldff1uh_gather_s32index_s32(svbool_t pg, const uint16_t *base, svint32_t indices); svuint32_t svldff1uh_gather_s32offset_u32(svbool_t pg, const uint16_t *base, svint32_t offsets); svint32_t svldff1uh_gather_u32offset_s32(svbool_t pg, const uint16_t *base, svuint32_t offsets); svuint32_t svldff1uh_gather_u32offset_u32(svbool_t pg, const uint16_t *base, svuint32_t offsets); svint64_t svldff1uh_gather_s64offset_s64(svbool_t pg, const uint16_t *base, svint64_t offsets); svuint64_t svldff1uh_gather_s64offset_u64(svbool_t pg, const uint16_t *base, svint64_t offsets); svint64_t svldff1uh_gather_u64offset_s64(svbool_t pg, const uint16_t *base, svuint64_t offsets); svuint64_t svldff1uh_gather_u64offset_u64(svbool_t pg, const uint16_t *base, svuint64_t offsets); svuint32_t svldff1uh_gather_s32index_u32(svbool_t pg, const uint16_t *base, svint32_t indices); svint32_t svldff1uh_gather_u32index_s32(svbool_t pg, const uint16_t *base, svuint32_t indices); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldff1uh_gather_index_u64(pg, base, indices) _Generic((indices), \ svuint64_t: svldff1uh_gather_u64index_u64, \ int64_t: svldff1uh_gather_u64base_index_u64, \ svint64_t: svldff1uh_gather_s64index_u64, \ default: __assume(0) \ )(pg, base, 
indices) #define svldff1uh_gather_offset_s32(pg, bases, offset) _Generic((offset), \ int64_t: svldff1uh_gather_u32base_offset_s32, \ svint32_t: svldff1uh_gather_s32offset_s32, \ svuint32_t: svldff1uh_gather_u32offset_s32, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1uh_gather_offset_u32(pg, bases, offset) _Generic((offset), \ int64_t: svldff1uh_gather_u32base_offset_u32, \ svint32_t: svldff1uh_gather_s32offset_u32, \ svuint32_t: svldff1uh_gather_u32offset_u32, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1uh_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svldff1uh_gather_u64base_offset_s64, \ svint64_t: svldff1uh_gather_s64offset_s64, \ svuint64_t: svldff1uh_gather_u64offset_s64, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1uh_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svldff1uh_gather_u64base_offset_u64, \ svint64_t: svldff1uh_gather_s64offset_u64, \ svuint64_t: svldff1uh_gather_u64offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1uh_gather_index_u32(pg, bases, index) _Generic((index), \ int64_t: svldff1uh_gather_u32base_index_u32, \ svuint32_t: svldff1uh_gather_u32index_u32, \ svint32_t: svldff1uh_gather_s32index_u32, \ default: __assume(0) \ )(pg, bases, index) #define svldff1uh_gather_index_s64(pg, bases, index) _Generic((index), \ int64_t: svldff1uh_gather_u64base_index_s64, \ svuint64_t: svldff1uh_gather_u64index_s64, \ svint64_t: svldff1uh_gather_s64index_s64, \ default: __assume(0) \ )(pg, bases, index) #define svldff1uh_gather_index_s32(pg, bases, index) _Generic((index), \ int64_t: svldff1uh_gather_u32base_index_s32, \ svint32_t: svldff1uh_gather_s32index_s32, \ svuint32_t: svldff1uh_gather_u32index_s32, \ default: __assume(0) \ )(pg, bases, index) #define svldff1uh_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svldff1uh_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #define svldff1uh_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svldff1uh_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #define svldff1uh_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svldff1uh_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svldff1uh_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldff1uh_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #endif // sve: Fault suppression / First-faulting loads / Gather: Load 32-bit data and sign-extend, first-faulting svint64_t svldff1sw_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint64_t svldff1sw_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint64_t svldff1sw_gather_s64offset_s64(svbool_t pg, const int32_t *base, svint64_t offsets); svuint64_t svldff1sw_gather_s64offset_u64(svbool_t pg, const int32_t *base, svint64_t offsets); svint64_t svldff1sw_gather_u64offset_s64(svbool_t pg, const int32_t *base, svuint64_t offsets); svuint64_t svldff1sw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svint64_t svldff1sw_gather_s64index_s64(svbool_t pg, const int32_t *base, svint64_t indices); svuint64_t svldff1sw_gather_s64index_u64(svbool_t pg, const int32_t *base, svint64_t indices); svint64_t svldff1sw_gather_u64index_s64(svbool_t pg, const int32_t *base, svuint64_t indices); svuint64_t svldff1sw_gather_u64index_u64(svbool_t pg, const int32_t *base, svuint64_t indices); svint64_t svldff1sw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svldff1sw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, 
int64_t index); svuint64_t svldff1sw_gather_u64offset_u64(svbool_t pg, const int32_t *base, svuint64_t offsets); svuint64_t svldff1sw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldff1sw_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svldff1sw_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svldff1sw_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldff1sw_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svldff1sw_gather_offset_s64(pg, base, offsets) _Generic((offsets), \ svint64_t: svldff1sw_gather_s64offset_s64, \ svuint64_t: svldff1sw_gather_u64offset_s64, \ int64_t: svldff1sw_gather_u64base_offset_s64, \ default: __assume(0) \ )(pg, base, offsets) #define svldff1sw_gather_offset_u64(pg, base, offsets) _Generic((offsets), \ svint64_t: svldff1sw_gather_s64offset_u64, \ svuint64_t: svldff1sw_gather_u64offset_u64, \ int64_t: svldff1sw_gather_u64base_offset_u64, \ default: __assume(0) \ )(pg, base, offsets) #define svldff1sw_gather_index_u64(pg, bases, index) _Generic((index), \ int64_t: svldff1sw_gather_u64base_index_u64, \ svint64_t: svldff1sw_gather_s64index_u64, \ svuint64_t: svldff1sw_gather_u64index_u64, \ default: __assume(0) \ )(pg, bases, index) #define svldff1sw_gather_index_s64(pg, base, indices) _Generic((indices), \ svint64_t: svldff1sw_gather_s64index_s64, \ svuint64_t: svldff1sw_gather_u64index_s64, \ int64_t: svldff1sw_gather_u64base_index_s64, \ default: __assume(0) \ )(pg, base, indices) #endif // sve: Fault suppression / First-faulting loads / Gather: Load 32-bit data and zero-extend, first-faulting svuint64_t svldff1uw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svuint64_t svldff1uw_gather_s64index_u64(svbool_t pg, const uint32_t *base, svint64_t indices); svuint64_t svldff1uw_gather_u64base_u64(svbool_t pg, svuint64_t bases); svuint64_t svldff1uw_gather_u64offset_u64(svbool_t pg, const uint32_t *base, svuint64_t offsets); svuint64_t svldff1uw_gather_s64offset_u64(svbool_t pg, const uint32_t *base, svint64_t offsets); svint64_t svldff1uw_gather_u64offset_s64(svbool_t pg, const uint32_t *base, svuint64_t offsets); svint64_t svldff1uw_gather_s64index_s64(svbool_t pg, const uint32_t *base, svint64_t indices); svint64_t svldff1uw_gather_s64offset_s64(svbool_t pg, const uint32_t *base, svint64_t offsets); svint64_t svldff1uw_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint64_t svldff1uw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svldff1uw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svuint64_t svldff1uw_gather_u64index_u64(svbool_t pg, const uint32_t *base, svuint64_t indices); svint64_t svldff1uw_gather_u64index_s64(svbool_t pg, const uint32_t *base, svuint64_t indices); svint64_t svldff1uw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldff1uw_gather_index_u64(pg, bases, index) _Generic((index), \ int64_t: svldff1uw_gather_u64base_index_u64, \ svint64_t: svldff1uw_gather_s64index_u64, \ svuint64_t: svldff1uw_gather_u64index_u64, \ default: __assume(0) \ )(pg, bases, index) #define svldff1uw_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldff1uw_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svldff1uw_gather_offset_u64(pg, base, offsets) _Generic((offsets), \ svuint64_t: svldff1uw_gather_u64offset_u64, 
\ svint64_t: svldff1uw_gather_s64offset_u64, \ int64_t: svldff1uw_gather_u64base_offset_u64, \ default: __assume(0) \ )(pg, base, offsets) #define svldff1uw_gather_offset_s64(pg, base, offsets) _Generic((offsets), \ svuint64_t: svldff1uw_gather_u64offset_s64, \ svint64_t: svldff1uw_gather_s64offset_s64, \ int64_t: svldff1uw_gather_u64base_offset_s64, \ default: __assume(0) \ )(pg, base, offsets) #define svldff1uw_gather_index_s64(pg, base, indices) _Generic((indices), \ svint64_t: svldff1uw_gather_s64index_s64, \ svuint64_t: svldff1uw_gather_u64index_s64, \ int64_t: svldff1uw_gather_u64base_index_s64, \ default: __assume(0) \ )(pg, base, indices) #define svldff1uw_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svldff1uw_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #endif // sve: Fault suppression / First-faulting loads / Gather: Load 8-bit data and sign-extend, first-faulting svuint32_t svldff1sb_gather_u32base_u32(svbool_t pg, svuint32_t bases); svint32_t svldff1sb_gather_u32base_s32(svbool_t pg, svuint32_t bases); svuint32_t svldff1sb_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svuint64_t svldff1sb_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svldff1sb_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svint32_t svldff1sb_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); svuint64_t svldff1sb_gather_u64offset_u64(svbool_t pg, const int8_t *base, svuint64_t offsets); svuint64_t svldff1sb_gather_s64offset_u64(svbool_t pg, const int8_t *base, svint64_t offsets); svint64_t svldff1sb_gather_u64offset_s64(svbool_t pg, const int8_t *base, svuint64_t offsets); svuint32_t svldff1sb_gather_u32offset_u32(svbool_t pg, const int8_t *base, svuint32_t offsets); svint32_t svldff1sb_gather_u32offset_s32(svbool_t pg, const int8_t *base, svuint32_t offsets); svuint32_t svldff1sb_gather_s32offset_u32(svbool_t pg, const int8_t *base, svint32_t offsets); svint32_t svldff1sb_gather_s32offset_s32(svbool_t pg, const int8_t *base, svint32_t offsets); svuint64_t svldff1sb_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint64_t svldff1sb_gather_u64base_s64(svbool_t pg, svuint64_t bases); svint64_t svldff1sb_gather_s64offset_s64(svbool_t pg, const int8_t *base, svint64_t offsets); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldff1sb_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svldff1sb_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #define svldff1sb_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svldff1sb_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #define svldff1sb_gather_offset_u32(pg, bases, offset) _Generic((offset), \ int64_t: svldff1sb_gather_u32base_offset_u32, \ svuint32_t: svldff1sb_gather_u32offset_u32, \ svint32_t: svldff1sb_gather_s32offset_u32, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1sb_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svldff1sb_gather_u64base_offset_u64, \ svuint64_t: svldff1sb_gather_u64offset_u64, \ svint64_t: svldff1sb_gather_s64offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1sb_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svldff1sb_gather_u64base_offset_s64, \ svuint64_t: svldff1sb_gather_u64offset_s64, \ svint64_t: svldff1sb_gather_s64offset_s64, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1sb_gather_offset_s32(pg, bases, offset) _Generic((offset), \ int64_t: 
svldff1sb_gather_u32base_offset_s32, \ svuint32_t: svldff1sb_gather_u32offset_s32, \ svint32_t: svldff1sb_gather_s32offset_s32, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1sb_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldff1sb_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svldff1sb_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svldff1sb_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #endif // sve: Fault suppression / First-faulting loads / Gather: Load 8-bit data and zero-extend, first-faulting svint64_t svldff1ub_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint64_t svldff1ub_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svldff1ub_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svuint32_t svldff1ub_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svint32_t svldff1ub_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); svuint64_t svldff1ub_gather_u64offset_u64(svbool_t pg, const uint8_t *base, svuint64_t offsets); svint64_t svldff1ub_gather_u64offset_s64(svbool_t pg, const uint8_t *base, svuint64_t offsets); svuint64_t svldff1ub_gather_s64offset_u64(svbool_t pg, const uint8_t *base, svint64_t offsets); svint64_t svldff1ub_gather_s64offset_s64(svbool_t pg, const uint8_t *base, svint64_t offsets); svuint32_t svldff1ub_gather_u32offset_u32(svbool_t pg, const uint8_t *base, svuint32_t offsets); svuint64_t svldff1ub_gather_u64base_u64(svbool_t pg, svuint64_t bases); svuint32_t svldff1ub_gather_s32offset_u32(svbool_t pg, const uint8_t *base, svint32_t offsets); svint32_t svldff1ub_gather_u32base_s32(svbool_t pg, svuint32_t bases); svint32_t svldff1ub_gather_u32offset_s32(svbool_t pg, const uint8_t *base, svuint32_t offsets); svuint32_t svldff1ub_gather_u32base_u32(svbool_t pg, svuint32_t bases); svint32_t svldff1ub_gather_s32offset_s32(svbool_t pg, const uint8_t *base, svint32_t offsets); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldff1ub_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svldff1ub_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svldff1ub_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svldff1ub_gather_u64base_offset_u64, \ svuint64_t: svldff1ub_gather_u64offset_u64, \ svint64_t: svldff1ub_gather_s64offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1ub_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svldff1ub_gather_u64base_offset_s64, \ svuint64_t: svldff1ub_gather_u64offset_s64, \ svint64_t: svldff1ub_gather_s64offset_s64, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1ub_gather_offset_u32(pg, bases, offset) _Generic((offset), \ int64_t: svldff1ub_gather_u32base_offset_u32, \ svuint32_t: svldff1ub_gather_u32offset_u32, \ svint32_t: svldff1ub_gather_s32offset_u32, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1ub_gather_offset_s32(pg, bases, offset) _Generic((offset), \ int64_t: svldff1ub_gather_u32base_offset_s32, \ svuint32_t: svldff1ub_gather_u32offset_s32, \ svint32_t: svldff1ub_gather_s32offset_s32, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1ub_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldff1ub_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svldff1ub_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svldff1ub_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #define svldff1ub_gather_u32(pg, 
bases) _Generic((bases), \ svuint32_t: svldff1ub_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #endif // sve: Fault suppression / First-faulting loads / Gather: Unextended load, first-faulting svint64_t svldff1_gather_u64index_s64(svbool_t pg, const int64_t *base, svuint64_t indices); svfloat64_t svldff1_gather_u64index_f64(svbool_t pg, const float64_t *base, svuint64_t indices); svuint64_t svldff1_gather_s64index_u64(svbool_t pg, const uint64_t *base, svint64_t indices); svuint64_t svldff1_gather_u64index_u64(svbool_t pg, const uint64_t *base, svuint64_t indices); svfloat32_t svldff1_gather_u32base_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset); svuint64_t svldff1_gather_u64base_u64(svbool_t pg, svuint64_t bases); svfloat64_t svldff1_gather_u64base_f64(svbool_t pg, svuint64_t bases); svfloat32_t svldff1_gather_u32base_f32(svbool_t pg, svuint32_t bases); svint64_t svldff1_gather_u64base_s64(svbool_t pg, svuint64_t bases); svint32_t svldff1_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); svuint32_t svldff1_gather_s32index_u32(svbool_t pg, const uint32_t *base, svint32_t indices); svfloat32_t svldff1_gather_u32index_f32(svbool_t pg, const float32_t *base, svuint32_t indices); svint32_t svldff1_gather_u32index_s32(svbool_t pg, const int32_t *base, svuint32_t indices); svuint32_t svldff1_gather_u32index_u32(svbool_t pg, const uint32_t *base, svuint32_t indices); svfloat64_t svldff1_gather_s64index_f64(svbool_t pg, const float64_t *base, svint64_t indices); svint64_t svldff1_gather_s64index_s64(svbool_t pg, const int64_t *base, svint64_t indices); svuint32_t svldff1_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svint32_t svldff1_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index); svint64_t svldff1_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svint32_t svldff1_gather_u32offset_s32(svbool_t pg, const int32_t *base, svuint32_t offsets); svfloat32_t svldff1_gather_u32offset_f32(svbool_t pg, const float32_t *base, svuint32_t offsets); svuint32_t svldff1_gather_s32offset_u32(svbool_t pg, const uint32_t *base, svint32_t offsets); svint32_t svldff1_gather_s32offset_s32(svbool_t pg, const int32_t *base, svint32_t offsets); svfloat32_t svldff1_gather_s32offset_f32(svbool_t pg, const float32_t *base, svint32_t offsets); svfloat64_t svldff1_gather_u64base_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset); svuint32_t svldff1_gather_u32offset_u32(svbool_t pg, const uint32_t *base, svuint32_t offsets); svint64_t svldff1_gather_s64offset_s64(svbool_t pg, const int64_t *base, svint64_t offsets); svuint64_t svldff1_gather_s64offset_u64(svbool_t pg, const uint64_t *base, svint64_t offsets); svfloat64_t svldff1_gather_u64offset_f64(svbool_t pg, const float64_t *base, svuint64_t offsets); svint64_t svldff1_gather_u64offset_s64(svbool_t pg, const int64_t *base, svuint64_t offsets); svuint64_t svldff1_gather_u64offset_u64(svbool_t pg, const uint64_t *base, svuint64_t offsets); svfloat32_t svldff1_gather_s32index_f32(svbool_t pg, const float32_t *base, svint32_t indices); svint32_t svldff1_gather_s32index_s32(svbool_t pg, const int32_t *base, svint32_t indices); svuint32_t svldff1_gather_u32base_u32(svbool_t pg, svuint32_t bases); svuint64_t svldff1_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svint64_t svldff1_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); svfloat64_t svldff1_gather_u64base_index_f64(svbool_t pg, svuint64_t bases, 
int64_t index); svuint32_t svldff1_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index); svfloat32_t svldff1_gather_u32base_index_f32(svbool_t pg, svuint32_t bases, int64_t index); svuint64_t svldff1_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svfloat64_t svldff1_gather_s64offset_f64(svbool_t pg, const float64_t *base, svint64_t offsets); svint32_t svldff1_gather_u32base_s32(svbool_t pg, svuint32_t bases); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldff1_gather_index(pg, base, indices) _Generic((indices), \ svuint64_t: _Generic((base), \ const int64_t *: svldff1_gather_u64index_s64, \ const float64_t *: svldff1_gather_u64index_f64, \ const uint64_t *: svldff1_gather_u64index_u64, \ default: __assume(0)), \ svint64_t: _Generic((base), \ const uint64_t *: svldff1_gather_s64index_u64, \ const float64_t *: svldff1_gather_s64index_f64, \ const int64_t *: svldff1_gather_s64index_s64, \ default: __assume(0)), \ svint32_t: _Generic((base), \ const uint32_t *: svldff1_gather_s32index_u32, \ const float32_t *: svldff1_gather_s32index_f32, \ const int32_t *: svldff1_gather_s32index_s32, \ default: __assume(0)), \ svuint32_t: _Generic((base), \ const float32_t *: svldff1_gather_u32index_f32, \ const int32_t *: svldff1_gather_u32index_s32, \ const uint32_t *: svldff1_gather_u32index_u32, \ default: __assume(0)), \ default: __assume(0) \ )(pg, base, indices) #define svldff1_gather_offset_f32(pg, bases, offset) _Generic((offset), \ int64_t: svldff1_gather_u32base_offset_f32, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldff1_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svldff1_gather_f64(pg, bases) _Generic((bases), \ svuint64_t: svldff1_gather_u64base_f64, \ default: __assume(0) \ )(pg, bases) #define svldff1_gather_f32(pg, bases) _Generic((bases), \ svuint32_t: svldff1_gather_u32base_f32, \ default: __assume(0) \ )(pg, bases) #define svldff1_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svldff1_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svldff1_gather_offset_s32(pg, bases, offset) _Generic((offset), \ int64_t: svldff1_gather_u32base_offset_s32, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1_gather_offset_u32(pg, bases, offset) _Generic((offset), \ int64_t: svldff1_gather_u32base_offset_u32, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1_gather_index_s32(pg, bases, index) _Generic((index), \ int64_t: svldff1_gather_u32base_index_s32, \ default: __assume(0) \ )(pg, bases, index) #define svldff1_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svldff1_gather_u64base_offset_s64, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1_gather_offset(pg, base, offsets) _Generic((offsets), \ svuint32_t: _Generic((base), \ const int32_t *: svldff1_gather_u32offset_s32, \ const float32_t *: svldff1_gather_u32offset_f32, \ const uint32_t *: svldff1_gather_u32offset_u32, \ default: __assume(0)), \ svint32_t: _Generic((base), \ const uint32_t *: svldff1_gather_s32offset_u32, \ const int32_t *: svldff1_gather_s32offset_s32, \ const float32_t *: svldff1_gather_s32offset_f32, \ default: __assume(0)), \ svint64_t: _Generic((base), \ const int64_t *: svldff1_gather_s64offset_s64, \ const uint64_t *: svldff1_gather_s64offset_u64, \ const float64_t *: svldff1_gather_s64offset_f64, \ default: __assume(0)), \ svuint64_t: _Generic((base), \ const float64_t *: 
svldff1_gather_u64offset_f64, \ const int64_t *: svldff1_gather_u64offset_s64, \ const uint64_t *: svldff1_gather_u64offset_u64, \ default: __assume(0)), \ default: __assume(0) \ )(pg, base, offsets) #define svldff1_gather_offset_f64(pg, bases, offset) _Generic((offset), \ int64_t: svldff1_gather_u64base_offset_f64, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svldff1_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #define svldff1_gather_index_u64(pg, bases, index) _Generic((index), \ int64_t: svldff1_gather_u64base_index_u64, \ default: __assume(0) \ )(pg, bases, index) #define svldff1_gather_index_s64(pg, bases, index) _Generic((index), \ int64_t: svldff1_gather_u64base_index_s64, \ default: __assume(0) \ )(pg, bases, index) #define svldff1_gather_index_f64(pg, bases, index) _Generic((index), \ int64_t: svldff1_gather_u64base_index_f64, \ default: __assume(0) \ )(pg, bases, index) #define svldff1_gather_index_u32(pg, bases, index) _Generic((index), \ int64_t: svldff1_gather_u32base_index_u32, \ default: __assume(0) \ )(pg, bases, index) #define svldff1_gather_index_f32(pg, bases, index) _Generic((index), \ int64_t: svldff1_gather_u32base_index_f32, \ default: __assume(0) \ )(pg, bases, index) #define svldff1_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svldff1_gather_u64base_offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svldff1_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svldff1_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #endif // sve: Fault suppression / Non-faulting loads / Consecutive: Load 16-bit data and sign-extend, non-faulting svint64_t svldnf1sh_s64(svbool_t pg, const int16_t *base); svint32_t svldnf1sh_s32(svbool_t pg, const int16_t *base); svuint32_t svldnf1sh_u32(svbool_t pg, const int16_t *base); svuint64_t svldnf1sh_u64(svbool_t pg, const int16_t *base); svint32_t svldnf1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum); svint64_t svldnf1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum); svuint32_t svldnf1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum); svuint64_t svldnf1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum); // sve: Fault suppression / Non-faulting loads / Consecutive: Load 16-bit data and zero-extend, non-faulting svuint32_t svldnf1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum); svint32_t svldnf1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum); svuint64_t svldnf1uh_u64(svbool_t pg, const uint16_t *base); svuint32_t svldnf1uh_u32(svbool_t pg, const uint16_t *base); svuint64_t svldnf1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum); svint64_t svldnf1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum); svint64_t svldnf1uh_s64(svbool_t pg, const uint16_t *base); svint32_t svldnf1uh_s32(svbool_t pg, const uint16_t *base); // sve: Fault suppression / Non-faulting loads / Consecutive: Load 32-bit data and sign-extend, non-faulting svint64_t svldnf1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum); svint64_t svldnf1sw_s64(svbool_t pg, const int32_t *base); svuint64_t svldnf1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum); svuint64_t svldnf1sw_u64(svbool_t pg, const int32_t *base); // sve: Fault suppression / Non-faulting loads / Consecutive: Load 32-bit data and zero-extend, non-faulting svint64_t svldnf1uw_s64(svbool_t pg, const uint32_t *base); svuint64_t svldnf1uw_u64(svbool_t pg, const uint32_t *base); 
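// Usage sketch (illustrative only, not part of the header's declarations): the non-faulting
// (svldnf1*) and first-faulting (svldff1*) loads suppress memory faults and record which
// elements were actually loaded in the first-fault register (see svsetffr / svrdffr earlier
// in this header). A typical pattern, with hypothetical variable names:
//
//   svsetffr();                               // set the FFR to all-true before the load
//   svint32_t data  = svldnf1_s32(pg, ptr);   // non-faulting load; faulting elements are suppressed
//   svbool_t  loaded = svrdffr_z(pg);         // predicate of successfully loaded elements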
svint64_t svldnf1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum); svuint64_t svldnf1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum); // sve: Fault suppression / Non-faulting loads / Consecutive: Load 8-bit data and sign-extend, non-faulting svuint64_t svldnf1sb_u64(svbool_t pg, const int8_t *base); svint64_t svldnf1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum); svint32_t svldnf1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum); svint16_t svldnf1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum); svuint32_t svldnf1sb_u32(svbool_t pg, const int8_t *base); svuint64_t svldnf1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum); svint64_t svldnf1sb_s64(svbool_t pg, const int8_t *base); svint32_t svldnf1sb_s32(svbool_t pg, const int8_t *base); svint16_t svldnf1sb_s16(svbool_t pg, const int8_t *base); svuint16_t svldnf1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum); svuint16_t svldnf1sb_u16(svbool_t pg, const int8_t *base); svuint32_t svldnf1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum); // sve: Fault suppression / Non-faulting loads / Consecutive: Load 8-bit data and zero-extend, non-faulting svuint16_t svldnf1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum); svint64_t svldnf1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum); svint32_t svldnf1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum); svint16_t svldnf1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum); svint64_t svldnf1ub_s64(svbool_t pg, const uint8_t *base); svuint32_t svldnf1ub_u32(svbool_t pg, const uint8_t *base); svuint32_t svldnf1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum); svint32_t svldnf1ub_s32(svbool_t pg, const uint8_t *base); svint16_t svldnf1ub_s16(svbool_t pg, const uint8_t *base); svuint64_t svldnf1ub_u64(svbool_t pg, const uint8_t *base); svuint64_t svldnf1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum); svuint16_t svldnf1ub_u16(svbool_t pg, const uint8_t *base); // sve: Fault suppression / Non-faulting loads / Consecutive: Unextended load, non-faulting svuint32_t svldnf1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum); svuint64_t svldnf1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum); svuint64_t svldnf1_u64(svbool_t pg, const uint64_t *base); svuint32_t svldnf1_u32(svbool_t pg, const uint32_t *base); svuint16_t svldnf1_u16(svbool_t pg, const uint16_t *base); svuint8_t svldnf1_u8(svbool_t pg, const uint8_t *base); svint64_t svldnf1_s64(svbool_t pg, const int64_t *base); svint16_t svldnf1_s16(svbool_t pg, const int16_t *base); svint8_t svldnf1_s8(svbool_t pg, const int8_t *base); svfloat64_t svldnf1_f64(svbool_t pg, const float64_t *base); svfloat32_t svldnf1_f32(svbool_t pg, const float32_t *base); svint32_t svldnf1_s32(svbool_t pg, const int32_t *base); svuint16_t svldnf1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum); svuint8_t svldnf1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum); svint64_t svldnf1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum); svint32_t svldnf1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum); svint16_t svldnf1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum); svint8_t svldnf1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum); svfloat32_t svldnf1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum); svfloat64_t svldnf1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum); // sve: Load / Consecutive: Load 16-bit data and sign-extend svint64_t svld1sh_s64(svbool_t 
pg, const int16_t *base); svuint32_t svld1sh_u32(svbool_t pg, const int16_t *base); svuint64_t svld1sh_u64(svbool_t pg, const int16_t *base); svint32_t svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum); svuint32_t svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum); svuint64_t svld1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum); svint64_t svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum); svint32_t svld1sh_s32(svbool_t pg, const int16_t *base); // sve: Load / Consecutive: Load 16-bit data and zero-extend svuint64_t svld1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum); svuint32_t svld1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum); svint32_t svld1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum); svuint64_t svld1uh_u64(svbool_t pg, const uint16_t *base); svuint32_t svld1uh_u32(svbool_t pg, const uint16_t *base); svint64_t svld1uh_s64(svbool_t pg, const uint16_t *base); svint64_t svld1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum); svint32_t svld1uh_s32(svbool_t pg, const uint16_t *base); // sve: Load / Consecutive: Load 32-bit data and sign-extend svint64_t svld1sw_s64(svbool_t pg, const int32_t *base); svuint64_t svld1sw_u64(svbool_t pg, const int32_t *base); svuint64_t svld1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum); svint64_t svld1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum); // sve: Load / Consecutive: Load 32-bit data and zero-extend svint64_t svld1uw_s64(svbool_t pg, const uint32_t *base); svuint64_t svld1uw_u64(svbool_t pg, const uint32_t *base); svint64_t svld1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum); svuint64_t svld1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum); // sve: Load / Consecutive: Load 8-bit data and sign-extend svuint64_t svld1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum); svuint32_t svld1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum); svuint16_t svld1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum); svint32_t svld1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum); svint16_t svld1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum); svuint64_t svld1sb_u64(svbool_t pg, const int8_t *base); svuint32_t svld1sb_u32(svbool_t pg, const int8_t *base); svuint16_t svld1sb_u16(svbool_t pg, const int8_t *base); svint64_t svld1sb_s64(svbool_t pg, const int8_t *base); svint32_t svld1sb_s32(svbool_t pg, const int8_t *base); svint16_t svld1sb_s16(svbool_t pg, const int8_t *base); svint64_t svld1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum); // sve: Load / Consecutive: Load 8-bit data and zero-extend svint32_t svld1ub_s32(svbool_t pg, const uint8_t *base); svint64_t svld1ub_s64(svbool_t pg, const uint8_t *base); svuint16_t svld1ub_u16(svbool_t pg, const uint8_t *base); svuint32_t svld1ub_u32(svbool_t pg, const uint8_t *base); svuint64_t svld1ub_u64(svbool_t pg, const uint8_t *base); svint16_t svld1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum); svint32_t svld1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum); svint64_t svld1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum); svuint16_t svld1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum); svuint32_t svld1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum); svint16_t svld1ub_s16(svbool_t pg, const uint8_t *base); svuint64_t svld1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum); // sve: Load / Consecutive: Load four-element tuples into four 
vectors svuint16x4_t svld4_u16(svbool_t pg, const uint16_t *base); // svfloat16x4_t svld4_f16(svbool_t pg, const float16_t *base); svfloat32x4_t svld4_f32(svbool_t pg, const float32_t *base); svfloat64x4_t svld4_f64(svbool_t pg, const float64_t *base); svint8x4_t svld4_s8(svbool_t pg, const int8_t *base); svint16x4_t svld4_s16(svbool_t pg, const int16_t *base); svint32x4_t svld4_s32(svbool_t pg, const int32_t *base); svint64x4_t svld4_s64(svbool_t pg, const int64_t *base); svuint8x4_t svld4_u8(svbool_t pg, const uint8_t *base); // svbfloat16x4_t svld4_bf16(svbool_t pg, const bfloat16_t *base); svuint32x4_t svld4_u32(svbool_t pg, const uint32_t *base); // svbfloat16x4_t svld4_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum); svuint64x4_t svld4_u64(svbool_t pg, const uint64_t *base); svuint32x4_t svld4_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum); svuint16x4_t svld4_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum); svuint8x4_t svld4_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum); svint64x4_t svld4_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum); svuint64x4_t svld4_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum); svint16x4_t svld4_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum); svint8x4_t svld4_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum); svfloat64x4_t svld4_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum); svfloat32x4_t svld4_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum); // svfloat16x4_t svld4_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum); svint32x4_t svld4_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum); // sve: Load / Consecutive: Load three-element tuples into three vectors // svfloat16x3_t svld3_f16(svbool_t pg, const float16_t *base); svfloat32x3_t svld3_f32(svbool_t pg, const float32_t *base); svfloat64x3_t svld3_f64(svbool_t pg, const float64_t *base); svint8x3_t svld3_s8(svbool_t pg, const int8_t *base); svint16x3_t svld3_s16(svbool_t pg, const int16_t *base); svint32x3_t svld3_s32(svbool_t pg, const int32_t *base); svint64x3_t svld3_s64(svbool_t pg, const int64_t *base); svuint8x3_t svld3_u8(svbool_t pg, const uint8_t *base); svuint16x3_t svld3_u16(svbool_t pg, const uint16_t *base); svuint32x3_t svld3_u32(svbool_t pg, const uint32_t *base); // svbfloat16x3_t svld3_bf16(svbool_t pg, const bfloat16_t *base); // svbfloat16x3_t svld3_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum); svfloat32x3_t svld3_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum); svfloat64x3_t svld3_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum); svint8x3_t svld3_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum); svint32x3_t svld3_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum); svint64x3_t svld3_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum); svuint8x3_t svld3_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum); svuint16x3_t svld3_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum); svuint32x3_t svld3_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum); svuint64x3_t svld3_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum); svuint64x3_t svld3_u64(svbool_t pg, const uint64_t *base); svint16x3_t svld3_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum); // svfloat16x3_t svld3_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum); // sve: Load / Consecutive: Load two-element tuples into two vectors // svfloat16x2_t svld2_f16(svbool_t pg, const float16_t *base); // svbfloat16x2_t svld2_bf16(svbool_t 
pg, const bfloat16_t *base); svuint64x2_t svld2_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum); svuint32x2_t svld2_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum); svuint16x2_t svld2_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum); svuint8x2_t svld2_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum); svint32x2_t svld2_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum); svint16x2_t svld2_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum); svint8x2_t svld2_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum); svfloat64x2_t svld2_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum); svfloat32x2_t svld2_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum); // svfloat16x2_t svld2_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum); svint64x2_t svld2_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum); svuint64x2_t svld2_u64(svbool_t pg, const uint64_t *base); svfloat32x2_t svld2_f32(svbool_t pg, const float32_t *base); svfloat64x2_t svld2_f64(svbool_t pg, const float64_t *base); svint8x2_t svld2_s8(svbool_t pg, const int8_t *base); svint16x2_t svld2_s16(svbool_t pg, const int16_t *base); svint32x2_t svld2_s32(svbool_t pg, const int32_t *base); // svbfloat16x2_t svld2_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum); svint64x2_t svld2_s64(svbool_t pg, const int64_t *base); svuint8x2_t svld2_u8(svbool_t pg, const uint8_t *base); svuint16x2_t svld2_u16(svbool_t pg, const uint16_t *base); svuint32x2_t svld2_u32(svbool_t pg, const uint32_t *base); // sve: Load / Consecutive: Unextended load svfloat64_t svld1_f64(svbool_t pg, const float64_t *base); svfloat32_t svld1_f32(svbool_t pg, const float32_t *base); svint8_t svld1_s8(svbool_t pg, const int8_t *base); svint32_t svld1_s32(svbool_t pg, const int32_t *base); svint64_t svld1_s64(svbool_t pg, const int64_t *base); svint16_t svld1_s16(svbool_t pg, const int16_t *base); svuint8_t svld1_u8(svbool_t pg, const uint8_t *base); svuint16_t svld1_u16(svbool_t pg, const uint16_t *base); svuint32_t svld1_u32(svbool_t pg, const uint32_t *base); svuint64_t svld1_u64(svbool_t pg, const uint64_t *base); svuint64_t svld1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum); svfloat32_t svld1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum); svfloat64_t svld1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum); svint16_t svld1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum); svint32_t svld1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum); svint64_t svld1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum); svuint8_t svld1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum); svuint16_t svld1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum); svuint32_t svld1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum); svint8_t svld1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svld1(pg, base) _Generic((base), \ const float64_t *: svld1_f64, \ const float32_t *: svld1_f32, \ const int8_t *: svld1_s8, \ const int32_t *: svld1_s32, \ const int64_t *: svld1_s64, \ const int16_t *: svld1_s16, \ const uint8_t *: svld1_u8, \ const uint16_t *: svld1_u16, \ const uint32_t *: svld1_u32, \ const uint64_t *: svld1_u64, \ default: __assume(0) \ )(pg, base) #define svld1_vnum(pg, base, vnum) _Generic((base), \ const uint64_t *: svld1_vnum_u64, \ const float32_t *: svld1_vnum_f32, \ const float64_t *: svld1_vnum_f64, \ const int16_t *: svld1_vnum_s16, \ const 
int32_t *: svld1_vnum_s32, \ const int64_t *: svld1_vnum_s64, \ const uint8_t *: svld1_vnum_u8, \ const uint16_t *: svld1_vnum_u16, \ const uint32_t *: svld1_vnum_u32, \ const int8_t *: svld1_vnum_s8, \ default: __assume(0) \ )(pg, base, vnum) #endif // sve: Load / Consecutive: Unextended load, non-temporal svint8_t svldnt1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum); svint64_t svldnt1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum); svfloat32_t svldnt1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum); svuint64_t svldnt1_u64(svbool_t pg, const uint64_t *base); svuint32_t svldnt1_u32(svbool_t pg, const uint32_t *base); svint16_t svldnt1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum); svint32_t svldnt1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum); svfloat64_t svldnt1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum); svuint8_t svldnt1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum); svint32_t svldnt1_s32(svbool_t pg, const int32_t *base); svuint32_t svldnt1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum); svuint64_t svldnt1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum); svint16_t svldnt1_s16(svbool_t pg, const int16_t *base); svuint8_t svldnt1_u8(svbool_t pg, const uint8_t *base); svfloat32_t svldnt1_f32(svbool_t pg, const float32_t *base); svfloat64_t svldnt1_f64(svbool_t pg, const float64_t *base); svint8_t svldnt1_s8(svbool_t pg, const int8_t *base); svint64_t svldnt1_s64(svbool_t pg, const int64_t *base); svuint16_t svldnt1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum); svuint16_t svldnt1_u16(svbool_t pg, const uint16_t *base); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldnt1_vnum(pg, base, vnum) _Generic((base), \ const int8_t *: svldnt1_vnum_s8, \ const int64_t *: svldnt1_vnum_s64, \ const float32_t *: svldnt1_vnum_f32, \ const int16_t *: svldnt1_vnum_s16, \ const int32_t *: svldnt1_vnum_s32, \ const float64_t *: svldnt1_vnum_f64, \ const uint8_t *: svldnt1_vnum_u8, \ const uint32_t *: svldnt1_vnum_u32, \ const uint64_t *: svldnt1_vnum_u64, \ const uint16_t *: svldnt1_vnum_u16, \ default: __assume(0) \ )(pg, base, vnum) #define svldnt1(pg, base) _Generic((base), \ const uint64_t *: svldnt1_u64, \ const uint32_t *: svldnt1_u32, \ const int32_t *: svldnt1_s32, \ const int16_t *: svldnt1_s16, \ const uint8_t *: svldnt1_u8, \ const float32_t *: svldnt1_f32, \ const float64_t *: svldnt1_f64, \ const int8_t *: svldnt1_s8, \ const int64_t *: svldnt1_s64, \ const uint16_t *: svldnt1_u16, \ default: __assume(0) \ )(pg, base) #endif // sve: Load / Gather: Load 16-bit data and sign-extend svuint32_t svld1sh_gather_s32index_u32(svbool_t pg, const int16_t *base, svint32_t indices); svint32_t svld1sh_gather_u32index_s32(svbool_t pg, const int16_t *base, svuint32_t indices); svuint32_t svld1sh_gather_u32index_u32(svbool_t pg, const int16_t *base, svuint32_t indices); svint64_t svld1sh_gather_s64index_s64(svbool_t pg, const int16_t *base, svint64_t indices); svuint64_t svld1sh_gather_s64index_u64(svbool_t pg, const int16_t *base, svint64_t indices); svint64_t svld1sh_gather_u64index_s64(svbool_t pg, const int16_t *base, svuint64_t indices); svuint64_t svld1sh_gather_u64index_u64(svbool_t pg, const int16_t *base, svuint64_t indices); svint32_t svld1sh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index); svuint32_t svld1sh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svint64_t svld1sh_gather_u64base_offset_s64(svbool_t pg, 
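/*
 * Example (illustrative sketch, not part of this header): with a C11 compiler
 * the svld1/svld1_vnum macros above pick the correctly typed loader from the
 * pointer type, so one spelling serves every element type.  svldnt1 behaves
 * like svld1 but adds a non-temporal hint, which can reduce cache pollution
 * for streaming data that will not be re-read.  svptrue_b32()/svptrue_b64()
 * are assumed to be declared elsewhere in this header.
 *
 *   const float32_t *srcf;
 *   const uint64_t  *srcu;
 *   svbool_t    pg = svptrue_b32();
 *   svfloat32_t vf = svld1(pg, srcf);                // dispatches to svld1_f32
 *   svfloat32_t v1 = svld1_vnum(pg, srcf, 1);        // next vector's worth of floats
 *   svuint64_t  vu = svldnt1(svptrue_b64(), srcu);   // non-temporal 64-bit load
 */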
svuint64_t bases, int64_t offset); svuint64_t svld1sh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svuint32_t svld1sh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index); svint32_t svld1sh_gather_s32index_s32(svbool_t pg, const int16_t *base, svint32_t indices); svuint64_t svld1sh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svint64_t svld1sh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); svint32_t svld1sh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); svuint64_t svld1sh_gather_u64offset_u64(svbool_t pg, const int16_t *base, svuint64_t offsets); svint64_t svld1sh_gather_s64offset_s64(svbool_t pg, const int16_t *base, svint64_t offsets); svuint64_t svld1sh_gather_s64offset_u64(svbool_t pg, const int16_t *base, svint64_t offsets); svint64_t svld1sh_gather_u64offset_s64(svbool_t pg, const int16_t *base, svuint64_t offsets); svint32_t svld1sh_gather_u32base_s32(svbool_t pg, svuint32_t bases); svint64_t svld1sh_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint64_t svld1sh_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint32_t svld1sh_gather_s32offset_s32(svbool_t pg, const int16_t *base, svint32_t offsets); svuint32_t svld1sh_gather_s32offset_u32(svbool_t pg, const int16_t *base, svint32_t offsets); svint32_t svld1sh_gather_u32offset_s32(svbool_t pg, const int16_t *base, svuint32_t offsets); svuint32_t svld1sh_gather_u32offset_u32(svbool_t pg, const int16_t *base, svuint32_t offsets); svuint32_t svld1sh_gather_u32base_u32(svbool_t pg, svuint32_t bases); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svld1sh_gather_index_u32(pg, base, indices) _Generic((indices), \ svint32_t: svld1sh_gather_s32index_u32, \ svuint32_t: svld1sh_gather_u32index_u32, \ int64_t: svld1sh_gather_u32base_index_u32, \ default: __assume(0) \ )(pg, base, indices) #define svld1sh_gather_index_s32(pg, base, indices) _Generic((indices), \ svuint32_t: svld1sh_gather_u32index_s32, \ int64_t: svld1sh_gather_u32base_index_s32, \ svint32_t: svld1sh_gather_s32index_s32, \ default: __assume(0) \ )(pg, base, indices) #define svld1sh_gather_index_s64(pg, base, indices) _Generic((indices), \ svint64_t: svld1sh_gather_s64index_s64, \ svuint64_t: svld1sh_gather_u64index_s64, \ int64_t: svld1sh_gather_u64base_index_s64, \ default: __assume(0) \ )(pg, base, indices) #define svld1sh_gather_index_u64(pg, base, indices) _Generic((indices), \ svint64_t: svld1sh_gather_s64index_u64, \ svuint64_t: svld1sh_gather_u64index_u64, \ int64_t: svld1sh_gather_u64base_index_u64, \ default: __assume(0) \ )(pg, base, indices) #define svld1sh_gather_offset_u32(pg, bases, offset) _Generic((offset), \ int64_t: svld1sh_gather_u32base_offset_u32, \ svint32_t: svld1sh_gather_s32offset_u32, \ svuint32_t: svld1sh_gather_u32offset_u32, \ default: __assume(0) \ )(pg, bases, offset) #define svld1sh_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svld1sh_gather_u64base_offset_s64, \ svint64_t: svld1sh_gather_s64offset_s64, \ svuint64_t: svld1sh_gather_u64offset_s64, \ default: __assume(0) \ )(pg, bases, offset) #define svld1sh_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svld1sh_gather_u64base_offset_u64, \ svuint64_t: svld1sh_gather_u64offset_u64, \ svint64_t: svld1sh_gather_s64offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svld1sh_gather_offset_s32(pg, bases, offset) _Generic((offset), \ int64_t: svld1sh_gather_u32base_offset_s32, \ svint32_t: 
svld1sh_gather_s32offset_s32, \ svuint32_t: svld1sh_gather_u32offset_s32, \ default: __assume(0) \ )(pg, bases, offset) #define svld1sh_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svld1sh_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #define svld1sh_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svld1sh_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svld1sh_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svld1sh_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svld1sh_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svld1sh_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #endif // sve: Load / Gather: Load 16-bit data and zero-extend svint32_t svld1uh_gather_u32offset_s32(svbool_t pg, const uint16_t *base, svuint32_t offsets); svuint32_t svld1uh_gather_s32offset_u32(svbool_t pg, const uint16_t *base, svint32_t offsets); svint32_t svld1uh_gather_s32offset_s32(svbool_t pg, const uint16_t *base, svint32_t offsets); svuint64_t svld1uh_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint64_t svld1uh_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint32_t svld1uh_gather_u32base_u32(svbool_t pg, svuint32_t bases); svuint32_t svld1uh_gather_u32offset_u32(svbool_t pg, const uint16_t *base, svuint32_t offsets); svint32_t svld1uh_gather_u32base_s32(svbool_t pg, svuint32_t bases); svint64_t svld1uh_gather_s64offset_s64(svbool_t pg, const uint16_t *base, svint64_t offsets); svint64_t svld1uh_gather_u64offset_s64(svbool_t pg, const uint16_t *base, svuint64_t offsets); svuint64_t svld1uh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svint64_t svld1uh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); svuint32_t svld1uh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index); svuint64_t svld1uh_gather_s64offset_u64(svbool_t pg, const uint16_t *base, svint64_t offsets); svint32_t svld1uh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index); svuint64_t svld1uh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svld1uh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svuint32_t svld1uh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svint32_t svld1uh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); svint64_t svld1uh_gather_u64index_s64(svbool_t pg, const uint16_t *base, svuint64_t indices); svint32_t svld1uh_gather_u32index_s32(svbool_t pg, const uint16_t *base, svuint32_t indices); svuint64_t svld1uh_gather_u64index_u64(svbool_t pg, const uint16_t *base, svuint64_t indices); svint32_t svld1uh_gather_s32index_s32(svbool_t pg, const uint16_t *base, svint32_t indices); svuint64_t svld1uh_gather_u64offset_u64(svbool_t pg, const uint16_t *base, svuint64_t offsets); svuint32_t svld1uh_gather_s32index_u32(svbool_t pg, const uint16_t *base, svint32_t indices); svuint64_t svld1uh_gather_s64index_u64(svbool_t pg, const uint16_t *base, svint64_t indices); svuint32_t svld1uh_gather_u32index_u32(svbool_t pg, const uint16_t *base, svuint32_t indices); svint64_t svld1uh_gather_s64index_s64(svbool_t pg, const uint16_t *base, svint64_t indices); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svld1uh_gather_offset_s32(pg, base, offsets) _Generic((offsets), \ svuint32_t: svld1uh_gather_u32offset_s32, \ svint32_t: svld1uh_gather_s32offset_s32, \ int64_t: svld1uh_gather_u32base_offset_s32, \ default: __assume(0) \ )(pg, base, 
offsets) #define svld1uh_gather_offset_u32(pg, base, offsets) _Generic((offsets), \ svint32_t: svld1uh_gather_s32offset_u32, \ svuint32_t: svld1uh_gather_u32offset_u32, \ int64_t: svld1uh_gather_u32base_offset_u32, \ default: __assume(0) \ )(pg, base, offsets) #define svld1uh_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svld1uh_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svld1uh_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svld1uh_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svld1uh_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svld1uh_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #define svld1uh_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svld1uh_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #define svld1uh_gather_offset_s64(pg, base, offsets) _Generic((offsets), \ svint64_t: svld1uh_gather_s64offset_s64, \ svuint64_t: svld1uh_gather_u64offset_s64, \ int64_t: svld1uh_gather_u64base_offset_s64, \ default: __assume(0) \ )(pg, base, offsets) #define svld1uh_gather_index_u64(pg, bases, index) _Generic((index), \ int64_t: svld1uh_gather_u64base_index_u64, \ svuint64_t: svld1uh_gather_u64index_u64, \ svint64_t: svld1uh_gather_s64index_u64, \ default: __assume(0) \ )(pg, bases, index) #define svld1uh_gather_index_s64(pg, bases, index) _Generic((index), \ int64_t: svld1uh_gather_u64base_index_s64, \ svuint64_t: svld1uh_gather_u64index_s64, \ svint64_t: svld1uh_gather_s64index_s64, \ default: __assume(0) \ )(pg, bases, index) #define svld1uh_gather_index_u32(pg, bases, index) _Generic((index), \ int64_t: svld1uh_gather_u32base_index_u32, \ svint32_t: svld1uh_gather_s32index_u32, \ svuint32_t: svld1uh_gather_u32index_u32, \ default: __assume(0) \ )(pg, bases, index) #define svld1uh_gather_offset_u64(pg, base, offsets) _Generic((offsets), \ svint64_t: svld1uh_gather_s64offset_u64, \ int64_t: svld1uh_gather_u64base_offset_u64, \ svuint64_t: svld1uh_gather_u64offset_u64, \ default: __assume(0) \ )(pg, base, offsets) #define svld1uh_gather_index_s32(pg, bases, index) _Generic((index), \ int64_t: svld1uh_gather_u32base_index_s32, \ svuint32_t: svld1uh_gather_u32index_s32, \ svint32_t: svld1uh_gather_s32index_s32, \ default: __assume(0) \ )(pg, bases, index) #endif // sve: Load / Gather: Load 32-bit data and sign-extend svuint64_t svld1sw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svint64_t svld1sw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); svuint64_t svld1sw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svld1sw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svld1sw_gather_u64index_s64(svbool_t pg, const int32_t *base, svuint64_t indices); svuint64_t svld1sw_gather_s64index_u64(svbool_t pg, const int32_t *base, svint64_t indices); svuint64_t svld1sw_gather_u64index_u64(svbool_t pg, const int32_t *base, svuint64_t indices); svuint64_t svld1sw_gather_u64offset_u64(svbool_t pg, const int32_t *base, svuint64_t offsets); svint64_t svld1sw_gather_u64offset_s64(svbool_t pg, const int32_t *base, svuint64_t offsets); svuint64_t svld1sw_gather_s64offset_u64(svbool_t pg, const int32_t *base, svint64_t offsets); svint64_t svld1sw_gather_s64offset_s64(svbool_t pg, const int32_t *base, svint64_t offsets); svuint64_t svld1sw_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint64_t svld1sw_gather_u64base_s64(svbool_t pg, svuint64_t bases); svint64_t 
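/*
 * Example (illustrative sketch, not part of this header): the "u32base"/
 * "u64base" gather forms take a vector of base addresses instead of a scalar
 * pointer; each active lane loads from its own address, optionally displaced
 * by a scalar offset (bytes) or index (elements).  Here every active lane i
 * reads a uint16_t from addrs[i] + 3*2 and zero-extends it to 64 bits.
 * svptrue_b64() is assumed to be declared elsewhere in this header, and
 * 'addrs' is assumed to already hold one valid address per lane.
 *
 *   svbool_t   pg    = svptrue_b64();
 *   svuint64_t addrs = ...;   // one uint16_t address per lane, as 64-bit integers
 *   svuint64_t v     = svld1uh_gather_u64base_index_u64(pg, addrs, 3);
 */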
svld1sw_gather_s64index_s64(svbool_t pg, const int32_t *base, svint64_t indices); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svld1sw_gather_index_u64(pg, bases, index) _Generic((index), \ int64_t: svld1sw_gather_u64base_index_u64, \ svint64_t: svld1sw_gather_s64index_u64, \ svuint64_t: svld1sw_gather_u64index_u64, \ default: __assume(0) \ )(pg, bases, index) #define svld1sw_gather_index_s64(pg, bases, index) _Generic((index), \ int64_t: svld1sw_gather_u64base_index_s64, \ svuint64_t: svld1sw_gather_u64index_s64, \ svint64_t: svld1sw_gather_s64index_s64, \ default: __assume(0) \ )(pg, bases, index) #define svld1sw_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svld1sw_gather_u64base_offset_u64, \ svuint64_t: svld1sw_gather_u64offset_u64, \ svint64_t: svld1sw_gather_s64offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svld1sw_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svld1sw_gather_u64base_offset_s64, \ svuint64_t: svld1sw_gather_u64offset_s64, \ svint64_t: svld1sw_gather_s64offset_s64, \ default: __assume(0) \ )(pg, bases, offset) #define svld1sw_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svld1sw_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svld1sw_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svld1sw_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #endif // sve: Load / Gather: Load 32-bit data and zero-extend svint64_t svld1uw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); svuint64_t svld1uw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svint64_t svld1uw_gather_u64base_s64(svbool_t pg, svuint64_t bases); svint64_t svld1uw_gather_s64offset_s64(svbool_t pg, const uint32_t *base, svint64_t offsets); svuint64_t svld1uw_gather_s64offset_u64(svbool_t pg, const uint32_t *base, svint64_t offsets); svuint64_t svld1uw_gather_u64base_u64(svbool_t pg, svuint64_t bases); svuint64_t svld1uw_gather_u64offset_u64(svbool_t pg, const uint32_t *base, svuint64_t offsets); svint64_t svld1uw_gather_s64index_s64(svbool_t pg, const uint32_t *base, svint64_t indices); svuint64_t svld1uw_gather_s64index_u64(svbool_t pg, const uint32_t *base, svint64_t indices); svint64_t svld1uw_gather_u64index_s64(svbool_t pg, const uint32_t *base, svuint64_t indices); svuint64_t svld1uw_gather_u64index_u64(svbool_t pg, const uint32_t *base, svuint64_t indices); svint64_t svld1uw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svld1uw_gather_u64offset_s64(svbool_t pg, const uint32_t *base, svuint64_t offsets); svuint64_t svld1uw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svld1uw_gather_index_s64(pg, bases, index) _Generic((index), \ int64_t: svld1uw_gather_u64base_index_s64, \ svint64_t: svld1uw_gather_s64index_s64, \ svuint64_t: svld1uw_gather_u64index_s64, \ default: __assume(0) \ )(pg, bases, index) #define svld1uw_gather_index_u64(pg, bases, index) _Generic((index), \ int64_t: svld1uw_gather_u64base_index_u64, \ svint64_t: svld1uw_gather_s64index_u64, \ svuint64_t: svld1uw_gather_u64index_u64, \ default: __assume(0) \ )(pg, bases, index) #define svld1uw_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svld1uw_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svld1uw_gather_offset_s64(pg, base, offsets) _Generic((offsets), \ svint64_t: svld1uw_gather_s64offset_s64, \ int64_t: 
svld1uw_gather_u64base_offset_s64, \ svuint64_t: svld1uw_gather_u64offset_s64, \ default: __assume(0) \ )(pg, base, offsets) #define svld1uw_gather_offset_u64(pg, base, offsets) _Generic((offsets), \ svint64_t: svld1uw_gather_s64offset_u64, \ svuint64_t: svld1uw_gather_u64offset_u64, \ int64_t: svld1uw_gather_u64base_offset_u64, \ default: __assume(0) \ )(pg, base, offsets) #define svld1uw_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svld1uw_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #endif // sve: Load / Gather: Load 8-bit data and sign-extend svint64_t svld1sb_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint32_t svld1sb_gather_u32base_u32(svbool_t pg, svuint32_t bases); svint32_t svld1sb_gather_u32base_s32(svbool_t pg, svuint32_t bases); svint32_t svld1sb_gather_s32offset_s32(svbool_t pg, const int8_t *base, svint32_t offsets); svuint64_t svld1sb_gather_u64base_u64(svbool_t pg, svuint64_t bases); svuint32_t svld1sb_gather_u32offset_u32(svbool_t pg, const int8_t *base, svuint32_t offsets); svuint32_t svld1sb_gather_s32offset_u32(svbool_t pg, const int8_t *base, svint32_t offsets); svuint64_t svld1sb_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svld1sb_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svuint32_t svld1sb_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svint32_t svld1sb_gather_u32offset_s32(svbool_t pg, const int8_t *base, svuint32_t offsets); svuint64_t svld1sb_gather_u64offset_u64(svbool_t pg, const int8_t *base, svuint64_t offsets); svint64_t svld1sb_gather_u64offset_s64(svbool_t pg, const int8_t *base, svuint64_t offsets); svuint64_t svld1sb_gather_s64offset_u64(svbool_t pg, const int8_t *base, svint64_t offsets); svint64_t svld1sb_gather_s64offset_s64(svbool_t pg, const int8_t *base, svint64_t offsets); svint32_t svld1sb_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svld1sb_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svld1sb_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svld1sb_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svld1sb_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #define svld1sb_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svld1sb_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #define svld1sb_gather_offset_s32(pg, base, offsets) _Generic((offsets), \ svint32_t: svld1sb_gather_s32offset_s32, \ svuint32_t: svld1sb_gather_u32offset_s32, \ int64_t: svld1sb_gather_u32base_offset_s32, \ default: __assume(0) \ )(pg, base, offsets) #define svld1sb_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svld1sb_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svld1sb_gather_offset_u32(pg, base, offsets) _Generic((offsets), \ svuint32_t: svld1sb_gather_u32offset_u32, \ svint32_t: svld1sb_gather_s32offset_u32, \ int64_t: svld1sb_gather_u32base_offset_u32, \ default: __assume(0) \ )(pg, base, offsets) #define svld1sb_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svld1sb_gather_u64base_offset_u64, \ svuint64_t: svld1sb_gather_u64offset_u64, \ svint64_t: svld1sb_gather_s64offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svld1sb_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svld1sb_gather_u64base_offset_s64, \ svuint64_t: svld1sb_gather_u64offset_s64, \ svint64_t: svld1sb_gather_s64offset_s64, \ default: __assume(0) 
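/*
 * Example (illustrative sketch, not part of this header): gathering narrow
 * elements with widening.  svld1sb_gather_offset_s32 reads one int8_t from
 * table + offsets[i] (a per-lane byte offset) for each active lane and
 * sign-extends it to 32 bits; for 16-bit and wider elements the *_index forms
 * scale by the element size instead.  svptrue_b32() and svindex_u32() (lane i
 * receives base + i*step) are assumed to be declared elsewhere in this header.
 *
 *   const int8_t *table;
 *   svbool_t   pg   = svptrue_b32();
 *   svuint32_t offs = svindex_u32(0, 4);                         // every fourth byte
 *   svint32_t  v    = svld1sb_gather_offset_s32(pg, table, offs);
 */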
\ )(pg, bases, offset) #endif // sve: Load / Gather: Load 8-bit data and zero-extend svuint64_t svld1ub_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svint32_t svld1ub_gather_u32base_s32(svbool_t pg, svuint32_t bases); svint64_t svld1ub_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint64_t svld1ub_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint32_t svld1ub_gather_s32offset_s32(svbool_t pg, const uint8_t *base, svint32_t offsets); svuint32_t svld1ub_gather_s32offset_u32(svbool_t pg, const uint8_t *base, svint32_t offsets); svint32_t svld1ub_gather_u32offset_s32(svbool_t pg, const uint8_t *base, svuint32_t offsets); svuint32_t svld1ub_gather_u32offset_u32(svbool_t pg, const uint8_t *base, svuint32_t offsets); svint64_t svld1ub_gather_s64offset_s64(svbool_t pg, const uint8_t *base, svint64_t offsets); svuint64_t svld1ub_gather_s64offset_u64(svbool_t pg, const uint8_t *base, svint64_t offsets); svint64_t svld1ub_gather_u64offset_s64(svbool_t pg, const uint8_t *base, svuint64_t offsets); svuint64_t svld1ub_gather_u64offset_u64(svbool_t pg, const uint8_t *base, svuint64_t offsets); svint32_t svld1ub_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); svuint32_t svld1ub_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svint64_t svld1ub_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svuint32_t svld1ub_gather_u32base_u32(svbool_t pg, svuint32_t bases); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svld1ub_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svld1ub_gather_u64base_offset_u64, \ svint64_t: svld1ub_gather_s64offset_u64, \ svuint64_t: svld1ub_gather_u64offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svld1ub_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svld1ub_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #define svld1ub_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svld1ub_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svld1ub_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svld1ub_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svld1ub_gather_offset_s32(pg, base, offsets) _Generic((offsets), \ svint32_t: svld1ub_gather_s32offset_s32, \ svuint32_t: svld1ub_gather_u32offset_s32, \ int64_t: svld1ub_gather_u32base_offset_s32, \ default: __assume(0) \ )(pg, base, offsets) #define svld1ub_gather_offset_u32(pg, base, offsets) _Generic((offsets), \ svint32_t: svld1ub_gather_s32offset_u32, \ svuint32_t: svld1ub_gather_u32offset_u32, \ int64_t: svld1ub_gather_u32base_offset_u32, \ default: __assume(0) \ )(pg, base, offsets) #define svld1ub_gather_offset_s64(pg, base, offsets) _Generic((offsets), \ svint64_t: svld1ub_gather_s64offset_s64, \ svuint64_t: svld1ub_gather_u64offset_s64, \ int64_t: svld1ub_gather_u64base_offset_s64, \ default: __assume(0) \ )(pg, base, offsets) #define svld1ub_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svld1ub_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #endif // sve: Load / Gather: Unextended load svuint64_t svld1_gather_u64index_u64(svbool_t pg, const uint64_t *base, svuint64_t indices); svfloat32_t svld1_gather_u32base_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset); svfloat64_t svld1_gather_u64base_index_f64(svbool_t pg, svuint64_t bases, int64_t index); svuint32_t svld1_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svfloat64_t 
svld1_gather_u64base_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svld1_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svuint64_t svld1_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svfloat32_t svld1_gather_u32base_index_f32(svbool_t pg, svuint32_t bases, int64_t index); svint32_t svld1_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index); svuint32_t svld1_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index); svint64_t svld1_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); svuint64_t svld1_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svint64_t svld1_gather_u64index_s64(svbool_t pg, const int64_t *base, svuint64_t indices); svint32_t svld1_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); svfloat64_t svld1_gather_u64index_f64(svbool_t pg, const float64_t *base, svuint64_t indices); svint64_t svld1_gather_s64index_s64(svbool_t pg, const int64_t *base, svint64_t indices); svint32_t svld1_gather_s32index_s32(svbool_t pg, const int32_t *base, svint32_t indices); svfloat32_t svld1_gather_s32index_f32(svbool_t pg, const float32_t *base, svint32_t indices); svuint64_t svld1_gather_u64offset_u64(svbool_t pg, const uint64_t *base, svuint64_t offsets); svint64_t svld1_gather_u64offset_s64(svbool_t pg, const int64_t *base, svuint64_t offsets); svfloat64_t svld1_gather_u64offset_f64(svbool_t pg, const float64_t *base, svuint64_t offsets); svuint64_t svld1_gather_s64offset_u64(svbool_t pg, const uint64_t *base, svint64_t offsets); svint64_t svld1_gather_s64offset_s64(svbool_t pg, const int64_t *base, svint64_t offsets); svfloat64_t svld1_gather_s64offset_f64(svbool_t pg, const float64_t *base, svint64_t offsets); svuint32_t svld1_gather_u32offset_u32(svbool_t pg, const uint32_t *base, svuint32_t offsets); svint32_t svld1_gather_u32offset_s32(svbool_t pg, const int32_t *base, svuint32_t offsets); svfloat32_t svld1_gather_u32offset_f32(svbool_t pg, const float32_t *base, svuint32_t offsets); svuint32_t svld1_gather_s32offset_u32(svbool_t pg, const uint32_t *base, svint32_t offsets); svfloat64_t svld1_gather_u64base_f64(svbool_t pg, svuint64_t bases); svuint32_t svld1_gather_s32index_u32(svbool_t pg, const uint32_t *base, svint32_t indices); svint32_t svld1_gather_s32offset_s32(svbool_t pg, const int32_t *base, svint32_t offsets); svfloat32_t svld1_gather_s32offset_f32(svbool_t pg, const float32_t *base, svint32_t offsets); svuint64_t svld1_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint64_t svld1_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint64_t svld1_gather_s64index_u64(svbool_t pg, const uint64_t *base, svint64_t indices); svuint32_t svld1_gather_u32base_u32(svbool_t pg, svuint32_t bases); svfloat32_t svld1_gather_u32base_f32(svbool_t pg, svuint32_t bases); svint32_t svld1_gather_u32base_s32(svbool_t pg, svuint32_t bases); svint32_t svld1_gather_u32index_s32(svbool_t pg, const int32_t *base, svuint32_t indices); svfloat32_t svld1_gather_u32index_f32(svbool_t pg, const float32_t *base, svuint32_t indices); svuint32_t svld1_gather_u32index_u32(svbool_t pg, const uint32_t *base, svuint32_t indices); svfloat64_t svld1_gather_s64index_f64(svbool_t pg, const float64_t *base, svint64_t indices); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svld1_gather_index(pg, base, indices) _Generic((indices), \ svuint64_t: _Generic((base), \ const uint64_t *: svld1_gather_u64index_u64, \ const 
int64_t *: svld1_gather_u64index_s64, \ const float64_t *: svld1_gather_u64index_f64, \ default: __assume(0)), \ svint64_t: _Generic((base), \ const int64_t *: svld1_gather_s64index_s64, \ const uint64_t *: svld1_gather_s64index_u64, \ const float64_t *: svld1_gather_s64index_f64, \ default: __assume(0)), \ svint32_t: _Generic((base), \ const int32_t *: svld1_gather_s32index_s32, \ const float32_t *: svld1_gather_s32index_f32, \ const uint32_t *: svld1_gather_s32index_u32, \ default: __assume(0)), \ svuint32_t: _Generic((base), \ const int32_t *: svld1_gather_u32index_s32, \ const float32_t *: svld1_gather_u32index_f32, \ const uint32_t *: svld1_gather_u32index_u32, \ default: __assume(0)), \ default: __assume(0) \ )(pg, base, indices) #define svld1_gather_offset_f32(pg, bases, offset) _Generic((offset), \ int64_t: svld1_gather_u32base_offset_f32, \ default: __assume(0) \ )(pg, bases, offset) #define svld1_gather_index_f64(pg, bases, index) _Generic((index), \ int64_t: svld1_gather_u64base_index_f64, \ default: __assume(0) \ )(pg, bases, index) #define svld1_gather_offset_u32(pg, bases, offset) _Generic((offset), \ int64_t: svld1_gather_u32base_offset_u32, \ default: __assume(0) \ )(pg, bases, offset) #define svld1_gather_offset_f64(pg, bases, offset) _Generic((offset), \ int64_t: svld1_gather_u64base_offset_f64, \ default: __assume(0) \ )(pg, bases, offset) #define svld1_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svld1_gather_u64base_offset_s64, \ default: __assume(0) \ )(pg, bases, offset) #define svld1_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svld1_gather_u64base_offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svld1_gather_index_f32(pg, bases, index) _Generic((index), \ int64_t: svld1_gather_u32base_index_f32, \ default: __assume(0) \ )(pg, bases, index) #define svld1_gather_index_s32(pg, bases, index) _Generic((index), \ int64_t: svld1_gather_u32base_index_s32, \ default: __assume(0) \ )(pg, bases, index) #define svld1_gather_index_u32(pg, bases, index) _Generic((index), \ int64_t: svld1_gather_u32base_index_u32, \ default: __assume(0) \ )(pg, bases, index) #define svld1_gather_index_s64(pg, bases, index) _Generic((index), \ int64_t: svld1_gather_u64base_index_s64, \ default: __assume(0) \ )(pg, bases, index) #define svld1_gather_index_u64(pg, bases, index) _Generic((index), \ int64_t: svld1_gather_u64base_index_u64, \ default: __assume(0) \ )(pg, bases, index) #define svld1_gather_offset_s32(pg, bases, offset) _Generic((offset), \ int64_t: svld1_gather_u32base_offset_s32, \ default: __assume(0) \ )(pg, bases, offset) #define svld1_gather_offset(pg, base, offsets) _Generic((offsets), \ svuint64_t: _Generic((base), \ const uint64_t *: svld1_gather_u64offset_u64, \ const int64_t *: svld1_gather_u64offset_s64, \ const float64_t *: svld1_gather_u64offset_f64, \ default: __assume(0)), \ svint64_t: _Generic((base), \ const uint64_t *: svld1_gather_s64offset_u64, \ const int64_t *: svld1_gather_s64offset_s64, \ const float64_t *: svld1_gather_s64offset_f64, \ default: __assume(0)), \ svuint32_t: _Generic((base), \ const uint32_t *: svld1_gather_u32offset_u32, \ const int32_t *: svld1_gather_u32offset_s32, \ const float32_t *: svld1_gather_u32offset_f32, \ default: __assume(0)), \ svint32_t: _Generic((base), \ const uint32_t *: svld1_gather_s32offset_u32, \ const int32_t *: svld1_gather_s32offset_s32, \ const float32_t *: svld1_gather_s32offset_f32, \ default: __assume(0)), \ default: __assume(0) \ )(pg, base, offsets) #define 
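/*
 * Example (illustrative sketch, not part of this header): the
 * svld1_gather_index and svld1_gather_offset macros above dispatch twice with
 * _Generic, first on the index/offset vector type and then on the pointer
 * type, so one spelling covers every combination declared above.  Indices are
 * scaled by the element size; offsets are raw byte offsets.  svptrue_b64()
 * and svindex_u64() are assumed to be declared elsewhere in this header.
 *
 *   const float64_t *src;
 *   svbool_t    pg  = svptrue_b64();
 *   svuint64_t  idx = svindex_u64(0, 2);                 // elements 0, 2, 4, ...
 *   svfloat64_t v   = svld1_gather_index(pg, src, idx);  // -> svld1_gather_u64index_f64
 */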
svld1_gather_f64(pg, bases) _Generic((bases), \ svuint64_t: svld1_gather_u64base_f64, \ default: __assume(0) \ )(pg, bases) #define svld1_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svld1_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svld1_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svld1_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svld1_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svld1_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #define svld1_gather_f32(pg, bases) _Generic((bases), \ svuint32_t: svld1_gather_u32base_f32, \ default: __assume(0) \ )(pg, bases) #define svld1_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svld1_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #endif // sve: Load / Load and replicate: Load and replicate 128 bits of data svint8_t svld1rq_s8(svbool_t pg, const int8_t *base); svuint32_t svld1rq_u32(svbool_t pg, const uint32_t *base); svuint16_t svld1rq_u16(svbool_t pg, const uint16_t *base); svuint8_t svld1rq_u8(svbool_t pg, const uint8_t *base); svint64_t svld1rq_s64(svbool_t pg, const int64_t *base); svint32_t svld1rq_s32(svbool_t pg, const int32_t *base); svint16_t svld1rq_s16(svbool_t pg, const int16_t *base); svuint64_t svld1rq_u64(svbool_t pg, const uint64_t *base); svfloat64_t svld1rq_f64(svbool_t pg, const float64_t *base); svfloat32_t svld1rq_f32(svbool_t pg, const float32_t *base); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svld1rq(pg, base) _Generic((base), \ const int8_t *: svld1rq_s8, \ const uint32_t *: svld1rq_u32, \ const uint16_t *: svld1rq_u16, \ const uint8_t *: svld1rq_u8, \ const int64_t *: svld1rq_s64, \ const int32_t *: svld1rq_s32, \ const int16_t *: svld1rq_s16, \ const uint64_t *: svld1rq_u64, \ const float64_t *: svld1rq_f64, \ const float32_t *: svld1rq_f32, \ default: __assume(0) \ )(pg, base) #endif // sve: Load / Load and replicate: Load and replicate 256 bits of data svint32_t svld1ro_s32(svbool_t pg, const int32_t *base); svuint32_t svld1ro_u32(svbool_t pg, const uint32_t *base); svuint64_t svld1ro_u64(svbool_t pg, const uint64_t *base); svint16_t svld1ro_s16(svbool_t pg, const int16_t *base); svint8_t svld1ro_s8(svbool_t pg, const int8_t *base); svfloat64_t svld1ro_f64(svbool_t pg, const float64_t *base); svfloat32_t svld1ro_f32(svbool_t pg, const float32_t *base); svint64_t svld1ro_s64(svbool_t pg, const int64_t *base); svuint8_t svld1ro_u8(svbool_t pg, const uint8_t *base); svuint16_t svld1ro_u16(svbool_t pg, const uint16_t *base); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svld1ro(pg, base) _Generic((base), \ const int32_t *: svld1ro_s32, \ const uint32_t *: svld1ro_u32, \ const uint64_t *: svld1ro_u64, \ const int16_t *: svld1ro_s16, \ const int8_t *: svld1ro_s8, \ const float64_t *: svld1ro_f64, \ const float32_t *: svld1ro_f32, \ const int64_t *: svld1ro_s64, \ const uint8_t *: svld1ro_u8, \ const uint16_t *: svld1ro_u16, \ default: __assume(0) \ )(pg, base) #endif // sve: Logical / AND-NOT: Bitwise clear svuint64_t svbic_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svbic_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svbic_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svbic_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svbic_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svbic_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svbic_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); 
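/*
 * Example (illustrative sketch, not part of this header): svld1rq_f32 loads
 * one 128-bit quadword (four float32_t values) and repeats it across every
 * 128-bit segment of the vector; the svld1ro_* forms above do the same with a
 * 256-bit block.  Replicated loads are typically used to broadcast a short
 * coefficient table, for instance as input to lane-indexed multiply-accumulate
 * intrinsics.  svptrue_b32() is assumed to be declared elsewhere in this
 * header.
 *
 *   const float32_t coeffs[4] = { 1.0f, 0.5f, 0.25f, 0.125f };
 *   svbool_t    pg = svptrue_b32();
 *   svfloat32_t c  = svld1rq_f32(pg, coeffs);  // coeffs repeated per 128-bit segment
 */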
svint8_t svbic_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svuint64_t svbic_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svbic_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svbic_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svbic_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svint32_t svbic_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svbic_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svbic_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svuint64_t svbic_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svbic_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svbic_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svbic_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svbic_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svbic_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svbic_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svint8_t svbic_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svbic_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svbic_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svbool_t svbic_b_z(svbool_t pg, svbool_t op1, svbool_t op2); svuint64_t svbic_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svbic_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svbic_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svbic_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svbic_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svbic_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svbic_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svbic_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svbic_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svint16_t svbic_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svuint32_t svbic_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint8_t svbic_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svbic_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svbic_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svbic_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svbic_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svbic_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svbic_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svbic_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svbic_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svbic_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svuint16_t svbic_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svint8_t svbic_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbic_z(pg, op1, op2) _Generic((op2), \ svuint64_t: svbic_u64_z, \ svuint32_t: svbic_u32_z, \ svuint16_t: svbic_u16_z, \ svuint8_t: svbic_u8_z, \ svint64_t: svbic_s64_z, \ svint32_t: svbic_s32_z, \ svint16_t: svbic_s16_z, \ svint8_t: svbic_s8_z, \ svbool_t: svbic_b_z, \ uint64_t: svbic_n_u64_z, \ uint32_t: svbic_n_u32_z, \ uint16_t: svbic_n_u16_z, \ uint8_t: svbic_n_u8_z, \ int64_t: svbic_n_s64_z, \ int32_t: svbic_n_s32_z, \ int16_t: svbic_n_s16_z, \ int8_t: svbic_n_s8_z, \ default: __assume(0) \ )(pg, op1, op2) #define svbic_x(pg, op1, op2) _Generic((op2), \ svuint64_t: svbic_u64_x, \ svuint32_t: svbic_u32_x, \ svuint16_t: svbic_u16_x, \ svuint8_t: svbic_u8_x, \ svint32_t: svbic_s32_x, \ 
svint16_t: svbic_s16_x, \ svint8_t: svbic_s8_x, \ svint64_t: svbic_s64_x, \ uint64_t: svbic_n_u64_x, \ uint32_t: svbic_n_u32_x, \ uint8_t: svbic_n_u8_x, \ int64_t: svbic_n_s64_x, \ int32_t: svbic_n_s32_x, \ int16_t: svbic_n_s16_x, \ int8_t: svbic_n_s8_x, \ uint16_t: svbic_n_u16_x, \ default: __assume(0) \ )(pg, op1, op2) #define svbic_m(pg, op1, op2) _Generic((op2), \ svuint64_t: svbic_u64_m, \ svuint32_t: svbic_u32_m, \ svuint16_t: svbic_u16_m, \ svuint8_t: svbic_u8_m, \ svint64_t: svbic_s64_m, \ svint32_t: svbic_s32_m, \ int8_t: svbic_n_s8_m, \ int16_t: svbic_n_s16_m, \ int32_t: svbic_n_s32_m, \ svint16_t: svbic_s16_m, \ uint64_t: svbic_n_u64_m, \ uint32_t: svbic_n_u32_m, \ uint16_t: svbic_n_u16_m, \ uint8_t: svbic_n_u8_m, \ int64_t: svbic_n_s64_m, \ svint8_t: svbic_s8_m, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Logical / AND: Bitwise AND svint8_t svand_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svuint64_t svand_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svand_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svint64_t svand_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svand_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svand_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svuint16_t svand_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svand_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svint16_t svand_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svand_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svuint32_t svand_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svint8_t svand_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svuint64_t svand_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svand_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint8_t svand_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint8_t svand_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svand_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svand_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svand_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svand_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svand_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svint16_t svand_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svuint16_t svand_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svint16_t svand_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svuint64_t svand_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svbool_t svand_b_z(svbool_t pg, svbool_t op1, svbool_t op2); svuint64_t svand_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svand_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svand_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svand_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svand_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svand_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint8_t svand_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svand_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svuint16_t svand_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svand_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svuint32_t svand_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svint32_t svand_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svint8_t svand_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svand_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint32_t 
svand_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svand_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svand_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svand_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svuint32_t svand_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint64_t svand_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svint8_t svand_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svand_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svuint16_t svand_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svand_z(pg, op1, op2) _Generic((op2), \ svint8_t: svand_s8_z, \ svint32_t: svand_s32_z, \ svint64_t: svand_s64_z, \ svint16_t: svand_s16_z, \ svuint32_t: svand_u32_z, \ svuint8_t: svand_u8_z, \ svuint16_t: svand_u16_z, \ int16_t: svand_n_s16_z, \ svuint64_t: svand_u64_z, \ svbool_t: svand_b_z, \ uint64_t: svand_n_u64_z, \ uint32_t: svand_n_u32_z, \ uint16_t: svand_n_u16_z, \ uint8_t: svand_n_u8_z, \ int64_t: svand_n_s64_z, \ int32_t: svand_n_s32_z, \ int8_t: svand_n_s8_z, \ default: __assume(0) \ )(pg, op1, op2) #define svand_x(pg, op1, op2) _Generic((op2), \ svuint64_t: svand_u64_x, \ svuint32_t: svand_u32_x, \ svint64_t: svand_s64_x, \ svuint16_t: svand_u16_x, \ svuint8_t: svand_u8_x, \ svint32_t: svand_s32_x, \ svint8_t: svand_s8_x, \ svint16_t: svand_s16_x, \ uint64_t: svand_n_u64_x, \ uint16_t: svand_n_u16_x, \ uint8_t: svand_n_u8_x, \ uint32_t: svand_n_u32_x, \ int32_t: svand_n_s32_x, \ int64_t: svand_n_s64_x, \ int8_t: svand_n_s8_x, \ int16_t: svand_n_s16_x, \ default: __assume(0) \ )(pg, op1, op2) #define svand_m(pg, op1, op2) _Generic((op2), \ svuint64_t: svand_u64_m, \ svuint32_t: svand_u32_m, \ svint8_t: svand_s8_m, \ svint16_t: svand_s16_m, \ svint32_t: svand_s32_m, \ svint64_t: svand_s64_m, \ svuint8_t: svand_u8_m, \ svuint16_t: svand_u16_m, \ int8_t: svand_n_s8_m, \ int16_t: svand_n_s16_m, \ int32_t: svand_n_s32_m, \ uint8_t: svand_n_u8_m, \ int64_t: svand_n_s64_m, \ uint32_t: svand_n_u32_m, \ uint64_t: svand_n_u64_m, \ uint16_t: svand_n_u16_m, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Logical / AND: Bitwise AND reduction to scalar int64_t svandv_s64(svbool_t pg, svint64_t op); int8_t svandv_s8(svbool_t pg, svint8_t op); int16_t svandv_s16(svbool_t pg, svint16_t op); int32_t svandv_s32(svbool_t pg, svint32_t op); uint16_t svandv_u16(svbool_t pg, svuint16_t op); uint8_t svandv_u8(svbool_t pg, svuint8_t op); uint64_t svandv_u64(svbool_t pg, svuint64_t op); uint32_t svandv_u32(svbool_t pg, svuint32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svandv(pg, op) _Generic((op), \ svint64_t: svandv_s64, \ svint8_t: svandv_s8, \ svint16_t: svandv_s16, \ svint32_t: svandv_s32, \ svuint16_t: svandv_u16, \ svuint8_t: svandv_u8, \ svuint64_t: svandv_u64, \ svuint32_t: svandv_u32, \ default: __assume(0) \ )(pg, op) #endif // sve: Logical / AND: Move svbool_t svmov_b_z(svbool_t pg, svbool_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmov_z(pg, op) _Generic((op), \ svbool_t: svmov_b_z, \ default: __assume(0) \ )(pg, op) #endif // sve: Logical / Bitwise NOT: Bitwise invert svuint32_t svnot_u32_z(svbool_t pg, svuint32_t op); svuint64_t svnot_u64_z(svbool_t pg, svuint64_t op); svbool_t svnot_b_z(svbool_t pg, svbool_t op); svuint8_t svnot_u8_z(svbool_t pg, svuint8_t op); svuint64_t svnot_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op); svint64_t svnot_s64_z(svbool_t pg, 
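/*
 * Example (illustrative sketch, not part of this header): the _z/_m/_x
 * suffixes select how inactive lanes are treated.  _z zeroes them, _m keeps
 * the corresponding lane of op1, and _x leaves them unspecified (cheapest when
 * the caller does not care).  With a C11 compiler the svand_z/svand_m/svand_x
 * macros above also accept a scalar second operand and route it to the
 * matching _n_ form; svandv reduces the active lanes to one scalar.
 *
 *   svbool_t   pg = ...;                      // predicate obtained elsewhere
 *   svuint32_t a  = ..., b = ...;             // vectors obtained elsewhere
 *   svuint32_t z  = svand_u32_z(pg, a, b);    // inactive lanes become 0
 *   svuint32_t m  = svand_u32_m(pg, a, b);    // inactive lanes keep a's value
 *   svuint32_t lo = svand_x(pg, a, 0xFFu);    // scalar op2 -> svand_n_u32_x
 *   uint32_t  all = svandv_u32(pg, a);        // AND-reduce active lanes
 */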
svint64_t op); svint32_t svnot_s32_z(svbool_t pg, svint32_t op); svint16_t svnot_s16_z(svbool_t pg, svint16_t op); svint8_t svnot_s8_z(svbool_t pg, svint8_t op); svuint64_t svnot_u64_x(svbool_t pg, svuint64_t op); svuint32_t svnot_u32_x(svbool_t pg, svuint32_t op); svuint16_t svnot_u16_x(svbool_t pg, svuint16_t op); svuint8_t svnot_u8_x(svbool_t pg, svuint8_t op); svint64_t svnot_s64_x(svbool_t pg, svint64_t op); svint32_t svnot_s32_x(svbool_t pg, svint32_t op); svint16_t svnot_s16_x(svbool_t pg, svint16_t op); svint8_t svnot_s8_x(svbool_t pg, svint8_t op); svuint16_t svnot_u16_z(svbool_t pg, svuint16_t op); svuint32_t svnot_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op); svuint16_t svnot_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op); svuint8_t svnot_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op); svint64_t svnot_s64_m(svint64_t inactive, svbool_t pg, svint64_t op); svint32_t svnot_s32_m(svint32_t inactive, svbool_t pg, svint32_t op); svint16_t svnot_s16_m(svint16_t inactive, svbool_t pg, svint16_t op); svint8_t svnot_s8_m(svint8_t inactive, svbool_t pg, svint8_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svnot_z(pg, op) _Generic((op), \ svuint32_t: svnot_u32_z, \ svuint64_t: svnot_u64_z, \ svbool_t: svnot_b_z, \ svuint8_t: svnot_u8_z, \ svint64_t: svnot_s64_z, \ svint32_t: svnot_s32_z, \ svint16_t: svnot_s16_z, \ svint8_t: svnot_s8_z, \ svuint16_t: svnot_u16_z, \ default: __assume(0) \ )(pg, op) #define svnot_m(inactive, pg, op) _Generic((op), \ svuint64_t: svnot_u64_m, \ svuint32_t: svnot_u32_m, \ svuint16_t: svnot_u16_m, \ svuint8_t: svnot_u8_m, \ svint64_t: svnot_s64_m, \ svint32_t: svnot_s32_m, \ svint16_t: svnot_s16_m, \ svint8_t: svnot_s8_m, \ default: __assume(0) \ )(inactive, pg, op) #define svnot_x(pg, op) _Generic((op), \ svuint64_t: svnot_u64_x, \ svuint32_t: svnot_u32_x, \ svuint16_t: svnot_u16_x, \ svuint8_t: svnot_u8_x, \ svint64_t: svnot_s64_x, \ svint32_t: svnot_s32_x, \ svint16_t: svnot_s16_x, \ svint8_t: svnot_s8_x, \ default: __assume(0) \ )(pg, op) #endif // sve: Logical / Exclusive OR: Bitwise exclusive OR svuint32_t sveor_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t sveor_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svbool_t sveor_b_z(svbool_t pg, svbool_t op1, svbool_t op2); svuint64_t sveor_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t sveor_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t sveor_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svint8_t sveor_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t sveor_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t sveor_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t sveor_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t sveor_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t sveor_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t sveor_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t sveor_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t sveor_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t sveor_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t sveor_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svint8_t sveor_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t sveor_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t sveor_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t sveor_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t sveor_u8_z(svbool_t pg, svuint8_t 
op1, svuint8_t op2); svuint16_t sveor_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t sveor_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint8_t sveor_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svint32_t sveor_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint16_t sveor_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svuint8_t sveor_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svuint64_t sveor_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svint8_t sveor_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svint16_t sveor_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint32_t sveor_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint64_t sveor_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint16_t sveor_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint64_t sveor_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t sveor_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t sveor_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svint64_t sveor_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t sveor_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svuint8_t sveor_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t sveor_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svuint64_t sveor_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint16_t sveor_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint32_t sveor_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint64_t sveor_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svint8_t sveor_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svint16_t sveor_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svuint32_t sveor_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svint32_t sveor_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define sveor_x(pg, op1, op2) _Generic((op2), \ uint32_t: sveor_n_u32_x, \ uint16_t: sveor_n_u16_x, \ svint8_t: sveor_s8_x, \ svint16_t: sveor_s16_x, \ svint32_t: sveor_s32_x, \ svint64_t: sveor_s64_x, \ svuint8_t: sveor_u8_x, \ svuint16_t: sveor_u16_x, \ svuint32_t: sveor_u32_x, \ svuint64_t: sveor_u64_x, \ uint64_t: sveor_n_u64_x, \ uint8_t: sveor_n_u8_x, \ int64_t: sveor_n_s64_x, \ int8_t: sveor_n_s8_x, \ int16_t: sveor_n_s16_x, \ int32_t: sveor_n_s32_x, \ default: __assume(0) \ )(pg, op1, op2) #define sveor_z(pg, op1, op2) _Generic((op2), \ svbool_t: sveor_b_z, \ svuint64_t: sveor_u64_z, \ svint8_t: sveor_s8_z, \ svint16_t: sveor_s16_z, \ svint32_t: sveor_s32_z, \ svint64_t: sveor_s64_z, \ svuint8_t: sveor_u8_z, \ svuint16_t: sveor_u16_z, \ svuint32_t: sveor_u32_z, \ int8_t: sveor_n_s8_z, \ int16_t: sveor_n_s16_z, \ int32_t: sveor_n_s32_z, \ int64_t: sveor_n_s64_z, \ uint8_t: sveor_n_u8_z, \ uint16_t: sveor_n_u16_z, \ uint64_t: sveor_n_u64_z, \ uint32_t: sveor_n_u32_z, \ default: __assume(0) \ )(pg, op1, op2) #define sveor_m(pg, op1, op2) _Generic((op2), \ svuint32_t: sveor_u32_m, \ svuint16_t: sveor_u16_m, \ int8_t: sveor_n_s8_m, \ svuint64_t: sveor_u64_m, \ svint8_t: sveor_s8_m, \ svuint8_t: sveor_u8_m, \ int32_t: sveor_n_s32_m, \ int16_t: sveor_n_s16_m, \ uint8_t: sveor_n_u8_m, \ svint16_t: sveor_s16_m, \ int64_t: sveor_n_s64_m, \ svint64_t: sveor_s64_m, \ svint32_t: sveor_s32_m, \ uint16_t: sveor_n_u16_m, \ uint32_t: sveor_n_u32_m, \ uint64_t: sveor_n_u64_m, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Logical / Exclusive OR: Bitwise exclusive OR reduction to scalar uint32_t sveorv_u32(svbool_t pg, svuint32_t op); uint16_t 
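/*
 * Example (illustrative sketch, not part of this header): sveor_x with a
 * scalar second operand routes to the matching _n_ form and XORs every active
 * lane with that constant, a common way to flip selected bits; sveorv folds
 * the active lanes into a single scalar by XOR.  svptrue_b8() is assumed to be
 * declared elsewhere in this header.
 *
 *   svbool_t  pg      = svptrue_b8();
 *   svuint8_t bytes   = ...;                                 // obtained elsewhere
 *   svuint8_t flipped = sveor_x(pg, bytes, (uint8_t)0x80);   // -> sveor_n_u8_x
 *   uint8_t   folded  = sveorv_u8(pg, bytes);                // XOR of active lanes
 */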
sveorv_u16(svbool_t pg, svuint16_t op); uint8_t sveorv_u8(svbool_t pg, svuint8_t op); int64_t sveorv_s64(svbool_t pg, svint64_t op); int32_t sveorv_s32(svbool_t pg, svint32_t op); int16_t sveorv_s16(svbool_t pg, svint16_t op); uint64_t sveorv_u64(svbool_t pg, svuint64_t op); int8_t sveorv_s8(svbool_t pg, svint8_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define sveorv(pg, op) _Generic((op), \ svuint32_t: sveorv_u32, \ svuint16_t: sveorv_u16, \ svuint8_t: sveorv_u8, \ svint64_t: sveorv_s64, \ svint32_t: sveorv_s32, \ svint16_t: sveorv_s16, \ svuint64_t: sveorv_u64, \ svint8_t: sveorv_s8, \ default: __assume(0) \ )(pg, op) #endif // sve: Logical / Logical NOT: Logically invert boolean condition svuint32_t svcnot_u32_z(svbool_t pg, svuint32_t op); svint16_t svcnot_s16_z(svbool_t pg, svint16_t op); svint8_t svcnot_s8_m(svint8_t inactive, svbool_t pg, svint8_t op); svint16_t svcnot_s16_m(svint16_t inactive, svbool_t pg, svint16_t op); svint32_t svcnot_s32_m(svint32_t inactive, svbool_t pg, svint32_t op); svint64_t svcnot_s64_m(svint64_t inactive, svbool_t pg, svint64_t op); svuint16_t svcnot_u16_z(svbool_t pg, svuint16_t op); svuint8_t svcnot_u8_z(svbool_t pg, svuint8_t op); svint64_t svcnot_s64_z(svbool_t pg, svint64_t op); svint32_t svcnot_s32_z(svbool_t pg, svint32_t op); svint8_t svcnot_s8_z(svbool_t pg, svint8_t op); svuint64_t svcnot_u64_x(svbool_t pg, svuint64_t op); svuint32_t svcnot_u32_x(svbool_t pg, svuint32_t op); svuint16_t svcnot_u16_x(svbool_t pg, svuint16_t op); svuint8_t svcnot_u8_x(svbool_t pg, svuint8_t op); svint64_t svcnot_s64_x(svbool_t pg, svint64_t op); svint32_t svcnot_s32_x(svbool_t pg, svint32_t op); svint16_t svcnot_s16_x(svbool_t pg, svint16_t op); svint8_t svcnot_s8_x(svbool_t pg, svint8_t op); svuint64_t svcnot_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op); svuint32_t svcnot_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op); svuint8_t svcnot_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op); svuint64_t svcnot_u64_z(svbool_t pg, svuint64_t op); svuint16_t svcnot_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svcnot_z(pg, op) _Generic((op), \ svuint32_t: svcnot_u32_z, \ svint16_t: svcnot_s16_z, \ svuint16_t: svcnot_u16_z, \ svuint8_t: svcnot_u8_z, \ svint64_t: svcnot_s64_z, \ svint32_t: svcnot_s32_z, \ svint8_t: svcnot_s8_z, \ svuint64_t: svcnot_u64_z, \ default: __assume(0) \ )(pg, op) #define svcnot_m(inactive, pg, op) _Generic((op), \ svint8_t: svcnot_s8_m, \ svint16_t: svcnot_s16_m, \ svint32_t: svcnot_s32_m, \ svint64_t: svcnot_s64_m, \ svuint64_t: svcnot_u64_m, \ svuint32_t: svcnot_u32_m, \ svuint8_t: svcnot_u8_m, \ svuint16_t: svcnot_u16_m, \ default: __assume(0) \ )(inactive, pg, op) #define svcnot_x(pg, op) _Generic((op), \ svuint64_t: svcnot_u64_x, \ svuint32_t: svcnot_u32_x, \ svuint16_t: svcnot_u16_x, \ svuint8_t: svcnot_u8_x, \ svint64_t: svcnot_s64_x, \ svint32_t: svcnot_s32_x, \ svint16_t: svcnot_s16_x, \ svint8_t: svcnot_s8_x, \ default: __assume(0) \ )(pg, op) #endif // sve: Logical / NAND: Bitwise NAND svbool_t svnand_b_z(svbool_t pg, svbool_t op1, svbool_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svnand_z(pg, op1, op2) _Generic((op2), \ svbool_t: svnand_b_z, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Logical / NOR: Bitwise NOR svbool_t svnor_b_z(svbool_t pg, svbool_t op1, svbool_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svnor_z(pg, op1, op2) 
_Generic((op2), \ svbool_t: svnor_b_z, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Logical / OR-NOT: Bitwise inclusive OR, inverting second argument svbool_t svorn_b_z(svbool_t pg, svbool_t op1, svbool_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svorn_z(pg, op1, op2) _Generic((op2), \ svbool_t: svorn_b_z, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Logical / OR: Bitwise inclusive OR svint8_t svorr_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svuint8_t svorr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint16_t svorr_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svorr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svorr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svorr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svorr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svorr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svorr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svint16_t svorr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svuint16_t svorr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svint32_t svorr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svuint8_t svorr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svorr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svorr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svorr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svorr_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svorr_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svorr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svorr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svint64_t svorr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svint8_t svorr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svuint32_t svorr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svbool_t svorr_b_z(svbool_t pg, svbool_t op1, svbool_t op2); svuint16_t svorr_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint64_t svorr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svint32_t svorr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svorr_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svorr_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svorr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svorr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svint16_t svorr_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svorr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svorr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svorr_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svorr_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint32_t svorr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint8_t svorr_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svint8_t svorr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svorr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svint32_t svorr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svuint16_t svorr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svorr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svuint32_t svorr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svorr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svorr_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svorr_n_s16_x(svbool_t pg, svint16_t op1, 
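// Usage sketch (illustrative only): OR an immediate flag into the active lanes
// with the merging (_m) form of svorr; inactive lanes keep their op1 value,
// the _z form would zero them, and the _x form leaves them unspecified.
// Excluded from compilation.
#if 0
static svuint32_t example_set_flag_bit(svbool_t pg, svuint32_t words)
{
    return svorr_n_u32_m(pg, words, 0x80000000u);   // set the top bit of each active word
}
#endif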
int16_t op2); svint64_t svorr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint64_t svorr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svorr_m(pg, op1, op2) _Generic((op2), \ svint8_t: svorr_s8_m, \ svint16_t: svorr_s16_m, \ svint32_t: svorr_s32_m, \ svint64_t: svorr_s64_m, \ svuint8_t: svorr_u8_m, \ svuint16_t: svorr_u16_m, \ svuint32_t: svorr_u32_m, \ svuint64_t: svorr_u64_m, \ int16_t: svorr_n_s16_m, \ int32_t: svorr_n_s32_m, \ int64_t: svorr_n_s64_m, \ uint8_t: svorr_n_u8_m, \ uint16_t: svorr_n_u16_m, \ uint32_t: svorr_n_u32_m, \ uint64_t: svorr_n_u64_m, \ int8_t: svorr_n_s8_m, \ default: __assume(0) \ )(pg, op1, op2) #define svorr_z(pg, op1, op2) _Generic((op2), \ svuint8_t: svorr_u8_z, \ svuint16_t: svorr_u16_z, \ svint8_t: svorr_s8_z, \ svint16_t: svorr_s16_z, \ svint32_t: svorr_s32_z, \ svint64_t: svorr_s64_z, \ uint32_t: svorr_n_u32_z, \ svbool_t: svorr_b_z, \ uint16_t: svorr_n_u16_z, \ uint64_t: svorr_n_u64_z, \ int32_t: svorr_n_s32_z, \ int16_t: svorr_n_s16_z, \ int8_t: svorr_n_s8_z, \ uint8_t: svorr_n_u8_z, \ svuint32_t: svorr_u32_z, \ svuint64_t: svorr_u64_z, \ int64_t: svorr_n_s64_z, \ default: __assume(0) \ )(pg, op1, op2) #define svorr_x(pg, op1, op2) _Generic((op2), \ svint16_t: svorr_s16_x, \ svint32_t: svorr_s32_x, \ svuint8_t: svorr_u8_x, \ svuint16_t: svorr_u16_x, \ svuint32_t: svorr_u32_x, \ svuint64_t: svorr_u64_x, \ svint64_t: svorr_s64_x, \ svint8_t: svorr_s8_x, \ uint64_t: svorr_n_u64_x, \ uint32_t: svorr_n_u32_x, \ int8_t: svorr_n_s8_x, \ int32_t: svorr_n_s32_x, \ uint16_t: svorr_n_u16_x, \ uint8_t: svorr_n_u8_x, \ int16_t: svorr_n_s16_x, \ int64_t: svorr_n_s64_x, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Logical / OR: Bitwise inclusive OR reduction to scalar int32_t svorv_s32(svbool_t pg, svint32_t op); int64_t svorv_s64(svbool_t pg, svint64_t op); uint16_t svorv_u16(svbool_t pg, svuint16_t op); uint32_t svorv_u32(svbool_t pg, svuint32_t op); uint64_t svorv_u64(svbool_t pg, svuint64_t op); int8_t svorv_s8(svbool_t pg, svint8_t op); int16_t svorv_s16(svbool_t pg, svint16_t op); uint8_t svorv_u8(svbool_t pg, svuint8_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svorv(pg, op) _Generic((op), \ svint32_t: svorv_s32, \ svint64_t: svorv_s64, \ svuint16_t: svorv_u16, \ svuint32_t: svorv_u32, \ svuint64_t: svorv_u64, \ svint8_t: svorv_s8, \ svint16_t: svorv_s16, \ svuint8_t: svorv_u8, \ default: __assume(0) \ )(pg, op) #endif // sve: Move / Widen: Unpack and extend high half svuint16_t svunpkhi_u16(svuint8_t op); svint64_t svunpkhi_s64(svint32_t op); svint16_t svunpkhi_s16(svint8_t op); svint32_t svunpkhi_s32(svint16_t op); svuint32_t svunpkhi_u32(svuint16_t op); svuint64_t svunpkhi_u64(svuint32_t op); svbool_t svunpkhi_b(svbool_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svunpkhi(op) _Generic((op), \ svuint8_t: svunpkhi_u16, \ svint32_t: svunpkhi_s64, \ svint8_t: svunpkhi_s16, \ svint16_t: svunpkhi_s32, \ svuint16_t: svunpkhi_u32, \ svuint32_t: svunpkhi_u64, \ svbool_t: svunpkhi_b, \ default: __assume(0) \ )(op) #endif // sve: Move / Widen: Unpack and extend low half svbool_t svunpklo_b(svbool_t op); svuint64_t svunpklo_u64(svuint32_t op); svint32_t svunpklo_s32(svint16_t op); svint64_t svunpklo_s64(svint32_t op); svuint16_t svunpklo_u16(svuint8_t op); svuint32_t svunpklo_u32(svuint16_t op); svint16_t svunpklo_s16(svint8_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svunpklo(op) 
_Generic((op), \ svbool_t: svunpklo_b, \ svuint32_t: svunpklo_u64, \ svint16_t: svunpklo_s32, \ svint32_t: svunpklo_s64, \ svuint8_t: svunpklo_u16, \ svuint16_t: svunpklo_u32, \ svint8_t: svunpklo_s16, \ default: __assume(0) \ )(op) #endif // sve: Predication / Counting / Count active elements: Count set predicate bits uint64_t svcntp_b16(svbool_t pg, svbool_t op); uint64_t svcntp_b8(svbool_t pg, svbool_t op); uint64_t svcntp_b64(svbool_t pg, svbool_t op); uint64_t svcntp_b32(svbool_t pg, svbool_t op); // sve: Predication / Counting / Saturating decrement: Saturating decrement by active element count uint64_t svqdecp_n_u64_b64(uint64_t op, svbool_t pg); uint32_t svqdecp_n_u32_b32(uint32_t op, svbool_t pg); uint32_t svqdecp_n_u32_b64(uint32_t op, svbool_t pg); svuint64_t svqdecp_u64(svuint64_t op, svbool_t pg); uint32_t svqdecp_n_u32_b16(uint32_t op, svbool_t pg); uint64_t svqdecp_n_u64_b8(uint64_t op, svbool_t pg); uint64_t svqdecp_n_u64_b16(uint64_t op, svbool_t pg); uint64_t svqdecp_n_u64_b32(uint64_t op, svbool_t pg); svint32_t svqdecp_s32(svint32_t op, svbool_t pg); svint64_t svqdecp_s64(svint64_t op, svbool_t pg); int64_t svqdecp_n_s64_b64(int64_t op, svbool_t pg); int32_t svqdecp_n_s32_b8(int32_t op, svbool_t pg); int32_t svqdecp_n_s32_b16(int32_t op, svbool_t pg); svint16_t svqdecp_s16(svint16_t op, svbool_t pg); int32_t svqdecp_n_s32_b32(int32_t op, svbool_t pg); int64_t svqdecp_n_s64_b32(int64_t op, svbool_t pg); int64_t svqdecp_n_s64_b8(int64_t op, svbool_t pg); int64_t svqdecp_n_s64_b16(int64_t op, svbool_t pg); uint32_t svqdecp_n_u32_b8(uint32_t op, svbool_t pg); svuint32_t svqdecp_u32(svuint32_t op, svbool_t pg); svuint16_t svqdecp_u16(svuint16_t op, svbool_t pg); int32_t svqdecp_n_s32_b64(int32_t op, svbool_t pg); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdecp_b64(op, pg) _Generic((op), \ uint64_t: svqdecp_n_u64_b64, \ uint32_t: svqdecp_n_u32_b64, \ int64_t: svqdecp_n_s64_b64, \ int32_t: svqdecp_n_s32_b64, \ default: __assume(0) \ )(op, pg) #define svqdecp_b32(op, pg) _Generic((op), \ uint32_t: svqdecp_n_u32_b32, \ uint64_t: svqdecp_n_u64_b32, \ int32_t: svqdecp_n_s32_b32, \ int64_t: svqdecp_n_s64_b32, \ default: __assume(0) \ )(op, pg) #define svqdecp(op, pg) _Generic((op), \ svuint64_t: svqdecp_u64, \ svint32_t: svqdecp_s32, \ svint64_t: svqdecp_s64, \ svint16_t: svqdecp_s16, \ svuint32_t: svqdecp_u32, \ svuint16_t: svqdecp_u16, \ default: __assume(0) \ )(op, pg) #define svqdecp_b16(op, pg) _Generic((op), \ uint32_t: svqdecp_n_u32_b16, \ uint64_t: svqdecp_n_u64_b16, \ int32_t: svqdecp_n_s32_b16, \ int64_t: svqdecp_n_s64_b16, \ default: __assume(0) \ )(op, pg) #define svqdecp_b8(op, pg) _Generic((op), \ uint64_t: svqdecp_n_u64_b8, \ int32_t: svqdecp_n_s32_b8, \ int64_t: svqdecp_n_s64_b8, \ uint32_t: svqdecp_n_u32_b8, \ default: __assume(0) \ )(op, pg) #endif // sve: Predication / Counting / Saturating increment: Saturating increment by active element count int32_t svqincp_n_s32_b8(int32_t op, svbool_t pg); int32_t svqincp_n_s32_b16(int32_t op, svbool_t pg); int32_t svqincp_n_s32_b32(int32_t op, svbool_t pg); int32_t svqincp_n_s32_b64(int32_t op, svbool_t pg); int64_t svqincp_n_s64_b8(int64_t op, svbool_t pg); int64_t svqincp_n_s64_b16(int64_t op, svbool_t pg); int64_t svqincp_n_s64_b32(int64_t op, svbool_t pg); uint32_t svqincp_n_u32_b64(uint32_t op, svbool_t pg); uint32_t svqincp_n_u32_b16(uint32_t op, svbool_t pg); svuint64_t svqincp_u64(svuint64_t op, svbool_t pg); int64_t svqincp_n_s64_b64(int64_t op, svbool_t pg); svuint16_t 
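// Usage sketch (illustrative only): svcntp counts active predicate elements and
// svqdecp performs a saturating decrement by that count (svqincp is the
// symmetric saturating increment), which is handy for maintaining a
// "remaining elements" counter without wrapping. Excluded from compilation.
#if 0
static uint64_t example_count_active_words(svbool_t pg)
{
    return svcntp_b32(svptrue_b32(), pg);          // number of active 32-bit lanes in pg
}

static int64_t example_step_remaining(int64_t remaining, svbool_t pg)
{
    return svqdecp_n_s64_b32(remaining, pg);       // remaining - (active lanes), saturating
}
#endif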
svqincp_u16(svuint16_t op, svbool_t pg); svint64_t svqincp_s64(svint64_t op, svbool_t pg); svint32_t svqincp_s32(svint32_t op, svbool_t pg); svuint32_t svqincp_u32(svuint32_t op, svbool_t pg); uint64_t svqincp_n_u64_b64(uint64_t op, svbool_t pg); uint64_t svqincp_n_u64_b32(uint64_t op, svbool_t pg); uint64_t svqincp_n_u64_b16(uint64_t op, svbool_t pg); uint64_t svqincp_n_u64_b8(uint64_t op, svbool_t pg); uint32_t svqincp_n_u32_b32(uint32_t op, svbool_t pg); svint16_t svqincp_s16(svint16_t op, svbool_t pg); uint32_t svqincp_n_u32_b8(uint32_t op, svbool_t pg); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqincp_b8(op, pg) _Generic((op), \ int32_t: svqincp_n_s32_b8, \ int64_t: svqincp_n_s64_b8, \ uint64_t: svqincp_n_u64_b8, \ uint32_t: svqincp_n_u32_b8, \ default: __assume(0) \ )(op, pg) #define svqincp_b16(op, pg) _Generic((op), \ int32_t: svqincp_n_s32_b16, \ int64_t: svqincp_n_s64_b16, \ uint32_t: svqincp_n_u32_b16, \ uint64_t: svqincp_n_u64_b16, \ default: __assume(0) \ )(op, pg) #define svqincp_b32(op, pg) _Generic((op), \ int32_t: svqincp_n_s32_b32, \ int64_t: svqincp_n_s64_b32, \ uint64_t: svqincp_n_u64_b32, \ uint32_t: svqincp_n_u32_b32, \ default: __assume(0) \ )(op, pg) #define svqincp_b64(op, pg) _Generic((op), \ int32_t: svqincp_n_s32_b64, \ uint32_t: svqincp_n_u32_b64, \ int64_t: svqincp_n_s64_b64, \ uint64_t: svqincp_n_u64_b64, \ default: __assume(0) \ )(op, pg) #define svqincp(op, pg) _Generic((op), \ svuint64_t: svqincp_u64, \ svuint16_t: svqincp_u16, \ svint64_t: svqincp_s64, \ svint32_t: svqincp_s32, \ svuint32_t: svqincp_u32, \ svint16_t: svqincp_s16, \ default: __assume(0) \ )(op, pg) #endif // sve: Predication / Element selection / Concatenate active elements: Shuffle active elements of vector to the right and fill with zero svuint32_t svcompact_u32(svbool_t pg, svuint32_t op); svuint64_t svcompact_u64(svbool_t pg, svuint64_t op); svint64_t svcompact_s64(svbool_t pg, svint64_t op); svint32_t svcompact_s32(svbool_t pg, svint32_t op); svfloat64_t svcompact_f64(svbool_t pg, svfloat64_t op); svfloat32_t svcompact_f32(svbool_t pg, svfloat32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svcompact(pg, op) _Generic((op), \ svuint32_t: svcompact_u32, \ svuint64_t: svcompact_u64, \ svint64_t: svcompact_s64, \ svint32_t: svcompact_s32, \ svfloat64_t: svcompact_f64, \ svfloat32_t: svcompact_f32, \ default: __assume(0) \ )(pg, op) #endif // sve: Predication / Element selection / Extract element after last active element: Conditionally extract element after last svint64_t svclasta_s64(svbool_t pg, svint64_t fallback, svint64_t data); svuint8_t svclasta_u8(svbool_t pg, svuint8_t fallback, svuint8_t data); uint64_t svclasta_n_u64(svbool_t pg, uint64_t fallback, svuint64_t data); uint32_t svclasta_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data); uint16_t svclasta_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data); uint8_t svclasta_n_u8(svbool_t pg, uint8_t fallback, svuint8_t data); int64_t svclasta_n_s64(svbool_t pg, int64_t fallback, svint64_t data); int16_t svclasta_n_s16(svbool_t pg, int16_t fallback, svint16_t data); int8_t svclasta_n_s8(svbool_t pg, int8_t fallback, svint8_t data); float64_t svclasta_n_f64(svbool_t pg, float64_t fallback, svfloat64_t data); float32_t svclasta_n_f32(svbool_t pg, float32_t fallback, svfloat32_t data); //float16_t svclasta_n_f16(svbool_t pg, float16_t fallback, svfloat16_t data); int32_t svclasta_n_s32(svbool_t pg, int32_t fallback, svint32_t data); svuint64_t svclasta_u64(svbool_t pg, 
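// Usage sketch (illustrative only): compact the active lanes of a vector into the
// lowest-numbered elements and store just those values contiguously. Excluded
// from compilation.
#if 0
static void example_compact_store_f32(svbool_t pg, svfloat32_t data, float32_t *dst)
{
    svfloat32_t packed = svcompact_f32(pg, data);        // active elements first, zero fill
    uint64_t count = svcntp_b32(svptrue_b32(), pg);      // how many lanes were active
    svbool_t store_pg = svwhilelt_b32_u64(0, count);     // predicate covering 'count' lanes
    svst1_f32(store_pg, dst, packed);                    // contiguous predicated store
}
#endif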
svuint64_t fallback, svuint64_t data); //bfloat16_t svclasta_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data); svint16_t svclasta_s16(svbool_t pg, svint16_t fallback, svint16_t data); svint8_t svclasta_s8(svbool_t pg, svint8_t fallback, svint8_t data); svfloat64_t svclasta_f64(svbool_t pg, svfloat64_t fallback, svfloat64_t data); svfloat32_t svclasta_f32(svbool_t pg, svfloat32_t fallback, svfloat32_t data); svint32_t svclasta_s32(svbool_t pg, svint32_t fallback, svint32_t data); svbfloat16_t svclasta_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data); svuint16_t svclasta_u16(svbool_t pg, svuint16_t fallback, svuint16_t data); svuint32_t svclasta_u32(svbool_t pg, svuint32_t fallback, svuint32_t data); svfloat16_t svclasta_f16(svbool_t pg, svfloat16_t fallback, svfloat16_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svclasta(pg, fallback, data) _Generic((data), \ svint64_t: _Generic((fallback), \ svint64_t: svclasta_s64, \ int64_t: svclasta_n_s64, \ default: __assume(0)), \ svuint8_t: _Generic((fallback), \ svuint8_t: svclasta_u8, \ uint8_t: svclasta_n_u8, \ default: __assume(0)), \ svuint64_t: _Generic((fallback), \ uint64_t: svclasta_n_u64, \ svuint64_t: svclasta_u64, \ default: __assume(0)), \ svuint32_t: _Generic((fallback), \ uint32_t: svclasta_n_u32, \ svuint32_t: svclasta_u32, \ default: __assume(0)), \ svuint16_t: _Generic((fallback), \ uint16_t: svclasta_n_u16, \ svuint16_t: svclasta_u16, \ default: __assume(0)), \ svint16_t: _Generic((fallback), \ int16_t: svclasta_n_s16, \ svint16_t: svclasta_s16, \ default: __assume(0)), \ svint8_t: _Generic((fallback), \ int8_t: svclasta_n_s8, \ svint8_t: svclasta_s8, \ default: __assume(0)), \ svfloat64_t: _Generic((fallback), \ float64_t: svclasta_n_f64, \ svfloat64_t: svclasta_f64, \ default: __assume(0)), \ svfloat32_t: _Generic((fallback), \ float32_t: svclasta_n_f32, \ svfloat32_t: svclasta_f32, \ default: __assume(0)), \ svint32_t: _Generic((fallback), \ int32_t: svclasta_n_s32, \ svint32_t: svclasta_s32, \ default: __assume(0)), \ svbfloat16_t: svclasta_bf16, \ svfloat16_t: svclasta_f16, \ default: __assume(0) \ )(pg, fallback, data) #endif // sve: Predication / Element selection / Extract element after last active element: Extract element after last int8_t svlasta_s8(svbool_t pg, svint8_t op); //bfloat16_t svlasta_bf16(svbool_t pg, svbfloat16_t op); //float16_t svlasta_f16(svbool_t pg, svfloat16_t op); float32_t svlasta_f32(svbool_t pg, svfloat32_t op); float64_t svlasta_f64(svbool_t pg, svfloat64_t op); uint64_t svlasta_u64(svbool_t pg, svuint64_t op); int64_t svlasta_s64(svbool_t pg, svint64_t op); int16_t svlasta_s16(svbool_t pg, svint16_t op); int32_t svlasta_s32(svbool_t pg, svint32_t op); uint16_t svlasta_u16(svbool_t pg, svuint16_t op); uint32_t svlasta_u32(svbool_t pg, svuint32_t op); uint8_t svlasta_u8(svbool_t pg, svuint8_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svlasta(pg, op) _Generic((op), \ svint8_t: svlasta_s8, \ svfloat32_t: svlasta_f32, \ svfloat64_t: svlasta_f64, \ svuint64_t: svlasta_u64, \ svint64_t: svlasta_s64, \ svint16_t: svlasta_s16, \ svint32_t: svlasta_s32, \ svuint16_t: svlasta_u16, \ svuint32_t: svlasta_u32, \ svuint8_t: svlasta_u8, \ default: __assume(0) \ )(pg, op) #endif // sve: Predication / Element selection / Extract last active element: Conditionally extract last element float32_t svclastb_n_f32(svbool_t pg, float32_t fallback, svfloat32_t data); uint64_t svclastb_n_u64(svbool_t pg, uint64_t fallback, svuint64_t 
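// Usage sketch (illustrative only): svclasta extracts the element after the last
// active lane, falling back to a caller-supplied scalar when no lane is active.
// Excluded from compilation.
#if 0
static int32_t example_element_after_last(svbool_t pg, svint32_t v, int32_t fallback)
{
    return svclasta_n_s32(pg, fallback, v);
}
#endif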
data); uint32_t svclastb_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data); uint16_t svclastb_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data); uint8_t svclastb_n_u8(svbool_t pg, uint8_t fallback, svuint8_t data); int64_t svclastb_n_s64(svbool_t pg, int64_t fallback, svint64_t data); int32_t svclastb_n_s32(svbool_t pg, int32_t fallback, svint32_t data); int16_t svclastb_n_s16(svbool_t pg, int16_t fallback, svint16_t data); int8_t svclastb_n_s8(svbool_t pg, int8_t fallback, svint8_t data); float64_t svclastb_n_f64(svbool_t pg, float64_t fallback, svfloat64_t data); //float16_t svclastb_n_f16(svbool_t pg, float16_t fallback, svfloat16_t data); svbfloat16_t svclastb_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data); svuint64_t svclastb_u64(svbool_t pg, svuint64_t fallback, svuint64_t data); svuint32_t svclastb_u32(svbool_t pg, svuint32_t fallback, svuint32_t data); svuint8_t svclastb_u8(svbool_t pg, svuint8_t fallback, svuint8_t data); svint64_t svclastb_s64(svbool_t pg, svint64_t fallback, svint64_t data); svint32_t svclastb_s32(svbool_t pg, svint32_t fallback, svint32_t data); svint16_t svclastb_s16(svbool_t pg, svint16_t fallback, svint16_t data); svint8_t svclastb_s8(svbool_t pg, svint8_t fallback, svint8_t data); svfloat64_t svclastb_f64(svbool_t pg, svfloat64_t fallback, svfloat64_t data); svfloat32_t svclastb_f32(svbool_t pg, svfloat32_t fallback, svfloat32_t data); svfloat16_t svclastb_f16(svbool_t pg, svfloat16_t fallback, svfloat16_t data); //bfloat16_t svclastb_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data); svuint16_t svclastb_u16(svbool_t pg, svuint16_t fallback, svuint16_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svclastb(pg, fallback, data) _Generic((data), \ svfloat32_t: _Generic((fallback), \ float32_t: svclastb_n_f32, \ svfloat32_t: svclastb_f32, \ default: __assume(0)), \ svuint64_t: _Generic((fallback), \ uint64_t: svclastb_n_u64, \ svuint64_t: svclastb_u64, \ default: __assume(0)), \ svuint32_t: _Generic((fallback), \ uint32_t: svclastb_n_u32, \ svuint32_t: svclastb_u32, \ default: __assume(0)), \ svuint16_t: _Generic((fallback), \ uint16_t: svclastb_n_u16, \ svuint16_t: svclastb_u16, \ default: __assume(0)), \ svuint8_t: _Generic((fallback), \ uint8_t: svclastb_n_u8, \ svuint8_t: svclastb_u8, \ default: __assume(0)), \ svint64_t: _Generic((fallback), \ int64_t: svclastb_n_s64, \ svint64_t: svclastb_s64, \ default: __assume(0)), \ svint32_t: _Generic((fallback), \ int32_t: svclastb_n_s32, \ svint32_t: svclastb_s32, \ default: __assume(0)), \ svint16_t: _Generic((fallback), \ int16_t: svclastb_n_s16, \ svint16_t: svclastb_s16, \ default: __assume(0)), \ svint8_t: _Generic((fallback), \ int8_t: svclastb_n_s8, \ svint8_t: svclastb_s8, \ default: __assume(0)), \ svfloat64_t: _Generic((fallback), \ float64_t: svclastb_n_f64, \ svfloat64_t: svclastb_f64, \ default: __assume(0)), \ svbfloat16_t: svclastb_bf16, \ svfloat16_t: svclastb_f16, \ default: __assume(0) \ )(pg, fallback, data) #endif // sve: Predication / Element selection / Extract last active element: Extract last element //bfloat16_t svlastb_bf16(svbool_t pg, svbfloat16_t op); //float16_t svlastb_f16(svbool_t pg, svfloat16_t op); int32_t svlastb_s32(svbool_t pg, svint32_t op); float32_t svlastb_f32(svbool_t pg, svfloat32_t op); int16_t svlastb_s16(svbool_t pg, svint16_t op); float64_t svlastb_f64(svbool_t pg, svfloat64_t op); int64_t svlastb_s64(svbool_t pg, svint64_t op); uint8_t svlastb_u8(svbool_t pg, svuint8_t op); int8_t svlastb_s8(svbool_t pg, 
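// Usage sketch (illustrative only): carry the last value that satisfied a
// condition across loop iterations with svclastb. svcntw(), svld1_s32() and
// svcmple_n_s32() are assumed to be declared elsewhere in this header. Excluded
// from compilation.
#if 0
static int32_t example_last_value_not_above(const int32_t *src, int64_t n, int32_t limit)
{
    int32_t last = 0;                                 // returned if nothing qualifies
    for (int64_t i = 0; i < n; i += (int64_t)svcntw())
    {
        svbool_t pg = svwhilelt_b32_s64(i, n);
        svint32_t v = svld1_s32(pg, src + i);
        svbool_t le = svcmple_n_s32(pg, v, limit);    // lanes with v <= limit
        last = svclastb_n_s32(le, last, v);           // keep the previous value if none match
    }
    return last;
}
#endif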
svint8_t op); uint32_t svlastb_u32(svbool_t pg, svuint32_t op); uint64_t svlastb_u64(svbool_t pg, svuint64_t op); uint16_t svlastb_u16(svbool_t pg, svuint16_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svlastb(pg, op) _Generic((op), \ svint32_t: svlastb_s32, \ svfloat32_t: svlastb_f32, \ svint16_t: svlastb_s16, \ svfloat64_t: svlastb_f64, \ svint64_t: svlastb_s64, \ svuint8_t: svlastb_u8, \ svint8_t: svlastb_s8, \ svuint32_t: svlastb_u32, \ svuint64_t: svlastb_u64, \ svuint16_t: svlastb_u16, \ default: __assume(0) \ )(pg, op) #endif // sve: Predication / Element selection / Select between two vectors: Conditionally select elements svfloat32_t svsel_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat64_t svsel_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svbfloat16_t svsel_bf16(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2); svfloat16_t svsel_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svint8_t svsel_s8(svbool_t pg, svint8_t op1, svint8_t op2); svint32_t svsel_s32(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svsel_s64(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svsel_u8(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svsel_u16(svbool_t pg, svuint16_t op1, svuint16_t op2); svint16_t svsel_s16(svbool_t pg, svint16_t op1, svint16_t op2); svuint64_t svsel_u64(svbool_t pg, svuint64_t op1, svuint64_t op2); svbool_t svsel_b(svbool_t pg, svbool_t op1, svbool_t op2); svuint32_t svsel_u32(svbool_t pg, svuint32_t op1, svuint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsel(pg, op1, op2) _Generic((op2), \ svfloat32_t: svsel_f32, \ svfloat64_t: svsel_f64, \ svbfloat16_t: svsel_bf16, \ svfloat16_t: svsel_f16, \ svint8_t: svsel_s8, \ svint32_t: svsel_s32, \ svint64_t: svsel_s64, \ svuint8_t: svsel_u8, \ svuint16_t: svsel_u16, \ svint16_t: svsel_s16, \ svuint64_t: svsel_u64, \ svbool_t: svsel_b, \ svuint32_t: svsel_u32, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Predication / Initialization / Initialize from booleans: Broadcast a quadword of scalars svbool_t svdupq_n_b8(bool x0, bool x1, bool x2, bool x3, bool x4, bool x5, bool x6, bool x7, bool x8, bool x9, bool x10, bool x11, bool x12, bool x13, bool x14, bool x15); svbool_t svdupq_n_b16(bool x0, bool x1, bool x2, bool x3, bool x4, bool x5, bool x6, bool x7); svbool_t svdupq_n_b32(bool x0, bool x1, bool x2, bool x3); svbool_t svdupq_n_b64(bool x0, bool x1); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svdupq_b8(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) _Generic((x15), \ bool: svdupq_n_b8, \ default: __assume(0) \ )(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) #define svdupq_b16(x0, x1, x2, x3, x4, x5, x6, x7) _Generic((x7), \ bool: svdupq_n_b16, \ default: __assume(0) \ )(x0, x1, x2, x3, x4, x5, x6, x7) #define svdupq_b32(x0, x1, x2, x3) _Generic((x3), \ bool: svdupq_n_b32, \ default: __assume(0) \ )(x0, x1, x2, x3) #define svdupq_b64(x0, x1) _Generic((x1), \ bool: svdupq_n_b64, \ default: __assume(0) \ )(x0, x1) #endif // sve: Predication / Initialization / Initialize from booleans: Broadcast a scalar value svbool_t svdup_n_b8(bool op); svbool_t svdup_n_b16(bool op); svbool_t svdup_n_b32(bool op); svbool_t svdup_n_b64(bool op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svdup_b8(op) _Generic((op), \ bool: svdup_n_b8, \ default: __assume(0) \ )(op) #define svdup_b16(op) _Generic((op), \ bool: svdup_n_b16, \ default: 
__assume(0) \ )(op) #define svdup_b32(op) _Generic((op), \ bool: svdup_n_b32, \ default: __assume(0) \ )(op) #define svdup_b64(op) _Generic((op), \ bool: svdup_n_b64, \ default: __assume(0) \ )(op) #endif // sve: Predication / Initialization / Initialize to pattern: Set all predicate elements to false svbool_t svpfalse_b(void); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svpfalse() svpfalse_b() #endif // sve: Predication / Initialization / Initialize to pattern: Set predicate elements to true svbool_t svptrue_b8(void); svbool_t svptrue_b16(void); svbool_t svptrue_b32(void); svbool_t svptrue_b64(void); svbool_t svptrue_pat_b8(enum svpattern pattern); svbool_t svptrue_pat_b16(enum svpattern pattern); svbool_t svptrue_pat_b32(enum svpattern pattern); svbool_t svptrue_pat_b64(enum svpattern pattern); // sve: Predication / Initialization / While counter meets condition (forward): While incrementing scalar is less than svbool_t svwhilelt_b8_s32(int32_t op1, int32_t op2); svbool_t svwhilelt_b8_s64(int64_t op1, int64_t op2); svbool_t svwhilelt_b8_u32(uint32_t op1, uint32_t op2); svbool_t svwhilelt_b8_u64(uint64_t op1, uint64_t op2); svbool_t svwhilelt_b16_s32(int32_t op1, int32_t op2); svbool_t svwhilelt_b16_s64(int64_t op1, int64_t op2); svbool_t svwhilelt_b16_u32(uint32_t op1, uint32_t op2); svbool_t svwhilelt_b16_u64(uint64_t op1, uint64_t op2); svbool_t svwhilelt_b32_s32(int32_t op1, int32_t op2); svbool_t svwhilelt_b32_s64(int64_t op1, int64_t op2); svbool_t svwhilelt_b32_u32(uint32_t op1, uint32_t op2); svbool_t svwhilelt_b32_u64(uint64_t op1, uint64_t op2); svbool_t svwhilelt_b64_s32(int32_t op1, int32_t op2); svbool_t svwhilelt_b64_s64(int64_t op1, int64_t op2); svbool_t svwhilelt_b64_u32(uint32_t op1, uint32_t op2); svbool_t svwhilelt_b64_u64(uint64_t op1, uint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svwhilelt_b8(op1, op2) _Generic((op2), \ int32_t: svwhilelt_b8_s32, \ int64_t: svwhilelt_b8_s64, \ uint32_t: svwhilelt_b8_u32, \ uint64_t: svwhilelt_b8_u64, \ default: __assume(0) \ )(op1, op2) #define svwhilelt_b16(op1, op2) _Generic((op2), \ int32_t: svwhilelt_b16_s32, \ int64_t: svwhilelt_b16_s64, \ uint32_t: svwhilelt_b16_u32, \ uint64_t: svwhilelt_b16_u64, \ default: __assume(0) \ )(op1, op2) #define svwhilelt_b32(op1, op2) _Generic((op2), \ int32_t: svwhilelt_b32_s32, \ int64_t: svwhilelt_b32_s64, \ uint32_t: svwhilelt_b32_u32, \ uint64_t: svwhilelt_b32_u64, \ default: __assume(0) \ )(op1, op2) #define svwhilelt_b64(op1, op2) _Generic((op2), \ int32_t: svwhilelt_b64_s32, \ int64_t: svwhilelt_b64_s64, \ uint32_t: svwhilelt_b64_u32, \ uint64_t: svwhilelt_b64_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Predication / Initialization / While counter meets condition (forward): While incrementing scalar is less than or equal to svbool_t svwhilele_b8_s32(int32_t op1, int32_t op2); svbool_t svwhilele_b8_s64(int64_t op1, int64_t op2); svbool_t svwhilele_b8_u32(uint32_t op1, uint32_t op2); svbool_t svwhilele_b8_u64(uint64_t op1, uint64_t op2); svbool_t svwhilele_b16_s32(int32_t op1, int32_t op2); svbool_t svwhilele_b16_s64(int64_t op1, int64_t op2); svbool_t svwhilele_b16_u32(uint32_t op1, uint32_t op2); svbool_t svwhilele_b16_u64(uint64_t op1, uint64_t op2); svbool_t svwhilele_b32_s32(int32_t op1, int32_t op2); svbool_t svwhilele_b32_s64(int64_t op1, int64_t op2); svbool_t svwhilele_b32_u32(uint32_t op1, uint32_t op2); svbool_t svwhilele_b32_u64(uint64_t op1, uint64_t op2); svbool_t svwhilele_b64_s32(int32_t op1, int32_t 
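// Usage sketch (illustrative only): the canonical while-less-than loop, scaling a
// buffer in place. svcntw(), svld1_f32() and svmul_n_f32_x() are assumed to be
// declared elsewhere in this header. Excluded from compilation.
#if 0
static void example_scale_f32(float32_t *data, int64_t n, float32_t factor)
{
    for (int64_t i = 0; i < n; i += (int64_t)svcntw())
    {
        svbool_t pg = svwhilelt_b32_s64(i, n);   // lanes for indices i..n-1, capped at VL
        svfloat32_t v = svld1_f32(pg, data + i);
        v = svmul_n_f32_x(pg, v, factor);
        svst1_f32(pg, data + i, v);
    }
}
#endif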
op2); svbool_t svwhilele_b64_s64(int64_t op1, int64_t op2); svbool_t svwhilele_b64_u32(uint32_t op1, uint32_t op2); svbool_t svwhilele_b64_u64(uint64_t op1, uint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svwhilele_b8(op1, op2) _Generic((op2), \ int32_t: svwhilele_b8_s32, \ int64_t: svwhilele_b8_s64, \ uint32_t: svwhilele_b8_u32, \ uint64_t: svwhilele_b8_u64, \ default: __assume(0) \ )(op1, op2) #define svwhilele_b16(op1, op2) _Generic((op2), \ int32_t: svwhilele_b16_s32, \ int64_t: svwhilele_b16_s64, \ uint32_t: svwhilele_b16_u32, \ uint64_t: svwhilele_b16_u64, \ default: __assume(0) \ )(op1, op2) #define svwhilele_b32(op1, op2) _Generic((op2), \ int32_t: svwhilele_b32_s32, \ int64_t: svwhilele_b32_s64, \ uint32_t: svwhilele_b32_u32, \ uint64_t: svwhilele_b32_u64, \ default: __assume(0) \ )(op1, op2) #define svwhilele_b64(op1, op2) _Generic((op2), \ int32_t: svwhilele_b64_s32, \ int64_t: svwhilele_b64_s64, \ uint32_t: svwhilele_b64_u32, \ uint64_t: svwhilele_b64_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Predication / Partitioning / Break after first true condition: Break after first true condition svbool_t svbrka_b_m(svbool_t inactive, svbool_t pg, svbool_t op); svbool_t svbrka_b_z(svbool_t pg, svbool_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbrka_m(inactive, pg, op) _Generic((op), \ svbool_t: svbrka_b_m, \ default: __assume(0) \ )(inactive, pg, op) #define svbrka_z(pg, op) _Generic((op), \ svbool_t: svbrka_b_z, \ default: __assume(0) \ )(pg, op) #endif // sve: Predication / Partitioning / Break after first true condition: Break after first true condition, propagating from previous partition svbool_t svbrkpa_b_z(svbool_t pg, svbool_t op1, svbool_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbrkpa_z(pg, op1, op2) _Generic((op2), \ svbool_t: svbrkpa_b_z, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Predication / Partitioning / Break before first true condition: Break before first true condition svbool_t svbrkb_b_m(svbool_t inactive, svbool_t pg, svbool_t op); svbool_t svbrkb_b_z(svbool_t pg, svbool_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbrkb_m(inactive, pg, op) _Generic((op), \ svbool_t: svbrkb_b_m, \ default: __assume(0) \ )(inactive, pg, op) #define svbrkb_z(pg, op) _Generic((op), \ svbool_t: svbrkb_b_z, \ default: __assume(0) \ )(pg, op) #endif // sve: Predication / Partitioning / Break before first true condition: Break before first true condition, propagating from previous partition svbool_t svbrkpb_b_z(svbool_t pg, svbool_t op1, svbool_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbrkpb_z(pg, op1, op2) _Generic((op2), \ svbool_t: svbrkpb_b_z, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Predication / Partitioning / Propagate break to next partition: Propagate break to next partition svbool_t svbrkn_b_z(svbool_t pg, svbool_t op1, svbool_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbrkn_z(pg, op1, op2) _Generic((op2), \ svbool_t: svbrkn_b_z, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Predication / Partitioning / Single-element partitioning: Find next active predicate svbool_t svpnext_b8(svbool_t pg, svbool_t op); svbool_t svpnext_b16(svbool_t pg, svbool_t op); svbool_t svpnext_b32(svbool_t pg, svbool_t op); svbool_t svpnext_b64(svbool_t pg, svbool_t op); // sve: Predication / Partitioning / Single-element partitioning: Set the 
first active predicate element to true svbool_t svpfirst_b(svbool_t pg, svbool_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svpfirst(pg, op) _Generic((op), \ svbool_t: svpfirst_b, \ default: __assume(0) \ )(pg, op) #endif // sve: Predication / Testing: Test whether any active element is true bool svptest_any(svbool_t pg, svbool_t op); // sve: Predication / Testing: Test whether the first active element is true bool svptest_first(svbool_t pg, svbool_t op); // sve: Predication / Testing: Test whether the last active element is true bool svptest_last(svbool_t pg, svbool_t op); // sve: Prefetch / Consecutive: Prefetch bytes void svprfb(svbool_t pg, const void *base, enum svprfop op); void svprfb_vnum(svbool_t pg, const void *base, int64_t vnum, enum svprfop op); // sve: Prefetch / Consecutive: Prefetch doublewords void svprfd(svbool_t pg, const void *base, enum svprfop op); void svprfd_vnum(svbool_t pg, const void *base, int64_t vnum, enum svprfop op); // sve: Prefetch / Consecutive: Prefetch halfwords void svprfh(svbool_t pg, const void *base, enum svprfop op); void svprfh_vnum(svbool_t pg, const void *base, int64_t vnum, enum svprfop op); // sve: Prefetch / Consecutive: Prefetch words void svprfw(svbool_t pg, const void *base, enum svprfop op); void svprfw_vnum(svbool_t pg, const void *base, int64_t vnum, enum svprfop op); // sve: Prefetch / Gather: Prefetch bytes void svprfb_gather_s32offset(svbool_t pg, const void *base, svint32_t offsets, enum svprfop op); void svprfb_gather_s64offset(svbool_t pg, const void *base, svint64_t offsets, enum svprfop op); void svprfb_gather_u32base(svbool_t pg, svuint32_t bases, enum svprfop op); void svprfb_gather_u32base_offset(svbool_t pg, svuint32_t bases, int64_t offset, enum svprfop op); void svprfb_gather_u32offset(svbool_t pg, const void *base, svuint32_t offsets, enum svprfop op); void svprfb_gather_u64base(svbool_t pg, svuint64_t bases, enum svprfop op); void svprfb_gather_u64base_offset(svbool_t pg, svuint64_t bases, int64_t offset, enum svprfop op); void svprfb_gather_u64offset(svbool_t pg, const void *base, svuint64_t offsets, enum svprfop op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svprfb_gather_offset(pg, base, offsets, op) _Generic((offsets), \ svint32_t: svprfb_gather_s32offset, \ svint64_t: svprfb_gather_s64offset, \ int64_t: _Generic((base), \ svuint32_t: svprfb_gather_u32base_offset, \ svuint64_t: svprfb_gather_u64base_offset, \ default: __assume(0)), \ svuint32_t: svprfb_gather_u32offset, \ svuint64_t: svprfb_gather_u64offset, \ default: __assume(0) \ )(pg, base, offsets, op) #define svprfb_gather(pg, bases, op) _Generic((bases), \ svuint32_t: svprfb_gather_u32base, \ svuint64_t: svprfb_gather_u64base, \ default: __assume(0) \ )(pg, bases, op) #endif // sve: Prefetch / Gather: Prefetch doublewords void svprfd_gather_s32index(svbool_t pg, const void *base, svint32_t indices, enum svprfop op); void svprfd_gather_s64index(svbool_t pg, const void *base, svint64_t indices, enum svprfop op); void svprfd_gather_u32base(svbool_t pg, svuint32_t bases, enum svprfop op); void svprfd_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index, enum svprfop op); void svprfd_gather_u32index(svbool_t pg, const void *base, svuint32_t indices, enum svprfop op); void svprfd_gather_u64base(svbool_t pg, svuint64_t bases, enum svprfop op); void svprfd_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index, enum svprfop op); void svprfd_gather_u64index(svbool_t pg, const void *base, 
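// Usage sketch (illustrative only): issue a predicated streaming prefetch a fixed
// number of elements ahead of the current position, masking off anything past the
// end of the buffer. Excluded from compilation.
#if 0
static void example_prefetch_ahead(const uint32_t *src, int64_t i, int64_t n)
{
    const int64_t lookahead = 256;                       // elements ahead of index i
    svbool_t pg = svwhilelt_b32_s64(i + lookahead, n);   // false once we pass the end
    svprfw(pg, src + i + lookahead, SV_PLDL1STRM);       // word prefetch, L1 streaming hint
}
#endif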
svuint64_t indices, enum svprfop op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svprfd_gather_index(pg, base, indices, op) _Generic((indices), \ svint32_t: svprfd_gather_s32index, \ svint64_t: svprfd_gather_s64index, \ int64_t: _Generic((base), \ svuint32_t: svprfd_gather_u32base_index, \ svuint64_t: svprfd_gather_u64base_index, \ default: __assume(0)), \ svuint32_t: svprfd_gather_u32index, \ svuint64_t: svprfd_gather_u64index, \ default: __assume(0) \ )(pg, base, indices, op) #define svprfd_gather(pg, bases, op) _Generic((bases), \ svuint32_t: svprfd_gather_u32base, \ svuint64_t: svprfd_gather_u64base, \ default: __assume(0) \ )(pg, bases, op) #endif // sve: Prefetch / Gather: Prefetch halfwords void svprfh_gather_s32index(svbool_t pg, const void *base, svint32_t indices, enum svprfop op); void svprfh_gather_s64index(svbool_t pg, const void *base, svint64_t indices, enum svprfop op); void svprfh_gather_u32base(svbool_t pg, svuint32_t bases, enum svprfop op); void svprfh_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index, enum svprfop op); void svprfh_gather_u32index(svbool_t pg, const void *base, svuint32_t indices, enum svprfop op); void svprfh_gather_u64base(svbool_t pg, svuint64_t bases, enum svprfop op); void svprfh_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index, enum svprfop op); void svprfh_gather_u64index(svbool_t pg, const void *base, svuint64_t indices, enum svprfop op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svprfh_gather_index(pg, base, indices, op) _Generic((indices), \ svint32_t: svprfh_gather_s32index, \ svint64_t: svprfh_gather_s64index, \ int64_t: _Generic((base), \ svuint32_t: svprfh_gather_u32base_index, \ svuint64_t: svprfh_gather_u64base_index, \ default: __assume(0)), \ svuint32_t: svprfh_gather_u32index, \ svuint64_t: svprfh_gather_u64index, \ default: __assume(0) \ )(pg, base, indices, op) #define svprfh_gather(pg, bases, op) _Generic((bases), \ svuint32_t: svprfh_gather_u32base, \ svuint64_t: svprfh_gather_u64base, \ default: __assume(0) \ )(pg, bases, op) #endif // sve: Prefetch / Gather: Prefetch words void svprfw_gather_s32index(svbool_t pg, const void *base, svint32_t indices, enum svprfop op); void svprfw_gather_s64index(svbool_t pg, const void *base, svint64_t indices, enum svprfop op); void svprfw_gather_u32base(svbool_t pg, svuint32_t bases, enum svprfop op); void svprfw_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index, enum svprfop op); void svprfw_gather_u32index(svbool_t pg, const void *base, svuint32_t indices, enum svprfop op); void svprfw_gather_u64base(svbool_t pg, svuint64_t bases, enum svprfop op); void svprfw_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index, enum svprfop op); void svprfw_gather_u64index(svbool_t pg, const void *base, svuint64_t indices, enum svprfop op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svprfw_gather_index(pg, base, indices, op) _Generic((indices), \ svint32_t: svprfw_gather_s32index, \ svint64_t: svprfw_gather_s64index, \ int64_t: _Generic((base), \ svuint32_t: svprfw_gather_u32base_index, \ svuint64_t: svprfw_gather_u64base_index, \ default: __assume(0)), \ svuint32_t: svprfw_gather_u32index, \ svuint64_t: svprfw_gather_u64index, \ default: __assume(0) \ )(pg, base, indices, op) #define svprfw_gather(pg, bases, op) _Generic((bases), \ svuint32_t: svprfw_gather_u32base, \ svuint64_t: svprfw_gather_u64base, \ default: __assume(0) \ )(pg, bases, op) #endif // sve: Shift / 
Left / Vector shift left: Logical shift left svint16_t svlsl_n_s16_m(svbool_t pg, svint16_t op1, uint16_t op2); svint16_t svlsl_n_s16_x(svbool_t pg, svint16_t op1, uint16_t op2); svint16_t svlsl_n_s16_z(svbool_t pg, svint16_t op1, uint16_t op2); svint16_t svlsl_s16_m(svbool_t pg, svint16_t op1, svuint16_t op2); svint16_t svlsl_s16_x(svbool_t pg, svint16_t op1, svuint16_t op2); svint16_t svlsl_s16_z(svbool_t pg, svint16_t op1, svuint16_t op2); svint16_t svlsl_wide_n_s16_m(svbool_t pg, svint16_t op1, uint64_t op2); svint16_t svlsl_wide_n_s16_x(svbool_t pg, svint16_t op1, uint64_t op2); svint16_t svlsl_wide_n_s16_z(svbool_t pg, svint16_t op1, uint64_t op2); svint16_t svlsl_wide_s16_m(svbool_t pg, svint16_t op1, svuint64_t op2); svint16_t svlsl_wide_s16_x(svbool_t pg, svint16_t op1, svuint64_t op2); svint16_t svlsl_wide_s16_z(svbool_t pg, svint16_t op1, svuint64_t op2); svint32_t svlsl_n_s32_m(svbool_t pg, svint32_t op1, uint32_t op2); svint32_t svlsl_n_s32_x(svbool_t pg, svint32_t op1, uint32_t op2); svint32_t svlsl_n_s32_z(svbool_t pg, svint32_t op1, uint32_t op2); svint32_t svlsl_s32_m(svbool_t pg, svint32_t op1, svuint32_t op2); svint32_t svlsl_s32_x(svbool_t pg, svint32_t op1, svuint32_t op2); svint32_t svlsl_s32_z(svbool_t pg, svint32_t op1, svuint32_t op2); svint32_t svlsl_wide_n_s32_m(svbool_t pg, svint32_t op1, uint64_t op2); svint32_t svlsl_wide_n_s32_x(svbool_t pg, svint32_t op1, uint64_t op2); svint32_t svlsl_wide_n_s32_z(svbool_t pg, svint32_t op1, uint64_t op2); svint32_t svlsl_wide_s32_m(svbool_t pg, svint32_t op1, svuint64_t op2); svint32_t svlsl_wide_s32_x(svbool_t pg, svint32_t op1, svuint64_t op2); svint32_t svlsl_wide_s32_z(svbool_t pg, svint32_t op1, svuint64_t op2); svint64_t svlsl_n_s64_m(svbool_t pg, svint64_t op1, uint64_t op2); svint64_t svlsl_n_s64_x(svbool_t pg, svint64_t op1, uint64_t op2); svint64_t svlsl_n_s64_z(svbool_t pg, svint64_t op1, uint64_t op2); svint64_t svlsl_s64_m(svbool_t pg, svint64_t op1, svuint64_t op2); svint64_t svlsl_s64_x(svbool_t pg, svint64_t op1, svuint64_t op2); svint64_t svlsl_s64_z(svbool_t pg, svint64_t op1, svuint64_t op2); svint8_t svlsl_n_s8_m(svbool_t pg, svint8_t op1, uint8_t op2); svint8_t svlsl_n_s8_x(svbool_t pg, svint8_t op1, uint8_t op2); svint8_t svlsl_n_s8_z(svbool_t pg, svint8_t op1, uint8_t op2); svint8_t svlsl_s8_m(svbool_t pg, svint8_t op1, svuint8_t op2); svint8_t svlsl_s8_x(svbool_t pg, svint8_t op1, svuint8_t op2); svint8_t svlsl_s8_z(svbool_t pg, svint8_t op1, svuint8_t op2); svint8_t svlsl_wide_n_s8_m(svbool_t pg, svint8_t op1, uint64_t op2); svint8_t svlsl_wide_n_s8_x(svbool_t pg, svint8_t op1, uint64_t op2); svint8_t svlsl_wide_n_s8_z(svbool_t pg, svint8_t op1, uint64_t op2); svint8_t svlsl_wide_s8_m(svbool_t pg, svint8_t op1, svuint64_t op2); svint8_t svlsl_wide_s8_x(svbool_t pg, svint8_t op1, svuint64_t op2); svint8_t svlsl_wide_s8_z(svbool_t pg, svint8_t op1, svuint64_t op2); svuint16_t svlsl_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint16_t svlsl_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint16_t svlsl_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint16_t svlsl_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint16_t svlsl_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint16_t svlsl_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint16_t svlsl_wide_n_u16_m(svbool_t pg, svuint16_t op1, uint64_t op2); svuint16_t svlsl_wide_n_u16_x(svbool_t pg, svuint16_t op1, uint64_t op2); svuint16_t svlsl_wide_n_u16_z(svbool_t pg, svuint16_t op1, uint64_t op2); svuint16_t 
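// Usage sketch (illustrative only): multiply the active lanes by a power of two
// with a logical shift left by an immediate count. Excluded from compilation.
#if 0
static svint32_t example_times_eight(svbool_t pg, svint32_t v)
{
    return svlsl_n_s32_x(pg, v, 3);   // v << 3 in the active lanes, inactive lanes unspecified
}
#endif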
svlsl_wide_u16_m(svbool_t pg, svuint16_t op1, svuint64_t op2); svuint16_t svlsl_wide_u16_x(svbool_t pg, svuint16_t op1, svuint64_t op2); svuint16_t svlsl_wide_u16_z(svbool_t pg, svuint16_t op1, svuint64_t op2); svuint32_t svlsl_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint32_t svlsl_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint32_t svlsl_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint32_t svlsl_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint32_t svlsl_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint32_t svlsl_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint32_t svlsl_wide_n_u32_m(svbool_t pg, svuint32_t op1, uint64_t op2); svuint32_t svlsl_wide_n_u32_x(svbool_t pg, svuint32_t op1, uint64_t op2); svuint32_t svlsl_wide_n_u32_z(svbool_t pg, svuint32_t op1, uint64_t op2); svuint32_t svlsl_wide_u32_m(svbool_t pg, svuint32_t op1, svuint64_t op2); svuint32_t svlsl_wide_u32_x(svbool_t pg, svuint32_t op1, svuint64_t op2); svuint32_t svlsl_wide_u32_z(svbool_t pg, svuint32_t op1, svuint64_t op2); svuint64_t svlsl_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint64_t svlsl_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svuint64_t svlsl_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint64_t svlsl_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint64_t svlsl_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint64_t svlsl_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint8_t svlsl_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svuint8_t svlsl_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svuint8_t svlsl_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svuint8_t svlsl_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint8_t svlsl_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint8_t svlsl_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint8_t svlsl_wide_n_u8_m(svbool_t pg, svuint8_t op1, uint64_t op2); svuint8_t svlsl_wide_n_u8_x(svbool_t pg, svuint8_t op1, uint64_t op2); svuint8_t svlsl_wide_n_u8_z(svbool_t pg, svuint8_t op1, uint64_t op2); svuint8_t svlsl_wide_u8_m(svbool_t pg, svuint8_t op1, svuint64_t op2); svuint8_t svlsl_wide_u8_x(svbool_t pg, svuint8_t op1, svuint64_t op2); svuint8_t svlsl_wide_u8_z(svbool_t pg, svuint8_t op1, svuint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svlsl_m(pg, op1, op2) _Generic((op2), \ uint16_t: _Generic((op1), \ svint16_t: svlsl_n_s16_m, \ svuint16_t: svlsl_n_u16_m, \ default: __assume(0)), \ svuint16_t: _Generic((op1), \ svint16_t: svlsl_s16_m, \ svuint16_t: svlsl_u16_m, \ default: __assume(0)), \ uint32_t: _Generic((op1), \ svint32_t: svlsl_n_s32_m, \ svuint32_t: svlsl_n_u32_m, \ default: __assume(0)), \ svuint32_t: _Generic((op1), \ svint32_t: svlsl_s32_m, \ svuint32_t: svlsl_u32_m, \ default: __assume(0)), \ uint64_t: _Generic((op1), \ svint64_t: svlsl_n_s64_m, \ svuint64_t: svlsl_n_u64_m, \ default: __assume(0)), \ svuint64_t: _Generic((op1), \ svint64_t: svlsl_s64_m, \ svuint64_t: svlsl_u64_m, \ default: __assume(0)), \ uint8_t: _Generic((op1), \ svint8_t: svlsl_n_s8_m, \ svuint8_t: svlsl_n_u8_m, \ default: __assume(0)), \ svuint8_t: _Generic((op1), \ svint8_t: svlsl_s8_m, \ svuint8_t: svlsl_u8_m, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svlsl_x(pg, op1, op2) _Generic((op2), \ uint16_t: _Generic((op1), \ svint16_t: svlsl_n_s16_x, \ svuint16_t: svlsl_n_u16_x, \ default: __assume(0)), \ svuint16_t: _Generic((op1), \ svint16_t: svlsl_s16_x, \ svuint16_t: svlsl_u16_x, \ 
default: __assume(0)), \ uint32_t: _Generic((op1), \ svint32_t: svlsl_n_s32_x, \ svuint32_t: svlsl_n_u32_x, \ default: __assume(0)), \ svuint32_t: _Generic((op1), \ svint32_t: svlsl_s32_x, \ svuint32_t: svlsl_u32_x, \ default: __assume(0)), \ uint64_t: _Generic((op1), \ svint64_t: svlsl_n_s64_x, \ svuint64_t: svlsl_n_u64_x, \ default: __assume(0)), \ svuint64_t: _Generic((op1), \ svint64_t: svlsl_s64_x, \ svuint64_t: svlsl_u64_x, \ default: __assume(0)), \ uint8_t: _Generic((op1), \ svint8_t: svlsl_n_s8_x, \ svuint8_t: svlsl_n_u8_x, \ default: __assume(0)), \ svuint8_t: _Generic((op1), \ svint8_t: svlsl_s8_x, \ svuint8_t: svlsl_u8_x, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svlsl_z(pg, op1, op2) _Generic((op2), \ uint16_t: _Generic((op1), \ svint16_t: svlsl_n_s16_z, \ svuint16_t: svlsl_n_u16_z, \ default: __assume(0)), \ svuint16_t: _Generic((op1), \ svint16_t: svlsl_s16_z, \ svuint16_t: svlsl_u16_z, \ default: __assume(0)), \ uint32_t: _Generic((op1), \ svint32_t: svlsl_n_s32_z, \ svuint32_t: svlsl_n_u32_z, \ default: __assume(0)), \ svuint32_t: _Generic((op1), \ svint32_t: svlsl_s32_z, \ svuint32_t: svlsl_u32_z, \ default: __assume(0)), \ uint64_t: _Generic((op1), \ svint64_t: svlsl_n_s64_z, \ svuint64_t: svlsl_n_u64_z, \ default: __assume(0)), \ svuint64_t: _Generic((op1), \ svint64_t: svlsl_s64_z, \ svuint64_t: svlsl_u64_z, \ default: __assume(0)), \ uint8_t: _Generic((op1), \ svint8_t: svlsl_n_s8_z, \ svuint8_t: svlsl_n_u8_z, \ default: __assume(0)), \ svuint8_t: _Generic((op1), \ svint8_t: svlsl_s8_z, \ svuint8_t: svlsl_u8_z, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svlsl_wide_m(pg, op1, op2) _Generic((op2), \ uint64_t: _Generic((op1), \ svint16_t: svlsl_wide_n_s16_m, \ svint32_t: svlsl_wide_n_s32_m, \ svint8_t: svlsl_wide_n_s8_m, \ svuint16_t: svlsl_wide_n_u16_m, \ svuint32_t: svlsl_wide_n_u32_m, \ svuint8_t: svlsl_wide_n_u8_m, \ default: __assume(0)), \ svuint64_t: _Generic((op1), \ svint16_t: svlsl_wide_s16_m, \ svint32_t: svlsl_wide_s32_m, \ svint8_t: svlsl_wide_s8_m, \ svuint16_t: svlsl_wide_u16_m, \ svuint32_t: svlsl_wide_u32_m, \ svuint8_t: svlsl_wide_u8_m, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svlsl_wide_x(pg, op1, op2) _Generic((op2), \ uint64_t: _Generic((op1), \ svint16_t: svlsl_wide_n_s16_x, \ svint32_t: svlsl_wide_n_s32_x, \ svint8_t: svlsl_wide_n_s8_x, \ svuint16_t: svlsl_wide_n_u16_x, \ svuint32_t: svlsl_wide_n_u32_x, \ svuint8_t: svlsl_wide_n_u8_x, \ default: __assume(0)), \ svuint64_t: _Generic((op1), \ svint16_t: svlsl_wide_s16_x, \ svint32_t: svlsl_wide_s32_x, \ svint8_t: svlsl_wide_s8_x, \ svuint16_t: svlsl_wide_u16_x, \ svuint32_t: svlsl_wide_u32_x, \ svuint8_t: svlsl_wide_u8_x, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svlsl_wide_z(pg, op1, op2) _Generic((op2), \ uint64_t: _Generic((op1), \ svint16_t: svlsl_wide_n_s16_z, \ svint32_t: svlsl_wide_n_s32_z, \ svint8_t: svlsl_wide_n_s8_z, \ svuint16_t: svlsl_wide_n_u16_z, \ svuint32_t: svlsl_wide_n_u32_z, \ svuint8_t: svlsl_wide_n_u8_z, \ default: __assume(0)), \ svuint64_t: _Generic((op1), \ svint16_t: svlsl_wide_s16_z, \ svint32_t: svlsl_wide_s32_z, \ svint8_t: svlsl_wide_s8_z, \ svuint16_t: svlsl_wide_u16_z, \ svuint32_t: svlsl_wide_u32_z, \ svuint8_t: svlsl_wide_u8_z, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Shift / Right / Vector shift right: Arithmetic shift right svint16_t svasr_n_s16_m(svbool_t pg, svint16_t op1, uint16_t op2); svint16_t 
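// Usage sketch (illustrative only): arithmetic shift right replicates the sign bit
// into the vacated positions, so negative lanes stay negative. Excluded from
// compilation.
#if 0
static svint32_t example_arith_shift_right(svbool_t pg, svint32_t v, uint32_t shift)
{
    return svasr_n_s32_x(pg, v, shift);
}
#endif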
svasr_n_s16_x(svbool_t pg, svint16_t op1, uint16_t op2); svint16_t svasr_n_s16_z(svbool_t pg, svint16_t op1, uint16_t op2); svint16_t svasr_s16_m(svbool_t pg, svint16_t op1, svuint16_t op2); svint16_t svasr_s16_x(svbool_t pg, svint16_t op1, svuint16_t op2); svint16_t svasr_s16_z(svbool_t pg, svint16_t op1, svuint16_t op2); svint16_t svasr_wide_n_s16_m(svbool_t pg, svint16_t op1, uint64_t op2); svint16_t svasr_wide_n_s16_x(svbool_t pg, svint16_t op1, uint64_t op2); svint16_t svasr_wide_n_s16_z(svbool_t pg, svint16_t op1, uint64_t op2); svint16_t svasr_wide_s16_m(svbool_t pg, svint16_t op1, svuint64_t op2); svint16_t svasr_wide_s16_x(svbool_t pg, svint16_t op1, svuint64_t op2); svint16_t svasr_wide_s16_z(svbool_t pg, svint16_t op1, svuint64_t op2); svint32_t svasr_n_s32_m(svbool_t pg, svint32_t op1, uint32_t op2); svint32_t svasr_n_s32_x(svbool_t pg, svint32_t op1, uint32_t op2); svint32_t svasr_n_s32_z(svbool_t pg, svint32_t op1, uint32_t op2); svint32_t svasr_s32_m(svbool_t pg, svint32_t op1, svuint32_t op2); svint32_t svasr_s32_x(svbool_t pg, svint32_t op1, svuint32_t op2); svint32_t svasr_s32_z(svbool_t pg, svint32_t op1, svuint32_t op2); svint32_t svasr_wide_n_s32_m(svbool_t pg, svint32_t op1, uint64_t op2); svint32_t svasr_wide_n_s32_x(svbool_t pg, svint32_t op1, uint64_t op2); svint32_t svasr_wide_n_s32_z(svbool_t pg, svint32_t op1, uint64_t op2); svint32_t svasr_wide_s32_m(svbool_t pg, svint32_t op1, svuint64_t op2); svint32_t svasr_wide_s32_x(svbool_t pg, svint32_t op1, svuint64_t op2); svint32_t svasr_wide_s32_z(svbool_t pg, svint32_t op1, svuint64_t op2); svint64_t svasr_n_s64_m(svbool_t pg, svint64_t op1, uint64_t op2); svint64_t svasr_n_s64_x(svbool_t pg, svint64_t op1, uint64_t op2); svint64_t svasr_n_s64_z(svbool_t pg, svint64_t op1, uint64_t op2); svint64_t svasr_s64_m(svbool_t pg, svint64_t op1, svuint64_t op2); svint64_t svasr_s64_x(svbool_t pg, svint64_t op1, svuint64_t op2); svint64_t svasr_s64_z(svbool_t pg, svint64_t op1, svuint64_t op2); svint8_t svasr_n_s8_m(svbool_t pg, svint8_t op1, uint8_t op2); svint8_t svasr_n_s8_x(svbool_t pg, svint8_t op1, uint8_t op2); svint8_t svasr_n_s8_z(svbool_t pg, svint8_t op1, uint8_t op2); svint8_t svasr_s8_m(svbool_t pg, svint8_t op1, svuint8_t op2); svint8_t svasr_s8_x(svbool_t pg, svint8_t op1, svuint8_t op2); svint8_t svasr_s8_z(svbool_t pg, svint8_t op1, svuint8_t op2); svint8_t svasr_wide_n_s8_m(svbool_t pg, svint8_t op1, uint64_t op2); svint8_t svasr_wide_n_s8_x(svbool_t pg, svint8_t op1, uint64_t op2); svint8_t svasr_wide_n_s8_z(svbool_t pg, svint8_t op1, uint64_t op2); svint8_t svasr_wide_s8_m(svbool_t pg, svint8_t op1, svuint64_t op2); svint8_t svasr_wide_s8_x(svbool_t pg, svint8_t op1, svuint64_t op2); svint8_t svasr_wide_s8_z(svbool_t pg, svint8_t op1, svuint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svasr_m(pg, op1, op2) _Generic((op2), \ uint16_t: svasr_n_s16_m, \ svuint16_t: svasr_s16_m, \ uint32_t: svasr_n_s32_m, \ svuint32_t: svasr_s32_m, \ uint64_t: svasr_n_s64_m, \ svuint64_t: svasr_s64_m, \ uint8_t: svasr_n_s8_m, \ svuint8_t: svasr_s8_m, \ default: __assume(0) \ )(pg, op1, op2) #define svasr_x(pg, op1, op2) _Generic((op2), \ uint16_t: svasr_n_s16_x, \ svuint16_t: svasr_s16_x, \ uint32_t: svasr_n_s32_x, \ svuint32_t: svasr_s32_x, \ uint64_t: svasr_n_s64_x, \ svuint64_t: svasr_s64_x, \ uint8_t: svasr_n_s8_x, \ svuint8_t: svasr_s8_x, \ default: __assume(0) \ )(pg, op1, op2) #define svasr_z(pg, op1, op2) _Generic((op2), \ uint16_t: svasr_n_s16_z, \ svuint16_t: svasr_s16_z, \ 
uint32_t: svasr_n_s32_z, \ svuint32_t: svasr_s32_z, \ uint64_t: svasr_n_s64_z, \ svuint64_t: svasr_s64_z, \ uint8_t: svasr_n_s8_z, \ svuint8_t: svasr_s8_z, \ default: __assume(0) \ )(pg, op1, op2) #define svasr_wide_m(pg, op1, op2) _Generic((op2), \ uint64_t: _Generic((op1), \ svint16_t: svasr_wide_n_s16_m, \ svint32_t: svasr_wide_n_s32_m, \ svint8_t: svasr_wide_n_s8_m, \ default: __assume(0)), \ svuint64_t: _Generic((op1), \ svint16_t: svasr_wide_s16_m, \ svint32_t: svasr_wide_s32_m, \ svint8_t: svasr_wide_s8_m, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svasr_wide_x(pg, op1, op2) _Generic((op2), \ uint64_t: _Generic((op1), \ svint16_t: svasr_wide_n_s16_x, \ svint32_t: svasr_wide_n_s32_x, \ svint8_t: svasr_wide_n_s8_x, \ default: __assume(0)), \ svuint64_t: _Generic((op1), \ svint16_t: svasr_wide_s16_x, \ svint32_t: svasr_wide_s32_x, \ svint8_t: svasr_wide_s8_x, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svasr_wide_z(pg, op1, op2) _Generic((op2), \ uint64_t: _Generic((op1), \ svint16_t: svasr_wide_n_s16_z, \ svint32_t: svasr_wide_n_s32_z, \ svint8_t: svasr_wide_n_s8_z, \ default: __assume(0)), \ svuint64_t: _Generic((op1), \ svint16_t: svasr_wide_s16_z, \ svint32_t: svasr_wide_s32_z, \ svint8_t: svasr_wide_s8_z, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Shift / Right / Vector shift right: Arithmetic shift right for divide by immediate svint16_t svasrd_n_s16_m(svbool_t pg, svint16_t op1, uint64_t imm2); svint16_t svasrd_n_s16_x(svbool_t pg, svint16_t op1, uint64_t imm2); svint16_t svasrd_n_s16_z(svbool_t pg, svint16_t op1, uint64_t imm2); svint32_t svasrd_n_s32_m(svbool_t pg, svint32_t op1, uint64_t imm2); svint32_t svasrd_n_s32_x(svbool_t pg, svint32_t op1, uint64_t imm2); svint32_t svasrd_n_s32_z(svbool_t pg, svint32_t op1, uint64_t imm2); svint64_t svasrd_n_s64_m(svbool_t pg, svint64_t op1, uint64_t imm2); svint64_t svasrd_n_s64_x(svbool_t pg, svint64_t op1, uint64_t imm2); svint64_t svasrd_n_s64_z(svbool_t pg, svint64_t op1, uint64_t imm2); svint8_t svasrd_n_s8_m(svbool_t pg, svint8_t op1, uint64_t imm2); svint8_t svasrd_n_s8_x(svbool_t pg, svint8_t op1, uint64_t imm2); svint8_t svasrd_n_s8_z(svbool_t pg, svint8_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svasrd_m(pg, op1, imm2) _Generic((op1), \ svint16_t: svasrd_n_s16_m, \ svint32_t: svasrd_n_s32_m, \ svint64_t: svasrd_n_s64_m, \ svint8_t: svasrd_n_s8_m, \ default: __assume(0) \ )(pg, op1, imm2) #define svasrd_x(pg, op1, imm2) _Generic((op1), \ svint16_t: svasrd_n_s16_x, \ svint32_t: svasrd_n_s32_x, \ svint64_t: svasrd_n_s64_x, \ svint8_t: svasrd_n_s8_x, \ default: __assume(0) \ )(pg, op1, imm2) #define svasrd_z(pg, op1, imm2) _Generic((op1), \ svint16_t: svasrd_n_s16_z, \ svint32_t: svasrd_n_s32_z, \ svint64_t: svasrd_n_s64_z, \ svint8_t: svasrd_n_s8_z, \ default: __assume(0) \ )(pg, op1, imm2) #endif // sve: Shift / Right / Vector shift right: Logical shift right svuint16_t svlsr_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint16_t svlsr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint16_t svlsr_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint16_t svlsr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint16_t svlsr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint16_t svlsr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint16_t svlsr_wide_n_u16_m(svbool_t pg, svuint16_t op1, uint64_t op2); svuint16_t svlsr_wide_n_u16_x(svbool_t pg, 
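// Usage sketch (illustrative only): svasrd is the "shift right for divide" form;
// it behaves like signed division by 2^imm (rounding toward zero), whereas a plain
// svasr rounds toward minus infinity for negative values. Excluded from compilation.
#if 0
static svint32_t example_divide_by_sixteen(svbool_t pg, svint32_t v)
{
    return svasrd_n_s32_x(pg, v, 4);   // v / 16 for signed values, per active lane
}
#endif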
svuint16_t op1, uint64_t op2); svuint16_t svlsr_wide_n_u16_z(svbool_t pg, svuint16_t op1, uint64_t op2); svuint16_t svlsr_wide_u16_m(svbool_t pg, svuint16_t op1, svuint64_t op2); svuint16_t svlsr_wide_u16_x(svbool_t pg, svuint16_t op1, svuint64_t op2); svuint16_t svlsr_wide_u16_z(svbool_t pg, svuint16_t op1, svuint64_t op2); svuint32_t svlsr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint32_t svlsr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint32_t svlsr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint32_t svlsr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint32_t svlsr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint32_t svlsr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint32_t svlsr_wide_n_u32_m(svbool_t pg, svuint32_t op1, uint64_t op2); svuint32_t svlsr_wide_n_u32_x(svbool_t pg, svuint32_t op1, uint64_t op2); svuint32_t svlsr_wide_n_u32_z(svbool_t pg, svuint32_t op1, uint64_t op2); svuint32_t svlsr_wide_u32_m(svbool_t pg, svuint32_t op1, svuint64_t op2); svuint32_t svlsr_wide_u32_x(svbool_t pg, svuint32_t op1, svuint64_t op2); svuint32_t svlsr_wide_u32_z(svbool_t pg, svuint32_t op1, svuint64_t op2); svuint64_t svlsr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint64_t svlsr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svuint64_t svlsr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint64_t svlsr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint64_t svlsr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint64_t svlsr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint8_t svlsr_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svuint8_t svlsr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svuint8_t svlsr_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svuint8_t svlsr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint8_t svlsr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint8_t svlsr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint8_t svlsr_wide_n_u8_m(svbool_t pg, svuint8_t op1, uint64_t op2); svuint8_t svlsr_wide_n_u8_x(svbool_t pg, svuint8_t op1, uint64_t op2); svuint8_t svlsr_wide_n_u8_z(svbool_t pg, svuint8_t op1, uint64_t op2); svuint8_t svlsr_wide_u8_m(svbool_t pg, svuint8_t op1, svuint64_t op2); svuint8_t svlsr_wide_u8_x(svbool_t pg, svuint8_t op1, svuint64_t op2); svuint8_t svlsr_wide_u8_z(svbool_t pg, svuint8_t op1, svuint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svlsr_m(pg, op1, op2) _Generic((op2), \ uint16_t: svlsr_n_u16_m, \ svuint16_t: svlsr_u16_m, \ uint32_t: svlsr_n_u32_m, \ svuint32_t: svlsr_u32_m, \ uint64_t: svlsr_n_u64_m, \ svuint64_t: svlsr_u64_m, \ uint8_t: svlsr_n_u8_m, \ svuint8_t: svlsr_u8_m, \ default: __assume(0) \ )(pg, op1, op2) #define svlsr_x(pg, op1, op2) _Generic((op2), \ uint16_t: svlsr_n_u16_x, \ svuint16_t: svlsr_u16_x, \ uint32_t: svlsr_n_u32_x, \ svuint32_t: svlsr_u32_x, \ uint64_t: svlsr_n_u64_x, \ svuint64_t: svlsr_u64_x, \ uint8_t: svlsr_n_u8_x, \ svuint8_t: svlsr_u8_x, \ default: __assume(0) \ )(pg, op1, op2) #define svlsr_z(pg, op1, op2) _Generic((op2), \ uint16_t: svlsr_n_u16_z, \ svuint16_t: svlsr_u16_z, \ uint32_t: svlsr_n_u32_z, \ svuint32_t: svlsr_u32_z, \ uint64_t: svlsr_n_u64_z, \ svuint64_t: svlsr_u64_z, \ uint8_t: svlsr_n_u8_z, \ svuint8_t: svlsr_u8_z, \ default: __assume(0) \ )(pg, op1, op2) #define svlsr_wide_m(pg, op1, op2) _Generic((op2), \ uint64_t: _Generic((op1), \ svuint16_t: svlsr_wide_n_u16_m, \ svuint32_t: svlsr_wide_n_u32_m, \ svuint8_t: svlsr_wide_n_u8_m, \ 
default: __assume(0)), \ svuint64_t: _Generic((op1), \ svuint16_t: svlsr_wide_u16_m, \ svuint32_t: svlsr_wide_u32_m, \ svuint8_t: svlsr_wide_u8_m, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svlsr_wide_x(pg, op1, op2) _Generic((op2), \ uint64_t: _Generic((op1), \ svuint16_t: svlsr_wide_n_u16_x, \ svuint32_t: svlsr_wide_n_u32_x, \ svuint8_t: svlsr_wide_n_u8_x, \ default: __assume(0)), \ svuint64_t: _Generic((op1), \ svuint16_t: svlsr_wide_u16_x, \ svuint32_t: svlsr_wide_u32_x, \ svuint8_t: svlsr_wide_u8_x, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svlsr_wide_z(pg, op1, op2) _Generic((op2), \ uint64_t: _Generic((op1), \ svuint16_t: svlsr_wide_n_u16_z, \ svuint32_t: svlsr_wide_n_u32_z, \ svuint8_t: svlsr_wide_n_u8_z, \ default: __assume(0)), \ svuint64_t: _Generic((op1), \ svuint16_t: svlsr_wide_u16_z, \ svuint32_t: svlsr_wide_u32_z, \ svuint8_t: svlsr_wide_u8_z, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Store / Consecutive: Non-truncating store //void svst1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data); //void svst1_f16(svbool_t pg, float16_t *base, svfloat16_t data); void svst1_f32(svbool_t pg, float32_t *base, svfloat32_t data); void svst1_f64(svbool_t pg, float64_t *base, svfloat64_t data); void svst1_s16(svbool_t pg, int16_t *base, svint16_t data); void svst1_s32(svbool_t pg, int32_t *base, svint32_t data); void svst1_s64(svbool_t pg, int64_t *base, svint64_t data); void svst1_s8(svbool_t pg, int8_t *base, svint8_t data); void svst1_u16(svbool_t pg, uint16_t *base, svuint16_t data); void svst1_u32(svbool_t pg, uint32_t *base, svuint32_t data); void svst1_u64(svbool_t pg, uint64_t *base, svuint64_t data); void svst1_u8(svbool_t pg, uint8_t *base, svuint8_t data); //void svst1_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16_t data); //void svst1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t data); void svst1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data); void svst1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data); void svst1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data); void svst1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data); void svst1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data); void svst1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data); void svst1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data); void svst1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data); void svst1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data); void svst1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svst1(pg, base, data) _Generic((data), \ svfloat32_t: svst1_f32, \ svfloat64_t: svst1_f64, \ svint16_t: svst1_s16, \ svint32_t: svst1_s32, \ svint64_t: svst1_s64, \ svint8_t: svst1_s8, \ svuint16_t: svst1_u16, \ svuint32_t: svst1_u32, \ svuint64_t: svst1_u64, \ svuint8_t: svst1_u8, \ default: __assume(0) \ )(pg, base, data) #define svst1_vnum(pg, base, vnum, data) _Generic((data), \ svfloat32_t: svst1_vnum_f32, \ svfloat64_t: svst1_vnum_f64, \ svint16_t: svst1_vnum_s16, \ svint32_t: svst1_vnum_s32, \ svint64_t: svst1_vnum_s64, \ svint8_t: svst1_vnum_s8, \ svuint16_t: svst1_vnum_u16, \ svuint32_t: svst1_vnum_u32, \ svuint64_t: svst1_vnum_u64, \ svuint8_t: svst1_vnum_u8, \ default: __assume(0) \ )(pg, 
base, vnum, data) #endif // sve: Store / Consecutive: Non-truncating store, non-temporal void svstnt1_u8(svbool_t pg, uint8_t *base, svuint8_t data); void svstnt1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data); void svstnt1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data); void svstnt1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data); void svstnt1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data); void svstnt1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data); void svstnt1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data); void svstnt1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data); //void svstnt1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t data); //void svstnt1_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16_t data); void svstnt1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data); void svstnt1_u64(svbool_t pg, uint64_t *base, svuint64_t data); void svstnt1_u16(svbool_t pg, uint16_t *base, svuint16_t data); void svstnt1_s64(svbool_t pg, int64_t *base, svint64_t data); void svstnt1_s32(svbool_t pg, int32_t *base, svint32_t data); void svstnt1_s16(svbool_t pg, int16_t *base, svint16_t data); void svstnt1_s8(svbool_t pg, int8_t *base, svint8_t data); void svstnt1_f64(svbool_t pg, float64_t *base, svfloat64_t data); void svstnt1_f32(svbool_t pg, float32_t *base, svfloat32_t data); //void svstnt1_f16(svbool_t pg, float16_t *base, svfloat16_t data); //void svstnt1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data); void svstnt1_u32(svbool_t pg, uint32_t *base, svuint32_t data); void svstnt1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data); void svstnt1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svstnt1(pg, base, data) _Generic((data), \ svuint8_t: svstnt1_u8, \ svuint64_t: svstnt1_u64, \ svuint16_t: svstnt1_u16, \ svint64_t: svstnt1_s64, \ svint32_t: svstnt1_s32, \ svint16_t: svstnt1_s16, \ svint8_t: svstnt1_s8, \ svfloat64_t: svstnt1_f64, \ svfloat32_t: svstnt1_f32, \ svuint32_t: svstnt1_u32, \ default: __assume(0) \ )(pg, base, data) #define svstnt1_vnum(pg, base, vnum, data) _Generic((data), \ svuint8_t: svstnt1_vnum_u8, \ svint64_t: svstnt1_vnum_s64, \ svint32_t: svstnt1_vnum_s32, \ svint16_t: svstnt1_vnum_s16, \ svint8_t: svstnt1_vnum_s8, \ svfloat64_t: svstnt1_vnum_f64, \ svfloat32_t: svstnt1_vnum_f32, \ svuint16_t: svstnt1_vnum_u16, \ svuint32_t: svstnt1_vnum_u32, \ svuint64_t: svstnt1_vnum_u64, \ default: __assume(0) \ )(pg, base, vnum, data) #endif // sve: Store / Consecutive: Store four vectors into four-element tuples void svst4_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x4_t data); void svst4_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x4_t data); void svst4_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x4_t data); // void svst4_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x4_t data); void svst4_u64(svbool_t pg, uint64_t *base, svuint64x4_t data); void svst4_u32(svbool_t pg, uint32_t *base, svuint32x4_t data); void svst4_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x4_t data); void svst4_u8(svbool_t pg, uint8_t *base, svuint8x4_t data); void svst4_s64(svbool_t pg, int64_t *base, svint64x4_t data); void svst4_s32(svbool_t pg, int32_t *base, svint32x4_t data); void svst4_s16(svbool_t pg, int16_t *base, 
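// Usage sketch (illustrative only, not part of the original header): a predicated
// copy loop built on the contiguous svst1 stores above. svcntw, svwhilelt_b32_u64
// and svld1_f32 are assumed to be declared elsewhere in this header; names such as
// copy_f32 are hypothetical.
//
//   void copy_f32(float32_t *dst, const float32_t *src, uint64_t n) {
//       for (uint64_t i = 0; i < n; i += svcntw()) {
//           svbool_t pg = svwhilelt_b32_u64(i, n);   // predicate off the tail lanes
//           svfloat32_t v = svld1_f32(pg, src + i);  // load active lanes
//           svst1_f32(pg, dst + i, v);               // store active lanes only
//       }
//   }
//
// The svstnt1_* variants take the same arguments but add a non-temporal hint,
// which can help large streaming stores avoid displacing useful cache lines.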
svint16x4_t data); // void svst4_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x4_t data); void svst4_s8(svbool_t pg, int8_t *base, svint8x4_t data); void svst4_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x4_t data); void svst4_f32(svbool_t pg, float32_t *base, svfloat32x4_t data); // void svst4_f16(svbool_t pg, float16_t *base, svfloat16x4_t data); // void svst4_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x4_t data); void svst4_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x4_t data); void svst4_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x4_t data); void svst4_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x4_t data); void svst4_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x4_t data); void svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data); void svst4_u16(svbool_t pg, uint16_t *base, svuint16x4_t data); void svst4_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x4_t data); // sve: Store / Consecutive: Store three vectors into three-element tuples void svst3_s64(svbool_t pg, int64_t *base, svint64x3_t data); void svst3_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x3_t data); void svst3_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x3_t data); void svst3_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x3_t data); void svst3_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x3_t data); void svst3_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x3_t data); void svst3_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x3_t data); // void svst3_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x3_t data); void svst3_s32(svbool_t pg, int32_t *base, svint32x3_t data); void svst3_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x3_t data); void svst3_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x3_t data); void svst3_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x3_t data); void svst3_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x3_t data); // void svst3_f16(svbool_t pg, float16_t *base, svfloat16x3_t data); void svst3_f32(svbool_t pg, float32_t *base, svfloat32x3_t data); void svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data); void svst3_s8(svbool_t pg, int8_t *base, svint8x3_t data); void svst3_s16(svbool_t pg, int16_t *base, svint16x3_t data); // void svst3_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x3_t data); void svst3_u64(svbool_t pg, uint64_t *base, svuint64x3_t data); void svst3_u32(svbool_t pg, uint32_t *base, svuint32x3_t data); void svst3_u16(svbool_t pg, uint16_t *base, svuint16x3_t data); void svst3_u8(svbool_t pg, uint8_t *base, svuint8x3_t data); // void svst3_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x3_t data); // sve: Store / Consecutive: Store two vectors into two-element tuples void svst2_s32(svbool_t pg, int32_t *base, svint32x2_t data); void svst2_s16(svbool_t pg, int16_t *base, svint16x2_t data); void svst2_s8(svbool_t pg, int8_t *base, svint8x2_t data); // void svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data); void svst2_f32(svbool_t pg, float32_t *base, svfloat32x2_t data); // void svst2_f16(svbool_t pg, float16_t *base, svfloat16x2_t data); void svst2_s64(svbool_t pg, int64_t *base, svint64x2_t data); void svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data); void svst2_u8(svbool_t pg, uint8_t *base, svuint8x2_t data); // void svst2_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, 
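// Usage sketch (illustrative): svst2/svst3/svst4 store the members of a tuple
// interleaved, so svst4_f32 writes x0,y0,z0,w0, x1,y1,z1,w1, ... to memory.
// svcreate4_f32 is assumed to be declared elsewhere in this header; the function
// name below is hypothetical.
//
//   void pack_xyzw(float32_t *out, svbool_t pg, svfloat32_t x, svfloat32_t y,
//                  svfloat32_t z, svfloat32_t w) {
//       svfloat32x4_t t = svcreate4_f32(x, y, z, w);  // build the 4-vector tuple
//       svst4_f32(pg, out, t);                        // interleaving store
//   }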
svbfloat16x2_t data); void svst2_u32(svbool_t pg, uint32_t *base, svuint32x2_t data); void svst2_u64(svbool_t pg, uint64_t *base, svuint64x2_t data); // void svst2_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x2_t data); void svst2_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x2_t data); void svst2_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x2_t data); void svst2_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x2_t data); void svst2_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x2_t data); void svst2_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x2_t data); void svst2_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x2_t data); void svst2_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x2_t data); void svst2_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x2_t data); void svst2_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x2_t data); void svst2_u16(svbool_t pg, uint16_t *base, svuint16x2_t data); void svst2_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x2_t data); // sve: Store / Consecutive: Truncate to 16 bits and store void svst1h_vnum_s32(svbool_t pg, int16_t *base, int64_t vnum, svint32_t data); void svst1h_vnum_s64(svbool_t pg, int16_t *base, int64_t vnum, svint64_t data); void svst1h_u64(svbool_t pg, uint16_t *base, svuint64_t data); void svst1h_u32(svbool_t pg, uint16_t *base, svuint32_t data); void svst1h_s64(svbool_t pg, int16_t *base, svint64_t data); void svst1h_s32(svbool_t pg, int16_t *base, svint32_t data); void svst1h_vnum_u64(svbool_t pg, uint16_t *base, int64_t vnum, svuint64_t data); void svst1h_vnum_u32(svbool_t pg, uint16_t *base, int64_t vnum, svuint32_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svst1h_vnum(pg, base, vnum, data) _Generic((data), \ svint32_t: svst1h_vnum_s32, \ svint64_t: svst1h_vnum_s64, \ svuint64_t: svst1h_vnum_u64, \ svuint32_t: svst1h_vnum_u32, \ default: __assume(0) \ )(pg, base, vnum, data) #define svst1h(pg, base, data) _Generic((data), \ svuint64_t: svst1h_u64, \ svuint32_t: svst1h_u32, \ svint64_t: svst1h_s64, \ svint32_t: svst1h_s32, \ default: __assume(0) \ )(pg, base, data) #endif // sve: Store / Consecutive: Truncate to 32 bits and store void svst1w_vnum_s64(svbool_t pg, int32_t *base, int64_t vnum, svint64_t data); void svst1w_u64(svbool_t pg, uint32_t *base, svuint64_t data); void svst1w_s64(svbool_t pg, int32_t *base, svint64_t data); void svst1w_vnum_u64(svbool_t pg, uint32_t *base, int64_t vnum, svuint64_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svst1w_vnum(pg, base, vnum, data) _Generic((data), \ svint64_t: svst1w_vnum_s64, \ svuint64_t: svst1w_vnum_u64, \ default: __assume(0) \ )(pg, base, vnum, data) #define svst1w(pg, base, data) _Generic((data), \ svuint64_t: svst1w_u64, \ svint64_t: svst1w_s64, \ default: __assume(0) \ )(pg, base, data) #endif // sve: Store / Consecutive: Truncate to 8 bits and store void svst1b_u64(svbool_t pg, uint8_t *base, svuint64_t data); void svst1b_s16(svbool_t pg, int8_t *base, svint16_t data); void svst1b_s32(svbool_t pg, int8_t *base, svint32_t data); void svst1b_s64(svbool_t pg, int8_t *base, svint64_t data); void svst1b_u16(svbool_t pg, uint8_t *base, svuint16_t data); void svst1b_u32(svbool_t pg, uint8_t *base, svuint32_t data); void svst1b_vnum_s16(svbool_t pg, int8_t *base, int64_t vnum, svint16_t data); void svst1b_vnum_s64(svbool_t pg, int8_t *base, int64_t vnum, svint64_t data); void 
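// Usage sketch (illustrative): the svst1h/svst1w/svst1b families narrow each
// active element on store, keeping only its low bits; the vector operand keeps
// its wide element type and only the memory format shrinks. Variable names
// below are hypothetical.
//
//   void store_low_halves(int16_t *dst16, svbool_t pg, svint32_t v32) {
//       svst1h_s32(pg, dst16, v32);   // each int32 lane -> low 16 bits in memory
//       // with the C11 overload macro: svst1h(pg, dst16, v32);
//   }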
svst1b_vnum_s32(svbool_t pg, int8_t *base, int64_t vnum, svint32_t data); void svst1b_vnum_u64(svbool_t pg, uint8_t *base, int64_t vnum, svuint64_t data); void svst1b_vnum_u32(svbool_t pg, uint8_t *base, int64_t vnum, svuint32_t data); void svst1b_vnum_u16(svbool_t pg, uint8_t *base, int64_t vnum, svuint16_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svst1b(pg, base, data) _Generic((data), \ svuint64_t: svst1b_u64, \ svint16_t: svst1b_s16, \ svint32_t: svst1b_s32, \ svint64_t: svst1b_s64, \ svuint16_t: svst1b_u16, \ svuint32_t: svst1b_u32, \ default: __assume(0) \ )(pg, base, data) #define svst1b_vnum(pg, base, vnum, data) _Generic((data), \ svint16_t: svst1b_vnum_s16, \ svint64_t: svst1b_vnum_s64, \ svint32_t: svst1b_vnum_s32, \ svuint64_t: svst1b_vnum_u64, \ svuint32_t: svst1b_vnum_u32, \ svuint16_t: svst1b_vnum_u16, \ default: __assume(0) \ )(pg, base, vnum, data) #endif // sve: Store / Scatter: Non-truncating store void svst1_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data); void svst1_scatter_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data); void svst1_scatter_s64offset_u64(svbool_t pg, uint64_t *base, svint64_t offsets, svuint64_t data); void svst1_scatter_u64offset_f64(svbool_t pg, float64_t *base, svuint64_t offsets, svfloat64_t data); void svst1_scatter_u64offset_s64(svbool_t pg, int64_t *base, svuint64_t offsets, svint64_t data); void svst1_scatter_u64offset_u64(svbool_t pg, uint64_t *base, svuint64_t offsets, svuint64_t data); void svst1_scatter_s32index_f32(svbool_t pg, float32_t *base, svint32_t indices, svfloat32_t data); void svst1_scatter_s32index_s32(svbool_t pg, int32_t *base, svint32_t indices, svint32_t data); void svst1_scatter_s32index_u32(svbool_t pg, uint32_t *base, svint32_t indices, svuint32_t data); void svst1_scatter_u32index_f32(svbool_t pg, float32_t *base, svuint32_t indices, svfloat32_t data); void svst1_scatter_u32index_s32(svbool_t pg, int32_t *base, svuint32_t indices, svint32_t data); void svst1_scatter_u32index_u32(svbool_t pg, uint32_t *base, svuint32_t indices, svuint32_t data); void svst1_scatter_s64index_f64(svbool_t pg, float64_t *base, svint64_t indices, svfloat64_t data); void svst1_scatter_s64index_s64(svbool_t pg, int64_t *base, svint64_t indices, svint64_t data); void svst1_scatter_s64offset_s64(svbool_t pg, int64_t *base, svint64_t offsets, svint64_t data); void svst1_scatter_s64index_u64(svbool_t pg, uint64_t *base, svint64_t indices, svuint64_t data); void svst1_scatter_u64index_s64(svbool_t pg, int64_t *base, svuint64_t indices, svint64_t data); void svst1_scatter_u64index_u64(svbool_t pg, uint64_t *base, svuint64_t indices, svuint64_t data); void svst1_scatter_u32base_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset, svfloat32_t data); void svst1_scatter_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index, svuint32_t data); void svst1_scatter_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index, svint32_t data); void svst1_scatter_u32base_index_f32(svbool_t pg, svuint32_t bases, int64_t index, svfloat32_t data); void svst1_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data); void svst1_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data); void svst1_scatter_u64base_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset, svfloat64_t data); void svst1_scatter_u64index_f64(svbool_t pg, float64_t *base, svuint64_t indices, 
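// Note: the unsuffixed overload macros (svst1b, svst1b_vnum, and the others in
// this header) are only defined when __STDC_VERSION__ reports C11 or later,
// because they rely on _Generic dispatch; otherwise call the explicitly
// suffixed functions. Illustrative sketch with hypothetical variable names:
//
//   svst1b_s32(pg, dst8, v32);   // truncate each active int32 lane to 8 bits
//   svst1b(pg, dst8, v32);       // same call through the C11 overload macro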
svfloat64_t data); void svst1_scatter_s64offset_f64(svbool_t pg, float64_t *base, svint64_t offsets, svfloat64_t data); void svst1_scatter_u32offset_u32(svbool_t pg, uint32_t *base, svuint32_t offsets, svuint32_t data); void svst1_scatter_u32offset_s32(svbool_t pg, int32_t *base, svuint32_t offsets, svint32_t data); void svst1_scatter_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data); void svst1_scatter_u64base_index_f64(svbool_t pg, svuint64_t bases, int64_t index, svfloat64_t data); void svst1_scatter_u32base_f32(svbool_t pg, svuint32_t bases, svfloat32_t data); void svst1_scatter_u32base_s32(svbool_t pg, svuint32_t bases, svint32_t data); void svst1_scatter_u32base_u32(svbool_t pg, svuint32_t bases, svuint32_t data); void svst1_scatter_u64base_f64(svbool_t pg, svuint64_t bases, svfloat64_t data); void svst1_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t data); void svst1_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t data); void svst1_scatter_s32offset_f32(svbool_t pg, float32_t *base, svint32_t offsets, svfloat32_t data); void svst1_scatter_s32offset_s32(svbool_t pg, int32_t *base, svint32_t offsets, svint32_t data); void svst1_scatter_s32offset_u32(svbool_t pg, uint32_t *base, svint32_t offsets, svuint32_t data); void svst1_scatter_u32offset_f32(svbool_t pg, float32_t *base, svuint32_t offsets, svfloat32_t data); void svst1_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svst1_scatter_offset(pg, bases, offset, data) _Generic((data), \ svuint32_t: _Generic((offset), \ int64_t: svst1_scatter_u32base_offset_u32, \ svuint32_t: svst1_scatter_u32offset_u32, \ svint32_t: svst1_scatter_s32offset_u32, \ default: __assume(0)), \ svuint64_t: _Generic((offset), \ svint64_t: svst1_scatter_s64offset_u64, \ svuint64_t: svst1_scatter_u64offset_u64, \ int64_t: svst1_scatter_u64base_offset_u64, \ default: __assume(0)), \ svfloat64_t: _Generic((offset), \ svuint64_t: svst1_scatter_u64offset_f64, \ int64_t: svst1_scatter_u64base_offset_f64, \ svint64_t: svst1_scatter_s64offset_f64, \ default: __assume(0)), \ svint64_t: _Generic((offset), \ svuint64_t: svst1_scatter_u64offset_s64, \ svint64_t: svst1_scatter_s64offset_s64, \ int64_t: svst1_scatter_u64base_offset_s64, \ default: __assume(0)), \ svfloat32_t: _Generic((offset), \ int64_t: svst1_scatter_u32base_offset_f32, \ svint32_t: svst1_scatter_s32offset_f32, \ svuint32_t: svst1_scatter_u32offset_f32, \ default: __assume(0)), \ svint32_t: _Generic((offset), \ svuint32_t: svst1_scatter_u32offset_s32, \ svint32_t: svst1_scatter_s32offset_s32, \ int64_t: svst1_scatter_u32base_offset_s32, \ default: __assume(0)), \ default: __assume(0) \ )(pg, bases, offset, data) #define svst1_scatter_index(pg, bases, index, data) _Generic((data), \ svuint64_t: _Generic((index), \ int64_t: svst1_scatter_u64base_index_u64, \ svint64_t: svst1_scatter_s64index_u64, \ svuint64_t: svst1_scatter_u64index_u64, \ default: __assume(0)), \ svfloat32_t: _Generic((index), \ svint32_t: svst1_scatter_s32index_f32, \ svuint32_t: svst1_scatter_u32index_f32, \ int64_t: svst1_scatter_u32base_index_f32, \ default: __assume(0)), \ svint32_t: _Generic((index), \ svint32_t: svst1_scatter_s32index_s32, \ svuint32_t: svst1_scatter_u32index_s32, \ int64_t: svst1_scatter_u32base_index_s32, \ default: __assume(0)), \ svuint32_t: _Generic((index), \ svint32_t: svst1_scatter_s32index_u32, \ svuint32_t: svst1_scatter_u32index_u32, \ 
int64_t: svst1_scatter_u32base_index_u32, \ default: __assume(0)), \ svfloat64_t: _Generic((index), \ svint64_t: svst1_scatter_s64index_f64, \ svuint64_t: svst1_scatter_u64index_f64, \ int64_t: svst1_scatter_u64base_index_f64, \ default: __assume(0)), \ svint64_t: _Generic((index), \ svint64_t: svst1_scatter_s64index_s64, \ svuint64_t: svst1_scatter_u64index_s64, \ int64_t: svst1_scatter_u64base_index_s64, \ default: __assume(0)), \ default: __assume(0) \ )(pg, bases, index, data) #define svst1_scatter(pg, bases, data) _Generic((data), \ svfloat32_t: svst1_scatter_u32base_f32, \ svint32_t: svst1_scatter_u32base_s32, \ svuint32_t: svst1_scatter_u32base_u32, \ svfloat64_t: svst1_scatter_u64base_f64, \ svint64_t: svst1_scatter_u64base_s64, \ svuint64_t: svst1_scatter_u64base_u64, \ default: __assume(0) \ )(pg, bases, data) #endif // sve: Store / Scatter: Truncate to 16 bits and store void svst1h_scatter_s32offset_u32(svbool_t pg, uint16_t *base, svint32_t offsets, svuint32_t data); void svst1h_scatter_u32offset_u32(svbool_t pg, uint16_t *base, svuint32_t offsets, svuint32_t data); void svst1h_scatter_s64offset_s64(svbool_t pg, int16_t *base, svint64_t offsets, svint64_t data); void svst1h_scatter_u32offset_s32(svbool_t pg, int16_t *base, svuint32_t offsets, svint32_t data); void svst1h_scatter_s64offset_u64(svbool_t pg, uint16_t *base, svint64_t offsets, svuint64_t data); void svst1h_scatter_u64offset_s64(svbool_t pg, int16_t *base, svuint64_t offsets, svint64_t data); void svst1h_scatter_u64offset_u64(svbool_t pg, uint16_t *base, svuint64_t offsets, svuint64_t data); void svst1h_scatter_u32base_s32(svbool_t pg, svuint32_t bases, svint32_t data); void svst1h_scatter_s32index_u32(svbool_t pg, uint16_t *base, svint32_t indices, svuint32_t data); void svst1h_scatter_u32base_u32(svbool_t pg, svuint32_t bases, svuint32_t data); void svst1h_scatter_s32index_s32(svbool_t pg, int16_t *base, svint32_t indices, svint32_t data); void svst1h_scatter_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data); void svst1h_scatter_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index, svuint32_t data); void svst1h_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t data); void svst1h_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t data); void svst1h_scatter_s32offset_s32(svbool_t pg, int16_t *base, svint32_t offsets, svint32_t data); void svst1h_scatter_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index, svint32_t data); void svst1h_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data); void svst1h_scatter_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data); void svst1h_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data); void svst1h_scatter_u32index_u32(svbool_t pg, uint16_t *base, svuint32_t indices, svuint32_t data); void svst1h_scatter_s64index_s64(svbool_t pg, int16_t *base, svint64_t indices, svint64_t data); void svst1h_scatter_s64index_u64(svbool_t pg, uint16_t *base, svint64_t indices, svuint64_t data); void svst1h_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data); void svst1h_scatter_u64index_u64(svbool_t pg, uint16_t *base, svuint64_t indices, svuint64_t data); void svst1h_scatter_u64index_s64(svbool_t pg, int16_t *base, svuint64_t indices, svint64_t data); void svst1h_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data); void 
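// Usage sketch (illustrative): scatter stores write each active lane to its own
// address. The *offset* forms add per-lane byte offsets to the base pointer,
// the *index* forms scale per-lane indices by the element size, and the *base*
// forms take a vector of base addresses. Variable names are hypothetical.
//
//   void scatter_f64(float64_t *base, svbool_t pg, svuint64_t idx, svfloat64_t v) {
//       svst1_scatter_u64index_f64(pg, base, idx, v);   // base[idx[i]] = v[i]
//       // C11 overload: svst1_scatter_index(pg, base, idx, v);
//   }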
svst1h_scatter_u32index_s32(svbool_t pg, int16_t *base, svuint32_t indices, svint32_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svst1h_scatter_offset(pg, base, offsets, data) _Generic((data), \ svuint32_t: _Generic((offsets), \ svint32_t: svst1h_scatter_s32offset_u32, \ svuint32_t: svst1h_scatter_u32offset_u32, \ int64_t: svst1h_scatter_u32base_offset_u32, \ default: __assume(0)), \ svint64_t: _Generic((offsets), \ svint64_t: svst1h_scatter_s64offset_s64, \ svuint64_t: svst1h_scatter_u64offset_s64, \ int64_t: svst1h_scatter_u64base_offset_s64, \ default: __assume(0)), \ svint32_t: _Generic((offsets), \ svuint32_t: svst1h_scatter_u32offset_s32, \ svint32_t: svst1h_scatter_s32offset_s32, \ int64_t: svst1h_scatter_u32base_offset_s32, \ default: __assume(0)), \ svuint64_t: _Generic((offsets), \ svint64_t: svst1h_scatter_s64offset_u64, \ svuint64_t: svst1h_scatter_u64offset_u64, \ int64_t: svst1h_scatter_u64base_offset_u64, \ default: __assume(0)), \ default: __assume(0) \ )(pg, base, offsets, data) #define svst1h_scatter(pg, bases, data) _Generic((data), \ svint32_t: svst1h_scatter_u32base_s32, \ svuint32_t: svst1h_scatter_u32base_u32, \ svint64_t: svst1h_scatter_u64base_s64, \ svuint64_t: svst1h_scatter_u64base_u64, \ default: __assume(0) \ )(pg, bases, data) #define svst1h_scatter_index(pg, base, indices, data) _Generic((data), \ svuint32_t: _Generic((indices), \ svint32_t: svst1h_scatter_s32index_u32, \ int64_t: svst1h_scatter_u32base_index_u32, \ svuint32_t: svst1h_scatter_u32index_u32, \ default: __assume(0)), \ svint32_t: _Generic((indices), \ svint32_t: svst1h_scatter_s32index_s32, \ int64_t: svst1h_scatter_u32base_index_s32, \ svuint32_t: svst1h_scatter_u32index_s32, \ default: __assume(0)), \ svint64_t: _Generic((indices), \ int64_t: svst1h_scatter_u64base_index_s64, \ svint64_t: svst1h_scatter_s64index_s64, \ svuint64_t: svst1h_scatter_u64index_s64, \ default: __assume(0)), \ svuint64_t: _Generic((indices), \ int64_t: svst1h_scatter_u64base_index_u64, \ svint64_t: svst1h_scatter_s64index_u64, \ svuint64_t: svst1h_scatter_u64index_u64, \ default: __assume(0)), \ default: __assume(0) \ )(pg, base, indices, data) #endif // sve: Store / Scatter: Truncate to 32 bits and store void svst1w_scatter_u64index_u64(svbool_t pg, uint32_t *base, svuint64_t indices, svuint64_t data); void svst1w_scatter_u64index_s64(svbool_t pg, int32_t *base, svuint64_t indices, svint64_t data); void svst1w_scatter_s64index_u64(svbool_t pg, uint32_t *base, svint64_t indices, svuint64_t data); void svst1w_scatter_s64index_s64(svbool_t pg, int32_t *base, svint64_t indices, svint64_t data); void svst1w_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data); void svst1w_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data); void svst1w_scatter_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data); void svst1w_scatter_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data); void svst1w_scatter_u64offset_u64(svbool_t pg, uint32_t *base, svuint64_t offsets, svuint64_t data); void svst1w_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t data); void svst1w_scatter_u64offset_s64(svbool_t pg, int32_t *base, svuint64_t offsets, svint64_t data); void svst1w_scatter_s64offset_u64(svbool_t pg, uint32_t *base, svint64_t offsets, svuint64_t data); void svst1w_scatter_s64offset_s64(svbool_t pg, int32_t *base, svint64_t offsets, svint64_t data); void 
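// Usage sketch (illustrative): truncating scatter, combining the two ideas
// above: each active 32-bit lane is narrowed to 16 bits and written at its own
// per-lane location. Variable names are hypothetical.
//
//   svst1h_scatter_u32index_u32(pg, dst16, idx, v32);  // dst16[idx[i]] = (uint16_t)v32[i]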
svst1w_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svst1w_scatter_index(pg, base, indices, data) _Generic((data), \ svuint64_t: _Generic((indices), \ svuint64_t: svst1w_scatter_u64index_u64, \ svint64_t: svst1w_scatter_s64index_u64, \ int64_t: svst1w_scatter_u64base_index_u64, \ default: __assume(0)), \ svint64_t: _Generic((indices), \ svuint64_t: svst1w_scatter_u64index_s64, \ svint64_t: svst1w_scatter_s64index_s64, \ int64_t: svst1w_scatter_u64base_index_s64, \ default: __assume(0)), \ default: __assume(0) \ )(pg, base, indices, data) #define svst1w_scatter_offset(pg, bases, offset, data) _Generic((data), \ svint64_t: _Generic((offset), \ int64_t: svst1w_scatter_u64base_offset_s64, \ svuint64_t: svst1w_scatter_u64offset_s64, \ svint64_t: svst1w_scatter_s64offset_s64, \ default: __assume(0)), \ svuint64_t: _Generic((offset), \ int64_t: svst1w_scatter_u64base_offset_u64, \ svuint64_t: svst1w_scatter_u64offset_u64, \ svint64_t: svst1w_scatter_s64offset_u64, \ default: __assume(0)), \ default: __assume(0) \ )(pg, bases, offset, data) #define svst1w_scatter(pg, bases, data) _Generic((data), \ svint64_t: svst1w_scatter_u64base_s64, \ svuint64_t: svst1w_scatter_u64base_u64, \ default: __assume(0) \ )(pg, bases, data) #endif // sve: Store / Scatter: Truncate to 8 bits and store void svst1b_scatter_u64offset_s64(svbool_t pg, int8_t *base, svuint64_t offsets, svint64_t data); void svst1b_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data); void svst1b_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data); void svst1b_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data); void svst1b_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data); void svst1b_scatter_u64offset_u64(svbool_t pg, uint8_t *base, svuint64_t offsets, svuint64_t data); void svst1b_scatter_s64offset_u64(svbool_t pg, uint8_t *base, svint64_t offsets, svuint64_t data); void svst1b_scatter_s64offset_s64(svbool_t pg, int8_t *base, svint64_t offsets, svint64_t data); void svst1b_scatter_u32offset_s32(svbool_t pg, int8_t *base, svuint32_t offsets, svint32_t data); void svst1b_scatter_u32offset_u32(svbool_t pg, uint8_t *base, svuint32_t offsets, svuint32_t data); void svst1b_scatter_s32offset_s32(svbool_t pg, int8_t *base, svint32_t offsets, svint32_t data); void svst1b_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t data); void svst1b_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t data); void svst1b_scatter_u32base_u32(svbool_t pg, svuint32_t bases, svuint32_t data); void svst1b_scatter_u32base_s32(svbool_t pg, svuint32_t bases, svint32_t data); void svst1b_scatter_s32offset_u32(svbool_t pg, uint8_t *base, svint32_t offsets, svuint32_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svst1b_scatter_offset(pg, base, offsets, data) _Generic((data), \ svint64_t: _Generic((offsets), \ svuint64_t: svst1b_scatter_u64offset_s64, \ int64_t: svst1b_scatter_u64base_offset_s64, \ svint64_t: svst1b_scatter_s64offset_s64, \ default: __assume(0)), \ svuint64_t: _Generic((offsets), \ int64_t: svst1b_scatter_u64base_offset_u64, \ svuint64_t: svst1b_scatter_u64offset_u64, \ svint64_t: svst1b_scatter_s64offset_u64, \ default: __assume(0)), \ svuint32_t: _Generic((offsets), \ int64_t: svst1b_scatter_u32base_offset_u32, \ svuint32_t: svst1b_scatter_u32offset_u32, \ 
svint32_t: svst1b_scatter_s32offset_u32, \ default: __assume(0)), \ svint32_t: _Generic((offsets), \ int64_t: svst1b_scatter_u32base_offset_s32, \ svuint32_t: svst1b_scatter_u32offset_s32, \ svint32_t: svst1b_scatter_s32offset_s32, \ default: __assume(0)), \ default: __assume(0) \ )(pg, base, offsets, data) #define svst1b_scatter(pg, bases, data) _Generic((data), \ svuint64_t: svst1b_scatter_u64base_u64, \ svint64_t: svst1b_scatter_u64base_s64, \ svuint32_t: svst1b_scatter_u32base_u32, \ svint32_t: svst1b_scatter_u32base_s32, \ default: __assume(0) \ )(pg, bases, data) #endif // sve: Table lookups / Table lookup: Table lookup in single-vector table svfloat64_t svtbl_f64(svfloat64_t data, svuint64_t indices); svbfloat16_t svtbl_bf16(svbfloat16_t data, svuint16_t indices); svint16_t svtbl_s16(svint16_t data, svuint16_t indices); svint32_t svtbl_s32(svint32_t data, svuint32_t indices); svint64_t svtbl_s64(svint64_t data, svuint64_t indices); svuint8_t svtbl_u8(svuint8_t data, svuint8_t indices); svuint16_t svtbl_u16(svuint16_t data, svuint16_t indices); svuint64_t svtbl_u64(svuint64_t data, svuint64_t indices); svint8_t svtbl_s8(svint8_t data, svuint8_t indices); svuint32_t svtbl_u32(svuint32_t data, svuint32_t indices); svfloat32_t svtbl_f32(svfloat32_t data, svuint32_t indices); svfloat16_t svtbl_f16(svfloat16_t data, svuint16_t indices); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svtbl(data, indices) _Generic((data), \ svfloat64_t: svtbl_f64, \ svbfloat16_t: svtbl_bf16, \ svint16_t: svtbl_s16, \ svint32_t: svtbl_s32, \ svint64_t: svtbl_s64, \ svuint8_t: svtbl_u8, \ svuint16_t: svtbl_u16, \ svuint64_t: svtbl_u64, \ svint8_t: svtbl_s8, \ svuint32_t: svtbl_u32, \ svfloat32_t: svtbl_f32, \ svfloat16_t: svtbl_f16, \ default: __assume(0) \ )(data, indices) #endif // sve: Vector arithmetic / Absolute / Absolute difference: Absolute difference svuint64_t svabd_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svabd_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svabd_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svabd_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svabd_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svabd_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svabd_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svfloat32_t svabd_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat64_t svabd_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2); svuint64_t svabd_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svabd_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svabd_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svabd_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svabd_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svabd_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint8_t svabd_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svfloat32_t svabd_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat64_t svabd_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2); svint16_t svabd_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svuint64_t svabd_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svabd_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svabd_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svabd_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svabd_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svabd_n_s32_z(svbool_t 
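// Usage sketch (illustrative): svtbl selects arbitrary lanes of `data` using a
// vector of lane numbers; lane numbers beyond the vector length yield zero.
// The example reverses a float32 vector. svindex_u32 and svcntw are assumed to
// be declared elsewhere in this header.
//
//   svfloat32_t reverse_f32(svfloat32_t data) {
//       svuint32_t idx = svindex_u32((uint32_t)svcntw() - 1u, (uint32_t)-1);
//       return svtbl_f32(data, idx);   // data[VL-1], data[VL-2], ..., data[0]
//   }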
pg, svint32_t op1, int32_t op2); svint16_t svabd_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svabd_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svabd_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svfloat32_t svabd_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2); svuint64_t svabd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svabd_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svabd_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svabd_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svabd_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svabd_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svfloat64_t svabd_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2); svint8_t svabd_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svint8_t svabd_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svfloat32_t svabd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat64_t svabd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat16_t svabd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat32_t svabd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svint8_t svabd_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svabd_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svabd_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svabd_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svabd_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svabd_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svabd_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svabd_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svfloat64_t svabd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svabd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svabd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat16_t svabd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svuint64_t svabd_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint16_t svabd_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svabd_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint32_t svabd_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svint32_t svabd_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svabd_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svabd_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svfloat64_t svabd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svint64_t svabd_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svabd_m(pg, op1, op2) _Generic((op2), \ uint64_t: svabd_n_u64_m, \ uint32_t: svabd_n_u32_m, \ uint16_t: svabd_n_u16_m, \ uint8_t: svabd_n_u8_m, \ int64_t: svabd_n_s64_m, \ int32_t: svabd_n_s32_m, \ int16_t: svabd_n_s16_m, \ float32_t: svabd_n_f32_m, \ float64_t: svabd_n_f64_m, \ int8_t: svabd_n_s8_m, \ svfloat16_t: svabd_f16_m, \ svfloat32_t: svabd_f32_m, \ svint8_t: svabd_s8_m, \ svint16_t: svabd_s16_m, \ svint32_t: svabd_s32_m, \ svint64_t: svabd_s64_m, \ svuint8_t: svabd_u8_m, \ svuint16_t: svabd_u16_m, \ svuint32_t: svabd_u32_m, \ svuint64_t: svabd_u64_m, \ svfloat64_t: svabd_f64_m, \ default: __assume(0) \ )(pg, op1, op2) #define svabd_z(pg, op1, op2) _Generic((op2), \ svuint64_t: svabd_u64_z, \ svuint32_t: svabd_u32_z, \ svuint16_t: svabd_u16_z, \ svuint8_t: svabd_u8_z, \ svint64_t: svabd_s64_z, \ svint32_t: svabd_s32_z, \ float64_t: svabd_n_f64_z, \ 
uint64_t: svabd_n_u64_z, \ uint32_t: svabd_n_u32_z, \ uint16_t: svabd_n_u16_z, \ uint8_t: svabd_n_u8_z, \ int64_t: svabd_n_s64_z, \ int32_t: svabd_n_s32_z, \ int16_t: svabd_n_s16_z, \ int8_t: svabd_n_s8_z, \ svint16_t: svabd_s16_z, \ float32_t: svabd_n_f32_z, \ svint8_t: svabd_s8_z, \ svfloat32_t: svabd_f32_z, \ svfloat64_t: svabd_f64_z, \ svfloat16_t: svabd_f16_z, \ default: __assume(0) \ )(pg, op1, op2) #define svabd_x(pg, op1, op2) _Generic((op2), \ float32_t: svabd_n_f32_x, \ int16_t: svabd_n_s16_x, \ uint64_t: svabd_n_u64_x, \ uint32_t: svabd_n_u32_x, \ uint16_t: svabd_n_u16_x, \ uint8_t: svabd_n_u8_x, \ int64_t: svabd_n_s64_x, \ int32_t: svabd_n_s32_x, \ float64_t: svabd_n_f64_x, \ int8_t: svabd_n_s8_x, \ svfloat32_t: svabd_f32_x, \ svfloat16_t: svabd_f16_x, \ svuint64_t: svabd_u64_x, \ svuint16_t: svabd_u16_x, \ svuint8_t: svabd_u8_x, \ svuint32_t: svabd_u32_x, \ svint32_t: svabd_s32_x, \ svint16_t: svabd_s16_x, \ svint8_t: svabd_s8_x, \ svfloat64_t: svabd_f64_x, \ svint64_t: svabd_s64_x, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Vector arithmetic / Absolute / Absolute value: Absolute value svint16_t svabs_s16_z(svbool_t pg, svint16_t op); svint32_t svabs_s32_x(svbool_t pg, svint32_t op); svint8_t svabs_s8_z(svbool_t pg, svint8_t op); svfloat64_t svabs_f64_z(svbool_t pg, svfloat64_t op); svfloat32_t svabs_f32_z(svbool_t pg, svfloat32_t op); svfloat16_t svabs_f16_z(svbool_t pg, svfloat16_t op); svint64_t svabs_s64_x(svbool_t pg, svint64_t op); svint16_t svabs_s16_x(svbool_t pg, svint16_t op); svint64_t svabs_s64_m(svint64_t inactive, svbool_t pg, svint64_t op); svfloat64_t svabs_f64_x(svbool_t pg, svfloat64_t op); svfloat32_t svabs_f32_x(svbool_t pg, svfloat32_t op); svfloat16_t svabs_f16_x(svbool_t pg, svfloat16_t op); svint32_t svabs_s32_m(svint32_t inactive, svbool_t pg, svint32_t op); svint16_t svabs_s16_m(svint16_t inactive, svbool_t pg, svint16_t op); svfloat16_t svabs_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op); svfloat32_t svabs_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op); svint32_t svabs_s32_z(svbool_t pg, svint32_t op); svint8_t svabs_s8_x(svbool_t pg, svint8_t op); svint64_t svabs_s64_z(svbool_t pg, svint64_t op); svint8_t svabs_s8_m(svint8_t inactive, svbool_t pg, svint8_t op); svfloat64_t svabs_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svabs_z(pg, op) _Generic((op), \ svint16_t: svabs_s16_z, \ svint8_t: svabs_s8_z, \ svfloat64_t: svabs_f64_z, \ svfloat32_t: svabs_f32_z, \ svfloat16_t: svabs_f16_z, \ svint32_t: svabs_s32_z, \ svint64_t: svabs_s64_z, \ default: __assume(0) \ )(pg, op) #define svabs_x(pg, op) _Generic((op), \ svint32_t: svabs_s32_x, \ svint64_t: svabs_s64_x, \ svint16_t: svabs_s16_x, \ svfloat64_t: svabs_f64_x, \ svfloat32_t: svabs_f32_x, \ svfloat16_t: svabs_f16_x, \ svint8_t: svabs_s8_x, \ default: __assume(0) \ )(pg, op) #define svabs_m(inactive, pg, op) _Generic((op), \ svint64_t: svabs_s64_m, \ svint32_t: svabs_s32_m, \ svint16_t: svabs_s16_m, \ svfloat16_t: svabs_f16_m, \ svfloat32_t: svabs_f32_m, \ svint8_t: svabs_s8_m, \ svfloat64_t: svabs_f64_m, \ default: __assume(0) \ )(inactive, pg, op) #endif // sve: Vector arithmetic / Across vector arithmetic / Addition across vector widening: Add reduction int64_t svaddv_s32(svbool_t pg, svint32_t op); int64_t svaddv_s16(svbool_t pg, svint16_t op); uint64_t svaddv_u8(svbool_t pg, svuint8_t op); uint64_t svaddv_u16(svbool_t pg, svuint16_t op); uint64_t svaddv_u32(svbool_t pg, svuint32_t 
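// Note on the _m / _z / _x suffixes used throughout this header: for binary
// operations such as svabd, inactive lanes of the result take the value of op1
// with _m (merging), become zero with _z (zeroing), and are unspecified with
// _x (don't-care, which gives the compiler the most freedom). Unary operations
// such as svabs instead take an explicit `inactive` vector for their _m form.
// Illustrative sketch with hypothetical variable names:
//
//   svfloat32_t d_m = svabd_f32_m(pg, a, b);  // inactive lanes keep a
//   svfloat32_t d_z = svabd_f32_z(pg, a, b);  // inactive lanes are zero
//   svfloat32_t d_x = svabd_f32_x(pg, a, b);  // inactive lanes unspecified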
op); int64_t svaddv_s8(svbool_t pg, svint8_t op); // sve: Vector arithmetic / Across vector arithmetic / Addition across vector: Add reduction int64_t svaddv_s64(svbool_t pg, svint64_t op); uint64_t svaddv_u64(svbool_t pg, svuint64_t op); //float16_t svaddv_f16(svbool_t pg, svfloat16_t op); float32_t svaddv_f32(svbool_t pg, svfloat32_t op); float64_t svaddv_f64(svbool_t pg, svfloat64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svaddv(pg, op) _Generic((op), \ svint32_t: svaddv_s32, \ svint16_t: svaddv_s16, \ svuint8_t: svaddv_u8, \ svuint16_t: svaddv_u16, \ svuint32_t: svaddv_u32, \ svint8_t: svaddv_s8, \ svint64_t: svaddv_s64, \ svuint64_t: svaddv_u64, \ svfloat32_t: svaddv_f32, \ svfloat64_t: svaddv_f64, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector arithmetic / Across vector arithmetic / Addition across vector: Add reduction (strictly-ordered) float64_t svadda_f64(svbool_t pg, float64_t initial, svfloat64_t op); //float16_t svadda_f16(svbool_t pg, float16_t initial, svfloat16_t op); float32_t svadda_f32(svbool_t pg, float32_t initial, svfloat32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svadda(pg, initial, op) _Generic((op), \ svfloat64_t: svadda_f64, \ svfloat32_t: svadda_f32, \ default: __assume(0) \ )(pg, initial, op) #endif // sve: Vector arithmetic / Across vector arithmetic / Maximum across vector (IEEE754): Maximum number reduction to scalar float64_t svmaxnmv_f64(svbool_t pg, svfloat64_t op); //float16_t svmaxnmv_f16(svbool_t pg, svfloat16_t op); float32_t svmaxnmv_f32(svbool_t pg, svfloat32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmaxnmv(pg, op) _Generic((op), \ svfloat64_t: svmaxnmv_f64, \ svfloat32_t: svmaxnmv_f32, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector arithmetic / Across vector arithmetic / Maximum across vector: Maximum reduction to scalar int32_t svmaxv_s32(svbool_t pg, svint32_t op); int64_t svmaxv_s64(svbool_t pg, svint64_t op); //float16_t svmaxv_f16(svbool_t pg, svfloat16_t op); float32_t svmaxv_f32(svbool_t pg, svfloat32_t op); float64_t svmaxv_f64(svbool_t pg, svfloat64_t op); int8_t svmaxv_s8(svbool_t pg, svint8_t op); int16_t svmaxv_s16(svbool_t pg, svint16_t op); uint64_t svmaxv_u64(svbool_t pg, svuint64_t op); uint8_t svmaxv_u8(svbool_t pg, svuint8_t op); uint16_t svmaxv_u16(svbool_t pg, svuint16_t op); uint32_t svmaxv_u32(svbool_t pg, svuint32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmaxv(pg, op) _Generic((op), \ svint32_t: svmaxv_s32, \ svint64_t: svmaxv_s64, \ svfloat32_t: svmaxv_f32, \ svfloat64_t: svmaxv_f64, \ svint8_t: svmaxv_s8, \ svint16_t: svmaxv_s16, \ svuint64_t: svmaxv_u64, \ svuint8_t: svmaxv_u8, \ svuint16_t: svmaxv_u16, \ svuint32_t: svmaxv_u32, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector arithmetic / Across vector arithmetic / Minimum across vector (IEEE754): Minimum number reduction to scalar //float16_t svminnmv_f16(svbool_t pg, svfloat16_t op); float64_t svminnmv_f64(svbool_t pg, svfloat64_t op); float32_t svminnmv_f32(svbool_t pg, svfloat32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svminnmv(pg, op) _Generic((op), \ svfloat64_t: svminnmv_f64, \ svfloat32_t: svminnmv_f32, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector arithmetic / Across vector arithmetic / Minimum across vector: Minimum reduction to scalar //float16_t svminv_f16(svbool_t pg, svfloat16_t op); uint64_t svminv_u64(svbool_t pg, svuint64_t op); uint32_t 
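// Usage sketch (illustrative): the *v reductions collapse a vector to one
// scalar; narrow integer inputs widen to a 64-bit sum, as the return types
// above show. For floating point, svaddv may reassociate the additions, while
// svadda (declared above) accumulates strictly in lane order starting from
// `initial`. Variable names are hypothetical.
//
//   float32_t fast_sum  = svaddv_f32(pg, v);         // unordered reduction
//   float32_t exact_sum = svadda_f32(pg, 0.0f, v);   // strictly-ordered reduction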
svminv_u32(svbool_t pg, svuint32_t op); uint16_t svminv_u16(svbool_t pg, svuint16_t op); uint8_t svminv_u8(svbool_t pg, svuint8_t op); float32_t svminv_f32(svbool_t pg, svfloat32_t op); int32_t svminv_s32(svbool_t pg, svint32_t op); int16_t svminv_s16(svbool_t pg, svint16_t op); int8_t svminv_s8(svbool_t pg, svint8_t op); float64_t svminv_f64(svbool_t pg, svfloat64_t op); int64_t svminv_s64(svbool_t pg, svint64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svminv(pg, op) _Generic((op), \ svuint64_t: svminv_u64, \ svuint32_t: svminv_u32, \ svuint16_t: svminv_u16, \ svuint8_t: svminv_u8, \ svfloat32_t: svminv_f32, \ svint32_t: svminv_s32, \ svint16_t: svminv_s16, \ svint8_t: svminv_s8, \ svfloat64_t: svminv_f64, \ svint64_t: svminv_s64, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector arithmetic / Add / Addition: Add svuint64_t svadd_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svadd_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svadd_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svadd_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svadd_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svadd_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svadd_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svadd_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svfloat64_t svadd_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svadd_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat32_t svadd_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2); svuint64_t svadd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svadd_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svadd_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svadd_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svadd_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svadd_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svadd_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svadd_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svfloat64_t svadd_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svadd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svadd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svint8_t svadd_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svuint64_t svadd_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svadd_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svadd_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svadd_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svadd_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svadd_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svadd_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svadd_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svfloat64_t svadd_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svadd_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2); svuint64_t svadd_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svadd_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svadd_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svadd_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svadd_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svint16_t svadd_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svadd_s8_z(svbool_t pg, svint8_t op1, 
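// Usage sketch (illustrative): every arithmetic intrinsic with an _n_ infix
// takes a scalar second operand that is broadcast to all lanes; the C11
// overload macros pick the _n_ form automatically when op2 has a scalar type.
// Variable names are hypothetical.
//
//   svfloat32_t r1 = svadd_n_f32_x(pg, v, 1.0f);   // v[i] + 1.0f per lane
//   svfloat32_t r2 = svadd_f32_x(pg, v, w);        // v[i] + w[i] per lane
//   // C11 overload: svadd_x(pg, v, 1.0f) dispatches to svadd_n_f32_x.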
svint8_t op2); svfloat64_t svadd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat64_t svadd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat16_t svadd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svint16_t svadd_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svadd_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svadd_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svadd_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svadd_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svadd_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svadd_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svfloat32_t svadd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat64_t svadd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svint8_t svadd_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svadd_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svadd_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svadd_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svadd_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svadd_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svadd_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svadd_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svfloat32_t svadd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svadd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svint32_t svadd_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svadd_z(pg, op1, op2) _Generic((op2), \ uint64_t: svadd_n_u64_z, \ uint32_t: svadd_n_u32_z, \ uint16_t: svadd_n_u16_z, \ uint8_t: svadd_n_u8_z, \ int64_t: svadd_n_s64_z, \ int32_t: svadd_n_s32_z, \ int16_t: svadd_n_s16_z, \ int8_t: svadd_n_s8_z, \ float64_t: svadd_n_f64_z, \ float32_t: svadd_n_f32_z, \ svuint64_t: svadd_u64_z, \ svuint32_t: svadd_u32_z, \ svuint16_t: svadd_u16_z, \ svuint8_t: svadd_u8_z, \ svint64_t: svadd_s64_z, \ svint16_t: svadd_s16_z, \ svint8_t: svadd_s8_z, \ svfloat64_t: svadd_f64_z, \ svfloat16_t: svadd_f16_z, \ svfloat32_t: svadd_f32_z, \ svint32_t: svadd_s32_z, \ default: __assume(0) \ )(pg, op1, op2) #define svadd_x(pg, op1, op2) _Generic((op2), \ float32_t: svadd_n_f32_x, \ uint64_t: svadd_n_u64_x, \ uint32_t: svadd_n_u32_x, \ uint16_t: svadd_n_u16_x, \ uint8_t: svadd_n_u8_x, \ int64_t: svadd_n_s64_x, \ int32_t: svadd_n_s32_x, \ int16_t: svadd_n_s16_x, \ int8_t: svadd_n_s8_x, \ float64_t: svadd_n_f64_x, \ svfloat16_t: svadd_f16_x, \ svfloat32_t: svadd_f32_x, \ svfloat64_t: svadd_f64_x, \ svint8_t: svadd_s8_x, \ svint16_t: svadd_s16_x, \ svint32_t: svadd_s32_x, \ svint64_t: svadd_s64_x, \ svuint8_t: svadd_u8_x, \ svuint16_t: svadd_u16_x, \ svuint32_t: svadd_u32_x, \ svuint64_t: svadd_u64_x, \ default: __assume(0) \ )(pg, op1, op2) #define svadd_m(pg, op1, op2) _Generic((op2), \ svfloat32_t: svadd_f32_m, \ svint8_t: svadd_s8_m, \ uint64_t: svadd_n_u64_m, \ uint32_t: svadd_n_u32_m, \ uint16_t: svadd_n_u16_m, \ uint8_t: svadd_n_u8_m, \ int64_t: svadd_n_s64_m, \ int32_t: svadd_n_s32_m, \ int16_t: svadd_n_s16_m, \ int8_t: svadd_n_s8_m, \ float64_t: svadd_n_f64_m, \ float32_t: svadd_n_f32_m, \ svfloat64_t: svadd_f64_m, \ svint16_t: svadd_s16_m, \ svint32_t: svadd_s32_m, \ svint64_t: svadd_s64_m, \ svuint8_t: svadd_u8_m, \ svuint16_t: svadd_u16_m, \ svuint32_t: svadd_u32_m, \ svuint64_t: svadd_u64_m, \ svfloat16_t: svadd_f16_m, \ default: 
__assume(0) \ )(pg, op1, op2) #endif // sve: Vector arithmetic / Add / Saturating addition: Saturating add svint32_t svqadd_n_s32(svint32_t op1, int32_t op2); svint16_t svqadd_n_s16(svint16_t op1, int16_t op2); svint64_t svqadd_n_s64(svint64_t op1, int64_t op2); svint8_t svqadd_s8(svint8_t op1, svint8_t op2); svint16_t svqadd_s16(svint16_t op1, svint16_t op2); svuint32_t svqadd_n_u32(svuint32_t op1, uint32_t op2); svint32_t svqadd_s32(svint32_t op1, svint32_t op2); svuint16_t svqadd_n_u16(svuint16_t op1, uint16_t op2); svuint64_t svqadd_n_u64(svuint64_t op1, uint64_t op2); svuint8_t svqadd_u8(svuint8_t op1, svuint8_t op2); svuint16_t svqadd_u16(svuint16_t op1, svuint16_t op2); svuint32_t svqadd_u32(svuint32_t op1, svuint32_t op2); svuint64_t svqadd_u64(svuint64_t op1, svuint64_t op2); svint8_t svqadd_n_s8(svint8_t op1, int8_t op2); svint64_t svqadd_s64(svint64_t op1, svint64_t op2); svuint8_t svqadd_n_u8(svuint8_t op1, uint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqadd(op1, op2) _Generic((op2), \ int32_t: svqadd_n_s32, \ int16_t: svqadd_n_s16, \ int64_t: svqadd_n_s64, \ svint8_t: svqadd_s8, \ svint16_t: svqadd_s16, \ uint32_t: svqadd_n_u32, \ svint32_t: svqadd_s32, \ uint16_t: svqadd_n_u16, \ uint64_t: svqadd_n_u64, \ svuint8_t: svqadd_u8, \ svuint16_t: svqadd_u16, \ svuint32_t: svqadd_u32, \ svuint64_t: svqadd_u64, \ int8_t: svqadd_n_s8, \ svint64_t: svqadd_s64, \ uint8_t: svqadd_n_u8, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector arithmetic / Address calculation: Compute vector addresses for 16-bit data svuint64_t svadrh_u64base_u64index(svuint64_t bases, svuint64_t indices); svuint64_t svadrh_u64base_s64index(svuint64_t bases, svint64_t indices); svuint32_t svadrh_u32base_u32index(svuint32_t bases, svuint32_t indices); svuint32_t svadrh_u32base_s32index(svuint32_t bases, svint32_t indices); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svadrh_index(bases, indices) _Generic((indices), \ svuint64_t: svadrh_u64base_u64index, \ svint64_t: svadrh_u64base_s64index, \ svuint32_t: svadrh_u32base_u32index, \ svint32_t: svadrh_u32base_s32index, \ default: __assume(0) \ )(bases, indices) #endif // sve: Vector arithmetic / Address calculation: Compute vector addresses for 32-bit data svuint64_t svadrw_u64base_u64index(svuint64_t bases, svuint64_t indices); svuint64_t svadrw_u64base_s64index(svuint64_t bases, svint64_t indices); svuint32_t svadrw_u32base_u32index(svuint32_t bases, svuint32_t indices); svuint32_t svadrw_u32base_s32index(svuint32_t bases, svint32_t indices); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svadrw_index(bases, indices) _Generic((indices), \ svuint64_t: svadrw_u64base_u64index, \ svint64_t: svadrw_u64base_s64index, \ svuint32_t: svadrw_u32base_u32index, \ svint32_t: svadrw_u32base_s32index, \ default: __assume(0) \ )(bases, indices) #endif // sve: Vector arithmetic / Address calculation: Compute vector addresses for 64-bit data svuint64_t svadrd_u64base_u64index(svuint64_t bases, svuint64_t indices); svuint64_t svadrd_u64base_s64index(svuint64_t bases, svint64_t indices); svuint32_t svadrd_u32base_u32index(svuint32_t bases, svuint32_t indices); svuint32_t svadrd_u32base_s32index(svuint32_t bases, svint32_t indices); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svadrd_index(bases, indices) _Generic((indices), \ svuint64_t: svadrd_u64base_u64index, \ svint64_t: svadrd_u64base_s64index, \ svuint32_t: svadrd_u32base_u32index, \ svint32_t: 
svadrd_u32base_s32index, \ default: __assume(0) \ )(bases, indices) #endif // sve: Vector arithmetic / Address calculation: Compute vector addresses for 8-bit data svuint64_t svadrb_u64base_u64offset(svuint64_t bases, svuint64_t offsets); svuint64_t svadrb_u64base_s64offset(svuint64_t bases, svint64_t offsets); svuint32_t svadrb_u32base_u32offset(svuint32_t bases, svuint32_t offsets); svuint32_t svadrb_u32base_s32offset(svuint32_t bases, svint32_t offsets); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svadrb_offset(bases, offsets) _Generic((offsets), \ svuint64_t: svadrb_u64base_u64offset, \ svint64_t: svadrb_u64base_s64offset, \ svuint32_t: svadrb_u32base_u32offset, \ svint32_t: svadrb_u32base_s32offset, \ default: __assume(0) \ )(bases, offsets) #endif // sve: Vector arithmetic / Division: Divide svuint32_t svdiv_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svdiv_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svdiv_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svint64_t svdiv_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svdiv_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svfloat64_t svdiv_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svdiv_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svdiv_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat16_t svdiv_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat32_t svdiv_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat64_t svdiv_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svdiv_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2); svint32_t svdiv_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svdiv_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svuint64_t svdiv_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svfloat64_t svdiv_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svdiv_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2); svint64_t svdiv_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svuint64_t svdiv_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svdiv_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svint64_t svdiv_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svdiv_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svfloat32_t svdiv_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2); svuint64_t svdiv_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svdiv_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svint64_t svdiv_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svdiv_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svfloat64_t svdiv_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2); svuint64_t svdiv_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint64_t svdiv_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svdiv_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svint32_t svdiv_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svuint32_t svdiv_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svfloat64_t svdiv_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2); svint32_t svdiv_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svfloat64_t svdiv_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svdiv_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svdiv_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svint64_t svdiv_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); #if 
defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svdiv_x(pg, op1, op2) _Generic((op2), \ svuint32_t: svdiv_u32_x, \ svfloat16_t: svdiv_f16_x, \ svfloat32_t: svdiv_f32_x, \ svfloat64_t: svdiv_f64_x, \ svint32_t: svdiv_s32_x, \ svint64_t: svdiv_s64_x, \ svuint64_t: svdiv_u64_x, \ float32_t: svdiv_n_f32_x, \ uint64_t: svdiv_n_u64_x, \ uint32_t: svdiv_n_u32_x, \ int64_t: svdiv_n_s64_x, \ int32_t: svdiv_n_s32_x, \ float64_t: svdiv_n_f64_x, \ default: __assume(0) \ )(pg, op1, op2) #define svdiv_z(pg, op1, op2) _Generic((op2), \ svuint64_t: svdiv_u64_z, \ svuint32_t: svdiv_u32_z, \ svint64_t: svdiv_s64_z, \ svint32_t: svdiv_s32_z, \ svfloat64_t: svdiv_f64_z, \ svfloat32_t: svdiv_f32_z, \ svfloat16_t: svdiv_f16_z, \ uint64_t: svdiv_n_u64_z, \ uint32_t: svdiv_n_u32_z, \ int64_t: svdiv_n_s64_z, \ int32_t: svdiv_n_s32_z, \ float32_t: svdiv_n_f32_z, \ float64_t: svdiv_n_f64_z, \ default: __assume(0) \ )(pg, op1, op2) #define svdiv_m(pg, op1, op2) _Generic((op2), \ float32_t: svdiv_n_f32_m, \ float64_t: svdiv_n_f64_m, \ int64_t: svdiv_n_s64_m, \ svuint64_t: svdiv_u64_m, \ uint64_t: svdiv_n_u64_m, \ uint32_t: svdiv_n_u32_m, \ int32_t: svdiv_n_s32_m, \ svuint32_t: svdiv_u32_m, \ svint32_t: svdiv_s32_m, \ svfloat64_t: svdiv_f64_m, \ svfloat32_t: svdiv_f32_m, \ svfloat16_t: svdiv_f16_m, \ svint64_t: svdiv_s64_m, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Vector arithmetic / Division: Divide reversed svuint64_t svdivr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svint64_t svdivr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svdivr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svfloat64_t svdivr_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svdivr_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svuint64_t svdivr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svfloat32_t svdivr_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2); svuint32_t svdivr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svint64_t svdivr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svdivr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svfloat64_t svdivr_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svdivr_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svdivr_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat16_t svdivr_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat64_t svdivr_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2); svint64_t svdivr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svuint64_t svdivr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint64_t svdivr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svdivr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svint64_t svdivr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svdivr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svfloat64_t svdivr_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svdivr_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2); svint32_t svdivr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svuint64_t svdivr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svint64_t svdivr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svdivr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svfloat64_t svdivr_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svdivr_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2); svuint64_t svdivr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t 
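/*
 * Usage sketch (illustrative only, not a definition): predicated division
 * with zeroing. The function and variable names below are hypothetical and
 * assume this header has been included with SVE code generation enabled.
 *
 *   svfloat32_t safe_ratio(svfloat32_t num, svfloat32_t den)
 *   {
 *       // Divide only where the denominator is non-zero; lanes where the
 *       // predicate is false become 0.0f because of the _z (zeroing) form.
 *       svbool_t pg = svcmpne_n_f32(svptrue_b32(), den, 0.0f);
 *       return svdiv_f32_z(pg, num, den);
 *   }
 *
 * With a C11 compiler the type-generic spelling svdiv_z(pg, num, den)
 * resolves to svdiv_f32_z through the _Generic macro defined above.
 */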
svdivr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint32_t svdivr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint32_t svdivr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint32_t svdivr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svint32_t svdivr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svfloat64_t svdivr_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svdivr_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svdivr_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svint64_t svdivr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svdivr_z(pg, op1, op2) _Generic((op2), \ svuint64_t: svdivr_u64_z, \ svint64_t: svdivr_s64_z, \ svint32_t: svdivr_s32_z, \ svfloat64_t: svdivr_f64_z, \ svfloat32_t: svdivr_f32_z, \ svfloat16_t: svdivr_f16_z, \ uint64_t: svdivr_n_u64_z, \ uint32_t: svdivr_n_u32_z, \ int64_t: svdivr_n_s64_z, \ int32_t: svdivr_n_s32_z, \ float64_t: svdivr_n_f64_z, \ float32_t: svdivr_n_f32_z, \ svuint32_t: svdivr_u32_z, \ default: __assume(0) \ )(pg, op1, op2) #define svdivr_x(pg, op1, op2) _Generic((op2), \ svuint64_t: svdivr_u64_x, \ svuint32_t: svdivr_u32_x, \ svint64_t: svdivr_s64_x, \ svint32_t: svdivr_s32_x, \ svfloat64_t: svdivr_f64_x, \ svfloat32_t: svdivr_f32_x, \ svfloat16_t: svdivr_f16_x, \ uint64_t: svdivr_n_u64_x, \ int64_t: svdivr_n_s64_x, \ int32_t: svdivr_n_s32_x, \ float64_t: svdivr_n_f64_x, \ float32_t: svdivr_n_f32_x, \ uint32_t: svdivr_n_u32_x, \ default: __assume(0) \ )(pg, op1, op2) #define svdivr_m(pg, op1, op2) _Generic((op2), \ float32_t: svdivr_n_f32_m, \ float64_t: svdivr_n_f64_m, \ int64_t: svdivr_n_s64_m, \ svuint64_t: svdivr_u64_m, \ int32_t: svdivr_n_s32_m, \ uint64_t: svdivr_n_u64_m, \ uint32_t: svdivr_n_u32_m, \ svuint32_t: svdivr_u32_m, \ svint32_t: svdivr_s32_m, \ svfloat64_t: svdivr_f64_m, \ svfloat32_t: svdivr_f32_m, \ svfloat16_t: svdivr_f16_m, \ svint64_t: svdivr_s64_m, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Vector arithmetic / Dot product: BFloat16 dot product svfloat32_t svbfdot_lane_f32(svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3, uint64_t imm_index); svfloat32_t svbfdot_f32(svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbfdot_lane(op1, op2, op3, imm_index) _Generic((op1), \ svfloat32_t: svbfdot_lane_f32, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #define svbfdot(op1, op2, op3) _Generic((op1), \ svfloat32_t: svbfdot_f32, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve: Vector arithmetic / Dot product: Dot product svuint32_t svdot_u32(svuint32_t op1, svuint8_t op2, svuint8_t op3); svuint64_t svdot_u64(svuint64_t op1, svuint16_t op2, svuint16_t op3); svint32_t svdot_n_s32(svint32_t op1, svint8_t op2, int8_t op3); svint64_t svdot_n_s64(svint64_t op1, svint16_t op2, int16_t op3); svuint32_t svdot_n_u32(svuint32_t op1, svuint8_t op2, uint8_t op3); svuint64_t svdot_n_u64(svuint64_t op1, svuint16_t op2, uint16_t op3); svint32_t svdot_lane_s32(svint32_t op1, svint8_t op2, svint8_t op3, uint64_t imm_index); svint64_t svdot_lane_s64(svint64_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index); svuint32_t svdot_lane_u32(svuint32_t op1, svuint8_t op2, svuint8_t op3, uint64_t imm_index); svuint64_t svdot_lane_u64(svuint64_t op1, svuint16_t op2, svuint16_t op3, uint64_t imm_index); svint64_t svdot_s64(svint64_t op1, svint16_t op2, svint16_t op3); svint32_t svdot_s32(svint32_t op1, 
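/*
 * Usage sketch (illustrative only): the widening dot product. svdot_s32
 * multiplies each group of four adjacent int8 elements of op2 and op3 and
 * adds the group's sum into the corresponding int32 lane of op1. The
 * function name below is hypothetical.
 *
 *   // acc[i] += a[4*i+0]*b[4*i+0] + ... + a[4*i+3]*b[4*i+3]
 *   svint32_t dot_accumulate(svint32_t acc, svint8_t a, svint8_t b)
 *   {
 *       return svdot_s32(acc, a, b);   // C11 generic spelling: svdot(acc, a, b)
 *   }
 */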
svint8_t op2, svint8_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svdot(op1, op2, op3) _Generic((op3), \ svuint8_t: svdot_u32, \ svuint16_t: svdot_u64, \ int8_t: svdot_n_s32, \ int16_t: svdot_n_s64, \ uint8_t: svdot_n_u32, \ uint16_t: svdot_n_u64, \ svint16_t: svdot_s64, \ svint8_t: svdot_s32, \ default: __assume(0) \ )(op1, op2, op3) #define svdot_lane(op1, op2, op3, imm_index) _Generic((op3), \ svint8_t: svdot_lane_s32, \ svint16_t: svdot_lane_s64, \ svuint8_t: svdot_lane_u32, \ svuint16_t: svdot_lane_u64, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #endif // sve: Vector arithmetic / Dot product: Dot product (signed × unsigned) svint32_t svsudot_n_s32(svint32_t op1, svint8_t op2, uint8_t op3); svint32_t svsudot_lane_s32(svint32_t op1, svint8_t op2, svuint8_t op3, uint64_t imm_index); svint32_t svsudot_s32(svint32_t op1, svint8_t op2, svuint8_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsudot(op1, op2, op3) _Generic((op3), \ uint8_t: svsudot_n_s32, \ svuint8_t: svsudot_s32, \ default: __assume(0) \ )(op1, op2, op3) #define svsudot_lane(op1, op2, op3, imm_index) _Generic((op3), \ svuint8_t: svsudot_lane_s32, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #endif // sve: Vector arithmetic / Dot product: Dot product (unsigned × signed) svint32_t svusdot_lane_s32(svint32_t op1, svuint8_t op2, svint8_t op3, uint64_t imm_index); svint32_t svusdot_n_s32(svint32_t op1, svuint8_t op2, int8_t op3); svint32_t svusdot_s32(svint32_t op1, svuint8_t op2, svint8_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svusdot_lane(op1, op2, op3, imm_index) _Generic((op3), \ svint8_t: svusdot_lane_s32, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #define svusdot(op1, op2, op3) _Generic((op3), \ int8_t: svusdot_n_s32, \ svint8_t: svusdot_s32, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve: Vector arithmetic / Exponent: Adjust exponent svfloat32_t svscale_n_f32_x(svbool_t pg, svfloat32_t op1, int32_t op2); svfloat64_t svscale_n_f64_x(svbool_t pg, svfloat64_t op1, int64_t op2); svfloat16_t svscale_n_f16_m(svbool_t pg, svfloat16_t op1, int16_t op2); svfloat16_t svscale_n_f16_z(svbool_t pg, svfloat16_t op1, int16_t op2); svfloat32_t svscale_n_f32_z(svbool_t pg, svfloat32_t op1, int32_t op2); svfloat16_t svscale_n_f16_x(svbool_t pg, svfloat16_t op1, int16_t op2); svfloat16_t svscale_f16_m(svbool_t pg, svfloat16_t op1, svint16_t op2); svfloat64_t svscale_n_f64_z(svbool_t pg, svfloat64_t op1, int64_t op2); svfloat64_t svscale_f64_m(svbool_t pg, svfloat64_t op1, svint64_t op2); svfloat16_t svscale_f16_x(svbool_t pg, svfloat16_t op1, svint16_t op2); svfloat32_t svscale_n_f32_m(svbool_t pg, svfloat32_t op1, int32_t op2); svfloat32_t svscale_f32_x(svbool_t pg, svfloat32_t op1, svint32_t op2); svfloat64_t svscale_f64_x(svbool_t pg, svfloat64_t op1, svint64_t op2); svfloat16_t svscale_f16_z(svbool_t pg, svfloat16_t op1, svint16_t op2); svfloat32_t svscale_f32_z(svbool_t pg, svfloat32_t op1, svint32_t op2); svfloat64_t svscale_f64_z(svbool_t pg, svfloat64_t op1, svint64_t op2); svfloat32_t svscale_f32_m(svbool_t pg, svfloat32_t op1, svint32_t op2); svfloat64_t svscale_n_f64_m(svbool_t pg, svfloat64_t op1, int64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svscale_x(pg, op1, op2) _Generic((op2), \ int32_t: svscale_n_f32_x, \ int64_t: svscale_n_f64_x, \ int16_t: svscale_n_f16_x, \ svint16_t: svscale_f16_x, \ svint32_t: svscale_f32_x, \ svint64_t: svscale_f64_x, \ default:
__assume(0) \ )(pg, op1, op2) #define svscale_m(pg, op1, op2) _Generic((op2), \ int16_t: svscale_n_f16_m, \ svint16_t: svscale_f16_m, \ svint64_t: svscale_f64_m, \ int32_t: svscale_n_f32_m, \ svint32_t: svscale_f32_m, \ int64_t: svscale_n_f64_m, \ default: __assume(0) \ )(pg, op1, op2) #define svscale_z(pg, op1, op2) _Generic((op2), \ int16_t: svscale_n_f16_z, \ int32_t: svscale_n_f32_z, \ int64_t: svscale_n_f64_z, \ svint16_t: svscale_f16_z, \ svint32_t: svscale_f32_z, \ svint64_t: svscale_f64_z, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Vector arithmetic / Exponent: Floating-point exponential accelerator svfloat64_t svexpa_f64(svuint64_t op); svfloat16_t svexpa_f16(svuint16_t op); svfloat32_t svexpa_f32(svuint32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svexpa(op) _Generic((op), \ svuint64_t: svexpa_f64, \ svuint16_t: svexpa_f16, \ svuint32_t: svexpa_f32, \ default: __assume(0) \ )(op) #endif // sve: Vector arithmetic / Matrix multiply: BFloat16 matrix multiply-accumulate svfloat32_t svbfmmla_f32(svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbfmmla(op1, op2, op3) _Generic((op1), \ svfloat32_t: svbfmmla_f32, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve: Vector arithmetic / Matrix multiply: Matrix multiply-accumulate svint32_t svmmla_s32(svint32_t op1, svint8_t op2, svint8_t op3); svuint32_t svmmla_u32(svuint32_t op1, svuint8_t op2, svuint8_t op3); svfloat32_t svmmla_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svmmla_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmmla(op1, op2, op3) _Generic((op1), \ svint32_t: svmmla_s32, \ svuint32_t: svmmla_u32, \ svfloat32_t: svmmla_f32, \ svfloat64_t: svmmla_f64, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve: Vector arithmetic / Matrix multiply: Matrix multiply-accumulate (unsigned ×
signed) svint32_t svusmmla_s32(svint32_t op1, svuint8_t op2, svint8_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svusmmla(op1, op2, op3) _Generic((op1), \ svint32_t: svusmmla_s32, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve: Vector arithmetic / Maximum: Maximum svuint32_t svmax_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svmax_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svmax_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svmax_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svmax_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svmax_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svmax_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svfloat64_t svmax_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svmax_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat32_t svmax_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svuint32_t svmax_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svmax_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svmax_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svmax_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svmax_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svmax_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svmax_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svfloat64_t svmax_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svuint64_t svmax_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svfloat16_t svmax_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svint32_t svmax_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svuint16_t svmax_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint64_t svmax_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint8_t svmax_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svmax_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svmax_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svmax_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svmax_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svfloat64_t svmax_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svmax_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svmax_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svuint64_t svmax_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svfloat16_t svmax_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat32_t svmax_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat64_t svmax_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svint8_t svmax_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svmax_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svmax_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svmax_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svmax_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint32_t svmax_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svmax_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint16_t svmax_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint64_t svmax_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svmax_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svmax_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svmax_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svmax_n_s64_z(svbool_t pg, svint64_t op1, int64_t 
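/*
 * Note on the _m/_z/_x suffixes used throughout this header, sketched here
 * with svmax (the variables a, b and n are assumed to be in scope; this is
 * illustrative only):
 *
 *   svbool_t  pg = svwhilelt_b32_u32(0u, n);   // first n lanes active
 *   svint32_t m  = svmax_s32_m(pg, a, b);      // inactive lanes keep a
 *   svint32_t z  = svmax_s32_z(pg, a, b);      // inactive lanes are zero
 *   svint32_t x  = svmax_s32_x(pg, a, b);      // inactive lanes unspecified
 *
 * The _x ("don't care") form lets the compiler pick the cheapest encoding
 * when the inactive lanes are never read.
 */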
op2); svint32_t svmax_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint8_t svmax_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svfloat64_t svmax_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svmax_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2); svint16_t svmax_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svuint32_t svmax_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint64_t svmax_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svint8_t svmax_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svmax_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svuint32_t svmax_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svfloat64_t svmax_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2); svuint8_t svmax_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svmax_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svint64_t svmax_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svfloat32_t svmax_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmax_m(pg, op1, op2) _Generic((op2), \ uint32_t: svmax_n_u32_m, \ uint16_t: svmax_n_u16_m, \ uint8_t: svmax_n_u8_m, \ int64_t: svmax_n_s64_m, \ int32_t: svmax_n_s32_m, \ int16_t: svmax_n_s16_m, \ int8_t: svmax_n_s8_m, \ float64_t: svmax_n_f64_m, \ float32_t: svmax_n_f32_m, \ svuint16_t: svmax_u16_m, \ svuint64_t: svmax_u64_m, \ svfloat16_t: svmax_f16_m, \ svfloat32_t: svmax_f32_m, \ svfloat64_t: svmax_f64_m, \ svint8_t: svmax_s8_m, \ svint16_t: svmax_s16_m, \ svint32_t: svmax_s32_m, \ svint64_t: svmax_s64_m, \ svuint8_t: svmax_u8_m, \ svuint32_t: svmax_u32_m, \ uint64_t: svmax_n_u64_m, \ default: __assume(0) \ )(pg, op1, op2) #define svmax_z(pg, op1, op2) _Generic((op2), \ svfloat32_t: svmax_f32_z, \ svuint32_t: svmax_u32_z, \ svuint16_t: svmax_u16_z, \ svuint8_t: svmax_u8_z, \ svint64_t: svmax_s64_z, \ svint32_t: svmax_s32_z, \ svint16_t: svmax_s16_z, \ svint8_t: svmax_s8_z, \ svfloat64_t: svmax_f64_z, \ svuint64_t: svmax_u64_z, \ svfloat16_t: svmax_f16_z, \ uint64_t: svmax_n_u64_z, \ uint32_t: svmax_n_u32_z, \ uint16_t: svmax_n_u16_z, \ uint8_t: svmax_n_u8_z, \ int64_t: svmax_n_s64_z, \ int32_t: svmax_n_s32_z, \ int8_t: svmax_n_s8_z, \ float64_t: svmax_n_f64_z, \ float32_t: svmax_n_f32_z, \ int16_t: svmax_n_s16_z, \ default: __assume(0) \ )(pg, op1, op2) #define svmax_x(pg, op1, op2) _Generic((op2), \ int32_t: svmax_n_s32_x, \ svuint64_t: svmax_u64_x, \ svuint8_t: svmax_u8_x, \ svint64_t: svmax_s64_x, \ svint32_t: svmax_s32_x, \ svint16_t: svmax_s16_x, \ svint8_t: svmax_s8_x, \ svfloat64_t: svmax_f64_x, \ svfloat32_t: svmax_f32_x, \ svfloat16_t: svmax_f16_x, \ svuint16_t: svmax_u16_x, \ uint32_t: svmax_n_u32_x, \ uint64_t: svmax_n_u64_x, \ int8_t: svmax_n_s8_x, \ int16_t: svmax_n_s16_x, \ svuint32_t: svmax_u32_x, \ float64_t: svmax_n_f64_x, \ uint8_t: svmax_n_u8_x, \ uint16_t: svmax_n_u16_x, \ int64_t: svmax_n_s64_x, \ float32_t: svmax_n_f32_x, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Vector arithmetic / Maximum: Maximum number svfloat64_t svmaxnm_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat64_t svmaxnm_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svmaxnm_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat32_t svmaxnm_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat64_t svmaxnm_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat16_t svmaxnm_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat32_t svmaxnm_f32_x(svbool_t pg, 
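/*
 * Usage sketch (illustrative only): svmaxnm follows the IEEE 754 maxNum
 * rule, so when exactly one operand is a quiet NaN the numeric operand is
 * returned, whereas svmax propagates the NaN. The variable v is assumed to
 * be in scope.
 *
 *   svbool_t    pg = svptrue_b32();
 *   svfloat32_t r1 = svmax_f32_x(pg, v, svdup_n_f32(0.0f));    // NaN lanes stay NaN
 *   svfloat32_t r2 = svmaxnm_f32_x(pg, v, svdup_n_f32(0.0f));  // NaN lanes become 0.0f
 */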
svfloat32_t op1, svfloat32_t op2); svfloat64_t svmaxnm_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svmaxnm_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat64_t svmaxnm_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svmaxnm_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat64_t svmaxnm_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svmaxnm_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat16_t svmaxnm_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat16_t svmaxnm_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmaxnm_x(pg, op1, op2) _Generic((op2), \ svfloat64_t: svmaxnm_f64_x, \ svfloat16_t: svmaxnm_f16_x, \ svfloat32_t: svmaxnm_f32_x, \ float64_t: svmaxnm_n_f64_x, \ float32_t: svmaxnm_n_f32_x, \ default: __assume(0) \ )(pg, op1, op2) #define svmaxnm_z(pg, op1, op2) _Generic((op2), \ float64_t: svmaxnm_n_f64_z, \ float32_t: svmaxnm_n_f32_z, \ svfloat32_t: svmaxnm_f32_z, \ svfloat64_t: svmaxnm_f64_z, \ svfloat16_t: svmaxnm_f16_z, \ default: __assume(0) \ )(pg, op1, op2) #define svmaxnm_m(pg, op1, op2) _Generic((op2), \ svfloat32_t: svmaxnm_f32_m, \ svfloat64_t: svmaxnm_f64_m, \ float32_t: svmaxnm_n_f32_m, \ float64_t: svmaxnm_n_f64_m, \ svfloat16_t: svmaxnm_f16_m, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Vector arithmetic / Minimum: Minimum svuint32_t svmin_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svmin_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svmin_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svfloat32_t svmin_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2); svint16_t svmin_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svmin_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svfloat64_t svmin_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2); svuint64_t svmin_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svint64_t svmin_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svuint32_t svmin_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint8_t svmin_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint32_t svmin_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svmin_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svmin_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svfloat64_t svmin_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svmin_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svmin_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svuint64_t svmin_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svmin_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svmin_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svint32_t svmin_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svuint8_t svmin_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svmin_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svmin_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svuint16_t svmin_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svint16_t svmin_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svfloat64_t svmin_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svmin_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svmin_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svuint64_t svmin_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svmin_u32_m(svbool_t pg, svuint32_t op1, 
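/*
 * The _n variants broadcast a scalar final operand to every lane. A
 * hypothetical clamping sketch (illustrative only):
 *
 *   // Clamp each active element of v into [lo, hi].
 *   svfloat32_t clamp_f32(svbool_t pg, svfloat32_t v, float32_t lo, float32_t hi)
 *   {
 *       v = svmax_n_f32_x(pg, v, lo);
 *       return svmin_n_f32_x(pg, v, hi);
 *   }
 *
 * Under C11 the generic svmax_x/svmin_x macros dispatch to these _n forms
 * automatically when the second operand is a scalar float32_t.
 */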
svuint32_t op2); svuint16_t svmin_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svmin_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svmin_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svmin_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svmin_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svmin_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svfloat64_t svmin_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svmin_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svmin_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svint8_t svmin_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svuint64_t svmin_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svint64_t svmin_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint8_t svmin_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svmin_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svmin_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svmin_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svmin_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svmin_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint32_t svmin_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint64_t svmin_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svfloat32_t svmin_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat64_t svmin_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2); svuint64_t svmin_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svmin_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svint8_t svmin_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svmin_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svmin_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svmin_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svmin_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svmin_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svfloat64_t svmin_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svmin_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmin_z(pg, op1, op2) _Generic((op2), \ uint32_t: svmin_n_u32_z, \ uint16_t: svmin_n_u16_z, \ uint8_t: svmin_n_u8_z, \ float32_t: svmin_n_f32_z, \ int16_t: svmin_n_s16_z, \ int8_t: svmin_n_s8_z, \ float64_t: svmin_n_f64_z, \ uint64_t: svmin_n_u64_z, \ svint64_t: svmin_s64_z, \ svuint8_t: svmin_u8_z, \ svint32_t: svmin_s32_z, \ svint16_t: svmin_s16_z, \ svint8_t: svmin_s8_z, \ svfloat64_t: svmin_f64_z, \ svfloat32_t: svmin_f32_z, \ svfloat16_t: svmin_f16_z, \ int32_t: svmin_n_s32_z, \ svuint16_t: svmin_u16_z, \ int64_t: svmin_n_s64_z, \ svuint64_t: svmin_u64_z, \ svuint32_t: svmin_u32_z, \ default: __assume(0) \ )(pg, op1, op2) #define svmin_x(pg, op1, op2) _Generic((op2), \ uint32_t: svmin_n_u32_x, \ svuint64_t: svmin_u64_x, \ svuint32_t: svmin_u32_x, \ svuint16_t: svmin_u16_x, \ svuint8_t: svmin_u8_x, \ svint64_t: svmin_s64_x, \ svint32_t: svmin_s32_x, \ svint16_t: svmin_s16_x, \ svfloat64_t: svmin_f64_x, \ svfloat32_t: svmin_f32_x, \ svfloat16_t: svmin_f16_x, \ svint8_t: svmin_s8_x, \ uint64_t: svmin_n_u64_x, \ float32_t: svmin_n_f32_x, \ float64_t: svmin_n_f64_x, \ int8_t: svmin_n_s8_x, \ int16_t: svmin_n_s16_x, \ int32_t: svmin_n_s32_x, \ int64_t: svmin_n_s64_x, \ uint8_t: svmin_n_u8_x, \ uint16_t: svmin_n_u16_x, \ default: __assume(0) \ )(pg, op1, op2) #define svmin_m(pg, op1, op2) 
_Generic((op2), \ svuint64_t: svmin_u64_m, \ svuint32_t: svmin_u32_m, \ svuint16_t: svmin_u16_m, \ svuint8_t: svmin_u8_m, \ svint64_t: svmin_s64_m, \ svint32_t: svmin_s32_m, \ svint16_t: svmin_s16_m, \ svint8_t: svmin_s8_m, \ svfloat64_t: svmin_f64_m, \ svfloat32_t: svmin_f32_m, \ svfloat16_t: svmin_f16_m, \ int8_t: svmin_n_s8_m, \ int16_t: svmin_n_s16_m, \ int32_t: svmin_n_s32_m, \ int64_t: svmin_n_s64_m, \ uint8_t: svmin_n_u8_m, \ uint16_t: svmin_n_u16_m, \ uint32_t: svmin_n_u32_m, \ uint64_t: svmin_n_u64_m, \ float64_t: svmin_n_f64_m, \ float32_t: svmin_n_f32_m, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Vector arithmetic / Minimum: Minimum number svfloat32_t svminnm_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svminnm_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat16_t svminnm_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat64_t svminnm_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat64_t svminnm_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svminnm_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat64_t svminnm_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat64_t svminnm_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat64_t svminnm_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svminnm_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat16_t svminnm_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat64_t svminnm_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svminnm_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat32_t svminnm_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat32_t svminnm_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svminnm_z(pg, op1, op2) _Generic((op2), \ svfloat32_t: svminnm_f32_z, \ svfloat16_t: svminnm_f16_z, \ float64_t: svminnm_n_f64_z, \ svfloat64_t: svminnm_f64_z, \ float32_t: svminnm_n_f32_z, \ default: __assume(0) \ )(pg, op1, op2) #define svminnm_m(pg, op1, op2) _Generic((op2), \ svfloat16_t: svminnm_f16_m, \ float32_t: svminnm_n_f32_m, \ float64_t: svminnm_n_f64_m, \ svfloat64_t: svminnm_f64_m, \ svfloat32_t: svminnm_f32_m, \ default: __assume(0) \ )(pg, op1, op2) #define svminnm_x(pg, op1, op2) _Generic((op2), \ svfloat64_t: svminnm_f64_x, \ float64_t: svminnm_n_f64_x, \ svfloat16_t: svminnm_f16_x, \ float32_t: svminnm_n_f32_x, \ svfloat32_t: svminnm_f32_x, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Vector arithmetic / Multiply / Multiplication: Multiply svuint64_t svmul_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svmul_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svmul_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svmul_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svmul_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint16_t svmul_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svfloat32_t svmul_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2); svint8_t svmul_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svfloat64_t svmul_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svmul_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2); svuint64_t svmul_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svint32_t svmul_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svfloat64_t svmul_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2); svint32_t svmul_n_s32_z(svbool_t 
pg, svint32_t op1, int32_t op2); svint16_t svmul_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint64_t svmul_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svmul_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svmul_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint32_t svmul_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint64_t svmul_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svfloat16_t svmul_lane_f16(svfloat16_t op1, svfloat16_t op2, uint64_t imm_index); svfloat32_t svmul_lane_f32(svfloat32_t op1, svfloat32_t op2, uint64_t imm_index); svfloat64_t svmul_lane_f64(svfloat64_t op1, svfloat64_t op2, uint64_t imm_index); svint16_t svmul_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svmul_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svfloat64_t svmul_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svmul_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svmul_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svint8_t svmul_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svuint32_t svmul_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svint64_t svmul_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svmul_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svuint32_t svmul_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svmul_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svmul_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svmul_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svmul_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svmul_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svuint64_t svmul_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svmul_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svfloat32_t svmul_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svmul_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svuint64_t svmul_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svmul_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svmul_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svmul_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svfloat64_t svmul_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svuint16_t svmul_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svfloat16_t svmul_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat64_t svmul_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svint32_t svmul_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svmul_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint64_t svmul_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svfloat64_t svmul_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svmul_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat32_t svmul_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svuint64_t svmul_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint16_t svmul_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svmul_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svmul_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svmul_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svmul_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svmul_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svuint32_t svmul_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svint8_t svmul_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svint32_t 
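/*
 * Usage sketch (illustrative only): svmul_lane multiplies every element of
 * op1 by the element of op2 selected by imm_index within each 128-bit
 * segment of op2; imm_index must be a compile-time constant. The function
 * name below is hypothetical.
 *
 *   // Scale every lane of v by element 2 of each 128-bit segment of coeffs.
 *   svfloat32_t scale_by_lane2(svfloat32_t v, svfloat32_t coeffs)
 *   {
 *       return svmul_lane_f32(v, coeffs, 2);
 *   }
 */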
svmul_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); // sve2: Vector arithmetic / Multiply / Multiplication: Multiply svuint64_t svmul_lane_u64(svuint64_t op1, svuint64_t op2, uint64_t imm_index); svuint32_t svmul_lane_u32(svuint32_t op1, svuint32_t op2, uint64_t imm_index); svint64_t svmul_lane_s64(svint64_t op1, svint64_t op2, uint64_t imm_index); svint32_t svmul_lane_s32(svint32_t op1, svint32_t op2, uint64_t imm_index); svint16_t svmul_lane_s16(svint16_t op1, svint16_t op2, uint64_t imm_index); svuint16_t svmul_lane_u16(svuint16_t op1, svuint16_t op2, uint64_t imm_index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmul_x(pg, op1, op2) _Generic((op2), \ uint64_t: svmul_n_u64_x, \ uint32_t: svmul_n_u32_x, \ uint16_t: svmul_n_u16_x, \ uint8_t: svmul_n_u8_x, \ int64_t: svmul_n_s64_x, \ int16_t: svmul_n_s16_x, \ int8_t: svmul_n_s8_x, \ float64_t: svmul_n_f64_x, \ float32_t: svmul_n_f32_x, \ int32_t: svmul_n_s32_x, \ svuint32_t: svmul_u32_x, \ svuint16_t: svmul_u16_x, \ svuint8_t: svmul_u8_x, \ svint64_t: svmul_s64_x, \ svint32_t: svmul_s32_x, \ svint16_t: svmul_s16_x, \ svuint64_t: svmul_u64_x, \ svint8_t: svmul_s8_x, \ svfloat32_t: svmul_f32_x, \ svfloat16_t: svmul_f16_x, \ svfloat64_t: svmul_f64_x, \ default: __assume(0) \ )(pg, op1, op2) #define svmul_z(pg, op1, op2) _Generic((op2), \ float32_t: svmul_n_f32_z, \ float64_t: svmul_n_f64_z, \ int32_t: svmul_n_s32_z, \ int16_t: svmul_n_s16_z, \ int64_t: svmul_n_s64_z, \ uint8_t: svmul_n_u8_z, \ uint16_t: svmul_n_u16_z, \ uint32_t: svmul_n_u32_z, \ uint64_t: svmul_n_u64_z, \ int8_t: svmul_n_s8_z, \ svfloat16_t: svmul_f16_z, \ svfloat64_t: svmul_f64_z, \ svfloat32_t: svmul_f32_z, \ svuint64_t: svmul_u64_z, \ svuint16_t: svmul_u16_z, \ svuint8_t: svmul_u8_z, \ svint64_t: svmul_s64_z, \ svint32_t: svmul_s32_z, \ svint16_t: svmul_s16_z, \ svint8_t: svmul_s8_z, \ svuint32_t: svmul_u32_z, \ default: __assume(0) \ )(pg, op1, op2) #define svmul_m(pg, op1, op2) _Generic((op2), \ uint64_t: svmul_n_u64_m, \ svint16_t: svmul_s16_m, \ svint8_t: svmul_s8_m, \ svfloat64_t: svmul_f64_m, \ svfloat32_t: svmul_f32_m, \ svfloat16_t: svmul_f16_m, \ uint32_t: svmul_n_u32_m, \ int64_t: svmul_n_s64_m, \ uint8_t: svmul_n_u8_m, \ svuint64_t: svmul_u64_m, \ svuint32_t: svmul_u32_m, \ svuint16_t: svmul_u16_m, \ svuint8_t: svmul_u8_m, \ uint16_t: svmul_n_u16_m, \ int32_t: svmul_n_s32_m, \ int16_t: svmul_n_s16_m, \ svint64_t: svmul_s64_m, \ float64_t: svmul_n_f64_m, \ float32_t: svmul_n_f32_m, \ int8_t: svmul_n_s8_m, \ svint32_t: svmul_s32_m, \ default: __assume(0) \ )(pg, op1, op2) #define svmul_lane(op1, op2, imm_index) _Generic((op2), \ svfloat16_t: svmul_lane_f16, \ svfloat32_t: svmul_lane_f32, \ svfloat64_t: svmul_lane_f64, \ svuint64_t: svmul_lane_u64, \ svuint32_t: svmul_lane_u32, \ svint64_t: svmul_lane_s64, \ svint32_t: svmul_lane_s32, \ svint16_t: svmul_lane_s16, \ svuint16_t: svmul_lane_u16, \ default: __assume(0) \ )(op1, op2, imm_index) #endif // sve: Vector arithmetic / Multiply / Multiplication: Multiply, returning high half svuint16_t svmulh_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svmulh_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svmulh_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svmulh_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svmulh_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svmulh_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svmulh_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svmulh_s8_z(svbool_t pg, 
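/*
 * Usage sketch (illustrative only): svmulh returns the high half of the
 * double-width product (for 32-bit lanes, bits [63:32] of the 64-bit
 * result). The function name below is hypothetical.
 *
 *   svint32_t mul_hi(svbool_t pg, svint32_t a, svint32_t b)
 *   {
 *       return svmulh_s32_x(pg, a, b);   // C11 generic spelling: svmulh_x(pg, a, b)
 *   }
 */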
svint8_t op1, svint8_t op2); svuint64_t svmulh_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svmulh_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svmulh_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svint64_t svmulh_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svint16_t svmulh_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svmulh_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svuint32_t svmulh_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svmulh_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svmulh_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svmulh_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint64_t svmulh_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svint32_t svmulh_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svuint8_t svmulh_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svint8_t svmulh_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svuint32_t svmulh_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svint32_t svmulh_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svmulh_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svuint64_t svmulh_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svint8_t svmulh_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svmulh_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svmulh_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svmulh_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svmulh_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svmulh_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint32_t svmulh_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint64_t svmulh_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint16_t svmulh_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svmulh_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svmulh_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svmulh_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svmulh_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svmulh_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svmulh_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svmulh_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svmulh_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svmulh_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svmulh_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svint16_t svmulh_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svmulh_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint32_t svmulh_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmulh_z(pg, op1, op2) _Generic((op2), \ svuint16_t: svmulh_u16_z, \ svuint32_t: svmulh_u32_z, \ svuint8_t: svmulh_u8_z, \ svint64_t: svmulh_s64_z, \ svint32_t: svmulh_s32_z, \ svint16_t: svmulh_s16_z, \ svint8_t: svmulh_s8_z, \ svuint64_t: svmulh_u64_z, \ int8_t: svmulh_n_s8_z, \ int16_t: svmulh_n_s16_z, \ int32_t: svmulh_n_s32_z, \ int64_t: svmulh_n_s64_z, \ uint8_t: svmulh_n_u8_z, \ uint16_t: svmulh_n_u16_z, \ uint32_t: svmulh_n_u32_z, \ uint64_t: svmulh_n_u64_z, \ default: __assume(0) \ )(pg, op1, op2) #define svmulh_x(pg, op1, op2) _Generic((op2), \ svuint16_t: svmulh_u16_x, \ svuint64_t: svmulh_u64_x, \ svuint32_t: svmulh_u32_x, \ svint64_t: svmulh_s64_x, \ svint16_t: svmulh_s16_x, \ svint8_t: svmulh_s8_x, \ svuint8_t: svmulh_u8_x, \ 
uint32_t: svmulh_n_u32_x, \ uint64_t: svmulh_n_u64_x, \ uint16_t: svmulh_n_u16_x, \ uint8_t: svmulh_n_u8_x, \ int64_t: svmulh_n_s64_x, \ int32_t: svmulh_n_s32_x, \ int16_t: svmulh_n_s16_x, \ int8_t: svmulh_n_s8_x, \ svint32_t: svmulh_s32_x, \ default: __assume(0) \ )(pg, op1, op2) #define svmulh_m(pg, op1, op2) _Generic((op2), \ svuint64_t: svmulh_u64_m, \ svuint32_t: svmulh_u32_m, \ svuint16_t: svmulh_u16_m, \ svuint8_t: svmulh_u8_m, \ svint64_t: svmulh_s64_m, \ svint32_t: svmulh_s32_m, \ int8_t: svmulh_n_s8_m, \ int32_t: svmulh_n_s32_m, \ svint16_t: svmulh_s16_m, \ uint64_t: svmulh_n_u64_m, \ uint32_t: svmulh_n_u32_m, \ uint16_t: svmulh_n_u16_m, \ uint8_t: svmulh_n_u8_m, \ int64_t: svmulh_n_s64_m, \ int16_t: svmulh_n_s16_m, \ svint8_t: svmulh_s8_m, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Vector arithmetic / Multiply / Multiply extended: Multiply extended svfloat64_t svmulx_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat16_t svmulx_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat32_t svmulx_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat64_t svmulx_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat16_t svmulx_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat32_t svmulx_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svmulx_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat32_t svmulx_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat64_t svmulx_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svmulx_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat64_t svmulx_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svmulx_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat64_t svmulx_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat64_t svmulx_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svmulx_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmulx_x(pg, op1, op2) _Generic((op2), \ svfloat64_t: svmulx_f64_x, \ svfloat16_t: svmulx_f16_x, \ svfloat32_t: svmulx_f32_x, \ float32_t: svmulx_n_f32_x, \ float64_t: svmulx_n_f64_x, \ default: __assume(0) \ )(pg, op1, op2) #define svmulx_m(pg, op1, op2) _Generic((op2), \ svfloat16_t: svmulx_f16_m, \ svfloat32_t: svmulx_f32_m, \ svfloat64_t: svmulx_f64_m, \ float32_t: svmulx_n_f32_m, \ float64_t: svmulx_n_f64_m, \ default: __assume(0) \ )(pg, op1, op2) #define svmulx_z(pg, op1, op2) _Generic((op2), \ svfloat16_t: svmulx_f16_z, \ svfloat32_t: svmulx_f32_z, \ svfloat64_t: svmulx_f64_z, \ float64_t: svmulx_n_f64_z, \ float32_t: svmulx_n_f32_z, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Vector arithmetic / Multiply-accumulate / Fused multiply-accumulate: Multiply-add, addend first svfloat16_t svmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat16_t svmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat16_t svmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat64_t svmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, 
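/*
 * Usage sketch (illustrative only): svmla is multiply-add with the addend
 * first, computing op1 + op2 * op3 per active lane (fused, with a single
 * rounding, for the floating-point forms). The function name below is
 * hypothetical.
 *
 *   // acc += x * y, element-wise, under predicate pg.
 *   svfloat32_t fma_acc(svbool_t pg, svfloat32_t acc, svfloat32_t x, svfloat32_t y)
 *   {
 *       return svmla_f32_x(pg, acc, x, y);   // C11 generic spelling: svmla_x(...)
 *   }
 */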
svfloat64_t op3); svfloat32_t svmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat16_t svmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3, uint64_t imm_index); svfloat32_t svmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_index); svfloat64_t svmla_lane_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_index); svfloat32_t svmla_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat64_t svmla_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svmla_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat64_t svmla_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svmla_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat64_t svmla_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); // sve: Vector arithmetic / Multiply-accumulate / Multiply-accumulate: Multiply-add, addend first svint32_t svmla_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3); svint16_t svmla_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3); svint8_t svmla_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3); svint16_t svmla_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3); svint32_t svmla_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3); svint64_t svmla_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3); svuint8_t svmla_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3); svuint16_t svmla_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3); svuint32_t svmla_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3); svuint64_t svmla_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3); svint8_t svmla_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3); svint64_t svmla_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3); svuint16_t svmla_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3); svuint8_t svmla_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3); svuint16_t svmla_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3); svuint32_t svmla_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3); svuint64_t svmla_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3); svuint8_t svmla_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3); svint16_t svmla_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3); svuint64_t svmla_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3); svuint32_t svmla_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3); svint8_t svmla_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3); svint32_t svmla_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3); svuint32_t svmla_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint64_t svmla_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint32_t svmla_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint16_t svmla_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3); svint8_t svmla_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3); svuint8_t svmla_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3); svint32_t svmla_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3); svint16_t svmla_s16_m(svbool_t pg, svint16_t op1, svint16_t 
op2, svint16_t op3); svint8_t svmla_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3); svint64_t svmla_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3); svuint64_t svmla_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3); svint32_t svmla_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3); svuint16_t svmla_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint8_t svmla_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3); svint64_t svmla_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3); svint32_t svmla_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3); svint16_t svmla_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3); svint8_t svmla_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3); svint16_t svmla_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3); svuint64_t svmla_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint32_t svmla_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint16_t svmla_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint8_t svmla_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3); svint64_t svmla_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3); svint64_t svmla_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3); // sve2: Vector arithmetic / Multiply-accumulate / Multiply-accumulate: Multiply-add, addend first svint16_t svmla_lane_s16(svint16_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index); svint32_t svmla_lane_s32(svint32_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index); svint64_t svmla_lane_s64(svint64_t op1, svint64_t op2, svint64_t op3, uint64_t imm_index); svuint16_t svmla_lane_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3, uint64_t imm_index); svuint32_t svmla_lane_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3, uint64_t imm_index); svuint64_t svmla_lane_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3, uint64_t imm_index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmla_m(pg, op1, op2, op3) _Generic((op3), \ svfloat16_t: svmla_f16_m, \ svfloat32_t: svmla_f32_m, \ svfloat64_t: svmla_f64_m, \ float32_t: svmla_n_f32_m, \ float64_t: svmla_n_f64_m, \ int8_t: svmla_n_s8_m, \ int16_t: svmla_n_s16_m, \ int32_t: svmla_n_s32_m, \ int64_t: svmla_n_s64_m, \ uint8_t: svmla_n_u8_m, \ uint16_t: svmla_n_u16_m, \ uint32_t: svmla_n_u32_m, \ uint64_t: svmla_n_u64_m, \ svuint64_t: svmla_u64_m, \ svuint32_t: svmla_u32_m, \ svuint16_t: svmla_u16_m, \ svuint8_t: svmla_u8_m, \ svint32_t: svmla_s32_m, \ svint16_t: svmla_s16_m, \ svint8_t: svmla_s8_m, \ svint64_t: svmla_s64_m, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svmla_x(pg, op1, op2, op3) _Generic((op3), \ svfloat16_t: svmla_f16_x, \ svfloat32_t: svmla_f32_x, \ svfloat64_t: svmla_f64_x, \ float32_t: svmla_n_f32_x, \ float64_t: svmla_n_f64_x, \ int32_t: svmla_n_s32_x, \ int16_t: svmla_n_s16_x, \ int8_t: svmla_n_s8_x, \ int64_t: svmla_n_s64_x, \ uint16_t: svmla_n_u16_x, \ uint8_t: svmla_n_u8_x, \ uint64_t: svmla_n_u64_x, \ uint32_t: svmla_n_u32_x, \ svint8_t: svmla_s8_x, \ svint32_t: svmla_s32_x, \ svint16_t: svmla_s16_x, \ svuint64_t: svmla_u64_x, \ svuint32_t: svmla_u32_x, \ svuint16_t: svmla_u16_x, \ svuint8_t: svmla_u8_x, \ svint64_t: svmla_s64_x, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svmla_z(pg, op1, op2, op3) _Generic((op3), \ svfloat16_t: svmla_f16_z, \ svfloat64_t: svmla_f64_z, \ svfloat32_t: 
svmla_f32_z, \ float32_t: svmla_n_f32_z, \ float64_t: svmla_n_f64_z, \ uint8_t: svmla_n_u8_z, \ uint16_t: svmla_n_u16_z, \ uint32_t: svmla_n_u32_z, \ uint64_t: svmla_n_u64_z, \ int16_t: svmla_n_s16_z, \ int8_t: svmla_n_s8_z, \ int32_t: svmla_n_s32_z, \ svuint32_t: svmla_u32_z, \ svuint64_t: svmla_u64_z, \ svuint16_t: svmla_u16_z, \ svuint8_t: svmla_u8_z, \ svint64_t: svmla_s64_z, \ svint32_t: svmla_s32_z, \ svint16_t: svmla_s16_z, \ svint8_t: svmla_s8_z, \ int64_t: svmla_n_s64_z, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svmla_lane(op1, op2, op3, imm_index) _Generic((op3), \ svfloat16_t: svmla_lane_f16, \ svfloat32_t: svmla_lane_f32, \ svfloat64_t: svmla_lane_f64, \ svint16_t: svmla_lane_s16, \ svint32_t: svmla_lane_s32, \ svint64_t: svmla_lane_s64, \ svuint16_t: svmla_lane_u16, \ svuint32_t: svmla_lane_u32, \ svuint64_t: svmla_lane_u64, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #endif // sve: Vector arithmetic / Multiply-accumulate / Fused multiply-accumulate: Multiply-add, multiplicand first svfloat64_t svmad_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat32_t svmad_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat16_t svmad_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat64_t svmad_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat64_t svmad_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat32_t svmad_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat16_t svmad_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat64_t svmad_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat64_t svmad_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svmad_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat64_t svmad_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svmad_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat32_t svmad_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat16_t svmad_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svmad_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); // sve: Vector arithmetic / Multiply-accumulate / Multiply-accumulate: Multiply-add, multiplicand first svint16_t svmad_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3); svint16_t svmad_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3); svuint16_t svmad_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3); svuint32_t svmad_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint16_t svmad_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint8_t svmad_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3); svint64_t svmad_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3); svint32_t svmad_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3); svint16_t svmad_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3); svint8_t svmad_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3); svuint64_t svmad_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint32_t svmad_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint8_t svmad_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3); svint64_t 
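/*
 * Note (illustrative only): svmad expresses the same multiply-add with the
 * multiplicand first, computing op1 * op2 + op3, while svmla computes
 * op1 + op2 * op3. For the merging (_m) forms the inactive lanes keep op1 in
 * both families, so the choice determines which input survives in inactive
 * lanes. The variables pg, acc, x and y are assumed to be in scope.
 *
 *   svfloat32_t r1 = svmla_f32_m(pg, acc, x, y);  // inactive lanes keep acc
 *   svfloat32_t r2 = svmad_f32_m(pg, x, y, acc);  // same sum, inactive lanes keep x
 */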
svmad_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3); svint32_t svmad_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3); svint16_t svmad_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3); svint8_t svmad_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3); svuint64_t svmad_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint32_t svmad_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint16_t svmad_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint8_t svmad_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3); svint64_t svmad_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3); svint32_t svmad_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3); svuint64_t svmad_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint32_t svmad_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3); svuint8_t svmad_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3); svint64_t svmad_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3); svint32_t svmad_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3); svint8_t svmad_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3); svint8_t svmad_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3); svuint64_t svmad_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3); svuint32_t svmad_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3); svuint16_t svmad_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3); svuint8_t svmad_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3); svint64_t svmad_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3); svint32_t svmad_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3); svint16_t svmad_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3); svint8_t svmad_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3); svuint64_t svmad_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3); svuint32_t svmad_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3); svuint16_t svmad_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3); svuint8_t svmad_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3); svint64_t svmad_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3); svint32_t svmad_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3); svint16_t svmad_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3); svint8_t svmad_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3); svuint16_t svmad_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint64_t svmad_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmad_z(pg, op1, op2, op3) _Generic((op3), \ svfloat64_t: svmad_f64_z, \ svfloat32_t: svmad_f32_z, \ svfloat16_t: svmad_f16_z, \ float64_t: svmad_n_f64_z, \ float32_t: svmad_n_f32_z, \ int16_t: svmad_n_s16_z, \ uint16_t: svmad_n_u16_z, \ svuint32_t: svmad_u32_z, \ svuint16_t: svmad_u16_z, \ svuint8_t: svmad_u8_z, \ svint64_t: svmad_s64_z, \ svint32_t: svmad_s32_z, \ svint16_t: svmad_s16_z, \ svint8_t: svmad_s8_z, \ svuint64_t: svmad_u64_z, \ uint32_t: svmad_n_u32_z, \ uint8_t: svmad_n_u8_z, \ int64_t: svmad_n_s64_z, \ int32_t: svmad_n_s32_z, \ int8_t: svmad_n_s8_z, \ uint64_t: svmad_n_u64_z, \ default: __assume(0) \ )(pg, op1, 
op2, op3) #define svmad_m(pg, op1, op2, op3) _Generic((op3), \ svfloat64_t: svmad_f64_m, \ float64_t: svmad_n_f64_m, \ float32_t: svmad_n_f32_m, \ svfloat16_t: svmad_f16_m, \ svfloat32_t: svmad_f32_m, \ svint16_t: svmad_s16_m, \ svuint64_t: svmad_u64_m, \ svuint32_t: svmad_u32_m, \ svuint16_t: svmad_u16_m, \ svuint8_t: svmad_u8_m, \ svint64_t: svmad_s64_m, \ svint32_t: svmad_s32_m, \ svint8_t: svmad_s8_m, \ uint64_t: svmad_n_u64_m, \ uint32_t: svmad_n_u32_m, \ uint16_t: svmad_n_u16_m, \ uint8_t: svmad_n_u8_m, \ int64_t: svmad_n_s64_m, \ int32_t: svmad_n_s32_m, \ int16_t: svmad_n_s16_m, \ int8_t: svmad_n_s8_m, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svmad_x(pg, op1, op2, op3) _Generic((op3), \ svfloat64_t: svmad_f64_x, \ svfloat32_t: svmad_f32_x, \ svfloat16_t: svmad_f16_x, \ float64_t: svmad_n_f64_x, \ float32_t: svmad_n_f32_x, \ svuint64_t: svmad_u64_x, \ svuint32_t: svmad_u32_x, \ svuint8_t: svmad_u8_x, \ svint64_t: svmad_s64_x, \ svint32_t: svmad_s32_x, \ svint16_t: svmad_s16_x, \ svint8_t: svmad_s8_x, \ uint64_t: svmad_n_u64_x, \ uint32_t: svmad_n_u32_x, \ uint16_t: svmad_n_u16_x, \ uint8_t: svmad_n_u8_x, \ int64_t: svmad_n_s64_x, \ int32_t: svmad_n_s32_x, \ int16_t: svmad_n_s16_x, \ int8_t: svmad_n_s8_x, \ svuint16_t: svmad_u16_x, \ default: __assume(0) \ )(pg, op1, op2, op3) #endif // sve: Vector arithmetic / Multiply-accumulate / Fused multiply-accumulate: Multiply-subtract, minuend first svfloat16_t svmls_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svmls_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svmls_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat16_t svmls_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svmls_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svmls_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat16_t svmls_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svmls_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svmls_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat64_t svmls_lane_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_index); svfloat32_t svmls_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat32_t svmls_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_index); svfloat16_t svmls_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3, uint64_t imm_index); svfloat64_t svmls_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svmls_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat64_t svmls_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svmls_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat64_t svmls_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); // sve: Vector arithmetic / Multiply-accumulate / Multiply-accumulate: Multiply-subtract, minuend first svuint8_t svmls_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3); svuint16_t svmls_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint32_t svmls_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint64_t svmls_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3); svint64_t svmls_s64_x(svbool_t pg, svint64_t op1, 
svint64_t op2, svint64_t op3); svint8_t svmls_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3); svint16_t svmls_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3); svint32_t svmls_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3); svint64_t svmls_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3); svuint8_t svmls_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3); svuint16_t svmls_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3); svint32_t svmls_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3); svint16_t svmls_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3); svint8_t svmls_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3); svint8_t svmls_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3); svint16_t svmls_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3); svint32_t svmls_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3); svuint32_t svmls_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3); svint64_t svmls_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3); svuint16_t svmls_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint32_t svmls_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint64_t svmls_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint8_t svmls_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3); svuint64_t svmls_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3); svuint32_t svmls_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3); svuint16_t svmls_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3); svuint8_t svmls_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3); svint64_t svmls_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3); svint32_t svmls_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3); svint16_t svmls_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3); svint8_t svmls_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3); svuint64_t svmls_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3); svuint16_t svmls_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3); svuint8_t svmls_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3); svint64_t svmls_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3); svint32_t svmls_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3); svint8_t svmls_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3); svint16_t svmls_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3); svint32_t svmls_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3); svint64_t svmls_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3); svuint8_t svmls_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3); svuint64_t svmls_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint16_t svmls_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3); svuint64_t svmls_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3); svint8_t svmls_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3); svint16_t svmls_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3); svuint32_t svmls_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3); svuint32_t svmls_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3); // sve2: Vector 
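// Example (illustrative, non-normative sketch): the _z, _m and _x suffixes select how
// inactive (predicate-false) elements are handled: _z zeroes them, _m keeps the
// corresponding element of op1, and _x leaves them unspecified so the compiler can
// choose the cheapest encoding. The helper name is purely illustrative.
static inline void example_mls_predication(svbool_t pg, svint32_t acc,
                                           svint32_t b, svint32_t c,
                                           svint32_t *out_z, svint32_t *out_m,
                                           svint32_t *out_x)
{
    *out_z = svmls_s32_z(pg, acc, b, c);  // ~ acc - b * c; inactive lanes become 0
    *out_m = svmls_s32_m(pg, acc, b, c);  // inactive lanes keep the value from acc
    *out_x = svmls_s32_x(pg, acc, b, c);  // inactive lanes have unspecified values
}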
arithmetic / Multiply-accumulate / Multiply-accumulate: Multiply-subtract, minuend first svint32_t svmls_lane_s32(svint32_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index); svint64_t svmls_lane_s64(svint64_t op1, svint64_t op2, svint64_t op3, uint64_t imm_index); svuint16_t svmls_lane_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3, uint64_t imm_index); svuint32_t svmls_lane_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3, uint64_t imm_index); svuint64_t svmls_lane_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3, uint64_t imm_index); svint16_t svmls_lane_s16(svint16_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmls_z(pg, op1, op2, op3) _Generic((op3), \ svfloat16_t: svmls_f16_z, \ svfloat32_t: svmls_f32_z, \ svfloat64_t: svmls_f64_z, \ float64_t: svmls_n_f64_z, \ float32_t: svmls_n_f32_z, \ svint8_t: svmls_s8_z, \ svint16_t: svmls_s16_z, \ svint32_t: svmls_s32_z, \ svint64_t: svmls_s64_z, \ svuint8_t: svmls_u8_z, \ svuint16_t: svmls_u16_z, \ svuint32_t: svmls_u32_z, \ uint64_t: svmls_n_u64_z, \ uint32_t: svmls_n_u32_z, \ uint16_t: svmls_n_u16_z, \ uint8_t: svmls_n_u8_z, \ int64_t: svmls_n_s64_z, \ int32_t: svmls_n_s32_z, \ int16_t: svmls_n_s16_z, \ int8_t: svmls_n_s8_z, \ svuint64_t: svmls_u64_z, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svmls_m(pg, op1, op2, op3) _Generic((op3), \ svfloat16_t: svmls_f16_m, \ svfloat32_t: svmls_f32_m, \ svfloat64_t: svmls_f64_m, \ float32_t: svmls_n_f32_m, \ float64_t: svmls_n_f64_m, \ svint8_t: svmls_s8_m, \ svint16_t: svmls_s16_m, \ svint32_t: svmls_s32_m, \ svint64_t: svmls_s64_m, \ svuint16_t: svmls_u16_m, \ svuint32_t: svmls_u32_m, \ svuint64_t: svmls_u64_m, \ svuint8_t: svmls_u8_m, \ int8_t: svmls_n_s8_m, \ int16_t: svmls_n_s16_m, \ int32_t: svmls_n_s32_m, \ int64_t: svmls_n_s64_m, \ uint8_t: svmls_n_u8_m, \ uint16_t: svmls_n_u16_m, \ uint64_t: svmls_n_u64_m, \ uint32_t: svmls_n_u32_m, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svmls_x(pg, op1, op2, op3) _Generic((op3), \ svfloat16_t: svmls_f16_x, \ svfloat32_t: svmls_f32_x, \ svfloat64_t: svmls_f64_x, \ float32_t: svmls_n_f32_x, \ float64_t: svmls_n_f64_x, \ svuint8_t: svmls_u8_x, \ svuint16_t: svmls_u16_x, \ svuint32_t: svmls_u32_x, \ svuint64_t: svmls_u64_x, \ svint64_t: svmls_s64_x, \ svint32_t: svmls_s32_x, \ svint16_t: svmls_s16_x, \ svint8_t: svmls_s8_x, \ uint64_t: svmls_n_u64_x, \ uint16_t: svmls_n_u16_x, \ uint8_t: svmls_n_u8_x, \ int64_t: svmls_n_s64_x, \ int32_t: svmls_n_s32_x, \ int8_t: svmls_n_s8_x, \ int16_t: svmls_n_s16_x, \ uint32_t: svmls_n_u32_x, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svmls_lane(op1, op2, op3, imm_index) _Generic((op3), \ svfloat64_t: svmls_lane_f64, \ svfloat32_t: svmls_lane_f32, \ svfloat16_t: svmls_lane_f16, \ svint32_t: svmls_lane_s32, \ svint64_t: svmls_lane_s64, \ svuint16_t: svmls_lane_u16, \ svuint32_t: svmls_lane_u32, \ svuint64_t: svmls_lane_u64, \ svint16_t: svmls_lane_s16, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #endif // sve: Vector arithmetic / Multiply-accumulate / Fused multiply-accumulate: Multiply-subtract, multiplicand first svfloat32_t svmsb_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat64_t svmsb_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svmsb_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat64_t svmsb_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat64_t 
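// Example (illustrative, non-normative sketch): when C11 _Generic is available, the
// svmls_x macro above dispatches on the static type of op3, so a vector third operand
// selects svmls_f32_x while a float32_t scalar selects svmls_n_f32_x. The helper name
// is purely illustrative.
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
static inline svfloat32_t example_mls_dispatch(svbool_t pg, svfloat32_t acc,
                                               svfloat32_t b, svfloat32_t c)
{
    svfloat32_t v = svmls_x(pg, acc, b, c);     // vector op3 selects svmls_f32_x
    return svmls_x(pg, v, b, (float32_t)2.0f);  // scalar op3 selects svmls_n_f32_x
}
#endif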
svmsb_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svmsb_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat16_t svmsb_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svmsb_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svmsb_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat16_t svmsb_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svmsb_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svmsb_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat32_t svmsb_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat16_t svmsb_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat64_t svmsb_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); // sve: Vector arithmetic / Multiply-accumulate / Multiply-accumulate: Multiply-subtract, multiplicand first svint8_t svmsb_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3); svuint64_t svmsb_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3); svuint32_t svmsb_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3); svuint16_t svmsb_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3); svuint8_t svmsb_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3); svint64_t svmsb_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3); svint32_t svmsb_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3); svint16_t svmsb_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3); svint8_t svmsb_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3); svuint64_t svmsb_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint32_t svmsb_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint16_t svmsb_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3); svint16_t svmsb_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3); svint64_t svmsb_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3); svuint64_t svmsb_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3); svuint32_t svmsb_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3); svuint16_t svmsb_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3); svuint8_t svmsb_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3); svint64_t svmsb_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3); svint32_t svmsb_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3); svint16_t svmsb_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3); svint8_t svmsb_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3); svuint64_t svmsb_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3); svuint32_t svmsb_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3); svuint16_t svmsb_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3); svuint8_t svmsb_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3); svint32_t svmsb_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3); svuint8_t svmsb_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3); svint8_t svmsb_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3); svint32_t svmsb_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3); svint64_t 
svmsb_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3); svint16_t svmsb_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3); svint32_t svmsb_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3); svint64_t svmsb_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3); svuint8_t svmsb_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3); svuint16_t svmsb_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint32_t svmsb_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint64_t svmsb_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3); svint8_t svmsb_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3); svint16_t svmsb_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3); svint8_t svmsb_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3); svuint64_t svmsb_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint16_t svmsb_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint8_t svmsb_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3); svint64_t svmsb_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3); svint32_t svmsb_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3); svuint32_t svmsb_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3); svint16_t svmsb_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmsb_x(pg, op1, op2, op3) _Generic((op3), \ float32_t: svmsb_n_f32_x, \ float64_t: svmsb_n_f64_x, \ svfloat16_t: svmsb_f16_x, \ svfloat32_t: svmsb_f32_x, \ svfloat64_t: svmsb_f64_x, \ svint8_t: svmsb_s8_x, \ int16_t: svmsb_n_s16_x, \ int64_t: svmsb_n_s64_x, \ uint64_t: svmsb_n_u64_x, \ uint32_t: svmsb_n_u32_x, \ uint16_t: svmsb_n_u16_x, \ uint8_t: svmsb_n_u8_x, \ int32_t: svmsb_n_s32_x, \ int8_t: svmsb_n_s8_x, \ svint16_t: svmsb_s16_x, \ svuint64_t: svmsb_u64_x, \ svuint16_t: svmsb_u16_x, \ svuint8_t: svmsb_u8_x, \ svint64_t: svmsb_s64_x, \ svint32_t: svmsb_s32_x, \ svuint32_t: svmsb_u32_x, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svmsb_m(pg, op1, op2, op3) _Generic((op3), \ float64_t: svmsb_n_f64_m, \ float32_t: svmsb_n_f32_m, \ svfloat16_t: svmsb_f16_m, \ svfloat32_t: svmsb_f32_m, \ svfloat64_t: svmsb_f64_m, \ uint64_t: svmsb_n_u64_m, \ uint32_t: svmsb_n_u32_m, \ uint16_t: svmsb_n_u16_m, \ uint8_t: svmsb_n_u8_m, \ int64_t: svmsb_n_s64_m, \ int32_t: svmsb_n_s32_m, \ int16_t: svmsb_n_s16_m, \ int8_t: svmsb_n_s8_m, \ svint16_t: svmsb_s16_m, \ svint32_t: svmsb_s32_m, \ svint64_t: svmsb_s64_m, \ svuint8_t: svmsb_u8_m, \ svuint16_t: svmsb_u16_m, \ svuint32_t: svmsb_u32_m, \ svuint64_t: svmsb_u64_m, \ svint8_t: svmsb_s8_m, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svmsb_z(pg, op1, op2, op3) _Generic((op3), \ float64_t: svmsb_n_f64_z, \ float32_t: svmsb_n_f32_z, \ svfloat32_t: svmsb_f32_z, \ svfloat16_t: svmsb_f16_z, \ svfloat64_t: svmsb_f64_z, \ svuint64_t: svmsb_u64_z, \ svuint32_t: svmsb_u32_z, \ svuint16_t: svmsb_u16_z, \ uint64_t: svmsb_n_u64_z, \ uint32_t: svmsb_n_u32_z, \ uint16_t: svmsb_n_u16_z, \ uint8_t: svmsb_n_u8_z, \ int64_t: svmsb_n_s64_z, \ int32_t: svmsb_n_s32_z, \ int16_t: svmsb_n_s16_z, \ int8_t: svmsb_n_s8_z, \ svuint8_t: svmsb_u8_z, \ svint32_t: svmsb_s32_z, \ svint64_t: svmsb_s64_z, \ svint8_t: svmsb_s8_z, \ svint16_t: svmsb_s16_z, \ default: __assume(0) \ )(pg, op1, op2, op3) #endif // sve: Vector arithmetic / Multiply-accumulate / Fused 
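// Example (illustrative, non-normative sketch): svmsb_* takes the multiplicands first,
// so svmsb(op1, op2, op3) is roughly op3 - op1 * op2, whereas svmls above takes the
// minuend first (op1 - op2 * op3); the two calls below are therefore expected to give
// the same active-lane results. The helper name is purely illustrative.
static inline svfloat64_t example_msb_vs_mls(svbool_t pg, svfloat64_t acc,
                                             svfloat64_t b, svfloat64_t c)
{
    svfloat64_t r1 = svmsb_f64_x(pg, b, c, acc);  // ~ acc - b * c
    svfloat64_t r2 = svmls_f64_x(pg, acc, b, c);  // ~ acc - b * c
    (void)r2;                                     // same value as r1 on active lanes
    return r1;
}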
multiply-accumulate: Negated multiply-add, addend first svfloat64_t svnmla_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svnmla_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat64_t svnmla_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svnmla_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat32_t svnmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svnmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat64_t svnmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat32_t svnmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat16_t svnmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svnmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat16_t svnmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat16_t svnmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svnmla_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat64_t svnmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat64_t svnmla_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svnmla_x(pg, op1, op2, op3) _Generic((op3), \ float64_t: svnmla_n_f64_x, \ float32_t: svnmla_n_f32_x, \ svfloat64_t: svnmla_f64_x, \ svfloat32_t: svnmla_f32_x, \ svfloat16_t: svnmla_f16_x, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svnmla_m(pg, op1, op2, op3) _Generic((op3), \ float64_t: svnmla_n_f64_m, \ float32_t: svnmla_n_f32_m, \ svfloat64_t: svnmla_f64_m, \ svfloat32_t: svnmla_f32_m, \ svfloat16_t: svnmla_f16_m, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svnmla_z(pg, op1, op2, op3) _Generic((op3), \ svfloat32_t: svnmla_f32_z, \ svfloat16_t: svnmla_f16_z, \ float32_t: svnmla_n_f32_z, \ svfloat64_t: svnmla_f64_z, \ float64_t: svnmla_n_f64_z, \ default: __assume(0) \ )(pg, op1, op2, op3) #endif // sve: Vector arithmetic / Multiply-accumulate / Fused multiply-accumulate: Negated multiply-add, multiplicand first svfloat64_t svnmad_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat32_t svnmad_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat64_t svnmad_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svnmad_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat64_t svnmad_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svnmad_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat16_t svnmad_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svnmad_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svnmad_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat16_t svnmad_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svnmad_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svnmad_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat16_t svnmad_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svnmad_f32_z(svbool_t pg, svfloat32_t op1, 
svfloat32_t op2, svfloat32_t op3); svfloat64_t svnmad_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svnmad_z(pg, op1, op2, op3) _Generic((op3), \ svfloat64_t: svnmad_f64_z, \ float32_t: svnmad_n_f32_z, \ float64_t: svnmad_n_f64_z, \ svfloat16_t: svnmad_f16_z, \ svfloat32_t: svnmad_f32_z, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svnmad_x(pg, op1, op2, op3) _Generic((op3), \ float32_t: svnmad_n_f32_x, \ svfloat16_t: svnmad_f16_x, \ svfloat32_t: svnmad_f32_x, \ svfloat64_t: svnmad_f64_x, \ float64_t: svnmad_n_f64_x, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svnmad_m(pg, op1, op2, op3) _Generic((op3), \ float64_t: svnmad_n_f64_m, \ float32_t: svnmad_n_f32_m, \ svfloat16_t: svnmad_f16_m, \ svfloat32_t: svnmad_f32_m, \ svfloat64_t: svnmad_f64_m, \ default: __assume(0) \ )(pg, op1, op2, op3) #endif // sve: Vector arithmetic / Multiply-accumulate / Fused multiply-accumulate: Negated multiply-subtract, minuend first svfloat64_t svnmls_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svnmls_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat16_t svnmls_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat64_t svnmls_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svnmls_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svnmls_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat16_t svnmls_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svnmls_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svnmls_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat16_t svnmls_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svnmls_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svnmls_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat64_t svnmls_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svnmls_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat32_t svnmls_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svnmls_z(pg, op1, op2, op3) _Generic((op3), \ float64_t: svnmls_n_f64_z, \ float32_t: svnmls_n_f32_z, \ svfloat16_t: svnmls_f16_z, \ svfloat32_t: svnmls_f32_z, \ svfloat64_t: svnmls_f64_z, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svnmls_m(pg, op1, op2, op3) _Generic((op3), \ svfloat16_t: svnmls_f16_m, \ svfloat32_t: svnmls_f32_m, \ svfloat64_t: svnmls_f64_m, \ float64_t: svnmls_n_f64_m, \ float32_t: svnmls_n_f32_m, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svnmls_x(pg, op1, op2, op3) _Generic((op3), \ float64_t: svnmls_n_f64_x, \ svfloat16_t: svnmls_f16_x, \ svfloat32_t: svnmls_f32_x, \ svfloat64_t: svnmls_f64_x, \ float32_t: svnmls_n_f32_x, \ default: __assume(0) \ )(pg, op1, op2, op3) #endif // sve: Vector arithmetic / Multiply-accumulate / Fused multiply-accumulate: Negated multiply-subtract, multiplicand first svfloat64_t svnmsb_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat16_t svnmsb_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svnmsb_f32_m(svbool_t pg, svfloat32_t op1, 
svfloat32_t op2, svfloat32_t op3); svfloat64_t svnmsb_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat32_t svnmsb_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svnmsb_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat64_t svnmsb_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svnmsb_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat64_t svnmsb_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3); svfloat32_t svnmsb_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat16_t svnmsb_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); svfloat32_t svnmsb_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3); svfloat64_t svnmsb_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3); svfloat32_t svnmsb_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3); svfloat16_t svnmsb_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svnmsb_m(pg, op1, op2, op3) _Generic((op3), \ float64_t: svnmsb_n_f64_m, \ svfloat16_t: svnmsb_f16_m, \ svfloat32_t: svnmsb_f32_m, \ svfloat64_t: svnmsb_f64_m, \ float32_t: svnmsb_n_f32_m, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svnmsb_x(pg, op1, op2, op3) _Generic((op3), \ svfloat32_t: svnmsb_f32_x, \ svfloat64_t: svnmsb_f64_x, \ float64_t: svnmsb_n_f64_x, \ float32_t: svnmsb_n_f32_x, \ svfloat16_t: svnmsb_f16_x, \ default: __assume(0) \ )(pg, op1, op2, op3) #define svnmsb_z(pg, op1, op2, op3) _Generic((op3), \ float64_t: svnmsb_n_f64_z, \ float32_t: svnmsb_n_f32_z, \ svfloat16_t: svnmsb_f16_z, \ svfloat32_t: svnmsb_f32_z, \ svfloat64_t: svnmsb_f64_z, \ default: __assume(0) \ )(pg, op1, op2, op3) #endif // sve: Vector arithmetic / Multiply-accumulate / Multiply-accumulate and widen: BFloat16 multiply-add long to single-precision (bottom) svfloat32_t svbfmlalb_f32(svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3); svfloat32_t svbfmlalb_lane_f32(svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3, uint64_t imm_index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbfmlalb(op1, op2, op3) _Generic((op1), \ svfloat32_t: svbfmlalb_f32, \ default: __assume(0) \ )(op1, op2, op3) #define svbfmlalb_lane(op1, op2, op3, imm_index) _Generic((op1), \ svfloat32_t: svbfmlalb_lane_f32, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #endif // sve: Vector arithmetic / Multiply-accumulate / Multiply-accumulate and widen: BFloat16 multiply-add long to single-precision (top) svfloat32_t svbfmlalt_f32(svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3); svfloat32_t svbfmlalt_lane_f32(svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3, uint64_t imm_index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbfmlalt(op1, op2, op3) _Generic((op1), \ svfloat32_t: svbfmlalt_f32, \ default: __assume(0) \ )(op1, op2, op3) #define svbfmlalt_lane(op1, op2, op3, imm_index) _Generic((op1), \ svfloat32_t: svbfmlalt_lane_f32, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #endif // sve: Vector arithmetic / Negate / Negation: Negate svfloat32_t svneg_f32_z(svbool_t pg, svfloat32_t op); svfloat16_t svneg_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op); svfloat32_t svneg_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op); svint8_t svneg_s8_m(svint8_t inactive, svbool_t pg, 
svint8_t op); svint16_t svneg_s16_m(svint16_t inactive, svbool_t pg, svint16_t op); svint32_t svneg_s32_m(svint32_t inactive, svbool_t pg, svint32_t op); svint64_t svneg_s64_m(svint64_t inactive, svbool_t pg, svint64_t op); svfloat16_t svneg_f16_x(svbool_t pg, svfloat16_t op); svfloat32_t svneg_f32_x(svbool_t pg, svfloat32_t op); svfloat64_t svneg_f64_x(svbool_t pg, svfloat64_t op); svfloat64_t svneg_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op); svint16_t svneg_s16_x(svbool_t pg, svint16_t op); svint8_t svneg_s8_x(svbool_t pg, svint8_t op); svint64_t svneg_s64_z(svbool_t pg, svint64_t op); svint16_t svneg_s16_z(svbool_t pg, svint16_t op); svint8_t svneg_s8_z(svbool_t pg, svint8_t op); svint32_t svneg_s32_z(svbool_t pg, svint32_t op); svfloat16_t svneg_f16_z(svbool_t pg, svfloat16_t op); svint64_t svneg_s64_x(svbool_t pg, svint64_t op); svint32_t svneg_s32_x(svbool_t pg, svint32_t op); svfloat64_t svneg_f64_z(svbool_t pg, svfloat64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svneg_z(pg, op) _Generic((op), \ svfloat32_t: svneg_f32_z, \ svint64_t: svneg_s64_z, \ svint16_t: svneg_s16_z, \ svint8_t: svneg_s8_z, \ svint32_t: svneg_s32_z, \ svfloat16_t: svneg_f16_z, \ svfloat64_t: svneg_f64_z, \ default: __assume(0) \ )(pg, op) #define svneg_m(inactive, pg, op) _Generic((op), \ svfloat16_t: svneg_f16_m, \ svfloat32_t: svneg_f32_m, \ svint8_t: svneg_s8_m, \ svint16_t: svneg_s16_m, \ svint32_t: svneg_s32_m, \ svint64_t: svneg_s64_m, \ svfloat64_t: svneg_f64_m, \ default: __assume(0) \ )(inactive, pg, op) #define svneg_x(pg, op) _Generic((op), \ svfloat16_t: svneg_f16_x, \ svfloat32_t: svneg_f32_x, \ svfloat64_t: svneg_f64_x, \ svint16_t: svneg_s16_x, \ svint8_t: svneg_s8_x, \ svint64_t: svneg_s64_x, \ svint32_t: svneg_s32_x, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector arithmetic / Reciprocal / Reciprocal estimate: Reciprocal estimate svfloat32_t svrecpe_f32(svfloat32_t op); svfloat64_t svrecpe_f64(svfloat64_t op); svfloat16_t svrecpe_f16(svfloat16_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrecpe(op) _Generic((op), \ svfloat32_t: svrecpe_f32, \ svfloat64_t: svrecpe_f64, \ svfloat16_t: svrecpe_f16, \ default: __assume(0) \ )(op) #endif // sve: Vector arithmetic / Reciprocal / Reciprocal exponent: Reciprocal exponent svfloat16_t svrecpx_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op); svfloat64_t svrecpx_f64_z(svbool_t pg, svfloat64_t op); svfloat32_t svrecpx_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op); svfloat16_t svrecpx_f16_z(svbool_t pg, svfloat16_t op); svfloat64_t svrecpx_f64_x(svbool_t pg, svfloat64_t op); svfloat32_t svrecpx_f32_x(svbool_t pg, svfloat32_t op); svfloat64_t svrecpx_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op); svfloat32_t svrecpx_f32_z(svbool_t pg, svfloat32_t op); svfloat16_t svrecpx_f16_x(svbool_t pg, svfloat16_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrecpx_m(inactive, pg, op) _Generic((op), \ svfloat16_t: svrecpx_f16_m, \ svfloat32_t: svrecpx_f32_m, \ svfloat64_t: svrecpx_f64_m, \ default: __assume(0) \ )(inactive, pg, op) #define svrecpx_z(pg, op) _Generic((op), \ svfloat64_t: svrecpx_f64_z, \ svfloat16_t: svrecpx_f16_z, \ svfloat32_t: svrecpx_f32_z, \ default: __assume(0) \ )(pg, op) #define svrecpx_x(pg, op) _Generic((op), \ svfloat64_t: svrecpx_f64_x, \ svfloat32_t: svrecpx_f32_x, \ svfloat16_t: svrecpx_f16_x, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector arithmetic / Reciprocal / Reciprocal 
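// Example (illustrative, non-normative sketch): svrecpe_* gives only a coarse
// reciprocal estimate; it is typically refined with Newton-Raphson steps using
// svrecps_* (declared further below), where each step computes x' = x * (2 - a * x).
// svmul_f32_x is assumed to be declared elsewhere in this header; the helper name is
// purely illustrative.
static inline svfloat32_t example_fast_reciprocal(svbool_t pg, svfloat32_t a)
{
    svfloat32_t x = svrecpe_f32(a);             // coarse initial estimate
    x = svmul_f32_x(pg, x, svrecps_f32(a, x));  // first refinement step
    x = svmul_f32_x(pg, x, svrecps_f32(a, x));  // second refinement step
    return x;                                   // ~ 1.0f / a per lane
}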
square-root estimate: Reciprocal square root estimate svfloat16_t svrsqrte_f16(svfloat16_t op); svfloat32_t svrsqrte_f32(svfloat32_t op); svfloat64_t svrsqrte_f64(svfloat64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrsqrte(op) _Generic((op), \ svfloat16_t: svrsqrte_f16, \ svfloat32_t: svrsqrte_f32, \ svfloat64_t: svrsqrte_f64, \ default: __assume(0) \ )(op) #endif // sve: Vector arithmetic / Reciprocal / Reciprocal square-root step: Reciprocal square root step svfloat32_t svrsqrts_f32(svfloat32_t op1, svfloat32_t op2); svfloat64_t svrsqrts_f64(svfloat64_t op1, svfloat64_t op2); svfloat16_t svrsqrts_f16(svfloat16_t op1, svfloat16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrsqrts(op1, op2) _Generic((op2), \ svfloat32_t: svrsqrts_f32, \ svfloat64_t: svrsqrts_f64, \ svfloat16_t: svrsqrts_f16, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector arithmetic / Reciprocal / Reciprocal step: Reciprocal step svfloat16_t svrecps_f16(svfloat16_t op1, svfloat16_t op2); svfloat64_t svrecps_f64(svfloat64_t op1, svfloat64_t op2); svfloat32_t svrecps_f32(svfloat32_t op1, svfloat32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrecps(op1, op2) _Generic((op2), \ svfloat16_t: svrecps_f16, \ svfloat64_t: svrecps_f64, \ svfloat32_t: svrecps_f32, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector arithmetic / Rounding: Round to nearest, ties away from zero svfloat64_t svrinta_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op); svfloat16_t svrinta_f16_x(svbool_t pg, svfloat16_t op); svfloat32_t svrinta_f32_x(svbool_t pg, svfloat32_t op); svfloat64_t svrinta_f64_x(svbool_t pg, svfloat64_t op); svfloat16_t svrinta_f16_z(svbool_t pg, svfloat16_t op); svfloat32_t svrinta_f32_z(svbool_t pg, svfloat32_t op); svfloat64_t svrinta_f64_z(svbool_t pg, svfloat64_t op); svfloat16_t svrinta_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op); svfloat32_t svrinta_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrinta_m(inactive, pg, op) _Generic((op), \ svfloat64_t: svrinta_f64_m, \ svfloat16_t: svrinta_f16_m, \ svfloat32_t: svrinta_f32_m, \ default: __assume(0) \ )(inactive, pg, op) #define svrinta_x(pg, op) _Generic((op), \ svfloat16_t: svrinta_f16_x, \ svfloat32_t: svrinta_f32_x, \ svfloat64_t: svrinta_f64_x, \ default: __assume(0) \ )(pg, op) #define svrinta_z(pg, op) _Generic((op), \ svfloat16_t: svrinta_f16_z, \ svfloat32_t: svrinta_f32_z, \ svfloat64_t: svrinta_f64_z, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector arithmetic / Rounding: Round to nearest, ties to even svfloat32_t svrintn_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op); svfloat64_t svrintn_f64_z(svbool_t pg, svfloat64_t op); svfloat32_t svrintn_f32_z(svbool_t pg, svfloat32_t op); svfloat16_t svrintn_f16_z(svbool_t pg, svfloat16_t op); svfloat64_t svrintn_f64_x(svbool_t pg, svfloat64_t op); svfloat32_t svrintn_f32_x(svbool_t pg, svfloat32_t op); svfloat16_t svrintn_f16_x(svbool_t pg, svfloat16_t op); svfloat64_t svrintn_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op); svfloat16_t svrintn_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrintn_m(inactive, pg, op) _Generic((op), \ svfloat32_t: svrintn_f32_m, \ svfloat64_t: svrintn_f64_m, \ svfloat16_t: svrintn_f16_m, \ default: __assume(0) \ )(inactive, pg, op) #define svrintn_z(pg, op) 
_Generic((op), \ svfloat64_t: svrintn_f64_z, \ svfloat32_t: svrintn_f32_z, \ svfloat16_t: svrintn_f16_z, \ default: __assume(0) \ )(pg, op) #define svrintn_x(pg, op) _Generic((op), \ svfloat64_t: svrintn_f64_x, \ svfloat32_t: svrintn_f32_x, \ svfloat16_t: svrintn_f16_x, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector arithmetic / Rounding: Round towards -Inf svfloat64_t svrintm_f64_x(svbool_t pg, svfloat64_t op); svfloat32_t svrintm_f32_z(svbool_t pg, svfloat32_t op); svfloat16_t svrintm_f16_z(svbool_t pg, svfloat16_t op); svfloat32_t svrintm_f32_x(svbool_t pg, svfloat32_t op); svfloat16_t svrintm_f16_x(svbool_t pg, svfloat16_t op); svfloat64_t svrintm_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op); svfloat32_t svrintm_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op); svfloat16_t svrintm_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op); svfloat64_t svrintm_f64_z(svbool_t pg, svfloat64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrintm_x(pg, op) _Generic((op), \ svfloat64_t: svrintm_f64_x, \ svfloat32_t: svrintm_f32_x, \ svfloat16_t: svrintm_f16_x, \ default: __assume(0) \ )(pg, op) #define svrintm_z(pg, op) _Generic((op), \ svfloat32_t: svrintm_f32_z, \ svfloat16_t: svrintm_f16_z, \ svfloat64_t: svrintm_f64_z, \ default: __assume(0) \ )(pg, op) #define svrintm_m(inactive, pg, op) _Generic((op), \ svfloat64_t: svrintm_f64_m, \ svfloat32_t: svrintm_f32_m, \ svfloat16_t: svrintm_f16_m, \ default: __assume(0) \ )(inactive, pg, op) #endif // sve: Vector arithmetic / Rounding: Round towards +Inf svfloat16_t svrintp_f16_x(svbool_t pg, svfloat16_t op); svfloat32_t svrintp_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op); svfloat64_t svrintp_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op); svfloat32_t svrintp_f32_x(svbool_t pg, svfloat32_t op); svfloat16_t svrintp_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op); svfloat16_t svrintp_f16_z(svbool_t pg, svfloat16_t op); svfloat32_t svrintp_f32_z(svbool_t pg, svfloat32_t op); svfloat64_t svrintp_f64_z(svbool_t pg, svfloat64_t op); svfloat64_t svrintp_f64_x(svbool_t pg, svfloat64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrintp_x(pg, op) _Generic((op), \ svfloat16_t: svrintp_f16_x, \ svfloat32_t: svrintp_f32_x, \ svfloat64_t: svrintp_f64_x, \ default: __assume(0) \ )(pg, op) #define svrintp_m(inactive, pg, op) _Generic((op), \ svfloat32_t: svrintp_f32_m, \ svfloat64_t: svrintp_f64_m, \ svfloat16_t: svrintp_f16_m, \ default: __assume(0) \ )(inactive, pg, op) #define svrintp_z(pg, op) _Generic((op), \ svfloat16_t: svrintp_f16_z, \ svfloat32_t: svrintp_f32_z, \ svfloat64_t: svrintp_f64_z, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector arithmetic / Rounding: Round towards zero svfloat64_t svrintz_f64_z(svbool_t pg, svfloat64_t op); svfloat32_t svrintz_f32_z(svbool_t pg, svfloat32_t op); svfloat16_t svrintz_f16_z(svbool_t pg, svfloat16_t op); svfloat64_t svrintz_f64_x(svbool_t pg, svfloat64_t op); svfloat32_t svrintz_f32_x(svbool_t pg, svfloat32_t op); svfloat16_t svrintz_f16_x(svbool_t pg, svfloat16_t op); svfloat32_t svrintz_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op); svfloat16_t svrintz_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op); svfloat64_t svrintz_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrintz_z(pg, op) _Generic((op), \ svfloat64_t: svrintz_f64_z, \ svfloat32_t: svrintz_f32_z, \ svfloat16_t: svrintz_f16_z,
\ default: __assume(0) \ )(pg, op) #define svrintz_x(pg, op) _Generic((op), \ svfloat64_t: svrintz_f64_x, \ svfloat32_t: svrintz_f32_x, \ svfloat16_t: svrintz_f16_x, \ default: __assume(0) \ )(pg, op) #define svrintz_m(inactive, pg, op) _Generic((op), \ svfloat32_t: svrintz_f32_m, \ svfloat16_t: svrintz_f16_m, \ svfloat64_t: svrintz_f64_m, \ default: __assume(0) \ )(inactive, pg, op) #endif // sve: Vector arithmetic / Rounding: Round using current rounding mode (exact) svfloat32_t svrintx_f32_z(svbool_t pg, svfloat32_t op); svfloat64_t svrintx_f64_z(svbool_t pg, svfloat64_t op); svfloat32_t svrintx_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op); svfloat64_t svrintx_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op); svfloat16_t svrintx_f16_x(svbool_t pg, svfloat16_t op); svfloat16_t svrintx_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op); svfloat32_t svrintx_f32_x(svbool_t pg, svfloat32_t op); svfloat64_t svrintx_f64_x(svbool_t pg, svfloat64_t op); svfloat16_t svrintx_f16_z(svbool_t pg, svfloat16_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrintx_z(pg, op) _Generic((op), \ svfloat32_t: svrintx_f32_z, \ svfloat64_t: svrintx_f64_z, \ svfloat16_t: svrintx_f16_z, \ default: __assume(0) \ )(pg, op) #define svrintx_m(inactive, pg, op) _Generic((op), \ svfloat32_t: svrintx_f32_m, \ svfloat64_t: svrintx_f64_m, \ svfloat16_t: svrintx_f16_m, \ default: __assume(0) \ )(inactive, pg, op) #define svrintx_x(pg, op) _Generic((op), \ svfloat16_t: svrintx_f16_x, \ svfloat32_t: svrintx_f32_x, \ svfloat64_t: svrintx_f64_x, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector arithmetic / Rounding: Round using current rounding mode (inexact) svfloat32_t svrinti_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op); svfloat64_t svrinti_f64_z(svbool_t pg, svfloat64_t op); svfloat32_t svrinti_f32_z(svbool_t pg, svfloat32_t op); svfloat16_t svrinti_f16_z(svbool_t pg, svfloat16_t op); svfloat64_t svrinti_f64_x(svbool_t pg, svfloat64_t op); svfloat32_t svrinti_f32_x(svbool_t pg, svfloat32_t op); svfloat16_t svrinti_f16_x(svbool_t pg, svfloat16_t op); svfloat64_t svrinti_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op); svfloat16_t svrinti_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrinti_m(inactive, pg, op) _Generic((op), \ svfloat32_t: svrinti_f32_m, \ svfloat64_t: svrinti_f64_m, \ svfloat16_t: svrinti_f16_m, \ default: __assume(0) \ )(inactive, pg, op) #define svrinti_z(pg, op) _Generic((op), \ svfloat64_t: svrinti_f64_z, \ svfloat32_t: svrinti_f32_z, \ svfloat16_t: svrinti_f16_z, \ default: __assume(0) \ )(pg, op) #define svrinti_x(pg, op) _Generic((op), \ svfloat64_t: svrinti_f64_x, \ svfloat32_t: svrinti_f32_x, \ svfloat16_t: svrinti_f16_x, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector arithmetic / Square root: Square root svfloat64_t svsqrt_f64_z(svbool_t pg, svfloat64_t op); svfloat32_t svsqrt_f32_z(svbool_t pg, svfloat32_t op); svfloat16_t svsqrt_f16_z(svbool_t pg, svfloat16_t op); svfloat64_t svsqrt_f64_x(svbool_t pg, svfloat64_t op); svfloat64_t svsqrt_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op); svfloat16_t svsqrt_f16_x(svbool_t pg, svfloat16_t op); svfloat32_t svsqrt_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op); svfloat16_t svsqrt_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op); svfloat32_t svsqrt_f32_x(svbool_t pg, svfloat32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define 
svsqrt_z(pg, op) _Generic((op), \ svfloat64_t: svsqrt_f64_z, \ svfloat32_t: svsqrt_f32_z, \ svfloat16_t: svsqrt_f16_z, \ default: __assume(0) \ )(pg, op) #define svsqrt_x(pg, op) _Generic((op), \ svfloat64_t: svsqrt_f64_x, \ svfloat16_t: svsqrt_f16_x, \ svfloat32_t: svsqrt_f32_x, \ default: __assume(0) \ )(pg, op) #define svsqrt_m(inactive, pg, op) _Generic((op), \ svfloat64_t: svsqrt_f64_m, \ svfloat32_t: svsqrt_f32_m, \ svfloat16_t: svsqrt_f16_m, \ default: __assume(0) \ )(inactive, pg, op) #endif // sve: Vector arithmetic / Subtract / Saturating subtract: Saturating subtract svuint64_t svqsub_n_u64(svuint64_t op1, uint64_t op2); svuint32_t svqsub_n_u32(svuint32_t op1, uint32_t op2); svuint16_t svqsub_n_u16(svuint16_t op1, uint16_t op2); svuint8_t svqsub_n_u8(svuint8_t op1, uint8_t op2); svint64_t svqsub_n_s64(svint64_t op1, int64_t op2); svint8_t svqsub_s8(svint8_t op1, svint8_t op2); svint16_t svqsub_s16(svint16_t op1, svint16_t op2); svint64_t svqsub_s64(svint64_t op1, svint64_t op2); svint32_t svqsub_s32(svint32_t op1, svint32_t op2); svuint16_t svqsub_u16(svuint16_t op1, svuint16_t op2); svuint8_t svqsub_u8(svuint8_t op1, svuint8_t op2); svint16_t svqsub_n_s16(svint16_t op1, int16_t op2); svint8_t svqsub_n_s8(svint8_t op1, int8_t op2); svint32_t svqsub_n_s32(svint32_t op1, int32_t op2); svuint64_t svqsub_u64(svuint64_t op1, svuint64_t op2); svuint32_t svqsub_u32(svuint32_t op1, svuint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqsub(op1, op2) _Generic((op2), \ uint64_t: svqsub_n_u64, \ uint32_t: svqsub_n_u32, \ uint16_t: svqsub_n_u16, \ uint8_t: svqsub_n_u8, \ int64_t: svqsub_n_s64, \ svint8_t: svqsub_s8, \ svint16_t: svqsub_s16, \ svint64_t: svqsub_s64, \ svint32_t: svqsub_s32, \ svuint16_t: svqsub_u16, \ svuint8_t: svqsub_u8, \ int16_t: svqsub_n_s16, \ int8_t: svqsub_n_s8, \ int32_t: svqsub_n_s32, \ svuint64_t: svqsub_u64, \ svuint32_t: svqsub_u32, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector arithmetic / Subtract / Subtraction: Subtract svint32_t svsub_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svsub_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svuint64_t svsub_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint16_t svsub_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint32_t svsub_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svint16_t svsub_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svuint8_t svsub_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svint8_t svsub_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svuint16_t svsub_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svfloat32_t svsub_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2); svuint64_t svsub_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svsub_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint8_t svsub_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svsub_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svsub_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svfloat64_t svsub_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svsub_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2); svuint16_t svsub_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svint8_t svsub_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svsub_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svsub_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svsub_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svsub_n_u8_z(svbool_t 
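// Example (illustrative, non-normative sketch): svqsub_* is a saturating subtract, so
// for unsigned element types the result clamps at 0 instead of wrapping. With the
// type-generic svqsub macro, the uint8_t second operand selects svqsub_n_u8. The
// helper name is purely illustrative.
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
static inline svuint8_t example_saturating_sub(svuint8_t bytes)
{
    // each lane becomes (lane >= 16) ? lane - 16 : 0 rather than wrapping around
    return svqsub(bytes, (uint8_t)16);
}
#endif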
pg, svuint8_t op1, uint8_t op2); svint64_t svsub_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svsub_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svsub_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svsub_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svfloat64_t svsub_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svsub_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2); svuint64_t svsub_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svsub_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint8_t svsub_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svsub_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svsub_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svsub_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svfloat64_t svsub_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat64_t svsub_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svint16_t svsub_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svfloat64_t svsub_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svuint8_t svsub_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svint8_t svsub_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svsub_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svsub_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svsub_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svsub_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint32_t svsub_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svsub_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svfloat16_t svsub_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat32_t svsub_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat64_t svsub_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svint8_t svsub_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svsub_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svsub_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svsub_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svfloat32_t svsub_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svsub_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svuint16_t svsub_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint16_t svsub_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svfloat16_t svsub_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svuint64_t svsub_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svsub_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svfloat32_t svsub_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svint8_t svsub_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsub_m(pg, op1, op2) _Generic((op2), \ int32_t: svsub_n_s32_m, \ int64_t: svsub_n_s64_m, \ uint64_t: svsub_n_u64_m, \ uint16_t: svsub_n_u16_m, \ uint32_t: svsub_n_u32_m, \ int16_t: svsub_n_s16_m, \ uint8_t: svsub_n_u8_m, \ int8_t: svsub_n_s8_m, \ float32_t: svsub_n_f32_m, \ float64_t: svsub_n_f64_m, \ svfloat64_t: svsub_f64_m, \ svint8_t: svsub_s8_m, \ svint16_t: svsub_s16_m, \ svint32_t: svsub_s32_m, \ svint64_t: svsub_s64_m, \ svuint8_t: svsub_u8_m, \ svuint32_t: svsub_u32_m, \ svuint64_t: svsub_u64_m, \ svfloat32_t: svsub_f32_m, \ svfloat16_t: svsub_f16_m, \ svuint16_t: svsub_u16_m, \ default: __assume(0) \ )(pg, op1, op2) #define svsub_z(pg, op1, op2) _Generic((op2), \ svuint16_t: svsub_u16_z, \ svuint64_t: svsub_u64_z, 
\ svuint32_t: svsub_u32_z, \ svuint8_t: svsub_u8_z, \ svint64_t: svsub_s64_z, \ svint32_t: svsub_s32_z, \ uint64_t: svsub_n_u64_z, \ uint32_t: svsub_n_u32_z, \ uint16_t: svsub_n_u16_z, \ uint8_t: svsub_n_u8_z, \ int64_t: svsub_n_s64_z, \ int32_t: svsub_n_s32_z, \ int16_t: svsub_n_s16_z, \ int8_t: svsub_n_s8_z, \ float64_t: svsub_n_f64_z, \ float32_t: svsub_n_f32_z, \ svfloat64_t: svsub_f64_z, \ svint16_t: svsub_s16_z, \ svfloat16_t: svsub_f16_z, \ svfloat32_t: svsub_f32_z, \ svint8_t: svsub_s8_z, \ default: __assume(0) \ )(pg, op1, op2) #define svsub_x(pg, op1, op2) _Generic((op2), \ float32_t: svsub_n_f32_x, \ uint16_t: svsub_n_u16_x, \ int8_t: svsub_n_s8_x, \ uint64_t: svsub_n_u64_x, \ uint32_t: svsub_n_u32_x, \ uint8_t: svsub_n_u8_x, \ int64_t: svsub_n_s64_x, \ int32_t: svsub_n_s32_x, \ int16_t: svsub_n_s16_x, \ float64_t: svsub_n_f64_x, \ svuint8_t: svsub_u8_x, \ svfloat16_t: svsub_f16_x, \ svfloat32_t: svsub_f32_x, \ svfloat64_t: svsub_f64_x, \ svint8_t: svsub_s8_x, \ svint16_t: svsub_s16_x, \ svint32_t: svsub_s32_x, \ svint64_t: svsub_s64_x, \ svuint16_t: svsub_u16_x, \ svuint64_t: svsub_u64_x, \ svuint32_t: svsub_u32_x, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Vector arithmetic / Subtract / Subtraction: Subtract reversed svuint32_t svsubr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svsubr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svsubr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svsubr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svuint32_t svsubr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svfloat32_t svsubr_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2); svint32_t svsubr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svsubr_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svsubr_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svfloat64_t svsubr_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svuint64_t svsubr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint8_t svsubr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svfloat16_t svsubr_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat32_t svsubr_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svuint16_t svsubr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint64_t svsubr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svsubr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svfloat16_t svsubr_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat32_t svsubr_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat64_t svsubr_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat64_t svsubr_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svint8_t svsubr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svsubr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svsubr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svsubr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svuint64_t svsubr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svint64_t svsubr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svsubr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svsubr_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svuint8_t svsubr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svsubr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint32_t svsubr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svint8_t svsubr_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svfloat32_t 
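// Example (illustrative, non-normative sketch): svsubr_* is subtraction with the
// operands reversed, roughly op2 - op1 per active element, which is convenient when
// the scalar is the minuend. The helper name is purely illustrative.
static inline svfloat32_t example_one_minus(svbool_t pg, svfloat32_t x)
{
    // active lanes: 1.0f - x; inactive lanes keep their value from x (merging form)
    return svsubr_n_f32_m(pg, x, 1.0f);
}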
svsubr_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat64_t svsubr_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2); svint8_t svsubr_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svsubr_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svsubr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svsubr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svsubr_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svsubr_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint32_t svsubr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint64_t svsubr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svint64_t svsubr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svsubr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svuint64_t svsubr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svint8_t svsubr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svuint16_t svsubr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svint64_t svsubr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svsubr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svsubr_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint16_t svsubr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svsubr_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svfloat64_t svsubr_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svsubr_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svuint8_t svsubr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint8_t svsubr_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svsubr_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint32_t svsubr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svfloat64_t svsubr_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2); svfloat32_t svsubr_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2); svfloat16_t svsubr_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svuint64_t svsubr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsubr_z(pg, op1, op2) _Generic((op2), \ svuint32_t: svsubr_u32_z, \ svuint16_t: svsubr_u16_z, \ svuint8_t: svsubr_u8_z, \ svint64_t: svsubr_s64_z, \ svint32_t: svsubr_s32_z, \ svint16_t: svsubr_s16_z, \ svint8_t: svsubr_s8_z, \ svfloat64_t: svsubr_f64_z, \ svuint64_t: svsubr_u64_z, \ svfloat16_t: svsubr_f16_z, \ svfloat32_t: svsubr_f32_z, \ float32_t: svsubr_n_f32_z, \ float64_t: svsubr_n_f64_z, \ int8_t: svsubr_n_s8_z, \ int16_t: svsubr_n_s16_z, \ int32_t: svsubr_n_s32_z, \ int64_t: svsubr_n_s64_z, \ uint8_t: svsubr_n_u8_z, \ uint16_t: svsubr_n_u16_z, \ uint32_t: svsubr_n_u32_z, \ uint64_t: svsubr_n_u64_z, \ default: __assume(0) \ )(pg, op1, op2) #define svsubr_m(pg, op1, op2) _Generic((op2), \ svuint32_t: svsubr_u32_m, \ float32_t: svsubr_n_f32_m, \ float64_t: svsubr_n_f64_m, \ svuint64_t: svsubr_u64_m, \ svint64_t: svsubr_s64_m, \ int32_t: svsubr_n_s32_m, \ int16_t: svsubr_n_s16_m, \ int8_t: svsubr_n_s8_m, \ svuint16_t: svsubr_u16_m, \ int64_t: svsubr_n_s64_m, \ svint32_t: svsubr_s32_m, \ svint16_t: svsubr_s16_m, \ svint8_t: svsubr_s8_m, \ svfloat64_t: svsubr_f64_m, \ svfloat32_t: svsubr_f32_m, \ svuint8_t: svsubr_u8_m, \ uint8_t: svsubr_n_u8_m, \ uint16_t: svsubr_n_u16_m, \ uint32_t: svsubr_n_u32_m, \ svfloat16_t: svsubr_f16_m, \ uint64_t: svsubr_n_u64_m, \ default: __assume(0) \ )(pg, op1, op2) #define svsubr_x(pg, op1, op2) _Generic((op2), \ svuint8_t: svsubr_u8_x, \ svuint16_t: svsubr_u16_x, \ svuint64_t: 
svsubr_u64_x, \ svuint32_t: svsubr_u32_x, \ svfloat16_t: svsubr_f16_x, \ svfloat32_t: svsubr_f32_x, \ svfloat64_t: svsubr_f64_x, \ svint8_t: svsubr_s8_x, \ svint16_t: svsubr_s16_x, \ svint32_t: svsubr_s32_x, \ svint64_t: svsubr_s64_x, \ uint8_t: svsubr_n_u8_x, \ uint16_t: svsubr_n_u16_x, \ uint32_t: svsubr_n_u32_x, \ int64_t: svsubr_n_s64_x, \ int32_t: svsubr_n_s32_x, \ uint64_t: svsubr_n_u64_x, \ int8_t: svsubr_n_s8_x, \ int16_t: svsubr_n_s16_x, \ float64_t: svsubr_n_f64_x, \ float32_t: svsubr_n_f32_x, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Vector arithmetic / Trigonometry: Trigonometric multiply-add coefficient svfloat16_t svtmad_f16(svfloat16_t op1, svfloat16_t op2, uint64_t imm3); svfloat64_t svtmad_f64(svfloat64_t op1, svfloat64_t op2, uint64_t imm3); svfloat32_t svtmad_f32(svfloat32_t op1, svfloat32_t op2, uint64_t imm3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svtmad(op1, op2, imm3) _Generic((op1), \ svfloat16_t: svtmad_f16, \ svfloat64_t: svtmad_f64, \ svfloat32_t: svtmad_f32, \ default: __assume(0) \ )(op1, op2, imm3) #endif // sve: Vector arithmetic / Trigonometry: Trigonometric select coefficient svfloat16_t svtssel_f16(svfloat16_t op1, svuint16_t op2); svfloat32_t svtssel_f32(svfloat32_t op1, svuint32_t op2); svfloat64_t svtssel_f64(svfloat64_t op1, svuint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svtssel(op1, op2) _Generic((op2), \ svuint16_t: svtssel_f16, \ svuint32_t: svtssel_f32, \ svuint64_t: svtssel_f64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector arithmetic / Trigonometry: Trigonometric starting value svfloat16_t svtsmul_f16(svfloat16_t op1, svuint16_t op2); svfloat32_t svtsmul_f32(svfloat32_t op1, svuint32_t op2); svfloat64_t svtsmul_f64(svfloat64_t op1, svuint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svtsmul(op1, op2) _Generic((op2), \ svuint16_t: svtsmul_f16, \ svuint32_t: svtsmul_f32, \ svuint64_t: svtsmul_f64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector length / Count elements: Count the number of 16-bit elements in a vector uint64_t svcnth(void); uint64_t svcnth_pat(enum svpattern pattern); // sve: Vector length / Count elements: Count the number of 32-bit elements in a vector uint64_t svcntw(void); uint64_t svcntw_pat(enum svpattern pattern); // sve: Vector length / Count elements: Count the number of 64-bit elements in a vector uint64_t svcntd(void); uint64_t svcntd_pat(enum svpattern pattern); // sve: Vector length / Count elements: Count the number of 8-bit elements in a vector uint64_t svcntb(void); uint64_t svcntb_pat(enum svpattern pattern); // sve: Vector length / Count elements: Count the number of elements in a full vector uint64_t svlen_f32(svfloat32_t op); uint64_t svlen_f16(svfloat16_t op); uint64_t svlen_bf16(svbfloat16_t op); uint64_t svlen_u8(svuint8_t op); uint64_t svlen_f64(svfloat64_t op); uint64_t svlen_s8(svint8_t op); uint64_t svlen_s16(svint16_t op); uint64_t svlen_s32(svint32_t op); uint64_t svlen_u64(svuint64_t op); uint64_t svlen_u32(svuint32_t op); uint64_t svlen_u16(svuint16_t op); uint64_t svlen_s64(svint64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svlen(op) _Generic((op), \ svfloat32_t: svlen_f32, \ svfloat16_t: svlen_f16, \ svbfloat16_t: svlen_bf16, \ svuint8_t: svlen_u8, \ svfloat64_t: svlen_f64, \ svint8_t: svlen_s8, \ svint16_t: svlen_s16, \ svint32_t: svlen_s32, \ svuint64_t: svlen_u64, \ svuint32_t: svlen_u32, \ svuint16_t: svlen_u16, \ svint64_t: 
svlen_s64, \ default: __assume(0) \ )(op) #endif // sve: Vector length / Saturating decrement: Saturating decrement by number of byte elements uint64_t svqdecb_pat_n_u64(uint64_t op, enum svpattern pattern, uint64_t imm_factor); uint32_t svqdecb_pat_n_u32(uint32_t op, enum svpattern pattern, uint64_t imm_factor); int32_t svqdecb_pat_n_s32(int32_t op, enum svpattern pattern, uint64_t imm_factor); uint64_t svqdecb_n_u64(uint64_t op, uint64_t imm_factor); uint32_t svqdecb_n_u32(uint32_t op, uint64_t imm_factor); int64_t svqdecb_n_s64(int64_t op, uint64_t imm_factor); int32_t svqdecb_n_s32(int32_t op, uint64_t imm_factor); int64_t svqdecb_pat_n_s64(int64_t op, enum svpattern pattern, uint64_t imm_factor); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdecb_pat(op, pattern, imm_factor) _Generic((op), \ uint64_t: svqdecb_pat_n_u64, \ uint32_t: svqdecb_pat_n_u32, \ int32_t: svqdecb_pat_n_s32, \ int64_t: svqdecb_pat_n_s64, \ default: __assume(0) \ )(op, pattern, imm_factor) #define svqdecb(op, imm_factor) _Generic((op), \ uint64_t: svqdecb_n_u64, \ uint32_t: svqdecb_n_u32, \ int64_t: svqdecb_n_s64, \ int32_t: svqdecb_n_s32, \ default: __assume(0) \ )(op, imm_factor) #endif // sve: Vector length / Saturating decrement: Saturating decrement by number of doubleword elements uint32_t svqdecd_n_u32(uint32_t op, uint64_t imm_factor); int64_t svqdecd_n_s64(int64_t op, uint64_t imm_factor); uint64_t svqdecd_n_u64(uint64_t op, uint64_t imm_factor); int32_t svqdecd_pat_n_s32(int32_t op, enum svpattern pattern, uint64_t imm_factor); int64_t svqdecd_pat_n_s64(int64_t op, enum svpattern pattern, uint64_t imm_factor); uint32_t svqdecd_pat_n_u32(uint32_t op, enum svpattern pattern, uint64_t imm_factor); uint64_t svqdecd_pat_n_u64(uint64_t op, enum svpattern pattern, uint64_t imm_factor); svint64_t svqdecd_s64(svint64_t op, uint64_t imm_factor); svuint64_t svqdecd_u64(svuint64_t op, uint64_t imm_factor); svint64_t svqdecd_pat_s64(svint64_t op, enum svpattern pattern, uint64_t imm_factor); svuint64_t svqdecd_pat_u64(svuint64_t op, enum svpattern pattern, uint64_t imm_factor); int32_t svqdecd_n_s32(int32_t op, uint64_t imm_factor); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdecd(op, imm_factor) _Generic((op), \ uint32_t: svqdecd_n_u32, \ int64_t: svqdecd_n_s64, \ uint64_t: svqdecd_n_u64, \ svint64_t: svqdecd_s64, \ svuint64_t: svqdecd_u64, \ int32_t: svqdecd_n_s32, \ default: __assume(0) \ )(op, imm_factor) #define svqdecd_pat(op, pattern, imm_factor) _Generic((op), \ int32_t: svqdecd_pat_n_s32, \ int64_t: svqdecd_pat_n_s64, \ uint32_t: svqdecd_pat_n_u32, \ uint64_t: svqdecd_pat_n_u64, \ svint64_t: svqdecd_pat_s64, \ svuint64_t: svqdecd_pat_u64, \ default: __assume(0) \ )(op, pattern, imm_factor) #endif // sve: Vector length / Saturating decrement: Saturating decrement by number of halfword elements uint64_t svqdech_n_u64(uint64_t op, uint64_t imm_factor); int32_t svqdech_pat_n_s32(int32_t op, enum svpattern pattern, uint64_t imm_factor); svuint16_t svqdech_u16(svuint16_t op, uint64_t imm_factor); svint16_t svqdech_pat_s16(svint16_t op, enum svpattern pattern, uint64_t imm_factor); uint32_t svqdech_pat_n_u32(uint32_t op, enum svpattern pattern, uint64_t imm_factor); int64_t svqdech_pat_n_s64(int64_t op, enum svpattern pattern, uint64_t imm_factor); svint16_t svqdech_s16(svint16_t op, uint64_t imm_factor); uint64_t svqdech_pat_n_u64(uint64_t op, enum svpattern pattern, uint64_t imm_factor); svuint16_t svqdech_pat_u16(svuint16_t op, enum svpattern pattern, 
uint64_t imm_factor); int64_t svqdech_n_s64(int64_t op, uint64_t imm_factor); uint32_t svqdech_n_u32(uint32_t op, uint64_t imm_factor); int32_t svqdech_n_s32(int32_t op, uint64_t imm_factor); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdech(op, imm_factor) _Generic((op), \ uint64_t: svqdech_n_u64, \ svuint16_t: svqdech_u16, \ svint16_t: svqdech_s16, \ int64_t: svqdech_n_s64, \ uint32_t: svqdech_n_u32, \ int32_t: svqdech_n_s32, \ default: __assume(0) \ )(op, imm_factor) #define svqdech_pat(op, pattern, imm_factor) _Generic((op), \ int32_t: svqdech_pat_n_s32, \ svint16_t: svqdech_pat_s16, \ uint32_t: svqdech_pat_n_u32, \ int64_t: svqdech_pat_n_s64, \ uint64_t: svqdech_pat_n_u64, \ svuint16_t: svqdech_pat_u16, \ default: __assume(0) \ )(op, pattern, imm_factor) #endif // sve: Vector length / Saturating decrement: Saturating decrement by number of word elements int64_t svqdecw_n_s64(int64_t op, uint64_t imm_factor); uint64_t svqdecw_pat_n_u64(uint64_t op, enum svpattern pattern, uint64_t imm_factor); uint32_t svqdecw_pat_n_u32(uint32_t op, enum svpattern pattern, uint64_t imm_factor); int32_t svqdecw_pat_n_s32(int32_t op, enum svpattern pattern, uint64_t imm_factor); uint64_t svqdecw_n_u64(uint64_t op, uint64_t imm_factor); int64_t svqdecw_pat_n_s64(int64_t op, enum svpattern pattern, uint64_t imm_factor); svuint32_t svqdecw_u32(svuint32_t op, uint64_t imm_factor); svint32_t svqdecw_pat_s32(svint32_t op, enum svpattern pattern, uint64_t imm_factor); svuint32_t svqdecw_pat_u32(svuint32_t op, enum svpattern pattern, uint64_t imm_factor); uint32_t svqdecw_n_u32(uint32_t op, uint64_t imm_factor); int32_t svqdecw_n_s32(int32_t op, uint64_t imm_factor); svint32_t svqdecw_s32(svint32_t op, uint64_t imm_factor); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdecw(op, imm_factor) _Generic((op), \ int64_t: svqdecw_n_s64, \ uint64_t: svqdecw_n_u64, \ svuint32_t: svqdecw_u32, \ uint32_t: svqdecw_n_u32, \ int32_t: svqdecw_n_s32, \ svint32_t: svqdecw_s32, \ default: __assume(0) \ )(op, imm_factor) #define svqdecw_pat(op, pattern, imm_factor) _Generic((op), \ uint64_t: svqdecw_pat_n_u64, \ uint32_t: svqdecw_pat_n_u32, \ int32_t: svqdecw_pat_n_s32, \ int64_t: svqdecw_pat_n_s64, \ svint32_t: svqdecw_pat_s32, \ svuint32_t: svqdecw_pat_u32, \ default: __assume(0) \ )(op, pattern, imm_factor) #endif // sve: Vector length / Saturating increment: Saturating increment by number of byte elements uint64_t svqincb_pat_n_u64(uint64_t op, enum svpattern pattern, uint64_t imm_factor); uint32_t svqincb_pat_n_u32(uint32_t op, enum svpattern pattern, uint64_t imm_factor); int64_t svqincb_pat_n_s64(int64_t op, enum svpattern pattern, uint64_t imm_factor); int32_t svqincb_pat_n_s32(int32_t op, enum svpattern pattern, uint64_t imm_factor); uint64_t svqincb_n_u64(uint64_t op, uint64_t imm_factor); uint32_t svqincb_n_u32(uint32_t op, uint64_t imm_factor); int64_t svqincb_n_s64(int64_t op, uint64_t imm_factor); int32_t svqincb_n_s32(int32_t op, uint64_t imm_factor); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqincb_pat(op, pattern, imm_factor) _Generic((op), \ uint64_t: svqincb_pat_n_u64, \ uint32_t: svqincb_pat_n_u32, \ int64_t: svqincb_pat_n_s64, \ int32_t: svqincb_pat_n_s32, \ default: __assume(0) \ )(op, pattern, imm_factor) #define svqincb(op, imm_factor) _Generic((op), \ uint64_t: svqincb_n_u64, \ uint32_t: svqincb_n_u32, \ int64_t: svqincb_n_s64, \ int32_t: svqincb_n_s32, \ default: __assume(0) \ )(op, imm_factor) #endif // sve: Vector length / 
Saturating increment: Saturating increment by number of doubleword elements int32_t svqincd_pat_n_s32(int32_t op, enum svpattern pattern, uint64_t imm_factor); uint32_t svqincd_pat_n_u32(uint32_t op, enum svpattern pattern, uint64_t imm_factor); int64_t svqincd_n_s64(int64_t op, uint64_t imm_factor); int32_t svqincd_n_s32(int32_t op, uint64_t imm_factor); uint64_t svqincd_n_u64(uint64_t op, uint64_t imm_factor); int64_t svqincd_pat_n_s64(int64_t op, enum svpattern pattern, uint64_t imm_factor); uint32_t svqincd_n_u32(uint32_t op, uint64_t imm_factor); svint64_t svqincd_s64(svint64_t op, uint64_t imm_factor); uint64_t svqincd_pat_n_u64(uint64_t op, enum svpattern pattern, uint64_t imm_factor); svuint64_t svqincd_u64(svuint64_t op, uint64_t imm_factor); svint64_t svqincd_pat_s64(svint64_t op, enum svpattern pattern, uint64_t imm_factor); svuint64_t svqincd_pat_u64(svuint64_t op, enum svpattern pattern, uint64_t imm_factor); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqincd_pat(op, pattern, imm_factor) _Generic((op), \ int32_t: svqincd_pat_n_s32, \ uint32_t: svqincd_pat_n_u32, \ int64_t: svqincd_pat_n_s64, \ uint64_t: svqincd_pat_n_u64, \ svint64_t: svqincd_pat_s64, \ svuint64_t: svqincd_pat_u64, \ default: __assume(0) \ )(op, pattern, imm_factor) #define svqincd(op, imm_factor) _Generic((op), \ int64_t: svqincd_n_s64, \ int32_t: svqincd_n_s32, \ uint64_t: svqincd_n_u64, \ uint32_t: svqincd_n_u32, \ svint64_t: svqincd_s64, \ svuint64_t: svqincd_u64, \ default: __assume(0) \ )(op, imm_factor) #endif // sve: Vector length / Saturating increment: Saturating increment by number of halfword elements svint16_t svqinch_s16(svint16_t op, uint64_t imm_factor); svuint16_t svqinch_pat_u16(svuint16_t op, enum svpattern pattern, uint64_t imm_factor); svint16_t svqinch_pat_s16(svint16_t op, enum svpattern pattern, uint64_t imm_factor); uint64_t svqinch_pat_n_u64(uint64_t op, enum svpattern pattern, uint64_t imm_factor); svuint16_t svqinch_u16(svuint16_t op, uint64_t imm_factor); int64_t svqinch_n_s64(int64_t op, uint64_t imm_factor); uint32_t svqinch_n_u32(uint32_t op, uint64_t imm_factor); uint64_t svqinch_n_u64(uint64_t op, uint64_t imm_factor); int32_t svqinch_pat_n_s32(int32_t op, enum svpattern pattern, uint64_t imm_factor); int64_t svqinch_pat_n_s64(int64_t op, enum svpattern pattern, uint64_t imm_factor); uint32_t svqinch_pat_n_u32(uint32_t op, enum svpattern pattern, uint64_t imm_factor); int32_t svqinch_n_s32(int32_t op, uint64_t imm_factor); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqinch(op, imm_factor) _Generic((op), \ svint16_t: svqinch_s16, \ svuint16_t: svqinch_u16, \ int64_t: svqinch_n_s64, \ uint32_t: svqinch_n_u32, \ uint64_t: svqinch_n_u64, \ int32_t: svqinch_n_s32, \ default: __assume(0) \ )(op, imm_factor) #define svqinch_pat(op, pattern, imm_factor) _Generic((op), \ svuint16_t: svqinch_pat_u16, \ svint16_t: svqinch_pat_s16, \ uint64_t: svqinch_pat_n_u64, \ int32_t: svqinch_pat_n_s32, \ int64_t: svqinch_pat_n_s64, \ uint32_t: svqinch_pat_n_u32, \ default: __assume(0) \ )(op, pattern, imm_factor) #endif // sve: Vector length / Saturating increment: Saturating increment by number of word elements int64_t svqincw_pat_n_s64(int64_t op, enum svpattern pattern, uint64_t imm_factor); int32_t svqincw_n_s32(int32_t op, uint64_t imm_factor); int64_t svqincw_n_s64(int64_t op, uint64_t imm_factor); uint64_t svqincw_n_u64(uint64_t op, uint64_t imm_factor); int32_t svqincw_pat_n_s32(int32_t op, enum svpattern pattern, uint64_t imm_factor); 
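// Usage sketch (illustrative only, not part of this header): the element-count
// intrinsics (svcntb/svcnth/svcntw/svcntd) and the saturating increment family
// declared here are typically paired to strip-mine a loop, advancing a scalar
// counter by one vector's worth of lanes per step without risking wrap-around.
// The helper name `process_block` is hypothetical.
//
//     static void walk_words(uint64_t n) {
//         // svqincw_n_u64(i, 1) adds svcntw() to i, saturating instead of wrapping.
//         for (uint64_t i = 0; i < n; i = svqincw_n_u64(i, 1)) {
//             process_block(i, n);  // hypothetical per-vector kernel over 32-bit elements
//         }
//     }
//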
uint32_t svqincw_pat_n_u32(uint32_t op, enum svpattern pattern, uint64_t imm_factor); uint32_t svqincw_n_u32(uint32_t op, uint64_t imm_factor); svint32_t svqincw_s32(svint32_t op, uint64_t imm_factor); svuint32_t svqincw_u32(svuint32_t op, uint64_t imm_factor); svint32_t svqincw_pat_s32(svint32_t op, enum svpattern pattern, uint64_t imm_factor); svuint32_t svqincw_pat_u32(svuint32_t op, enum svpattern pattern, uint64_t imm_factor); uint64_t svqincw_pat_n_u64(uint64_t op, enum svpattern pattern, uint64_t imm_factor); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqincw_pat(op, pattern, imm_factor) _Generic((op), \ int64_t: svqincw_pat_n_s64, \ int32_t: svqincw_pat_n_s32, \ uint32_t: svqincw_pat_n_u32, \ svint32_t: svqincw_pat_s32, \ svuint32_t: svqincw_pat_u32, \ uint64_t: svqincw_pat_n_u64, \ default: __assume(0) \ )(op, pattern, imm_factor) #define svqincw(op, imm_factor) _Generic((op), \ int32_t: svqincw_n_s32, \ int64_t: svqincw_n_s64, \ uint64_t: svqincw_n_u64, \ uint32_t: svqincw_n_u32, \ svint32_t: svqincw_s32, \ svuint32_t: svqincw_u32, \ default: __assume(0) \ )(op, imm_factor) #endif // sve: Vector manipulation / Create linear sequence: Create linear series svint8_t svindex_s8(int8_t base, int8_t step); svint16_t svindex_s16(int16_t base, int16_t step); svint32_t svindex_s32(int32_t base, int32_t step); svuint8_t svindex_u8(uint8_t base, uint8_t step); svuint16_t svindex_u16(uint16_t base, uint16_t step); svuint32_t svindex_u32(uint32_t base, uint32_t step); svint64_t svindex_s64(int64_t base, int64_t step); svuint64_t svindex_u64(uint64_t base, uint64_t step); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svindex(base, step) _Generic((base), \ int8_t: svindex_s8, \ int16_t: svindex_s16, \ int32_t: svindex_s32, \ uint8_t: svindex_u8, \ uint16_t: svindex_u16, \ uint32_t: svindex_u32, \ int64_t: svindex_s64, \ uint64_t: svindex_u64, \ default: __assume(0) \ )(base, step) #endif // sve: Vector manipulation / Create uninitialized vector: Create an uninitialized vector svfloat16_t svundef_f16(void); svfloat32_t svundef_f32(void); svfloat64_t svundef_f64(void); svint8_t svundef_s8(void); svint16_t svundef_s16(void); svint32_t svundef_s32(void); svint64_t svundef_s64(void); svuint8_t svundef_u8(void); svuint16_t svundef_u16(void); svuint32_t svundef_u32(void); svuint64_t svundef_u64(void); // sve: Vector manipulation / Create vector: Broadcast a quadword of scalars svint64_t svdupq_lane_s64(svint64_t data, uint64_t index); svint32_t svdupq_lane_s32(svint32_t data, uint64_t index); svint16_t svdupq_lane_s16(svint16_t data, uint64_t index); svint8_t svdupq_lane_s8(svint8_t data, uint64_t index); svfloat64_t svdupq_lane_f64(svfloat64_t data, uint64_t index); svfloat32_t svdupq_lane_f32(svfloat32_t data, uint64_t index); svfloat16_t svdupq_lane_f16(svfloat16_t data, uint64_t index); svbfloat16_t svdupq_lane_bf16(svbfloat16_t data, uint64_t index); svuint64_t svdupq_n_u64(uint64_t x0, uint64_t x1); svint64_t svdupq_n_s64(int64_t x0, int64_t x1); svfloat32_t svdupq_n_f32(float32_t x0, float32_t x1, float32_t x2, float32_t x3); svuint16_t svdupq_lane_u16(svuint16_t data, uint64_t index); svuint16_t svdupq_n_u16(uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3, uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7); svint16_t svdupq_n_s16(int16_t x0, int16_t x1, int16_t x2, int16_t x3, int16_t x4, int16_t x5, int16_t x6, int16_t x7); svuint8_t svdupq_lane_u8(svuint8_t data, uint64_t index); svuint8_t svdupq_n_u8(uint8_t x0, uint8_t x1, uint8_t x2, 
uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8, uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15); svfloat64_t svdupq_n_f64(float64_t x0, float64_t x1); svuint64_t svdupq_lane_u64(svuint64_t data, uint64_t index); svuint32_t svdupq_lane_u32(svuint32_t data, uint64_t index); svint32_t svdupq_n_s32(int32_t x0, int32_t x1, int32_t x2, int32_t x3); svint8_t svdupq_n_s8(int8_t x0, int8_t x1, int8_t x2, int8_t x3, int8_t x4, int8_t x5, int8_t x6, int8_t x7, int8_t x8, int8_t x9, int8_t x10, int8_t x11, int8_t x12, int8_t x13, int8_t x14, int8_t x15); svuint32_t svdupq_n_u32(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svdupq_lane(data, index) _Generic((index), \ uint64_t: _Generic((data), \ svint64_t: svdupq_lane_s64, \ svint32_t: svdupq_lane_s32, \ svint16_t: svdupq_lane_s16, \ svint8_t: svdupq_lane_s8, \ svfloat64_t: svdupq_lane_f64, \ svfloat32_t: svdupq_lane_f32, \ svfloat16_t: svdupq_lane_f16, \ svbfloat16_t: svdupq_lane_bf16, \ svuint16_t: svdupq_lane_u16, \ svuint8_t: svdupq_lane_u8, \ svuint64_t: svdupq_lane_u64, \ svuint32_t: svdupq_lane_u32, \ default: __assume(0)), \ default: __assume(0) \ )(data, index) #define svdupq_u64(x0, x1) _Generic((x1), \ uint64_t: svdupq_n_u64, \ default: __assume(0) \ )(x0, x1) #define svdupq_s64(x0, x1) _Generic((x1), \ int64_t: svdupq_n_s64, \ default: __assume(0) \ )(x0, x1) #define svdupq_f32(x0, x1, x2, x3) _Generic((x3), \ float32_t: svdupq_n_f32, \ default: __assume(0) \ )(x0, x1, x2, x3) #define svdupq_u16(x0, x1, x2, x3, x4, x5, x6, x7) _Generic((x7), \ uint16_t: svdupq_n_u16, \ default: __assume(0) \ )(x0, x1, x2, x3, x4, x5, x6, x7) #define svdupq_s16(x0, x1, x2, x3, x4, x5, x6, x7) _Generic((x7), \ int16_t: svdupq_n_s16, \ default: __assume(0) \ )(x0, x1, x2, x3, x4, x5, x6, x7) #define svdupq_u8(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) _Generic((x15), \ uint8_t: svdupq_n_u8, \ default: __assume(0) \ )(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) #define svdupq_f64(x0, x1) _Generic((x1), \ float64_t: svdupq_n_f64, \ default: __assume(0) \ )(x0, x1) #define svdupq_s32(x0, x1, x2, x3) _Generic((x3), \ int32_t: svdupq_n_s32, \ default: __assume(0) \ )(x0, x1, x2, x3) #define svdupq_s8(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) _Generic((x15), \ int8_t: svdupq_n_s8, \ default: __assume(0) \ )(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) #define svdupq_u32(x0, x1, x2, x3) _Generic((x3), \ uint32_t: svdupq_n_u32, \ default: __assume(0) \ )(x0, x1, x2, x3) #endif // sve: Vector manipulation / Extract vector from a pair of vectors: Extract vector from pair of vectors svfloat64_t svext_f64(svfloat64_t op1, svfloat64_t op2, uint64_t imm3); svfloat32_t svext_f32(svfloat32_t op1, svfloat32_t op2, uint64_t imm3); svfloat16_t svext_f16(svfloat16_t op1, svfloat16_t op2, uint64_t imm3); svint8_t svext_s8(svint8_t op1, svint8_t op2, uint64_t imm3); svbfloat16_t svext_bf16(svbfloat16_t op1, svbfloat16_t op2, uint64_t imm3); svint16_t svext_s16(svint16_t op1, svint16_t op2, uint64_t imm3); svuint64_t svext_u64(svuint64_t op1, svuint64_t op2, uint64_t imm3); svint64_t svext_s64(svint64_t op1, svint64_t op2, uint64_t imm3); svuint8_t svext_u8(svuint8_t op1, svuint8_t op2, uint64_t imm3); svuint16_t svext_u16(svuint16_t op1, svuint16_t op2, uint64_t imm3); svuint32_t svext_u32(svuint32_t op1, svuint32_t op2, uint64_t imm3); 
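// Usage sketches (illustrative only, not part of this header): svdupq_n_*
// repeats one 128-bit group of scalars in every quadword of the result, and
// svext concatenates two vectors and extracts a full-width vector starting
// imm3 elements into the first operand, which gives a cheap sliding-window
// shift. Function names below are hypothetical.
//
//     static svfloat32_t quad_pattern(void) {
//         return svdupq_n_f32(1.0f, 2.0f, 3.0f, 4.0f);  // {1,2,3,4, 1,2,3,4, ...}
//     }
//
//     static svfloat32_t shift_left_one(svfloat32_t cur, svfloat32_t next) {
//         return svext_f32(cur, next, 1);  // {cur[1], ..., cur[VL-1], next[0]}
//     }
//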
svint32_t svext_s32(svint32_t op1, svint32_t op2, uint64_t imm3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svext(op1, op2, imm3) _Generic((op1), \ svfloat64_t: svext_f64, \ svfloat32_t: svext_f32, \ svfloat16_t: svext_f16, \ svint8_t: svext_s8, \ svbfloat16_t: svext_bf16, \ svint16_t: svext_s16, \ svuint64_t: svext_u64, \ svint64_t: svext_s64, \ svuint8_t: svext_u8, \ svuint16_t: svext_u16, \ svuint32_t: svext_u32, \ svint32_t: svext_s32, \ default: __assume(0) \ )(op1, op2, imm3) #endif // sve: Vector manipulation / Extract vector from a pair of vectors: Splice two vectors under predicate control svfloat32_t svsplice_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat64_t svsplice_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svint8_t svsplice_s8(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svsplice_s16(svbool_t pg, svint16_t op1, svint16_t op2); svfloat16_t svsplice_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svbfloat16_t svsplice_bf16(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2); svint64_t svsplice_s64(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svsplice_u8(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svsplice_u16(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svsplice_u32(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svsplice_u64(svbool_t pg, svuint64_t op1, svuint64_t op2); svint32_t svsplice_s32(svbool_t pg, svint32_t op1, svint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsplice(pg, op1, op2) _Generic((op2), \ svfloat32_t: svsplice_f32, \ svfloat64_t: svsplice_f64, \ svint8_t: svsplice_s8, \ svint16_t: svsplice_s16, \ svfloat16_t: svsplice_f16, \ svbfloat16_t: svsplice_bf16, \ svint64_t: svsplice_s64, \ svuint8_t: svsplice_u8, \ svuint16_t: svsplice_u16, \ svuint32_t: svsplice_u32, \ svuint64_t: svsplice_u64, \ svint32_t: svsplice_s32, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve: Vector manipulation / Insert element: Insert scalar into shifted vector svuint32_t svinsr_n_u32(svuint32_t op1, uint32_t op2); svuint16_t svinsr_n_u16(svuint16_t op1, uint16_t op2); svint32_t svinsr_n_s32(svint32_t op1, int32_t op2); svint64_t svinsr_n_s64(svint64_t op1, int64_t op2); svint16_t svinsr_n_s16(svint16_t op1, int16_t op2); svint8_t svinsr_n_s8(svint8_t op1, int8_t op2); svfloat64_t svinsr_n_f64(svfloat64_t op1, float64_t op2); svfloat32_t svinsr_n_f32(svfloat32_t op1, float32_t op2); svuint8_t svinsr_n_u8(svuint8_t op1, uint8_t op2); svuint64_t svinsr_n_u64(svuint64_t op1, uint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svinsr(op1, op2) _Generic((op2), \ uint32_t: svinsr_n_u32, \ uint16_t: svinsr_n_u16, \ int32_t: svinsr_n_s32, \ int64_t: svinsr_n_s64, \ int16_t: svinsr_n_s16, \ int8_t: svinsr_n_s8, \ float64_t: svinsr_n_f64, \ float32_t: svinsr_n_f32, \ uint8_t: svinsr_n_u8, \ uint64_t: svinsr_n_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector manipulation / Reverse bits within elements: Reverse bits svuint32_t svrbit_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op); svuint64_t svrbit_u64_z(svbool_t pg, svuint64_t op); svuint32_t svrbit_u32_z(svbool_t pg, svuint32_t op); svuint16_t svrbit_u16_z(svbool_t pg, svuint16_t op); svuint8_t svrbit_u8_z(svbool_t pg, svuint8_t op); svint64_t svrbit_s64_z(svbool_t pg, svint64_t op); svint32_t svrbit_s32_z(svbool_t pg, svint32_t op); svint16_t svrbit_s16_z(svbool_t pg, svint16_t op); svint8_t svrbit_s8_z(svbool_t pg, svint8_t op); svuint64_t 
svrbit_u64_x(svbool_t pg, svuint64_t op); svuint64_t svrbit_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op); svuint32_t svrbit_u32_x(svbool_t pg, svuint32_t op); svuint8_t svrbit_u8_x(svbool_t pg, svuint8_t op); svint64_t svrbit_s64_x(svbool_t pg, svint64_t op); svint32_t svrbit_s32_x(svbool_t pg, svint32_t op); svint8_t svrbit_s8_m(svint8_t inactive, svbool_t pg, svint8_t op); svint16_t svrbit_s16_m(svint16_t inactive, svbool_t pg, svint16_t op); svint32_t svrbit_s32_m(svint32_t inactive, svbool_t pg, svint32_t op); svint64_t svrbit_s64_m(svint64_t inactive, svbool_t pg, svint64_t op); svint16_t svrbit_s16_x(svbool_t pg, svint16_t op); svuint16_t svrbit_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op); svuint16_t svrbit_u16_x(svbool_t pg, svuint16_t op); svuint8_t svrbit_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op); svint8_t svrbit_s8_x(svbool_t pg, svint8_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrbit_m(inactive, pg, op) _Generic((op), \ svuint32_t: svrbit_u32_m, \ svuint64_t: svrbit_u64_m, \ svint8_t: svrbit_s8_m, \ svint16_t: svrbit_s16_m, \ svint32_t: svrbit_s32_m, \ svint64_t: svrbit_s64_m, \ svuint16_t: svrbit_u16_m, \ svuint8_t: svrbit_u8_m, \ default: __assume(0) \ )(inactive, pg, op) #define svrbit_z(pg, op) _Generic((op), \ svuint64_t: svrbit_u64_z, \ svuint32_t: svrbit_u32_z, \ svuint16_t: svrbit_u16_z, \ svuint8_t: svrbit_u8_z, \ svint64_t: svrbit_s64_z, \ svint32_t: svrbit_s32_z, \ svint16_t: svrbit_s16_z, \ svint8_t: svrbit_s8_z, \ default: __assume(0) \ )(pg, op) #define svrbit_x(pg, op) _Generic((op), \ svuint64_t: svrbit_u64_x, \ svuint32_t: svrbit_u32_x, \ svuint8_t: svrbit_u8_x, \ svint64_t: svrbit_s64_x, \ svint32_t: svrbit_s32_x, \ svint16_t: svrbit_s16_x, \ svuint16_t: svrbit_u16_x, \ svint8_t: svrbit_s8_x, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector manipulation / Reverse elements: Reverse all elements svfloat32_t svrev_f32(svfloat32_t op); svfloat64_t svrev_f64(svfloat64_t op); svint8_t svrev_s8(svint8_t op); svint16_t svrev_s16(svint16_t op); svint32_t svrev_s32(svint32_t op); svint64_t svrev_s64(svint64_t op); svuint8_t svrev_u8(svuint8_t op); svuint16_t svrev_u16(svuint16_t op); svuint32_t svrev_u32(svuint32_t op); svuint64_t svrev_u64(svuint64_t op); svbool_t svrev_b8(svbool_t op); svbool_t svrev_b16(svbool_t op); svbool_t svrev_b32(svbool_t op); svbool_t svrev_b64(svbool_t op); svfloat16_t svrev_f16(svfloat16_t op); svbfloat16_t svrev_bf16(svbfloat16_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrev(op) _Generic((op), \ svfloat32_t: svrev_f32, \ svfloat64_t: svrev_f64, \ svint8_t: svrev_s8, \ svint16_t: svrev_s16, \ svint32_t: svrev_s32, \ svint64_t: svrev_s64, \ svuint8_t: svrev_u8, \ svuint16_t: svrev_u16, \ svuint32_t: svrev_u32, \ svuint64_t: svrev_u64, \ svfloat16_t: svrev_f16, \ svbfloat16_t: svrev_bf16, \ default: __assume(0) \ )(op) #endif // sve: Vector manipulation / Reverse elements: Reverse bytes within elements svuint32_t svrevb_u32_z(svbool_t pg, svuint32_t op); svuint16_t svrevb_u16_z(svbool_t pg, svuint16_t op); svint64_t svrevb_s64_z(svbool_t pg, svint64_t op); svint32_t svrevb_s32_z(svbool_t pg, svint32_t op); svint16_t svrevb_s16_z(svbool_t pg, svint16_t op); svuint64_t svrevb_u64_x(svbool_t pg, svuint64_t op); svuint32_t svrevb_u32_x(svbool_t pg, svuint32_t op); svuint64_t svrevb_u64_z(svbool_t pg, svuint64_t op); svuint16_t svrevb_u16_x(svbool_t pg, svuint16_t op); svint32_t svrevb_s32_x(svbool_t pg, svint32_t op); svint16_t 
svrevb_s16_x(svbool_t pg, svint16_t op); svuint64_t svrevb_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op); svuint32_t svrevb_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op); svuint16_t svrevb_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op); svint64_t svrevb_s64_m(svint64_t inactive, svbool_t pg, svint64_t op); svint64_t svrevb_s64_x(svbool_t pg, svint64_t op); svint32_t svrevb_s32_m(svint32_t inactive, svbool_t pg, svint32_t op); svint16_t svrevb_s16_m(svint16_t inactive, svbool_t pg, svint16_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrevb_z(pg, op) _Generic((op), \ svuint32_t: svrevb_u32_z, \ svuint16_t: svrevb_u16_z, \ svint64_t: svrevb_s64_z, \ svint32_t: svrevb_s32_z, \ svint16_t: svrevb_s16_z, \ svuint64_t: svrevb_u64_z, \ default: __assume(0) \ )(pg, op) #define svrevb_x(pg, op) _Generic((op), \ svuint64_t: svrevb_u64_x, \ svuint32_t: svrevb_u32_x, \ svuint16_t: svrevb_u16_x, \ svint32_t: svrevb_s32_x, \ svint16_t: svrevb_s16_x, \ svint64_t: svrevb_s64_x, \ default: __assume(0) \ )(pg, op) #define svrevb_m(inactive, pg, op) _Generic((op), \ svuint64_t: svrevb_u64_m, \ svuint32_t: svrevb_u32_m, \ svuint16_t: svrevb_u16_m, \ svint64_t: svrevb_s64_m, \ svint32_t: svrevb_s32_m, \ svint16_t: svrevb_s16_m, \ default: __assume(0) \ )(inactive, pg, op) #endif // sve: Vector manipulation / Reverse elements: Reverse halfwords within elements svint32_t svrevh_s32_z(svbool_t pg, svint32_t op); svint64_t svrevh_s64_z(svbool_t pg, svint64_t op); svuint64_t svrevh_u64_z(svbool_t pg, svuint64_t op); svint32_t svrevh_s32_m(svint32_t inactive, svbool_t pg, svint32_t op); svint64_t svrevh_s64_m(svint64_t inactive, svbool_t pg, svint64_t op); svuint32_t svrevh_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op); svint64_t svrevh_s64_x(svbool_t pg, svint64_t op); svint32_t svrevh_s32_x(svbool_t pg, svint32_t op); svuint32_t svrevh_u32_z(svbool_t pg, svuint32_t op); svuint32_t svrevh_u32_x(svbool_t pg, svuint32_t op); svuint64_t svrevh_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op); svuint64_t svrevh_u64_x(svbool_t pg, svuint64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrevh_z(pg, op) _Generic((op), \ svint32_t: svrevh_s32_z, \ svint64_t: svrevh_s64_z, \ svuint64_t: svrevh_u64_z, \ svuint32_t: svrevh_u32_z, \ default: __assume(0) \ )(pg, op) #define svrevh_m(inactive, pg, op) _Generic((op), \ svint32_t: svrevh_s32_m, \ svint64_t: svrevh_s64_m, \ svuint32_t: svrevh_u32_m, \ svuint64_t: svrevh_u64_m, \ default: __assume(0) \ )(inactive, pg, op) #define svrevh_x(pg, op) _Generic((op), \ svint64_t: svrevh_s64_x, \ svint32_t: svrevh_s32_x, \ svuint32_t: svrevh_u32_x, \ svuint64_t: svrevh_u64_x, \ default: __assume(0) \ )(pg, op) #endif // sve: Vector manipulation / Reverse elements: Reverse words within elements svuint64_t svrevw_u64_z(svbool_t pg, svuint64_t op); svint64_t svrevw_s64_z(svbool_t pg, svint64_t op); svint64_t svrevw_s64_x(svbool_t pg, svint64_t op); svuint64_t svrevw_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op); svint64_t svrevw_s64_m(svint64_t inactive, svbool_t pg, svint64_t op); svuint64_t svrevw_u64_x(svbool_t pg, svuint64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrevw_z(pg, op) _Generic((op), \ svuint64_t: svrevw_u64_z, \ svint64_t: svrevw_s64_z, \ default: __assume(0) \ )(pg, op) #define svrevw_x(pg, op) _Generic((op), \ svint64_t: svrevw_s64_x, \ svuint64_t: svrevw_u64_x, \ default: __assume(0) \ )(pg, op) #define svrevw_m(inactive, pg, op) 
_Generic((op), \ svuint64_t: svrevw_u64_m, \ svint64_t: svrevw_s64_m, \ default: __assume(0) \ )(inactive, pg, op) #endif // sve: Vector manipulation / Set all lanes to the same value: Broadcast a scalar value svfloat32_t svdup_n_f32(float32_t op); svuint16_t svdup_n_u16(uint16_t op); svint8_t svdup_n_s8(int8_t op); svuint8_t svdup_n_u8_m(svuint8_t inactive, svbool_t pg, uint8_t op); svint64_t svdup_n_s64_m(svint64_t inactive, svbool_t pg, int64_t op); svint32_t svdup_n_s32_m(svint32_t inactive, svbool_t pg, int32_t op); svint16_t svdup_n_s16_m(svint16_t inactive, svbool_t pg, int16_t op); svint8_t svdup_n_s8_m(svint8_t inactive, svbool_t pg, int8_t op); svfloat64_t svdup_n_f64_m(svfloat64_t inactive, svbool_t pg, float64_t op); svfloat32_t svdup_n_f32_m(svfloat32_t inactive, svbool_t pg, float32_t op); svuint64_t svdup_n_u64(uint64_t op); svuint32_t svdup_n_u32(uint32_t op); svuint8_t svdup_n_u8(uint8_t op); svint64_t svdup_n_s64(int64_t op); svint32_t svdup_n_s32(int32_t op); svint16_t svdup_n_s16(int16_t op); svfloat64_t svdup_n_f64(float64_t op); svuint64_t svdup_n_u64_m(svuint64_t inactive, svbool_t pg, uint64_t op); svint8_t svdup_n_s8_x(svbool_t pg, int8_t op); svbfloat16_t svdup_lane_bf16(svbfloat16_t data, uint16_t index); svfloat16_t svdup_lane_f16(svfloat16_t data, uint16_t index); svfloat32_t svdup_lane_f32(svfloat32_t data, uint32_t index); svuint64_t svdup_n_u64_z(svbool_t pg, uint64_t op); svfloat64_t svdup_lane_f64(svfloat64_t data, uint64_t index); svint16_t svdup_lane_s16(svint16_t data, uint16_t index); svuint16_t svdup_n_u16_m(svuint16_t inactive, svbool_t pg, uint16_t op); svint32_t svdup_lane_s32(svint32_t data, uint32_t index); svint64_t svdup_lane_s64(svint64_t data, uint64_t index); svuint16_t svdup_lane_u16(svuint16_t data, uint16_t index); svuint32_t svdup_lane_u32(svuint32_t data, uint32_t index); svuint64_t svdup_lane_u64(svuint64_t data, uint64_t index); svint8_t svdup_lane_s8(svint8_t data, uint8_t index); svuint32_t svdup_n_u32_z(svbool_t pg, uint32_t op); svuint16_t svdup_n_u16_z(svbool_t pg, uint16_t op); svuint8_t svdup_n_u8_z(svbool_t pg, uint8_t op); svfloat32_t svdup_n_f32_x(svbool_t pg, float32_t op); svfloat64_t svdup_n_f64_x(svbool_t pg, float64_t op); svint16_t svdup_n_s16_x(svbool_t pg, int16_t op); svint32_t svdup_n_s32_x(svbool_t pg, int32_t op); svint64_t svdup_n_s64_x(svbool_t pg, int64_t op); svuint8_t svdup_n_u8_x(svbool_t pg, uint8_t op); svuint16_t svdup_n_u16_x(svbool_t pg, uint16_t op); svuint32_t svdup_n_u32_x(svbool_t pg, uint32_t op); svuint64_t svdup_n_u64_x(svbool_t pg, uint64_t op); svfloat32_t svdup_n_f32_z(svbool_t pg, float32_t op); svfloat64_t svdup_n_f64_z(svbool_t pg, float64_t op); svint8_t svdup_n_s8_z(svbool_t pg, int8_t op); svint16_t svdup_n_s16_z(svbool_t pg, int16_t op); svint32_t svdup_n_s32_z(svbool_t pg, int32_t op); svint64_t svdup_n_s64_z(svbool_t pg, int64_t op); svuint8_t svdup_lane_u8(svuint8_t data, uint8_t index); svuint32_t svdup_n_u32_m(svuint32_t inactive, svbool_t pg, uint32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svdup_f32(op) _Generic((op), \ float32_t: svdup_n_f32, \ default: __assume(0) \ )(op) #define svdup_u16(op) _Generic((op), \ uint16_t: svdup_n_u16, \ default: __assume(0) \ )(op) #define svdup_s8(op) _Generic((op), \ int8_t: svdup_n_s8, \ default: __assume(0) \ )(op) #define svdup_u8_m(inactive, pg, op) _Generic((op), \ uint8_t: svdup_n_u8_m, \ default: __assume(0) \ )(inactive, pg, op) #define svdup_s64_m(inactive, pg, op) _Generic((op), \ int64_t: 
svdup_n_s64_m, \ default: __assume(0) \ )(inactive, pg, op) #define svdup_s32_m(inactive, pg, op) _Generic((op), \ int32_t: svdup_n_s32_m, \ default: __assume(0) \ )(inactive, pg, op) #define svdup_s16_m(inactive, pg, op) _Generic((op), \ int16_t: svdup_n_s16_m, \ default: __assume(0) \ )(inactive, pg, op) #define svdup_s8_m(inactive, pg, op) _Generic((op), \ int8_t: svdup_n_s8_m, \ default: __assume(0) \ )(inactive, pg, op) #define svdup_f64_m(inactive, pg, op) _Generic((op), \ float64_t: svdup_n_f64_m, \ default: __assume(0) \ )(inactive, pg, op) #define svdup_f32_m(inactive, pg, op) _Generic((op), \ float32_t: svdup_n_f32_m, \ default: __assume(0) \ )(inactive, pg, op) #define svdup_u64(op) _Generic((op), \ uint64_t: svdup_n_u64, \ default: __assume(0) \ )(op) #define svdup_u32(op) _Generic((op), \ uint32_t: svdup_n_u32, \ default: __assume(0) \ )(op) #define svdup_u8(op) _Generic((op), \ uint8_t: svdup_n_u8, \ default: __assume(0) \ )(op) #define svdup_s64(op) _Generic((op), \ int64_t: svdup_n_s64, \ default: __assume(0) \ )(op) #define svdup_s32(op) _Generic((op), \ int32_t: svdup_n_s32, \ default: __assume(0) \ )(op) #define svdup_s16(op) _Generic((op), \ int16_t: svdup_n_s16, \ default: __assume(0) \ )(op) #define svdup_f64(op) _Generic((op), \ float64_t: svdup_n_f64, \ default: __assume(0) \ )(op) #define svdup_u64_m(inactive, pg, op) _Generic((op), \ uint64_t: svdup_n_u64_m, \ default: __assume(0) \ )(inactive, pg, op) #define svdup_s8_x(pg, op) _Generic((op), \ int8_t: svdup_n_s8_x, \ default: __assume(0) \ )(pg, op) #define svdup_lane(data, index) _Generic((index), \ uint16_t: _Generic((data), \ svbfloat16_t: svdup_lane_bf16, \ svfloat16_t: svdup_lane_f16, \ svint16_t: svdup_lane_s16, \ svuint16_t: svdup_lane_u16, \ default: __assume(0)), \ uint32_t: _Generic((data), \ svfloat32_t: svdup_lane_f32, \ svint32_t: svdup_lane_s32, \ svuint32_t: svdup_lane_u32, \ default: __assume(0)), \ uint64_t: _Generic((data), \ svfloat64_t: svdup_lane_f64, \ svint64_t: svdup_lane_s64, \ svuint64_t: svdup_lane_u64, \ default: __assume(0)), \ uint8_t: _Generic((data), \ svint8_t: svdup_lane_s8, \ svuint8_t: svdup_lane_u8, \ default: __assume(0)), \ default: __assume(0) \ )(data, index) #define svdup_u64_z(pg, op) _Generic((op), \ uint64_t: svdup_n_u64_z, \ default: __assume(0) \ )(pg, op) #define svdup_u16_m(inactive, pg, op) _Generic((op), \ uint16_t: svdup_n_u16_m, \ default: __assume(0) \ )(inactive, pg, op) #define svdup_u32_z(pg, op) _Generic((op), \ uint32_t: svdup_n_u32_z, \ default: __assume(0) \ )(pg, op) #define svdup_u16_z(pg, op) _Generic((op), \ uint16_t: svdup_n_u16_z, \ default: __assume(0) \ )(pg, op) #define svdup_u8_z(pg, op) _Generic((op), \ uint8_t: svdup_n_u8_z, \ default: __assume(0) \ )(pg, op) #define svdup_f32_x(pg, op) _Generic((op), \ float32_t: svdup_n_f32_x, \ default: __assume(0) \ )(pg, op) #define svdup_f64_x(pg, op) _Generic((op), \ float64_t: svdup_n_f64_x, \ default: __assume(0) \ )(pg, op) #define svdup_s16_x(pg, op) _Generic((op), \ int16_t: svdup_n_s16_x, \ default: __assume(0) \ )(pg, op) #define svdup_s32_x(pg, op) _Generic((op), \ int32_t: svdup_n_s32_x, \ default: __assume(0) \ )(pg, op) #define svdup_s64_x(pg, op) _Generic((op), \ int64_t: svdup_n_s64_x, \ default: __assume(0) \ )(pg, op) #define svdup_u8_x(pg, op) _Generic((op), \ uint8_t: svdup_n_u8_x, \ default: __assume(0) \ )(pg, op) #define svdup_u16_x(pg, op) _Generic((op), \ uint16_t: svdup_n_u16_x, \ default: __assume(0) \ )(pg, op) #define svdup_u32_x(pg, op) _Generic((op), \ uint32_t: 
svdup_n_u32_x, \ default: __assume(0) \ )(pg, op) #define svdup_u64_x(pg, op) _Generic((op), \ uint64_t: svdup_n_u64_x, \ default: __assume(0) \ )(pg, op) #define svdup_f32_z(pg, op) _Generic((op), \ float32_t: svdup_n_f32_z, \ default: __assume(0) \ )(pg, op) #define svdup_f64_z(pg, op) _Generic((op), \ float64_t: svdup_n_f64_z, \ default: __assume(0) \ )(pg, op) #define svdup_s8_z(pg, op) _Generic((op), \ int8_t: svdup_n_s8_z, \ default: __assume(0) \ )(pg, op) #define svdup_s16_z(pg, op) _Generic((op), \ int16_t: svdup_n_s16_z, \ default: __assume(0) \ )(pg, op) #define svdup_s32_z(pg, op) _Generic((op), \ int32_t: svdup_n_s32_z, \ default: __assume(0) \ )(pg, op) #define svdup_s64_z(pg, op) _Generic((op), \ int64_t: svdup_n_s64_z, \ default: __assume(0) \ )(pg, op) #define svdup_u32_m(inactive, pg, op) _Generic((op), \ uint32_t: svdup_n_u32_m, \ default: __assume(0) \ )(inactive, pg, op) #endif // sve: Vector manipulation / Transpose elements: Interleave even elements from two inputs svuint32_t svtrn1_u32(svuint32_t op1, svuint32_t op2); svint32_t svtrn1_s32(svint32_t op1, svint32_t op2); svint64_t svtrn1_s64(svint64_t op1, svint64_t op2); svuint8_t svtrn1_u8(svuint8_t op1, svuint8_t op2); svuint16_t svtrn1_u16(svuint16_t op1, svuint16_t op2); svbfloat16_t svtrn1_bf16(svbfloat16_t op1, svbfloat16_t op2); svfloat16_t svtrn1_f16(svfloat16_t op1, svfloat16_t op2); svint8_t svtrn1_s8(svint8_t op1, svint8_t op2); svfloat32_t svtrn1_f32(svfloat32_t op1, svfloat32_t op2); svfloat64_t svtrn1_f64(svfloat64_t op1, svfloat64_t op2); svuint64_t svtrn1_u64(svuint64_t op1, svuint64_t op2); svint16_t svtrn1_s16(svint16_t op1, svint16_t op2); svbool_t svtrn1_b64(svbool_t op1, svbool_t op2); svbool_t svtrn1_b32(svbool_t op1, svbool_t op2); svbool_t svtrn1_b16(svbool_t op1, svbool_t op2); svbool_t svtrn1_b8(svbool_t op1, svbool_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svtrn1(op1, op2) _Generic((op2), \ svuint32_t: svtrn1_u32, \ svint32_t: svtrn1_s32, \ svint64_t: svtrn1_s64, \ svuint8_t: svtrn1_u8, \ svuint16_t: svtrn1_u16, \ svbfloat16_t: svtrn1_bf16, \ svfloat16_t: svtrn1_f16, \ svint8_t: svtrn1_s8, \ svfloat32_t: svtrn1_f32, \ svfloat64_t: svtrn1_f64, \ svuint64_t: svtrn1_u64, \ svint16_t: svtrn1_s16, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector manipulation / Transpose elements: Interleave even quadwords from two inputs svint16_t svtrn1q_s16(svint16_t op1, svint16_t op2); svint32_t svtrn1q_s32(svint32_t op1, svint32_t op2); svfloat64_t svtrn1q_f64(svfloat64_t op1, svfloat64_t op2); svuint8_t svtrn1q_u8(svuint8_t op1, svuint8_t op2); svuint16_t svtrn1q_u16(svuint16_t op1, svuint16_t op2); svuint32_t svtrn1q_u32(svuint32_t op1, svuint32_t op2); svuint64_t svtrn1q_u64(svuint64_t op1, svuint64_t op2); svfloat32_t svtrn1q_f32(svfloat32_t op1, svfloat32_t op2); svfloat16_t svtrn1q_f16(svfloat16_t op1, svfloat16_t op2); svbfloat16_t svtrn1q_bf16(svbfloat16_t op1, svbfloat16_t op2); svint8_t svtrn1q_s8(svint8_t op1, svint8_t op2); svint64_t svtrn1q_s64(svint64_t op1, svint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svtrn1q(op1, op2) _Generic((op2), \ svint16_t: svtrn1q_s16, \ svint32_t: svtrn1q_s32, \ svfloat64_t: svtrn1q_f64, \ svuint8_t: svtrn1q_u8, \ svuint16_t: svtrn1q_u16, \ svuint32_t: svtrn1q_u32, \ svuint64_t: svtrn1q_u64, \ svfloat32_t: svtrn1q_f32, \ svfloat16_t: svtrn1q_f16, \ svbfloat16_t: svtrn1q_bf16, \ svint8_t: svtrn1q_s8, \ svint64_t: svtrn1q_s64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector 
manipulation / Transpose elements: Interleave odd elements from two inputs svfloat32_t svtrn2_f32(svfloat32_t op1, svfloat32_t op2); svbfloat16_t svtrn2_bf16(svbfloat16_t op1, svbfloat16_t op2); svfloat16_t svtrn2_f16(svfloat16_t op1, svfloat16_t op2); svfloat64_t svtrn2_f64(svfloat64_t op1, svfloat64_t op2); svint8_t svtrn2_s8(svint8_t op1, svint8_t op2); svint16_t svtrn2_s16(svint16_t op1, svint16_t op2); svint32_t svtrn2_s32(svint32_t op1, svint32_t op2); svint64_t svtrn2_s64(svint64_t op1, svint64_t op2); svuint8_t svtrn2_u8(svuint8_t op1, svuint8_t op2); svuint16_t svtrn2_u16(svuint16_t op1, svuint16_t op2); svuint32_t svtrn2_u32(svuint32_t op1, svuint32_t op2); svuint64_t svtrn2_u64(svuint64_t op1, svuint64_t op2); svbool_t svtrn2_b64(svbool_t op1, svbool_t op2); svbool_t svtrn2_b32(svbool_t op1, svbool_t op2); svbool_t svtrn2_b16(svbool_t op1, svbool_t op2); svbool_t svtrn2_b8(svbool_t op1, svbool_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svtrn2(op1, op2) _Generic((op2), \ svfloat32_t: svtrn2_f32, \ svbfloat16_t: svtrn2_bf16, \ svfloat16_t: svtrn2_f16, \ svfloat64_t: svtrn2_f64, \ svint8_t: svtrn2_s8, \ svint16_t: svtrn2_s16, \ svint32_t: svtrn2_s32, \ svint64_t: svtrn2_s64, \ svuint8_t: svtrn2_u8, \ svuint16_t: svtrn2_u16, \ svuint32_t: svtrn2_u32, \ svuint64_t: svtrn2_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector manipulation / Transpose elements: Interleave odd quadwords from two inputs svuint32_t svtrn2q_u32(svuint32_t op1, svuint32_t op2); svbfloat16_t svtrn2q_bf16(svbfloat16_t op1, svbfloat16_t op2); svfloat16_t svtrn2q_f16(svfloat16_t op1, svfloat16_t op2); svfloat32_t svtrn2q_f32(svfloat32_t op1, svfloat32_t op2); svint8_t svtrn2q_s8(svint8_t op1, svint8_t op2); svint16_t svtrn2q_s16(svint16_t op1, svint16_t op2); svint32_t svtrn2q_s32(svint32_t op1, svint32_t op2); svint64_t svtrn2q_s64(svint64_t op1, svint64_t op2); svuint8_t svtrn2q_u8(svuint8_t op1, svuint8_t op2); svuint16_t svtrn2q_u16(svuint16_t op1, svuint16_t op2); svuint64_t svtrn2q_u64(svuint64_t op1, svuint64_t op2); svfloat64_t svtrn2q_f64(svfloat64_t op1, svfloat64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svtrn2q(op1, op2) _Generic((op2), \ svuint32_t: svtrn2q_u32, \ svbfloat16_t: svtrn2q_bf16, \ svfloat16_t: svtrn2q_f16, \ svfloat32_t: svtrn2q_f32, \ svint8_t: svtrn2q_s8, \ svint16_t: svtrn2q_s16, \ svint32_t: svtrn2q_s32, \ svint64_t: svtrn2q_s64, \ svuint8_t: svtrn2q_u8, \ svuint16_t: svtrn2q_u16, \ svuint64_t: svtrn2q_u64, \ svfloat64_t: svtrn2q_f64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector manipulation / Unzip elements: Concatenate even elements from two inputs svuint64_t svuzp1_u64(svuint64_t op1, svuint64_t op2); svuint32_t svuzp1_u32(svuint32_t op1, svuint32_t op2); svfloat32_t svuzp1_f32(svfloat32_t op1, svfloat32_t op2); svfloat16_t svuzp1_f16(svfloat16_t op1, svfloat16_t op2); svuint16_t svuzp1_u16(svuint16_t op1, svuint16_t op2); svfloat64_t svuzp1_f64(svfloat64_t op1, svfloat64_t op2); svint8_t svuzp1_s8(svint8_t op1, svint8_t op2); svint16_t svuzp1_s16(svint16_t op1, svint16_t op2); svint32_t svuzp1_s32(svint32_t op1, svint32_t op2); svint64_t svuzp1_s64(svint64_t op1, svint64_t op2); svuint8_t svuzp1_u8(svuint8_t op1, svuint8_t op2); svbfloat16_t svuzp1_bf16(svbfloat16_t op1, svbfloat16_t op2); svbool_t svuzp1_b64(svbool_t op1, svbool_t op2); svbool_t svuzp1_b32(svbool_t op1, svbool_t op2); svbool_t svuzp1_b16(svbool_t op1, svbool_t op2); svbool_t svuzp1_b8(svbool_t op1, svbool_t op2); #if 
defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svuzp1(op1, op2) _Generic((op2), \ svuint64_t: svuzp1_u64, \ svuint32_t: svuzp1_u32, \ svfloat32_t: svuzp1_f32, \ svfloat16_t: svuzp1_f16, \ svuint16_t: svuzp1_u16, \ svfloat64_t: svuzp1_f64, \ svint8_t: svuzp1_s8, \ svint16_t: svuzp1_s16, \ svint32_t: svuzp1_s32, \ svint64_t: svuzp1_s64, \ svuint8_t: svuzp1_u8, \ svbfloat16_t: svuzp1_bf16, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector manipulation / Unzip elements: Concatenate even quadwords from two inputs svbfloat16_t svuzp1q_bf16(svbfloat16_t op1, svbfloat16_t op2); svuint64_t svuzp1q_u64(svuint64_t op1, svuint64_t op2); svuint32_t svuzp1q_u32(svuint32_t op1, svuint32_t op2); svuint16_t svuzp1q_u16(svuint16_t op1, svuint16_t op2); svuint8_t svuzp1q_u8(svuint8_t op1, svuint8_t op2); svint64_t svuzp1q_s64(svint64_t op1, svint64_t op2); svint32_t svuzp1q_s32(svint32_t op1, svint32_t op2); svint16_t svuzp1q_s16(svint16_t op1, svint16_t op2); svint8_t svuzp1q_s8(svint8_t op1, svint8_t op2); svfloat64_t svuzp1q_f64(svfloat64_t op1, svfloat64_t op2); svfloat32_t svuzp1q_f32(svfloat32_t op1, svfloat32_t op2); svfloat16_t svuzp1q_f16(svfloat16_t op1, svfloat16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svuzp1q(op1, op2) _Generic((op2), \ svbfloat16_t: svuzp1q_bf16, \ svuint64_t: svuzp1q_u64, \ svuint32_t: svuzp1q_u32, \ svuint16_t: svuzp1q_u16, \ svuint8_t: svuzp1q_u8, \ svint64_t: svuzp1q_s64, \ svint32_t: svuzp1q_s32, \ svint16_t: svuzp1q_s16, \ svint8_t: svuzp1q_s8, \ svfloat64_t: svuzp1q_f64, \ svfloat32_t: svuzp1q_f32, \ svfloat16_t: svuzp1q_f16, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector manipulation / Unzip elements: Concatenate odd elements from two inputs svuint64_t svuzp2_u64(svuint64_t op1, svuint64_t op2); svfloat16_t svuzp2_f16(svfloat16_t op1, svfloat16_t op2); svuint16_t svuzp2_u16(svuint16_t op1, svuint16_t op2); svbfloat16_t svuzp2_bf16(svbfloat16_t op1, svbfloat16_t op2); svfloat32_t svuzp2_f32(svfloat32_t op1, svfloat32_t op2); svfloat64_t svuzp2_f64(svfloat64_t op1, svfloat64_t op2); svint8_t svuzp2_s8(svint8_t op1, svint8_t op2); svint16_t svuzp2_s16(svint16_t op1, svint16_t op2); svint32_t svuzp2_s32(svint32_t op1, svint32_t op2); svint64_t svuzp2_s64(svint64_t op1, svint64_t op2); svuint8_t svuzp2_u8(svuint8_t op1, svuint8_t op2); svuint32_t svuzp2_u32(svuint32_t op1, svuint32_t op2); svbool_t svuzp2_b64(svbool_t op1, svbool_t op2); svbool_t svuzp2_b32(svbool_t op1, svbool_t op2); svbool_t svuzp2_b16(svbool_t op1, svbool_t op2); svbool_t svuzp2_b8(svbool_t op1, svbool_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svuzp2(op1, op2) _Generic((op2), \ svuint64_t: svuzp2_u64, \ svfloat16_t: svuzp2_f16, \ svuint16_t: svuzp2_u16, \ svbfloat16_t: svuzp2_bf16, \ svfloat32_t: svuzp2_f32, \ svfloat64_t: svuzp2_f64, \ svint8_t: svuzp2_s8, \ svint16_t: svuzp2_s16, \ svint32_t: svuzp2_s32, \ svint64_t: svuzp2_s64, \ svuint8_t: svuzp2_u8, \ svuint32_t: svuzp2_u32, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector manipulation / Unzip elements: Concatenate odd quadwords from two inputs svfloat16_t svuzp2q_f16(svfloat16_t op1, svfloat16_t op2); svuint32_t svuzp2q_u32(svuint32_t op1, svuint32_t op2); svuint16_t svuzp2q_u16(svuint16_t op1, svuint16_t op2); svint64_t svuzp2q_s64(svint64_t op1, svint64_t op2); svint32_t svuzp2q_s32(svint32_t op1, svint32_t op2); svint16_t svuzp2q_s16(svint16_t op1, svint16_t op2); svint8_t svuzp2q_s8(svint8_t op1, svint8_t op2); svfloat64_t 
svuzp2q_f64(svfloat64_t op1, svfloat64_t op2); svfloat32_t svuzp2q_f32(svfloat32_t op1, svfloat32_t op2); svbfloat16_t svuzp2q_bf16(svbfloat16_t op1, svbfloat16_t op2); svuint8_t svuzp2q_u8(svuint8_t op1, svuint8_t op2); svuint64_t svuzp2q_u64(svuint64_t op1, svuint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svuzp2q(op1, op2) _Generic((op2), \ svfloat16_t: svuzp2q_f16, \ svuint32_t: svuzp2q_u32, \ svuint16_t: svuzp2q_u16, \ svint64_t: svuzp2q_s64, \ svint32_t: svuzp2q_s32, \ svint16_t: svuzp2q_s16, \ svint8_t: svuzp2q_s8, \ svfloat64_t: svuzp2q_f64, \ svfloat32_t: svuzp2q_f32, \ svbfloat16_t: svuzp2q_bf16, \ svuint8_t: svuzp2q_u8, \ svuint64_t: svuzp2q_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector manipulation / Zip elements: Interleave elements from high halves of two inputs svuint32_t svzip2_u32(svuint32_t op1, svuint32_t op2); svuint64_t svzip2_u64(svuint64_t op1, svuint64_t op2); svuint16_t svzip2_u16(svuint16_t op1, svuint16_t op2); svint64_t svzip2_s64(svint64_t op1, svint64_t op2); svint32_t svzip2_s32(svint32_t op1, svint32_t op2); svint16_t svzip2_s16(svint16_t op1, svint16_t op2); svint8_t svzip2_s8(svint8_t op1, svint8_t op2); svfloat64_t svzip2_f64(svfloat64_t op1, svfloat64_t op2); svfloat32_t svzip2_f32(svfloat32_t op1, svfloat32_t op2); svfloat16_t svzip2_f16(svfloat16_t op1, svfloat16_t op2); svuint8_t svzip2_u8(svuint8_t op1, svuint8_t op2); svbfloat16_t svzip2_bf16(svbfloat16_t op1, svbfloat16_t op2); svbool_t svzip2_b64(svbool_t op1, svbool_t op2); svbool_t svzip2_b32(svbool_t op1, svbool_t op2); svbool_t svzip2_b16(svbool_t op1, svbool_t op2); svbool_t svzip2_b8(svbool_t op1, svbool_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svzip2(op1, op2) _Generic((op2), \ svuint32_t: svzip2_u32, \ svuint64_t: svzip2_u64, \ svuint16_t: svzip2_u16, \ svint64_t: svzip2_s64, \ svint32_t: svzip2_s32, \ svint16_t: svzip2_s16, \ svint8_t: svzip2_s8, \ svfloat64_t: svzip2_f64, \ svfloat32_t: svzip2_f32, \ svfloat16_t: svzip2_f16, \ svuint8_t: svzip2_u8, \ svbfloat16_t: svzip2_bf16, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector manipulation / Zip elements: Interleave elements from low halves of two inputs svbfloat16_t svzip1_bf16(svbfloat16_t op1, svbfloat16_t op2); svfloat16_t svzip1_f16(svfloat16_t op1, svfloat16_t op2); svfloat32_t svzip1_f32(svfloat32_t op1, svfloat32_t op2); svfloat64_t svzip1_f64(svfloat64_t op1, svfloat64_t op2); svint32_t svzip1_s32(svint32_t op1, svint32_t op2); svint8_t svzip1_s8(svint8_t op1, svint8_t op2); svuint8_t svzip1_u8(svuint8_t op1, svuint8_t op2); svuint16_t svzip1_u16(svuint16_t op1, svuint16_t op2); svuint32_t svzip1_u32(svuint32_t op1, svuint32_t op2); svuint64_t svzip1_u64(svuint64_t op1, svuint64_t op2); svint64_t svzip1_s64(svint64_t op1, svint64_t op2); svint16_t svzip1_s16(svint16_t op1, svint16_t op2); svbool_t svzip1_b64(svbool_t op1, svbool_t op2); svbool_t svzip1_b32(svbool_t op1, svbool_t op2); svbool_t svzip1_b16(svbool_t op1, svbool_t op2); svbool_t svzip1_b8(svbool_t op1, svbool_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svzip1(op1, op2) _Generic((op2), \ svbfloat16_t: svzip1_bf16, \ svfloat16_t: svzip1_f16, \ svfloat32_t: svzip1_f32, \ svfloat64_t: svzip1_f64, \ svint32_t: svzip1_s32, \ svint8_t: svzip1_s8, \ svuint8_t: svzip1_u8, \ svuint16_t: svzip1_u16, \ svuint32_t: svzip1_u32, \ svuint64_t: svzip1_u64, \ svint64_t: svzip1_s64, \ svint16_t: svzip1_s16, \ default: __assume(0) \ )(op1, op2) #endif // 
sve: Vector manipulation / Zip elements: Interleave quadwords from high halves of two inputs svuint64_t svzip2q_u64(svuint64_t op1, svuint64_t op2); svint16_t svzip2q_s16(svint16_t op1, svint16_t op2); svbfloat16_t svzip2q_bf16(svbfloat16_t op1, svbfloat16_t op2); svfloat16_t svzip2q_f16(svfloat16_t op1, svfloat16_t op2); svuint32_t svzip2q_u32(svuint32_t op1, svuint32_t op2); svfloat32_t svzip2q_f32(svfloat32_t op1, svfloat32_t op2); svfloat64_t svzip2q_f64(svfloat64_t op1, svfloat64_t op2); svint8_t svzip2q_s8(svint8_t op1, svint8_t op2); svint32_t svzip2q_s32(svint32_t op1, svint32_t op2); svint64_t svzip2q_s64(svint64_t op1, svint64_t op2); svuint8_t svzip2q_u8(svuint8_t op1, svuint8_t op2); svuint16_t svzip2q_u16(svuint16_t op1, svuint16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svzip2q(op1, op2) _Generic((op2), \ svuint64_t: svzip2q_u64, \ svint16_t: svzip2q_s16, \ svbfloat16_t: svzip2q_bf16, \ svfloat16_t: svzip2q_f16, \ svuint32_t: svzip2q_u32, \ svfloat32_t: svzip2q_f32, \ svfloat64_t: svzip2q_f64, \ svint8_t: svzip2q_s8, \ svint32_t: svzip2q_s32, \ svint64_t: svzip2q_s64, \ svuint8_t: svzip2q_u8, \ svuint16_t: svzip2q_u16, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector manipulation / Zip elements: Interleave quadwords from low halves of two inputs svuint32_t svzip1q_u32(svuint32_t op1, svuint32_t op2); svuint64_t svzip1q_u64(svuint64_t op1, svuint64_t op2); svbfloat16_t svzip1q_bf16(svbfloat16_t op1, svbfloat16_t op2); svfloat32_t svzip1q_f32(svfloat32_t op1, svfloat32_t op2); svfloat16_t svzip1q_f16(svfloat16_t op1, svfloat16_t op2); svuint16_t svzip1q_u16(svuint16_t op1, svuint16_t op2); svuint8_t svzip1q_u8(svuint8_t op1, svuint8_t op2); svint64_t svzip1q_s64(svint64_t op1, svint64_t op2); svint32_t svzip1q_s32(svint32_t op1, svint32_t op2); svint16_t svzip1q_s16(svint16_t op1, svint16_t op2); svint8_t svzip1q_s8(svint8_t op1, svint8_t op2); svfloat64_t svzip1q_f64(svfloat64_t op1, svfloat64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svzip1q(op1, op2) _Generic((op2), \ svuint32_t: svzip1q_u32, \ svuint64_t: svzip1q_u64, \ svbfloat16_t: svzip1q_bf16, \ svfloat32_t: svzip1q_f32, \ svfloat16_t: svzip1q_f16, \ svuint16_t: svzip1q_u16, \ svuint8_t: svzip1q_u8, \ svint64_t: svzip1q_s64, \ svint32_t: svzip1q_s32, \ svint16_t: svzip1q_s16, \ svint8_t: svzip1q_s8, \ svfloat64_t: svzip1q_f64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector tuple manipulation / Create uninitialized vector tuple: Create an uninitialized tuple of two vectors svfloat16x2_t svundef2_f16(void); svfloat32x2_t svundef2_f32(void); svfloat64x2_t svundef2_f64(void); svint8x2_t svundef2_s8(void); svint16x2_t svundef2_s16(void); svint32x2_t svundef2_s32(void); svint64x2_t svundef2_s64(void); svuint8x2_t svundef2_u8(void); svuint16x2_t svundef2_u16(void); svuint32x2_t svundef2_u32(void); svuint64x2_t svundef2_u64(void); // sve: Vector tuple manipulation / Create uninitialized vector tuple: Create an uninitialized tuple of three vectors svfloat16x3_t svundef3_f16(void); svfloat32x3_t svundef3_f32(void); svfloat64x3_t svundef3_f64(void); svint8x3_t svundef3_s8(void); svint16x3_t svundef3_s16(void); svint32x3_t svundef3_s32(void); svint64x3_t svundef3_s64(void); svuint8x3_t svundef3_u8(void); svuint16x3_t svundef3_u16(void); svuint32x3_t svundef3_u32(void); svuint64x3_t svundef3_u64(void); // sve: Vector tuple manipulation / Create uninitialized vector tuple: Create an uninitialized tuple of four vectors svfloat16x4_t 
svundef4_f16(void); svfloat32x4_t svundef4_f32(void); svfloat64x4_t svundef4_f64(void); svint8x4_t svundef4_s8(void); svint16x4_t svundef4_s16(void); svint32x4_t svundef4_s32(void); svint64x4_t svundef4_s64(void); svuint8x4_t svundef4_u8(void); svuint16x4_t svundef4_u16(void); svuint32x4_t svundef4_u32(void); svuint64x4_t svundef4_u64(void); // sve: Vector tuple manipulation / Create vector tuple: Create a tuple of two vectors svfloat16x2_t svcreate2_f16(svfloat16_t x0, svfloat16_t x1); svfloat32x2_t svcreate2_f32(svfloat32_t x0, svfloat32_t x1); svfloat64x2_t svcreate2_f64(svfloat64_t x0, svfloat64_t x1); svint8x2_t svcreate2_s8(svint8_t x0, svint8_t x1); svint16x2_t svcreate2_s16(svint16_t x0, svint16_t x1); svint32x2_t svcreate2_s32(svint32_t x0, svint32_t x1); svint64x2_t svcreate2_s64(svint64_t x0, svint64_t x1); svuint8x2_t svcreate2_u8(svuint8_t x0, svuint8_t x1); svuint16x2_t svcreate2_u16(svuint16_t x0, svuint16_t x1); svuint32x2_t svcreate2_u32(svuint32_t x0, svuint32_t x1); svuint64x2_t svcreate2_u64(svuint64_t x0, svuint64_t x1); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svcreate2(op1, op2) _Generic((op2), \ svint32_t: svcreate2_s32, \ svint16_t: svcreate2_s16, \ svuint8_t: svcreate2_u8, \ svuint16_t: svcreate2_u16, \ svuint32_t: svcreate2_u32, \ svint8_t: svcreate2_s8, \ svint64_t: svcreate2_s64, \ svuint64_t: svcreate2_u64, \ svfloat16_t: svcreate2_f16, \ svfloat32_t: svcreate2_f32, \ svfloat64_t: svcreate2_f64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector tuple manipulation / Create vector tuple: Create a tuple of three vectors svfloat16x3_t svcreate3_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2); svfloat32x3_t svcreate3_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2); svfloat64x3_t svcreate3_f64(svfloat64_t x0, svfloat64_t x1, svfloat64_t x2); svint8x3_t svcreate3_s8(svint8_t x0, svint8_t x1, svint8_t x2); svint16x3_t svcreate3_s16(svint16_t x0, svint16_t x1, svint16_t x2); svint32x3_t svcreate3_s32(svint32_t x0, svint32_t x1, svint32_t x2); svint64x3_t svcreate3_s64(svint64_t x0, svint64_t x1, svint64_t x2); svuint8x3_t svcreate3_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2); svuint16x3_t svcreate3_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2); svuint32x3_t svcreate3_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2); svuint64x3_t svcreate3_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svcreate3(op1, op2, op3) _Generic((op3), \ svint32_t: svcreate3_s32, \ svint16_t: svcreate3_s16, \ svuint8_t: svcreate3_u8, \ svuint16_t: svcreate3_u16, \ svuint32_t: svcreate3_u32, \ svint8_t: svcreate3_s8, \ svint64_t: svcreate3_s64, \ svuint64_t: svcreate3_u64, \ svfloat16_t: svcreate3_f16, \ svfloat32_t: svcreate3_f32, \ svfloat64_t: svcreate3_f64, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve: Vector tuple manipulation / Create vector tuple: Create a tuple of four vectors svfloat16x4_t svcreate4_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2, svfloat16_t x3); svfloat32x4_t svcreate4_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2, svfloat32_t x3); svfloat64x4_t svcreate4_f64(svfloat64_t x0, svfloat64_t x1, svfloat64_t x2, svfloat64_t x3); svint8x4_t svcreate4_s8(svint8_t x0, svint8_t x1, svint8_t x2, svint8_t x3); svint16x4_t svcreate4_s16(svint16_t x0, svint16_t x1, svint16_t x2, svint16_t x3); svint32x4_t svcreate4_s32(svint32_t x0, svint32_t x1, svint32_t x2, svint32_t x3); svint64x4_t svcreate4_s64(svint64_t x0, svint64_t x1, svint64_t x2, 
svint64_t x3); svuint8x4_t svcreate4_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2, svuint8_t x3); svuint16x4_t svcreate4_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2, svuint16_t x3); svuint32x4_t svcreate4_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2, svuint32_t x3); svuint64x4_t svcreate4_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2, svuint64_t x3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svcreate4(op1, op2, op3, op4) _Generic((op4), \ svint32_t: svcreate4_s32, \ svint16_t: svcreate4_s16, \ svuint8_t: svcreate4_u8, \ svuint16_t: svcreate4_u16, \ svuint32_t: svcreate4_u32, \ svint8_t: svcreate4_s8, \ svint64_t: svcreate4_s64, \ svuint64_t: svcreate4_u64, \ svfloat16_t: svcreate4_f16, \ svfloat32_t: svcreate4_f32, \ svfloat64_t: svcreate4_f64, \ default: __assume(0) \ )(op1, op2, op3, op4) #endif // sve: Vector tuple manipulation / Extract vector from tuple: Extract one vector from a tuple of two vectors svfloat16_t svget2_f16(svfloat16x2_t tuple, uint64_t imm_index); svfloat32_t svget2_f32(svfloat32x2_t tuple, uint64_t imm_index); svfloat64_t svget2_f64(svfloat64x2_t tuple, uint64_t imm_index); svint8_t svget2_s8(svint8x2_t tuple, uint64_t imm_index); svint16_t svget2_s16(svint16x2_t tuple, uint64_t imm_index); svint32_t svget2_s32(svint32x2_t tuple, uint64_t imm_index); svint64_t svget2_s64(svint64x2_t tuple, uint64_t imm_index); svuint8_t svget2_u8(svuint8x2_t tuple, uint64_t imm_index); svuint16_t svget2_u16(svuint16x2_t tuple, uint64_t imm_index); svuint32_t svget2_u32(svuint32x2_t tuple, uint64_t imm_index); svuint64_t svget2_u64(svuint64x2_t tuple, uint64_t imm_index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svget2(op1, op2) _Generic((op1), \ svint32x2_t: svget2_s32, \ svint16x2_t: svget2_s16, \ svuint8x2_t: svget2_u8, \ svuint16x2_t: svget2_u16, \ svuint32x2_t: svget2_u32, \ svint8x2_t: svget2_s8, \ svint64x2_t: svget2_s64, \ svuint64x2_t: svget2_u64, \ svfloat16x2_t: svget2_f16, \ svfloat32x2_t: svget2_f32, \ svfloat64x2_t: svget2_f64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector tuple manipulation / Extract vector from tuple: Extract one vector from a tuple of three vectors svfloat16_t svget3_f16(svfloat16x3_t tuple, uint64_t imm_index); svfloat32_t svget3_f32(svfloat32x3_t tuple, uint64_t imm_index); svfloat64_t svget3_f64(svfloat64x3_t tuple, uint64_t imm_index); svint8_t svget3_s8(svint8x3_t tuple, uint64_t imm_index); svint16_t svget3_s16(svint16x3_t tuple, uint64_t imm_index); svint32_t svget3_s32(svint32x3_t tuple, uint64_t imm_index); svint64_t svget3_s64(svint64x3_t tuple, uint64_t imm_index); svuint8_t svget3_u8(svuint8x3_t tuple, uint64_t imm_index); svuint16_t svget3_u16(svuint16x3_t tuple, uint64_t imm_index); svuint32_t svget3_u32(svuint32x3_t tuple, uint64_t imm_index); svuint64_t svget3_u64(svuint64x3_t tuple, uint64_t imm_index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svget3(op1, op2) _Generic((op1), \ svint32x3_t: svget3_s32, \ svint16x3_t: svget3_s16, \ svuint8x3_t: svget3_u8, \ svuint16x3_t: svget3_u16, \ svuint32x3_t: svget3_u32, \ svint8x3_t: svget3_s8, \ svint64x3_t: svget3_s64, \ svuint64x3_t: svget3_u64, \ svfloat16x3_t: svget3_f16, \ svfloat32x3_t: svget3_f32, \ svfloat64x3_t: svget3_f64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector tuple manipulation / Extract vector from tuple: Extract one vector from a tuple of four vectors svfloat16_t svget4_f16(svfloat16x4_t tuple, uint64_t imm_index); svfloat32_t svget4_f32(svfloat32x4_t tuple, 
uint64_t imm_index); svfloat64_t svget4_f64(svfloat64x4_t tuple, uint64_t imm_index); svint8_t svget4_s8(svint8x4_t tuple, uint64_t imm_index); svint16_t svget4_s16(svint16x4_t tuple, uint64_t imm_index); svint32_t svget4_s32(svint32x4_t tuple, uint64_t imm_index); svint64_t svget4_s64(svint64x4_t tuple, uint64_t imm_index); svuint8_t svget4_u8(svuint8x4_t tuple, uint64_t imm_index); svuint16_t svget4_u16(svuint16x4_t tuple, uint64_t imm_index); svuint32_t svget4_u32(svuint32x4_t tuple, uint64_t imm_index); svuint64_t svget4_u64(svuint64x4_t tuple, uint64_t imm_index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svget4(op1, op2) _Generic((op1), \ svint32x4_t: svget4_s32, \ svint16x4_t: svget4_s16, \ svuint8x4_t: svget4_u8, \ svuint16x4_t: svget4_u16, \ svuint32x4_t: svget4_u32, \ svint8x4_t: svget4_s8, \ svint64x4_t: svget4_s64, \ svuint64x4_t: svget4_u64, \ svfloat16x4_t: svget4_f16, \ svfloat32x4_t: svget4_f32, \ svfloat64x4_t: svget4_f64, \ default: __assume(0) \ )(op1, op2) #endif // sve: Vector tuple manipulation / Set vector in tuple: Change one vector in a tuple of two vectors svfloat16x2_t svset2_f16(svfloat16x2_t tuple, uint64_t imm_index, svfloat16_t x); svfloat32x2_t svset2_f32(svfloat32x2_t tuple, uint64_t imm_index, svfloat32_t x); svfloat64x2_t svset2_f64(svfloat64x2_t tuple, uint64_t imm_index, svfloat64_t x); svint8x2_t svset2_s8(svint8x2_t tuple, uint64_t imm_index, svint8_t x); svint16x2_t svset2_s16(svint16x2_t tuple, uint64_t imm_index, svint16_t x); svint32x2_t svset2_s32(svint32x2_t tuple, uint64_t imm_index, svint32_t x); svint64x2_t svset2_s64(svint64x2_t tuple, uint64_t imm_index, svint64_t x); svuint8x2_t svset2_u8(svuint8x2_t tuple, uint64_t imm_index, svuint8_t x); svuint16x2_t svset2_u16(svuint16x2_t tuple, uint64_t imm_index, svuint16_t x); svuint32x2_t svset2_u32(svuint32x2_t tuple, uint64_t imm_index, svuint32_t x); svuint64x2_t svset2_u64(svuint64x2_t tuple, uint64_t imm_index, svuint64_t x); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svset2(op1, op2, op3) _Generic((op1), \ svint32x2_t: svset2_s32, \ svint16x2_t: svset2_s16, \ svuint8x2_t: svset2_u8, \ svuint16x2_t: svset2_u16, \ svuint32x2_t: svset2_u32, \ svint8x2_t: svset2_s8, \ svint64x2_t: svset2_s64, \ svuint64x2_t: svset2_u64, \ svfloat16x2_t: svset2_f16, \ svfloat32x2_t: svset2_f32, \ svfloat64x2_t: svset2_f64, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve: Vector tuple manipulation / Set vector in tuple: Change one vector in a tuple of three vectors svfloat16x3_t svset3_f16(svfloat16x3_t tuple, uint64_t imm_index, svfloat16_t x); svfloat32x3_t svset3_f32(svfloat32x3_t tuple, uint64_t imm_index, svfloat32_t x); svfloat64x3_t svset3_f64(svfloat64x3_t tuple, uint64_t imm_index, svfloat64_t x); svint8x3_t svset3_s8(svint8x3_t tuple, uint64_t imm_index, svint8_t x); svint16x3_t svset3_s16(svint16x3_t tuple, uint64_t imm_index, svint16_t x); svint32x3_t svset3_s32(svint32x3_t tuple, uint64_t imm_index, svint32_t x); svint64x3_t svset3_s64(svint64x3_t tuple, uint64_t imm_index, svint64_t x); svuint8x3_t svset3_u8(svuint8x3_t tuple, uint64_t imm_index, svuint8_t x); svuint16x3_t svset3_u16(svuint16x3_t tuple, uint64_t imm_index, svuint16_t x); svuint32x3_t svset3_u32(svuint32x3_t tuple, uint64_t imm_index, svuint32_t x); svuint64x3_t svset3_u64(svuint64x3_t tuple, uint64_t imm_index, svuint64_t x); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svset3(op1, op2, op3) _Generic((op1), \ svint32x3_t: svset3_s32, \ svint16x3_t: 
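// Illustrative sketch (editorial): replacing one vector inside a two-vector
// tuple. 'pair' is a hypothetical svfloat32x2_t and 'repl' a hypothetical
// svfloat32_t; svset2 returns the updated tuple rather than modifying it in
// place.
//
//   pair = svset2_f32(pair, 1, repl);   // overwrite the vector at index 1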
svset3_s16, \ svuint8x3_t: svset3_u8, \ svuint16x3_t: svset3_u16, \ svuint32x3_t: svset3_u32, \ svint8x3_t: svset3_s8, \ svint64x3_t: svset3_s64, \ svuint64x3_t: svset3_u64, \ svfloat16x3_t: svset3_f16, \ svfloat32x3_t: svset3_f32, \ svfloat64x3_t: svset3_f64, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve: Vector tuple manipulation / Set vector in tuple: Change one vector in a tuple of four vectors svfloat16x4_t svset4_f16(svfloat16x4_t tuple, uint64_t imm_index, svfloat16_t x); svfloat32x4_t svset4_f32(svfloat32x4_t tuple, uint64_t imm_index, svfloat32_t x); svfloat64x4_t svset4_f64(svfloat64x4_t tuple, uint64_t imm_index, svfloat64_t x); svint8x4_t svset4_s8(svint8x4_t tuple, uint64_t imm_index, svint8_t x); svint16x4_t svset4_s16(svint16x4_t tuple, uint64_t imm_index, svint16_t x); svint32x4_t svset4_s32(svint32x4_t tuple, uint64_t imm_index, svint32_t x); svint64x4_t svset4_s64(svint64x4_t tuple, uint64_t imm_index, svint64_t x); svuint8x4_t svset4_u8(svuint8x4_t tuple, uint64_t imm_index, svuint8_t x); svuint16x4_t svset4_u16(svuint16x4_t tuple, uint64_t imm_index, svuint16_t x); svuint32x4_t svset4_u32(svuint32x4_t tuple, uint64_t imm_index, svuint32_t x); svuint64x4_t svset4_u64(svuint64x4_t tuple, uint64_t imm_index, svuint64_t x); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svset4(op1, op2, op3) _Generic((op1), \ svint32x4_t: svset4_s32, \ svint16x4_t: svset4_s16, \ svuint8x4_t: svset4_u8, \ svuint16x4_t: svset4_u16, \ svuint32x4_t: svset4_u32, \ svint8x4_t: svset4_s8, \ svint64x4_t: svset4_s64, \ svuint64x4_t: svset4_u64, \ svfloat16x4_t: svset4_f16, \ svfloat32x4_t: svset4_f32, \ svfloat64x4_t: svset4_f64, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Bit manipulation / Gather bits: Gather lower bits from positions selected by bitmask svuint64_t svbext_u64(svuint64_t op1, svuint64_t op2); svuint32_t svbext_n_u32(svuint32_t op1, uint32_t op2); svuint64_t svbext_n_u64(svuint64_t op1, uint64_t op2); svuint8_t svbext_n_u8(svuint8_t op1, uint8_t op2); svuint32_t svbext_u32(svuint32_t op1, svuint32_t op2); svuint16_t svbext_u16(svuint16_t op1, svuint16_t op2); svuint8_t svbext_u8(svuint8_t op1, svuint8_t op2); svuint16_t svbext_n_u16(svuint16_t op1, uint16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbext(op1, op2) _Generic((op2), \ svuint64_t: svbext_u64, \ uint32_t: svbext_n_u32, \ uint64_t: svbext_n_u64, \ uint8_t: svbext_n_u8, \ svuint32_t: svbext_u32, \ svuint16_t: svbext_u16, \ svuint8_t: svbext_u8, \ uint16_t: svbext_n_u16, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Bit manipulation / Group bits: Group bits to right or left as selected by bitmask svuint16_t svbgrp_u16(svuint16_t op1, svuint16_t op2); svuint32_t svbgrp_u32(svuint32_t op1, svuint32_t op2); svuint8_t svbgrp_n_u8(svuint8_t op1, uint8_t op2); svuint16_t svbgrp_n_u16(svuint16_t op1, uint16_t op2); svuint32_t svbgrp_n_u32(svuint32_t op1, uint32_t op2); svuint8_t svbgrp_u8(svuint8_t op1, svuint8_t op2); svuint64_t svbgrp_n_u64(svuint64_t op1, uint64_t op2); svuint64_t svbgrp_u64(svuint64_t op1, svuint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbgrp(op1, op2) _Generic((op2), \ svuint16_t: svbgrp_u16, \ svuint32_t: svbgrp_u32, \ uint8_t: svbgrp_n_u8, \ uint16_t: svbgrp_n_u16, \ uint32_t: svbgrp_n_u32, \ svuint8_t: svbgrp_u8, \ uint64_t: svbgrp_n_u64, \ svuint64_t: svbgrp_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Bit manipulation / Scatter bits: Scatter lower bits into positions 
selected by bitmask svuint64_t svbdep_n_u64(svuint64_t op1, uint64_t op2); svuint32_t svbdep_n_u32(svuint32_t op1, uint32_t op2); svuint16_t svbdep_n_u16(svuint16_t op1, uint16_t op2); svuint8_t svbdep_n_u8(svuint8_t op1, uint8_t op2); svuint32_t svbdep_u32(svuint32_t op1, svuint32_t op2); svuint16_t svbdep_u16(svuint16_t op1, svuint16_t op2); svuint8_t svbdep_u8(svuint8_t op1, svuint8_t op2); svuint64_t svbdep_u64(svuint64_t op1, svuint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbdep(op1, op2) _Generic((op2), \ uint64_t: svbdep_n_u64, \ uint32_t: svbdep_n_u32, \ uint16_t: svbdep_n_u16, \ uint8_t: svbdep_n_u8, \ svuint32_t: svbdep_u32, \ svuint16_t: svbdep_u16, \ svuint8_t: svbdep_u8, \ svuint64_t: svbdep_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Compare multiple / Histogram acceleration: Count matching elements svuint32_t svhistcnt_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svuint32_t svhistcnt_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svhistcnt_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint64_t svhistcnt_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svhistcnt_z(pg, op1, op2) _Generic((op2), \ svint32_t: svhistcnt_s32_z, \ svuint32_t: svhistcnt_u32_z, \ svuint64_t: svhistcnt_u64_z, \ svint64_t: svhistcnt_s64_z, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Compare multiple / Histogram acceleration: Count matching elements in 128-bit segments svuint8_t svhistseg_u8(svuint8_t op1, svuint8_t op2); svuint8_t svhistseg_s8(svint8_t op1, svint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svhistseg(op1, op2) _Generic((op2), \ svuint8_t: svhistseg_u8, \ svint8_t: svhistseg_s8, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Compare multiple / Search for matches: Detect any matching elements svbool_t svmatch_s16(svbool_t pg, svint16_t op1, svint16_t op2); svbool_t svmatch_u8(svbool_t pg, svuint8_t op1, svuint8_t op2); svbool_t svmatch_u16(svbool_t pg, svuint16_t op1, svuint16_t op2); svbool_t svmatch_s8(svbool_t pg, svint8_t op1, svint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmatch(pg, op1, op2) _Generic((op2), \ svint16_t: svmatch_s16, \ svuint8_t: svmatch_u8, \ svuint16_t: svmatch_u16, \ svint8_t: svmatch_s8, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Compare multiple / Search for matches: Detect no matching elements svbool_t svnmatch_s8(svbool_t pg, svint8_t op1, svint8_t op2); svbool_t svnmatch_s16(svbool_t pg, svint16_t op1, svint16_t op2); svbool_t svnmatch_u8(svbool_t pg, svuint8_t op1, svuint8_t op2); svbool_t svnmatch_u16(svbool_t pg, svuint16_t op1, svuint16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svnmatch(pg, op1, op2) _Generic((op2), \ svint8_t: svnmatch_s8, \ svint16_t: svnmatch_s16, \ svuint8_t: svnmatch_u8, \ svuint16_t: svnmatch_u16, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Complex arithmetic / Complex addition: Complex add with rotate svuint64_t svcadd_u64(svuint64_t op1, svuint64_t op2, uint64_t imm_rotation); svuint16_t svcadd_u16(svuint16_t op1, svuint16_t op2, uint64_t imm_rotation); svuint8_t svcadd_u8(svuint8_t op1, svuint8_t op2, uint64_t imm_rotation); svint64_t svcadd_s64(svint64_t op1, svint64_t op2, uint64_t imm_rotation); svint32_t svcadd_s32(svint32_t op1, svint32_t op2, uint64_t imm_rotation); svint16_t svcadd_s16(svint16_t op1, svint16_t op2, uint64_t 
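// Illustrative sketches (editorial, not part of the original header) for the
// bit-manipulation and match groups declared above. 'vals' and 'mask' are
// hypothetical svuint32_t vectors; 'haystack' and 'needles' are hypothetical
// svuint8_t vectors and 'pg' a governing predicate. svbext packs the bits of
// 'vals' selected by 'mask' into the low-order bits, svbdep scatters them back
// into the masked positions, and svmatch sets predicate lanes where an element
// of 'haystack' matches any element in its 128-bit segment of 'needles'.
//
//   svuint32_t packed   = svbext_u32(vals, mask);
//   svuint32_t restored = svbdep_u32(packed, mask);
//   svbool_t   found    = svmatch_u8(pg, haystack, needles);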
imm_rotation); svint8_t svcadd_s8(svint8_t op1, svint8_t op2, uint64_t imm_rotation); svuint32_t svcadd_u32(svuint32_t op1, svuint32_t op2, uint64_t imm_rotation); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svcadd(op1, op2, imm_rotation) _Generic((op1), \ svuint64_t: svcadd_u64, \ svuint16_t: svcadd_u16, \ svuint8_t: svcadd_u8, \ svint64_t: svcadd_s64, \ svint32_t: svcadd_s32, \ svint16_t: svcadd_s16, \ svint8_t: svcadd_s8, \ svuint32_t: svcadd_u32, \ default: __assume(0) \ )(op1, op2, imm_rotation) #endif // sve2: Complex arithmetic / Complex dot product: Complex dot product svint32_t svcdot_s32(svint32_t op1, svint8_t op2, svint8_t op3, uint64_t imm_rotation); svint64_t svcdot_s64(svint64_t op1, svint16_t op2, svint16_t op3, uint64_t imm_rotation); svint32_t svcdot_lane_s32(svint32_t op1, svint8_t op2, svint8_t op3, uint64_t imm_index, uint64_t imm_rotation); svint64_t svcdot_lane_s64(svint64_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index, uint64_t imm_rotation); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svcdot(op1, op2, op3, imm_rotation) _Generic((op1), \ svint32_t: svcdot_s32, \ svint64_t: svcdot_s64, \ default: __assume(0) \ )(op1, op2, op3, imm_rotation) #define svcdot_lane(op1, op2, op3, imm_index, imm_rotation) _Generic((op1), \ svint32_t: svcdot_lane_s32, \ svint64_t: svcdot_lane_s64, \ default: __assume(0) \ )(op1, op2, op3, imm_index, imm_rotation) #endif // sve2: Complex arithmetic / Saturating complex addition: Saturating complex add with rotate svint8_t svqcadd_s8(svint8_t op1, svint8_t op2, uint64_t imm_rotation); svint16_t svqcadd_s16(svint16_t op1, svint16_t op2, uint64_t imm_rotation); svint32_t svqcadd_s32(svint32_t op1, svint32_t op2, uint64_t imm_rotation); svint64_t svqcadd_s64(svint64_t op1, svint64_t op2, uint64_t imm_rotation); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqcadd(op1, op2, imm_rotation) _Generic((op1), \ svint8_t: svqcadd_s8, \ svint16_t: svqcadd_s16, \ svint32_t: svqcadd_s32, \ svint64_t: svqcadd_s64, \ default: __assume(0) \ )(op1, op2, imm_rotation) #endif // sve2: Complex arithmetic / Saturating complex multiply-accumulate: Saturating rounding doubling complex multiply-add high with rotate svint64_t svqrdcmlah_s64(svint64_t op1, svint64_t op2, svint64_t op3, uint64_t imm_rotation); svint32_t svqrdcmlah_s32(svint32_t op1, svint32_t op2, svint32_t op3, uint64_t imm_rotation); svint16_t svqrdcmlah_lane_s16(svint16_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index, uint64_t imm_rotation); svint8_t svqrdcmlah_s8(svint8_t op1, svint8_t op2, svint8_t op3, uint64_t imm_rotation); svint16_t svqrdcmlah_s16(svint16_t op1, svint16_t op2, svint16_t op3, uint64_t imm_rotation); svint32_t svqrdcmlah_lane_s32(svint32_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index, uint64_t imm_rotation); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqrdcmlah(op1, op2, op3, imm_rotation) _Generic((op1), \ svint64_t: svqrdcmlah_s64, \ svint32_t: svqrdcmlah_s32, \ svint8_t: svqrdcmlah_s8, \ svint16_t: svqrdcmlah_s16, \ default: __assume(0) \ )(op1, op2, op3, imm_rotation) #define svqrdcmlah_lane(op1, op2, op3, imm_index, imm_rotation) _Generic((op1), \ svint16_t: svqrdcmlah_lane_s16, \ svint32_t: svqrdcmlah_lane_s32, \ default: __assume(0) \ )(op1, op2, op3, imm_index, imm_rotation) #endif // sve2: Cryptography / AES: AES inverse mix columns svuint8_t svaesimc_u8(svuint8_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define 
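// Illustrative sketch (editorial) for the complex-arithmetic group above.
// 'x' and 'y' are hypothetical svint32_t vectors holding complex values as
// interleaved (real, imaginary) element pairs; imm_rotation is given in
// degrees (90 or 270 for the complex-add forms), so rotating 'y' by 90
// effectively multiplies it by i before the addition.
//
//   svint32_t sum = svcadd_s32(x, y, 90);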
svaesimc(op) _Generic((op), \ svuint8_t: svaesimc_u8, \ default: __assume(0) \ )(op) #endif // sve2: Cryptography / AES: AES mix columns svuint8_t svaesmc_u8(svuint8_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svaesmc(op) _Generic((op), \ svuint8_t: svaesmc_u8, \ default: __assume(0) \ )(op) #endif // sve2: Cryptography / AES: AES single round decryption svuint8_t svaesd_u8(svuint8_t op1, svuint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svaesd(op1, op2) _Generic((op2), \ svuint8_t: svaesd_u8, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Cryptography / AES: AES single round encryption svuint8_t svaese_u8(svuint8_t op1, svuint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svaese(op1, op2) _Generic((op2), \ svuint8_t: svaese_u8, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Cryptography / SM4: SM4 encryption and decryption svuint32_t svsm4e_u32(svuint32_t op1, svuint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsm4e(op1, op2) _Generic((op2), \ svuint32_t: svsm4e_u32, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Cryptography / SM4: SM4 key updates svuint32_t svsm4ekey_u32(svuint32_t op1, svuint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsm4ekey(op1, op2) _Generic((op2), \ svuint32_t: svsm4ekey_u32, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Data type conversion / Conversions: Down convert and narrow (top) svfloat16_t svcvtnt_f16_f32_m(svfloat16_t even, svbool_t pg, svfloat32_t op); svfloat32_t svcvtnt_f32_f64_x(svfloat32_t even, svbool_t pg, svfloat64_t op); svfloat16_t svcvtnt_f16_f32_x(svfloat16_t even, svbool_t pg, svfloat32_t op); svfloat32_t svcvtnt_f32_f64_m(svfloat32_t even, svbool_t pg, svfloat64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svcvtnt_f16_m(even, pg, op) _Generic((op), \ svfloat32_t: svcvtnt_f16_f32_m, \ default: __assume(0) \ )(even, pg, op) #define svcvtnt_f32_x(even, pg, op) _Generic((op), \ svfloat64_t: svcvtnt_f32_f64_x, \ default: __assume(0) \ )(even, pg, op) #define svcvtnt_f16_x(even, pg, op) _Generic((op), \ svfloat32_t: svcvtnt_f16_f32_x, \ default: __assume(0) \ )(even, pg, op) #define svcvtnt_f32_m(even, pg, op) _Generic((op), \ svfloat64_t: svcvtnt_f32_f64_m, \ default: __assume(0) \ )(even, pg, op) #endif // sve2: Data type conversion / Conversions: Down convert, rounding to odd svfloat32_t svcvtx_f32_f64_z(svbool_t pg, svfloat64_t op); svfloat32_t svcvtx_f32_f64_x(svbool_t pg, svfloat64_t op); svfloat32_t svcvtx_f32_f64_m(svfloat32_t inactive, svbool_t pg, svfloat64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svcvtx_f32_z(pg, op) _Generic((op), \ svfloat64_t: svcvtx_f32_f64_z, \ default: __assume(0) \ )(pg, op) #define svcvtx_f32_x(pg, op) _Generic((op), \ svfloat64_t: svcvtx_f32_f64_x, \ default: __assume(0) \ )(pg, op) #define svcvtx_f32_m(inactive, pg, op) _Generic((op), \ svfloat64_t: svcvtx_f32_f64_m, \ default: __assume(0) \ )(inactive, pg, op) #endif // sve2: Data type conversion / Conversions: Down convert, rounding to odd (top) svfloat32_t svcvtxnt_f32_f64_x(svfloat32_t even, svbool_t pg, svfloat64_t op); svfloat32_t svcvtxnt_f32_f64_m(svfloat32_t even, svbool_t pg, svfloat64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svcvtxnt_f32_x(even, pg, op) _Generic((op), \ svfloat64_t: svcvtxnt_f32_f64_x, \ default: __assume(0) \ )(even, pg, op) #define 
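// Illustrative sketch (editorial) for the AES intrinsics declared above: one
// AES encryption round applied to every 128-bit segment of a vector. 'state'
// and 'roundkey' are hypothetical svuint8_t vectors; as with the NEON AES
// instructions, a full (non-final) round is AESE followed by AESMC.
//
//   state = svaesmc_u8(svaese_u8(state, roundkey));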
svcvtxnt_f32_m(even, pg, op) _Generic((op), \ svfloat64_t: svcvtxnt_f32_f64_m, \ default: __assume(0) \ )(even, pg, op) #endif // sve2: Data type conversion / Conversions: Up convert long (top) svfloat64_t svcvtlt_f64_f32_x(svbool_t pg, svfloat32_t op); svfloat64_t svcvtlt_f64_f32_m(svfloat64_t inactive, svbool_t pg, svfloat32_t op); svfloat32_t svcvtlt_f32_f16_m(svfloat32_t inactive, svbool_t pg, svfloat16_t op); svfloat32_t svcvtlt_f32_f16_x(svbool_t pg, svfloat16_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svcvtlt_f64_x(pg, op) _Generic((op), \ svfloat32_t: svcvtlt_f64_f32_x, \ default: __assume(0) \ )(pg, op) #define svcvtlt_f64_m(inactive, pg, op) _Generic((op), \ svfloat32_t: svcvtlt_f64_f32_m, \ default: __assume(0) \ )(inactive, pg, op) #define svcvtlt_f32_m(inactive, pg, op) _Generic((op), \ svfloat16_t: svcvtlt_f32_f16_m, \ default: __assume(0) \ )(inactive, pg, op) #define svcvtlt_f32_x(pg, op) _Generic((op), \ svfloat16_t: svcvtlt_f32_f16_x, \ default: __assume(0) \ )(pg, op) #endif // sve2: Load / Gather: Load 16-bit data and sign-extend, non-temporal svint64_t svldnt1sh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svuint64_t svldnt1sh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svuint64_t svldnt1sh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svuint32_t svldnt1sh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index); svint32_t svldnt1sh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index); svint32_t svldnt1sh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); svint64_t svldnt1sh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); svint32_t svldnt1sh_gather_u32base_s32(svbool_t pg, svuint32_t bases); svuint32_t svldnt1sh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svint64_t svldnt1sh_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint64_t svldnt1sh_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint32_t svldnt1sh_gather_u32offset_s32(svbool_t pg, const int16_t *base, svuint32_t offsets); svuint32_t svldnt1sh_gather_u32offset_u32(svbool_t pg, const int16_t *base, svuint32_t offsets); svuint32_t svldnt1sh_gather_u32base_u32(svbool_t pg, svuint32_t bases); svuint64_t svldnt1sh_gather_s64offset_u64(svbool_t pg, const int16_t *base, svint64_t offsets); svint64_t svldnt1sh_gather_u64offset_s64(svbool_t pg, const int16_t *base, svuint64_t offsets); svuint64_t svldnt1sh_gather_u64offset_u64(svbool_t pg, const int16_t *base, svuint64_t offsets); svint64_t svldnt1sh_gather_s64index_s64(svbool_t pg, const int16_t *base, svint64_t indices); svuint64_t svldnt1sh_gather_u64index_u64(svbool_t pg, const int16_t *base, svuint64_t indices); svint64_t svldnt1sh_gather_u64index_s64(svbool_t pg, const int16_t *base, svuint64_t indices); svint64_t svldnt1sh_gather_s64offset_s64(svbool_t pg, const int16_t *base, svint64_t offsets); svuint64_t svldnt1sh_gather_s64index_u64(svbool_t pg, const int16_t *base, svint64_t indices); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldnt1sh_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1sh_gather_u64base_offset_s64, \ svuint64_t: svldnt1sh_gather_u64offset_s64, \ svint64_t: svldnt1sh_gather_s64offset_s64, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1sh_gather_index_u64(pg, bases, index) _Generic((index), \ int64_t: svldnt1sh_gather_u64base_index_u64, \ 
svuint64_t: svldnt1sh_gather_u64index_u64, \ svint64_t: svldnt1sh_gather_s64index_u64, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1sh_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1sh_gather_u64base_offset_u64, \ svint64_t: svldnt1sh_gather_s64offset_u64, \ svuint64_t: svldnt1sh_gather_u64offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1sh_gather_index_u32(pg, bases, index) _Generic((index), \ int64_t: svldnt1sh_gather_u32base_index_u32, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1sh_gather_index_s32(pg, bases, index) _Generic((index), \ int64_t: svldnt1sh_gather_u32base_index_s32, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1sh_gather_offset_s32(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1sh_gather_u32base_offset_s32, \ svuint32_t: svldnt1sh_gather_u32offset_s32, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1sh_gather_index_s64(pg, bases, index) _Generic((index), \ int64_t: svldnt1sh_gather_u64base_index_s64, \ svint64_t: svldnt1sh_gather_s64index_s64, \ svuint64_t: svldnt1sh_gather_u64index_s64, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1sh_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svldnt1sh_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #define svldnt1sh_gather_offset_u32(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1sh_gather_u32base_offset_u32, \ svuint32_t: svldnt1sh_gather_u32offset_u32, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1sh_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1sh_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svldnt1sh_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1sh_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svldnt1sh_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svldnt1sh_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #endif // sve2: Load / Gather: Load 16-bit data and zero-extend, non-temporal svint64_t svldnt1uh_gather_u64index_s64(svbool_t pg, const uint16_t *base, svuint64_t indices); svuint64_t svldnt1uh_gather_u64index_u64(svbool_t pg, const uint16_t *base, svuint64_t indices); svint32_t svldnt1uh_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); svint64_t svldnt1uh_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svuint64_t svldnt1uh_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svint32_t svldnt1uh_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index); svuint32_t svldnt1uh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index); svuint64_t svldnt1uh_gather_s64index_u64(svbool_t pg, const uint16_t *base, svint64_t indices); svuint32_t svldnt1uh_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svint64_t svldnt1uh_gather_s64index_s64(svbool_t pg, const uint16_t *base, svint64_t indices); svint32_t svldnt1uh_gather_u32offset_s32(svbool_t pg, const uint16_t *base, svuint32_t offsets); svint64_t svldnt1uh_gather_u64offset_s64(svbool_t pg, const uint16_t *base, svuint64_t offsets); svuint64_t svldnt1uh_gather_s64offset_u64(svbool_t pg, const uint16_t *base, svint64_t offsets); svint64_t svldnt1uh_gather_s64offset_s64(svbool_t pg, const uint16_t *base, svint64_t offsets); svuint32_t svldnt1uh_gather_u32offset_u32(svbool_t pg, const uint16_t *base, svuint32_t offsets); svuint64_t svldnt1uh_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint64_t 
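// Illustrative sketch (editorial): non-temporal extending gather of 16-bit
// elements, sign-extended to 64 bits. 'src16' is a hypothetical const
// int16_t* and 'offs' an svint64_t vector of byte offsets, with 'pg' a
// governing predicate; the _offset forms take byte offsets while the _index
// forms scale by the element size. The svldnt1uh_* declarations nearby are
// the zero-extending counterparts.
//
//   svint64_t wide = svldnt1sh_gather_s64offset_s64(pg, src16, offs);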
svldnt1uh_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint32_t svldnt1uh_gather_u32base_u32(svbool_t pg, svuint32_t bases); svint32_t svldnt1uh_gather_u32base_s32(svbool_t pg, svuint32_t bases); svint64_t svldnt1uh_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); svuint64_t svldnt1uh_gather_u64offset_u64(svbool_t pg, const uint16_t *base, svuint64_t offsets); svuint64_t svldnt1uh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldnt1uh_gather_index_s64(pg, base, indices) _Generic((indices), \ svuint64_t: svldnt1uh_gather_u64index_s64, \ svint64_t: svldnt1uh_gather_s64index_s64, \ int64_t: svldnt1uh_gather_u64base_index_s64, \ default: __assume(0) \ )(pg, base, indices) #define svldnt1uh_gather_index_u64(pg, base, indices) _Generic((indices), \ svuint64_t: svldnt1uh_gather_u64index_u64, \ svint64_t: svldnt1uh_gather_s64index_u64, \ int64_t: svldnt1uh_gather_u64base_index_u64, \ default: __assume(0) \ )(pg, base, indices) #define svldnt1uh_gather_offset_s32(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1uh_gather_u32base_offset_s32, \ svuint32_t: svldnt1uh_gather_u32offset_s32, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1uh_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1uh_gather_u64base_offset_s64, \ svuint64_t: svldnt1uh_gather_u64offset_s64, \ svint64_t: svldnt1uh_gather_s64offset_s64, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1uh_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1uh_gather_u64base_offset_u64, \ svint64_t: svldnt1uh_gather_s64offset_u64, \ svuint64_t: svldnt1uh_gather_u64offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1uh_gather_index_s32(pg, bases, index) _Generic((index), \ int64_t: svldnt1uh_gather_u32base_index_s32, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1uh_gather_index_u32(pg, bases, index) _Generic((index), \ int64_t: svldnt1uh_gather_u32base_index_u32, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1uh_gather_offset_u32(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1uh_gather_u32base_offset_u32, \ svuint32_t: svldnt1uh_gather_u32offset_u32, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1uh_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1uh_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svldnt1uh_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1uh_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svldnt1uh_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svldnt1uh_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #define svldnt1uh_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svldnt1uh_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #endif // sve2: Load / Gather: Load 32-bit data and sign-extend, non-temporal svuint64_t svldnt1sw_gather_u64index_u64(svbool_t pg, const int32_t *base, svuint64_t indices); svint64_t svldnt1sw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svldnt1sw_gather_s64offset_s64(svbool_t pg, const int32_t *base, svint64_t offsets); svuint64_t svldnt1sw_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint64_t svldnt1sw_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint64_t svldnt1sw_gather_u64offset_u64(svbool_t pg, const int32_t *base, svuint64_t offsets); svint64_t svldnt1sw_gather_s64index_s64(svbool_t pg, const 
int32_t *base, svint64_t indices); svuint64_t svldnt1sw_gather_s64index_u64(svbool_t pg, const int32_t *base, svint64_t indices); svint64_t svldnt1sw_gather_u64index_s64(svbool_t pg, const int32_t *base, svuint64_t indices); svint64_t svldnt1sw_gather_u64offset_s64(svbool_t pg, const int32_t *base, svuint64_t offsets); svuint64_t svldnt1sw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svldnt1sw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); svuint64_t svldnt1sw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svuint64_t svldnt1sw_gather_s64offset_u64(svbool_t pg, const int32_t *base, svint64_t offsets); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldnt1sw_gather_index_u64(pg, base, indices) _Generic((indices), \ svuint64_t: svldnt1sw_gather_u64index_u64, \ svint64_t: svldnt1sw_gather_s64index_u64, \ int64_t: svldnt1sw_gather_u64base_index_u64, \ default: __assume(0) \ )(pg, base, indices) #define svldnt1sw_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1sw_gather_u64base_offset_s64, \ svint64_t: svldnt1sw_gather_s64offset_s64, \ svuint64_t: svldnt1sw_gather_u64offset_s64, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1sw_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1sw_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svldnt1sw_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1sw_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svldnt1sw_gather_offset_u64(pg, base, offsets) _Generic((offsets), \ svuint64_t: svldnt1sw_gather_u64offset_u64, \ int64_t: svldnt1sw_gather_u64base_offset_u64, \ svint64_t: svldnt1sw_gather_s64offset_u64, \ default: __assume(0) \ )(pg, base, offsets) #define svldnt1sw_gather_index_s64(pg, base, indices) _Generic((indices), \ svint64_t: svldnt1sw_gather_s64index_s64, \ svuint64_t: svldnt1sw_gather_u64index_s64, \ int64_t: svldnt1sw_gather_u64base_index_s64, \ default: __assume(0) \ )(pg, base, indices) #endif // sve2: Load / Gather: Load 32-bit data and zero-extend, non-temporal svint64_t svldnt1uw_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint64_t svldnt1uw_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svldnt1uw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); svuint64_t svldnt1uw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svuint64_t svldnt1uw_gather_u64base_u64(svbool_t pg, svuint64_t bases); svuint64_t svldnt1uw_gather_s64offset_u64(svbool_t pg, const uint32_t *base, svint64_t offsets); svuint64_t svldnt1uw_gather_u64offset_u64(svbool_t pg, const uint32_t *base, svuint64_t offsets); svint64_t svldnt1uw_gather_s64offset_s64(svbool_t pg, const uint32_t *base, svint64_t offsets); svint64_t svldnt1uw_gather_s64index_s64(svbool_t pg, const uint32_t *base, svint64_t indices); svuint64_t svldnt1uw_gather_s64index_u64(svbool_t pg, const uint32_t *base, svint64_t indices); svint64_t svldnt1uw_gather_u64offset_s64(svbool_t pg, const uint32_t *base, svuint64_t offsets); svuint64_t svldnt1uw_gather_u64index_u64(svbool_t pg, const uint32_t *base, svuint64_t indices); svint64_t svldnt1uw_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svldnt1uw_gather_u64index_s64(svbool_t pg, const uint32_t *base, svuint64_t indices); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldnt1uw_gather_s64(pg, bases) _Generic((bases), 
\ svuint64_t: svldnt1uw_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svldnt1uw_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1uw_gather_u64base_offset_u64, \ svint64_t: svldnt1uw_gather_s64offset_u64, \ svuint64_t: svldnt1uw_gather_u64offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1uw_gather_index_s64(pg, bases, index) _Generic((index), \ int64_t: svldnt1uw_gather_u64base_index_s64, \ svint64_t: svldnt1uw_gather_s64index_s64, \ svuint64_t: svldnt1uw_gather_u64index_s64, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1uw_gather_index_u64(pg, bases, index) _Generic((index), \ int64_t: svldnt1uw_gather_u64base_index_u64, \ svint64_t: svldnt1uw_gather_s64index_u64, \ svuint64_t: svldnt1uw_gather_u64index_u64, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1uw_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1uw_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svldnt1uw_gather_offset_s64(pg, base, offsets) _Generic((offsets), \ svint64_t: svldnt1uw_gather_s64offset_s64, \ svuint64_t: svldnt1uw_gather_u64offset_s64, \ int64_t: svldnt1uw_gather_u64base_offset_s64, \ default: __assume(0) \ )(pg, base, offsets) #endif // sve2: Load / Gather: Load 8-bit data and sign-extend, non-temporal svuint64_t svldnt1sb_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svldnt1sb_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svuint32_t svldnt1sb_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svint32_t svldnt1sb_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); svuint64_t svldnt1sb_gather_u64offset_u64(svbool_t pg, const int8_t *base, svuint64_t offsets); svint64_t svldnt1sb_gather_u64offset_s64(svbool_t pg, const int8_t *base, svuint64_t offsets); svint64_t svldnt1sb_gather_s64offset_s64(svbool_t pg, const int8_t *base, svint64_t offsets); svuint32_t svldnt1sb_gather_u32offset_u32(svbool_t pg, const int8_t *base, svuint32_t offsets); svint32_t svldnt1sb_gather_u32base_s32(svbool_t pg, svuint32_t bases); svint32_t svldnt1sb_gather_u32offset_s32(svbool_t pg, const int8_t *base, svuint32_t offsets); svuint64_t svldnt1sb_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint64_t svldnt1sb_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint32_t svldnt1sb_gather_u32base_u32(svbool_t pg, svuint32_t bases); svuint64_t svldnt1sb_gather_s64offset_u64(svbool_t pg, const int8_t *base, svint64_t offsets); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldnt1sb_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1sb_gather_u64base_offset_u64, \ svuint64_t: svldnt1sb_gather_u64offset_u64, \ svint64_t: svldnt1sb_gather_s64offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1sb_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1sb_gather_u64base_offset_s64, \ svuint64_t: svldnt1sb_gather_u64offset_s64, \ svint64_t: svldnt1sb_gather_s64offset_s64, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1sb_gather_offset_u32(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1sb_gather_u32base_offset_u32, \ svuint32_t: svldnt1sb_gather_u32offset_u32, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1sb_gather_offset_s32(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1sb_gather_u32base_offset_s32, \ svuint32_t: svldnt1sb_gather_u32offset_s32, \ default: __assume(0) \ )(pg, 
bases, offset) #define svldnt1sb_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svldnt1sb_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #define svldnt1sb_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1sb_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svldnt1sb_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1sb_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svldnt1sb_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svldnt1sb_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #endif // sve2: Load / Gather: Load 8-bit data and zero-extend, non-temporal svuint32_t svldnt1ub_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svint64_t svldnt1ub_gather_u64base_s64(svbool_t pg, svuint64_t bases); svuint32_t svldnt1ub_gather_u32offset_u32(svbool_t pg, const uint8_t *base, svuint32_t offsets); svint64_t svldnt1ub_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svint32_t svldnt1ub_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); svuint64_t svldnt1ub_gather_u64offset_u64(svbool_t pg, const uint8_t *base, svuint64_t offsets); svint64_t svldnt1ub_gather_u64offset_s64(svbool_t pg, const uint8_t *base, svuint64_t offsets); svuint64_t svldnt1ub_gather_s64offset_u64(svbool_t pg, const uint8_t *base, svint64_t offsets); svint64_t svldnt1ub_gather_s64offset_s64(svbool_t pg, const uint8_t *base, svint64_t offsets); svuint64_t svldnt1ub_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svint32_t svldnt1ub_gather_u32offset_s32(svbool_t pg, const uint8_t *base, svuint32_t offsets); svuint64_t svldnt1ub_gather_u64base_u64(svbool_t pg, svuint64_t bases); svuint32_t svldnt1ub_gather_u32base_u32(svbool_t pg, svuint32_t bases); svint32_t svldnt1ub_gather_u32base_s32(svbool_t pg, svuint32_t bases); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldnt1ub_gather_offset_u32(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1ub_gather_u32base_offset_u32, \ svuint32_t: svldnt1ub_gather_u32offset_u32, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1ub_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1ub_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svldnt1ub_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1ub_gather_u64base_offset_s64, \ svuint64_t: svldnt1ub_gather_u64offset_s64, \ svint64_t: svldnt1ub_gather_s64offset_s64, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1ub_gather_offset_s32(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1ub_gather_u32base_offset_s32, \ svuint32_t: svldnt1ub_gather_u32offset_s32, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1ub_gather_offset_u64(pg, base, offsets) _Generic((offsets), \ svuint64_t: svldnt1ub_gather_u64offset_u64, \ svint64_t: svldnt1ub_gather_s64offset_u64, \ int64_t: svldnt1ub_gather_u64base_offset_u64, \ default: __assume(0) \ )(pg, base, offsets) #define svldnt1ub_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1ub_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svldnt1ub_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svldnt1ub_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #define svldnt1ub_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svldnt1ub_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #endif // sve2: Load / Gather: Unextended load, non-temporal svint64_t 
svldnt1_gather_u64index_s64(svbool_t pg, const int64_t *base, svuint64_t indices); svuint32_t svldnt1_gather_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index); svfloat64_t svldnt1_gather_u64base_index_f64(svbool_t pg, svuint64_t bases, int64_t index); svint64_t svldnt1_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index); svuint64_t svldnt1_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index); svfloat64_t svldnt1_gather_u64offset_f64(svbool_t pg, const float64_t *base, svuint64_t offsets); svint64_t svldnt1_gather_u64offset_s64(svbool_t pg, const int64_t *base, svuint64_t offsets); svuint64_t svldnt1_gather_u64index_u64(svbool_t pg, const uint64_t *base, svuint64_t indices); svfloat32_t svldnt1_gather_u32offset_f32(svbool_t pg, const float32_t *base, svuint32_t offsets); svfloat64_t svldnt1_gather_s64index_f64(svbool_t pg, const float64_t *base, svint64_t indices); svuint64_t svldnt1_gather_u64base_u64(svbool_t pg, svuint64_t bases); svint64_t svldnt1_gather_u64base_s64(svbool_t pg, svuint64_t bases); svfloat64_t svldnt1_gather_u64base_f64(svbool_t pg, svuint64_t bases); svuint32_t svldnt1_gather_u32base_u32(svbool_t pg, svuint32_t bases); svint32_t svldnt1_gather_u32base_s32(svbool_t pg, svuint32_t bases); svfloat32_t svldnt1_gather_u32base_f32(svbool_t pg, svuint32_t bases); svint32_t svldnt1_gather_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index); svfloat32_t svldnt1_gather_u32base_index_f32(svbool_t pg, svuint32_t bases, int64_t index); svint32_t svldnt1_gather_u32offset_s32(svbool_t pg, const int32_t *base, svuint32_t offsets); svint64_t svldnt1_gather_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset); svuint64_t svldnt1_gather_s64offset_u64(svbool_t pg, const uint64_t *base, svint64_t offsets); svuint64_t svldnt1_gather_s64index_u64(svbool_t pg, const uint64_t *base, svint64_t indices); svint64_t svldnt1_gather_s64index_s64(svbool_t pg, const int64_t *base, svint64_t indices); svuint64_t svldnt1_gather_u64offset_u64(svbool_t pg, const uint64_t *base, svuint64_t offsets); svuint64_t svldnt1_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset); svint64_t svldnt1_gather_s64offset_s64(svbool_t pg, const int64_t *base, svint64_t offsets); svfloat64_t svldnt1_gather_s64offset_f64(svbool_t pg, const float64_t *base, svint64_t offsets); svfloat64_t svldnt1_gather_u64index_f64(svbool_t pg, const float64_t *base, svuint64_t indices); svfloat64_t svldnt1_gather_u64base_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset); svuint32_t svldnt1_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset); svint32_t svldnt1_gather_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset); svfloat32_t svldnt1_gather_u32base_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset); svuint32_t svldnt1_gather_u32offset_u32(svbool_t pg, const uint32_t *base, svuint32_t offsets); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svldnt1_gather_index(pg, base, indices) _Generic((indices), \ svuint64_t: _Generic((base), \ const int64_t *: svldnt1_gather_u64index_s64, \ const uint64_t *: svldnt1_gather_u64index_u64, \ const float64_t *: svldnt1_gather_u64index_f64, \ default: __assume(0)), \ svint64_t: _Generic((base), \ const float64_t *: svldnt1_gather_s64index_f64, \ const uint64_t *: svldnt1_gather_s64index_u64, \ const int64_t *: svldnt1_gather_s64index_s64, \ default: __assume(0)), \ default: __assume(0) \ )(pg, base, indices) #define 
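// Illustrative sketch (editorial): unextended non-temporal gather of 64-bit
// floating-point data. 'src' is a hypothetical const float64_t* and 'idx' an
// svuint64_t vector of element indices, with 'pg' a governing predicate;
// under C11 the svldnt1_gather_index wrapper selects this overload from the
// pointer and index types.
//
//   svfloat64_t g = svldnt1_gather_u64index_f64(pg, src, idx);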
svldnt1_gather_index_u32(pg, bases, index) _Generic((index), \ int64_t: svldnt1_gather_u32base_index_u32, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1_gather_index_f64(pg, bases, index) _Generic((index), \ int64_t: svldnt1_gather_u64base_index_f64, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1_gather_index_s64(pg, bases, index) _Generic((index), \ int64_t: svldnt1_gather_u64base_index_s64, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1_gather_index_u64(pg, bases, index) _Generic((index), \ int64_t: svldnt1_gather_u64base_index_u64, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1_gather_offset(pg, base, offsets) _Generic((offsets), \ svuint64_t: _Generic((base), \ const float64_t *: svldnt1_gather_u64offset_f64, \ const int64_t *: svldnt1_gather_u64offset_s64, \ const uint64_t *: svldnt1_gather_u64offset_u64, \ default: __assume(0)), \ svuint32_t: _Generic((base), \ const float32_t *: svldnt1_gather_u32offset_f32, \ const int32_t *: svldnt1_gather_u32offset_s32, \ const uint32_t *: svldnt1_gather_u32offset_u32, \ default: __assume(0)), \ svint64_t: _Generic((base), \ const uint64_t *: svldnt1_gather_s64offset_u64, \ const int64_t *: svldnt1_gather_s64offset_s64, \ const float64_t *: svldnt1_gather_s64offset_f64, \ default: __assume(0)), \ default: __assume(0) \ )(pg, base, offsets) #define svldnt1_gather_u64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1_gather_u64base_u64, \ default: __assume(0) \ )(pg, bases) #define svldnt1_gather_s64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1_gather_u64base_s64, \ default: __assume(0) \ )(pg, bases) #define svldnt1_gather_f64(pg, bases) _Generic((bases), \ svuint64_t: svldnt1_gather_u64base_f64, \ default: __assume(0) \ )(pg, bases) #define svldnt1_gather_u32(pg, bases) _Generic((bases), \ svuint32_t: svldnt1_gather_u32base_u32, \ default: __assume(0) \ )(pg, bases) #define svldnt1_gather_s32(pg, bases) _Generic((bases), \ svuint32_t: svldnt1_gather_u32base_s32, \ default: __assume(0) \ )(pg, bases) #define svldnt1_gather_f32(pg, bases) _Generic((bases), \ svuint32_t: svldnt1_gather_u32base_f32, \ default: __assume(0) \ )(pg, bases) #define svldnt1_gather_index_s32(pg, bases, index) _Generic((index), \ int64_t: svldnt1_gather_u32base_index_s32, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1_gather_index_f32(pg, bases, index) _Generic((index), \ int64_t: svldnt1_gather_u32base_index_f32, \ default: __assume(0) \ )(pg, bases, index) #define svldnt1_gather_offset_s64(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1_gather_u64base_offset_s64, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1_gather_offset_u64(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1_gather_u64base_offset_u64, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1_gather_offset_f64(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1_gather_u64base_offset_f64, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1_gather_offset_u32(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1_gather_u32base_offset_u32, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1_gather_offset_s32(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1_gather_u32base_offset_s32, \ default: __assume(0) \ )(pg, bases, offset) #define svldnt1_gather_offset_f32(pg, bases, offset) _Generic((offset), \ int64_t: svldnt1_gather_u32base_offset_f32, \ default: __assume(0) \ )(pg, bases, offset) #endif // sve2: Logical / Bit clear and exclusive OR: Bitwise clear and 
exclusive OR svuint8_t svbcax_n_u8(svuint8_t op1, svuint8_t op2, uint8_t op3); svuint16_t svbcax_n_u16(svuint16_t op1, svuint16_t op2, uint16_t op3); svuint32_t svbcax_n_u32(svuint32_t op1, svuint32_t op2, uint32_t op3); svint64_t svbcax_s64(svint64_t op1, svint64_t op2, svint64_t op3); svint8_t svbcax_s8(svint8_t op1, svint8_t op2, svint8_t op3); svint16_t svbcax_s16(svint16_t op1, svint16_t op2, svint16_t op3); svint32_t svbcax_s32(svint32_t op1, svint32_t op2, svint32_t op3); svint32_t svbcax_n_s32(svint32_t op1, svint32_t op2, int32_t op3); svuint64_t svbcax_n_u64(svuint64_t op1, svuint64_t op2, uint64_t op3); svint16_t svbcax_n_s16(svint16_t op1, svint16_t op2, int16_t op3); svint64_t svbcax_n_s64(svint64_t op1, svint64_t op2, int64_t op3); svuint64_t svbcax_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint32_t svbcax_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint16_t svbcax_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint8_t svbcax_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3); svint8_t svbcax_n_s8(svint8_t op1, svint8_t op2, int8_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbcax(op1, op2, op3) _Generic((op3), \ uint8_t: svbcax_n_u8, \ uint16_t: svbcax_n_u16, \ uint32_t: svbcax_n_u32, \ svint64_t: svbcax_s64, \ svint8_t: svbcax_s8, \ svint16_t: svbcax_s16, \ svint32_t: svbcax_s32, \ int32_t: svbcax_n_s32, \ uint64_t: svbcax_n_u64, \ int16_t: svbcax_n_s16, \ int64_t: svbcax_n_s64, \ svuint64_t: svbcax_u64, \ svuint32_t: svbcax_u32, \ svuint16_t: svbcax_u16, \ svuint8_t: svbcax_u8, \ int8_t: svbcax_n_s8, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Logical / Bitwise select: Bitwise select svint16_t svbsl_s16(svint16_t op1, svint16_t op2, svint16_t op3); svuint16_t svnbsl_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint32_t svnbsl_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint64_t svnbsl_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3); svint8_t svnbsl_n_s8(svint8_t op1, svint8_t op2, int8_t op3); svint16_t svnbsl_n_s16(svint16_t op1, svint16_t op2, int16_t op3); svint64_t svnbsl_n_s64(svint64_t op1, svint64_t op2, int64_t op3); svint64_t svbsl_n_s64(svint64_t op1, svint64_t op2, int64_t op3); svuint8_t svnbsl_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3); svuint8_t svbsl_n_u8(svuint8_t op1, svuint8_t op2, uint8_t op3); svuint16_t svbsl_n_u16(svuint16_t op1, svuint16_t op2, uint16_t op3); svuint32_t svbsl_n_u32(svuint32_t op1, svuint32_t op2, uint32_t op3); svuint64_t svbsl_n_u64(svuint64_t op1, svuint64_t op2, uint64_t op3); svint8_t svbsl_s8(svint8_t op1, svint8_t op2, svint8_t op3); svint32_t svnbsl_n_s32(svint32_t op1, svint32_t op2, int32_t op3); svuint8_t svnbsl_n_u8(svuint8_t op1, svuint8_t op2, uint8_t op3); svuint16_t svnbsl_n_u16(svuint16_t op1, svuint16_t op2, uint16_t op3); svuint32_t svnbsl_n_u32(svuint32_t op1, svuint32_t op2, uint32_t op3); svint32_t svbsl_s32(svint32_t op1, svint32_t op2, svint32_t op3); svint64_t svbsl_s64(svint64_t op1, svint64_t op2, svint64_t op3); svuint8_t svbsl_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3); svint64_t svnbsl_s64(svint64_t op1, svint64_t op2, svint64_t op3); svint32_t svnbsl_s32(svint32_t op1, svint32_t op2, svint32_t op3); svint16_t svnbsl_s16(svint16_t op1, svint16_t op2, svint16_t op3); svint8_t svnbsl_s8(svint8_t op1, svint8_t op2, svint8_t op3); svuint32_t svbsl_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint64_t svbsl_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3); svint8_t svbsl_n_s8(svint8_t op1, 
svint8_t op2, int8_t op3); svint16_t svbsl_n_s16(svint16_t op1, svint16_t op2, int16_t op3); svint32_t svbsl_n_s32(svint32_t op1, svint32_t op2, int32_t op3); svuint64_t svnbsl_n_u64(svuint64_t op1, svuint64_t op2, uint64_t op3); svuint16_t svbsl_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbsl(op1, op2, op3) _Generic((op3), \ svint16_t: svbsl_s16, \ int64_t: svbsl_n_s64, \ uint8_t: svbsl_n_u8, \ uint16_t: svbsl_n_u16, \ uint32_t: svbsl_n_u32, \ uint64_t: svbsl_n_u64, \ svint8_t: svbsl_s8, \ svint32_t: svbsl_s32, \ svint64_t: svbsl_s64, \ svuint8_t: svbsl_u8, \ svuint32_t: svbsl_u32, \ svuint64_t: svbsl_u64, \ int8_t: svbsl_n_s8, \ int16_t: svbsl_n_s16, \ int32_t: svbsl_n_s32, \ svuint16_t: svbsl_u16, \ default: __assume(0) \ )(op1, op2, op3) #define svnbsl(op1, op2, op3) _Generic((op3), \ svuint16_t: svnbsl_u16, \ svuint32_t: svnbsl_u32, \ svuint64_t: svnbsl_u64, \ int8_t: svnbsl_n_s8, \ int16_t: svnbsl_n_s16, \ int64_t: svnbsl_n_s64, \ svuint8_t: svnbsl_u8, \ int32_t: svnbsl_n_s32, \ uint8_t: svnbsl_n_u8, \ uint16_t: svnbsl_n_u16, \ uint32_t: svnbsl_n_u32, \ svint64_t: svnbsl_s64, \ svint32_t: svnbsl_s32, \ svint16_t: svnbsl_s16, \ svint8_t: svnbsl_s8, \ uint64_t: svnbsl_n_u64, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Logical / Bitwise select: Bitwise select with first input inverted svint16_t svbsl1n_s16(svint16_t op1, svint16_t op2, svint16_t op3); svint8_t svbsl1n_s8(svint8_t op1, svint8_t op2, svint8_t op3); svint32_t svbsl1n_s32(svint32_t op1, svint32_t op2, svint32_t op3); svint8_t svbsl1n_n_s8(svint8_t op1, svint8_t op2, int8_t op3); svuint8_t svbsl1n_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3); svint64_t svbsl1n_s64(svint64_t op1, svint64_t op2, svint64_t op3); svuint64_t svbsl1n_n_u64(svuint64_t op1, svuint64_t op2, uint64_t op3); svuint32_t svbsl1n_n_u32(svuint32_t op1, svuint32_t op2, uint32_t op3); svuint8_t svbsl1n_n_u8(svuint8_t op1, svuint8_t op2, uint8_t op3); svint64_t svbsl1n_n_s64(svint64_t op1, svint64_t op2, int64_t op3); svuint16_t svbsl1n_n_u16(svuint16_t op1, svuint16_t op2, uint16_t op3); svint16_t svbsl1n_n_s16(svint16_t op1, svint16_t op2, int16_t op3); svuint64_t svbsl1n_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint32_t svbsl1n_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint16_t svbsl1n_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3); svint32_t svbsl1n_n_s32(svint32_t op1, svint32_t op2, int32_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbsl1n(op1, op2, op3) _Generic((op3), \ svint16_t: svbsl1n_s16, \ svint8_t: svbsl1n_s8, \ svint32_t: svbsl1n_s32, \ int8_t: svbsl1n_n_s8, \ svuint8_t: svbsl1n_u8, \ svint64_t: svbsl1n_s64, \ uint64_t: svbsl1n_n_u64, \ uint32_t: svbsl1n_n_u32, \ uint8_t: svbsl1n_n_u8, \ int64_t: svbsl1n_n_s64, \ uint16_t: svbsl1n_n_u16, \ int16_t: svbsl1n_n_s16, \ svuint64_t: svbsl1n_u64, \ svuint32_t: svbsl1n_u32, \ svuint16_t: svbsl1n_u16, \ int32_t: svbsl1n_n_s32, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Logical / Bitwise select: Bitwise select with second input inverted svuint8_t svbsl2n_n_u8(svuint8_t op1, svuint8_t op2, uint8_t op3); svint8_t svbsl2n_n_s8(svint8_t op1, svint8_t op2, int8_t op3); svuint16_t svbsl2n_n_u16(svuint16_t op1, svuint16_t op2, uint16_t op3); svuint32_t svbsl2n_n_u32(svuint32_t op1, svuint32_t op2, uint32_t op3); svuint64_t svbsl2n_n_u64(svuint64_t op1, svuint64_t op2, uint64_t op3); svint16_t svbsl2n_n_s16(svint16_t op1, svint16_t op2, 
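// Illustrative sketch (editorial): bitwise select. 'a', 'b', and 'sel' are
// hypothetical svuint32_t vectors; per the SVE2 BSL definition, result bits
// come from 'a' where the corresponding bit of 'sel' is set and from 'b'
// where it is clear, and svnbsl produces the bitwise inverse of the same
// selection.
//
//   svuint32_t mixed = svbsl_u32(a, b, sel);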
int16_t op3); svuint64_t svbsl2n_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3); svint16_t svbsl2n_s16(svint16_t op1, svint16_t op2, svint16_t op3); svuint16_t svbsl2n_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint8_t svbsl2n_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3); svint64_t svbsl2n_s64(svint64_t op1, svint64_t op2, svint64_t op3); svint32_t svbsl2n_s32(svint32_t op1, svint32_t op2, svint32_t op3); svint8_t svbsl2n_s8(svint8_t op1, svint8_t op2, svint8_t op3); svint32_t svbsl2n_n_s32(svint32_t op1, svint32_t op2, int32_t op3); svuint32_t svbsl2n_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3); svint64_t svbsl2n_n_s64(svint64_t op1, svint64_t op2, int64_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svbsl2n(op1, op2, op3) _Generic((op3), \ uint8_t: svbsl2n_n_u8, \ int8_t: svbsl2n_n_s8, \ uint16_t: svbsl2n_n_u16, \ uint32_t: svbsl2n_n_u32, \ uint64_t: svbsl2n_n_u64, \ int16_t: svbsl2n_n_s16, \ svuint64_t: svbsl2n_u64, \ svint16_t: svbsl2n_s16, \ svuint16_t: svbsl2n_u16, \ svuint8_t: svbsl2n_u8, \ svint64_t: svbsl2n_s64, \ svint32_t: svbsl2n_s32, \ svint8_t: svbsl2n_s8, \ int32_t: svbsl2n_n_s32, \ svuint32_t: svbsl2n_u32, \ int64_t: svbsl2n_n_s64, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Logical / Exclusive OR and rotate: Bitwise exclusive OR and rotate right svuint64_t svxar_n_u64(svuint64_t op1, svuint64_t op2, uint64_t imm3); svuint16_t svxar_n_u16(svuint16_t op1, svuint16_t op2, uint64_t imm3); svuint32_t svxar_n_u32(svuint32_t op1, svuint32_t op2, uint64_t imm3); svint16_t svxar_n_s16(svint16_t op1, svint16_t op2, uint64_t imm3); svint32_t svxar_n_s32(svint32_t op1, svint32_t op2, uint64_t imm3); svint8_t svxar_n_s8(svint8_t op1, svint8_t op2, uint64_t imm3); svuint8_t svxar_n_u8(svuint8_t op1, svuint8_t op2, uint64_t imm3); svint64_t svxar_n_s64(svint64_t op1, svint64_t op2, uint64_t imm3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svxar(op1, op2, imm3) _Generic((op2), \ svuint64_t: svxar_n_u64, \ svuint16_t: svxar_n_u16, \ svuint32_t: svxar_n_u32, \ svint16_t: svxar_n_s16, \ svint32_t: svxar_n_s32, \ svint8_t: svxar_n_s8, \ svuint8_t: svxar_n_u8, \ svint64_t: svxar_n_s64, \ default: __assume(0) \ )(op1, op2, imm3) #endif // sve2: Logical / Exclusive OR: Bitwise exclusive OR of three vectors svint8_t sveor3_s8(svint8_t op1, svint8_t op2, svint8_t op3); svuint64_t sveor3_n_u64(svuint64_t op1, svuint64_t op2, uint64_t op3); svint32_t sveor3_s32(svint32_t op1, svint32_t op2, svint32_t op3); svint64_t sveor3_s64(svint64_t op1, svint64_t op2, svint64_t op3); svint8_t sveor3_n_s8(svint8_t op1, svint8_t op2, int8_t op3); svint16_t sveor3_n_s16(svint16_t op1, svint16_t op2, int16_t op3); svuint8_t sveor3_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3); svuint32_t sveor3_n_u32(svuint32_t op1, svuint32_t op2, uint32_t op3); svuint16_t sveor3_n_u16(svuint16_t op1, svuint16_t op2, uint16_t op3); svuint8_t sveor3_n_u8(svuint8_t op1, svuint8_t op2, uint8_t op3); svint64_t sveor3_n_s64(svint64_t op1, svint64_t op2, int64_t op3); svint32_t sveor3_n_s32(svint32_t op1, svint32_t op2, int32_t op3); svuint64_t sveor3_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint16_t sveor3_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint32_t sveor3_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3); svint16_t sveor3_s16(svint16_t op1, svint16_t op2, svint16_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define sveor3(op1, op2, op3) _Generic((op3), \ svint8_t: sveor3_s8, \ 
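// Illustrative sketch (editorial): SHA-3 style helpers. sveor3 computes the
// exclusive OR of three vectors in one operation, and svxar XORs two vectors
// and rotates each element right by an immediate amount. 'a', 'b', and 'c'
// are hypothetical svuint64_t vectors.
//
//   svuint64_t t = sveor3_u64(a, b, c);     // a ^ b ^ c
//   svuint64_t r = svxar_n_u64(a, b, 7);    // per-element ror(a ^ b, 7)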
uint64_t: sveor3_n_u64, \ svint32_t: sveor3_s32, \ svint64_t: sveor3_s64, \ int8_t: sveor3_n_s8, \ int16_t: sveor3_n_s16, \ svuint8_t: sveor3_u8, \ uint32_t: sveor3_n_u32, \ uint16_t: sveor3_n_u16, \ uint8_t: sveor3_n_u8, \ int64_t: sveor3_n_s64, \ int32_t: sveor3_n_s32, \ svuint64_t: sveor3_u64, \ svuint16_t: sveor3_u16, \ svuint32_t: sveor3_u32, \ svint16_t: sveor3_s16, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Logical / Exclusive OR: Interleaving exclusive OR (bottom, top) svint32_t sveorbt_n_s32(svint32_t odd, svint32_t op1, int32_t op2); svuint32_t sveorbt_u32(svuint32_t odd, svuint32_t op1, svuint32_t op2); svuint64_t sveorbt_u64(svuint64_t odd, svuint64_t op1, svuint64_t op2); svint8_t sveorbt_n_s8(svint8_t odd, svint8_t op1, int8_t op2); svint16_t sveorbt_n_s16(svint16_t odd, svint16_t op1, int16_t op2); svint64_t sveorbt_n_s64(svint64_t odd, svint64_t op1, int64_t op2); svuint8_t sveorbt_u8(svuint8_t odd, svuint8_t op1, svuint8_t op2); svuint64_t sveorbt_n_u64(svuint64_t odd, svuint64_t op1, uint64_t op2); svuint16_t sveorbt_u16(svuint16_t odd, svuint16_t op1, svuint16_t op2); svuint16_t sveorbt_n_u16(svuint16_t odd, svuint16_t op1, uint16_t op2); svint64_t sveorbt_s64(svint64_t odd, svint64_t op1, svint64_t op2); svint32_t sveorbt_s32(svint32_t odd, svint32_t op1, svint32_t op2); svint16_t sveorbt_s16(svint16_t odd, svint16_t op1, svint16_t op2); svint8_t sveorbt_s8(svint8_t odd, svint8_t op1, svint8_t op2); svuint32_t sveorbt_n_u32(svuint32_t odd, svuint32_t op1, uint32_t op2); svuint8_t sveorbt_n_u8(svuint8_t odd, svuint8_t op1, uint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define sveorbt(odd, op1, op2) _Generic((op2), \ int32_t: sveorbt_n_s32, \ svuint32_t: sveorbt_u32, \ svuint64_t: sveorbt_u64, \ int8_t: sveorbt_n_s8, \ int16_t: sveorbt_n_s16, \ int64_t: sveorbt_n_s64, \ svuint8_t: sveorbt_u8, \ uint64_t: sveorbt_n_u64, \ svuint16_t: sveorbt_u16, \ uint16_t: sveorbt_n_u16, \ svint64_t: sveorbt_s64, \ svint32_t: sveorbt_s32, \ svint16_t: sveorbt_s16, \ svint8_t: sveorbt_s8, \ uint32_t: sveorbt_n_u32, \ uint8_t: sveorbt_n_u8, \ default: __assume(0) \ )(odd, op1, op2) #endif // sve2: Logical / Exclusive OR: Interleaving exclusive OR (top, bottom) svint8_t sveortb_s8(svint8_t even, svint8_t op1, svint8_t op2); svint16_t sveortb_s16(svint16_t even, svint16_t op1, svint16_t op2); svint32_t sveortb_s32(svint32_t even, svint32_t op1, svint32_t op2); svint64_t sveortb_s64(svint64_t even, svint64_t op1, svint64_t op2); svint8_t sveortb_n_s8(svint8_t even, svint8_t op1, int8_t op2); svuint64_t sveortb_u64(svuint64_t even, svuint64_t op1, svuint64_t op2); svuint32_t sveortb_u32(svuint32_t even, svuint32_t op1, svuint32_t op2); svint64_t sveortb_n_s64(svint64_t even, svint64_t op1, int64_t op2); svint16_t sveortb_n_s16(svint16_t even, svint16_t op1, int16_t op2); svint32_t sveortb_n_s32(svint32_t even, svint32_t op1, int32_t op2); svuint16_t sveortb_u16(svuint16_t even, svuint16_t op1, svuint16_t op2); svuint8_t sveortb_n_u8(svuint8_t even, svuint8_t op1, uint8_t op2); svuint16_t sveortb_n_u16(svuint16_t even, svuint16_t op1, uint16_t op2); svuint32_t sveortb_n_u32(svuint32_t even, svuint32_t op1, uint32_t op2); svuint64_t sveortb_n_u64(svuint64_t even, svuint64_t op1, uint64_t op2); svuint8_t sveortb_u8(svuint8_t even, svuint8_t op1, svuint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define sveortb(even, op1, op2) _Generic((op2), \ svint8_t: sveortb_s8, \ svint16_t: sveortb_s16, \ svint32_t: sveortb_s32, \ 
svint64_t: sveortb_s64, \ int8_t: sveortb_n_s8, \ svuint64_t: sveortb_u64, \ svuint32_t: sveortb_u32, \ int64_t: sveortb_n_s64, \ int16_t: sveortb_n_s16, \ int32_t: sveortb_n_s32, \ svuint16_t: sveortb_u16, \ uint8_t: sveortb_n_u8, \ uint16_t: sveortb_n_u16, \ uint32_t: sveortb_n_u32, \ uint64_t: sveortb_n_u64, \ svuint8_t: sveortb_u8, \ default: __assume(0) \ )(even, op1, op2) #endif // sve2: Logical / Rotate and exclusive OR: Bitwise rotate left by 1 and exclusive OR svuint64_t svrax1_u64(svuint64_t op1, svuint64_t op2); svint64_t svrax1_s64(svint64_t op1, svint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrax1(op1, op2) _Generic((op2), \ svuint64_t: svrax1_u64, \ svint64_t: svrax1_s64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Move / Saturating narrow: Saturating extract narrow (bottom) svuint8_t svqxtnb_u16(svuint16_t op); svint32_t svqxtnb_s64(svint64_t op); svuint16_t svqxtnb_u32(svuint32_t op); svuint32_t svqxtnb_u64(svuint64_t op); svint16_t svqxtnb_s32(svint32_t op); svint8_t svqxtnb_s16(svint16_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqxtnb(op) _Generic((op), \ svuint16_t: svqxtnb_u16, \ svint64_t: svqxtnb_s64, \ svuint32_t: svqxtnb_u32, \ svuint64_t: svqxtnb_u64, \ svint32_t: svqxtnb_s32, \ svint16_t: svqxtnb_s16, \ default: __assume(0) \ )(op) #endif // sve2: Move / Saturating narrow: Saturating extract narrow (top) svint8_t svqxtnt_s16(svint8_t even, svint16_t op); svint16_t svqxtnt_s32(svint16_t even, svint32_t op); svint32_t svqxtnt_s64(svint32_t even, svint64_t op); svuint8_t svqxtnt_u16(svuint8_t even, svuint16_t op); svuint16_t svqxtnt_u32(svuint16_t even, svuint32_t op); svuint32_t svqxtnt_u64(svuint32_t even, svuint64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqxtnt(even, op) _Generic((op), \ svint16_t: svqxtnt_s16, \ svint32_t: svqxtnt_s32, \ svint64_t: svqxtnt_s64, \ svuint16_t: svqxtnt_u16, \ svuint32_t: svqxtnt_u32, \ svuint64_t: svqxtnt_u64, \ default: __assume(0) \ )(even, op) #endif // sve2: Move / Saturating narrow: Saturating extract unsigned narrow (bottom) svuint32_t svqxtunb_s64(svint64_t op); svuint8_t svqxtunb_s16(svint16_t op); svuint16_t svqxtunb_s32(svint32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqxtunb(op) _Generic((op), \ svint64_t: svqxtunb_s64, \ svint16_t: svqxtunb_s16, \ svint32_t: svqxtunb_s32, \ default: __assume(0) \ )(op) #endif // sve2: Move / Saturating narrow: Saturating extract unsigned narrow (top) svuint32_t svqxtunt_s64(svuint32_t even, svint64_t op); svuint16_t svqxtunt_s32(svuint16_t even, svint32_t op); svuint8_t svqxtunt_s16(svuint8_t even, svint16_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqxtunt(even, op) _Generic((op), \ svint64_t: svqxtunt_s64, \ svint32_t: svqxtunt_s32, \ svint16_t: svqxtunt_s16, \ default: __assume(0) \ )(even, op) #endif // sve2: Move / Widen: Move long (bottom) svint16_t svmovlb_s16(svint8_t op); svint32_t svmovlb_s32(svint16_t op); svint64_t svmovlb_s64(svint32_t op); svuint16_t svmovlb_u16(svuint8_t op); svuint32_t svmovlb_u32(svuint16_t op); svuint64_t svmovlb_u64(svuint32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmovlb(op) _Generic((op), \ svint8_t: svmovlb_s16, \ svint16_t: svmovlb_s32, \ svint32_t: svmovlb_s64, \ svuint8_t: svmovlb_u16, \ svuint16_t: svmovlb_u32, \ svuint32_t: svmovlb_u64, \ default: __assume(0) \ )(op) #endif // sve2: Move / Widen: Move long (top) svint16_t 
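// Usage sketch (illustrative only, not part of this header; names are
// hypothetical): the widening moves unpack alternating elements to double
// width, so one vector of int16_t yields two vectors of int32_t.
//
//     svint32_t lo = svmovlb_s32(samples16);   // even-numbered (bottom) elements, sign-extended
//     svint32_t hi = svmovlt_s32(samples16);   // odd-numbered (top) elements, sign-extended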
svmovlt_s16(svint8_t op); svint32_t svmovlt_s32(svint16_t op); svint64_t svmovlt_s64(svint32_t op); svuint16_t svmovlt_u16(svuint8_t op); svuint32_t svmovlt_u32(svuint16_t op); svuint64_t svmovlt_u64(svuint32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmovlt(op) _Generic((op), \ svint8_t: svmovlt_s16, \ svint16_t: svmovlt_s32, \ svint32_t: svmovlt_s64, \ svuint8_t: svmovlt_u16, \ svuint16_t: svmovlt_u32, \ svuint32_t: svmovlt_u64, \ default: __assume(0) \ )(op) #endif // sve2: Predication / Initialization / Detect hazards: While free of read-after-write conflicts svbool_t svwhilerw_f32(const float32_t *op1, const float32_t *op2); svbool_t svwhilerw_u16(const uint16_t *op1, const uint16_t *op2); svbool_t svwhilerw_s16(const int16_t *op1, const int16_t *op2); svbool_t svwhilerw_s32(const int32_t *op1, const int32_t *op2); svbool_t svwhilerw_s64(const int64_t *op1, const int64_t *op2); svbool_t svwhilerw_u8(const uint8_t *op1, const uint8_t *op2); svbool_t svwhilerw_u32(const uint32_t *op1, const uint32_t *op2); svbool_t svwhilerw_u64(const uint64_t *op1, const uint64_t *op2); svbool_t svwhilerw_s8(const int8_t *op1, const int8_t *op2); svbool_t svwhilerw_f64(const float64_t *op1, const float64_t *op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svwhilerw(op1, op2) _Generic((op2), \ const float32_t *: svwhilerw_f32, \ const uint16_t *: svwhilerw_u16, \ const int16_t *: svwhilerw_s16, \ const int32_t *: svwhilerw_s32, \ const int64_t *: svwhilerw_s64, \ const uint8_t *: svwhilerw_u8, \ const uint32_t *: svwhilerw_u32, \ const uint64_t *: svwhilerw_u64, \ const int8_t *: svwhilerw_s8, \ const float64_t *: svwhilerw_f64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Predication / Initialization / Detect hazards: While free of write-after-read conflicts svbool_t svwhilewr_u32(const uint32_t *op1, const uint32_t *op2); svbool_t svwhilewr_f32(const float32_t *op1, const float32_t *op2); svbool_t svwhilewr_f64(const float64_t *op1, const float64_t *op2); svbool_t svwhilewr_s8(const int8_t *op1, const int8_t *op2); svbool_t svwhilewr_s32(const int32_t *op1, const int32_t *op2); svbool_t svwhilewr_s64(const int64_t *op1, const int64_t *op2); svbool_t svwhilewr_u8(const uint8_t *op1, const uint8_t *op2); svbool_t svwhilewr_u16(const uint16_t *op1, const uint16_t *op2); svbool_t svwhilewr_u64(const uint64_t *op1, const uint64_t *op2); svbool_t svwhilewr_s16(const int16_t *op1, const int16_t *op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svwhilewr(op1, op2) _Generic((op2), \ const uint32_t *: svwhilewr_u32, \ const float32_t *: svwhilewr_f32, \ const float64_t *: svwhilewr_f64, \ const int8_t *: svwhilewr_s8, \ const int32_t *: svwhilewr_s32, \ const int64_t *: svwhilewr_s64, \ const uint8_t *: svwhilewr_u8, \ const uint16_t *: svwhilewr_u16, \ const uint64_t *: svwhilewr_u64, \ const int16_t *: svwhilewr_s16, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Predication / Initialization / While counter meets condition (backward): While decrementing scalar is greater than svbool_t svwhilegt_b8_s32(int32_t op1, int32_t op2); svbool_t svwhilegt_b8_s64(int64_t op1, int64_t op2); svbool_t svwhilegt_b8_u32(uint32_t op1, uint32_t op2); svbool_t svwhilegt_b8_u64(uint64_t op1, uint64_t op2); svbool_t svwhilegt_b16_s32(int32_t op1, int32_t op2); svbool_t svwhilegt_b16_s64(int64_t op1, int64_t op2); svbool_t svwhilegt_b16_u32(uint32_t op1, uint32_t op2); svbool_t svwhilegt_b16_u64(uint64_t op1, uint64_t op2); 
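// Usage sketch (illustrative only, not part of this header; names are
// hypothetical): whilerw/whilewr build a predicate that disables the trailing
// lanes that would alias between two pointers, so a possibly overlapping
// loop can still be vectorised safely.
//
//     svbool_t safe = svwhilerw_f32(src, dst);   // all-true when src and dst are far enough apart
//
// The type-generic svwhilerw(src, dst) macro dispatches on the pointer type.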
svbool_t svwhilegt_b32_s32(int32_t op1, int32_t op2); svbool_t svwhilegt_b32_s64(int64_t op1, int64_t op2); svbool_t svwhilegt_b32_u32(uint32_t op1, uint32_t op2); svbool_t svwhilegt_b32_u64(uint64_t op1, uint64_t op2); svbool_t svwhilegt_b64_s32(int32_t op1, int32_t op2); svbool_t svwhilegt_b64_s64(int64_t op1, int64_t op2); svbool_t svwhilegt_b64_u32(uint32_t op1, uint32_t op2); svbool_t svwhilegt_b64_u64(uint64_t op1, uint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svwhilegt_b8(op1, op2) _Generic((op2), \ int32_t: svwhilegt_b8_s32, \ int64_t: svwhilegt_b8_s64, \ uint32_t: svwhilegt_b8_u32, \ uint64_t: svwhilegt_b8_u64, \ default: __assume(0) \ )(op1, op2) #define svwhilegt_b16(op1, op2) _Generic((op2), \ int32_t: svwhilegt_b16_s32, \ int64_t: svwhilegt_b16_s64, \ uint32_t: svwhilegt_b16_u32, \ uint64_t: svwhilegt_b16_u64, \ default: __assume(0) \ )(op1, op2) #define svwhilegt_b32(op1, op2) _Generic((op2), \ int32_t: svwhilegt_b32_s32, \ int64_t: svwhilegt_b32_s64, \ uint32_t: svwhilegt_b32_u32, \ uint64_t: svwhilegt_b32_u64, \ default: __assume(0) \ )(op1, op2) #define svwhilegt_b64(op1, op2) _Generic((op2), \ int32_t: svwhilegt_b64_s32, \ int64_t: svwhilegt_b64_s64, \ uint32_t: svwhilegt_b64_u32, \ uint64_t: svwhilegt_b64_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Predication / Initialization / While counter meets condition (backward): While decrementing scalar is greater than or equal to svbool_t svwhilege_b8_s32(int32_t op1, int32_t op2); svbool_t svwhilege_b8_s64(int64_t op1, int64_t op2); svbool_t svwhilege_b8_u32(uint32_t op1, uint32_t op2); svbool_t svwhilege_b8_u64(uint64_t op1, uint64_t op2); svbool_t svwhilege_b16_s32(int32_t op1, int32_t op2); svbool_t svwhilege_b16_s64(int64_t op1, int64_t op2); svbool_t svwhilege_b16_u32(uint32_t op1, uint32_t op2); svbool_t svwhilege_b16_u64(uint64_t op1, uint64_t op2); svbool_t svwhilege_b32_s32(int32_t op1, int32_t op2); svbool_t svwhilege_b32_s64(int64_t op1, int64_t op2); svbool_t svwhilege_b32_u32(uint32_t op1, uint32_t op2); svbool_t svwhilege_b32_u64(uint64_t op1, uint64_t op2); svbool_t svwhilege_b64_s32(int32_t op1, int32_t op2); svbool_t svwhilege_b64_s64(int64_t op1, int64_t op2); svbool_t svwhilege_b64_u32(uint32_t op1, uint32_t op2); svbool_t svwhilege_b64_u64(uint64_t op1, uint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svwhilege_b8(op1, op2) _Generic((op2), \ int32_t: svwhilege_b8_s32, \ int64_t: svwhilege_b8_s64, \ uint32_t: svwhilege_b8_u32, \ uint64_t: svwhilege_b8_u64, \ default: __assume(0) \ )(op1, op2) #define svwhilege_b16(op1, op2) _Generic((op2), \ int32_t: svwhilege_b16_s32, \ int64_t: svwhilege_b16_s64, \ uint32_t: svwhilege_b16_u32, \ uint64_t: svwhilege_b16_u64, \ default: __assume(0) \ )(op1, op2) #define svwhilege_b32(op1, op2) _Generic((op2), \ int32_t: svwhilege_b32_s32, \ int64_t: svwhilege_b32_s64, \ uint32_t: svwhilege_b32_u32, \ uint64_t: svwhilege_b32_u64, \ default: __assume(0) \ )(op1, op2) #define svwhilege_b64(op1, op2) _Generic((op2), \ int32_t: svwhilege_b64_s32, \ int64_t: svwhilege_b64_s64, \ uint32_t: svwhilege_b64_u32, \ uint64_t: svwhilege_b64_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Shift / Left / Vector rounding shift left: Rounding shift left svuint16_t svrshl_u16_z(svbool_t pg, svuint16_t op1, svint16_t op2); svint32_t svrshl_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svrshl_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svrshl_s32_m(svbool_t pg, svint32_t 
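// Usage sketch (illustrative only, not part of this header; names are
// hypothetical): whilegt/whilege are the down-counting counterparts of
// whilelt/whilele - broadly, lane i stays active while (counter - i) still
// compares greater than (or equal to) the bound, which suits loops that walk
// an index downwards.
//
//     svbool_t active = svwhilegt_b32_s32(remaining, 0);   // predicate over 32-bit lanes
//
// The _b8/_b16/_b32/_b64 suffix selects the lane width the predicate governs;
// the svwhilegt_b32(op1, op2) macro then picks the scalar overload.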
op1, svint32_t op2); svint64_t svrshl_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svrshl_u8_m(svbool_t pg, svuint8_t op1, svint8_t op2); svuint16_t svrshl_u16_m(svbool_t pg, svuint16_t op1, svint16_t op2); svuint32_t svrshl_u32_m(svbool_t pg, svuint32_t op1, svint32_t op2); svuint64_t svrshl_u64_m(svbool_t pg, svuint64_t op1, svint64_t op2); svint8_t svrshl_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svrshl_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svrshl_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svrshl_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svrshl_u8_x(svbool_t pg, svuint8_t op1, svint8_t op2); svuint16_t svrshl_u16_x(svbool_t pg, svuint16_t op1, svint16_t op2); svuint32_t svrshl_u32_x(svbool_t pg, svuint32_t op1, svint32_t op2); svuint64_t svrshl_u64_x(svbool_t pg, svuint64_t op1, svint64_t op2); svint8_t svrshl_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svrshl_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svrshl_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svrshl_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svrshl_u8_z(svbool_t pg, svuint8_t op1, svint8_t op2); svuint32_t svrshl_u32_z(svbool_t pg, svuint32_t op1, svint32_t op2); svuint64_t svrshl_u64_z(svbool_t pg, svuint64_t op1, svint64_t op2); svint8_t svrshl_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svrshl_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svuint64_t svrshl_n_u64_m(svbool_t pg, svuint64_t op1, int64_t op2); svuint32_t svrshl_n_u32_m(svbool_t pg, svuint32_t op1, int32_t op2); svint64_t svrshl_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svint8_t svrshl_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svuint8_t svrshl_n_u8_m(svbool_t pg, svuint8_t op1, int8_t op2); svint8_t svrshl_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svint32_t svrshl_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svrshl_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svuint64_t svrshl_n_u64_z(svbool_t pg, svuint64_t op1, int64_t op2); svuint32_t svrshl_n_u32_z(svbool_t pg, svuint32_t op1, int32_t op2); svuint16_t svrshl_n_u16_z(svbool_t pg, svuint16_t op1, int16_t op2); svuint8_t svrshl_n_u8_z(svbool_t pg, svuint8_t op1, int8_t op2); svint64_t svrshl_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svrshl_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svuint16_t svrshl_n_u16_m(svbool_t pg, svuint16_t op1, int16_t op2); svint8_t svrshl_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svrshl_n_u64_x(svbool_t pg, svuint64_t op1, int64_t op2); svuint32_t svrshl_n_u32_x(svbool_t pg, svuint32_t op1, int32_t op2); svuint16_t svrshl_n_u16_x(svbool_t pg, svuint16_t op1, int16_t op2); svuint8_t svrshl_n_u8_x(svbool_t pg, svuint8_t op1, int8_t op2); svint64_t svrshl_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint16_t svrshl_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrshl_z(pg, op1, op2) _Generic((op2), \ svint16_t: _Generic((op1), \ svuint16_t: svrshl_u16_z, \ svint16_t: svrshl_s16_z, \ default: __assume(0)), \ svint8_t: _Generic((op1), \ svint8_t: svrshl_s8_z, \ svuint8_t: svrshl_u8_z, \ default: __assume(0)), \ svint32_t: _Generic((op1), \ svint32_t: svrshl_s32_z, \ svuint32_t: svrshl_u32_z, \ default: __assume(0)), \ svint64_t: _Generic((op1), \ svint64_t: svrshl_s64_z, \ svuint64_t: svrshl_u64_z, \ default: __assume(0)), \ int64_t: _Generic((op1), \ svuint64_t: 
svrshl_n_u64_z, \ svint64_t: svrshl_n_s64_z, \ default: __assume(0)), \ int32_t: _Generic((op1), \ svuint32_t: svrshl_n_u32_z, \ svint32_t: svrshl_n_s32_z, \ default: __assume(0)), \ int16_t: _Generic((op1), \ svuint16_t: svrshl_n_u16_z, \ svint16_t: svrshl_n_s16_z, \ default: __assume(0)), \ int8_t: _Generic((op1), \ svuint8_t: svrshl_n_u8_z, \ svint8_t: svrshl_n_s8_z, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svrshl_m(pg, op1, op2) _Generic((op2), \ int32_t: _Generic((op1), \ svint32_t: svrshl_n_s32_m, \ svuint32_t: svrshl_n_u32_m, \ default: __assume(0)), \ svint16_t: _Generic((op1), \ svint16_t: svrshl_s16_m, \ svuint16_t: svrshl_u16_m, \ default: __assume(0)), \ svint32_t: _Generic((op1), \ svint32_t: svrshl_s32_m, \ svuint32_t: svrshl_u32_m, \ default: __assume(0)), \ svint64_t: _Generic((op1), \ svint64_t: svrshl_s64_m, \ svuint64_t: svrshl_u64_m, \ default: __assume(0)), \ svint8_t: _Generic((op1), \ svuint8_t: svrshl_u8_m, \ svint8_t: svrshl_s8_m, \ default: __assume(0)), \ int8_t: _Generic((op1), \ svint8_t: svrshl_n_s8_m, \ svuint8_t: svrshl_n_u8_m, \ default: __assume(0)), \ int16_t: _Generic((op1), \ svint16_t: svrshl_n_s16_m, \ svuint16_t: svrshl_n_u16_m, \ default: __assume(0)), \ int64_t: _Generic((op1), \ svuint64_t: svrshl_n_u64_m, \ svint64_t: svrshl_n_s64_m, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svrshl_x(pg, op1, op2) _Generic((op2), \ svint8_t: _Generic((op1), \ svint8_t: svrshl_s8_x, \ svuint8_t: svrshl_u8_x, \ default: __assume(0)), \ svint16_t: _Generic((op1), \ svint16_t: svrshl_s16_x, \ svuint16_t: svrshl_u16_x, \ default: __assume(0)), \ svint32_t: _Generic((op1), \ svint32_t: svrshl_s32_x, \ svuint32_t: svrshl_u32_x, \ default: __assume(0)), \ svint64_t: _Generic((op1), \ svint64_t: svrshl_s64_x, \ svuint64_t: svrshl_u64_x, \ default: __assume(0)), \ int8_t: _Generic((op1), \ svint8_t: svrshl_n_s8_x, \ svuint8_t: svrshl_n_u8_x, \ default: __assume(0)), \ int32_t: _Generic((op1), \ svint32_t: svrshl_n_s32_x, \ svuint32_t: svrshl_n_u32_x, \ default: __assume(0)), \ int16_t: _Generic((op1), \ svint16_t: svrshl_n_s16_x, \ svuint16_t: svrshl_n_u16_x, \ default: __assume(0)), \ int64_t: _Generic((op1), \ svuint64_t: svrshl_n_u64_x, \ svint64_t: svrshl_n_s64_x, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Shift / Left / Vector saturating rounding shift left: Saturating rounding shift left svint16_t svqrshl_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svuint16_t svqrshl_n_u16_z(svbool_t pg, svuint16_t op1, int16_t op2); svint64_t svqrshl_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svqrshl_n_u8_x(svbool_t pg, svuint8_t op1, int8_t op2); svuint16_t svqrshl_n_u16_x(svbool_t pg, svuint16_t op1, int16_t op2); svuint32_t svqrshl_n_u32_x(svbool_t pg, svuint32_t op1, int32_t op2); svuint64_t svqrshl_n_u64_x(svbool_t pg, svuint64_t op1, int64_t op2); svint8_t svqrshl_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svqrshl_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svqrshl_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svqrshl_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svqrshl_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svuint8_t svqrshl_n_u8_z(svbool_t pg, svuint8_t op1, int8_t op2); svuint64_t svqrshl_n_u64_z(svbool_t pg, svuint64_t op1, int64_t op2); svint8_t svqrshl_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svqrshl_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t 
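// Usage sketch (illustrative only, not part of this header; names are
// hypothetical): rounding shift left takes a signed per-lane shift amount, so
// negative counts shift right with rounding. The _z/_m/_x suffixes follow the
// usual ACLE convention: inactive lanes are zeroed, merged from op1, or left
// unspecified, respectively.
//
//     svint32_t r = svrshl_s32_x(pg, values, shifts);   // per-lane rounding shift
//     svint32_t s = svrshl_n_s32_z(pg, values, -2);     // scalar count: rounding shift right by 2
//
// The svrshl_z/_m/_x macros above nest _Generic twice - first on op2 to pick
// the vector vs. scalar (_n_) form, then on op1 to pick signed or unsigned
// element types.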
svqrshl_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svqrshl_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svqrshl_u8_m(svbool_t pg, svuint8_t op1, svint8_t op2); svuint16_t svqrshl_u16_m(svbool_t pg, svuint16_t op1, svint16_t op2); svuint32_t svqrshl_u32_m(svbool_t pg, svuint32_t op1, svint32_t op2); svint8_t svqrshl_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svuint32_t svqrshl_n_u32_z(svbool_t pg, svuint32_t op1, int32_t op2); svint16_t svqrshl_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svuint64_t svqrshl_u64_m(svbool_t pg, svuint64_t op1, svint64_t op2); svuint64_t svqrshl_n_u64_m(svbool_t pg, svuint64_t op1, int64_t op2); svint8_t svqrshl_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svuint64_t svqrshl_u64_x(svbool_t pg, svuint64_t op1, svint64_t op2); svuint32_t svqrshl_u32_x(svbool_t pg, svuint32_t op1, svint32_t op2); svuint16_t svqrshl_u16_x(svbool_t pg, svuint16_t op1, svint16_t op2); svuint8_t svqrshl_u8_x(svbool_t pg, svuint8_t op1, svint8_t op2); svint64_t svqrshl_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svqrshl_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svqrshl_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svqrshl_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svqrshl_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svuint8_t svqrshl_u8_z(svbool_t pg, svuint8_t op1, svint8_t op2); svuint32_t svqrshl_u32_z(svbool_t pg, svuint32_t op1, svint32_t op2); svuint64_t svqrshl_u64_z(svbool_t pg, svuint64_t op1, svint64_t op2); svint8_t svqrshl_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svqrshl_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svqrshl_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svqrshl_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svqrshl_n_u8_m(svbool_t pg, svuint8_t op1, int8_t op2); svuint16_t svqrshl_n_u16_m(svbool_t pg, svuint16_t op1, int16_t op2); svuint32_t svqrshl_n_u32_m(svbool_t pg, svuint32_t op1, int32_t op2); svuint16_t svqrshl_u16_z(svbool_t pg, svuint16_t op1, svint16_t op2); svint8_t svqrshl_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqrshl_x(pg, op1, op2) _Generic((op2), \ svint16_t: _Generic((op1), \ svint16_t: svqrshl_s16_x, \ svuint16_t: svqrshl_u16_x, \ default: __assume(0)), \ int64_t: _Generic((op1), \ svint64_t: svqrshl_n_s64_x, \ svuint64_t: svqrshl_n_u64_x, \ default: __assume(0)), \ int8_t: _Generic((op1), \ svuint8_t: svqrshl_n_u8_x, \ svint8_t: svqrshl_n_s8_x, \ default: __assume(0)), \ int16_t: _Generic((op1), \ svuint16_t: svqrshl_n_u16_x, \ svint16_t: svqrshl_n_s16_x, \ default: __assume(0)), \ int32_t: _Generic((op1), \ svuint32_t: svqrshl_n_u32_x, \ svint32_t: svqrshl_n_s32_x, \ default: __assume(0)), \ svint8_t: _Generic((op1), \ svint8_t: svqrshl_s8_x, \ svuint8_t: svqrshl_u8_x, \ default: __assume(0)), \ svint64_t: _Generic((op1), \ svuint64_t: svqrshl_u64_x, \ svint64_t: svqrshl_s64_x, \ default: __assume(0)), \ svint32_t: _Generic((op1), \ svuint32_t: svqrshl_u32_x, \ svint32_t: svqrshl_s32_x, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svqrshl_z(pg, op1, op2) _Generic((op2), \ int16_t: _Generic((op1), \ svuint16_t: svqrshl_n_u16_z, \ svint16_t: svqrshl_n_s16_z, \ default: __assume(0)), \ int8_t: _Generic((op1), \ svint8_t: svqrshl_n_s8_z, \ svuint8_t: svqrshl_n_u8_z, \ default: __assume(0)), \ int32_t: _Generic((op1), \ svint32_t: svqrshl_n_s32_z, \ svuint32_t: 
svqrshl_n_u32_z, \ default: __assume(0)), \ int64_t: _Generic((op1), \ svint64_t: svqrshl_n_s64_z, \ svuint64_t: svqrshl_n_u64_z, \ default: __assume(0)), \ svint8_t: _Generic((op1), \ svint8_t: svqrshl_s8_z, \ svuint8_t: svqrshl_u8_z, \ default: __assume(0)), \ svint32_t: _Generic((op1), \ svint32_t: svqrshl_s32_z, \ svuint32_t: svqrshl_u32_z, \ default: __assume(0)), \ svint64_t: _Generic((op1), \ svint64_t: svqrshl_s64_z, \ svuint64_t: svqrshl_u64_z, \ default: __assume(0)), \ svint16_t: _Generic((op1), \ svint16_t: svqrshl_s16_z, \ svuint16_t: svqrshl_u16_z, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svqrshl_m(pg, op1, op2) _Generic((op2), \ svint8_t: _Generic((op1), \ svint8_t: svqrshl_s8_m, \ svuint8_t: svqrshl_u8_m, \ default: __assume(0)), \ svint16_t: _Generic((op1), \ svint16_t: svqrshl_s16_m, \ svuint16_t: svqrshl_u16_m, \ default: __assume(0)), \ svint32_t: _Generic((op1), \ svint32_t: svqrshl_s32_m, \ svuint32_t: svqrshl_u32_m, \ default: __assume(0)), \ svint64_t: _Generic((op1), \ svint64_t: svqrshl_s64_m, \ svuint64_t: svqrshl_u64_m, \ default: __assume(0)), \ int64_t: _Generic((op1), \ svuint64_t: svqrshl_n_u64_m, \ svint64_t: svqrshl_n_s64_m, \ default: __assume(0)), \ int8_t: _Generic((op1), \ svint8_t: svqrshl_n_s8_m, \ svuint8_t: svqrshl_n_u8_m, \ default: __assume(0)), \ int16_t: _Generic((op1), \ svint16_t: svqrshl_n_s16_m, \ svuint16_t: svqrshl_n_u16_m, \ default: __assume(0)), \ int32_t: _Generic((op1), \ svint32_t: svqrshl_n_s32_m, \ svuint32_t: svqrshl_n_u32_m, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Shift / Left / Vector saturating shift left: Saturating shift left svuint32_t svqshl_n_u32_x(svbool_t pg, svuint32_t op1, int32_t op2); svuint8_t svqshl_n_u8_x(svbool_t pg, svuint8_t op1, int8_t op2); svint64_t svqshl_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svuint64_t svqshl_n_u64_m(svbool_t pg, svuint64_t op1, int64_t op2); svint16_t svqshl_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svqshl_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svqshl_n_u64_x(svbool_t pg, svuint64_t op1, int64_t op2); svint32_t svqshl_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svuint32_t svqshl_n_u32_m(svbool_t pg, svuint32_t op1, int32_t op2); svint8_t svqshl_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svqshl_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svqshl_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svqshl_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svqshl_n_u8_z(svbool_t pg, svuint8_t op1, int8_t op2); svuint16_t svqshl_n_u16_z(svbool_t pg, svuint16_t op1, int16_t op2); svuint32_t svqshl_n_u32_z(svbool_t pg, svuint32_t op1, int32_t op2); svuint64_t svqshl_n_u64_z(svbool_t pg, svuint64_t op1, int64_t op2); svuint16_t svqshl_n_u16_m(svbool_t pg, svuint16_t op1, int16_t op2); svint8_t svqshl_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svuint8_t svqshl_n_u8_m(svbool_t pg, svuint8_t op1, int8_t op2); svuint16_t svqshl_n_u16_x(svbool_t pg, svuint16_t op1, int16_t op2); svint32_t svqshl_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint8_t svqshl_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svqshl_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svqshl_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svqshl_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svqshl_u8_m(svbool_t pg, svuint8_t op1, svint8_t op2); svint64_t svqshl_n_s64_m(svbool_t pg, svint64_t op1, 
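// Usage sketch (illustrative only, not part of this header; names are
// hypothetical): the saturating rounding shift clamps the result to the
// element type's range instead of wrapping, the predicated SVE2 analogue of
// the NEON vqrshl family.
//
//     svint16_t scaled = svqrshl_s16_m(pg, samples, gains);   // inactive lanes keep 'samples'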
int64_t op2); svuint32_t svqshl_u32_m(svbool_t pg, svuint32_t op1, svint32_t op2); svuint64_t svqshl_u64_m(svbool_t pg, svuint64_t op1, svint64_t op2); svint8_t svqshl_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svqshl_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svqshl_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svqshl_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svqshl_u8_x(svbool_t pg, svuint8_t op1, svint8_t op2); svuint16_t svqshl_u16_m(svbool_t pg, svuint16_t op1, svint16_t op2); svuint32_t svqshl_u32_x(svbool_t pg, svuint32_t op1, svint32_t op2); svuint16_t svqshl_u16_z(svbool_t pg, svuint16_t op1, svint16_t op2); svuint32_t svqshl_u32_z(svbool_t pg, svuint32_t op1, svint32_t op2); svuint64_t svqshl_u64_z(svbool_t pg, svuint64_t op1, svint64_t op2); svint8_t svqshl_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svqshl_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svuint8_t svqshl_u8_z(svbool_t pg, svuint8_t op1, svint8_t op2); svuint16_t svqshl_u16_x(svbool_t pg, svuint16_t op1, svint16_t op2); svint64_t svqshl_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svuint64_t svqshl_u64_x(svbool_t pg, svuint64_t op1, svint64_t op2); svint16_t svqshl_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svqshl_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqshl_x(pg, op1, op2) _Generic((op2), \ int32_t: _Generic((op1), \ svuint32_t: svqshl_n_u32_x, \ svint32_t: svqshl_n_s32_x, \ default: __assume(0)), \ int8_t: _Generic((op1), \ svuint8_t: svqshl_n_u8_x, \ svint8_t: svqshl_n_s8_x, \ default: __assume(0)), \ int64_t: _Generic((op1), \ svint64_t: svqshl_n_s64_x, \ svuint64_t: svqshl_n_u64_x, \ default: __assume(0)), \ int16_t: _Generic((op1), \ svint16_t: svqshl_n_s16_x, \ svuint16_t: svqshl_n_u16_x, \ default: __assume(0)), \ svint8_t: _Generic((op1), \ svint8_t: svqshl_s8_x, \ svuint8_t: svqshl_u8_x, \ default: __assume(0)), \ svint16_t: _Generic((op1), \ svint16_t: svqshl_s16_x, \ svuint16_t: svqshl_u16_x, \ default: __assume(0)), \ svint32_t: _Generic((op1), \ svint32_t: svqshl_s32_x, \ svuint32_t: svqshl_u32_x, \ default: __assume(0)), \ svint64_t: _Generic((op1), \ svint64_t: svqshl_s64_x, \ svuint64_t: svqshl_u64_x, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svqshl_m(pg, op1, op2) _Generic((op2), \ int64_t: _Generic((op1), \ svuint64_t: svqshl_n_u64_m, \ svint64_t: svqshl_n_s64_m, \ default: __assume(0)), \ int32_t: _Generic((op1), \ svuint32_t: svqshl_n_u32_m, \ svint32_t: svqshl_n_s32_m, \ default: __assume(0)), \ int16_t: _Generic((op1), \ svuint16_t: svqshl_n_u16_m, \ svint16_t: svqshl_n_s16_m, \ default: __assume(0)), \ int8_t: _Generic((op1), \ svuint8_t: svqshl_n_u8_m, \ svint8_t: svqshl_n_s8_m, \ default: __assume(0)), \ svint8_t: _Generic((op1), \ svint8_t: svqshl_s8_m, \ svuint8_t: svqshl_u8_m, \ default: __assume(0)), \ svint16_t: _Generic((op1), \ svint16_t: svqshl_s16_m, \ svuint16_t: svqshl_u16_m, \ default: __assume(0)), \ svint32_t: _Generic((op1), \ svint32_t: svqshl_s32_m, \ svuint32_t: svqshl_u32_m, \ default: __assume(0)), \ svint64_t: _Generic((op1), \ svint64_t: svqshl_s64_m, \ svuint64_t: svqshl_u64_m, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #define svqshl_z(pg, op1, op2) _Generic((op2), \ svint8_t: _Generic((op1), \ svint8_t: svqshl_s8_z, \ svuint8_t: svqshl_u8_z, \ default: __assume(0)), \ int16_t: _Generic((op1), \ svint16_t: svqshl_n_s16_z, \ 
svuint16_t: svqshl_n_u16_z, \ default: __assume(0)), \ int32_t: _Generic((op1), \ svint32_t: svqshl_n_s32_z, \ svuint32_t: svqshl_n_u32_z, \ default: __assume(0)), \ int64_t: _Generic((op1), \ svint64_t: svqshl_n_s64_z, \ svuint64_t: svqshl_n_u64_z, \ default: __assume(0)), \ int8_t: _Generic((op1), \ svuint8_t: svqshl_n_u8_z, \ svint8_t: svqshl_n_s8_z, \ default: __assume(0)), \ svint16_t: _Generic((op1), \ svuint16_t: svqshl_u16_z, \ svint16_t: svqshl_s16_z, \ default: __assume(0)), \ svint32_t: _Generic((op1), \ svuint32_t: svqshl_u32_z, \ svint32_t: svqshl_s32_z, \ default: __assume(0)), \ svint64_t: _Generic((op1), \ svuint64_t: svqshl_u64_z, \ svint64_t: svqshl_s64_z, \ default: __assume(0)), \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Shift / Left / Vector saturating shift left: Saturating shift left unsigned svuint8_t svqshlu_n_s8_x(svbool_t pg, svint8_t op1, uint64_t imm2); svuint16_t svqshlu_n_s16_x(svbool_t pg, svint16_t op1, uint64_t imm2); svuint32_t svqshlu_n_s32_x(svbool_t pg, svint32_t op1, uint64_t imm2); svuint64_t svqshlu_n_s64_x(svbool_t pg, svint64_t op1, uint64_t imm2); svuint8_t svqshlu_n_s8_z(svbool_t pg, svint8_t op1, uint64_t imm2); svuint16_t svqshlu_n_s16_z(svbool_t pg, svint16_t op1, uint64_t imm2); svuint32_t svqshlu_n_s32_z(svbool_t pg, svint32_t op1, uint64_t imm2); svuint64_t svqshlu_n_s64_z(svbool_t pg, svint64_t op1, uint64_t imm2); svuint32_t svqshlu_n_s32_m(svbool_t pg, svint32_t op1, uint64_t imm2); svuint8_t svqshlu_n_s8_m(svbool_t pg, svint8_t op1, uint64_t imm2); svuint64_t svqshlu_n_s64_m(svbool_t pg, svint64_t op1, uint64_t imm2); svuint16_t svqshlu_n_s16_m(svbool_t pg, svint16_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqshlu_x(pg, op1, imm2) _Generic((op1), \ svint8_t: svqshlu_n_s8_x, \ svint16_t: svqshlu_n_s16_x, \ svint32_t: svqshlu_n_s32_x, \ svint64_t: svqshlu_n_s64_x, \ default: __assume(0) \ )(pg, op1, imm2) #define svqshlu_z(pg, op1, imm2) _Generic((op1), \ svint8_t: svqshlu_n_s8_z, \ svint16_t: svqshlu_n_s16_z, \ svint32_t: svqshlu_n_s32_z, \ svint64_t: svqshlu_n_s64_z, \ default: __assume(0) \ )(pg, op1, imm2) #define svqshlu_m(pg, op1, imm2) _Generic((op1), \ svint32_t: svqshlu_n_s32_m, \ svint8_t: svqshlu_n_s8_m, \ svint64_t: svqshlu_n_s64_m, \ svint16_t: svqshlu_n_s16_m, \ default: __assume(0) \ )(pg, op1, imm2) #endif // sve2: Shift / Left / Vector shift left and insert: Shift left and insert svint32_t svsli_n_s32(svint32_t op1, svint32_t op2, uint64_t imm3); svint8_t svsli_n_s8(svint8_t op1, svint8_t op2, uint64_t imm3); svint64_t svsli_n_s64(svint64_t op1, svint64_t op2, uint64_t imm3); svuint8_t svsli_n_u8(svuint8_t op1, svuint8_t op2, uint64_t imm3); svuint16_t svsli_n_u16(svuint16_t op1, svuint16_t op2, uint64_t imm3); svuint32_t svsli_n_u32(svuint32_t op1, svuint32_t op2, uint64_t imm3); svint16_t svsli_n_s16(svint16_t op1, svint16_t op2, uint64_t imm3); svuint64_t svsli_n_u64(svuint64_t op1, svuint64_t op2, uint64_t imm3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsli(op1, op2, imm3) _Generic((op2), \ svint32_t: svsli_n_s32, \ svint8_t: svsli_n_s8, \ svint64_t: svsli_n_s64, \ svuint8_t: svsli_n_u8, \ svuint16_t: svsli_n_u16, \ svuint32_t: svsli_n_u32, \ svint16_t: svsli_n_s16, \ svuint64_t: svsli_n_u64, \ default: __assume(0) \ )(op1, op2, imm3) #endif // sve2: Shift / Left / Vector shift left and widen: Shift left long (bottom) svint16_t svshllb_n_s16(svint8_t op1, uint64_t imm2); svuint64_t svshllb_n_u64(svuint32_t op1, uint64_t 
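// Usage sketch (illustrative only, not part of this header; names are
// hypothetical): svqshlu shifts a signed input left and saturates it into the
// unsigned type of the same width, while svsli shifts op2 left and inserts it
// over op1, keeping op1's low imm3 bits.
//
//     svuint8_t  mag = svqshlu_n_s8_x(pg, deltas, 1);         // signed input, saturated unsigned result
//     svuint32_t pkd = svsli_n_u32(low_bits, high_bits, 8);   // (high << 8) with low 8 bits taken from op1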
imm2); svuint32_t svshllb_n_u32(svuint16_t op1, uint64_t imm2); svuint16_t svshllb_n_u16(svuint8_t op1, uint64_t imm2); svint64_t svshllb_n_s64(svint32_t op1, uint64_t imm2); svint32_t svshllb_n_s32(svint16_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svshllb(op1, imm2) _Generic((op1), \ svint8_t: svshllb_n_s16, \ svuint32_t: svshllb_n_u64, \ svuint16_t: svshllb_n_u32, \ svuint8_t: svshllb_n_u16, \ svint32_t: svshllb_n_s64, \ svint16_t: svshllb_n_s32, \ default: __assume(0) \ )(op1, imm2) #endif // sve2: Shift / Left / Vector shift left and widen: Shift left long (top) svint64_t svshllt_n_s64(svint32_t op1, uint64_t imm2); svint16_t svshllt_n_s16(svint8_t op1, uint64_t imm2); svuint16_t svshllt_n_u16(svuint8_t op1, uint64_t imm2); svint32_t svshllt_n_s32(svint16_t op1, uint64_t imm2); svuint64_t svshllt_n_u64(svuint32_t op1, uint64_t imm2); svuint32_t svshllt_n_u32(svuint16_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svshllt(op1, imm2) _Generic((op1), \ svint32_t: svshllt_n_s64, \ svint8_t: svshllt_n_s16, \ svuint8_t: svshllt_n_u16, \ svint16_t: svshllt_n_s32, \ svuint32_t: svshllt_n_u64, \ svuint16_t: svshllt_n_u32, \ default: __assume(0) \ )(op1, imm2) #endif // sve2: Shift / Right / Vector rounding shift right and accumulate: Rounding shift right and accumulate svint16_t svrsra_n_s16(svint16_t op1, svint16_t op2, uint64_t imm3); svint8_t svrsra_n_s8(svint8_t op1, svint8_t op2, uint64_t imm3); svint64_t svrsra_n_s64(svint64_t op1, svint64_t op2, uint64_t imm3); svuint8_t svrsra_n_u8(svuint8_t op1, svuint8_t op2, uint64_t imm3); svuint16_t svrsra_n_u16(svuint16_t op1, svuint16_t op2, uint64_t imm3); svuint32_t svrsra_n_u32(svuint32_t op1, svuint32_t op2, uint64_t imm3); svuint64_t svrsra_n_u64(svuint64_t op1, svuint64_t op2, uint64_t imm3); svint32_t svrsra_n_s32(svint32_t op1, svint32_t op2, uint64_t imm3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrsra(op1, op2, imm3) _Generic((op2), \ svint16_t: svrsra_n_s16, \ svint8_t: svrsra_n_s8, \ svint64_t: svrsra_n_s64, \ svuint8_t: svrsra_n_u8, \ svuint16_t: svrsra_n_u16, \ svuint32_t: svrsra_n_u32, \ svuint64_t: svrsra_n_u64, \ svint32_t: svrsra_n_s32, \ default: __assume(0) \ )(op1, op2, imm3) #endif // sve2: Shift / Right / Vector rounding shift right and narrow: Rounding shift right narrow (bottom) svint32_t svrshrnb_n_s64(svint64_t op1, uint64_t imm2); svint8_t svrshrnb_n_s16(svint16_t op1, uint64_t imm2); svuint32_t svrshrnb_n_u64(svuint64_t op1, uint64_t imm2); svuint16_t svrshrnb_n_u32(svuint32_t op1, uint64_t imm2); svuint8_t svrshrnb_n_u16(svuint16_t op1, uint64_t imm2); svint16_t svrshrnb_n_s32(svint32_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrshrnb(op1, imm2) _Generic((op1), \ svint64_t: svrshrnb_n_s64, \ svint16_t: svrshrnb_n_s16, \ svuint64_t: svrshrnb_n_u64, \ svuint32_t: svrshrnb_n_u32, \ svuint16_t: svrshrnb_n_u16, \ svint32_t: svrshrnb_n_s32, \ default: __assume(0) \ )(op1, imm2) #endif // sve2: Shift / Right / Vector rounding shift right and narrow: Rounding shift right narrow (top) svuint8_t svrshrnt_n_u16(svuint8_t even, svuint16_t op1, uint64_t imm2); svint16_t svrshrnt_n_s32(svint16_t even, svint32_t op1, uint64_t imm2); svint32_t svrshrnt_n_s64(svint32_t even, svint64_t op1, uint64_t imm2); svuint16_t svrshrnt_n_u32(svuint16_t even, svuint32_t op1, uint64_t imm2); svuint32_t svrshrnt_n_u64(svuint32_t even, svuint64_t op1, uint64_t imm2); svint8_t 
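// Usage sketch (illustrative only, not part of this header; names are
// hypothetical): the narrowing *_b (bottom) forms write the even-numbered
// half-width elements and zero the odd ones, while the *_t (top) forms take
// an existing vector ('even') and fill in the odd-numbered elements, so a
// bottom/top pair packs two wide vectors into one narrow vector.
//
//     svint16_t lo  = svrshrnb_n_s32(wide_lo, 4);        // rounded >> 4, even lanes
//     svint16_t all = svrshrnt_n_s32(lo, wide_hi, 4);    // rounded >> 4, odd lanes merged in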
svrshrnt_n_s16(svint8_t even, svint16_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrshrnt(even, op1, imm2) _Generic((op1), \ svuint16_t: svrshrnt_n_u16, \ svint32_t: svrshrnt_n_s32, \ svint64_t: svrshrnt_n_s64, \ svuint32_t: svrshrnt_n_u32, \ svuint64_t: svrshrnt_n_u64, \ svint16_t: svrshrnt_n_s16, \ default: __assume(0) \ )(even, op1, imm2) #endif // sve2: Shift / Right / Vector rounding shift right: Rounding shift right svuint64_t svrshr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t imm2); svint8_t svrshr_n_s8_x(svbool_t pg, svint8_t op1, uint64_t imm2); svint16_t svrshr_n_s16_x(svbool_t pg, svint16_t op1, uint64_t imm2); svint32_t svrshr_n_s32_x(svbool_t pg, svint32_t op1, uint64_t imm2); svint64_t svrshr_n_s64_x(svbool_t pg, svint64_t op1, uint64_t imm2); svuint8_t svrshr_n_u8_x(svbool_t pg, svuint8_t op1, uint64_t imm2); svuint16_t svrshr_n_u16_m(svbool_t pg, svuint16_t op1, uint64_t imm2); svuint32_t svrshr_n_u32_x(svbool_t pg, svuint32_t op1, uint64_t imm2); svuint8_t svrshr_n_u8_m(svbool_t pg, svuint8_t op1, uint64_t imm2); svint64_t svrshr_n_s64_m(svbool_t pg, svint64_t op1, uint64_t imm2); svint32_t svrshr_n_s32_m(svbool_t pg, svint32_t op1, uint64_t imm2); svint16_t svrshr_n_s16_m(svbool_t pg, svint16_t op1, uint64_t imm2); svuint16_t svrshr_n_u16_x(svbool_t pg, svuint16_t op1, uint64_t imm2); svint8_t svrshr_n_s8_m(svbool_t pg, svint8_t op1, uint64_t imm2); svint8_t svrshr_n_s8_z(svbool_t pg, svint8_t op1, uint64_t imm2); svuint64_t svrshr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t imm2); svint16_t svrshr_n_s16_z(svbool_t pg, svint16_t op1, uint64_t imm2); svint32_t svrshr_n_s32_z(svbool_t pg, svint32_t op1, uint64_t imm2); svint64_t svrshr_n_s64_z(svbool_t pg, svint64_t op1, uint64_t imm2); svuint8_t svrshr_n_u8_z(svbool_t pg, svuint8_t op1, uint64_t imm2); svuint16_t svrshr_n_u16_z(svbool_t pg, svuint16_t op1, uint64_t imm2); svuint32_t svrshr_n_u32_z(svbool_t pg, svuint32_t op1, uint64_t imm2); svuint32_t svrshr_n_u32_m(svbool_t pg, svuint32_t op1, uint64_t imm2); svuint64_t svrshr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrshr_m(pg, op1, imm2) _Generic((op1), \ svuint64_t: svrshr_n_u64_m, \ svuint16_t: svrshr_n_u16_m, \ svuint8_t: svrshr_n_u8_m, \ svint64_t: svrshr_n_s64_m, \ svint32_t: svrshr_n_s32_m, \ svint16_t: svrshr_n_s16_m, \ svint8_t: svrshr_n_s8_m, \ svuint32_t: svrshr_n_u32_m, \ default: __assume(0) \ )(pg, op1, imm2) #define svrshr_x(pg, op1, imm2) _Generic((op1), \ svint8_t: svrshr_n_s8_x, \ svint16_t: svrshr_n_s16_x, \ svint32_t: svrshr_n_s32_x, \ svint64_t: svrshr_n_s64_x, \ svuint8_t: svrshr_n_u8_x, \ svuint32_t: svrshr_n_u32_x, \ svuint16_t: svrshr_n_u16_x, \ svuint64_t: svrshr_n_u64_x, \ default: __assume(0) \ )(pg, op1, imm2) #define svrshr_z(pg, op1, imm2) _Generic((op1), \ svint8_t: svrshr_n_s8_z, \ svint16_t: svrshr_n_s16_z, \ svint32_t: svrshr_n_s32_z, \ svint64_t: svrshr_n_s64_z, \ svuint8_t: svrshr_n_u8_z, \ svuint16_t: svrshr_n_u16_z, \ svuint32_t: svrshr_n_u32_z, \ svuint64_t: svrshr_n_u64_z, \ default: __assume(0) \ )(pg, op1, imm2) #endif // sve2: Shift / Right / Vector saturating rounding shift right and narrow: Saturating rounding shift right narrow (bottom) svuint8_t svqrshrnb_n_u16(svuint16_t op1, uint64_t imm2); svint8_t svqrshrnb_n_s16(svint16_t op1, uint64_t imm2); svint16_t svqrshrnb_n_s32(svint32_t op1, uint64_t imm2); svint32_t svqrshrnb_n_s64(svint64_t op1, uint64_t imm2); svuint16_t 
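// Usage sketch (illustrative only, not part of this header; names are
// hypothetical): svrshr is the predicated rounding shift right by an
// immediate; like the other imm2 forms above, the count is a compile-time
// constant from 1 up to the element width.
//
//     svint32_t avg = svrshr_n_s32_x(pg, sums, 3);   // (sums + 4) >> 3, per active lane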
svqrshrnb_n_u32(svuint32_t op1, uint64_t imm2); svuint32_t svqrshrnb_n_u64(svuint64_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqrshrnb(op1, imm2) _Generic((op1), \ svuint16_t: svqrshrnb_n_u16, \ svint16_t: svqrshrnb_n_s16, \ svint32_t: svqrshrnb_n_s32, \ svint64_t: svqrshrnb_n_s64, \ svuint32_t: svqrshrnb_n_u32, \ svuint64_t: svqrshrnb_n_u64, \ default: __assume(0) \ )(op1, imm2) #endif // sve2: Shift / Right / Vector saturating rounding shift right and narrow: Saturating rounding shift right narrow (top) svint16_t svqrshrnt_n_s32(svint16_t even, svint32_t op1, uint64_t imm2); svint32_t svqrshrnt_n_s64(svint32_t even, svint64_t op1, uint64_t imm2); svint8_t svqrshrnt_n_s16(svint8_t even, svint16_t op1, uint64_t imm2); svuint8_t svqrshrnt_n_u16(svuint8_t even, svuint16_t op1, uint64_t imm2); svuint32_t svqrshrnt_n_u64(svuint32_t even, svuint64_t op1, uint64_t imm2); svuint16_t svqrshrnt_n_u32(svuint16_t even, svuint32_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqrshrnt(even, op1, imm2) _Generic((op1), \ svint32_t: svqrshrnt_n_s32, \ svint64_t: svqrshrnt_n_s64, \ svint16_t: svqrshrnt_n_s16, \ svuint16_t: svqrshrnt_n_u16, \ svuint64_t: svqrshrnt_n_u64, \ svuint32_t: svqrshrnt_n_u32, \ default: __assume(0) \ )(even, op1, imm2) #endif // sve2: Shift / Right / Vector saturating rounding shift right and narrow: Saturating rounding shift right unsigned narrow (bottom) svuint32_t svqrshrunb_n_s64(svint64_t op1, uint64_t imm2); svuint16_t svqrshrunb_n_s32(svint32_t op1, uint64_t imm2); svuint8_t svqrshrunb_n_s16(svint16_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqrshrunb(op1, imm2) _Generic((op1), \ svint64_t: svqrshrunb_n_s64, \ svint32_t: svqrshrunb_n_s32, \ svint16_t: svqrshrunb_n_s16, \ default: __assume(0) \ )(op1, imm2) #endif // sve2: Shift / Right / Vector saturating rounding shift right and narrow: Saturating rounding shift right unsigned narrow (top) svuint16_t svqrshrunt_n_s32(svuint16_t even, svint32_t op1, uint64_t imm2); svuint8_t svqrshrunt_n_s16(svuint8_t even, svint16_t op1, uint64_t imm2); svuint32_t svqrshrunt_n_s64(svuint32_t even, svint64_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqrshrunt(even, op1, imm2) _Generic((op1), \ svint32_t: svqrshrunt_n_s32, \ svint16_t: svqrshrunt_n_s16, \ svint64_t: svqrshrunt_n_s64, \ default: __assume(0) \ )(even, op1, imm2) #endif // sve2: Shift / Right / Vector saturating shift right and narrow: Saturating shift right narrow (bottom) svuint32_t svqshrnb_n_u64(svuint64_t op1, uint64_t imm2); svuint16_t svqshrnb_n_u32(svuint32_t op1, uint64_t imm2); svuint8_t svqshrnb_n_u16(svuint16_t op1, uint64_t imm2); svint32_t svqshrnb_n_s64(svint64_t op1, uint64_t imm2); svint16_t svqshrnb_n_s32(svint32_t op1, uint64_t imm2); svint8_t svqshrnb_n_s16(svint16_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqshrnb(op1, imm2) _Generic((op1), \ svuint64_t: svqshrnb_n_u64, \ svuint32_t: svqshrnb_n_u32, \ svuint16_t: svqshrnb_n_u16, \ svint64_t: svqshrnb_n_s64, \ svint32_t: svqshrnb_n_s32, \ svint16_t: svqshrnb_n_s16, \ default: __assume(0) \ )(op1, imm2) #endif // sve2: Shift / Right / Vector saturating shift right and narrow: Saturating shift right narrow (top) svint32_t svqshrnt_n_s64(svint32_t even, svint64_t op1, uint64_t imm2); svuint8_t svqshrnt_n_u16(svuint8_t even, svuint16_t op1, uint64_t imm2); svuint16_t 
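// Usage sketch (illustrative only, not part of this header; names are
// hypothetical): the *_un* variants narrow a signed input into the unsigned
// half-width type, clamping negative values to zero - a common step when
// converting fixed-point intermediates back to pixel data.
//
//     svuint16_t px_lo = svqrshrunb_n_s32(acc_lo, 8);          // even lanes
//     svuint16_t px    = svqrshrunt_n_s32(px_lo, acc_hi, 8);   // odd lanes merged in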
svqshrnt_n_u32(svuint16_t even, svuint32_t op1, uint64_t imm2); svuint32_t svqshrnt_n_u64(svuint32_t even, svuint64_t op1, uint64_t imm2); svint8_t svqshrnt_n_s16(svint8_t even, svint16_t op1, uint64_t imm2); svint16_t svqshrnt_n_s32(svint16_t even, svint32_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqshrnt(even, op1, imm2) _Generic((op1), \ svint64_t: svqshrnt_n_s64, \ svuint16_t: svqshrnt_n_u16, \ svuint32_t: svqshrnt_n_u32, \ svuint64_t: svqshrnt_n_u64, \ svint16_t: svqshrnt_n_s16, \ svint32_t: svqshrnt_n_s32, \ default: __assume(0) \ )(even, op1, imm2) #endif // sve2: Shift / Right / Vector saturating shift right and narrow: Saturating shift right unsigned narrow (bottom) svuint32_t svqshrunb_n_s64(svint64_t op1, uint64_t imm2); svuint16_t svqshrunb_n_s32(svint32_t op1, uint64_t imm2); svuint8_t svqshrunb_n_s16(svint16_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqshrunb(op1, imm2) _Generic((op1), \ svint64_t: svqshrunb_n_s64, \ svint32_t: svqshrunb_n_s32, \ svint16_t: svqshrunb_n_s16, \ default: __assume(0) \ )(op1, imm2) #endif // sve2: Shift / Right / Vector saturating shift right and narrow: Saturating shift right unsigned narrow (top) svuint16_t svqshrunt_n_s32(svuint16_t even, svint32_t op1, uint64_t imm2); svuint8_t svqshrunt_n_s16(svuint8_t even, svint16_t op1, uint64_t imm2); svuint32_t svqshrunt_n_s64(svuint32_t even, svint64_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqshrunt(even, op1, imm2) _Generic((op1), \ svint32_t: svqshrunt_n_s32, \ svint16_t: svqshrunt_n_s16, \ svint64_t: svqshrunt_n_s64, \ default: __assume(0) \ )(even, op1, imm2) #endif // sve2: Shift / Right / Vector shift right and accumulate: Shift right and accumulate svuint64_t svsra_n_u64(svuint64_t op1, svuint64_t op2, uint64_t imm3); svint64_t svsra_n_s64(svint64_t op1, svint64_t op2, uint64_t imm3); svint16_t svsra_n_s16(svint16_t op1, svint16_t op2, uint64_t imm3); svuint32_t svsra_n_u32(svuint32_t op1, svuint32_t op2, uint64_t imm3); svint8_t svsra_n_s8(svint8_t op1, svint8_t op2, uint64_t imm3); svuint16_t svsra_n_u16(svuint16_t op1, svuint16_t op2, uint64_t imm3); svuint8_t svsra_n_u8(svuint8_t op1, svuint8_t op2, uint64_t imm3); svint32_t svsra_n_s32(svint32_t op1, svint32_t op2, uint64_t imm3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsra(op1, op2, imm3) _Generic((op2), \ svuint64_t: svsra_n_u64, \ svint64_t: svsra_n_s64, \ svint16_t: svsra_n_s16, \ svuint32_t: svsra_n_u32, \ svint8_t: svsra_n_s8, \ svuint16_t: svsra_n_u16, \ svuint8_t: svsra_n_u8, \ svint32_t: svsra_n_s32, \ default: __assume(0) \ )(op1, op2, imm3) #endif // sve2: Shift / Right / Vector shift right and insert: Shift right and insert svuint32_t svsri_n_u32(svuint32_t op1, svuint32_t op2, uint64_t imm3); svuint16_t svsri_n_u16(svuint16_t op1, svuint16_t op2, uint64_t imm3); svuint8_t svsri_n_u8(svuint8_t op1, svuint8_t op2, uint64_t imm3); svint64_t svsri_n_s64(svint64_t op1, svint64_t op2, uint64_t imm3); svint32_t svsri_n_s32(svint32_t op1, svint32_t op2, uint64_t imm3); svuint64_t svsri_n_u64(svuint64_t op1, svuint64_t op2, uint64_t imm3); svint16_t svsri_n_s16(svint16_t op1, svint16_t op2, uint64_t imm3); svint8_t svsri_n_s8(svint8_t op1, svint8_t op2, uint64_t imm3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsri(op1, op2, imm3) _Generic((op2), \ svuint32_t: svsri_n_u32, \ svuint16_t: svsri_n_u16, \ svuint8_t: svsri_n_u8, \ 
svint64_t: svsri_n_s64, \ svint32_t: svsri_n_s32, \ svuint64_t: svsri_n_u64, \ svint16_t: svsri_n_s16, \ svint8_t: svsri_n_s8, \ default: __assume(0) \ )(op1, op2, imm3) #endif // sve2: Shift / Right / Vector shift right and narrow: Shift right narrow (bottom) svuint16_t svshrnb_n_u32(svuint32_t op1, uint64_t imm2); svint8_t svshrnb_n_s16(svint16_t op1, uint64_t imm2); svint16_t svshrnb_n_s32(svint32_t op1, uint64_t imm2); svint32_t svshrnb_n_s64(svint64_t op1, uint64_t imm2); svuint8_t svshrnb_n_u16(svuint16_t op1, uint64_t imm2); svuint32_t svshrnb_n_u64(svuint64_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svshrnb(op1, imm2) _Generic((op1), \ svuint32_t: svshrnb_n_u32, \ svint16_t: svshrnb_n_s16, \ svint32_t: svshrnb_n_s32, \ svint64_t: svshrnb_n_s64, \ svuint16_t: svshrnb_n_u16, \ svuint64_t: svshrnb_n_u64, \ default: __assume(0) \ )(op1, imm2) #endif // sve2: Shift / Right / Vector shift right and narrow: Shift right narrow (top) svint16_t svshrnt_n_s32(svint16_t even, svint32_t op1, uint64_t imm2); svint32_t svshrnt_n_s64(svint32_t even, svint64_t op1, uint64_t imm2); svuint8_t svshrnt_n_u16(svuint8_t even, svuint16_t op1, uint64_t imm2); svuint16_t svshrnt_n_u32(svuint16_t even, svuint32_t op1, uint64_t imm2); svuint32_t svshrnt_n_u64(svuint32_t even, svuint64_t op1, uint64_t imm2); svint8_t svshrnt_n_s16(svint8_t even, svint16_t op1, uint64_t imm2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svshrnt(even, op1, imm2) _Generic((op1), \ svint32_t: svshrnt_n_s32, \ svint64_t: svshrnt_n_s64, \ svuint16_t: svshrnt_n_u16, \ svuint32_t: svshrnt_n_u32, \ svuint64_t: svshrnt_n_u64, \ svint16_t: svshrnt_n_s16, \ default: __assume(0) \ )(even, op1, imm2) #endif // sve2: Store / Scatter: Non-truncating store, non-temporal void svstnt1_scatter_u64index_s64(svbool_t pg, int64_t *base, svuint64_t indices, svint64_t data); void svstnt1_scatter_u64index_u64(svbool_t pg, uint64_t *base, svuint64_t indices, svuint64_t data); void svstnt1_scatter_u32base_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset, svfloat32_t data); void svstnt1_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data); void svstnt1_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data); void svstnt1_scatter_u64base_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset, svfloat64_t data); void svstnt1_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data); void svstnt1_scatter_u64index_f64(svbool_t pg, float64_t *base, svuint64_t indices, svfloat64_t data); void svstnt1_scatter_u32base_index_f32(svbool_t pg, svuint32_t bases, int64_t index, svfloat32_t data); void svstnt1_scatter_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index, svint32_t data); void svstnt1_scatter_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index, svuint32_t data); void svstnt1_scatter_u64base_index_f64(svbool_t pg, svuint64_t bases, int64_t index, svfloat64_t data); void svstnt1_scatter_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data); void svstnt1_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data); void svstnt1_scatter_s64index_u64(svbool_t pg, uint64_t *base, svint64_t indices, svuint64_t data); void svstnt1_scatter_u64offset_u64(svbool_t pg, uint64_t *base, svuint64_t offsets, svuint64_t data); void svstnt1_scatter_s64index_f64(svbool_t pg, float64_t *base, svint64_t indices, 
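// Usage sketch (illustrative only, not part of this header; names are
// hypothetical): svsra adds a shifted copy into an accumulator (the
// non-rounding sibling of svrsra above), whereas svsri inserts the shifted
// value while keeping the top imm3 bits of op1 (mirroring svsli for right
// shifts).
//
//     svuint32_t acc = svsra_n_u32(acc, samples, 4);      // acc += samples >> 4
//     svuint32_t fld = svsri_n_u32(flags, samples, 4);    // samples >> 4 under flags' high bits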
svfloat64_t data); void svstnt1_scatter_u64offset_s64(svbool_t pg, int64_t *base, svuint64_t offsets, svint64_t data); void svstnt1_scatter_u64offset_f64(svbool_t pg, float64_t *base, svuint64_t offsets, svfloat64_t data); void svstnt1_scatter_s64offset_u64(svbool_t pg, uint64_t *base, svint64_t offsets, svuint64_t data); void svstnt1_scatter_s64offset_s64(svbool_t pg, int64_t *base, svint64_t offsets, svint64_t data); void svstnt1_scatter_s64offset_f64(svbool_t pg, float64_t *base, svint64_t offsets, svfloat64_t data); void svstnt1_scatter_u32offset_u32(svbool_t pg, uint32_t *base, svuint32_t offsets, svuint32_t data); void svstnt1_scatter_u32offset_s32(svbool_t pg, int32_t *base, svuint32_t offsets, svint32_t data); void svstnt1_scatter_u32offset_f32(svbool_t pg, float32_t *base, svuint32_t offsets, svfloat32_t data); void svstnt1_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t data); void svstnt1_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t data); void svstnt1_scatter_u64base_f64(svbool_t pg, svuint64_t bases, svfloat64_t data); void svstnt1_scatter_u32base_u32(svbool_t pg, svuint32_t bases, svuint32_t data); void svstnt1_scatter_u32base_s32(svbool_t pg, svuint32_t bases, svint32_t data); void svstnt1_scatter_s64index_s64(svbool_t pg, int64_t *base, svint64_t indices, svint64_t data); void svstnt1_scatter_u32base_f32(svbool_t pg, svuint32_t bases, svfloat32_t data); void svstnt1_scatter_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svstnt1_scatter_index(pg, base, indices, data) _Generic((data), \ svint64_t: _Generic((indices), \ svuint64_t: svstnt1_scatter_u64index_s64, \ int64_t: svstnt1_scatter_u64base_index_s64, \ svint64_t: svstnt1_scatter_s64index_s64, \ default: __assume(0)), \ svuint64_t: _Generic((indices), \ svuint64_t: svstnt1_scatter_u64index_u64, \ svint64_t: svstnt1_scatter_s64index_u64, \ int64_t: svstnt1_scatter_u64base_index_u64, \ default: __assume(0)), \ svfloat64_t: _Generic((indices), \ svuint64_t: svstnt1_scatter_u64index_f64, \ int64_t: svstnt1_scatter_u64base_index_f64, \ svint64_t: svstnt1_scatter_s64index_f64, \ default: __assume(0)), \ svfloat32_t: svstnt1_scatter_u32base_index_f32, \ svint32_t: svstnt1_scatter_u32base_index_s32, \ svuint32_t: svstnt1_scatter_u32base_index_u32, \ default: __assume(0) \ )(pg, base, indices, data) #define svstnt1_scatter_offset(pg, bases, offset, data) _Generic((data), \ svfloat32_t: _Generic((offset), \ int64_t: svstnt1_scatter_u32base_offset_f32, \ svuint32_t: svstnt1_scatter_u32offset_f32, \ default: __assume(0)), \ svint32_t: _Generic((offset), \ int64_t: svstnt1_scatter_u32base_offset_s32, \ svuint32_t: svstnt1_scatter_u32offset_s32, \ default: __assume(0)), \ svuint32_t: _Generic((offset), \ int64_t: svstnt1_scatter_u32base_offset_u32, \ svuint32_t: svstnt1_scatter_u32offset_u32, \ default: __assume(0)), \ svfloat64_t: _Generic((offset), \ int64_t: svstnt1_scatter_u64base_offset_f64, \ svuint64_t: svstnt1_scatter_u64offset_f64, \ svint64_t: svstnt1_scatter_s64offset_f64, \ default: __assume(0)), \ svuint64_t: _Generic((offset), \ int64_t: svstnt1_scatter_u64base_offset_u64, \ svuint64_t: svstnt1_scatter_u64offset_u64, \ svint64_t: svstnt1_scatter_s64offset_u64, \ default: __assume(0)), \ svint64_t: _Generic((offset), \ int64_t: svstnt1_scatter_u64base_offset_s64, \ svuint64_t: svstnt1_scatter_u64offset_s64, \ svint64_t: svstnt1_scatter_s64offset_s64, \ default: __assume(0)), \ default: 
__assume(0) \ )(pg, bases, offset, data) #define svstnt1_scatter(pg, bases, data) _Generic((data), \ svuint64_t: svstnt1_scatter_u64base_u64, \ svint64_t: svstnt1_scatter_u64base_s64, \ svfloat64_t: svstnt1_scatter_u64base_f64, \ svuint32_t: svstnt1_scatter_u32base_u32, \ svint32_t: svstnt1_scatter_u32base_s32, \ svfloat32_t: svstnt1_scatter_u32base_f32, \ default: __assume(0) \ )(pg, bases, data) #endif // sve2: Store / Scatter: Truncate to 16 bits and store, non-temporal void svstnt1h_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t data); void svstnt1h_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t data); void svstnt1h_scatter_u32offset_s32(svbool_t pg, int16_t *base, svuint32_t offsets, svint32_t data); void svstnt1h_scatter_u32base_s32(svbool_t pg, svuint32_t bases, svint32_t data); void svstnt1h_scatter_u32base_u32(svbool_t pg, svuint32_t bases, svuint32_t data); void svstnt1h_scatter_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data); void svstnt1h_scatter_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data); void svstnt1h_scatter_u32base_index_s32(svbool_t pg, svuint32_t bases, int64_t index, svint32_t data); void svstnt1h_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data); void svstnt1h_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data); void svstnt1h_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data); void svstnt1h_scatter_u32base_index_u32(svbool_t pg, svuint32_t bases, int64_t index, svuint32_t data); void svstnt1h_scatter_u64index_u64(svbool_t pg, uint16_t *base, svuint64_t indices, svuint64_t data); void svstnt1h_scatter_u64index_s64(svbool_t pg, int16_t *base, svuint64_t indices, svint64_t data); void svstnt1h_scatter_s64index_u64(svbool_t pg, uint16_t *base, svint64_t indices, svuint64_t data); void svstnt1h_scatter_s64index_s64(svbool_t pg, int16_t *base, svint64_t indices, svint64_t data); void svstnt1h_scatter_u64offset_u64(svbool_t pg, uint16_t *base, svuint64_t offsets, svuint64_t data); void svstnt1h_scatter_u64offset_s64(svbool_t pg, int16_t *base, svuint64_t offsets, svint64_t data); void svstnt1h_scatter_u32offset_u32(svbool_t pg, uint16_t *base, svuint32_t offsets, svuint32_t data); void svstnt1h_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data); void svstnt1h_scatter_s64offset_u64(svbool_t pg, uint16_t *base, svint64_t offsets, svuint64_t data); void svstnt1h_scatter_s64offset_s64(svbool_t pg, int16_t *base, svint64_t offsets, svint64_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svstnt1h_scatter(pg, bases, data) _Generic((data), \ svint64_t: svstnt1h_scatter_u64base_s64, \ svuint64_t: svstnt1h_scatter_u64base_u64, \ svint32_t: svstnt1h_scatter_u32base_s32, \ svuint32_t: svstnt1h_scatter_u32base_u32, \ default: __assume(0) \ )(pg, bases, data) #define svstnt1h_scatter_offset(pg, base, offsets, data) _Generic((data), \ svint32_t: _Generic((offsets), \ svuint32_t: svstnt1h_scatter_u32offset_s32, \ int64_t: svstnt1h_scatter_u32base_offset_s32, \ default: __assume(0)), \ svuint64_t: _Generic((offsets), \ int64_t: svstnt1h_scatter_u64base_offset_u64, \ svuint64_t: svstnt1h_scatter_u64offset_u64, \ svint64_t: svstnt1h_scatter_s64offset_u64, \ default: __assume(0)), \ svint64_t: _Generic((offsets), \ int64_t: svstnt1h_scatter_u64base_offset_s64, \ svuint64_t: svstnt1h_scatter_u64offset_s64, \ 
svint64_t: svstnt1h_scatter_s64offset_s64, \ default: __assume(0)), \ svuint32_t: _Generic((offsets), \ int64_t: svstnt1h_scatter_u32base_offset_u32, \ svuint32_t: svstnt1h_scatter_u32offset_u32, \ default: __assume(0)), \ default: __assume(0) \ )(pg, base, offsets, data) #define svstnt1h_scatter_index(pg, bases, index, data) _Generic((data), \ svuint64_t: _Generic((index), \ int64_t: svstnt1h_scatter_u64base_index_u64, \ svuint64_t: svstnt1h_scatter_u64index_u64, \ svint64_t: svstnt1h_scatter_s64index_u64, \ default: __assume(0)), \ svint64_t: _Generic((index), \ int64_t: svstnt1h_scatter_u64base_index_s64, \ svuint64_t: svstnt1h_scatter_u64index_s64, \ svint64_t: svstnt1h_scatter_s64index_s64, \ default: __assume(0)), \ svint32_t: svstnt1h_scatter_u32base_index_s32, \ svuint32_t: svstnt1h_scatter_u32base_index_u32, \ default: __assume(0) \ )(pg, bases, index, data) #endif // sve2: Store / Scatter: Truncate to 32 bits and store, non-temporal void svstnt1w_scatter_u64offset_u64(svbool_t pg, uint32_t *base, svuint64_t offsets, svuint64_t data); void svstnt1w_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data); void svstnt1w_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data); void svstnt1w_scatter_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data); void svstnt1w_scatter_s64offset_u64(svbool_t pg, uint32_t *base, svint64_t offsets, svuint64_t data); void svstnt1w_scatter_s64offset_s64(svbool_t pg, int32_t *base, svint64_t offsets, svint64_t data); void svstnt1w_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t data); void svstnt1w_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t data); void svstnt1w_scatter_s64index_s64(svbool_t pg, int32_t *base, svint64_t indices, svint64_t data); void svstnt1w_scatter_u64offset_s64(svbool_t pg, int32_t *base, svuint64_t offsets, svint64_t data); void svstnt1w_scatter_s64index_u64(svbool_t pg, uint32_t *base, svint64_t indices, svuint64_t data); void svstnt1w_scatter_u64base_index_s64(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data); void svstnt1w_scatter_u64index_u64(svbool_t pg, uint32_t *base, svuint64_t indices, svuint64_t data); void svstnt1w_scatter_u64index_s64(svbool_t pg, int32_t *base, svuint64_t indices, svint64_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svstnt1w_scatter_offset(pg, base, offsets, data) _Generic((data), \ svuint64_t: _Generic((offsets), \ svuint64_t: svstnt1w_scatter_u64offset_u64, \ int64_t: svstnt1w_scatter_u64base_offset_u64, \ svint64_t: svstnt1w_scatter_s64offset_u64, \ default: __assume(0)), \ svint64_t: _Generic((offsets), \ int64_t: svstnt1w_scatter_u64base_offset_s64, \ svint64_t: svstnt1w_scatter_s64offset_s64, \ svuint64_t: svstnt1w_scatter_u64offset_s64, \ default: __assume(0)), \ default: __assume(0) \ )(pg, base, offsets, data) #define svstnt1w_scatter_index(pg, bases, index, data) _Generic((data), \ svuint64_t: _Generic((index), \ int64_t: svstnt1w_scatter_u64base_index_u64, \ svint64_t: svstnt1w_scatter_s64index_u64, \ svuint64_t: svstnt1w_scatter_u64index_u64, \ default: __assume(0)), \ svint64_t: _Generic((index), \ svint64_t: svstnt1w_scatter_s64index_s64, \ int64_t: svstnt1w_scatter_u64base_index_s64, \ svuint64_t: svstnt1w_scatter_u64index_s64, \ default: __assume(0)), \ default: __assume(0) \ )(pg, bases, index, data) #define svstnt1w_scatter(pg, bases, data) _Generic((data), \ svuint64_t: svstnt1w_scatter_u64base_u64, \ svint64_t: 
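// Usage sketch (illustrative only, not part of this header; names are
// hypothetical): the svstnt1h/svstnt1w/svstnt1b variants truncate each
// element to 16, 32 or 8 bits before the non-temporal scatter, so a vector
// of 32-bit values can be stored through 16-bit slots without a separate
// narrowing step.
//
//     svstnt1h_scatter_u32base_offset_u32(pg, slot_addresses, 0, values32);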
svstnt1w_scatter_u64base_s64, \ default: __assume(0) \ )(pg, bases, data) #endif // sve2: Store / Scatter: Truncate to 8 bits and store, non-temporal void svstnt1b_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data); void svstnt1b_scatter_u64base_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data); void svstnt1b_scatter_u32base_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data); void svstnt1b_scatter_u32base_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data); void svstnt1b_scatter_u64offset_u64(svbool_t pg, uint8_t *base, svuint64_t offsets, svuint64_t data); void svstnt1b_scatter_u64offset_s64(svbool_t pg, int8_t *base, svuint64_t offsets, svint64_t data); void svstnt1b_scatter_s64offset_u64(svbool_t pg, uint8_t *base, svint64_t offsets, svuint64_t data); void svstnt1b_scatter_u32offset_u32(svbool_t pg, uint8_t *base, svuint32_t offsets, svuint32_t data); void svstnt1b_scatter_u32offset_s32(svbool_t pg, int8_t *base, svuint32_t offsets, svint32_t data); void svstnt1b_scatter_u64base_u64(svbool_t pg, svuint64_t bases, svuint64_t data); void svstnt1b_scatter_u64base_s64(svbool_t pg, svuint64_t bases, svint64_t data); void svstnt1b_scatter_u32base_u32(svbool_t pg, svuint32_t bases, svuint32_t data); void svstnt1b_scatter_u32base_s32(svbool_t pg, svuint32_t bases, svint32_t data); void svstnt1b_scatter_s64offset_s64(svbool_t pg, int8_t *base, svint64_t offsets, svint64_t data); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svstnt1b_scatter_offset(pg, bases, offset, data) _Generic((data), \ svuint64_t: _Generic((offset), \ int64_t: svstnt1b_scatter_u64base_offset_u64, \ svuint64_t: svstnt1b_scatter_u64offset_u64, \ svint64_t: svstnt1b_scatter_s64offset_u64, \ default: __assume(0)), \ svint64_t: _Generic((offset), \ int64_t: svstnt1b_scatter_u64base_offset_s64, \ svuint64_t: svstnt1b_scatter_u64offset_s64, \ svint64_t: svstnt1b_scatter_s64offset_s64, \ default: __assume(0)), \ svuint32_t: _Generic((offset), \ int64_t: svstnt1b_scatter_u32base_offset_u32, \ svuint32_t: svstnt1b_scatter_u32offset_u32, \ default: __assume(0)), \ svint32_t: _Generic((offset), \ int64_t: svstnt1b_scatter_u32base_offset_s32, \ svuint32_t: svstnt1b_scatter_u32offset_s32, \ default: __assume(0)), \ default: __assume(0) \ )(pg, bases, offset, data) #define svstnt1b_scatter(pg, bases, data) _Generic((data), \ svuint64_t: svstnt1b_scatter_u64base_u64, \ svint64_t: svstnt1b_scatter_u64base_s64, \ svuint32_t: svstnt1b_scatter_u32base_u32, \ svint32_t: svstnt1b_scatter_u32base_s32, \ default: __assume(0) \ )(pg, bases, data) #endif // sve2: Table lookups / Extended table lookup: Table lookup in single-vector table (merging) svbfloat16_t svtbx_bf16(svbfloat16_t fallback, svbfloat16_t data, svuint16_t indices); svint8_t svtbx_s8(svint8_t fallback, svint8_t data, svuint8_t indices); svuint32_t svtbx_u32(svuint32_t fallback, svuint32_t data, svuint32_t indices); svfloat16_t svtbx_f16(svfloat16_t fallback, svfloat16_t data, svuint16_t indices); svfloat32_t svtbx_f32(svfloat32_t fallback, svfloat32_t data, svuint32_t indices); svfloat64_t svtbx_f64(svfloat64_t fallback, svfloat64_t data, svuint64_t indices); svint16_t svtbx_s16(svint16_t fallback, svint16_t data, svuint16_t indices); svint32_t svtbx_s32(svint32_t fallback, svint32_t data, svuint32_t indices); svint64_t svtbx_s64(svint64_t fallback, svint64_t data, svuint64_t indices); svuint64_t svtbx_u64(svuint64_t fallback, svuint64_t data, 
svuint64_t indices); svuint16_t svtbx_u16(svuint16_t fallback, svuint16_t data, svuint16_t indices); svuint8_t svtbx_u8(svuint8_t fallback, svuint8_t data, svuint8_t indices); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svtbx(fallback, data, indices) _Generic((data), \ svbfloat16_t: svtbx_bf16, \ svint8_t: svtbx_s8, \ svuint32_t: svtbx_u32, \ svfloat16_t: svtbx_f16, \ svfloat32_t: svtbx_f32, \ svfloat64_t: svtbx_f64, \ svint16_t: svtbx_s16, \ svint32_t: svtbx_s32, \ svint64_t: svtbx_s64, \ svuint64_t: svtbx_u64, \ svuint16_t: svtbx_u16, \ svuint8_t: svtbx_u8, \ default: __assume(0) \ )(fallback, data, indices) #endif // sve2: Table lookups / Table lookup: Table lookup in two-vector table svbfloat16_t svtbl2_bf16(svbfloat16x2_t data, svuint16_t indices); svfloat16_t svtbl2_f16(svfloat16x2_t data, svuint16_t indices); svfloat64_t svtbl2_f64(svfloat64x2_t data, svuint64_t indices); svint32_t svtbl2_s32(svint32x2_t data, svuint32_t indices); svint16_t svtbl2_s16(svint16x2_t data, svuint16_t indices); svint64_t svtbl2_s64(svint64x2_t data, svuint64_t indices); svuint8_t svtbl2_u8(svuint8x2_t data, svuint8_t indices); svuint16_t svtbl2_u16(svuint16x2_t data, svuint16_t indices); svint8_t svtbl2_s8(svint8x2_t data, svuint8_t indices); svuint32_t svtbl2_u32(svuint32x2_t data, svuint32_t indices); svuint64_t svtbl2_u64(svuint64x2_t data, svuint64_t indices); svfloat32_t svtbl2_f32(svfloat32x2_t data, svuint32_t indices); // sve2: Vector arithmetic / Absolute / Absolute difference and accumulate: Absolute difference and accumulate svuint64_t svaba_n_u64(svuint64_t op1, svuint64_t op2, uint64_t op3); svuint32_t svaba_n_u32(svuint32_t op1, svuint32_t op2, uint32_t op3); svuint16_t svaba_n_u16(svuint16_t op1, svuint16_t op2, uint16_t op3); svuint8_t svaba_n_u8(svuint8_t op1, svuint8_t op2, uint8_t op3); svint64_t svaba_n_s64(svint64_t op1, svint64_t op2, int64_t op3); svint32_t svaba_n_s32(svint32_t op1, svint32_t op2, int32_t op3); svint16_t svaba_n_s16(svint16_t op1, svint16_t op2, int16_t op3); svint8_t svaba_n_s8(svint8_t op1, svint8_t op2, int8_t op3); svuint64_t svaba_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint32_t svaba_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint16_t svaba_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3); svuint8_t svaba_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3); svint64_t svaba_s64(svint64_t op1, svint64_t op2, svint64_t op3); svint16_t svaba_s16(svint16_t op1, svint16_t op2, svint16_t op3); svint32_t svaba_s32(svint32_t op1, svint32_t op2, svint32_t op3); svint8_t svaba_s8(svint8_t op1, svint8_t op2, svint8_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svaba(op1, op2, op3) _Generic((op3), \ uint64_t: svaba_n_u64, \ uint32_t: svaba_n_u32, \ uint16_t: svaba_n_u16, \ uint8_t: svaba_n_u8, \ int64_t: svaba_n_s64, \ int32_t: svaba_n_s32, \ int16_t: svaba_n_s16, \ int8_t: svaba_n_s8, \ svuint64_t: svaba_u64, \ svuint32_t: svaba_u32, \ svuint16_t: svaba_u16, \ svuint8_t: svaba_u8, \ svint64_t: svaba_s64, \ svint16_t: svaba_s16, \ svint32_t: svaba_s32, \ svint8_t: svaba_s8, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Absolute / Saturating absolute value: Saturating absolute value svint64_t svqabs_s64_x(svbool_t pg, svint64_t op); svint32_t svqabs_s32_x(svbool_t pg, svint32_t op); svint16_t svqabs_s16_x(svbool_t pg, svint16_t op); svint8_t svqabs_s8_x(svbool_t pg, svint8_t op); svint64_t svqabs_s64_m(svint64_t inactive, svbool_t pg, svint64_t op); svint32_t 
svqabs_s32_m(svint32_t inactive, svbool_t pg, svint32_t op); svint16_t svqabs_s16_m(svint16_t inactive, svbool_t pg, svint16_t op); svint8_t svqabs_s8_m(svint8_t inactive, svbool_t pg, svint8_t op); svint8_t svqabs_s8_z(svbool_t pg, svint8_t op); svint16_t svqabs_s16_z(svbool_t pg, svint16_t op); svint32_t svqabs_s32_z(svbool_t pg, svint32_t op); svint64_t svqabs_s64_z(svbool_t pg, svint64_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqabs_x(pg, op) _Generic((op), \ svint64_t: svqabs_s64_x, \ svint32_t: svqabs_s32_x, \ svint16_t: svqabs_s16_x, \ svint8_t: svqabs_s8_x, \ default: __assume(0) \ )(pg, op) #define svqabs_m(inactive, pg, op) _Generic((op), \ svint64_t: svqabs_s64_m, \ svint32_t: svqabs_s32_m, \ svint16_t: svqabs_s16_m, \ svint8_t: svqabs_s8_m, \ default: __assume(0) \ )(inactive, pg, op) #define svqabs_z(pg, op) _Generic((op), \ svint8_t: svqabs_s8_z, \ svint16_t: svqabs_s16_z, \ svint32_t: svqabs_s32_z, \ svint64_t: svqabs_s64_z, \ default: __assume(0) \ )(pg, op) #endif // sve2: Vector arithmetic / Absolute / Widening absolute difference and accumulate: Absolute difference and accumulate long (bottom) svint32_t svabalb_n_s32(svint32_t op1, svint16_t op2, int16_t op3); svint16_t svabalb_s16(svint16_t op1, svint8_t op2, svint8_t op3); svint32_t svabalb_s32(svint32_t op1, svint16_t op2, svint16_t op3); svint64_t svabalb_s64(svint64_t op1, svint32_t op2, svint32_t op3); svuint16_t svabalb_u16(svuint16_t op1, svuint8_t op2, svuint8_t op3); svuint32_t svabalb_u32(svuint32_t op1, svuint16_t op2, svuint16_t op3); svuint64_t svabalb_u64(svuint64_t op1, svuint32_t op2, svuint32_t op3); svint16_t svabalb_n_s16(svint16_t op1, svint8_t op2, int8_t op3); svint64_t svabalb_n_s64(svint64_t op1, svint32_t op2, int32_t op3); svuint16_t svabalb_n_u16(svuint16_t op1, svuint8_t op2, uint8_t op3); svuint32_t svabalb_n_u32(svuint32_t op1, svuint16_t op2, uint16_t op3); svuint64_t svabalb_n_u64(svuint64_t op1, svuint32_t op2, uint32_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svabalb(op1, op2, op3) _Generic((op3), \ int16_t: svabalb_n_s32, \ svint8_t: svabalb_s16, \ svint16_t: svabalb_s32, \ svint32_t: svabalb_s64, \ svuint8_t: svabalb_u16, \ svuint16_t: svabalb_u32, \ svuint32_t: svabalb_u64, \ int8_t: svabalb_n_s16, \ int32_t: svabalb_n_s64, \ uint8_t: svabalb_n_u16, \ uint16_t: svabalb_n_u32, \ uint32_t: svabalb_n_u64, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Absolute / Widening absolute difference and accumulate: Absolute difference and accumulate long (top) svint64_t svabalt_n_s64(svint64_t op1, svint32_t op2, int32_t op3); svuint16_t svabalt_n_u16(svuint16_t op1, svuint8_t op2, uint8_t op3); svuint32_t svabalt_n_u32(svuint32_t op1, svuint16_t op2, uint16_t op3); svint32_t svabalt_n_s32(svint32_t op1, svint16_t op2, int16_t op3); svuint64_t svabalt_n_u64(svuint64_t op1, svuint32_t op2, uint32_t op3); svint16_t svabalt_n_s16(svint16_t op1, svint8_t op2, int8_t op3); svint32_t svabalt_s32(svint32_t op1, svint16_t op2, svint16_t op3); svuint32_t svabalt_u32(svuint32_t op1, svuint16_t op2, svuint16_t op3); svuint16_t svabalt_u16(svuint16_t op1, svuint8_t op2, svuint8_t op3); svint64_t svabalt_s64(svint64_t op1, svint32_t op2, svint32_t op3); svint16_t svabalt_s16(svint16_t op1, svint8_t op2, svint8_t op3); svuint64_t svabalt_u64(svuint64_t op1, svuint32_t op2, svuint32_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svabalt(op1, op2, op3) _Generic((op3), \ int32_t: 
svabalt_n_s64, \ uint8_t: svabalt_n_u16, \ uint16_t: svabalt_n_u32, \ int16_t: svabalt_n_s32, \ uint32_t: svabalt_n_u64, \ int8_t: svabalt_n_s16, \ svint16_t: svabalt_s32, \ svuint16_t: svabalt_u32, \ svuint8_t: svabalt_u16, \ svint32_t: svabalt_s64, \ svint8_t: svabalt_s16, \ svuint32_t: svabalt_u64, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Absolute / Widening absolute difference: Absolute difference long (bottom) svint32_t svabdlb_n_s32(svint16_t op1, int16_t op2); svuint64_t svabdlb_n_u64(svuint32_t op1, uint32_t op2); svuint32_t svabdlb_n_u32(svuint16_t op1, uint16_t op2); svuint16_t svabdlb_n_u16(svuint8_t op1, uint8_t op2); svint64_t svabdlb_n_s64(svint32_t op1, int32_t op2); svint16_t svabdlb_n_s16(svint8_t op1, int8_t op2); svuint32_t svabdlb_u32(svuint16_t op1, svuint16_t op2); svuint16_t svabdlb_u16(svuint8_t op1, svuint8_t op2); svint64_t svabdlb_s64(svint32_t op1, svint32_t op2); svint32_t svabdlb_s32(svint16_t op1, svint16_t op2); svint16_t svabdlb_s16(svint8_t op1, svint8_t op2); svuint64_t svabdlb_u64(svuint32_t op1, svuint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svabdlb(op1, op2) _Generic((op2), \ int16_t: svabdlb_n_s32, \ uint32_t: svabdlb_n_u64, \ uint16_t: svabdlb_n_u32, \ uint8_t: svabdlb_n_u16, \ int32_t: svabdlb_n_s64, \ int8_t: svabdlb_n_s16, \ svuint16_t: svabdlb_u32, \ svuint8_t: svabdlb_u16, \ svint32_t: svabdlb_s64, \ svint16_t: svabdlb_s32, \ svint8_t: svabdlb_s16, \ svuint32_t: svabdlb_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Absolute / Widening absolute difference: Absolute difference long (top) svuint16_t svabdlt_u16(svuint8_t op1, svuint8_t op2); svuint16_t svabdlt_n_u16(svuint8_t op1, uint8_t op2); svint64_t svabdlt_n_s64(svint32_t op1, int32_t op2); svint32_t svabdlt_n_s32(svint16_t op1, int16_t op2); svint16_t svabdlt_n_s16(svint8_t op1, int8_t op2); svint64_t svabdlt_s64(svint32_t op1, svint32_t op2); svuint32_t svabdlt_u32(svuint16_t op1, svuint16_t op2); svint32_t svabdlt_s32(svint16_t op1, svint16_t op2); svint16_t svabdlt_s16(svint8_t op1, svint8_t op2); svuint32_t svabdlt_n_u32(svuint16_t op1, uint16_t op2); svuint64_t svabdlt_u64(svuint32_t op1, svuint32_t op2); svuint64_t svabdlt_n_u64(svuint32_t op1, uint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svabdlt(op1, op2) _Generic((op2), \ svuint8_t: svabdlt_u16, \ uint8_t: svabdlt_n_u16, \ int32_t: svabdlt_n_s64, \ int16_t: svabdlt_n_s32, \ int8_t: svabdlt_n_s16, \ svint32_t: svabdlt_s64, \ svuint16_t: svabdlt_u32, \ svint16_t: svabdlt_s32, \ svint8_t: svabdlt_s16, \ uint16_t: svabdlt_n_u32, \ svuint32_t: svabdlt_u64, \ uint32_t: svabdlt_n_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Add / Addition with carry: Add with carry long (bottom) svuint32_t svadclb_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint64_t svadclb_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint32_t svadclb_n_u32(svuint32_t op1, svuint32_t op2, uint32_t op3); svuint64_t svadclb_n_u64(svuint64_t op1, svuint64_t op2, uint64_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svadclb(op1, op2, op3) _Generic((op3), \ svuint32_t: svadclb_u32, \ svuint64_t: svadclb_u64, \ uint32_t: svadclb_n_u32, \ uint64_t: svadclb_n_u64, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Add / Addition with carry: Add with carry long (top) svuint32_t svadclt_n_u32(svuint32_t op1, svuint32_t op2, 
uint32_t op3); svuint64_t svadclt_n_u64(svuint64_t op1, svuint64_t op2, uint64_t op3); svuint32_t svadclt_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint64_t svadclt_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svadclt(op1, op2, op3) _Generic((op3), \ uint32_t: svadclt_n_u32, \ uint64_t: svadclt_n_u64, \ svuint32_t: svadclt_u32, \ svuint64_t: svadclt_u64, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Add / Addition: Halving add svint16_t svhadd_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svhadd_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svint32_t svhadd_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svuint16_t svhadd_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint64_t svhadd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svuint8_t svhadd_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svhadd_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint32_t svhadd_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint64_t svhadd_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svhadd_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svint64_t svhadd_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svuint32_t svhadd_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svint16_t svhadd_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svuint8_t svhadd_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svhadd_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svhadd_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svuint16_t svhadd_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svint8_t svhadd_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svhadd_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svhadd_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svhadd_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svhadd_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svhadd_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svhadd_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svuint64_t svhadd_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint8_t svhadd_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svhadd_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svint32_t svhadd_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint8_t svhadd_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svhadd_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svhadd_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svhadd_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svhadd_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svhadd_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svuint32_t svhadd_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svhadd_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svhadd_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svuint16_t svhadd_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svint32_t svhadd_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svhadd_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint16_t svhadd_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svhadd_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svuint64_t svhadd_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svhadd_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svint16_t 
svhadd_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svuint8_t svhadd_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svhadd_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svuint32_t svhadd_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svhadd_z(pg, op1, op2) _Generic((op2), \ int16_t: svhadd_n_s16_z, \ int8_t: svhadd_n_s8_z, \ int32_t: svhadd_n_s32_z, \ uint8_t: svhadd_n_u8_z, \ uint16_t: svhadd_n_u16_z, \ uint32_t: svhadd_n_u32_z, \ uint64_t: svhadd_n_u64_z, \ svuint32_t: svhadd_u32_z, \ int64_t: svhadd_n_s64_z, \ svuint16_t: svhadd_u16_z, \ svuint64_t: svhadd_u64_z, \ svuint8_t: svhadd_u8_z, \ svint32_t: svhadd_s32_z, \ svint64_t: svhadd_s64_z, \ svint16_t: svhadd_s16_z, \ svint8_t: svhadd_s8_z, \ default: __assume(0) \ )(pg, op1, op2) #define svhadd_x(pg, op1, op2) _Generic((op2), \ uint16_t: svhadd_n_u16_x, \ uint64_t: svhadd_n_u64_x, \ uint32_t: svhadd_n_u32_x, \ int16_t: svhadd_n_s16_x, \ uint8_t: svhadd_n_u8_x, \ int64_t: svhadd_n_s64_x, \ int32_t: svhadd_n_s32_x, \ int8_t: svhadd_n_s8_x, \ svuint16_t: svhadd_u16_x, \ svint8_t: svhadd_s8_x, \ svint32_t: svhadd_s32_x, \ svint16_t: svhadd_s16_x, \ svuint64_t: svhadd_u64_x, \ svuint8_t: svhadd_u8_x, \ svint64_t: svhadd_s64_x, \ svuint32_t: svhadd_u32_x, \ default: __assume(0) \ )(pg, op1, op2) #define svhadd_m(pg, op1, op2) _Generic((op2), \ uint64_t: svhadd_n_u64_m, \ uint32_t: svhadd_n_u32_m, \ uint16_t: svhadd_n_u16_m, \ uint8_t: svhadd_n_u8_m, \ int64_t: svhadd_n_s64_m, \ int32_t: svhadd_n_s32_m, \ svint8_t: svhadd_s8_m, \ svint16_t: svhadd_s16_m, \ svint32_t: svhadd_s32_m, \ svint64_t: svhadd_s64_m, \ svuint8_t: svhadd_u8_m, \ svuint32_t: svhadd_u32_m, \ svuint64_t: svhadd_u64_m, \ svuint16_t: svhadd_u16_m, \ int8_t: svhadd_n_s8_m, \ int16_t: svhadd_n_s16_m, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Add / Addition: Rounding halving add svint32_t svrhadd_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svrhadd_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svrhadd_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint64_t svrhadd_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint16_t svrhadd_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svint64_t svrhadd_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svuint32_t svrhadd_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint8_t svrhadd_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svrhadd_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svrhadd_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svrhadd_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svrhadd_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svrhadd_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svrhadd_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svrhadd_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svrhadd_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint64_t svrhadd_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svrhadd_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svrhadd_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svrhadd_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svrhadd_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svrhadd_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svrhadd_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t 
svrhadd_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint8_t svrhadd_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svrhadd_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svrhadd_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svrhadd_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svuint16_t svrhadd_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svrhadd_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svuint32_t svrhadd_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svint32_t svrhadd_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svrhadd_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svrhadd_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svrhadd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svrhadd_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint64_t svrhadd_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint16_t svrhadd_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svint64_t svrhadd_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svrhadd_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svrhadd_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svrhadd_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svrhadd_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svrhadd_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svrhadd_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svrhadd_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svuint8_t svrhadd_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svrhadd_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrhadd_m(pg, op1, op2) _Generic((op2), \ svint32_t: svrhadd_s32_m, \ svint16_t: svrhadd_s16_m, \ svint8_t: svrhadd_s8_m, \ svint64_t: svrhadd_s64_m, \ svuint8_t: svrhadd_u8_m, \ svuint16_t: svrhadd_u16_m, \ svuint32_t: svrhadd_u32_m, \ svuint64_t: svrhadd_u64_m, \ int8_t: svrhadd_n_s8_m, \ int16_t: svrhadd_n_s16_m, \ int32_t: svrhadd_n_s32_m, \ int64_t: svrhadd_n_s64_m, \ uint64_t: svrhadd_n_u64_m, \ uint32_t: svrhadd_n_u32_m, \ uint16_t: svrhadd_n_u16_m, \ uint8_t: svrhadd_n_u8_m, \ default: __assume(0) \ )(pg, op1, op2) #define svrhadd_z(pg, op1, op2) _Generic((op2), \ svuint16_t: svrhadd_u16_z, \ int64_t: svrhadd_n_s64_z, \ uint32_t: svrhadd_n_u32_z, \ svuint64_t: svrhadd_u64_z, \ svint8_t: svrhadd_s8_z, \ svint16_t: svrhadd_s16_z, \ svint32_t: svrhadd_s32_z, \ svint64_t: svrhadd_s64_z, \ svuint8_t: svrhadd_u8_z, \ uint16_t: svrhadd_n_u16_z, \ uint8_t: svrhadd_n_u8_z, \ svuint32_t: svrhadd_u32_z, \ int32_t: svrhadd_n_s32_z, \ int16_t: svrhadd_n_s16_z, \ int8_t: svrhadd_n_s8_z, \ uint64_t: svrhadd_n_u64_z, \ default: __assume(0) \ )(pg, op1, op2) #define svrhadd_x(pg, op1, op2) _Generic((op2), \ svint8_t: svrhadd_s8_x, \ svint16_t: svrhadd_s16_x, \ svint32_t: svrhadd_s32_x, \ svint64_t: svrhadd_s64_x, \ svuint8_t: svrhadd_u8_x, \ svuint32_t: svrhadd_u32_x, \ svuint64_t: svrhadd_u64_x, \ uint64_t: svrhadd_n_u64_x, \ uint32_t: svrhadd_n_u32_x, \ uint16_t: svrhadd_n_u16_x, \ int64_t: svrhadd_n_s64_x, \ int32_t: svrhadd_n_s32_x, \ int16_t: svrhadd_n_s16_x, \ int8_t: svrhadd_n_s8_x, \ uint8_t: svrhadd_n_u8_x, \ svuint16_t: svrhadd_u16_x, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Add / Narrowing addition: Add narrow high part (bottom) svint32_t svaddhnb_n_s64(svint64_t op1, int64_t op2); svuint8_t 
svaddhnb_n_u16(svuint16_t op1, uint16_t op2); svuint16_t svaddhnb_n_u32(svuint32_t op1, uint32_t op2); svint16_t svaddhnb_n_s32(svint32_t op1, int32_t op2); svuint32_t svaddhnb_n_u64(svuint64_t op1, uint64_t op2); svuint32_t svaddhnb_u64(svuint64_t op1, svuint64_t op2); svint8_t svaddhnb_n_s16(svint16_t op1, int16_t op2); svuint8_t svaddhnb_u16(svuint16_t op1, svuint16_t op2); svint32_t svaddhnb_s64(svint64_t op1, svint64_t op2); svint16_t svaddhnb_s32(svint32_t op1, svint32_t op2); svint8_t svaddhnb_s16(svint16_t op1, svint16_t op2); svuint16_t svaddhnb_u32(svuint32_t op1, svuint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svaddhnb(op1, op2) _Generic((op2), \ int64_t: svaddhnb_n_s64, \ uint16_t: svaddhnb_n_u16, \ uint32_t: svaddhnb_n_u32, \ int32_t: svaddhnb_n_s32, \ uint64_t: svaddhnb_n_u64, \ svuint64_t: svaddhnb_u64, \ int16_t: svaddhnb_n_s16, \ svuint16_t: svaddhnb_u16, \ svint64_t: svaddhnb_s64, \ svint32_t: svaddhnb_s32, \ svint16_t: svaddhnb_s16, \ svuint32_t: svaddhnb_u32, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Add / Narrowing addition: Add narrow high part (top) svuint32_t svaddhnt_n_u64(svuint32_t even, svuint64_t op1, uint64_t op2); svuint16_t svaddhnt_n_u32(svuint16_t even, svuint32_t op1, uint32_t op2); svuint8_t svaddhnt_n_u16(svuint8_t even, svuint16_t op1, uint16_t op2); svint32_t svaddhnt_n_s64(svint32_t even, svint64_t op1, int64_t op2); svint16_t svaddhnt_n_s32(svint16_t even, svint32_t op1, int32_t op2); svint8_t svaddhnt_n_s16(svint8_t even, svint16_t op1, int16_t op2); svuint32_t svaddhnt_u64(svuint32_t even, svuint64_t op1, svuint64_t op2); svuint16_t svaddhnt_u32(svuint16_t even, svuint32_t op1, svuint32_t op2); svint32_t svaddhnt_s64(svint32_t even, svint64_t op1, svint64_t op2); svint16_t svaddhnt_s32(svint16_t even, svint32_t op1, svint32_t op2); svint8_t svaddhnt_s16(svint8_t even, svint16_t op1, svint16_t op2); svuint8_t svaddhnt_u16(svuint8_t even, svuint16_t op1, svuint16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svaddhnt(even, op1, op2) _Generic((op2), \ uint64_t: svaddhnt_n_u64, \ uint32_t: svaddhnt_n_u32, \ uint16_t: svaddhnt_n_u16, \ int64_t: svaddhnt_n_s64, \ int32_t: svaddhnt_n_s32, \ int16_t: svaddhnt_n_s16, \ svuint64_t: svaddhnt_u64, \ svuint32_t: svaddhnt_u32, \ svint64_t: svaddhnt_s64, \ svint32_t: svaddhnt_s32, \ svint16_t: svaddhnt_s16, \ svuint16_t: svaddhnt_u16, \ default: __assume(0) \ )(even, op1, op2) #endif // sve2: Vector arithmetic / Add / Narrowing addition: Rounding add narrow high part (bottom) svint32_t svraddhnb_s64(svint64_t op1, svint64_t op2); svint16_t svraddhnb_s32(svint32_t op1, svint32_t op2); svuint8_t svraddhnb_u16(svuint16_t op1, svuint16_t op2); svuint16_t svraddhnb_u32(svuint32_t op1, svuint32_t op2); svuint32_t svraddhnb_u64(svuint64_t op1, svuint64_t op2); svint8_t svraddhnb_n_s16(svint16_t op1, int16_t op2); svuint8_t svraddhnb_n_u16(svuint16_t op1, uint16_t op2); svuint32_t svraddhnb_n_u64(svuint64_t op1, uint64_t op2); svint16_t svraddhnb_n_s32(svint32_t op1, int32_t op2); svuint16_t svraddhnb_n_u32(svuint32_t op1, uint32_t op2); svint32_t svraddhnb_n_s64(svint64_t op1, int64_t op2); svint8_t svraddhnb_s16(svint16_t op1, svint16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svraddhnb(op1, op2) _Generic((op2), \ svint64_t: svraddhnb_s64, \ svint32_t: svraddhnb_s32, \ svuint16_t: svraddhnb_u16, \ svuint32_t: svraddhnb_u32, \ svuint64_t: svraddhnb_u64, \ int16_t: svraddhnb_n_s16, \ 
uint16_t: svraddhnb_n_u16, \ uint64_t: svraddhnb_n_u64, \ int32_t: svraddhnb_n_s32, \ uint32_t: svraddhnb_n_u32, \ int64_t: svraddhnb_n_s64, \ svint16_t: svraddhnb_s16, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Add / Narrowing addition: Rounding add narrow high part (top) svint8_t svraddhnt_s16(svint8_t even, svint16_t op1, svint16_t op2); svint16_t svraddhnt_s32(svint16_t even, svint32_t op1, svint32_t op2); svint32_t svraddhnt_s64(svint32_t even, svint64_t op1, svint64_t op2); svuint8_t svraddhnt_u16(svuint8_t even, svuint16_t op1, svuint16_t op2); svuint16_t svraddhnt_u32(svuint16_t even, svuint32_t op1, svuint32_t op2); svuint16_t svraddhnt_n_u32(svuint16_t even, svuint32_t op1, uint32_t op2); svuint32_t svraddhnt_n_u64(svuint32_t even, svuint64_t op1, uint64_t op2); svuint8_t svraddhnt_n_u16(svuint8_t even, svuint16_t op1, uint16_t op2); svuint32_t svraddhnt_u64(svuint32_t even, svuint64_t op1, svuint64_t op2); svint8_t svraddhnt_n_s16(svint8_t even, svint16_t op1, int16_t op2); svint16_t svraddhnt_n_s32(svint16_t even, svint32_t op1, int32_t op2); svint32_t svraddhnt_n_s64(svint32_t even, svint64_t op1, int64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svraddhnt(even, op1, op2) _Generic((op2), \ svint16_t: svraddhnt_s16, \ svint32_t: svraddhnt_s32, \ svint64_t: svraddhnt_s64, \ svuint16_t: svraddhnt_u16, \ svuint32_t: svraddhnt_u32, \ uint32_t: svraddhnt_n_u32, \ uint64_t: svraddhnt_n_u64, \ uint16_t: svraddhnt_n_u16, \ svuint64_t: svraddhnt_u64, \ int16_t: svraddhnt_n_s16, \ int32_t: svraddhnt_n_s32, \ int64_t: svraddhnt_n_s64, \ default: __assume(0) \ )(even, op1, op2) #endif // sve2: Vector arithmetic / Add / Saturating addition: Saturating add svuint8_t svqadd_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svuint64_t svqadd_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svqadd_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svqadd_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svqadd_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svqadd_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svint8_t svqadd_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svuint16_t svqadd_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svqadd_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svqadd_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svqadd_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint8_t svqadd_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svqadd_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint64_t svqadd_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svint64_t svqadd_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svqadd_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svqadd_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svqadd_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svqadd_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint64_t svqadd_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svuint16_t svqadd_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svqadd_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svqadd_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svqadd_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svqadd_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svqadd_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svuint8_t svqadd_u8_x(svbool_t pg, svuint8_t op1, svuint8_t 
op2); svint32_t svqadd_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svuint32_t svqadd_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint32_t svqadd_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svqadd_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint64_t svqadd_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svint8_t svqadd_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svqadd_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svqadd_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svuint64_t svqadd_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint16_t svqadd_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svqadd_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svuint32_t svqadd_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svint32_t svqadd_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svqadd_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svqadd_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svqadd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svqadd_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svqadd_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svqadd_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svqadd_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint64_t svqadd_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqadd_m(pg, op1, op2) _Generic((op2), \ uint8_t: svqadd_n_u8_m, \ int8_t: svqadd_n_s8_m, \ svuint64_t: svqadd_u64_m, \ int16_t: svqadd_n_s16_m, \ int64_t: svqadd_n_s64_m, \ svuint16_t: svqadd_u16_m, \ svuint8_t: svqadd_u8_m, \ svint64_t: svqadd_s64_m, \ svint32_t: svqadd_s32_m, \ svint16_t: svqadd_s16_m, \ svint8_t: svqadd_s8_m, \ int32_t: svqadd_n_s32_m, \ svuint32_t: svqadd_u32_m, \ uint16_t: svqadd_n_u16_m, \ uint64_t: svqadd_n_u64_m, \ uint32_t: svqadd_n_u32_m, \ default: __assume(0) \ )(pg, op1, op2) #define svqadd_x(pg, op1, op2) _Generic((op2), \ svuint64_t: svqadd_u64_x, \ svuint32_t: svqadd_u32_x, \ svuint16_t: svqadd_u16_x, \ svint64_t: svqadd_s64_x, \ svint32_t: svqadd_s32_x, \ svint16_t: svqadd_s16_x, \ svint8_t: svqadd_s8_x, \ svuint8_t: svqadd_u8_x, \ int8_t: svqadd_n_s8_x, \ int16_t: svqadd_n_s16_x, \ int32_t: svqadd_n_s32_x, \ uint64_t: svqadd_n_u64_x, \ uint32_t: svqadd_n_u32_x, \ uint16_t: svqadd_n_u16_x, \ uint8_t: svqadd_n_u8_x, \ int64_t: svqadd_n_s64_x, \ default: __assume(0) \ )(pg, op1, op2) #define svqadd_z(pg, op1, op2) _Generic((op2), \ svint8_t: svqadd_s8_z, \ svint16_t: svqadd_s16_z, \ svint32_t: svqadd_s32_z, \ svint64_t: svqadd_s64_z, \ svuint16_t: svqadd_u16_z, \ svuint32_t: svqadd_u32_z, \ svuint64_t: svqadd_u64_z, \ svuint8_t: svqadd_u8_z, \ uint32_t: svqadd_n_u32_z, \ uint64_t: svqadd_n_u64_z, \ uint16_t: svqadd_n_u16_z, \ uint8_t: svqadd_n_u8_z, \ int32_t: svqadd_n_s32_z, \ int16_t: svqadd_n_s16_z, \ int8_t: svqadd_n_s8_z, \ int64_t: svqadd_n_s64_z, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Add / Saturating addition: Saturating add with signed addend svuint8_t svsqadd_u8_m(svbool_t pg, svuint8_t op1, svint8_t op2); svuint32_t svsqadd_u32_m(svbool_t pg, svuint32_t op1, svint32_t op2); svuint64_t svsqadd_u64_m(svbool_t pg, svuint64_t op1, svint64_t op2); svuint8_t svsqadd_u8_x(svbool_t pg, svuint8_t op1, svint8_t op2); svuint16_t svsqadd_u16_x(svbool_t pg, svuint16_t op1, svint16_t op2); svuint64_t svsqadd_u64_x(svbool_t pg, svuint64_t op1, 
svint64_t op2); svuint32_t svsqadd_u32_x(svbool_t pg, svuint32_t op1, svint32_t op2); svuint16_t svsqadd_u16_m(svbool_t pg, svuint16_t op1, svint16_t op2); svuint32_t svsqadd_n_u32_x(svbool_t pg, svuint32_t op1, int32_t op2); svuint16_t svsqadd_n_u16_z(svbool_t pg, svuint16_t op1, int16_t op2); svuint8_t svsqadd_n_u8_z(svbool_t pg, svuint8_t op1, int8_t op2); svuint64_t svsqadd_n_u64_x(svbool_t pg, svuint64_t op1, int64_t op2); svuint16_t svsqadd_n_u16_x(svbool_t pg, svuint16_t op1, int16_t op2); svuint8_t svsqadd_n_u8_x(svbool_t pg, svuint8_t op1, int8_t op2); svuint64_t svsqadd_n_u64_z(svbool_t pg, svuint64_t op1, int64_t op2); svuint64_t svsqadd_n_u64_m(svbool_t pg, svuint64_t op1, int64_t op2); svuint16_t svsqadd_n_u16_m(svbool_t pg, svuint16_t op1, int16_t op2); svuint8_t svsqadd_n_u8_m(svbool_t pg, svuint8_t op1, int8_t op2); svuint64_t svsqadd_u64_z(svbool_t pg, svuint64_t op1, svint64_t op2); svuint32_t svsqadd_u32_z(svbool_t pg, svuint32_t op1, svint32_t op2); svuint16_t svsqadd_u16_z(svbool_t pg, svuint16_t op1, svint16_t op2); svuint8_t svsqadd_u8_z(svbool_t pg, svuint8_t op1, svint8_t op2); svuint32_t svsqadd_n_u32_m(svbool_t pg, svuint32_t op1, int32_t op2); svuint32_t svsqadd_n_u32_z(svbool_t pg, svuint32_t op1, int32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsqadd_m(pg, op1, op2) _Generic((op2), \ svint8_t: svsqadd_u8_m, \ svint32_t: svsqadd_u32_m, \ svint64_t: svsqadd_u64_m, \ svint16_t: svsqadd_u16_m, \ int64_t: svsqadd_n_u64_m, \ int16_t: svsqadd_n_u16_m, \ int8_t: svsqadd_n_u8_m, \ int32_t: svsqadd_n_u32_m, \ default: __assume(0) \ )(pg, op1, op2) #define svsqadd_x(pg, op1, op2) _Generic((op2), \ svint8_t: svsqadd_u8_x, \ svint16_t: svsqadd_u16_x, \ svint64_t: svsqadd_u64_x, \ svint32_t: svsqadd_u32_x, \ int32_t: svsqadd_n_u32_x, \ int64_t: svsqadd_n_u64_x, \ int16_t: svsqadd_n_u16_x, \ int8_t: svsqadd_n_u8_x, \ default: __assume(0) \ )(pg, op1, op2) #define svsqadd_z(pg, op1, op2) _Generic((op2), \ int16_t: svsqadd_n_u16_z, \ int8_t: svsqadd_n_u8_z, \ int64_t: svsqadd_n_u64_z, \ svint64_t: svsqadd_u64_z, \ svint32_t: svsqadd_u32_z, \ svint16_t: svsqadd_u16_z, \ svint8_t: svsqadd_u8_z, \ int32_t: svsqadd_n_u32_z, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Add / Saturating addition: Saturating add with unsigned addend svint16_t svuqadd_n_s16_z(svbool_t pg, svint16_t op1, uint16_t op2); svint64_t svuqadd_s64_x(svbool_t pg, svint64_t op1, svuint64_t op2); svint64_t svuqadd_n_s64_x(svbool_t pg, svint64_t op1, uint64_t op2); svint16_t svuqadd_s16_z(svbool_t pg, svint16_t op1, svuint16_t op2); svint32_t svuqadd_n_s32_x(svbool_t pg, svint32_t op1, uint32_t op2); svint32_t svuqadd_n_s32_z(svbool_t pg, svint32_t op1, uint32_t op2); svint8_t svuqadd_s8_z(svbool_t pg, svint8_t op1, svuint8_t op2); svint8_t svuqadd_s8_m(svbool_t pg, svint8_t op1, svuint8_t op2); svint16_t svuqadd_s16_x(svbool_t pg, svint16_t op1, svuint16_t op2); svint32_t svuqadd_s32_m(svbool_t pg, svint32_t op1, svuint32_t op2); svint64_t svuqadd_s64_m(svbool_t pg, svint64_t op1, svuint64_t op2); svint64_t svuqadd_n_s64_z(svbool_t pg, svint64_t op1, uint64_t op2); svint8_t svuqadd_s8_x(svbool_t pg, svint8_t op1, svuint8_t op2); svint64_t svuqadd_s64_z(svbool_t pg, svint64_t op1, svuint64_t op2); svint32_t svuqadd_s32_x(svbool_t pg, svint32_t op1, svuint32_t op2); svint16_t svuqadd_s16_m(svbool_t pg, svint16_t op1, svuint16_t op2); svint8_t svuqadd_n_s8_m(svbool_t pg, svint8_t op1, uint8_t op2); svint32_t svuqadd_s32_z(svbool_t pg, svint32_t 
op1, svuint32_t op2); svint32_t svuqadd_n_s32_m(svbool_t pg, svint32_t op1, uint32_t op2); svint64_t svuqadd_n_s64_m(svbool_t pg, svint64_t op1, uint64_t op2); svint8_t svuqadd_n_s8_x(svbool_t pg, svint8_t op1, uint8_t op2); svint16_t svuqadd_n_s16_x(svbool_t pg, svint16_t op1, uint16_t op2); svint16_t svuqadd_n_s16_m(svbool_t pg, svint16_t op1, uint16_t op2); svint8_t svuqadd_n_s8_z(svbool_t pg, svint8_t op1, uint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svuqadd_z(pg, op1, op2) _Generic((op2), \ uint16_t: svuqadd_n_s16_z, \ svuint16_t: svuqadd_s16_z, \ uint32_t: svuqadd_n_s32_z, \ svuint8_t: svuqadd_s8_z, \ uint64_t: svuqadd_n_s64_z, \ svuint64_t: svuqadd_s64_z, \ svuint32_t: svuqadd_s32_z, \ uint8_t: svuqadd_n_s8_z, \ default: __assume(0) \ )(pg, op1, op2) #define svuqadd_x(pg, op1, op2) _Generic((op2), \ svuint64_t: svuqadd_s64_x, \ uint64_t: svuqadd_n_s64_x, \ uint32_t: svuqadd_n_s32_x, \ svuint16_t: svuqadd_s16_x, \ svuint8_t: svuqadd_s8_x, \ svuint32_t: svuqadd_s32_x, \ uint8_t: svuqadd_n_s8_x, \ uint16_t: svuqadd_n_s16_x, \ default: __assume(0) \ )(pg, op1, op2) #define svuqadd_m(pg, op1, op2) _Generic((op2), \ svuint8_t: svuqadd_s8_m, \ svuint32_t: svuqadd_s32_m, \ svuint64_t: svuqadd_s64_m, \ svuint16_t: svuqadd_s16_m, \ uint8_t: svuqadd_n_s8_m, \ uint32_t: svuqadd_n_s32_m, \ uint64_t: svuqadd_n_s64_m, \ uint16_t: svuqadd_n_s16_m, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Add / Widening addition: Add long (bottom + top) svint32_t svaddlbt_s32(svint16_t op1, svint16_t op2); svint64_t svaddlbt_s64(svint32_t op1, svint32_t op2); svint16_t svaddlbt_s16(svint8_t op1, svint8_t op2); svint32_t svaddlbt_n_s32(svint16_t op1, int16_t op2); svint64_t svaddlbt_n_s64(svint32_t op1, int32_t op2); svint16_t svaddlbt_n_s16(svint8_t op1, int8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svaddlbt(op1, op2) _Generic((op2), \ svint16_t: svaddlbt_s32, \ svint32_t: svaddlbt_s64, \ svint8_t: svaddlbt_s16, \ int16_t: svaddlbt_n_s32, \ int32_t: svaddlbt_n_s64, \ int8_t: svaddlbt_n_s16, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Add / Widening addition: Add long (bottom) svint16_t svaddlb_s16(svint8_t op1, svint8_t op2); svuint64_t svaddlb_n_u64(svuint32_t op1, uint32_t op2); svuint32_t svaddlb_n_u32(svuint16_t op1, uint16_t op2); svuint16_t svaddlb_n_u16(svuint8_t op1, uint8_t op2); svint64_t svaddlb_n_s64(svint32_t op1, int32_t op2); svint32_t svaddlb_n_s32(svint16_t op1, int16_t op2); svint32_t svaddlb_s32(svint16_t op1, svint16_t op2); svint64_t svaddlb_s64(svint32_t op1, svint32_t op2); svuint64_t svaddlb_u64(svuint32_t op1, svuint32_t op2); svuint32_t svaddlb_u32(svuint16_t op1, svuint16_t op2); svuint16_t svaddlb_u16(svuint8_t op1, svuint8_t op2); svint16_t svaddlb_n_s16(svint8_t op1, int8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svaddlb(op1, op2) _Generic((op2), \ svint8_t: svaddlb_s16, \ uint32_t: svaddlb_n_u64, \ uint16_t: svaddlb_n_u32, \ uint8_t: svaddlb_n_u16, \ int32_t: svaddlb_n_s64, \ int16_t: svaddlb_n_s32, \ svint16_t: svaddlb_s32, \ svint32_t: svaddlb_s64, \ svuint32_t: svaddlb_u64, \ svuint16_t: svaddlb_u32, \ svuint8_t: svaddlb_u16, \ int8_t: svaddlb_n_s16, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Add / Widening addition: Add long (top) svint32_t svaddlt_n_s32(svint16_t op1, int16_t op2); svint16_t svaddlt_s16(svint8_t op1, svint8_t op2); svint32_t svaddlt_s32(svint16_t op1, svint16_t 
op2); svint64_t svaddlt_s64(svint32_t op1, svint32_t op2); svuint16_t svaddlt_u16(svuint8_t op1, svuint8_t op2); svuint32_t svaddlt_u32(svuint16_t op1, svuint16_t op2); svuint64_t svaddlt_u64(svuint32_t op1, svuint32_t op2); svint16_t svaddlt_n_s16(svint8_t op1, int8_t op2); svint64_t svaddlt_n_s64(svint32_t op1, int32_t op2); svuint16_t svaddlt_n_u16(svuint8_t op1, uint8_t op2); svuint32_t svaddlt_n_u32(svuint16_t op1, uint16_t op2); svuint64_t svaddlt_n_u64(svuint32_t op1, uint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svaddlt(op1, op2) _Generic((op2), \ int16_t: svaddlt_n_s32, \ svint8_t: svaddlt_s16, \ svint16_t: svaddlt_s32, \ svint32_t: svaddlt_s64, \ svuint8_t: svaddlt_u16, \ svuint16_t: svaddlt_u32, \ svuint32_t: svaddlt_u64, \ int8_t: svaddlt_n_s16, \ int32_t: svaddlt_n_s64, \ uint8_t: svaddlt_n_u16, \ uint16_t: svaddlt_n_u32, \ uint32_t: svaddlt_n_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Add / Widening addition: Add wide (bottom) svuint64_t svaddwb_n_u64(svuint64_t op1, uint32_t op2); svuint64_t svaddwb_u64(svuint64_t op1, svuint32_t op2); svint16_t svaddwb_n_s16(svint16_t op1, int8_t op2); svint32_t svaddwb_n_s32(svint32_t op1, int16_t op2); svuint16_t svaddwb_n_u16(svuint16_t op1, uint8_t op2); svint64_t svaddwb_n_s64(svint64_t op1, int32_t op2); svuint32_t svaddwb_u32(svuint32_t op1, svuint16_t op2); svuint32_t svaddwb_n_u32(svuint32_t op1, uint16_t op2); svint64_t svaddwb_s64(svint64_t op1, svint32_t op2); svint32_t svaddwb_s32(svint32_t op1, svint16_t op2); svint16_t svaddwb_s16(svint16_t op1, svint8_t op2); svuint16_t svaddwb_u16(svuint16_t op1, svuint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svaddwb(op1, op2) _Generic((op2), \ uint32_t: svaddwb_n_u64, \ svuint32_t: svaddwb_u64, \ int8_t: svaddwb_n_s16, \ int16_t: svaddwb_n_s32, \ uint8_t: svaddwb_n_u16, \ int32_t: svaddwb_n_s64, \ svuint16_t: svaddwb_u32, \ uint16_t: svaddwb_n_u32, \ svint32_t: svaddwb_s64, \ svint16_t: svaddwb_s32, \ svint8_t: svaddwb_s16, \ svuint8_t: svaddwb_u16, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Add / Widening addition: Add wide (top) svint64_t svaddwt_n_s64(svint64_t op1, int32_t op2); svuint16_t svaddwt_n_u16(svuint16_t op1, uint8_t op2); svuint32_t svaddwt_n_u32(svuint32_t op1, uint16_t op2); svuint64_t svaddwt_n_u64(svuint64_t op1, uint32_t op2); svint32_t svaddwt_n_s32(svint32_t op1, int16_t op2); svint16_t svaddwt_n_s16(svint16_t op1, int8_t op2); svuint64_t svaddwt_u64(svuint64_t op1, svuint32_t op2); svuint32_t svaddwt_u32(svuint32_t op1, svuint16_t op2); svuint16_t svaddwt_u16(svuint16_t op1, svuint8_t op2); svint32_t svaddwt_s32(svint32_t op1, svint16_t op2); svint16_t svaddwt_s16(svint16_t op1, svint8_t op2); svint64_t svaddwt_s64(svint64_t op1, svint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svaddwt(op1, op2) _Generic((op2), \ int32_t: svaddwt_n_s64, \ uint8_t: svaddwt_n_u16, \ uint16_t: svaddwt_n_u32, \ uint32_t: svaddwt_n_u64, \ int16_t: svaddwt_n_s32, \ int8_t: svaddwt_n_s16, \ svuint32_t: svaddwt_u64, \ svuint16_t: svaddwt_u32, \ svuint8_t: svaddwt_u16, \ svint16_t: svaddwt_s32, \ svint8_t: svaddwt_s16, \ svint32_t: svaddwt_s64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Logarithm: Base 2 logarithm as integer svint16_t svlogb_f16_x(svbool_t pg, svfloat16_t op); svint64_t svlogb_f64_x(svbool_t pg, svfloat64_t op); svint16_t svlogb_f16_z(svbool_t pg, svfloat16_t op); 
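// Example (illustrative sketch, not part of the original header): using the
// type-generic svlogb_x macro below to extract the base-2 exponent of every
// active lane as a signed integer. svptrue_b32() is assumed to be declared
// elsewhere in this header, as in the ACLE arm_sve.h; the helper name is
// hypothetical.
//
//   static svint32_t example_exponents(svfloat32_t values)
//   {
//       svbool_t pg = svptrue_b32();   // all lanes active
//       return svlogb_x(pg, values);   // e.g. 8.0f -> 3, 0.5f -> -1
//   }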
svint32_t svlogb_f32_z(svbool_t pg, svfloat32_t op); svint64_t svlogb_f64_z(svbool_t pg, svfloat64_t op); svint64_t svlogb_f64_m(svint64_t inactive, svbool_t pg, svfloat64_t op); svint32_t svlogb_f32_m(svint32_t inactive, svbool_t pg, svfloat32_t op); svint16_t svlogb_f16_m(svint16_t inactive, svbool_t pg, svfloat16_t op); svint32_t svlogb_f32_x(svbool_t pg, svfloat32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svlogb_x(pg, op) _Generic((op), \ svfloat16_t: svlogb_f16_x, \ svfloat64_t: svlogb_f64_x, \ svfloat32_t: svlogb_f32_x, \ default: __assume(0) \ )(pg, op) #define svlogb_z(pg, op) _Generic((op), \ svfloat16_t: svlogb_f16_z, \ svfloat32_t: svlogb_f32_z, \ svfloat64_t: svlogb_f64_z, \ default: __assume(0) \ )(pg, op) #define svlogb_m(inactive, pg, op) _Generic((op), \ svfloat64_t: svlogb_f64_m, \ svfloat32_t: svlogb_f32_m, \ svfloat16_t: svlogb_f16_m, \ default: __assume(0) \ )(inactive, pg, op) #endif // sve2: Vector arithmetic / Multiply / Saturating multiply and widen: Saturating doubling multiply long (bottom) svint32_t svqdmullb_s32(svint16_t op1, svint16_t op2); svint64_t svqdmullb_s64(svint32_t op1, svint32_t op2); svint16_t svqdmullb_s16(svint8_t op1, svint8_t op2); svint16_t svqdmullb_n_s16(svint8_t op1, int8_t op2); svint32_t svqdmullb_n_s32(svint16_t op1, int16_t op2); svint64_t svqdmullb_n_s64(svint32_t op1, int32_t op2); svint32_t svqdmullb_lane_s32(svint16_t op1, svint16_t op2, uint64_t imm_index); svint64_t svqdmullb_lane_s64(svint32_t op1, svint32_t op2, uint64_t imm_index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdmullb(op1, op2) _Generic((op2), \ svint16_t: svqdmullb_s32, \ svint32_t: svqdmullb_s64, \ svint8_t: svqdmullb_s16, \ int8_t: svqdmullb_n_s16, \ int16_t: svqdmullb_n_s32, \ int32_t: svqdmullb_n_s64, \ default: __assume(0) \ )(op1, op2) #define svqdmullb_lane(op1, op2, imm_index) _Generic((op2), \ svint16_t: svqdmullb_lane_s32, \ svint32_t: svqdmullb_lane_s64, \ default: __assume(0) \ )(op1, op2, imm_index) #endif // sve2: Vector arithmetic / Multiply / Saturating multiply and widen: Saturating doubling multiply long (top) svint64_t svqdmullt_lane_s64(svint32_t op1, svint32_t op2, uint64_t imm_index); svint64_t svqdmullt_s64(svint32_t op1, svint32_t op2); svint32_t svqdmullt_lane_s32(svint16_t op1, svint16_t op2, uint64_t imm_index); svint64_t svqdmullt_n_s64(svint32_t op1, int32_t op2); svint32_t svqdmullt_n_s32(svint16_t op1, int16_t op2); svint16_t svqdmullt_n_s16(svint8_t op1, int8_t op2); svint32_t svqdmullt_s32(svint16_t op1, svint16_t op2); svint16_t svqdmullt_s16(svint8_t op1, svint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdmullt_lane(op1, op2, imm_index) _Generic((op2), \ svint32_t: svqdmullt_lane_s64, \ svint16_t: svqdmullt_lane_s32, \ default: __assume(0) \ )(op1, op2, imm_index) #define svqdmullt(op1, op2) _Generic((op2), \ svint32_t: svqdmullt_s64, \ int32_t: svqdmullt_n_s64, \ int16_t: svqdmullt_n_s32, \ int8_t: svqdmullt_n_s16, \ svint16_t: svqdmullt_s32, \ svint8_t: svqdmullt_s16, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Multiply / Saturating multiply: Saturating doubling multiply high svint64_t svqdmulh_lane_s64(svint64_t op1, svint64_t op2, uint64_t imm_index); svint32_t svqdmulh_n_s32(svint32_t op1, int32_t op2); svint64_t svqdmulh_n_s64(svint64_t op1, int64_t op2); svint16_t svqdmulh_lane_s16(svint16_t op1, svint16_t op2, uint64_t imm_index); svint32_t svqdmulh_lane_s32(svint32_t op1, svint32_t op2, uint64_t 
imm_index); svint64_t svqdmulh_s64(svint64_t op1, svint64_t op2); svint32_t svqdmulh_s32(svint32_t op1, svint32_t op2); svint16_t svqdmulh_s16(svint16_t op1, svint16_t op2); svint16_t svqdmulh_n_s16(svint16_t op1, int16_t op2); svint8_t svqdmulh_s8(svint8_t op1, svint8_t op2); svint8_t svqdmulh_n_s8(svint8_t op1, int8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdmulh_lane(op1, op2, imm_index) _Generic((op2), \ svint64_t: svqdmulh_lane_s64, \ svint16_t: svqdmulh_lane_s16, \ svint32_t: svqdmulh_lane_s32, \ default: __assume(0) \ )(op1, op2, imm_index) #define svqdmulh(op1, op2) _Generic((op2), \ int32_t: svqdmulh_n_s32, \ int64_t: svqdmulh_n_s64, \ svint64_t: svqdmulh_s64, \ svint32_t: svqdmulh_s32, \ svint16_t: svqdmulh_s16, \ int16_t: svqdmulh_n_s16, \ svint8_t: svqdmulh_s8, \ int8_t: svqdmulh_n_s8, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Multiply / Saturating multiply: Saturating rounding doubling multiply high svint16_t svqrdmulh_s16(svint16_t op1, svint16_t op2); svint64_t svqrdmulh_lane_s64(svint64_t op1, svint64_t op2, uint64_t imm_index); svint32_t svqrdmulh_lane_s32(svint32_t op1, svint32_t op2, uint64_t imm_index); svint16_t svqrdmulh_lane_s16(svint16_t op1, svint16_t op2, uint64_t imm_index); svint64_t svqrdmulh_n_s64(svint64_t op1, int64_t op2); svint32_t svqrdmulh_n_s32(svint32_t op1, int32_t op2); svint8_t svqrdmulh_s8(svint8_t op1, svint8_t op2); svint8_t svqrdmulh_n_s8(svint8_t op1, int8_t op2); svint64_t svqrdmulh_s64(svint64_t op1, svint64_t op2); svint32_t svqrdmulh_s32(svint32_t op1, svint32_t op2); svint16_t svqrdmulh_n_s16(svint16_t op1, int16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqrdmulh(op1, op2) _Generic((op2), \ svint16_t: svqrdmulh_s16, \ int64_t: svqrdmulh_n_s64, \ int32_t: svqrdmulh_n_s32, \ svint8_t: svqrdmulh_s8, \ int8_t: svqrdmulh_n_s8, \ svint64_t: svqrdmulh_s64, \ svint32_t: svqrdmulh_s32, \ int16_t: svqrdmulh_n_s16, \ default: __assume(0) \ )(op1, op2) #define svqrdmulh_lane(op1, op2, imm_index) _Generic((op2), \ svint64_t: svqrdmulh_lane_s64, \ svint32_t: svqrdmulh_lane_s32, \ svint16_t: svqrdmulh_lane_s16, \ default: __assume(0) \ )(op1, op2, imm_index) #endif // sve2: Vector arithmetic / Multiply / Widening multiplication: Multiply long (bottom) svuint32_t svmullb_n_u32(svuint16_t op1, uint16_t op2); svuint64_t svmullb_lane_u64(svuint32_t op1, svuint32_t op2, uint64_t imm_index); svuint32_t svmullb_lane_u32(svuint16_t op1, svuint16_t op2, uint64_t imm_index); svint64_t svmullb_lane_s64(svint32_t op1, svint32_t op2, uint64_t imm_index); svint32_t svmullb_lane_s32(svint16_t op1, svint16_t op2, uint64_t imm_index); svuint64_t svmullb_n_u64(svuint32_t op1, uint32_t op2); svuint16_t svmullb_n_u16(svuint8_t op1, uint8_t op2); svint32_t svmullb_n_s32(svint16_t op1, int16_t op2); svint64_t svmullb_n_s64(svint32_t op1, int32_t op2); svuint64_t svmullb_u64(svuint32_t op1, svuint32_t op2); svuint32_t svmullb_u32(svuint16_t op1, svuint16_t op2); svuint16_t svmullb_u16(svuint8_t op1, svuint8_t op2); svint64_t svmullb_s64(svint32_t op1, svint32_t op2); svint32_t svmullb_s32(svint16_t op1, svint16_t op2); svint16_t svmullb_s16(svint8_t op1, svint8_t op2); svint16_t svmullb_n_s16(svint8_t op1, int8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmullb(op1, op2) _Generic((op2), \ uint16_t: svmullb_n_u32, \ uint32_t: svmullb_n_u64, \ uint8_t: svmullb_n_u16, \ int16_t: svmullb_n_s32, \ int32_t: svmullb_n_s64, \ svuint32_t: 
svmullb_u64, \ svuint16_t: svmullb_u32, \ svuint8_t: svmullb_u16, \ svint32_t: svmullb_s64, \ svint16_t: svmullb_s32, \ svint8_t: svmullb_s16, \ int8_t: svmullb_n_s16, \ default: __assume(0) \ )(op1, op2) #define svmullb_lane(op1, op2, imm_index) _Generic((op2), \ svuint32_t: svmullb_lane_u64, \ svuint16_t: svmullb_lane_u32, \ svint32_t: svmullb_lane_s64, \ svint16_t: svmullb_lane_s32, \ default: __assume(0) \ )(op1, op2, imm_index) #endif // sve2: Vector arithmetic / Multiply / Widening multiplication: Multiply long (top) svint32_t svmullt_n_s32(svint16_t op1, int16_t op2); svint64_t svmullt_n_s64(svint32_t op1, int32_t op2); svuint32_t svmullt_lane_u32(svuint16_t op1, svuint16_t op2, uint64_t imm_index); svuint64_t svmullt_lane_u64(svuint32_t op1, svuint32_t op2, uint64_t imm_index); svint64_t svmullt_lane_s64(svint32_t op1, svint32_t op2, uint64_t imm_index); svuint16_t svmullt_n_u16(svuint8_t op1, uint8_t op2); svint16_t svmullt_n_s16(svint8_t op1, int8_t op2); svuint64_t svmullt_u64(svuint32_t op1, svuint32_t op2); svuint32_t svmullt_u32(svuint16_t op1, svuint16_t op2); svuint16_t svmullt_u16(svuint8_t op1, svuint8_t op2); svuint32_t svmullt_n_u32(svuint16_t op1, uint16_t op2); svuint64_t svmullt_n_u64(svuint32_t op1, uint32_t op2); svint32_t svmullt_lane_s32(svint16_t op1, svint16_t op2, uint64_t imm_index); svint64_t svmullt_s64(svint32_t op1, svint32_t op2); svint32_t svmullt_s32(svint16_t op1, svint16_t op2); svint16_t svmullt_s16(svint8_t op1, svint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmullt(op1, op2) _Generic((op2), \ int16_t: svmullt_n_s32, \ int32_t: svmullt_n_s64, \ uint8_t: svmullt_n_u16, \ int8_t: svmullt_n_s16, \ svuint32_t: svmullt_u64, \ svuint16_t: svmullt_u32, \ svuint8_t: svmullt_u16, \ uint16_t: svmullt_n_u32, \ uint32_t: svmullt_n_u64, \ svint32_t: svmullt_s64, \ svint16_t: svmullt_s32, \ svint8_t: svmullt_s16, \ default: __assume(0) \ )(op1, op2) #define svmullt_lane(op1, op2, imm_index) _Generic((op2), \ svuint16_t: svmullt_lane_u32, \ svuint32_t: svmullt_lane_u64, \ svint32_t: svmullt_lane_s64, \ svint16_t: svmullt_lane_s32, \ default: __assume(0) \ )(op1, op2, imm_index) #endif // sve2: Vector arithmetic / Multiply-accumulate / Multiply-accumulate and widen: Multiply-add long (bottom) svuint32_t svmlalb_lane_u32(svuint32_t op1, svuint16_t op2, svuint16_t op3, uint64_t imm_index); svint64_t svmlalb_lane_s64(svint64_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index); svint32_t svmlalb_lane_s32(svint32_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index); svfloat32_t svmlalb_f32(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3); svuint64_t svmlalb_n_u64(svuint64_t op1, svuint32_t op2, uint32_t op3); svuint32_t svmlalb_n_u32(svuint32_t op1, svuint16_t op2, uint16_t op3); svint16_t svmlalb_n_s16(svint16_t op1, svint8_t op2, int8_t op3); svint64_t svmlalb_n_s64(svint64_t op1, svint32_t op2, int32_t op3); svint32_t svmlalb_n_s32(svint32_t op1, svint16_t op2, int16_t op3); svuint64_t svmlalb_lane_u64(svuint64_t op1, svuint32_t op2, svuint32_t op3, uint64_t imm_index); svuint64_t svmlalb_u64(svuint64_t op1, svuint32_t op2, svuint32_t op3); svuint32_t svmlalb_u32(svuint32_t op1, svuint16_t op2, svuint16_t op3); svuint16_t svmlalb_u16(svuint16_t op1, svuint8_t op2, svuint8_t op3); svint64_t svmlalb_s64(svint64_t op1, svint32_t op2, svint32_t op3); svuint16_t svmlalb_n_u16(svuint16_t op1, svuint8_t op2, uint8_t op3); svfloat32_t svmlalb_lane_f32(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3, uint64_t imm_index); 
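// Example (illustrative sketch, not part of the original header): widening
// multiply-accumulate with the type-generic svmlalb macro defined below. For
// svuint16_t operands it resolves to svmlalb_u32, which multiplies the
// even-numbered (bottom) elements of a and b, widens the products to 32 bits,
// and adds them to the accumulator. The helper name is hypothetical.
//
//   static svuint32_t example_mlalb_step(svuint32_t acc, svuint16_t a, svuint16_t b)
//   {
//       return svmlalb(acc, a, b);   // acc[i] += (uint32_t)a[2*i] * (uint32_t)b[2*i]
//   }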
svint16_t svmlalb_s16(svint16_t op1, svint8_t op2, svint8_t op3); svint32_t svmlalb_s32(svint32_t op1, svint16_t op2, svint16_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmlalb_lane(op1, op2, op3, imm_index) _Generic((op3), \ svuint16_t: svmlalb_lane_u32, \ svint32_t: svmlalb_lane_s64, \ svint16_t: svmlalb_lane_s32, \ svuint32_t: svmlalb_lane_u64, \ svfloat16_t: svmlalb_lane_f32, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #define svmlalb(op1, op2, op3) _Generic((op3), \ svfloat16_t: svmlalb_f32, \ uint32_t: svmlalb_n_u64, \ uint16_t: svmlalb_n_u32, \ int8_t: svmlalb_n_s16, \ int32_t: svmlalb_n_s64, \ int16_t: svmlalb_n_s32, \ svuint32_t: svmlalb_u64, \ svuint16_t: svmlalb_u32, \ svuint8_t: svmlalb_u16, \ svint32_t: svmlalb_s64, \ uint8_t: svmlalb_n_u16, \ svint8_t: svmlalb_s16, \ svint16_t: svmlalb_s32, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Multiply-accumulate / Multiply-accumulate and widen: Multiply-add long (top) svint32_t svmlalt_s32(svint32_t op1, svint16_t op2, svint16_t op3); svfloat32_t svmlalt_lane_f32(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3, uint64_t imm_index); svuint64_t svmlalt_lane_u64(svuint64_t op1, svuint32_t op2, svuint32_t op3, uint64_t imm_index); svint64_t svmlalt_lane_s64(svint64_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index); svint32_t svmlalt_lane_s32(svint32_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index); svfloat32_t svmlalt_f32(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3); svint16_t svmlalt_s16(svint16_t op1, svint8_t op2, svint8_t op3); svuint64_t svmlalt_n_u64(svuint64_t op1, svuint32_t op2, uint32_t op3); svuint16_t svmlalt_n_u16(svuint16_t op1, svuint8_t op2, uint8_t op3); svint64_t svmlalt_n_s64(svint64_t op1, svint32_t op2, int32_t op3); svint32_t svmlalt_n_s32(svint32_t op1, svint16_t op2, int16_t op3); svint16_t svmlalt_n_s16(svint16_t op1, svint8_t op2, int8_t op3); svuint64_t svmlalt_u64(svuint64_t op1, svuint32_t op2, svuint32_t op3); svuint32_t svmlalt_u32(svuint32_t op1, svuint16_t op2, svuint16_t op3); svint64_t svmlalt_s64(svint64_t op1, svint32_t op2, svint32_t op3); svuint32_t svmlalt_n_u32(svuint32_t op1, svuint16_t op2, uint16_t op3); svuint32_t svmlalt_lane_u32(svuint32_t op1, svuint16_t op2, svuint16_t op3, uint64_t imm_index); svuint16_t svmlalt_u16(svuint16_t op1, svuint8_t op2, svuint8_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmlalt(op1, op2, op3) _Generic((op3), \ svint16_t: svmlalt_s32, \ svfloat16_t: svmlalt_f32, \ svint8_t: svmlalt_s16, \ uint32_t: svmlalt_n_u64, \ uint8_t: svmlalt_n_u16, \ int32_t: svmlalt_n_s64, \ int16_t: svmlalt_n_s32, \ int8_t: svmlalt_n_s16, \ svuint32_t: svmlalt_u64, \ svuint16_t: svmlalt_u32, \ svint32_t: svmlalt_s64, \ uint16_t: svmlalt_n_u32, \ svuint8_t: svmlalt_u16, \ default: __assume(0) \ )(op1, op2, op3) #define svmlalt_lane(op1, op2, op3, imm_index) _Generic((op3), \ svfloat16_t: svmlalt_lane_f32, \ svuint32_t: svmlalt_lane_u64, \ svint32_t: svmlalt_lane_s64, \ svint16_t: svmlalt_lane_s32, \ svuint16_t: svmlalt_lane_u32, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #endif // sve2: Vector arithmetic / Multiply-accumulate / Multiply-accumulate and widen: Multiply-subtract long (bottom) svuint32_t svmlslb_lane_u32(svuint32_t op1, svuint16_t op2, svuint16_t op3, uint64_t imm_index); svint64_t svmlslb_lane_s64(svint64_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index); svint32_t svmlslb_lane_s32(svint32_t op1, svint16_t op2, svint16_t op3, 
uint64_t imm_index); svfloat32_t svmlslb_f32(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3); svuint64_t svmlslb_n_u64(svuint64_t op1, svuint32_t op2, uint32_t op3); svuint32_t svmlslb_n_u32(svuint32_t op1, svuint16_t op2, uint16_t op3); svint32_t svmlslb_n_s32(svint32_t op1, svint16_t op2, int16_t op3); svint64_t svmlslb_n_s64(svint64_t op1, svint32_t op2, int32_t op3); svuint64_t svmlslb_lane_u64(svuint64_t op1, svuint32_t op2, svuint32_t op3, uint64_t imm_index); svint16_t svmlslb_n_s16(svint16_t op1, svint8_t op2, int8_t op3); svuint64_t svmlslb_u64(svuint64_t op1, svuint32_t op2, svuint32_t op3); svuint32_t svmlslb_u32(svuint32_t op1, svuint16_t op2, svuint16_t op3); svuint16_t svmlslb_u16(svuint16_t op1, svuint8_t op2, svuint8_t op3); svint64_t svmlslb_s64(svint64_t op1, svint32_t op2, svint32_t op3); svuint16_t svmlslb_n_u16(svuint16_t op1, svuint8_t op2, uint8_t op3); svfloat32_t svmlslb_lane_f32(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3, uint64_t imm_index); svint16_t svmlslb_s16(svint16_t op1, svint8_t op2, svint8_t op3); svint32_t svmlslb_s32(svint32_t op1, svint16_t op2, svint16_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmlslb_lane(op1, op2, op3, imm_index) _Generic((op3), \ svuint16_t: svmlslb_lane_u32, \ svint32_t: svmlslb_lane_s64, \ svint16_t: svmlslb_lane_s32, \ svuint32_t: svmlslb_lane_u64, \ svfloat16_t: svmlslb_lane_f32, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #define svmlslb(op1, op2, op3) _Generic((op3), \ svfloat16_t: svmlslb_f32, \ uint32_t: svmlslb_n_u64, \ uint16_t: svmlslb_n_u32, \ int16_t: svmlslb_n_s32, \ int32_t: svmlslb_n_s64, \ int8_t: svmlslb_n_s16, \ svuint32_t: svmlslb_u64, \ svuint16_t: svmlslb_u32, \ svuint8_t: svmlslb_u16, \ svint32_t: svmlslb_s64, \ uint8_t: svmlslb_n_u16, \ svint8_t: svmlslb_s16, \ svint16_t: svmlslb_s32, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Multiply-accumulate / Multiply-accumulate and widen: Multiply-subtract long (top) svuint32_t svmlslt_lane_u32(svuint32_t op1, svuint16_t op2, svuint16_t op3, uint64_t imm_index); svint64_t svmlslt_lane_s64(svint64_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index); svfloat32_t svmlslt_lane_f32(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3, uint64_t imm_index); svint32_t svmlslt_lane_s32(svint32_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index); svfloat32_t svmlslt_f32(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3); svuint64_t svmlslt_n_u64(svuint64_t op1, svuint32_t op2, uint32_t op3); svint16_t svmlslt_s16(svint16_t op1, svint8_t op2, svint8_t op3); svuint32_t svmlslt_n_u32(svuint32_t op1, svuint16_t op2, uint16_t op3); svint64_t svmlslt_n_s64(svint64_t op1, svint32_t op2, int32_t op3); svint32_t svmlslt_n_s32(svint32_t op1, svint16_t op2, int16_t op3); svint16_t svmlslt_n_s16(svint16_t op1, svint8_t op2, int8_t op3); svuint64_t svmlslt_u64(svuint64_t op1, svuint32_t op2, svuint32_t op3); svuint32_t svmlslt_u32(svuint32_t op1, svuint16_t op2, svuint16_t op3); svuint16_t svmlslt_u16(svuint16_t op1, svuint8_t op2, svuint8_t op3); svint64_t svmlslt_s64(svint64_t op1, svint32_t op2, svint32_t op3); svuint16_t svmlslt_n_u16(svuint16_t op1, svuint8_t op2, uint8_t op3); svint32_t svmlslt_s32(svint32_t op1, svint16_t op2, svint16_t op3); svuint64_t svmlslt_lane_u64(svuint64_t op1, svuint32_t op2, svuint32_t op3, uint64_t imm_index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmlslt_lane(op1, op2, op3, imm_index) _Generic((op3), \ svuint16_t: 
svmlslt_lane_u32, \ svint32_t: svmlslt_lane_s64, \ svfloat16_t: svmlslt_lane_f32, \ svint16_t: svmlslt_lane_s32, \ svuint32_t: svmlslt_lane_u64, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #define svmlslt(op1, op2, op3) _Generic((op3), \ svfloat16_t: svmlslt_f32, \ uint32_t: svmlslt_n_u64, \ svint8_t: svmlslt_s16, \ uint16_t: svmlslt_n_u32, \ int32_t: svmlslt_n_s64, \ int16_t: svmlslt_n_s32, \ int8_t: svmlslt_n_s16, \ svuint32_t: svmlslt_u64, \ svuint16_t: svmlslt_u32, \ svuint8_t: svmlslt_u16, \ svint32_t: svmlslt_s64, \ uint8_t: svmlslt_n_u16, \ svint16_t: svmlslt_s32, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Multiply-accumulate / Saturating multiply-accumulate and widen: Saturating doubling multiply-add long (bottom x top) svint32_t svqdmlalbt_n_s32(svint32_t op1, svint16_t op2, int16_t op3); svint32_t svqdmlalbt_s32(svint32_t op1, svint16_t op2, svint16_t op3); svint64_t svqdmlalbt_n_s64(svint64_t op1, svint32_t op2, int32_t op3); svint16_t svqdmlalbt_n_s16(svint16_t op1, svint8_t op2, int8_t op3); svint64_t svqdmlalbt_s64(svint64_t op1, svint32_t op2, svint32_t op3); svint16_t svqdmlalbt_s16(svint16_t op1, svint8_t op2, svint8_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdmlalbt(op1, op2, op3) _Generic((op3), \ int16_t: svqdmlalbt_n_s32, \ svint16_t: svqdmlalbt_s32, \ int32_t: svqdmlalbt_n_s64, \ int8_t: svqdmlalbt_n_s16, \ svint32_t: svqdmlalbt_s64, \ svint8_t: svqdmlalbt_s16, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Multiply-accumulate / Saturating multiply-accumulate and widen: Saturating doubling multiply-add long (bottom) svint16_t svqdmlalb_s16(svint16_t op1, svint8_t op2, svint8_t op3); svint64_t svqdmlalb_lane_s64(svint64_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index); svint32_t svqdmlalb_s32(svint32_t op1, svint16_t op2, svint16_t op3); svint32_t svqdmlalb_lane_s32(svint32_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index); svint64_t svqdmlalb_n_s64(svint64_t op1, svint32_t op2, int32_t op3); svint32_t svqdmlalb_n_s32(svint32_t op1, svint16_t op2, int16_t op3); svint16_t svqdmlalb_n_s16(svint16_t op1, svint8_t op2, int8_t op3); svint64_t svqdmlalb_s64(svint64_t op1, svint32_t op2, svint32_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdmlalb(op1, op2, op3) _Generic((op3), \ svint8_t: svqdmlalb_s16, \ svint16_t: svqdmlalb_s32, \ int32_t: svqdmlalb_n_s64, \ int16_t: svqdmlalb_n_s32, \ int8_t: svqdmlalb_n_s16, \ svint32_t: svqdmlalb_s64, \ default: __assume(0) \ )(op1, op2, op3) #define svqdmlalb_lane(op1, op2, op3, imm_index) _Generic((op3), \ svint32_t: svqdmlalb_lane_s64, \ svint16_t: svqdmlalb_lane_s32, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #endif // sve2: Vector arithmetic / Multiply-accumulate / Saturating multiply-accumulate and widen: Saturating doubling multiply-add long (top) svint16_t svqdmlalt_n_s16(svint16_t op1, svint8_t op2, int8_t op3); svint32_t svqdmlalt_n_s32(svint32_t op1, svint16_t op2, int16_t op3); svint64_t svqdmlalt_n_s64(svint64_t op1, svint32_t op2, int32_t op3); svint32_t svqdmlalt_lane_s32(svint32_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index); svint64_t svqdmlalt_s64(svint64_t op1, svint32_t op2, svint32_t op3); svint64_t svqdmlalt_lane_s64(svint64_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index); svint32_t svqdmlalt_s32(svint32_t op1, svint16_t op2, svint16_t op3); svint16_t svqdmlalt_s16(svint16_t op1, svint8_t op2, svint8_t op3); #if 
defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdmlalt(op1, op2, op3) _Generic((op3), \ int8_t: svqdmlalt_n_s16, \ int16_t: svqdmlalt_n_s32, \ int32_t: svqdmlalt_n_s64, \ svint32_t: svqdmlalt_s64, \ svint16_t: svqdmlalt_s32, \ svint8_t: svqdmlalt_s16, \ default: __assume(0) \ )(op1, op2, op3) #define svqdmlalt_lane(op1, op2, op3, imm_index) _Generic((op3), \ svint16_t: svqdmlalt_lane_s32, \ svint32_t: svqdmlalt_lane_s64, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #endif // sve2: Vector arithmetic / Multiply-accumulate / Saturating multiply-accumulate and widen: Saturating doubling multiply-subtract long (bottom x top) svint16_t svqdmlslbt_s16(svint16_t op1, svint8_t op2, svint8_t op3); svint64_t svqdmlslbt_s64(svint64_t op1, svint32_t op2, svint32_t op3); svint16_t svqdmlslbt_n_s16(svint16_t op1, svint8_t op2, int8_t op3); svint32_t svqdmlslbt_n_s32(svint32_t op1, svint16_t op2, int16_t op3); svint64_t svqdmlslbt_n_s64(svint64_t op1, svint32_t op2, int32_t op3); svint32_t svqdmlslbt_s32(svint32_t op1, svint16_t op2, svint16_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdmlslbt(op1, op2, op3) _Generic((op3), \ svint8_t: svqdmlslbt_s16, \ svint32_t: svqdmlslbt_s64, \ int8_t: svqdmlslbt_n_s16, \ int16_t: svqdmlslbt_n_s32, \ int32_t: svqdmlslbt_n_s64, \ svint16_t: svqdmlslbt_s32, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Multiply-accumulate / Saturating multiply-accumulate and widen: Saturating doubling multiply-subtract long (bottom) svint32_t svqdmlslb_lane_s32(svint32_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index); svint64_t svqdmlslb_n_s64(svint64_t op1, svint32_t op2, int32_t op3); svint32_t svqdmlslb_n_s32(svint32_t op1, svint16_t op2, int16_t op3); svint16_t svqdmlslb_n_s16(svint16_t op1, svint8_t op2, int8_t op3); svint64_t svqdmlslb_s64(svint64_t op1, svint32_t op2, svint32_t op3); svint32_t svqdmlslb_s32(svint32_t op1, svint16_t op2, svint16_t op3); svint16_t svqdmlslb_s16(svint16_t op1, svint8_t op2, svint8_t op3); svint64_t svqdmlslb_lane_s64(svint64_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdmlslb_lane(op1, op2, op3, imm_index) _Generic((op3), \ svint16_t: svqdmlslb_lane_s32, \ svint32_t: svqdmlslb_lane_s64, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #define svqdmlslb(op1, op2, op3) _Generic((op3), \ int32_t: svqdmlslb_n_s64, \ int16_t: svqdmlslb_n_s32, \ int8_t: svqdmlslb_n_s16, \ svint32_t: svqdmlslb_s64, \ svint16_t: svqdmlslb_s32, \ svint8_t: svqdmlslb_s16, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Multiply-accumulate / Saturating multiply-accumulate and widen: Saturating doubling multiply-subtract long (top) svint32_t svqdmlslt_lane_s32(svint32_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index); svint16_t svqdmlslt_s16(svint16_t op1, svint8_t op2, svint8_t op3); svint32_t svqdmlslt_s32(svint32_t op1, svint16_t op2, svint16_t op3); svint64_t svqdmlslt_s64(svint64_t op1, svint32_t op2, svint32_t op3); svint16_t svqdmlslt_n_s16(svint16_t op1, svint8_t op2, int8_t op3); svint32_t svqdmlslt_n_s32(svint32_t op1, svint16_t op2, int16_t op3); svint64_t svqdmlslt_n_s64(svint64_t op1, svint32_t op2, int32_t op3); svint64_t svqdmlslt_lane_s64(svint64_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqdmlslt_lane(op1, op2, op3, imm_index) _Generic((op3), \ 
svint16_t: svqdmlslt_lane_s32, \ svint32_t: svqdmlslt_lane_s64, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #define svqdmlslt(op1, op2, op3) _Generic((op3), \ svint8_t: svqdmlslt_s16, \ svint16_t: svqdmlslt_s32, \ svint32_t: svqdmlslt_s64, \ int8_t: svqdmlslt_n_s16, \ int16_t: svqdmlslt_n_s32, \ int32_t: svqdmlslt_n_s64, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Multiply-accumulate / Saturating multiply-accumulate: Saturating rounding doubling multiply-add high svint32_t svqrdmlah_lane_s32(svint32_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index); svint16_t svqrdmlah_lane_s16(svint16_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index); svint64_t svqrdmlah_n_s64(svint64_t op1, svint64_t op2, int64_t op3); svint8_t svqrdmlah_s8(svint8_t op1, svint8_t op2, svint8_t op3); svint64_t svqrdmlah_lane_s64(svint64_t op1, svint64_t op2, svint64_t op3, uint64_t imm_index); svint8_t svqrdmlah_n_s8(svint8_t op1, svint8_t op2, int8_t op3); svint16_t svqrdmlah_n_s16(svint16_t op1, svint16_t op2, int16_t op3); svint32_t svqrdmlah_s32(svint32_t op1, svint32_t op2, svint32_t op3); svint32_t svqrdmlah_n_s32(svint32_t op1, svint32_t op2, int32_t op3); svint64_t svqrdmlah_s64(svint64_t op1, svint64_t op2, svint64_t op3); svint16_t svqrdmlah_s16(svint16_t op1, svint16_t op2, svint16_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqrdmlah_lane(op1, op2, op3, imm_index) _Generic((op3), \ svint32_t: svqrdmlah_lane_s32, \ svint16_t: svqrdmlah_lane_s16, \ svint64_t: svqrdmlah_lane_s64, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #define svqrdmlah(op1, op2, op3) _Generic((op3), \ int64_t: svqrdmlah_n_s64, \ svint8_t: svqrdmlah_s8, \ int8_t: svqrdmlah_n_s8, \ int16_t: svqrdmlah_n_s16, \ svint32_t: svqrdmlah_s32, \ int32_t: svqrdmlah_n_s32, \ svint64_t: svqrdmlah_s64, \ svint16_t: svqrdmlah_s16, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Multiply-accumulate / Saturating multiply-accumulate: Saturating rounding doubling multiply-subtract high svint8_t svqrdmlsh_s8(svint8_t op1, svint8_t op2, svint8_t op3); svint16_t svqrdmlsh_s16(svint16_t op1, svint16_t op2, svint16_t op3); svint32_t svqrdmlsh_s32(svint32_t op1, svint32_t op2, svint32_t op3); svint64_t svqrdmlsh_s64(svint64_t op1, svint64_t op2, svint64_t op3); svint16_t svqrdmlsh_n_s16(svint16_t op1, svint16_t op2, int16_t op3); svint32_t svqrdmlsh_n_s32(svint32_t op1, svint32_t op2, int32_t op3); svint64_t svqrdmlsh_n_s64(svint64_t op1, svint64_t op2, int64_t op3); svint16_t svqrdmlsh_lane_s16(svint16_t op1, svint16_t op2, svint16_t op3, uint64_t imm_index); svint32_t svqrdmlsh_lane_s32(svint32_t op1, svint32_t op2, svint32_t op3, uint64_t imm_index); svint64_t svqrdmlsh_lane_s64(svint64_t op1, svint64_t op2, svint64_t op3, uint64_t imm_index); svint8_t svqrdmlsh_n_s8(svint8_t op1, svint8_t op2, int8_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqrdmlsh(op1, op2, op3) _Generic((op3), \ svint8_t: svqrdmlsh_s8, \ svint16_t: svqrdmlsh_s16, \ svint32_t: svqrdmlsh_s32, \ svint64_t: svqrdmlsh_s64, \ int16_t: svqrdmlsh_n_s16, \ int32_t: svqrdmlsh_n_s32, \ int64_t: svqrdmlsh_n_s64, \ int8_t: svqrdmlsh_n_s8, \ default: __assume(0) \ )(op1, op2, op3) #define svqrdmlsh_lane(op1, op2, op3, imm_index) _Generic((op3), \ svint16_t: svqrdmlsh_lane_s16, \ svint32_t: svqrdmlsh_lane_s32, \ svint64_t: svqrdmlsh_lane_s64, \ default: __assume(0) \ )(op1, op2, op3, imm_index) #endif // sve2: Vector arithmetic / Negate / 
Saturating negation: Saturating negate svint8_t svqneg_s8_m(svint8_t inactive, svbool_t pg, svint8_t op); svint16_t svqneg_s16_m(svint16_t inactive, svbool_t pg, svint16_t op); svint32_t svqneg_s32_m(svint32_t inactive, svbool_t pg, svint32_t op); svint16_t svqneg_s16_x(svbool_t pg, svint16_t op); svint64_t svqneg_s64_m(svint64_t inactive, svbool_t pg, svint64_t op); svint64_t svqneg_s64_z(svbool_t pg, svint64_t op); svint32_t svqneg_s32_z(svbool_t pg, svint32_t op); svint16_t svqneg_s16_z(svbool_t pg, svint16_t op); svint8_t svqneg_s8_z(svbool_t pg, svint8_t op); svint64_t svqneg_s64_x(svbool_t pg, svint64_t op); svint32_t svqneg_s32_x(svbool_t pg, svint32_t op); svint8_t svqneg_s8_x(svbool_t pg, svint8_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqneg_m(inactive, pg, op) _Generic((op), \ svint8_t: svqneg_s8_m, \ svint16_t: svqneg_s16_m, \ svint32_t: svqneg_s32_m, \ svint64_t: svqneg_s64_m, \ default: __assume(0) \ )(inactive, pg, op) #define svqneg_x(pg, op) _Generic((op), \ svint16_t: svqneg_s16_x, \ svint64_t: svqneg_s64_x, \ svint32_t: svqneg_s32_x, \ svint8_t: svqneg_s8_x, \ default: __assume(0) \ )(pg, op) #define svqneg_z(pg, op) _Generic((op), \ svint64_t: svqneg_s64_z, \ svint32_t: svqneg_s32_z, \ svint16_t: svqneg_s16_z, \ svint8_t: svqneg_s8_z, \ default: __assume(0) \ )(pg, op) #endif // sve2: Vector arithmetic / Pairwise arithmetic / Pairwise addition and widen: Add and accumulate long pairwise svuint16_t svadalp_u16_z(svbool_t pg, svuint16_t op1, svuint8_t op2); svint32_t svadalp_s32_m(svbool_t pg, svint32_t op1, svint16_t op2); svint64_t svadalp_s64_m(svbool_t pg, svint64_t op1, svint32_t op2); svuint16_t svadalp_u16_m(svbool_t pg, svuint16_t op1, svuint8_t op2); svuint32_t svadalp_u32_m(svbool_t pg, svuint32_t op1, svuint16_t op2); svuint64_t svadalp_u64_m(svbool_t pg, svuint64_t op1, svuint32_t op2); svint16_t svadalp_s16_x(svbool_t pg, svint16_t op1, svint8_t op2); svint32_t svadalp_s32_x(svbool_t pg, svint32_t op1, svint16_t op2); svint64_t svadalp_s64_x(svbool_t pg, svint64_t op1, svint32_t op2); svuint64_t svadalp_u64_x(svbool_t pg, svuint64_t op1, svuint32_t op2); svint16_t svadalp_s16_m(svbool_t pg, svint16_t op1, svint8_t op2); svint16_t svadalp_s16_z(svbool_t pg, svint16_t op1, svint8_t op2); svuint32_t svadalp_u32_x(svbool_t pg, svuint32_t op1, svuint16_t op2); svint64_t svadalp_s64_z(svbool_t pg, svint64_t op1, svint32_t op2); svuint16_t svadalp_u16_x(svbool_t pg, svuint16_t op1, svuint8_t op2); svuint32_t svadalp_u32_z(svbool_t pg, svuint32_t op1, svuint16_t op2); svuint64_t svadalp_u64_z(svbool_t pg, svuint64_t op1, svuint32_t op2); svint32_t svadalp_s32_z(svbool_t pg, svint32_t op1, svint16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svadalp_z(pg, op1, op2) _Generic((op2), \ svuint8_t: svadalp_u16_z, \ svint8_t: svadalp_s16_z, \ svint32_t: svadalp_s64_z, \ svuint16_t: svadalp_u32_z, \ svuint32_t: svadalp_u64_z, \ svint16_t: svadalp_s32_z, \ default: __assume(0) \ )(pg, op1, op2) #define svadalp_m(pg, op1, op2) _Generic((op2), \ svint16_t: svadalp_s32_m, \ svint32_t: svadalp_s64_m, \ svuint8_t: svadalp_u16_m, \ svuint16_t: svadalp_u32_m, \ svuint32_t: svadalp_u64_m, \ svint8_t: svadalp_s16_m, \ default: __assume(0) \ )(pg, op1, op2) #define svadalp_x(pg, op1, op2) _Generic((op2), \ svint8_t: svadalp_s16_x, \ svint16_t: svadalp_s32_x, \ svint32_t: svadalp_s64_x, \ svuint32_t: svadalp_u64_x, \ svuint16_t: svadalp_u32_x, \ svuint8_t: svadalp_u16_x, \ default: __assume(0) \ )(pg, op1, op2) #endif // 
sve2: Vector arithmetic / Pairwise arithmetic / Pairwise addition: Add pairwise svuint64_t svaddp_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svaddp_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svint64_t svaddp_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint16_t svaddp_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svaddp_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svint32_t svaddp_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svaddp_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svint8_t svaddp_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svfloat64_t svaddp_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svaddp_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svaddp_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat16_t svaddp_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svint16_t svaddp_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svfloat32_t svaddp_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svuint64_t svaddp_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svaddp_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svaddp_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svaddp_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svuint8_t svaddp_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svaddp_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svaddp_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svfloat64_t svaddp_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svaddp_m(pg, op1, op2) _Generic((op2), \ svuint64_t: svaddp_u64_m, \ svuint32_t: svaddp_u32_m, \ svint64_t: svaddp_s64_m, \ svuint16_t: svaddp_u16_m, \ svuint8_t: svaddp_u8_m, \ svint32_t: svaddp_s32_m, \ svint8_t: svaddp_s8_m, \ svfloat64_t: svaddp_f64_m, \ svfloat32_t: svaddp_f32_m, \ svfloat16_t: svaddp_f16_m, \ svint16_t: svaddp_s16_m, \ default: __assume(0) \ )(pg, op1, op2) #define svaddp_x(pg, op1, op2) _Generic((op2), \ svint64_t: svaddp_s64_x, \ svfloat16_t: svaddp_f16_x, \ svfloat32_t: svaddp_f32_x, \ svuint64_t: svaddp_u64_x, \ svint8_t: svaddp_s8_x, \ svint16_t: svaddp_s16_x, \ svint32_t: svaddp_s32_x, \ svuint8_t: svaddp_u8_x, \ svuint16_t: svaddp_u16_x, \ svuint32_t: svaddp_u32_x, \ svfloat64_t: svaddp_f64_x, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Pairwise arithmetic / Pairwise maximum (IEEE754): Maximum number pairwise svfloat32_t svmaxnmp_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat32_t svmaxnmp_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svmaxnmp_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat16_t svmaxnmp_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat64_t svmaxnmp_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat64_t svmaxnmp_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmaxnmp_m(pg, op1, op2) _Generic((op2), \ svfloat32_t: svmaxnmp_f32_m, \ svfloat16_t: svmaxnmp_f16_m, \ svfloat64_t: svmaxnmp_f64_m, \ default: __assume(0) \ )(pg, op1, op2) #define svmaxnmp_x(pg, op1, op2) _Generic((op2), \ svfloat32_t: svmaxnmp_f32_x, \ svfloat16_t: svmaxnmp_f16_x, \ svfloat64_t: svmaxnmp_f64_x, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Pairwise arithmetic / Pairwise maximum: Maximum pairwise svuint64_t svmaxp_u64_m(svbool_t 
pg, svuint64_t op1, svuint64_t op2); svfloat16_t svmaxp_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svuint64_t svmaxp_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint16_t svmaxp_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svmaxp_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svmaxp_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svmaxp_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svmaxp_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svmaxp_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svfloat64_t svmaxp_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svmaxp_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svuint32_t svmaxp_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint32_t svmaxp_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svmaxp_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svmaxp_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svmaxp_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svmaxp_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svmaxp_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svmaxp_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svfloat64_t svmaxp_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svmaxp_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svmaxp_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svmaxp_m(pg, op1, op2) _Generic((op2), \ svuint64_t: svmaxp_u64_m, \ svfloat16_t: svmaxp_f16_m, \ svuint32_t: svmaxp_u32_m, \ svuint16_t: svmaxp_u16_m, \ svuint8_t: svmaxp_u8_m, \ svint64_t: svmaxp_s64_m, \ svint32_t: svmaxp_s32_m, \ svint16_t: svmaxp_s16_m, \ svint8_t: svmaxp_s8_m, \ svfloat64_t: svmaxp_f64_m, \ svfloat32_t: svmaxp_f32_m, \ default: __assume(0) \ )(pg, op1, op2) #define svmaxp_x(pg, op1, op2) _Generic((op2), \ svuint64_t: svmaxp_u64_x, \ svuint16_t: svmaxp_u16_x, \ svuint8_t: svmaxp_u8_x, \ svint64_t: svmaxp_s64_x, \ svint32_t: svmaxp_s32_x, \ svint16_t: svmaxp_s16_x, \ svint8_t: svmaxp_s8_x, \ svfloat64_t: svmaxp_f64_x, \ svfloat32_t: svmaxp_f32_x, \ svuint32_t: svmaxp_u32_x, \ svfloat16_t: svmaxp_f16_x, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Pairwise arithmetic / Pairwise minimum (IEEE754): Minimum number pairwise svfloat64_t svminnmp_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat32_t svminnmp_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svminnmp_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat32_t svminnmp_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat16_t svminnmp_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat64_t svminnmp_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svminnmp_x(pg, op1, op2) _Generic((op2), \ svfloat64_t: svminnmp_f64_x, \ svfloat32_t: svminnmp_f32_x, \ svfloat16_t: svminnmp_f16_x, \ default: __assume(0) \ )(pg, op1, op2) #define svminnmp_m(pg, op1, op2) _Generic((op2), \ svfloat32_t: svminnmp_f32_m, \ svfloat16_t: svminnmp_f16_m, \ svfloat64_t: svminnmp_f64_m, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Pairwise arithmetic / Pairwise minimum: Minimum pairwise svuint32_t svminp_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svint8_t svminp_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t 
svminp_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svminp_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svminp_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svminp_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svminp_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svminp_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svminp_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svfloat16_t svminp_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svuint64_t svminp_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint16_t svminp_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svint32_t svminp_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svfloat64_t svminp_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svfloat16_t svminp_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2); svfloat32_t svminp_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svfloat64_t svminp_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2); svint8_t svminp_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svminp_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svfloat32_t svminp_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2); svint64_t svminp_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svminp_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svminp_m(pg, op1, op2) _Generic((op2), \ svuint32_t: svminp_u32_m, \ svuint64_t: svminp_u64_m, \ svuint16_t: svminp_u16_m, \ svint32_t: svminp_s32_m, \ svfloat16_t: svminp_f16_m, \ svfloat32_t: svminp_f32_m, \ svfloat64_t: svminp_f64_m, \ svint8_t: svminp_s8_m, \ svint16_t: svminp_s16_m, \ svint64_t: svminp_s64_m, \ svuint8_t: svminp_u8_m, \ default: __assume(0) \ )(pg, op1, op2) #define svminp_x(pg, op1, op2) _Generic((op2), \ svint8_t: svminp_s8_x, \ svint16_t: svminp_s16_x, \ svint32_t: svminp_s32_x, \ svint64_t: svminp_s64_x, \ svuint8_t: svminp_u8_x, \ svuint16_t: svminp_u16_x, \ svuint32_t: svminp_u32_x, \ svuint64_t: svminp_u64_x, \ svfloat16_t: svminp_f16_x, \ svfloat64_t: svminp_f64_x, \ svfloat32_t: svminp_f32_x, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Polynomial / Polynomial multiply: Polynomial multiply svuint8_t svpmul_n_u8(svuint8_t op1, uint8_t op2); svuint8_t svpmul_u8(svuint8_t op1, svuint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svpmul(op1, op2) _Generic((op2), \ uint8_t: svpmul_n_u8, \ svuint8_t: svpmul_u8, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Polynomial / Polynomial multiply: Polynomial multiply long (bottom) svuint8_t svpmullb_pair_n_u8(svuint8_t op1, uint8_t op2); svuint64_t svpmullb_pair_u64(svuint64_t op1, svuint64_t op2); svuint64_t svpmullb_pair_n_u64(svuint64_t op1, uint64_t op2); svuint32_t svpmullb_pair_u32(svuint32_t op1, svuint32_t op2); svuint8_t svpmullb_pair_u8(svuint8_t op1, svuint8_t op2); svuint64_t svpmullb_n_u64(svuint32_t op1, uint32_t op2); svuint16_t svpmullb_n_u16(svuint8_t op1, uint8_t op2); svuint64_t svpmullb_u64(svuint32_t op1, svuint32_t op2); svuint16_t svpmullb_u16(svuint8_t op1, svuint8_t op2); svuint32_t svpmullb_pair_n_u32(svuint32_t op1, uint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svpmullb_pair(op1, op2) _Generic((op2), \ uint8_t: svpmullb_pair_n_u8, \ svuint64_t: svpmullb_pair_u64, \ uint64_t: svpmullb_pair_n_u64, \ svuint32_t: svpmullb_pair_u32, \ svuint8_t: 
svpmullb_pair_u8, \ uint32_t: svpmullb_pair_n_u32, \ default: __assume(0) \ )(op1, op2) #define svpmullb(op1, op2) _Generic((op2), \ uint32_t: svpmullb_n_u64, \ uint8_t: svpmullb_n_u16, \ svuint32_t: svpmullb_u64, \ svuint8_t: svpmullb_u16, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Polynomial / Polynomial multiply: Polynomial multiply long (top) svuint16_t svpmullt_u16(svuint8_t op1, svuint8_t op2); svuint16_t svpmullt_n_u16(svuint8_t op1, uint8_t op2); svuint64_t svpmullt_n_u64(svuint32_t op1, uint32_t op2); svuint64_t svpmullt_u64(svuint32_t op1, svuint32_t op2); svuint32_t svpmullt_pair_u32(svuint32_t op1, svuint32_t op2); svuint8_t svpmullt_pair_u8(svuint8_t op1, svuint8_t op2); svuint64_t svpmullt_pair_u64(svuint64_t op1, svuint64_t op2); svuint32_t svpmullt_pair_n_u32(svuint32_t op1, uint32_t op2); svuint64_t svpmullt_pair_n_u64(svuint64_t op1, uint64_t op2); svuint8_t svpmullt_pair_n_u8(svuint8_t op1, uint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svpmullt(op1, op2) _Generic((op2), \ svuint8_t: svpmullt_u16, \ uint8_t: svpmullt_n_u16, \ uint32_t: svpmullt_n_u64, \ svuint32_t: svpmullt_u64, \ default: __assume(0) \ )(op1, op2) #define svpmullt_pair(op1, op2) _Generic((op2), \ svuint32_t: svpmullt_pair_u32, \ svuint8_t: svpmullt_pair_u8, \ svuint64_t: svpmullt_pair_u64, \ uint32_t: svpmullt_pair_n_u32, \ uint64_t: svpmullt_pair_n_u64, \ uint8_t: svpmullt_pair_n_u8, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Reciprocal / Reciprocal estimate: Reciprocal estimate svuint32_t svrecpe_u32_z(svbool_t pg, svuint32_t op); svuint32_t svrecpe_u32_x(svbool_t pg, svuint32_t op); svuint32_t svrecpe_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrecpe_z(pg, op) _Generic((op), \ svuint32_t: svrecpe_u32_z, \ default: __assume(0) \ )(pg, op) #define svrecpe_x(pg, op) _Generic((op), \ svuint32_t: svrecpe_u32_x, \ default: __assume(0) \ )(pg, op) #define svrecpe_m(inactive, pg, op) _Generic((op), \ svuint32_t: svrecpe_u32_m, \ default: __assume(0) \ )(inactive, pg, op) #endif // sve2: Vector arithmetic / Reciprocal / Reciprocal square-root estimate: Reciprocal square root estimate svuint32_t svrsqrte_u32_x(svbool_t pg, svuint32_t op); svuint32_t svrsqrte_u32_z(svbool_t pg, svuint32_t op); svuint32_t svrsqrte_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrsqrte_x(pg, op) _Generic((op), \ svuint32_t: svrsqrte_u32_x, \ default: __assume(0) \ )(pg, op) #define svrsqrte_z(pg, op) _Generic((op), \ svuint32_t: svrsqrte_u32_z, \ default: __assume(0) \ )(pg, op) #define svrsqrte_m(inactive, pg, op) _Generic((op), \ svuint32_t: svrsqrte_u32_m, \ default: __assume(0) \ )(inactive, pg, op) #endif // sve2: Vector arithmetic / Subtract / Narrowing subtraction: Rounding subtract narrow high part (bottom) svuint32_t svrsubhnb_n_u64(svuint64_t op1, uint64_t op2); svint8_t svrsubhnb_n_s16(svint16_t op1, int16_t op2); svuint8_t svrsubhnb_n_u16(svuint16_t op1, uint16_t op2); svint32_t svrsubhnb_s64(svint64_t op1, svint64_t op2); svint16_t svrsubhnb_s32(svint32_t op1, svint32_t op2); svint8_t svrsubhnb_s16(svint16_t op1, svint16_t op2); svuint8_t svrsubhnb_u16(svuint16_t op1, svuint16_t op2); svuint16_t svrsubhnb_u32(svuint32_t op1, svuint32_t op2); svuint32_t svrsubhnb_u64(svuint64_t op1, svuint64_t op2); svint16_t svrsubhnb_n_s32(svint32_t op1, int32_t op2); svint32_t 
svrsubhnb_n_s64(svint64_t op1, int64_t op2); svuint16_t svrsubhnb_n_u32(svuint32_t op1, uint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrsubhnb(op1, op2) _Generic((op2), \ uint64_t: svrsubhnb_n_u64, \ int16_t: svrsubhnb_n_s16, \ uint16_t: svrsubhnb_n_u16, \ svint64_t: svrsubhnb_s64, \ svint32_t: svrsubhnb_s32, \ svint16_t: svrsubhnb_s16, \ svuint16_t: svrsubhnb_u16, \ svuint32_t: svrsubhnb_u32, \ svuint64_t: svrsubhnb_u64, \ int32_t: svrsubhnb_n_s32, \ int64_t: svrsubhnb_n_s64, \ uint32_t: svrsubhnb_n_u32, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Subtract / Narrowing subtraction: Rounding subtract narrow high part (top) svuint32_t svrsubhnt_n_u64(svuint32_t even, svuint64_t op1, uint64_t op2); svuint16_t svrsubhnt_n_u32(svuint16_t even, svuint32_t op1, uint32_t op2); svint32_t svrsubhnt_n_s64(svint32_t even, svint64_t op1, int64_t op2); svint16_t svrsubhnt_n_s32(svint16_t even, svint32_t op1, int32_t op2); svint8_t svrsubhnt_n_s16(svint8_t even, svint16_t op1, int16_t op2); svuint32_t svrsubhnt_u64(svuint32_t even, svuint64_t op1, svuint64_t op2); svuint16_t svrsubhnt_u32(svuint16_t even, svuint32_t op1, svuint32_t op2); svuint8_t svrsubhnt_u16(svuint8_t even, svuint16_t op1, svuint16_t op2); svint32_t svrsubhnt_s64(svint32_t even, svint64_t op1, svint64_t op2); svint16_t svrsubhnt_s32(svint16_t even, svint32_t op1, svint32_t op2); svint8_t svrsubhnt_s16(svint8_t even, svint16_t op1, svint16_t op2); svuint8_t svrsubhnt_n_u16(svuint8_t even, svuint16_t op1, uint16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svrsubhnt(even, op1, op2) _Generic((op2), \ uint64_t: svrsubhnt_n_u64, \ uint32_t: svrsubhnt_n_u32, \ int64_t: svrsubhnt_n_s64, \ int32_t: svrsubhnt_n_s32, \ int16_t: svrsubhnt_n_s16, \ svuint64_t: svrsubhnt_u64, \ svuint32_t: svrsubhnt_u32, \ svuint16_t: svrsubhnt_u16, \ svint64_t: svrsubhnt_s64, \ svint32_t: svrsubhnt_s32, \ svint16_t: svrsubhnt_s16, \ uint16_t: svrsubhnt_n_u16, \ default: __assume(0) \ )(even, op1, op2) #endif // sve2: Vector arithmetic / Subtract / Narrowing subtraction: Subtract narrow high part (bottom) svuint8_t svsubhnb_u16(svuint16_t op1, svuint16_t op2); svint8_t svsubhnb_s16(svint16_t op1, svint16_t op2); svint32_t svsubhnb_s64(svint64_t op1, svint64_t op2); svint16_t svsubhnb_s32(svint32_t op1, svint32_t op2); svuint32_t svsubhnb_u64(svuint64_t op1, svuint64_t op2); svuint16_t svsubhnb_u32(svuint32_t op1, svuint32_t op2); svint8_t svsubhnb_n_s16(svint16_t op1, int16_t op2); svint16_t svsubhnb_n_s32(svint32_t op1, int32_t op2); svint32_t svsubhnb_n_s64(svint64_t op1, int64_t op2); svuint8_t svsubhnb_n_u16(svuint16_t op1, uint16_t op2); svuint16_t svsubhnb_n_u32(svuint32_t op1, uint32_t op2); svuint32_t svsubhnb_n_u64(svuint64_t op1, uint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsubhnb(op1, op2) _Generic((op2), \ svuint16_t: svsubhnb_u16, \ svint16_t: svsubhnb_s16, \ svint64_t: svsubhnb_s64, \ svint32_t: svsubhnb_s32, \ svuint64_t: svsubhnb_u64, \ svuint32_t: svsubhnb_u32, \ int16_t: svsubhnb_n_s16, \ int32_t: svsubhnb_n_s32, \ int64_t: svsubhnb_n_s64, \ uint16_t: svsubhnb_n_u16, \ uint32_t: svsubhnb_n_u32, \ uint64_t: svsubhnb_n_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Subtract / Narrowing subtraction: Subtract narrow high part (top) svint8_t svsubhnt_n_s16(svint8_t even, svint16_t op1, int16_t op2); svint32_t svsubhnt_n_s64(svint32_t even, svint64_t op1, int64_t op2); svint8_t 
svsubhnt_s16(svint8_t even, svint16_t op1, svint16_t op2); svint16_t svsubhnt_s32(svint16_t even, svint32_t op1, svint32_t op2); svuint32_t svsubhnt_u64(svuint32_t even, svuint64_t op1, svuint64_t op2); svint16_t svsubhnt_n_s32(svint16_t even, svint32_t op1, int32_t op2); svuint8_t svsubhnt_n_u16(svuint8_t even, svuint16_t op1, uint16_t op2); svint32_t svsubhnt_s64(svint32_t even, svint64_t op1, svint64_t op2); svuint32_t svsubhnt_n_u64(svuint32_t even, svuint64_t op1, uint64_t op2); svuint8_t svsubhnt_u16(svuint8_t even, svuint16_t op1, svuint16_t op2); svuint16_t svsubhnt_n_u32(svuint16_t even, svuint32_t op1, uint32_t op2); svuint16_t svsubhnt_u32(svuint16_t even, svuint32_t op1, svuint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsubhnt(even, op1, op2) _Generic((op2), \ int16_t: svsubhnt_n_s16, \ int64_t: svsubhnt_n_s64, \ svint16_t: svsubhnt_s16, \ svint32_t: svsubhnt_s32, \ svuint64_t: svsubhnt_u64, \ int32_t: svsubhnt_n_s32, \ uint16_t: svsubhnt_n_u16, \ svint64_t: svsubhnt_s64, \ uint64_t: svsubhnt_n_u64, \ svuint16_t: svsubhnt_u16, \ uint32_t: svsubhnt_n_u32, \ svuint32_t: svsubhnt_u32, \ default: __assume(0) \ )(even, op1, op2) #endif // sve2: Vector arithmetic / Subtract / Saturating subtract: Saturating subtract svint16_t svqsub_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svqsub_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svqsub_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svqsub_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svqsub_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint32_t svqsub_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint64_t svqsub_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svint8_t svqsub_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svqsub_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svqsub_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svuint8_t svqsub_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svqsub_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint32_t svqsub_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint64_t svqsub_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svint8_t svqsub_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svqsub_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svuint8_t svqsub_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svint32_t svqsub_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svqsub_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint8_t svqsub_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svint64_t svqsub_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svuint64_t svqsub_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svint32_t svqsub_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svuint16_t svqsub_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint16_t svqsub_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svint8_t svqsub_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svqsub_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint64_t svqsub_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svqsub_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svqsub_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svqsub_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svqsub_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svqsub_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svqsub_s16_x(svbool_t pg, svint16_t op1, 
svint16_t op2); svint32_t svqsub_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svqsub_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svqsub_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svqsub_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svqsub_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svqsub_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svqsub_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svqsub_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svqsub_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svqsub_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svqsub_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint32_t svqsub_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svqsub_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svqsub_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqsub_m(pg, op1, op2) _Generic((op2), \ int16_t: svqsub_n_s16_m, \ int32_t: svqsub_n_s32_m, \ int64_t: svqsub_n_s64_m, \ uint8_t: svqsub_n_u8_m, \ uint16_t: svqsub_n_u16_m, \ uint32_t: svqsub_n_u32_m, \ uint64_t: svqsub_n_u64_m, \ int8_t: svqsub_n_s8_m, \ svint32_t: svqsub_s32_m, \ svint8_t: svqsub_s8_m, \ svint16_t: svqsub_s16_m, \ svint64_t: svqsub_s64_m, \ svuint8_t: svqsub_u8_m, \ svuint16_t: svqsub_u16_m, \ svuint32_t: svqsub_u32_m, \ svuint64_t: svqsub_u64_m, \ default: __assume(0) \ )(pg, op1, op2) #define svqsub_x(pg, op1, op2) _Generic((op2), \ int8_t: svqsub_n_s8_x, \ int16_t: svqsub_n_s16_x, \ int32_t: svqsub_n_s32_x, \ uint8_t: svqsub_n_u8_x, \ uint16_t: svqsub_n_u16_x, \ uint32_t: svqsub_n_u32_x, \ uint64_t: svqsub_n_u64_x, \ int64_t: svqsub_n_s64_x, \ svint8_t: svqsub_s8_x, \ svint16_t: svqsub_s16_x, \ svint32_t: svqsub_s32_x, \ svint64_t: svqsub_s64_x, \ svuint8_t: svqsub_u8_x, \ svuint16_t: svqsub_u16_x, \ svuint32_t: svqsub_u32_x, \ svuint64_t: svqsub_u64_x, \ default: __assume(0) \ )(pg, op1, op2) #define svqsub_z(pg, op1, op2) _Generic((op2), \ int8_t: svqsub_n_s8_z, \ int16_t: svqsub_n_s16_z, \ uint8_t: svqsub_n_u8_z, \ int32_t: svqsub_n_s32_z, \ int64_t: svqsub_n_s64_z, \ svuint64_t: svqsub_u64_z, \ svuint16_t: svqsub_u16_z, \ uint16_t: svqsub_n_u16_z, \ svint8_t: svqsub_s8_z, \ svint16_t: svqsub_s16_z, \ svint32_t: svqsub_s32_z, \ svint64_t: svqsub_s64_z, \ svuint8_t: svqsub_u8_z, \ svuint32_t: svqsub_u32_z, \ uint64_t: svqsub_n_u64_z, \ uint32_t: svqsub_n_u32_z, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Subtract / Saturating subtract: Saturating subtract reversed svuint16_t svqsubr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svint16_t svqsubr_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svqsubr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svqsubr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svqsubr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint64_t svqsubr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svqsubr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svqsubr_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svqsubr_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svqsubr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svqsubr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svqsubr_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svqsubr_n_s8_z(svbool_t pg, 
svint8_t op1, int8_t op2); svuint64_t svqsubr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svint8_t svqsubr_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svuint32_t svqsubr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint8_t svqsubr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svqsubr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svqsubr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svqsubr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svqsubr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svqsubr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svqsubr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svqsubr_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svqsubr_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svqsubr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svqsubr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svqsubr_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svqsubr_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svuint16_t svqsubr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint64_t svqsubr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint64_t svqsubr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint16_t svqsubr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svqsubr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svint8_t svqsubr_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svqsubr_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svqsubr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svqsubr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svqsubr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svqsubr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svqsubr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svqsubr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svqsubr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svqsubr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svqsubr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svqsubr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svqsubr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint32_t svqsubr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svqsubr_z(pg, op1, op2) _Generic((op2), \ svuint16_t: svqsubr_u16_z, \ svint16_t: svqsubr_s16_z, \ svint32_t: svqsubr_s32_z, \ svint64_t: svqsubr_s64_z, \ svuint8_t: svqsubr_u8_z, \ uint64_t: svqsubr_n_u64_z, \ uint32_t: svqsubr_n_u32_z, \ uint16_t: svqsubr_n_u16_z, \ uint8_t: svqsubr_n_u8_z, \ int64_t: svqsubr_n_s64_z, \ int32_t: svqsubr_n_s32_z, \ int16_t: svqsubr_n_s16_z, \ int8_t: svqsubr_n_s8_z, \ svint8_t: svqsubr_s8_z, \ svuint64_t: svqsubr_u64_z, \ svuint32_t: svqsubr_u32_z, \ default: __assume(0) \ )(pg, op1, op2) #define svqsubr_x(pg, op1, op2) _Generic((op2), \ uint64_t: svqsubr_n_u64_x, \ uint32_t: svqsubr_n_u32_x, \ uint8_t: svqsubr_n_u8_x, \ int64_t: svqsubr_n_s64_x, \ int32_t: svqsubr_n_s32_x, \ int16_t: svqsubr_n_s16_x, \ int8_t: svqsubr_n_s8_x, \ uint16_t: svqsubr_n_u16_x, \ svuint64_t: svqsubr_u64_x, \ svuint16_t: svqsubr_u16_x, \ svuint32_t: svqsubr_u32_x, \ svint8_t: svqsubr_s8_x, \ svint16_t: svqsubr_s16_x, \ svint32_t: svqsubr_s32_x, \ svint64_t: svqsubr_s64_x, \ svuint8_t: svqsubr_u8_x, \ default: __assume(0) \ )(pg, op1, 
op2) #define svqsubr_m(pg, op1, op2) _Generic((op2), \ uint64_t: svqsubr_n_u64_m, \ uint32_t: svqsubr_n_u32_m, \ uint16_t: svqsubr_n_u16_m, \ uint8_t: svqsubr_n_u8_m, \ int64_t: svqsubr_n_s64_m, \ int32_t: svqsubr_n_s32_m, \ int16_t: svqsubr_n_s16_m, \ int8_t: svqsubr_n_s8_m, \ svint8_t: svqsubr_s8_m, \ svint16_t: svqsubr_s16_m, \ svint32_t: svqsubr_s32_m, \ svint64_t: svqsubr_s64_m, \ svuint8_t: svqsubr_u8_m, \ svuint16_t: svqsubr_u16_m, \ svuint64_t: svqsubr_u64_m, \ svuint32_t: svqsubr_u32_m, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Subtract / Subtraction with borrow: Subtract with borrow long (bottom) svuint64_t svsbclb_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint64_t svsbclb_n_u64(svuint64_t op1, svuint64_t op2, uint64_t op3); svuint32_t svsbclb_n_u32(svuint32_t op1, svuint32_t op2, uint32_t op3); svuint32_t svsbclb_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsbclb(op1, op2, op3) _Generic((op3), \ svuint64_t: svsbclb_u64, \ uint64_t: svsbclb_n_u64, \ uint32_t: svsbclb_n_u32, \ svuint32_t: svsbclb_u32, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Subtract / Subtraction with borrow: Subtract with borrow long (top) svuint32_t svsbclt_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3); svuint64_t svsbclt_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3); svuint32_t svsbclt_n_u32(svuint32_t op1, svuint32_t op2, uint32_t op3); svuint64_t svsbclt_n_u64(svuint64_t op1, svuint64_t op2, uint64_t op3); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsbclt(op1, op2, op3) _Generic((op3), \ svuint32_t: svsbclt_u32, \ svuint64_t: svsbclt_u64, \ uint32_t: svsbclt_n_u32, \ uint64_t: svsbclt_n_u64, \ default: __assume(0) \ )(op1, op2, op3) #endif // sve2: Vector arithmetic / Subtract / Subtraction: Halving subtract svint8_t svhsub_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svhsub_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svhsub_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svhsub_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svhsub_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svhsub_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svint16_t svhsub_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svhsub_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svint32_t svhsub_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svint8_t svhsub_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svint16_t svhsub_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svhsub_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint32_t svhsub_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svhsub_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svint8_t svhsub_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svhsub_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svhsub_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); svuint32_t svhsub_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint8_t svhsub_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svhsub_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svhsub_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svhsub_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svint64_t svhsub_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svuint64_t svhsub_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t 
svhsub_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svhsub_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svint16_t svhsub_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svuint64_t svhsub_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint16_t svhsub_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svint64_t svhsub_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svhsub_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svhsub_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint32_t svhsub_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svint8_t svhsub_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svhsub_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svhsub_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svhsub_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svuint32_t svhsub_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint8_t svhsub_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint32_t svhsub_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svhsub_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svint8_t svhsub_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svhsub_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svhsub_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svhsub_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svuint8_t svhsub_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint16_t svhsub_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint64_t svhsub_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svhsub_x(pg, op1, op2) _Generic((op2), \ int8_t: svhsub_n_s8_x, \ int16_t: svhsub_n_s16_x, \ int32_t: svhsub_n_s32_x, \ svuint64_t: svhsub_u64_x, \ svuint16_t: svhsub_u16_x, \ int64_t: svhsub_n_s64_x, \ uint8_t: svhsub_n_u8_x, \ uint16_t: svhsub_n_u16_x, \ uint32_t: svhsub_n_u32_x, \ svuint32_t: svhsub_u32_x, \ uint64_t: svhsub_n_u64_x, \ svint8_t: svhsub_s8_x, \ svint16_t: svhsub_s16_x, \ svint32_t: svhsub_s32_x, \ svint64_t: svhsub_s64_x, \ svuint8_t: svhsub_u8_x, \ default: __assume(0) \ )(pg, op1, op2) #define svhsub_m(pg, op1, op2) _Generic((op2), \ uint64_t: svhsub_n_u64_m, \ uint32_t: svhsub_n_u32_m, \ uint16_t: svhsub_n_u16_m, \ uint8_t: svhsub_n_u8_m, \ int64_t: svhsub_n_s64_m, \ int32_t: svhsub_n_s32_m, \ int16_t: svhsub_n_s16_m, \ int8_t: svhsub_n_s8_m, \ svint8_t: svhsub_s8_m, \ svint16_t: svhsub_s16_m, \ svint32_t: svhsub_s32_m, \ svint64_t: svhsub_s64_m, \ svuint8_t: svhsub_u8_m, \ svuint32_t: svhsub_u32_m, \ svuint16_t: svhsub_u16_m, \ svuint64_t: svhsub_u64_m, \ default: __assume(0) \ )(pg, op1, op2) #define svhsub_z(pg, op1, op2) _Generic((op2), \ svint32_t: svhsub_s32_z, \ int8_t: svhsub_n_s8_z, \ int16_t: svhsub_n_s16_z, \ int32_t: svhsub_n_s32_z, \ svuint64_t: svhsub_u64_z, \ svint8_t: svhsub_s8_z, \ svuint32_t: svhsub_u32_z, \ svuint8_t: svhsub_u8_z, \ int64_t: svhsub_n_s64_z, \ uint8_t: svhsub_n_u8_z, \ uint16_t: svhsub_n_u16_z, \ svint64_t: svhsub_s64_z, \ uint64_t: svhsub_n_u64_z, \ uint32_t: svhsub_n_u32_z, \ svuint16_t: svhsub_u16_z, \ svint16_t: svhsub_s16_z, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Subtract / Subtraction: Halving subtract reversed svuint64_t svhsubr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2); svint8_t svhsubr_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svhsubr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint8_t 
svhsubr_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svhsubr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svhsubr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svhsubr_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2); svuint32_t svhsubr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2); svint8_t svhsubr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svhsubr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2); svint32_t svhsubr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2); svint64_t svhsubr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2); svint16_t svhsubr_s16_z(svbool_t pg, svint16_t op1, svint16_t op2); svuint8_t svhsubr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2); svuint32_t svhsubr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint64_t svhsubr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2); svint8_t svhsubr_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2); svuint64_t svhsubr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2); svuint32_t svhsubr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2); svuint16_t svhsubr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svhsubr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svhsubr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svhsubr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2); svuint16_t svhsubr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint32_t svhsubr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svhsubr_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2); svint8_t svhsubr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2); svuint16_t svhsubr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2); svuint8_t svhsubr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2); svint64_t svhsubr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2); svint32_t svhsubr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2); svint16_t svhsubr_s16_m(svbool_t pg, svint16_t op1, svint16_t op2); svint8_t svhsubr_s8_m(svbool_t pg, svint8_t op1, svint8_t op2); svint16_t svhsubr_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2); svint32_t svhsubr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2); svint64_t svhsubr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2); svuint8_t svhsubr_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2); svuint16_t svhsubr_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2); svuint64_t svhsubr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2); svuint32_t svhsubr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2); svuint32_t svhsubr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2); svuint16_t svhsubr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2); svuint8_t svhsubr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2); svint64_t svhsubr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2); svint32_t svhsubr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2); svint16_t svhsubr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2); svuint64_t svhsubr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2); svint8_t svhsubr_s8_z(svbool_t pg, svint8_t op1, svint8_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svhsubr_x(pg, op1, op2) _Generic((op2), \ uint64_t: svhsubr_n_u64_x, \ svint8_t: svhsubr_s8_x, \ svint16_t: svhsubr_s16_x, \ svint32_t: svhsubr_s32_x, \ svint64_t: svhsubr_s64_x, \ svuint8_t: svhsubr_u8_x, \ svuint32_t: svhsubr_u32_x, \ svuint64_t: svhsubr_u64_x, \ svuint16_t: svhsubr_u16_x, \ int8_t: svhsubr_n_s8_x, \ uint32_t: svhsubr_n_u32_x, \ uint16_t: svhsubr_n_u16_x, \ uint8_t: svhsubr_n_u8_x, \ 
int64_t: svhsubr_n_s64_x, \ int32_t: svhsubr_n_s32_x, \ int16_t: svhsubr_n_s16_x, \ default: __assume(0) \ )(pg, op1, op2) #define svhsubr_z(pg, op1, op2) _Generic((op2), \ int8_t: svhsubr_n_s8_z, \ svint16_t: svhsubr_s16_z, \ svuint64_t: svhsubr_u64_z, \ svuint32_t: svhsubr_u32_z, \ svuint16_t: svhsubr_u16_z, \ svuint8_t: svhsubr_u8_z, \ svint64_t: svhsubr_s64_z, \ svint32_t: svhsubr_s32_z, \ int16_t: svhsubr_n_s16_z, \ int32_t: svhsubr_n_s32_z, \ int64_t: svhsubr_n_s64_z, \ uint8_t: svhsubr_n_u8_z, \ uint16_t: svhsubr_n_u16_z, \ uint32_t: svhsubr_n_u32_z, \ uint64_t: svhsubr_n_u64_z, \ svint8_t: svhsubr_s8_z, \ default: __assume(0) \ )(pg, op1, op2) #define svhsubr_m(pg, op1, op2) _Generic((op2), \ svuint64_t: svhsubr_u64_m, \ uint8_t: svhsubr_n_u8_m, \ int64_t: svhsubr_n_s64_m, \ int32_t: svhsubr_n_s32_m, \ int16_t: svhsubr_n_s16_m, \ svuint32_t: svhsubr_u32_m, \ int8_t: svhsubr_n_s8_m, \ uint32_t: svhsubr_n_u32_m, \ uint16_t: svhsubr_n_u16_m, \ svuint16_t: svhsubr_u16_m, \ svuint8_t: svhsubr_u8_m, \ svint64_t: svhsubr_s64_m, \ svint32_t: svhsubr_s32_m, \ svint16_t: svhsubr_s16_m, \ svint8_t: svhsubr_s8_m, \ uint64_t: svhsubr_n_u64_m, \ default: __assume(0) \ )(pg, op1, op2) #endif // sve2: Vector arithmetic / Subtract / Widening subtraction: Subtract long (bottom - top) svint16_t svsublbt_s16(svint8_t op1, svint8_t op2); svint32_t svsublbt_s32(svint16_t op1, svint16_t op2); svint64_t svsublbt_s64(svint32_t op1, svint32_t op2); svint16_t svsublbt_n_s16(svint8_t op1, int8_t op2); svint32_t svsublbt_n_s32(svint16_t op1, int16_t op2); svint64_t svsublbt_n_s64(svint32_t op1, int32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsublbt(op1, op2) _Generic((op2), \ svint8_t: svsublbt_s16, \ svint16_t: svsublbt_s32, \ svint32_t: svsublbt_s64, \ int8_t: svsublbt_n_s16, \ int16_t: svsublbt_n_s32, \ int32_t: svsublbt_n_s64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Subtract / Widening subtraction: Subtract long (bottom) svint16_t svsublb_s16(svint8_t op1, svint8_t op2); svuint64_t svsublb_n_u64(svuint32_t op1, uint32_t op2); svuint32_t svsublb_n_u32(svuint16_t op1, uint16_t op2); svuint16_t svsublb_n_u16(svuint8_t op1, uint8_t op2); svint64_t svsublb_n_s64(svint32_t op1, int32_t op2); svint32_t svsublb_n_s32(svint16_t op1, int16_t op2); svint16_t svsublb_n_s16(svint8_t op1, int8_t op2); svuint64_t svsublb_u64(svuint32_t op1, svuint32_t op2); svuint32_t svsublb_u32(svuint16_t op1, svuint16_t op2); svuint16_t svsublb_u16(svuint8_t op1, svuint8_t op2); svint64_t svsublb_s64(svint32_t op1, svint32_t op2); svint32_t svsublb_s32(svint16_t op1, svint16_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsublb(op1, op2) _Generic((op2), \ svint8_t: svsublb_s16, \ uint32_t: svsublb_n_u64, \ uint16_t: svsublb_n_u32, \ uint8_t: svsublb_n_u16, \ int32_t: svsublb_n_s64, \ int16_t: svsublb_n_s32, \ int8_t: svsublb_n_s16, \ svuint32_t: svsublb_u64, \ svuint16_t: svsublb_u32, \ svuint8_t: svsublb_u16, \ svint32_t: svsublb_s64, \ svint16_t: svsublb_s32, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Subtract / Widening subtraction: Subtract long (top - bottom) svint32_t svsubltb_s32(svint16_t op1, svint16_t op2); svint16_t svsubltb_s16(svint8_t op1, svint8_t op2); svint16_t svsubltb_n_s16(svint8_t op1, int8_t op2); svint64_t svsubltb_s64(svint32_t op1, svint32_t op2); svint64_t svsubltb_n_s64(svint32_t op1, int32_t op2); svint32_t svsubltb_n_s32(svint16_t op1, int16_t op2); #if 
defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsubltb(op1, op2) _Generic((op2), \ svint16_t: svsubltb_s32, \ svint8_t: svsubltb_s16, \ int8_t: svsubltb_n_s16, \ svint32_t: svsubltb_s64, \ int32_t: svsubltb_n_s64, \ int16_t: svsubltb_n_s32, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Subtract / Widening subtraction: Subtract long (top) svint32_t svsublt_s32(svint16_t op1, svint16_t op2); svuint32_t svsublt_u32(svuint16_t op1, svuint16_t op2); svuint16_t svsublt_u16(svuint8_t op1, svuint8_t op2); svint64_t svsublt_s64(svint32_t op1, svint32_t op2); svint16_t svsublt_s16(svint8_t op1, svint8_t op2); svuint64_t svsublt_u64(svuint32_t op1, svuint32_t op2); svint16_t svsublt_n_s16(svint8_t op1, int8_t op2); svint32_t svsublt_n_s32(svint16_t op1, int16_t op2); svint64_t svsublt_n_s64(svint32_t op1, int32_t op2); svuint16_t svsublt_n_u16(svuint8_t op1, uint8_t op2); svuint32_t svsublt_n_u32(svuint16_t op1, uint16_t op2); svuint64_t svsublt_n_u64(svuint32_t op1, uint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsublt(op1, op2) _Generic((op2), \ svint16_t: svsublt_s32, \ svuint16_t: svsublt_u32, \ svuint8_t: svsublt_u16, \ svint32_t: svsublt_s64, \ svint8_t: svsublt_s16, \ svuint32_t: svsublt_u64, \ int8_t: svsublt_n_s16, \ int16_t: svsublt_n_s32, \ int32_t: svsublt_n_s64, \ uint8_t: svsublt_n_u16, \ uint16_t: svsublt_n_u32, \ uint32_t: svsublt_n_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Subtract / Widening subtraction: Subtract wide (bottom) svint16_t svsubwb_s16(svint16_t op1, svint8_t op2); svint32_t svsubwb_s32(svint32_t op1, svint16_t op2); svint64_t svsubwb_s64(svint64_t op1, svint32_t op2); svuint16_t svsubwb_u16(svuint16_t op1, svuint8_t op2); svuint32_t svsubwb_u32(svuint32_t op1, svuint16_t op2); svuint64_t svsubwb_u64(svuint64_t op1, svuint32_t op2); svint16_t svsubwb_n_s16(svint16_t op1, int8_t op2); svint32_t svsubwb_n_s32(svint32_t op1, int16_t op2); svint64_t svsubwb_n_s64(svint64_t op1, int32_t op2); svuint16_t svsubwb_n_u16(svuint16_t op1, uint8_t op2); svuint32_t svsubwb_n_u32(svuint32_t op1, uint16_t op2); svuint64_t svsubwb_n_u64(svuint64_t op1, uint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsubwb(op1, op2) _Generic((op2), \ svint8_t: svsubwb_s16, \ svint16_t: svsubwb_s32, \ svint32_t: svsubwb_s64, \ svuint8_t: svsubwb_u16, \ svuint16_t: svsubwb_u32, \ svuint32_t: svsubwb_u64, \ int8_t: svsubwb_n_s16, \ int16_t: svsubwb_n_s32, \ int32_t: svsubwb_n_s64, \ uint8_t: svsubwb_n_u16, \ uint16_t: svsubwb_n_u32, \ uint32_t: svsubwb_n_u64, \ default: __assume(0) \ )(op1, op2) #endif // sve2: Vector arithmetic / Subtract / Widening subtraction: Subtract wide (top) svuint64_t svsubwt_u64(svuint64_t op1, svuint32_t op2); svint16_t svsubwt_s16(svint16_t op1, svint8_t op2); svuint64_t svsubwt_n_u64(svuint64_t op1, uint32_t op2); svuint32_t svsubwt_n_u32(svuint32_t op1, uint16_t op2); svuint16_t svsubwt_n_u16(svuint16_t op1, uint8_t op2); svint64_t svsubwt_n_s64(svint64_t op1, int32_t op2); svint32_t svsubwt_n_s32(svint32_t op1, int16_t op2); svint32_t svsubwt_s32(svint32_t op1, svint16_t op2); svint16_t svsubwt_n_s16(svint16_t op1, int8_t op2); svuint16_t svsubwt_u16(svuint16_t op1, svuint8_t op2); svuint32_t svsubwt_u32(svuint32_t op1, svuint16_t op2); svint64_t svsubwt_s64(svint64_t op1, svint32_t op2); #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define svsubwt(op1, op2) _Generic((op2), \ svuint32_t: svsubwt_u64, \ 
svint8_t: svsubwt_s16, \
uint32_t: svsubwt_n_u64, \
uint16_t: svsubwt_n_u32, \
uint8_t: svsubwt_n_u16, \
int32_t: svsubwt_n_s64, \
int16_t: svsubwt_n_s32, \
svint16_t: svsubwt_s32, \
int8_t: svsubwt_n_s16, \
svuint8_t: svsubwt_u16, \
svuint16_t: svsubwt_u32, \
svint32_t: svsubwt_s64, \
default: __assume(0) \
)(op1, op2)
#endif

svuint8_t __getRegZ(int);
svbool_t __getRegP(int);
void __setRegZ(int, svuint8_t);
void __setRegP(int, svbool_t);
svuint8_t __ldrRegZ(const svuint8_t *, int);
svbool_t __ldrRegP(const svbool_t *, int);
void __strRegZ(const svuint8_t *, int, svuint8_t);
void __strRegP(const svbool_t *, int, svbool_t);

#ifdef __cplusplus
} // extern "C"
#endif

// overloaded intrinsics for c++
#if defined(__cplusplus)

#include <type_traits>

#pragma warning(push)
#pragma warning(disable : 4984) // warning C4984: 'if constexpr' is a C++17 language extension

template <typename T> using __svehdr_twice_type =
    ::std::conditional_t<::std::is_same_v<T, svint8_t>, svint16_t,
    ::std::conditional_t<::std::is_same_v<T, svint16_t>, svint32_t,
    ::std::conditional_t<::std::is_same_v<T, svint32_t>, svint64_t,
    ::std::conditional_t<::std::is_same_v<T, svuint8_t>, svuint16_t,
    ::std::conditional_t<::std::is_same_v<T, svuint16_t>, svuint32_t,
    ::std::conditional_t<::std::is_same_v<T, svuint32_t>, svuint64_t,
    svuint64_t>>>>>>;

template <typename T> using __svehdr_half_type =
    ::std::conditional_t<::std::is_same_v<T, svint16_t>, svint8_t,
    ::std::conditional_t<::std::is_same_v<T, svint32_t>, svint16_t,
    ::std::conditional_t<::std::is_same_v<T, svint64_t>, svint32_t,
    ::std::conditional_t<::std::is_same_v<T, svuint16_t>, svuint8_t,
    ::std::conditional_t<::std::is_same_v<T, svuint32_t>, svuint16_t,
    ::std::conditional_t<::std::is_same_v<T, svuint64_t>, svuint32_t,
    svuint32_t>>>>>>;

template <typename T> using __svehdr_half_utype =
    ::std::conditional_t<::std::is_same_v<T, svint16_t>, svuint8_t,
    ::std::conditional_t<::std::is_same_v<T, svint32_t>, svuint16_t,
    ::std::conditional_t<::std::is_same_v<T, svint64_t>, svuint32_t,
    svuint32_t>>>;

template <typename T> using __svehdr_utype =
    ::std::conditional_t<::std::is_same_v<T, svint8_t>, svuint8_t,
    ::std::conditional_t<::std::is_same_v<T, svint16_t>, svuint16_t,
    ::std::conditional_t<::std::is_same_v<T, svint32_t>, svuint32_t,
    ::std::conditional_t<::std::is_same_v<T, svint64_t>, svuint64_t,
    svuint64_t>>>>;

template <typename T> using __svehdr_hfa2_type =
    ::std::conditional_t<::std::is_same_v<T, svfloat64x2_t>, svfloat64_t,
    ::std::conditional_t<::std::is_same_v<T, svfloat32x2_t>, svfloat32_t,
    ::std::conditional_t<::std::is_same_v<T, svfloat16x2_t>, svfloat16_t,
    ::std::conditional_t<::std::is_same_v<T, svint64x2_t>, svint64_t,
    ::std::conditional_t<::std::is_same_v<T, svint32x2_t>, svint32_t,
    ::std::conditional_t<::std::is_same_v<T, svint16x2_t>, svint16_t,
    ::std::conditional_t<::std::is_same_v<T, svint8x2_t>, svint8_t,
    ::std::conditional_t<::std::is_same_v<T, svuint64x2_t>, svuint64_t,
    ::std::conditional_t<::std::is_same_v<T, svuint32x2_t>, svuint32_t,
    ::std::conditional_t<::std::is_same_v<T, svuint16x2_t>, svuint16_t,
    ::std::conditional_t<::std::is_same_v<T, svuint8x2_t>, svuint8_t,
    void>>>>>>>>>>>;

template <typename T> using __svehdr_hfa3_type =
    ::std::conditional_t<::std::is_same_v<T, svfloat64x3_t>, svfloat64_t,
    ::std::conditional_t<::std::is_same_v<T, svfloat32x3_t>, svfloat32_t,
    ::std::conditional_t<::std::is_same_v<T, svfloat16x3_t>, svfloat16_t,
    ::std::conditional_t<::std::is_same_v<T, svint64x3_t>, svint64_t,
    ::std::conditional_t<::std::is_same_v<T, svint32x3_t>, svint32_t,
    ::std::conditional_t<::std::is_same_v<T, svint16x3_t>, svint16_t,
    ::std::conditional_t<::std::is_same_v<T, svint8x3_t>, svint8_t,
    ::std::conditional_t<::std::is_same_v<T, svuint64x3_t>, svuint64_t,
    ::std::conditional_t<::std::is_same_v<T, svuint32x3_t>, svuint32_t,
    ::std::conditional_t<::std::is_same_v<T, svuint16x3_t>, svuint16_t,
    ::std::conditional_t<::std::is_same_v<T, svuint8x3_t>, svuint8_t,
    void>>>>>>>>>>>;

template <typename T> using __svehdr_hfa4_type =
    ::std::conditional_t<::std::is_same_v<T, svfloat64x4_t>, svfloat64_t,
    ::std::conditional_t<::std::is_same_v<T, svfloat32x4_t>, svfloat32_t,
    ::std::conditional_t<::std::is_same_v<T, svfloat16x4_t>, svfloat16_t,
    ::std::conditional_t<::std::is_same_v<T, svint64x4_t>, svint64_t,
    ::std::conditional_t<::std::is_same_v<T, svint32x4_t>, svint32_t,
    ::std::conditional_t<::std::is_same_v<T, svint16x4_t>, svint16_t,
    ::std::conditional_t<::std::is_same_v<T, svint8x4_t>, svint8_t,
    ::std::conditional_t<::std::is_same_v<T, svuint64x4_t>, svuint64_t,
    ::std::conditional_t<::std::is_same_v<T, svuint32x4_t>, svuint32_t,
    ::std::conditional_t<::std::is_same_v<T, svuint16x4_t>, svuint16_t,
    ::std::conditional_t<::std::is_same_v<T, svuint8x4_t>, svuint8_t,
    void>>>>>>>>>>>;

__forceinline svuint16_t svcls_z(svbool_t pg, svint16_t op) { return svcls_s16_z(pg, op); }
__forceinline svuint8_t svcls_z(svbool_t pg, svint8_t op) { return svcls_s8_z(pg, op); }
__forceinline svuint8_t svcls_m(svuint8_t inactive, svbool_t pg, svint8_t op) { return svcls_s8_m(inactive, pg, op); }
__forceinline svuint64_t svcls_x(svbool_t pg, svint64_t op) { return svcls_s64_x(pg, op); }
__forceinline svuint32_t svcls_x(svbool_t pg, svint32_t op) { return svcls_s32_x(pg, op); }
__forceinline svuint16_t svcls_x(svbool_t pg, svint16_t op) { return svcls_s16_x(pg, op); }
__forceinline svuint64_t svcls_z(svbool_t pg, svint64_t op) { return svcls_s64_z(pg, op); }
__forceinline svuint8_t svcls_x(svbool_t pg, svint8_t op) { return svcls_s8_x(pg, op); }
__forceinline svuint32_t svcls_m(svuint32_t inactive, svbool_t pg, svint32_t op) { return svcls_s32_m(inactive, pg, op); }
__forceinline svuint32_t svcls_z(svbool_t pg, svint32_t op) { return svcls_s32_z(pg, op); }
__forceinline svuint64_t svcls_m(svuint64_t inactive, svbool_t pg, svint64_t op) { return svcls_s64_m(inactive, pg, op); }
__forceinline svuint16_t svcls_m(svuint16_t inactive, svbool_t pg, svint16_t op) { return svcls_s16_m(inactive, pg, op); }
__forceinline svuint8_t svclz_m(svuint8_t inactive, svbool_t pg, svint8_t op) { return svclz_s8_m(inactive, pg, op); }
__forceinline svuint16_t svclz_m(svuint16_t inactive, svbool_t pg, svint16_t op) { return svclz_s16_m(inactive, pg, op); }
__forceinline svuint16_t svclz_z(svbool_t pg, svint16_t op) { return svclz_s16_z(pg, op); }
__forceinline svuint8_t svclz_z(svbool_t pg, svint8_t op) { return svclz_s8_z(pg, op); }
__forceinline svuint32_t svclz_m(svuint32_t inactive, svbool_t pg, svint32_t op) { return svclz_s32_m(inactive, pg, op); }
__forceinline svuint64_t svclz_m(svuint64_t inactive, svbool_t pg, svint64_t op) { return svclz_s64_m(inactive, pg, op); }
__forceinline svuint16_t svclz_x(svbool_t pg, svint16_t op) { return svclz_s16_x(pg, op); }
__forceinline svuint16_t svclz_m(svuint16_t inactive, svbool_t pg, svuint16_t op) { return svclz_u16_m(inactive, pg, op); }
__forceinline svuint64_t svclz_x(svbool_t pg, svuint64_t op) { return svclz_u64_x(pg, op); }
__forceinline svuint32_t svclz_x(svbool_t pg, svuint32_t op) { return svclz_u32_x(pg, op); }
__forceinline svuint16_t svclz_x(svbool_t pg, svuint16_t op) { return svclz_u16_x(pg, op); }
__forceinline svuint64_t svclz_z(svbool_t pg, svuint64_t op) { return svclz_u64_z(pg, op); }
__forceinline svuint32_t svclz_z(svbool_t pg, svuint32_t op) { return svclz_u32_z(pg, op); }
__forceinline svuint8_t svclz_m(svuint8_t inactive, svbool_t pg, svuint8_t op) { return svclz_u8_m(inactive, pg, op); }
__forceinline svuint8_t svclz_z(svbool_t pg, svuint8_t op) { return svclz_u8_z(pg, op); }
__forceinline svuint16_t svclz_z(svbool_t pg, svuint16_t op) { return svclz_u16_z(pg, op); }
__forceinline svuint8_t svclz_x(svbool_t pg, svuint8_t op) { return svclz_u8_x(pg, op); }
__forceinline svuint64_t svclz_x(svbool_t pg, svint64_t op) { return svclz_s64_x(pg, op); }
__forceinline svuint32_t svclz_x(svbool_t pg, svint32_t op) { return svclz_s32_x(pg, op); }
__forceinline svuint32_t svclz_z(svbool_t pg, svint32_t op) { return svclz_s32_z(pg, op);
} __forceinline svuint8_t svclz_x(svbool_t pg, svint8_t op) { return svclz_s8_x(pg, op); } __forceinline svuint64_t svclz_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { return svclz_u64_m(inactive, pg, op); } __forceinline svuint32_t svclz_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { return svclz_u32_m(inactive, pg, op); } __forceinline svuint64_t svclz_z(svbool_t pg, svint64_t op) { return svclz_s64_z(pg, op); } __forceinline svint32_t svexth_m(svint32_t inactive, svbool_t pg, svint32_t op) { return svexth_s32_m(inactive, pg, op); } __forceinline svint64_t svexth_z(svbool_t pg, svint64_t op) { return svexth_s64_z(pg, op); } __forceinline svint32_t svexth_z(svbool_t pg, svint32_t op) { return svexth_s32_z(pg, op); } __forceinline svint64_t svexth_x(svbool_t pg, svint64_t op) { return svexth_s64_x(pg, op); } __forceinline svint32_t svexth_x(svbool_t pg, svint32_t op) { return svexth_s32_x(pg, op); } __forceinline svint64_t svexth_m(svint64_t inactive, svbool_t pg, svint64_t op) { return svexth_s64_m(inactive, pg, op); } __forceinline svuint32_t svexth_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { return svexth_u32_m(inactive, pg, op); } __forceinline svuint64_t svexth_z(svbool_t pg, svuint64_t op) { return svexth_u64_z(pg, op); } __forceinline svuint32_t svexth_z(svbool_t pg, svuint32_t op) { return svexth_u32_z(pg, op); } __forceinline svuint64_t svexth_x(svbool_t pg, svuint64_t op) { return svexth_u64_x(pg, op); } __forceinline svuint32_t svexth_x(svbool_t pg, svuint32_t op) { return svexth_u32_x(pg, op); } __forceinline svuint64_t svexth_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { return svexth_u64_m(inactive, pg, op); } __forceinline svint64_t svextw_z(svbool_t pg, svint64_t op) { return svextw_s64_z(pg, op); } __forceinline svint64_t svextw_m(svint64_t inactive, svbool_t pg, svint64_t op) { return svextw_s64_m(inactive, pg, op); } __forceinline svint64_t svextw_x(svbool_t pg, svint64_t op) { return svextw_s64_x(pg, op); } __forceinline svuint64_t svextw_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { return svextw_u64_m(inactive, pg, op); } __forceinline svuint64_t svextw_z(svbool_t pg, svuint64_t op) { return svextw_u64_z(pg, op); } __forceinline svuint64_t svextw_x(svbool_t pg, svuint64_t op) { return svextw_u64_x(pg, op); } __forceinline svint64_t svextb_x(svbool_t pg, svint64_t op) { return svextb_s64_x(pg, op); } __forceinline svint16_t svextb_m(svint16_t inactive, svbool_t pg, svint16_t op) { return svextb_s16_m(inactive, pg, op); } __forceinline svint16_t svextb_x(svbool_t pg, svint16_t op) { return svextb_s16_x(pg, op); } __forceinline svint32_t svextb_z(svbool_t pg, svint32_t op) { return svextb_s32_z(pg, op); } __forceinline svint64_t svextb_z(svbool_t pg, svint64_t op) { return svextb_s64_z(pg, op); } __forceinline svint64_t svextb_m(svint64_t inactive, svbool_t pg, svint64_t op) { return svextb_s64_m(inactive, pg, op); } __forceinline svint32_t svextb_x(svbool_t pg, svint32_t op) { return svextb_s32_x(pg, op); } __forceinline svint16_t svextb_z(svbool_t pg, svint16_t op) { return svextb_s16_z(pg, op); } __forceinline svint32_t svextb_m(svint32_t inactive, svbool_t pg, svint32_t op) { return svextb_s32_m(inactive, pg, op); } __forceinline svuint32_t svextb_z(svbool_t pg, svuint32_t op) { return svextb_u32_z(pg, op); } __forceinline svuint16_t svextb_z(svbool_t pg, svuint16_t op) { return svextb_u16_z(pg, op); } __forceinline svuint64_t svextb_x(svbool_t pg, svuint64_t op) { return svextb_u64_x(pg, op); } __forceinline svuint32_t 
svextb_x(svbool_t pg, svuint32_t op) { return svextb_u32_x(pg, op); } __forceinline svuint16_t svextb_x(svbool_t pg, svuint16_t op) { return svextb_u16_x(pg, op); } __forceinline svuint64_t svextb_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { return svextb_u64_m(inactive, pg, op); } __forceinline svuint64_t svextb_z(svbool_t pg, svuint64_t op) { return svextb_u64_z(pg, op); } __forceinline svuint32_t svextb_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { return svextb_u32_m(inactive, pg, op); } __forceinline svuint16_t svextb_m(svuint16_t inactive, svbool_t pg, svuint16_t op) { return svextb_u16_m(inactive, pg, op); } __forceinline svuint64_t svcnt_z(svbool_t pg, svuint64_t op) { return svcnt_u64_z(pg, op); } __forceinline svuint32_t svcnt_z(svbool_t pg, svuint32_t op) { return svcnt_u32_z(pg, op); } __forceinline svuint16_t svcnt_z(svbool_t pg, svuint16_t op) { return svcnt_u16_z(pg, op); } __forceinline svuint64_t svcnt_z(svbool_t pg, svint64_t op) { return svcnt_s64_z(pg, op); } __forceinline svuint64_t svcnt_z(svbool_t pg, svfloat64_t op) { return svcnt_f64_z(pg, op); } __forceinline svuint16_t svcnt_z(svbool_t pg, svint16_t op) { return svcnt_s16_z(pg, op); } __forceinline svuint16_t svcnt_x(svbool_t pg, svint16_t op) { return svcnt_s16_x(pg, op); } __forceinline svuint8_t svcnt_x(svbool_t pg, svint8_t op) { return svcnt_s8_x(pg, op); } __forceinline svuint64_t svcnt_x(svbool_t pg, svfloat64_t op) { return svcnt_f64_x(pg, op); } __forceinline svuint32_t svcnt_x(svbool_t pg, svfloat32_t op) { return svcnt_f32_x(pg, op); } __forceinline svuint16_t svcnt_x(svbool_t pg, svfloat16_t op) { return svcnt_f16_x(pg, op); } __forceinline svuint16_t svcnt_x(svbool_t pg, svbfloat16_t op) { return svcnt_bf16_x(pg, op); } __forceinline svuint64_t svcnt_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { return svcnt_u64_m(inactive, pg, op); } __forceinline svuint32_t svcnt_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { return svcnt_u32_m(inactive, pg, op); } __forceinline svuint32_t svcnt_z(svbool_t pg, svint32_t op) { return svcnt_s32_z(pg, op); } __forceinline svuint16_t svcnt_m(svuint16_t inactive, svbool_t pg, svuint16_t op) { return svcnt_u16_m(inactive, pg, op); } __forceinline svuint64_t svcnt_m(svuint64_t inactive, svbool_t pg, svint64_t op) { return svcnt_s64_m(inactive, pg, op); } __forceinline svuint32_t svcnt_m(svuint32_t inactive, svbool_t pg, svint32_t op) { return svcnt_s32_m(inactive, pg, op); } __forceinline svuint16_t svcnt_m(svuint16_t inactive, svbool_t pg, svint16_t op) { return svcnt_s16_m(inactive, pg, op); } __forceinline svuint8_t svcnt_m(svuint8_t inactive, svbool_t pg, svint8_t op) { return svcnt_s8_m(inactive, pg, op); } __forceinline svuint64_t svcnt_m(svuint64_t inactive, svbool_t pg, svfloat64_t op) { return svcnt_f64_m(inactive, pg, op); } __forceinline svuint32_t svcnt_m(svuint32_t inactive, svbool_t pg, svfloat32_t op) { return svcnt_f32_m(inactive, pg, op); } __forceinline svuint16_t svcnt_m(svuint16_t inactive, svbool_t pg, svfloat16_t op) { return svcnt_f16_m(inactive, pg, op); } __forceinline svuint16_t svcnt_m(svuint16_t inactive, svbool_t pg, svbfloat16_t op) { return svcnt_bf16_m(inactive, pg, op); } __forceinline svuint8_t svcnt_m(svuint8_t inactive, svbool_t pg, svuint8_t op) { return svcnt_u8_m(inactive, pg, op); } __forceinline svuint64_t svcnt_x(svbool_t pg, svint64_t op) { return svcnt_s64_x(pg, op); } __forceinline svuint32_t svcnt_x(svbool_t pg, svint32_t op) { return svcnt_s32_x(pg, op); } __forceinline svuint16_t svcnt_x(svbool_t 
pg, svuint16_t op) { return svcnt_u16_x(pg, op); } __forceinline svuint8_t svcnt_z(svbool_t pg, svint8_t op) { return svcnt_s8_z(pg, op); } __forceinline svuint32_t svcnt_z(svbool_t pg, svfloat32_t op) { return svcnt_f32_z(pg, op); } __forceinline svuint16_t svcnt_z(svbool_t pg, svfloat16_t op) { return svcnt_f16_z(pg, op); } __forceinline svuint16_t svcnt_z(svbool_t pg, svbfloat16_t op) { return svcnt_bf16_z(pg, op); } __forceinline svuint8_t svcnt_x(svbool_t pg, svuint8_t op) { return svcnt_u8_x(pg, op); } __forceinline svuint8_t svcnt_z(svbool_t pg, svuint8_t op) { return svcnt_u8_z(pg, op); } __forceinline svuint32_t svcnt_x(svbool_t pg, svuint32_t op) { return svcnt_u32_x(pg, op); } __forceinline svuint64_t svcnt_x(svbool_t pg, svuint64_t op) { return svcnt_u64_x(pg, op); } __forceinline svbool_t svacge(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svacge_f32(pg, op1, op2); } __forceinline svbool_t svacge(svbool_t pg, svfloat32_t op1, float32_t op2) { return svacge_n_f32(pg, op1, op2); } __forceinline svbool_t svacge(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svacge_f64(pg, op1, op2); } __forceinline svbool_t svacge(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svacge_f16(pg, op1, op2); } __forceinline svbool_t svacge(svbool_t pg, svfloat64_t op1, float64_t op2) { return svacge_n_f64(pg, op1, op2); } __forceinline svbool_t svacgt(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svacgt_f32(pg, op1, op2); } __forceinline svbool_t svacgt(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svacgt_f64(pg, op1, op2); } __forceinline svbool_t svacgt(svbool_t pg, svfloat32_t op1, float32_t op2) { return svacgt_n_f32(pg, op1, op2); } __forceinline svbool_t svacgt(svbool_t pg, svfloat64_t op1, float64_t op2) { return svacgt_n_f64(pg, op1, op2); } __forceinline svbool_t svacgt(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svacgt_f16(pg, op1, op2); } __forceinline svbool_t svacle(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svacle_f16(pg, op1, op2); } __forceinline svbool_t svacle(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svacle_f64(pg, op1, op2); } __forceinline svbool_t svacle(svbool_t pg, svfloat32_t op1, float32_t op2) { return svacle_n_f32(pg, op1, op2); } __forceinline svbool_t svacle(svbool_t pg, svfloat64_t op1, float64_t op2) { return svacle_n_f64(pg, op1, op2); } __forceinline svbool_t svacle(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svacle_f32(pg, op1, op2); } __forceinline svbool_t svaclt(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svaclt_f16(pg, op1, op2); } __forceinline svbool_t svaclt(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svaclt_f32(pg, op1, op2); } __forceinline svbool_t svaclt(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svaclt_f64(pg, op1, op2); } __forceinline svbool_t svaclt(svbool_t pg, svfloat32_t op1, float32_t op2) { return svaclt_n_f32(pg, op1, op2); } __forceinline svbool_t svaclt(svbool_t pg, svfloat64_t op1, float64_t op2) { return svaclt_n_f64(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svuint16_t op1, uint16_t op2) { return svcmpeq_n_u16(pg, op1, op2); } __forceinline svbool_t svcmpeq_wide(svbool_t pg, svint8_t op1, svint64_t op2) { return svcmpeq_wide_s8(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svfloat32_t op1, float32_t op2) { return svcmpeq_n_f32(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svfloat64_t op1, float64_t op2) { return svcmpeq_n_f64(pg, op1, op2); } __forceinline 
svbool_t svcmpeq(svbool_t pg, svint8_t op1, int8_t op2) { return svcmpeq_n_s8(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svint16_t op1, int16_t op2) { return svcmpeq_n_s16(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svint32_t op1, int32_t op2) { return svcmpeq_n_s32(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svuint32_t op1, uint32_t op2) { return svcmpeq_n_u32(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svuint8_t op1, uint8_t op2) { return svcmpeq_n_u8(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svint64_t op1, int64_t op2) { return svcmpeq_n_s64(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svint32_t op1, svint32_t op2) { return svcmpeq_s32(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svcmpeq_u64(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svcmpeq_u32(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svcmpeq_u16(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svcmpeq_u8(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svint64_t op1, svint64_t op2) { return svcmpeq_s64(pg, op1, op2); } __forceinline svbool_t svcmpeq_wide(svbool_t pg, svint16_t op1, svint64_t op2) { return svcmpeq_wide_s16(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svint16_t op1, svint16_t op2) { return svcmpeq_s16(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svint8_t op1, svint8_t op2) { return svcmpeq_s8(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svcmpeq_f64(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svcmpeq_f32(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svcmpeq_f16(pg, op1, op2); } __forceinline svbool_t svcmpeq_wide(svbool_t pg, svint32_t op1, svint64_t op2) { return svcmpeq_wide_s32(pg, op1, op2); } __forceinline svbool_t svcmpeq(svbool_t pg, svuint64_t op1, uint64_t op2) { return svcmpeq_n_u64(pg, op1, op2); } __forceinline svbool_t svcmpeq_wide(svbool_t pg, svint16_t op1, int64_t op2) { return svcmpeq_wide_n_s16(pg, op1, op2); } __forceinline svbool_t svcmpeq_wide(svbool_t pg, svint8_t op1, int64_t op2) { return svcmpeq_wide_n_s8(pg, op1, op2); } __forceinline svbool_t svcmpeq_wide(svbool_t pg, svint32_t op1, int64_t op2) { return svcmpeq_wide_n_s32(pg, op1, op2); } __forceinline svbool_t svcmpge_wide(svbool_t pg, svuint8_t op1, uint64_t op2) { return svcmpge_wide_n_u8(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svcmpge_f16(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svcmpge_f32(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svcmpge_f64(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svint8_t op1, svint8_t op2) { return svcmpge_s8(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svint16_t op1, svint16_t op2) { return svcmpge_s16(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svint32_t op1, svint32_t op2) { return svcmpge_s32(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svint64_t op1, svint64_t op2) { return svcmpge_s64(pg, op1, op2); } __forceinline 
svbool_t svcmpge(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svcmpge_u8(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svcmpge_u16(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svcmpge_u32(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svcmpge_u64(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svfloat32_t op1, float32_t op2) { return svcmpge_n_f32(pg, op1, op2); } __forceinline svbool_t svcmpge_wide(svbool_t pg, svuint16_t op1, uint64_t op2) { return svcmpge_wide_n_u16(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svfloat64_t op1, float64_t op2) { return svcmpge_n_f64(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svint16_t op1, int16_t op2) { return svcmpge_n_s16(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svint32_t op1, int32_t op2) { return svcmpge_n_s32(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svint64_t op1, int64_t op2) { return svcmpge_n_s64(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svuint8_t op1, uint8_t op2) { return svcmpge_n_u8(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svuint16_t op1, uint16_t op2) { return svcmpge_n_u16(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svuint32_t op1, uint32_t op2) { return svcmpge_n_u32(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svuint64_t op1, uint64_t op2) { return svcmpge_n_u64(pg, op1, op2); } __forceinline svbool_t svcmpge_wide(svbool_t pg, svint32_t op1, svint64_t op2) { return svcmpge_wide_s32(pg, op1, op2); } __forceinline svbool_t svcmpge_wide(svbool_t pg, svuint8_t op1, svuint64_t op2) { return svcmpge_wide_u8(pg, op1, op2); } __forceinline svbool_t svcmpge_wide(svbool_t pg, svuint16_t op1, svuint64_t op2) { return svcmpge_wide_u16(pg, op1, op2); } __forceinline svbool_t svcmpge_wide(svbool_t pg, svuint32_t op1, svuint64_t op2) { return svcmpge_wide_u32(pg, op1, op2); } __forceinline svbool_t svcmpge_wide(svbool_t pg, svint8_t op1, int64_t op2) { return svcmpge_wide_n_s8(pg, op1, op2); } __forceinline svbool_t svcmpge_wide(svbool_t pg, svint16_t op1, int64_t op2) { return svcmpge_wide_n_s16(pg, op1, op2); } __forceinline svbool_t svcmpge_wide(svbool_t pg, svint32_t op1, int64_t op2) { return svcmpge_wide_n_s32(pg, op1, op2); } __forceinline svbool_t svcmpge(svbool_t pg, svint8_t op1, int8_t op2) { return svcmpge_n_s8(pg, op1, op2); } __forceinline svbool_t svcmpge_wide(svbool_t pg, svuint32_t op1, uint64_t op2) { return svcmpge_wide_n_u32(pg, op1, op2); } __forceinline svbool_t svcmpge_wide(svbool_t pg, svint16_t op1, svint64_t op2) { return svcmpge_wide_s16(pg, op1, op2); } __forceinline svbool_t svcmpge_wide(svbool_t pg, svint8_t op1, svint64_t op2) { return svcmpge_wide_s8(pg, op1, op2); } __forceinline svbool_t svcmpgt_wide(svbool_t pg, svuint8_t op1, uint64_t op2) { return svcmpgt_wide_n_u8(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svcmpgt_f32(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svcmpgt_f64(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svint8_t op1, svint8_t op2) { return svcmpgt_s8(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svint16_t op1, svint16_t op2) { return svcmpgt_s16(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svint32_t op1, 
svint32_t op2) { return svcmpgt_s32(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svint64_t op1, svint64_t op2) { return svcmpgt_s64(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svcmpgt_u8(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svcmpgt_u16(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svcmpgt_u32(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svcmpgt_u64(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svfloat32_t op1, float32_t op2) { return svcmpgt_n_f32(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svfloat64_t op1, float64_t op2) { return svcmpgt_n_f64(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svint8_t op1, int8_t op2) { return svcmpgt_n_s8(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svint16_t op1, int16_t op2) { return svcmpgt_n_s16(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svint64_t op1, int64_t op2) { return svcmpgt_n_s64(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svuint8_t op1, uint8_t op2) { return svcmpgt_n_u8(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svuint16_t op1, uint16_t op2) { return svcmpgt_n_u16(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svuint32_t op1, uint32_t op2) { return svcmpgt_n_u32(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svuint64_t op1, uint64_t op2) { return svcmpgt_n_u64(pg, op1, op2); } __forceinline svbool_t svcmpgt_wide(svbool_t pg, svint8_t op1, svint64_t op2) { return svcmpgt_wide_s8(pg, op1, op2); } __forceinline svbool_t svcmpgt_wide(svbool_t pg, svint16_t op1, svint64_t op2) { return svcmpgt_wide_s16(pg, op1, op2); } __forceinline svbool_t svcmpgt_wide(svbool_t pg, svint32_t op1, svint64_t op2) { return svcmpgt_wide_s32(pg, op1, op2); } __forceinline svbool_t svcmpgt_wide(svbool_t pg, svuint8_t op1, svuint64_t op2) { return svcmpgt_wide_u8(pg, op1, op2); } __forceinline svbool_t svcmpgt_wide(svbool_t pg, svuint16_t op1, svuint64_t op2) { return svcmpgt_wide_u16(pg, op1, op2); } __forceinline svbool_t svcmpgt_wide(svbool_t pg, svuint32_t op1, svuint64_t op2) { return svcmpgt_wide_u32(pg, op1, op2); } __forceinline svbool_t svcmpgt_wide(svbool_t pg, svint8_t op1, int64_t op2) { return svcmpgt_wide_n_s8(pg, op1, op2); } __forceinline svbool_t svcmpgt_wide(svbool_t pg, svint16_t op1, int64_t op2) { return svcmpgt_wide_n_s16(pg, op1, op2); } __forceinline svbool_t svcmpgt_wide(svbool_t pg, svint32_t op1, int64_t op2) { return svcmpgt_wide_n_s32(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svcmpgt_f16(pg, op1, op2); } __forceinline svbool_t svcmpgt(svbool_t pg, svint32_t op1, int32_t op2) { return svcmpgt_n_s32(pg, op1, op2); } __forceinline svbool_t svcmpgt_wide(svbool_t pg, svuint32_t op1, uint64_t op2) { return svcmpgt_wide_n_u32(pg, op1, op2); } __forceinline svbool_t svcmpgt_wide(svbool_t pg, svuint16_t op1, uint64_t op2) { return svcmpgt_wide_n_u16(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svcmple_f16(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svcmple_f32(pg, op1, op2); } __forceinline svbool_t svcmple_wide(svbool_t pg, svuint16_t op1, uint64_t op2) { return svcmple_wide_n_u16(pg, 
op1, op2); } __forceinline svbool_t svcmple_wide(svbool_t pg, svuint32_t op1, uint64_t op2) { return svcmple_wide_n_u32(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svint8_t op1, svint8_t op2) { return svcmple_s8(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svint16_t op1, svint16_t op2) { return svcmple_s16(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svint32_t op1, svint32_t op2) { return svcmple_s32(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svint64_t op1, svint64_t op2) { return svcmple_s64(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svcmple_u8(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svcmple_u16(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svcmple_u32(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svcmple_u64(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svfloat32_t op1, float32_t op2) { return svcmple_n_f32(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svfloat64_t op1, float64_t op2) { return svcmple_n_f64(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svint8_t op1, int8_t op2) { return svcmple_n_s8(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svint16_t op1, int16_t op2) { return svcmple_n_s16(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svcmple_f64(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svuint8_t op1, uint8_t op2) { return svcmple_n_u8(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svint32_t op1, int32_t op2) { return svcmple_n_s32(pg, op1, op2); } __forceinline svbool_t svcmple_wide(svbool_t pg, svuint8_t op1, uint64_t op2) { return svcmple_wide_n_u8(pg, op1, op2); } __forceinline svbool_t svcmple_wide(svbool_t pg, svint32_t op1, int64_t op2) { return svcmple_wide_n_s32(pg, op1, op2); } __forceinline svbool_t svcmple_wide(svbool_t pg, svint16_t op1, int64_t op2) { return svcmple_wide_n_s16(pg, op1, op2); } __forceinline svbool_t svcmple_wide(svbool_t pg, svint8_t op1, int64_t op2) { return svcmple_wide_n_s8(pg, op1, op2); } __forceinline svbool_t svcmple_wide(svbool_t pg, svuint32_t op1, svuint64_t op2) { return svcmple_wide_u32(pg, op1, op2); } __forceinline svbool_t svcmple_wide(svbool_t pg, svuint16_t op1, svuint64_t op2) { return svcmple_wide_u16(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svint64_t op1, int64_t op2) { return svcmple_n_s64(pg, op1, op2); } __forceinline svbool_t svcmple_wide(svbool_t pg, svint32_t op1, svint64_t op2) { return svcmple_wide_s32(pg, op1, op2); } __forceinline svbool_t svcmple_wide(svbool_t pg, svint16_t op1, svint64_t op2) { return svcmple_wide_s16(pg, op1, op2); } __forceinline svbool_t svcmple_wide(svbool_t pg, svint8_t op1, svint64_t op2) { return svcmple_wide_s8(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svuint64_t op1, uint64_t op2) { return svcmple_n_u64(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svuint32_t op1, uint32_t op2) { return svcmple_n_u32(pg, op1, op2); } __forceinline svbool_t svcmple_wide(svbool_t pg, svuint8_t op1, svuint64_t op2) { return svcmple_wide_u8(pg, op1, op2); } __forceinline svbool_t svcmple(svbool_t pg, svuint16_t op1, uint16_t op2) { return svcmple_n_u16(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svint32_t 
op1, int32_t op2) { return svcmplt_n_s32(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svint16_t op1, int16_t op2) { return svcmplt_n_s16(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svfloat64_t op1, float64_t op2) { return svcmplt_n_f64(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svfloat32_t op1, float32_t op2) { return svcmplt_n_f32(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svcmplt_u64(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svint64_t op1, int64_t op2) { return svcmplt_n_s64(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svcmplt_u32(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svcmplt_u8(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svint64_t op1, svint64_t op2) { return svcmplt_s64(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svint32_t op1, svint32_t op2) { return svcmplt_s32(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svint16_t op1, svint16_t op2) { return svcmplt_s16(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svint8_t op1, svint8_t op2) { return svcmplt_s8(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svcmplt_f16(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svcmplt_u16(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svint8_t op1, int8_t op2) { return svcmplt_n_s8(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svuint8_t op1, uint8_t op2) { return svcmplt_n_u8(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svuint16_t op1, uint16_t op2) { return svcmplt_n_u16(pg, op1, op2); } __forceinline svbool_t svcmplt_wide(svbool_t pg, svuint32_t op1, uint64_t op2) { return svcmplt_wide_n_u32(pg, op1, op2); } __forceinline svbool_t svcmplt_wide(svbool_t pg, svuint16_t op1, uint64_t op2) { return svcmplt_wide_n_u16(pg, op1, op2); } __forceinline svbool_t svcmplt_wide(svbool_t pg, svuint8_t op1, uint64_t op2) { return svcmplt_wide_n_u8(pg, op1, op2); } __forceinline svbool_t svcmplt_wide(svbool_t pg, svint32_t op1, int64_t op2) { return svcmplt_wide_n_s32(pg, op1, op2); } __forceinline svbool_t svcmplt_wide(svbool_t pg, svint16_t op1, int64_t op2) { return svcmplt_wide_n_s16(pg, op1, op2); } __forceinline svbool_t svcmplt_wide(svbool_t pg, svint8_t op1, int64_t op2) { return svcmplt_wide_n_s8(pg, op1, op2); } __forceinline svbool_t svcmplt_wide(svbool_t pg, svuint32_t op1, svuint64_t op2) { return svcmplt_wide_u32(pg, op1, op2); } __forceinline svbool_t svcmplt_wide(svbool_t pg, svuint16_t op1, svuint64_t op2) { return svcmplt_wide_u16(pg, op1, op2); } __forceinline svbool_t svcmplt_wide(svbool_t pg, svuint8_t op1, svuint64_t op2) { return svcmplt_wide_u8(pg, op1, op2); } __forceinline svbool_t svcmplt_wide(svbool_t pg, svint32_t op1, svint64_t op2) { return svcmplt_wide_s32(pg, op1, op2); } __forceinline svbool_t svcmplt_wide(svbool_t pg, svint16_t op1, svint64_t op2) { return svcmplt_wide_s16(pg, op1, op2); } __forceinline svbool_t svcmplt_wide(svbool_t pg, svint8_t op1, svint64_t op2) { return svcmplt_wide_s8(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svuint64_t op1, uint64_t op2) { return svcmplt_n_u64(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svuint32_t op1, uint32_t op2) { return svcmplt_n_u32(pg, op1, 
op2); } __forceinline svbool_t svcmplt(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svcmplt_f32(pg, op1, op2); } __forceinline svbool_t svcmplt(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svcmplt_f64(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svint64_t op1, int64_t op2) { return svcmpne_n_s64(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svuint8_t op1, uint8_t op2) { return svcmpne_n_u8(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svuint16_t op1, uint16_t op2) { return svcmpne_n_u16(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svuint32_t op1, uint32_t op2) { return svcmpne_n_u32(pg, op1, op2); } __forceinline svbool_t svcmpne_wide(svbool_t pg, svint8_t op1, svint64_t op2) { return svcmpne_wide_s8(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svint32_t op1, int32_t op2) { return svcmpne_n_s32(pg, op1, op2); } __forceinline svbool_t svcmpne_wide(svbool_t pg, svint32_t op1, svint64_t op2) { return svcmpne_wide_s32(pg, op1, op2); } __forceinline svbool_t svcmpne_wide(svbool_t pg, svint8_t op1, int64_t op2) { return svcmpne_wide_n_s8(pg, op1, op2); } __forceinline svbool_t svcmpne_wide(svbool_t pg, svint16_t op1, int64_t op2) { return svcmpne_wide_n_s16(pg, op1, op2); } __forceinline svbool_t svcmpne_wide(svbool_t pg, svint32_t op1, int64_t op2) { return svcmpne_wide_n_s32(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svuint64_t op1, uint64_t op2) { return svcmpne_n_u64(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svint16_t op1, int16_t op2) { return svcmpne_n_s16(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svfloat64_t op1, float64_t op2) { return svcmpne_n_f64(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svfloat32_t op1, float32_t op2) { return svcmpne_n_f32(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svcmpne_u64(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svcmpne_u32(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svcmpne_u16(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svint64_t op1, svint64_t op2) { return svcmpne_s64(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svint32_t op1, svint32_t op2) { return svcmpne_s32(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svint16_t op1, svint16_t op2) { return svcmpne_s16(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svint8_t op1, svint8_t op2) { return svcmpne_s8(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svcmpne_f64(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svcmpne_f32(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svcmpne_f16(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svint8_t op1, int8_t op2) { return svcmpne_n_s8(pg, op1, op2); } __forceinline svbool_t svcmpne(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svcmpne_u8(pg, op1, op2); } __forceinline svbool_t svcmpne_wide(svbool_t pg, svint16_t op1, svint64_t op2) { return svcmpne_wide_s16(pg, op1, op2); } __forceinline svbool_t svcmpuo(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svcmpuo_f64(pg, op1, op2); } __forceinline svbool_t svcmpuo(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return 
svcmpuo_f32(pg, op1, op2); }
__forceinline svbool_t svcmpuo(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svcmpuo_f16(pg, op1, op2); }
__forceinline svbool_t svcmpuo(svbool_t pg, svfloat64_t op1, float64_t op2) { return svcmpuo_n_f64(pg, op1, op2); }
__forceinline svbool_t svcmpuo(svbool_t pg, svfloat32_t op1, float32_t op2) { return svcmpuo_n_f32(pg, op1, op2); }

template <int N, typename T> __forceinline T __svcadd_z(svbool_t pg, T op1, T op2) {
    if constexpr(::std::is_same_v<T, svfloat16_t>) { return svcadd_f16_z(pg, op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svfloat32_t>) { return svcadd_f32_z(pg, op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svfloat64_t>) { return svcadd_f64_z(pg, op1, op2, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svcadd_z(pg, op1, op2, imm_rotation) __svcadd_z<imm_rotation>(pg, op1, op2)

template <int N, typename T> __forceinline T __svcadd_m(svbool_t pg, T op1, T op2) {
    if constexpr(::std::is_same_v<T, svfloat16_t>) { return svcadd_f16_m(pg, op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svfloat32_t>) { return svcadd_f32_m(pg, op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svfloat64_t>) { return svcadd_f64_m(pg, op1, op2, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svcadd_m(pg, op1, op2, imm_rotation) __svcadd_m<imm_rotation>(pg, op1, op2)

template <int N, typename T> __forceinline T __svcadd_x(svbool_t pg, T op1, T op2) {
    if constexpr(::std::is_same_v<T, svfloat16_t>) { return svcadd_f16_x(pg, op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svfloat32_t>) { return svcadd_f32_x(pg, op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svfloat64_t>) { return svcadd_f64_x(pg, op1, op2, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svcadd_x(pg, op1, op2, imm_rotation) __svcadd_x<imm_rotation>(pg, op1, op2)

template <int N, typename T> __forceinline T __svcmla_m(svbool_t pg, T op1, T op2, T op3) {
    if constexpr(::std::is_same_v<T, svfloat16_t>) { return svcmla_f16_m(pg, op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svfloat32_t>) { return svcmla_f32_m(pg, op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svfloat64_t>) { return svcmla_f64_m(pg, op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svcmla_m(pg, op1, op2, op3, imm_rotation) __svcmla_m<imm_rotation>(pg, op1, op2, op3)

template <int N, typename T> __forceinline T __svcmla_x(svbool_t pg, T op1, T op2, T op3) {
    if constexpr(::std::is_same_v<T, svfloat16_t>) { return svcmla_f16_x(pg, op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svfloat32_t>) { return svcmla_f32_x(pg, op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svfloat64_t>) { return svcmla_f64_x(pg, op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svcmla_x(pg, op1, op2, op3, imm_rotation) __svcmla_x<imm_rotation>(pg, op1, op2, op3)

template <int N, typename T> __forceinline T __svcmla_z(svbool_t pg, T op1, T op2, T op3) {
    if constexpr(::std::is_same_v<T, svfloat16_t>) { return svcmla_f16_z(pg, op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svfloat32_t>) { return svcmla_f32_z(pg, op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svfloat64_t>) { return svcmla_f64_z(pg, op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svcmla_z(pg, op1, op2, op3, imm_rotation) __svcmla_z<imm_rotation>(pg, op1, op2, op3)

template <int N, typename T> __forceinline T __svcmla(T op1, T op2, T op3) {
    if constexpr(::std::is_same_v<T, svint8_t>) { return svcmla_s8(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svcmla_s16(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svcmla_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint64_t>) { return svcmla_s64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svuint8_t>) { return svcmla_u8(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svuint16_t>) { return svcmla_u16(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svuint32_t>) { return svcmla_u32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svuint64_t>) { return svcmla_u64(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svcmla(op1, op2, op3, imm_rotation) __svcmla<imm_rotation>(op1, op2, op3)

template <int N1, int N2, typename T> __forceinline T __svcmla_lane(T op1, T op2, T op3) {
    if constexpr(::std::is_same_v<T, svfloat16_t>) { return svcmla_lane_f16(op1, op2, op3, N1, N2); }
    else if constexpr(::std::is_same_v<T, svfloat32_t>) { return svcmla_lane_f32(op1, op2, op3, N1, N2); }
    else if constexpr(::std::is_same_v<T, svuint32_t>) { return svcmla_lane_u32(op1, op2, op3, N1, N2); }
    else if constexpr(::std::is_same_v<T, svuint16_t>) { return svcmla_lane_u16(op1, op2, op3, N1, N2); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svcmla_lane_s32(op1, op2, op3, N1, N2); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svcmla_lane_s16(op1, op2, op3, N1, N2); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svcmla_lane(op1, op2, op3, imm_index, imm_rotation) __svcmla_lane<imm_index, imm_rotation>(op1, op2, op3)

__forceinline svbfloat16_t svcvtnt_bf16_x(svbfloat16_t even, svbool_t pg, svfloat32_t op) { return svcvtnt_bf16_f32_x(even, pg, op); }
__forceinline svbfloat16_t svcvtnt_bf16_m(svbfloat16_t even, svbool_t pg, svfloat32_t op) { return svcvtnt_bf16_f32_m(even, pg, op); }
__forceinline svint32_t svcvt_s32_m(svint32_t inactive, svbool_t pg, svfloat16_t op) { return svcvt_s32_f16_m(inactive, pg, op); }
__forceinline svint32_t svcvt_s32_m(svint32_t inactive, svbool_t pg, svfloat32_t op) { return svcvt_s32_f32_m(inactive, pg, op); }
__forceinline svint32_t svcvt_s32_m(svint32_t inactive, svbool_t pg, svfloat64_t op) { return svcvt_s32_f64_m(inactive, pg, op); }
__forceinline svint64_t svcvt_s64_m(svint64_t inactive, svbool_t pg, svfloat16_t op) { return svcvt_s64_f16_m(inactive, pg, op); }
__forceinline svint64_t svcvt_s64_m(svint64_t inactive, svbool_t pg, svfloat32_t op) { return svcvt_s64_f32_m(inactive, pg, op); }
__forceinline svint64_t svcvt_s64_m(svint64_t inactive, svbool_t pg, svfloat64_t op) { return svcvt_s64_f64_m(inactive, pg, op); }
__forceinline svuint16_t svcvt_u16_m(svuint16_t inactive, svbool_t pg, svfloat16_t op) { return svcvt_u16_f16_m(inactive, pg, op); }
__forceinline svint32_t svcvt_s32_x(svbool_t pg, svfloat32_t op) { return svcvt_s32_f32_x(pg, op); }
__forceinline svuint32_t svcvt_u32_m(svuint32_t inactive, svbool_t pg, svfloat16_t op) { return svcvt_u32_f16_m(inactive, pg, op); }
__forceinline svuint32_t svcvt_u32_m(svuint32_t inactive, svbool_t pg, svfloat64_t op) { return svcvt_u32_f64_m(inactive, pg, op); }
__forceinline svuint64_t svcvt_u64_m(svuint64_t inactive, svbool_t pg, svfloat16_t op) { return svcvt_u64_f16_m(inactive, pg, op); }
__forceinline svuint64_t svcvt_u64_m(svuint64_t inactive, svbool_t pg, svfloat32_t op) { return svcvt_u64_f32_m(inactive, pg, op); }
__forceinline svuint64_t svcvt_u64_m(svuint64_t inactive, svbool_t pg, svfloat64_t op) { return svcvt_u64_f64_m(inactive, pg, op); }
__forceinline svint16_t svcvt_s16_x(svbool_t pg, svfloat16_t op) { return svcvt_s16_f16_x(pg, op); }
__forceinline svint32_t svcvt_s32_x(svbool_t pg, svfloat16_t op) { return svcvt_s32_f16_x(pg, op); }
__forceinline svint16_t svcvt_s16_m(svint16_t inactive, svbool_t pg, svfloat16_t op) { return svcvt_s16_f16_m(inactive, pg, op); }
__forceinline svuint32_t svcvt_u32_m(svuint32_t inactive, svbool_t pg, svfloat32_t op) { return
svcvt_u32_f32_m(inactive, pg, op); } __forceinline svfloat64_t svcvt_f64_z(svbool_t pg, svfloat32_t op) { return svcvt_f64_f32_z(pg, op); } __forceinline svfloat32_t svcvt_f32_m(svfloat32_t inactive, svbool_t pg, svfloat64_t op) { return svcvt_f32_f64_m(inactive, pg, op); } __forceinline svfloat32_t svcvt_f32_z(svbool_t pg, svfloat64_t op) { return svcvt_f32_f64_z(pg, op); } __forceinline svbfloat16_t svcvt_bf16_z(svbool_t pg, svfloat32_t op) { return svcvt_bf16_f32_z(pg, op); } __forceinline svfloat16_t svcvt_f16_m(svfloat16_t inactive, svbool_t pg, svfloat32_t op) { return svcvt_f16_f32_m(inactive, pg, op); } __forceinline svfloat16_t svcvt_f16_m(svfloat16_t inactive, svbool_t pg, svfloat64_t op) { return svcvt_f16_f64_m(inactive, pg, op); } __forceinline svfloat32_t svcvt_f32_m(svfloat32_t inactive, svbool_t pg, svfloat16_t op) { return svcvt_f32_f16_m(inactive, pg, op); } __forceinline svint32_t svcvt_s32_x(svbool_t pg, svfloat64_t op) { return svcvt_s32_f64_x(pg, op); } __forceinline svfloat64_t svcvt_f64_m(svfloat64_t inactive, svbool_t pg, svfloat16_t op) { return svcvt_f64_f16_m(inactive, pg, op); } __forceinline svfloat64_t svcvt_f64_m(svfloat64_t inactive, svbool_t pg, svfloat32_t op) { return svcvt_f64_f32_m(inactive, pg, op); } __forceinline svfloat64_t svcvt_f64_z(svbool_t pg, svfloat16_t op) { return svcvt_f64_f16_z(pg, op); } __forceinline svfloat16_t svcvt_f16_x(svbool_t pg, svfloat32_t op) { return svcvt_f16_f32_x(pg, op); } __forceinline svfloat32_t svcvt_f32_x(svbool_t pg, svfloat16_t op) { return svcvt_f32_f16_x(pg, op); } __forceinline svfloat32_t svcvt_f32_x(svbool_t pg, svfloat64_t op) { return svcvt_f32_f64_x(pg, op); } __forceinline svfloat64_t svcvt_f64_x(svbool_t pg, svfloat16_t op) { return svcvt_f64_f16_x(pg, op); } __forceinline svfloat64_t svcvt_f64_x(svbool_t pg, svfloat32_t op) { return svcvt_f64_f32_x(pg, op); } __forceinline svfloat16_t svcvt_f16_z(svbool_t pg, svfloat32_t op) { return svcvt_f16_f32_z(pg, op); } __forceinline svfloat16_t svcvt_f16_z(svbool_t pg, svfloat64_t op) { return svcvt_f16_f64_z(pg, op); } __forceinline svfloat32_t svcvt_f32_z(svbool_t pg, svfloat16_t op) { return svcvt_f32_f16_z(pg, op); } __forceinline svfloat16_t svcvt_f16_x(svbool_t pg, svfloat64_t op) { return svcvt_f16_f64_x(pg, op); } __forceinline svint64_t svcvt_s64_x(svbool_t pg, svfloat16_t op) { return svcvt_s64_f16_x(pg, op); } __forceinline svbfloat16_t svcvt_bf16_x(svbool_t pg, svfloat32_t op) { return svcvt_bf16_f32_x(pg, op); } __forceinline svint64_t svcvt_s64_x(svbool_t pg, svfloat64_t op) { return svcvt_s64_f64_x(pg, op); } __forceinline svfloat64_t svcvt_f64_m(svfloat64_t inactive, svbool_t pg, svuint64_t op) { return svcvt_f64_u64_m(inactive, pg, op); } __forceinline svfloat16_t svcvt_f16_x(svbool_t pg, svint16_t op) { return svcvt_f16_s16_x(pg, op); } __forceinline svfloat16_t svcvt_f16_x(svbool_t pg, svint32_t op) { return svcvt_f16_s32_x(pg, op); } __forceinline svfloat16_t svcvt_f16_x(svbool_t pg, svint64_t op) { return svcvt_f16_s64_x(pg, op); } __forceinline svfloat16_t svcvt_f16_x(svbool_t pg, svuint16_t op) { return svcvt_f16_u16_x(pg, op); } __forceinline svfloat16_t svcvt_f16_x(svbool_t pg, svuint32_t op) { return svcvt_f16_u32_x(pg, op); } __forceinline svfloat16_t svcvt_f16_x(svbool_t pg, svuint64_t op) { return svcvt_f16_u64_x(pg, op); } __forceinline svint64_t svcvt_s64_x(svbool_t pg, svfloat32_t op) { return svcvt_s64_f32_x(pg, op); } __forceinline svfloat32_t svcvt_f32_x(svbool_t pg, svint32_t op) { return svcvt_f32_s32_x(pg, op); } 
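// The svcvt_* overloads around this point simply forward to the explicitly typed
// conversion intrinsics, so the argument element types select the instruction.
// A minimal usage sketch (illustrative only, not part of this header's interface);
// it assumes the usual svptrue_b32() predicate intrinsic declared elsewhere in
// this header, and some_s32 stands for any svint32_t value:
//
//     svbool_t    pg = svptrue_b32();
//     svfloat32_t f  = svcvt_f32_x(pg, some_s32);   // resolves to svcvt_f32_s32_x
//     svint32_t   i  = svcvt_s32_x(pg, f);          // resolves to svcvt_s32_f32_x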
__forceinline svfloat32_t svcvt_f32_x(svbool_t pg, svint64_t op) { return svcvt_f32_s64_x(pg, op); } __forceinline svfloat32_t svcvt_f32_x(svbool_t pg, svuint32_t op) { return svcvt_f32_u32_x(pg, op); } __forceinline svfloat32_t svcvt_f32_x(svbool_t pg, svuint64_t op) { return svcvt_f32_u64_x(pg, op); } __forceinline svfloat64_t svcvt_f64_x(svbool_t pg, svint32_t op) { return svcvt_f64_s32_x(pg, op); } __forceinline svfloat64_t svcvt_f64_x(svbool_t pg, svint64_t op) { return svcvt_f64_s64_x(pg, op); } __forceinline svfloat64_t svcvt_f64_m(svfloat64_t inactive, svbool_t pg, svuint32_t op) { return svcvt_f64_u32_m(inactive, pg, op); } __forceinline svfloat64_t svcvt_f64_x(svbool_t pg, svuint32_t op) { return svcvt_f64_u32_x(pg, op); } __forceinline svfloat16_t svcvt_f16_z(svbool_t pg, svint16_t op) { return svcvt_f16_s16_z(pg, op); } __forceinline svfloat16_t svcvt_f16_z(svbool_t pg, svint32_t op) { return svcvt_f16_s32_z(pg, op); } __forceinline svfloat16_t svcvt_f16_z(svbool_t pg, svint64_t op) { return svcvt_f16_s64_z(pg, op); } __forceinline svfloat16_t svcvt_f16_z(svbool_t pg, svuint16_t op) { return svcvt_f16_u16_z(pg, op); } __forceinline svfloat16_t svcvt_f16_z(svbool_t pg, svuint32_t op) { return svcvt_f16_u32_z(pg, op); } __forceinline svfloat16_t svcvt_f16_z(svbool_t pg, svuint64_t op) { return svcvt_f16_u64_z(pg, op); } __forceinline svfloat32_t svcvt_f32_z(svbool_t pg, svint32_t op) { return svcvt_f32_s32_z(pg, op); } __forceinline svfloat32_t svcvt_f32_z(svbool_t pg, svint64_t op) { return svcvt_f32_s64_z(pg, op); } __forceinline svfloat32_t svcvt_f32_z(svbool_t pg, svuint32_t op) { return svcvt_f32_u32_z(pg, op); } __forceinline svfloat32_t svcvt_f32_z(svbool_t pg, svuint64_t op) { return svcvt_f32_u64_z(pg, op); } __forceinline svfloat64_t svcvt_f64_z(svbool_t pg, svint32_t op) { return svcvt_f64_s32_z(pg, op); } __forceinline svfloat64_t svcvt_f64_z(svbool_t pg, svint64_t op) { return svcvt_f64_s64_z(pg, op); } __forceinline svfloat64_t svcvt_f64_z(svbool_t pg, svuint32_t op) { return svcvt_f64_u32_z(pg, op); } __forceinline svfloat64_t svcvt_f64_z(svbool_t pg, svuint64_t op) { return svcvt_f64_u64_z(pg, op); } __forceinline svfloat64_t svcvt_f64_x(svbool_t pg, svuint64_t op) { return svcvt_f64_u64_x(pg, op); } __forceinline svfloat64_t svcvt_f64_m(svfloat64_t inactive, svbool_t pg, svint64_t op) { return svcvt_f64_s64_m(inactive, pg, op); } __forceinline svbfloat16_t svcvt_bf16_m(svbfloat16_t inactive, svbool_t pg, svfloat32_t op) { return svcvt_bf16_f32_m(inactive, pg, op); } __forceinline svfloat32_t svcvt_f32_m(svfloat32_t inactive, svbool_t pg, svuint64_t op) { return svcvt_f32_u64_m(inactive, pg, op); } __forceinline svuint32_t svcvt_u32_x(svbool_t pg, svfloat16_t op) { return svcvt_u32_f16_x(pg, op); } __forceinline svuint32_t svcvt_u32_x(svbool_t pg, svfloat32_t op) { return svcvt_u32_f32_x(pg, op); } __forceinline svuint64_t svcvt_u64_x(svbool_t pg, svfloat16_t op) { return svcvt_u64_f16_x(pg, op); } __forceinline svuint64_t svcvt_u64_x(svbool_t pg, svfloat32_t op) { return svcvt_u64_f32_x(pg, op); } __forceinline svuint64_t svcvt_u64_x(svbool_t pg, svfloat64_t op) { return svcvt_u64_f64_x(pg, op); } __forceinline svint16_t svcvt_s16_z(svbool_t pg, svfloat16_t op) { return svcvt_s16_f16_z(pg, op); } __forceinline svint32_t svcvt_s32_z(svbool_t pg, svfloat16_t op) { return svcvt_s32_f16_z(pg, op); } __forceinline svint32_t svcvt_s32_z(svbool_t pg, svfloat32_t op) { return svcvt_s32_f32_z(pg, op); } __forceinline svfloat64_t svcvt_f64_m(svfloat64_t inactive, svbool_t 
pg, svint32_t op) { return svcvt_f64_s32_m(inactive, pg, op); } __forceinline svfloat16_t svcvt_f16_m(svfloat16_t inactive, svbool_t pg, svint16_t op) { return svcvt_f16_s16_m(inactive, pg, op); } __forceinline svuint64_t svcvt_u64_z(svbool_t pg, svfloat64_t op) { return svcvt_u64_f64_z(pg, op); } __forceinline svuint64_t svcvt_u64_z(svbool_t pg, svfloat32_t op) { return svcvt_u64_f32_z(pg, op); } __forceinline svuint64_t svcvt_u64_z(svbool_t pg, svfloat16_t op) { return svcvt_u64_f16_z(pg, op); } __forceinline svuint32_t svcvt_u32_z(svbool_t pg, svfloat64_t op) { return svcvt_u32_f64_z(pg, op); } __forceinline svuint32_t svcvt_u32_z(svbool_t pg, svfloat32_t op) { return svcvt_u32_f32_z(pg, op); } __forceinline svuint32_t svcvt_u32_z(svbool_t pg, svfloat16_t op) { return svcvt_u32_f16_z(pg, op); } __forceinline svuint16_t svcvt_u16_z(svbool_t pg, svfloat16_t op) { return svcvt_u16_f16_z(pg, op); } __forceinline svint64_t svcvt_s64_z(svbool_t pg, svfloat64_t op) { return svcvt_s64_f64_z(pg, op); } __forceinline svint64_t svcvt_s64_z(svbool_t pg, svfloat32_t op) { return svcvt_s64_f32_z(pg, op); } __forceinline svint64_t svcvt_s64_z(svbool_t pg, svfloat16_t op) { return svcvt_s64_f16_z(pg, op); } __forceinline svint32_t svcvt_s32_z(svbool_t pg, svfloat64_t op) { return svcvt_s32_f64_z(pg, op); } __forceinline svfloat16_t svcvt_f16_m(svfloat16_t inactive, svbool_t pg, svint32_t op) { return svcvt_f16_s32_m(inactive, pg, op); } __forceinline svuint16_t svcvt_u16_x(svbool_t pg, svfloat16_t op) { return svcvt_u16_f16_x(pg, op); } __forceinline svuint32_t svcvt_u32_x(svbool_t pg, svfloat64_t op) { return svcvt_u32_f64_x(pg, op); } __forceinline svfloat16_t svcvt_f16_m(svfloat16_t inactive, svbool_t pg, svuint64_t op) { return svcvt_f16_u64_m(inactive, pg, op); } __forceinline svfloat16_t svcvt_f16_m(svfloat16_t inactive, svbool_t pg, svuint32_t op) { return svcvt_f16_u32_m(inactive, pg, op); } __forceinline svfloat16_t svcvt_f16_m(svfloat16_t inactive, svbool_t pg, svuint16_t op) { return svcvt_f16_u16_m(inactive, pg, op); } __forceinline svfloat16_t svcvt_f16_m(svfloat16_t inactive, svbool_t pg, svint64_t op) { return svcvt_f16_s64_m(inactive, pg, op); } __forceinline svfloat32_t svcvt_f32_m(svfloat32_t inactive, svbool_t pg, svuint32_t op) { return svcvt_f32_u32_m(inactive, pg, op); } __forceinline svfloat32_t svcvt_f32_m(svfloat32_t inactive, svbool_t pg, svint64_t op) { return svcvt_f32_s64_m(inactive, pg, op); } __forceinline svfloat32_t svcvt_f32_m(svfloat32_t inactive, svbool_t pg, svint32_t op) { return svcvt_f32_s32_m(inactive, pg, op); } __forceinline svbfloat16_t svreinterpret_bf16(svbfloat16_t op) { return svreinterpret_bf16_bf16(op); } __forceinline svbfloat16_t svreinterpret_bf16(svfloat16_t op) { return svreinterpret_bf16_f16(op); } __forceinline svbfloat16_t svreinterpret_bf16(svfloat32_t op) { return svreinterpret_bf16_f32(op); } __forceinline svbfloat16_t svreinterpret_bf16(svfloat64_t op) { return svreinterpret_bf16_f64(op); } __forceinline svbfloat16_t svreinterpret_bf16(svint8_t op) { return svreinterpret_bf16_s8(op); } __forceinline svbfloat16_t svreinterpret_bf16(svint16_t op) { return svreinterpret_bf16_s16(op); } __forceinline svbfloat16_t svreinterpret_bf16(svint32_t op) { return svreinterpret_bf16_s32(op); } __forceinline svbfloat16_t svreinterpret_bf16(svint64_t op) { return svreinterpret_bf16_s64(op); } __forceinline svbfloat16_t svreinterpret_bf16(svuint8_t op) { return svreinterpret_bf16_u8(op); } __forceinline svbfloat16_t svreinterpret_bf16(svuint16_t op) { return 
svreinterpret_bf16_u16(op); } __forceinline svbfloat16_t svreinterpret_bf16(svuint32_t op) { return svreinterpret_bf16_u32(op); } __forceinline svbfloat16_t svreinterpret_bf16(svuint64_t op) { return svreinterpret_bf16_u64(op); } __forceinline svfloat16_t svreinterpret_f16(svbfloat16_t op) { return svreinterpret_f16_bf16(op); } __forceinline svfloat16_t svreinterpret_f16(svfloat16_t op) { return svreinterpret_f16_f16(op); } __forceinline svfloat16_t svreinterpret_f16(svfloat32_t op) { return svreinterpret_f16_f32(op); } __forceinline svfloat16_t svreinterpret_f16(svfloat64_t op) { return svreinterpret_f16_f64(op); } __forceinline svfloat16_t svreinterpret_f16(svint8_t op) { return svreinterpret_f16_s8(op); } __forceinline svfloat16_t svreinterpret_f16(svint16_t op) { return svreinterpret_f16_s16(op); } __forceinline svfloat16_t svreinterpret_f16(svint64_t op) { return svreinterpret_f16_s64(op); } __forceinline svfloat16_t svreinterpret_f16(svint32_t op) { return svreinterpret_f16_s32(op); } __forceinline svfloat16_t svreinterpret_f16(svuint8_t op) { return svreinterpret_f16_u8(op); } __forceinline svfloat16_t svreinterpret_f16(svuint16_t op) { return svreinterpret_f16_u16(op); } __forceinline svfloat16_t svreinterpret_f16(svuint32_t op) { return svreinterpret_f16_u32(op); } __forceinline svfloat16_t svreinterpret_f16(svuint64_t op) { return svreinterpret_f16_u64(op); } __forceinline svfloat32_t svreinterpret_f32(svbfloat16_t op) { return svreinterpret_f32_bf16(op); } __forceinline svfloat32_t svreinterpret_f32(svfloat16_t op) { return svreinterpret_f32_f16(op); } __forceinline svfloat32_t svreinterpret_f32(svfloat32_t op) { return svreinterpret_f32_f32(op); } __forceinline svfloat32_t svreinterpret_f32(svfloat64_t op) { return svreinterpret_f32_f64(op); } __forceinline svfloat32_t svreinterpret_f32(svint8_t op) { return svreinterpret_f32_s8(op); } __forceinline svfloat32_t svreinterpret_f32(svint16_t op) { return svreinterpret_f32_s16(op); } __forceinline svfloat32_t svreinterpret_f32(svint32_t op) { return svreinterpret_f32_s32(op); } __forceinline svfloat32_t svreinterpret_f32(svint64_t op) { return svreinterpret_f32_s64(op); } __forceinline svfloat32_t svreinterpret_f32(svuint8_t op) { return svreinterpret_f32_u8(op); } __forceinline svfloat32_t svreinterpret_f32(svuint16_t op) { return svreinterpret_f32_u16(op); } __forceinline svfloat32_t svreinterpret_f32(svuint32_t op) { return svreinterpret_f32_u32(op); } __forceinline svfloat32_t svreinterpret_f32(svuint64_t op) { return svreinterpret_f32_u64(op); } __forceinline svfloat64_t svreinterpret_f64(svbfloat16_t op) { return svreinterpret_f64_bf16(op); } __forceinline svfloat64_t svreinterpret_f64(svfloat16_t op) { return svreinterpret_f64_f16(op); } __forceinline svfloat64_t svreinterpret_f64(svfloat32_t op) { return svreinterpret_f64_f32(op); } __forceinline svfloat64_t svreinterpret_f64(svfloat64_t op) { return svreinterpret_f64_f64(op); } __forceinline svfloat64_t svreinterpret_f64(svint8_t op) { return svreinterpret_f64_s8(op); } __forceinline svfloat64_t svreinterpret_f64(svint16_t op) { return svreinterpret_f64_s16(op); } __forceinline svfloat64_t svreinterpret_f64(svint32_t op) { return svreinterpret_f64_s32(op); } __forceinline svfloat64_t svreinterpret_f64(svint64_t op) { return svreinterpret_f64_s64(op); } __forceinline svfloat64_t svreinterpret_f64(svuint8_t op) { return svreinterpret_f64_u8(op); } __forceinline svfloat64_t svreinterpret_f64(svuint16_t op) { return svreinterpret_f64_u16(op); } __forceinline svfloat64_t 
svreinterpret_f64(svuint32_t op) { return svreinterpret_f64_u32(op); } __forceinline svfloat64_t svreinterpret_f64(svuint64_t op) { return svreinterpret_f64_u64(op); } __forceinline svint8_t svreinterpret_s8(svbfloat16_t op) { return svreinterpret_s8_bf16(op); } __forceinline svint8_t svreinterpret_s8(svfloat16_t op) { return svreinterpret_s8_f16(op); } __forceinline svint8_t svreinterpret_s8(svfloat32_t op) { return svreinterpret_s8_f32(op); } __forceinline svint8_t svreinterpret_s8(svfloat64_t op) { return svreinterpret_s8_f64(op); } __forceinline svint8_t svreinterpret_s8(svint8_t op) { return svreinterpret_s8_s8(op); } __forceinline svint8_t svreinterpret_s8(svint16_t op) { return svreinterpret_s8_s16(op); } __forceinline svint8_t svreinterpret_s8(svint32_t op) { return svreinterpret_s8_s32(op); } __forceinline svint8_t svreinterpret_s8(svint64_t op) { return svreinterpret_s8_s64(op); } __forceinline svint8_t svreinterpret_s8(svuint8_t op) { return svreinterpret_s8_u8(op); } __forceinline svint8_t svreinterpret_s8(svuint16_t op) { return svreinterpret_s8_u16(op); } __forceinline svint8_t svreinterpret_s8(svuint32_t op) { return svreinterpret_s8_u32(op); } __forceinline svint8_t svreinterpret_s8(svuint64_t op) { return svreinterpret_s8_u64(op); } __forceinline svint16_t svreinterpret_s16(svbfloat16_t op) { return svreinterpret_s16_bf16(op); } __forceinline svint16_t svreinterpret_s16(svfloat16_t op) { return svreinterpret_s16_f16(op); } __forceinline svint16_t svreinterpret_s16(svfloat32_t op) { return svreinterpret_s16_f32(op); } __forceinline svint16_t svreinterpret_s16(svfloat64_t op) { return svreinterpret_s16_f64(op); } __forceinline svint16_t svreinterpret_s16(svint8_t op) { return svreinterpret_s16_s8(op); } __forceinline svint16_t svreinterpret_s16(svint16_t op) { return svreinterpret_s16_s16(op); } __forceinline svint16_t svreinterpret_s16(svint32_t op) { return svreinterpret_s16_s32(op); } __forceinline svint16_t svreinterpret_s16(svint64_t op) { return svreinterpret_s16_s64(op); } __forceinline svint16_t svreinterpret_s16(svuint8_t op) { return svreinterpret_s16_u8(op); } __forceinline svint16_t svreinterpret_s16(svuint16_t op) { return svreinterpret_s16_u16(op); } __forceinline svint16_t svreinterpret_s16(svuint32_t op) { return svreinterpret_s16_u32(op); } __forceinline svint16_t svreinterpret_s16(svuint64_t op) { return svreinterpret_s16_u64(op); } __forceinline svint32_t svreinterpret_s32(svbfloat16_t op) { return svreinterpret_s32_bf16(op); } __forceinline svint32_t svreinterpret_s32(svfloat16_t op) { return svreinterpret_s32_f16(op); } __forceinline svint32_t svreinterpret_s32(svfloat32_t op) { return svreinterpret_s32_f32(op); } __forceinline svint32_t svreinterpret_s32(svfloat64_t op) { return svreinterpret_s32_f64(op); } __forceinline svint32_t svreinterpret_s32(svint8_t op) { return svreinterpret_s32_s8(op); } __forceinline svint32_t svreinterpret_s32(svint16_t op) { return svreinterpret_s32_s16(op); } __forceinline svint32_t svreinterpret_s32(svint32_t op) { return svreinterpret_s32_s32(op); } __forceinline svint32_t svreinterpret_s32(svint64_t op) { return svreinterpret_s32_s64(op); } __forceinline svint32_t svreinterpret_s32(svuint8_t op) { return svreinterpret_s32_u8(op); } __forceinline svint32_t svreinterpret_s32(svuint16_t op) { return svreinterpret_s32_u16(op); } __forceinline svint32_t svreinterpret_s32(svuint32_t op) { return svreinterpret_s32_u32(op); } __forceinline svint32_t svreinterpret_s32(svuint64_t op) { return svreinterpret_s32_u64(op); } 
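// The generic functions in this section are C++-only convenience overloads: each one simply
// forwards to the explicitly typed intrinsic selected by its argument types (for example,
// svreinterpret_u32 applied to an svfloat32_t forwards to svreinterpret_u32_f32). Illustrative
// usage sketch only, assuming a predicate pg and a const float32_t pointer p are already in
// scope; this is not itself part of the header's API surface:
//
//     svfloat32_t vf = svld1(pg, p);            // resolves to svld1_f32
//     svuint32_t  vu = svreinterpret_u32(vf);   // resolves to svreinterpret_u32_f32
//     svfloat32_t vb = svreinterpret_f32(vu);   // resolves to svreinterpret_f32_u32
//
// A reinterpretation changes only the element type; the underlying register bits are left
// untouched, unlike the value-converting svcvt_* overloads above.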
__forceinline svint64_t svreinterpret_s64(svbfloat16_t op) { return svreinterpret_s64_bf16(op); } __forceinline svint64_t svreinterpret_s64(svfloat16_t op) { return svreinterpret_s64_f16(op); } __forceinline svint64_t svreinterpret_s64(svfloat32_t op) { return svreinterpret_s64_f32(op); } __forceinline svint64_t svreinterpret_s64(svfloat64_t op) { return svreinterpret_s64_f64(op); } __forceinline svint64_t svreinterpret_s64(svint8_t op) { return svreinterpret_s64_s8(op); } __forceinline svint64_t svreinterpret_s64(svint16_t op) { return svreinterpret_s64_s16(op); } __forceinline svint64_t svreinterpret_s64(svint32_t op) { return svreinterpret_s64_s32(op); } __forceinline svint64_t svreinterpret_s64(svint64_t op) { return svreinterpret_s64_s64(op); } __forceinline svint64_t svreinterpret_s64(svuint8_t op) { return svreinterpret_s64_u8(op); } __forceinline svint64_t svreinterpret_s64(svuint16_t op) { return svreinterpret_s64_u16(op); } __forceinline svint64_t svreinterpret_s64(svuint32_t op) { return svreinterpret_s64_u32(op); } __forceinline svint64_t svreinterpret_s64(svuint64_t op) { return svreinterpret_s64_u64(op); } __forceinline svuint8_t svreinterpret_u8(svbfloat16_t op) { return svreinterpret_u8_bf16(op); } __forceinline svuint8_t svreinterpret_u8(svfloat16_t op) { return svreinterpret_u8_f16(op); } __forceinline svuint8_t svreinterpret_u8(svfloat32_t op) { return svreinterpret_u8_f32(op); } __forceinline svuint8_t svreinterpret_u8(svfloat64_t op) { return svreinterpret_u8_f64(op); } __forceinline svuint8_t svreinterpret_u8(svint8_t op) { return svreinterpret_u8_s8(op); } __forceinline svuint8_t svreinterpret_u8(svint16_t op) { return svreinterpret_u8_s16(op); } __forceinline svuint8_t svreinterpret_u8(svint32_t op) { return svreinterpret_u8_s32(op); } __forceinline svuint8_t svreinterpret_u8(svint64_t op) { return svreinterpret_u8_s64(op); } __forceinline svuint8_t svreinterpret_u8(svuint8_t op) { return svreinterpret_u8_u8(op); } __forceinline svuint8_t svreinterpret_u8(svuint16_t op) { return svreinterpret_u8_u16(op); } __forceinline svuint8_t svreinterpret_u8(svuint32_t op) { return svreinterpret_u8_u32(op); } __forceinline svuint8_t svreinterpret_u8(svuint64_t op) { return svreinterpret_u8_u64(op); } __forceinline svuint16_t svreinterpret_u16(svbfloat16_t op) { return svreinterpret_u16_bf16(op); } __forceinline svuint16_t svreinterpret_u16(svfloat16_t op) { return svreinterpret_u16_f16(op); } __forceinline svuint16_t svreinterpret_u16(svfloat32_t op) { return svreinterpret_u16_f32(op); } __forceinline svuint16_t svreinterpret_u16(svfloat64_t op) { return svreinterpret_u16_f64(op); } __forceinline svuint16_t svreinterpret_u16(svint8_t op) { return svreinterpret_u16_s8(op); } __forceinline svuint16_t svreinterpret_u16(svint16_t op) { return svreinterpret_u16_s16(op); } __forceinline svuint16_t svreinterpret_u16(svint32_t op) { return svreinterpret_u16_s32(op); } __forceinline svuint16_t svreinterpret_u16(svint64_t op) { return svreinterpret_u16_s64(op); } __forceinline svuint16_t svreinterpret_u16(svuint8_t op) { return svreinterpret_u16_u8(op); } __forceinline svuint16_t svreinterpret_u16(svuint16_t op) { return svreinterpret_u16_u16(op); } __forceinline svuint16_t svreinterpret_u16(svuint32_t op) { return svreinterpret_u16_u32(op); } __forceinline svuint16_t svreinterpret_u16(svuint64_t op) { return svreinterpret_u16_u64(op); } __forceinline svuint32_t svreinterpret_u32(svbfloat16_t op) { return svreinterpret_u32_bf16(op); } __forceinline svuint32_t 
svreinterpret_u32(svfloat16_t op) { return svreinterpret_u32_f16(op); } __forceinline svuint32_t svreinterpret_u32(svfloat32_t op) { return svreinterpret_u32_f32(op); } __forceinline svuint32_t svreinterpret_u32(svfloat64_t op) { return svreinterpret_u32_f64(op); } __forceinline svuint32_t svreinterpret_u32(svint8_t op) { return svreinterpret_u32_s8(op); } __forceinline svuint32_t svreinterpret_u32(svint16_t op) { return svreinterpret_u32_s16(op); } __forceinline svuint32_t svreinterpret_u32(svint32_t op) { return svreinterpret_u32_s32(op); } __forceinline svuint32_t svreinterpret_u32(svint64_t op) { return svreinterpret_u32_s64(op); } __forceinline svuint32_t svreinterpret_u32(svuint8_t op) { return svreinterpret_u32_u8(op); } __forceinline svuint32_t svreinterpret_u32(svuint16_t op) { return svreinterpret_u32_u16(op); } __forceinline svuint32_t svreinterpret_u32(svuint32_t op) { return svreinterpret_u32_u32(op); } __forceinline svuint32_t svreinterpret_u32(svuint64_t op) { return svreinterpret_u32_u64(op); } __forceinline svuint64_t svreinterpret_u64(svbfloat16_t op) { return svreinterpret_u64_bf16(op); } __forceinline svuint64_t svreinterpret_u64(svfloat16_t op) { return svreinterpret_u64_f16(op); } __forceinline svuint64_t svreinterpret_u64(svfloat32_t op) { return svreinterpret_u64_f32(op); } __forceinline svuint64_t svreinterpret_u64(svfloat64_t op) { return svreinterpret_u64_f64(op); } __forceinline svuint64_t svreinterpret_u64(svint8_t op) { return svreinterpret_u64_s8(op); } __forceinline svuint64_t svreinterpret_u64(svint16_t op) { return svreinterpret_u64_s16(op); } __forceinline svuint64_t svreinterpret_u64(svint32_t op) { return svreinterpret_u64_s32(op); } __forceinline svuint64_t svreinterpret_u64(svint64_t op) { return svreinterpret_u64_s64(op); } __forceinline svuint64_t svreinterpret_u64(svuint8_t op) { return svreinterpret_u64_u8(op); } __forceinline svuint64_t svreinterpret_u64(svuint16_t op) { return svreinterpret_u64_u16(op); } __forceinline svuint64_t svreinterpret_u64(svuint32_t op) { return svreinterpret_u64_u32(op); } __forceinline svuint64_t svreinterpret_u64(svuint64_t op) { return svreinterpret_u64_u64(op); } __forceinline svint64_t svldff1sh_gather_s64(svbool_t pg, svuint64_t bases) { return svldff1sh_gather_u64base_s64(pg, bases); } __forceinline svint64_t svldff1sh_gather_offset_s64(svbool_t pg, const int16_t *base, svint64_t offsets) { return svldff1sh_gather_s64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldff1sh_gather_offset_u64(svbool_t pg, const int16_t *base, svint64_t offsets) { return svldff1sh_gather_s64offset_u64(pg, base, offsets); } __forceinline svuint32_t svldff1sh_gather_offset_u32(svbool_t pg, const int16_t *base, svuint32_t offsets) { return svldff1sh_gather_u32offset_u32(pg, base, offsets); } __forceinline svint32_t svldff1sh_gather_offset_s32(svbool_t pg, const int16_t *base, svuint32_t offsets) { return svldff1sh_gather_u32offset_s32(pg, base, offsets); } __forceinline svuint32_t svldff1sh_gather_offset_u32(svbool_t pg, const int16_t *base, svint32_t offsets) { return svldff1sh_gather_s32offset_u32(pg, base, offsets); } __forceinline svint32_t svldff1sh_gather_offset_s32(svbool_t pg, const int16_t *base, svint32_t offsets) { return svldff1sh_gather_s32offset_s32(pg, base, offsets); } __forceinline svuint64_t svldff1sh_gather_u64(svbool_t pg, svuint64_t bases) { return svldff1sh_gather_u64base_u64(pg, bases); } __forceinline svint64_t svldff1sh_gather_offset_s64(svbool_t pg, const int16_t *base, svuint64_t offsets) { return 
svldff1sh_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint32_t svldff1sh_gather_u32(svbool_t pg, svuint32_t bases) { return svldff1sh_gather_u32base_u32(pg, bases); } __forceinline svuint64_t svldff1sh_gather_offset_u64(svbool_t pg, const int16_t *base, svuint64_t offsets) { return svldff1sh_gather_u64offset_u64(pg, base, offsets); } __forceinline svint32_t svldff1sh_gather_s32(svbool_t pg, svuint32_t bases) { return svldff1sh_gather_u32base_s32(pg, bases); } __forceinline svuint32_t svldff1sh_gather_index_u32(svbool_t pg, const int16_t *base, svint32_t indices) { return svldff1sh_gather_s32index_u32(pg, base, indices); } __forceinline svint32_t svldff1sh_gather_index_s32(svbool_t pg, const int16_t *base, svint32_t indices) { return svldff1sh_gather_s32index_s32(pg, base, indices); } __forceinline svuint64_t svldff1sh_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svldff1sh_gather_u64base_index_u64(pg, bases, index); } __forceinline svint64_t svldff1sh_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svldff1sh_gather_u64base_index_s64(pg, bases, index); } __forceinline svuint32_t svldff1sh_gather_index_u32(svbool_t pg, svuint32_t bases, int64_t index) { return svldff1sh_gather_u32base_index_u32(pg, bases, index); } __forceinline svint32_t svldff1sh_gather_index_s32(svbool_t pg, svuint32_t bases, int64_t index) { return svldff1sh_gather_u32base_index_s32(pg, bases, index); } __forceinline svuint64_t svldff1sh_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1sh_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svuint32_t svldff1sh_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldff1sh_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svint64_t svldff1sh_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1sh_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svuint64_t svldff1sh_gather_index_u64(svbool_t pg, const int16_t *base, svuint64_t indices) { return svldff1sh_gather_u64index_u64(pg, base, indices); } __forceinline svint64_t svldff1sh_gather_index_s64(svbool_t pg, const int16_t *base, svuint64_t indices) { return svldff1sh_gather_u64index_s64(pg, base, indices); } __forceinline svuint64_t svldff1sh_gather_index_u64(svbool_t pg, const int16_t *base, svint64_t indices) { return svldff1sh_gather_s64index_u64(pg, base, indices); } __forceinline svint64_t svldff1sh_gather_index_s64(svbool_t pg, const int16_t *base, svint64_t indices) { return svldff1sh_gather_s64index_s64(pg, base, indices); } __forceinline svuint32_t svldff1sh_gather_index_u32(svbool_t pg, const int16_t *base, svuint32_t indices) { return svldff1sh_gather_u32index_u32(pg, base, indices); } __forceinline svint32_t svldff1sh_gather_index_s32(svbool_t pg, const int16_t *base, svuint32_t indices) { return svldff1sh_gather_u32index_s32(pg, base, indices); } __forceinline svint32_t svldff1sh_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldff1sh_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svuint64_t svldff1uh_gather_index_u64(svbool_t pg, const uint16_t *base, svuint64_t indices) { return svldff1uh_gather_u64index_u64(pg, base, indices); } __forceinline svint32_t svldff1uh_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldff1uh_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svuint32_t svldff1uh_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t 
offset) { return svldff1uh_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svint64_t svldff1uh_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1uh_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svuint64_t svldff1uh_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1uh_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svuint32_t svldff1uh_gather_index_u32(svbool_t pg, svuint32_t bases, int64_t index) { return svldff1uh_gather_u32base_index_u32(pg, bases, index); } __forceinline svint64_t svldff1uh_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svldff1uh_gather_u64base_index_s64(pg, bases, index); } __forceinline svuint64_t svldff1uh_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svldff1uh_gather_u64base_index_u64(pg, bases, index); } __forceinline svint32_t svldff1uh_gather_index_s32(svbool_t pg, svuint32_t bases, int64_t index) { return svldff1uh_gather_u32base_index_s32(pg, bases, index); } __forceinline svuint64_t svldff1uh_gather_index_u64(svbool_t pg, const uint16_t *base, svint64_t indices) { return svldff1uh_gather_s64index_u64(pg, base, indices); } __forceinline svint64_t svldff1uh_gather_index_s64(svbool_t pg, const uint16_t *base, svuint64_t indices) { return svldff1uh_gather_u64index_s64(pg, base, indices); } __forceinline svuint32_t svldff1uh_gather_index_u32(svbool_t pg, const uint16_t *base, svuint32_t indices) { return svldff1uh_gather_u32index_u32(pg, base, indices); } __forceinline svint32_t svldff1uh_gather_s32(svbool_t pg, svuint32_t bases) { return svldff1uh_gather_u32base_s32(pg, bases); } __forceinline svuint32_t svldff1uh_gather_u32(svbool_t pg, svuint32_t bases) { return svldff1uh_gather_u32base_u32(pg, bases); } __forceinline svint64_t svldff1uh_gather_s64(svbool_t pg, svuint64_t bases) { return svldff1uh_gather_u64base_s64(pg, bases); } __forceinline svuint64_t svldff1uh_gather_u64(svbool_t pg, svuint64_t bases) { return svldff1uh_gather_u64base_u64(pg, bases); } __forceinline svint32_t svldff1uh_gather_offset_s32(svbool_t pg, const uint16_t *base, svint32_t offsets) { return svldff1uh_gather_s32offset_s32(pg, base, offsets); } __forceinline svint64_t svldff1uh_gather_index_s64(svbool_t pg, const uint16_t *base, svint64_t indices) { return svldff1uh_gather_s64index_s64(pg, base, indices); } __forceinline svint32_t svldff1uh_gather_index_s32(svbool_t pg, const uint16_t *base, svint32_t indices) { return svldff1uh_gather_s32index_s32(pg, base, indices); } __forceinline svuint32_t svldff1uh_gather_offset_u32(svbool_t pg, const uint16_t *base, svint32_t offsets) { return svldff1uh_gather_s32offset_u32(pg, base, offsets); } __forceinline svint32_t svldff1uh_gather_offset_s32(svbool_t pg, const uint16_t *base, svuint32_t offsets) { return svldff1uh_gather_u32offset_s32(pg, base, offsets); } __forceinline svuint32_t svldff1uh_gather_offset_u32(svbool_t pg, const uint16_t *base, svuint32_t offsets) { return svldff1uh_gather_u32offset_u32(pg, base, offsets); } __forceinline svint64_t svldff1uh_gather_offset_s64(svbool_t pg, const uint16_t *base, svint64_t offsets) { return svldff1uh_gather_s64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldff1uh_gather_offset_u64(svbool_t pg, const uint16_t *base, svint64_t offsets) { return svldff1uh_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svldff1uh_gather_offset_s64(svbool_t pg, const uint16_t *base, svuint64_t offsets) { return 
svldff1uh_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldff1uh_gather_offset_u64(svbool_t pg, const uint16_t *base, svuint64_t offsets) { return svldff1uh_gather_u64offset_u64(pg, base, offsets); } __forceinline svuint32_t svldff1uh_gather_index_u32(svbool_t pg, const uint16_t *base, svint32_t indices) { return svldff1uh_gather_s32index_u32(pg, base, indices); } __forceinline svint32_t svldff1uh_gather_index_s32(svbool_t pg, const uint16_t *base, svuint32_t indices) { return svldff1uh_gather_u32index_s32(pg, base, indices); } __forceinline svint64_t svldff1sw_gather_s64(svbool_t pg, svuint64_t bases) { return svldff1sw_gather_u64base_s64(pg, bases); } __forceinline svuint64_t svldff1sw_gather_u64(svbool_t pg, svuint64_t bases) { return svldff1sw_gather_u64base_u64(pg, bases); } __forceinline svint64_t svldff1sw_gather_offset_s64(svbool_t pg, const int32_t *base, svint64_t offsets) { return svldff1sw_gather_s64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldff1sw_gather_offset_u64(svbool_t pg, const int32_t *base, svint64_t offsets) { return svldff1sw_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svldff1sw_gather_offset_s64(svbool_t pg, const int32_t *base, svuint64_t offsets) { return svldff1sw_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldff1sw_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svldff1sw_gather_u64base_index_u64(pg, bases, index); } __forceinline svint64_t svldff1sw_gather_index_s64(svbool_t pg, const int32_t *base, svint64_t indices) { return svldff1sw_gather_s64index_s64(pg, base, indices); } __forceinline svuint64_t svldff1sw_gather_index_u64(svbool_t pg, const int32_t *base, svint64_t indices) { return svldff1sw_gather_s64index_u64(pg, base, indices); } __forceinline svint64_t svldff1sw_gather_index_s64(svbool_t pg, const int32_t *base, svuint64_t indices) { return svldff1sw_gather_u64index_s64(pg, base, indices); } __forceinline svuint64_t svldff1sw_gather_index_u64(svbool_t pg, const int32_t *base, svuint64_t indices) { return svldff1sw_gather_u64index_u64(pg, base, indices); } __forceinline svint64_t svldff1sw_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1sw_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svint64_t svldff1sw_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svldff1sw_gather_u64base_index_s64(pg, bases, index); } __forceinline svuint64_t svldff1sw_gather_offset_u64(svbool_t pg, const int32_t *base, svuint64_t offsets) { return svldff1sw_gather_u64offset_u64(pg, base, offsets); } __forceinline svuint64_t svldff1sw_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1sw_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svuint64_t svldff1uw_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svldff1uw_gather_u64base_index_u64(pg, bases, index); } __forceinline svuint64_t svldff1uw_gather_index_u64(svbool_t pg, const uint32_t *base, svint64_t indices) { return svldff1uw_gather_s64index_u64(pg, base, indices); } __forceinline svuint64_t svldff1uw_gather_u64(svbool_t pg, svuint64_t bases) { return svldff1uw_gather_u64base_u64(pg, bases); } __forceinline svuint64_t svldff1uw_gather_offset_u64(svbool_t pg, const uint32_t *base, svuint64_t offsets) { return svldff1uw_gather_u64offset_u64(pg, base, offsets); } __forceinline svuint64_t svldff1uw_gather_offset_u64(svbool_t pg, const uint32_t *base, svint64_t offsets) { return 
svldff1uw_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svldff1uw_gather_offset_s64(svbool_t pg, const uint32_t *base, svuint64_t offsets) { return svldff1uw_gather_u64offset_s64(pg, base, offsets); } __forceinline svint64_t svldff1uw_gather_index_s64(svbool_t pg, const uint32_t *base, svint64_t indices) { return svldff1uw_gather_s64index_s64(pg, base, indices); } __forceinline svint64_t svldff1uw_gather_offset_s64(svbool_t pg, const uint32_t *base, svint64_t offsets) { return svldff1uw_gather_s64offset_s64(pg, base, offsets); } __forceinline svint64_t svldff1uw_gather_s64(svbool_t pg, svuint64_t bases) { return svldff1uw_gather_u64base_s64(pg, bases); } __forceinline svuint64_t svldff1uw_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1uw_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint64_t svldff1uw_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1uw_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svuint64_t svldff1uw_gather_index_u64(svbool_t pg, const uint32_t *base, svuint64_t indices) { return svldff1uw_gather_u64index_u64(pg, base, indices); } __forceinline svint64_t svldff1uw_gather_index_s64(svbool_t pg, const uint32_t *base, svuint64_t indices) { return svldff1uw_gather_u64index_s64(pg, base, indices); } __forceinline svint64_t svldff1uw_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svldff1uw_gather_u64base_index_s64(pg, bases, index); } __forceinline svuint32_t svldff1sb_gather_u32(svbool_t pg, svuint32_t bases) { return svldff1sb_gather_u32base_u32(pg, bases); } __forceinline svint32_t svldff1sb_gather_s32(svbool_t pg, svuint32_t bases) { return svldff1sb_gather_u32base_s32(pg, bases); } __forceinline svuint32_t svldff1sb_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldff1sb_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svuint64_t svldff1sb_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1sb_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint64_t svldff1sb_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1sb_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svint32_t svldff1sb_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldff1sb_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svuint64_t svldff1sb_gather_offset_u64(svbool_t pg, const int8_t *base, svuint64_t offsets) { return svldff1sb_gather_u64offset_u64(pg, base, offsets); } __forceinline svuint64_t svldff1sb_gather_offset_u64(svbool_t pg, const int8_t *base, svint64_t offsets) { return svldff1sb_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svldff1sb_gather_offset_s64(svbool_t pg, const int8_t *base, svuint64_t offsets) { return svldff1sb_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint32_t svldff1sb_gather_offset_u32(svbool_t pg, const int8_t *base, svuint32_t offsets) { return svldff1sb_gather_u32offset_u32(pg, base, offsets); } __forceinline svint32_t svldff1sb_gather_offset_s32(svbool_t pg, const int8_t *base, svuint32_t offsets) { return svldff1sb_gather_u32offset_s32(pg, base, offsets); } __forceinline svuint32_t svldff1sb_gather_offset_u32(svbool_t pg, const int8_t *base, svint32_t offsets) { return svldff1sb_gather_s32offset_u32(pg, base, offsets); } __forceinline svint32_t svldff1sb_gather_offset_s32(svbool_t pg, const int8_t *base, svint32_t offsets) { return 
svldff1sb_gather_s32offset_s32(pg, base, offsets); } __forceinline svuint64_t svldff1sb_gather_u64(svbool_t pg, svuint64_t bases) { return svldff1sb_gather_u64base_u64(pg, bases); } __forceinline svint64_t svldff1sb_gather_s64(svbool_t pg, svuint64_t bases) { return svldff1sb_gather_u64base_s64(pg, bases); } __forceinline svint64_t svldff1sb_gather_offset_s64(svbool_t pg, const int8_t *base, svint64_t offsets) { return svldff1sb_gather_s64offset_s64(pg, base, offsets); } __forceinline svint64_t svldff1ub_gather_s64(svbool_t pg, svuint64_t bases) { return svldff1ub_gather_u64base_s64(pg, bases); } __forceinline svuint64_t svldff1ub_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1ub_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint64_t svldff1ub_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1ub_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svuint32_t svldff1ub_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldff1ub_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svint32_t svldff1ub_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldff1ub_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svuint64_t svldff1ub_gather_offset_u64(svbool_t pg, const uint8_t *base, svuint64_t offsets) { return svldff1ub_gather_u64offset_u64(pg, base, offsets); } __forceinline svint64_t svldff1ub_gather_offset_s64(svbool_t pg, const uint8_t *base, svuint64_t offsets) { return svldff1ub_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldff1ub_gather_offset_u64(svbool_t pg, const uint8_t *base, svint64_t offsets) { return svldff1ub_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svldff1ub_gather_offset_s64(svbool_t pg, const uint8_t *base, svint64_t offsets) { return svldff1ub_gather_s64offset_s64(pg, base, offsets); } __forceinline svuint32_t svldff1ub_gather_offset_u32(svbool_t pg, const uint8_t *base, svuint32_t offsets) { return svldff1ub_gather_u32offset_u32(pg, base, offsets); } __forceinline svuint64_t svldff1ub_gather_u64(svbool_t pg, svuint64_t bases) { return svldff1ub_gather_u64base_u64(pg, bases); } __forceinline svuint32_t svldff1ub_gather_offset_u32(svbool_t pg, const uint8_t *base, svint32_t offsets) { return svldff1ub_gather_s32offset_u32(pg, base, offsets); } __forceinline svint32_t svldff1ub_gather_s32(svbool_t pg, svuint32_t bases) { return svldff1ub_gather_u32base_s32(pg, bases); } __forceinline svint32_t svldff1ub_gather_offset_s32(svbool_t pg, const uint8_t *base, svuint32_t offsets) { return svldff1ub_gather_u32offset_s32(pg, base, offsets); } __forceinline svuint32_t svldff1ub_gather_u32(svbool_t pg, svuint32_t bases) { return svldff1ub_gather_u32base_u32(pg, bases); } __forceinline svint32_t svldff1ub_gather_offset_s32(svbool_t pg, const uint8_t *base, svint32_t offsets) { return svldff1ub_gather_s32offset_s32(pg, base, offsets); } __forceinline svint64_t svldff1_gather_index(svbool_t pg, const int64_t *base, svuint64_t indices) { return svldff1_gather_u64index_s64(pg, base, indices); } __forceinline svfloat64_t svldff1_gather_index(svbool_t pg, const float64_t *base, svuint64_t indices) { return svldff1_gather_u64index_f64(pg, base, indices); } __forceinline svuint64_t svldff1_gather_index(svbool_t pg, const uint64_t *base, svint64_t indices) { return svldff1_gather_s64index_u64(pg, base, indices); } __forceinline svuint64_t svldff1_gather_index(svbool_t pg, const uint64_t 
*base, svuint64_t indices) { return svldff1_gather_u64index_u64(pg, base, indices); } __forceinline svfloat32_t svldff1_gather_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldff1_gather_u32base_offset_f32(pg, bases, offset); } __forceinline svuint64_t svldff1_gather_u64(svbool_t pg, svuint64_t bases) { return svldff1_gather_u64base_u64(pg, bases); } __forceinline svfloat64_t svldff1_gather_f64(svbool_t pg, svuint64_t bases) { return svldff1_gather_u64base_f64(pg, bases); } __forceinline svfloat32_t svldff1_gather_f32(svbool_t pg, svuint32_t bases) { return svldff1_gather_u32base_f32(pg, bases); } __forceinline svint64_t svldff1_gather_s64(svbool_t pg, svuint64_t bases) { return svldff1_gather_u64base_s64(pg, bases); } __forceinline svint32_t svldff1_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldff1_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svuint32_t svldff1_gather_index(svbool_t pg, const uint32_t *base, svint32_t indices) { return svldff1_gather_s32index_u32(pg, base, indices); } __forceinline svfloat32_t svldff1_gather_index(svbool_t pg, const float32_t *base, svuint32_t indices) { return svldff1_gather_u32index_f32(pg, base, indices); } __forceinline svint32_t svldff1_gather_index(svbool_t pg, const int32_t *base, svuint32_t indices) { return svldff1_gather_u32index_s32(pg, base, indices); } __forceinline svuint32_t svldff1_gather_index(svbool_t pg, const uint32_t *base, svuint32_t indices) { return svldff1_gather_u32index_u32(pg, base, indices); } __forceinline svfloat64_t svldff1_gather_index(svbool_t pg, const float64_t *base, svint64_t indices) { return svldff1_gather_s64index_f64(pg, base, indices); } __forceinline svint64_t svldff1_gather_index(svbool_t pg, const int64_t *base, svint64_t indices) { return svldff1_gather_s64index_s64(pg, base, indices); } __forceinline svuint32_t svldff1_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldff1_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svint32_t svldff1_gather_index_s32(svbool_t pg, svuint32_t bases, int64_t index) { return svldff1_gather_u32base_index_s32(pg, bases, index); } __forceinline svint64_t svldff1_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svint32_t svldff1_gather_offset(svbool_t pg, const int32_t *base, svuint32_t offsets) { return svldff1_gather_u32offset_s32(pg, base, offsets); } __forceinline svfloat32_t svldff1_gather_offset(svbool_t pg, const float32_t *base, svuint32_t offsets) { return svldff1_gather_u32offset_f32(pg, base, offsets); } __forceinline svuint32_t svldff1_gather_offset(svbool_t pg, const uint32_t *base, svint32_t offsets) { return svldff1_gather_s32offset_u32(pg, base, offsets); } __forceinline svint32_t svldff1_gather_offset(svbool_t pg, const int32_t *base, svint32_t offsets) { return svldff1_gather_s32offset_s32(pg, base, offsets); } __forceinline svfloat32_t svldff1_gather_offset(svbool_t pg, const float32_t *base, svint32_t offsets) { return svldff1_gather_s32offset_f32(pg, base, offsets); } __forceinline svfloat64_t svldff1_gather_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1_gather_u64base_offset_f64(pg, bases, offset); } __forceinline svuint32_t svldff1_gather_offset(svbool_t pg, const uint32_t *base, svuint32_t offsets) { return svldff1_gather_u32offset_u32(pg, base, offsets); } __forceinline svint64_t svldff1_gather_offset(svbool_t pg, const 
int64_t *base, svint64_t offsets) { return svldff1_gather_s64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldff1_gather_offset(svbool_t pg, const uint64_t *base, svint64_t offsets) { return svldff1_gather_s64offset_u64(pg, base, offsets); } __forceinline svfloat64_t svldff1_gather_offset(svbool_t pg, const float64_t *base, svuint64_t offsets) { return svldff1_gather_u64offset_f64(pg, base, offsets); } __forceinline svint64_t svldff1_gather_offset(svbool_t pg, const int64_t *base, svuint64_t offsets) { return svldff1_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldff1_gather_offset(svbool_t pg, const uint64_t *base, svuint64_t offsets) { return svldff1_gather_u64offset_u64(pg, base, offsets); } __forceinline svfloat32_t svldff1_gather_index(svbool_t pg, const float32_t *base, svint32_t indices) { return svldff1_gather_s32index_f32(pg, base, indices); } __forceinline svint32_t svldff1_gather_index(svbool_t pg, const int32_t *base, svint32_t indices) { return svldff1_gather_s32index_s32(pg, base, indices); } __forceinline svuint32_t svldff1_gather_u32(svbool_t pg, svuint32_t bases) { return svldff1_gather_u32base_u32(pg, bases); } __forceinline svuint64_t svldff1_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svldff1_gather_u64base_index_u64(pg, bases, index); } __forceinline svint64_t svldff1_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svldff1_gather_u64base_index_s64(pg, bases, index); } __forceinline svfloat64_t svldff1_gather_index_f64(svbool_t pg, svuint64_t bases, int64_t index) { return svldff1_gather_u64base_index_f64(pg, bases, index); } __forceinline svuint32_t svldff1_gather_index_u32(svbool_t pg, svuint32_t bases, int64_t index) { return svldff1_gather_u32base_index_u32(pg, bases, index); } __forceinline svfloat32_t svldff1_gather_index_f32(svbool_t pg, svuint32_t bases, int64_t index) { return svldff1_gather_u32base_index_f32(pg, bases, index); } __forceinline svuint64_t svldff1_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldff1_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svfloat64_t svldff1_gather_offset(svbool_t pg, const float64_t *base, svint64_t offsets) { return svldff1_gather_s64offset_f64(pg, base, offsets); } __forceinline svint32_t svldff1_gather_s32(svbool_t pg, svuint32_t bases) { return svldff1_gather_u32base_s32(pg, bases); } __forceinline svfloat64_t svld1(svbool_t pg, const float64_t *base) { return svld1_f64(pg, base); } __forceinline svfloat32_t svld1(svbool_t pg, const float32_t *base) { return svld1_f32(pg, base); } __forceinline svint8_t svld1(svbool_t pg, const int8_t *base) { return svld1_s8(pg, base); } __forceinline svint32_t svld1(svbool_t pg, const int32_t *base) { return svld1_s32(pg, base); } __forceinline svint64_t svld1(svbool_t pg, const int64_t *base) { return svld1_s64(pg, base); } __forceinline svint16_t svld1(svbool_t pg, const int16_t *base) { return svld1_s16(pg, base); } __forceinline svuint8_t svld1(svbool_t pg, const uint8_t *base) { return svld1_u8(pg, base); } __forceinline svuint16_t svld1(svbool_t pg, const uint16_t *base) { return svld1_u16(pg, base); } __forceinline svuint32_t svld1(svbool_t pg, const uint32_t *base) { return svld1_u32(pg, base); } __forceinline svuint64_t svld1(svbool_t pg, const uint64_t *base) { return svld1_u64(pg, base); } __forceinline svuint64_t svld1_vnum(svbool_t pg, const uint64_t *base, int64_t vnum) { return svld1_vnum_u64(pg, base, vnum); } __forceinline svfloat32_t 
svld1_vnum(svbool_t pg, const float32_t *base, int64_t vnum) { return svld1_vnum_f32(pg, base, vnum); } __forceinline svfloat64_t svld1_vnum(svbool_t pg, const float64_t *base, int64_t vnum) { return svld1_vnum_f64(pg, base, vnum); } __forceinline svint16_t svld1_vnum(svbool_t pg, const int16_t *base, int64_t vnum) { return svld1_vnum_s16(pg, base, vnum); } __forceinline svint32_t svld1_vnum(svbool_t pg, const int32_t *base, int64_t vnum) { return svld1_vnum_s32(pg, base, vnum); } __forceinline svint64_t svld1_vnum(svbool_t pg, const int64_t *base, int64_t vnum) { return svld1_vnum_s64(pg, base, vnum); } __forceinline svuint8_t svld1_vnum(svbool_t pg, const uint8_t *base, int64_t vnum) { return svld1_vnum_u8(pg, base, vnum); } __forceinline svuint16_t svld1_vnum(svbool_t pg, const uint16_t *base, int64_t vnum) { return svld1_vnum_u16(pg, base, vnum); } __forceinline svuint32_t svld1_vnum(svbool_t pg, const uint32_t *base, int64_t vnum) { return svld1_vnum_u32(pg, base, vnum); } __forceinline svint8_t svld1_vnum(svbool_t pg, const int8_t *base, int64_t vnum) { return svld1_vnum_s8(pg, base, vnum); } __forceinline svint8_t svldnt1_vnum(svbool_t pg, const int8_t *base, int64_t vnum) { return svldnt1_vnum_s8(pg, base, vnum); } __forceinline svint64_t svldnt1_vnum(svbool_t pg, const int64_t *base, int64_t vnum) { return svldnt1_vnum_s64(pg, base, vnum); } __forceinline svfloat32_t svldnt1_vnum(svbool_t pg, const float32_t *base, int64_t vnum) { return svldnt1_vnum_f32(pg, base, vnum); } __forceinline svuint64_t svldnt1(svbool_t pg, const uint64_t *base) { return svldnt1_u64(pg, base); } __forceinline svuint32_t svldnt1(svbool_t pg, const uint32_t *base) { return svldnt1_u32(pg, base); } __forceinline svint16_t svldnt1_vnum(svbool_t pg, const int16_t *base, int64_t vnum) { return svldnt1_vnum_s16(pg, base, vnum); } __forceinline svint32_t svldnt1_vnum(svbool_t pg, const int32_t *base, int64_t vnum) { return svldnt1_vnum_s32(pg, base, vnum); } __forceinline svfloat64_t svldnt1_vnum(svbool_t pg, const float64_t *base, int64_t vnum) { return svldnt1_vnum_f64(pg, base, vnum); } __forceinline svuint8_t svldnt1_vnum(svbool_t pg, const uint8_t *base, int64_t vnum) { return svldnt1_vnum_u8(pg, base, vnum); } __forceinline svint32_t svldnt1(svbool_t pg, const int32_t *base) { return svldnt1_s32(pg, base); } __forceinline svuint32_t svldnt1_vnum(svbool_t pg, const uint32_t *base, int64_t vnum) { return svldnt1_vnum_u32(pg, base, vnum); } __forceinline svuint64_t svldnt1_vnum(svbool_t pg, const uint64_t *base, int64_t vnum) { return svldnt1_vnum_u64(pg, base, vnum); } __forceinline svint16_t svldnt1(svbool_t pg, const int16_t *base) { return svldnt1_s16(pg, base); } __forceinline svuint8_t svldnt1(svbool_t pg, const uint8_t *base) { return svldnt1_u8(pg, base); } __forceinline svfloat32_t svldnt1(svbool_t pg, const float32_t *base) { return svldnt1_f32(pg, base); } __forceinline svfloat64_t svldnt1(svbool_t pg, const float64_t *base) { return svldnt1_f64(pg, base); } __forceinline svint8_t svldnt1(svbool_t pg, const int8_t *base) { return svldnt1_s8(pg, base); } __forceinline svint64_t svldnt1(svbool_t pg, const int64_t *base) { return svldnt1_s64(pg, base); } __forceinline svuint16_t svldnt1_vnum(svbool_t pg, const uint16_t *base, int64_t vnum) { return svldnt1_vnum_u16(pg, base, vnum); } __forceinline svuint16_t svldnt1(svbool_t pg, const uint16_t *base) { return svldnt1_u16(pg, base); } __forceinline svuint32_t svld1sh_gather_index_u32(svbool_t pg, const int16_t *base, svint32_t indices) { return 
svld1sh_gather_s32index_u32(pg, base, indices); } __forceinline svint32_t svld1sh_gather_index_s32(svbool_t pg, const int16_t *base, svuint32_t indices) { return svld1sh_gather_u32index_s32(pg, base, indices); } __forceinline svuint32_t svld1sh_gather_index_u32(svbool_t pg, const int16_t *base, svuint32_t indices) { return svld1sh_gather_u32index_u32(pg, base, indices); } __forceinline svint64_t svld1sh_gather_index_s64(svbool_t pg, const int16_t *base, svint64_t indices) { return svld1sh_gather_s64index_s64(pg, base, indices); } __forceinline svuint64_t svld1sh_gather_index_u64(svbool_t pg, const int16_t *base, svint64_t indices) { return svld1sh_gather_s64index_u64(pg, base, indices); } __forceinline svint64_t svld1sh_gather_index_s64(svbool_t pg, const int16_t *base, svuint64_t indices) { return svld1sh_gather_u64index_s64(pg, base, indices); } __forceinline svuint64_t svld1sh_gather_index_u64(svbool_t pg, const int16_t *base, svuint64_t indices) { return svld1sh_gather_u64index_u64(pg, base, indices); } __forceinline svint32_t svld1sh_gather_index_s32(svbool_t pg, svuint32_t bases, int64_t index) { return svld1sh_gather_u32base_index_s32(pg, bases, index); } __forceinline svuint32_t svld1sh_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svld1sh_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svint64_t svld1sh_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1sh_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svuint64_t svld1sh_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1sh_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svuint32_t svld1sh_gather_index_u32(svbool_t pg, svuint32_t bases, int64_t index) { return svld1sh_gather_u32base_index_u32(pg, bases, index); } __forceinline svint32_t svld1sh_gather_index_s32(svbool_t pg, const int16_t *base, svint32_t indices) { return svld1sh_gather_s32index_s32(pg, base, indices); } __forceinline svuint64_t svld1sh_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svld1sh_gather_u64base_index_u64(pg, bases, index); } __forceinline svint64_t svld1sh_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svld1sh_gather_u64base_index_s64(pg, bases, index); } __forceinline svint32_t svld1sh_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svld1sh_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svuint64_t svld1sh_gather_offset_u64(svbool_t pg, const int16_t *base, svuint64_t offsets) { return svld1sh_gather_u64offset_u64(pg, base, offsets); } __forceinline svint64_t svld1sh_gather_offset_s64(svbool_t pg, const int16_t *base, svint64_t offsets) { return svld1sh_gather_s64offset_s64(pg, base, offsets); } __forceinline svuint64_t svld1sh_gather_offset_u64(svbool_t pg, const int16_t *base, svint64_t offsets) { return svld1sh_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svld1sh_gather_offset_s64(svbool_t pg, const int16_t *base, svuint64_t offsets) { return svld1sh_gather_u64offset_s64(pg, base, offsets); } __forceinline svint32_t svld1sh_gather_s32(svbool_t pg, svuint32_t bases) { return svld1sh_gather_u32base_s32(pg, bases); } __forceinline svint64_t svld1sh_gather_s64(svbool_t pg, svuint64_t bases) { return svld1sh_gather_u64base_s64(pg, bases); } __forceinline svuint64_t svld1sh_gather_u64(svbool_t pg, svuint64_t bases) { return svld1sh_gather_u64base_u64(pg, bases); } __forceinline svint32_t 
svld1sh_gather_offset_s32(svbool_t pg, const int16_t *base, svint32_t offsets) { return svld1sh_gather_s32offset_s32(pg, base, offsets); } __forceinline svuint32_t svld1sh_gather_offset_u32(svbool_t pg, const int16_t *base, svint32_t offsets) { return svld1sh_gather_s32offset_u32(pg, base, offsets); } __forceinline svint32_t svld1sh_gather_offset_s32(svbool_t pg, const int16_t *base, svuint32_t offsets) { return svld1sh_gather_u32offset_s32(pg, base, offsets); } __forceinline svuint32_t svld1sh_gather_offset_u32(svbool_t pg, const int16_t *base, svuint32_t offsets) { return svld1sh_gather_u32offset_u32(pg, base, offsets); } __forceinline svuint32_t svld1sh_gather_u32(svbool_t pg, svuint32_t bases) { return svld1sh_gather_u32base_u32(pg, bases); } __forceinline svint32_t svld1uh_gather_offset_s32(svbool_t pg, const uint16_t *base, svuint32_t offsets) { return svld1uh_gather_u32offset_s32(pg, base, offsets); } __forceinline svuint32_t svld1uh_gather_offset_u32(svbool_t pg, const uint16_t *base, svint32_t offsets) { return svld1uh_gather_s32offset_u32(pg, base, offsets); } __forceinline svint32_t svld1uh_gather_offset_s32(svbool_t pg, const uint16_t *base, svint32_t offsets) { return svld1uh_gather_s32offset_s32(pg, base, offsets); } __forceinline svuint64_t svld1uh_gather_u64(svbool_t pg, svuint64_t bases) { return svld1uh_gather_u64base_u64(pg, bases); } __forceinline svint64_t svld1uh_gather_s64(svbool_t pg, svuint64_t bases) { return svld1uh_gather_u64base_s64(pg, bases); } __forceinline svuint32_t svld1uh_gather_u32(svbool_t pg, svuint32_t bases) { return svld1uh_gather_u32base_u32(pg, bases); } __forceinline svuint32_t svld1uh_gather_offset_u32(svbool_t pg, const uint16_t *base, svuint32_t offsets) { return svld1uh_gather_u32offset_u32(pg, base, offsets); } __forceinline svint32_t svld1uh_gather_s32(svbool_t pg, svuint32_t bases) { return svld1uh_gather_u32base_s32(pg, bases); } __forceinline svint64_t svld1uh_gather_offset_s64(svbool_t pg, const uint16_t *base, svint64_t offsets) { return svld1uh_gather_s64offset_s64(pg, base, offsets); } __forceinline svint64_t svld1uh_gather_offset_s64(svbool_t pg, const uint16_t *base, svuint64_t offsets) { return svld1uh_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svld1uh_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svld1uh_gather_u64base_index_u64(pg, bases, index); } __forceinline svint64_t svld1uh_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svld1uh_gather_u64base_index_s64(pg, bases, index); } __forceinline svuint32_t svld1uh_gather_index_u32(svbool_t pg, svuint32_t bases, int64_t index) { return svld1uh_gather_u32base_index_u32(pg, bases, index); } __forceinline svuint64_t svld1uh_gather_offset_u64(svbool_t pg, const uint16_t *base, svint64_t offsets) { return svld1uh_gather_s64offset_u64(pg, base, offsets); } __forceinline svint32_t svld1uh_gather_index_s32(svbool_t pg, svuint32_t bases, int64_t index) { return svld1uh_gather_u32base_index_s32(pg, bases, index); } __forceinline svuint64_t svld1uh_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1uh_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint64_t svld1uh_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1uh_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svuint32_t svld1uh_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svld1uh_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svint32_t 
svld1uh_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svld1uh_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svint64_t svld1uh_gather_index_s64(svbool_t pg, const uint16_t *base, svuint64_t indices) { return svld1uh_gather_u64index_s64(pg, base, indices); } __forceinline svint32_t svld1uh_gather_index_s32(svbool_t pg, const uint16_t *base, svuint32_t indices) { return svld1uh_gather_u32index_s32(pg, base, indices); } __forceinline svuint64_t svld1uh_gather_index_u64(svbool_t pg, const uint16_t *base, svuint64_t indices) { return svld1uh_gather_u64index_u64(pg, base, indices); } __forceinline svint32_t svld1uh_gather_index_s32(svbool_t pg, const uint16_t *base, svint32_t indices) { return svld1uh_gather_s32index_s32(pg, base, indices); } __forceinline svuint64_t svld1uh_gather_offset_u64(svbool_t pg, const uint16_t *base, svuint64_t offsets) { return svld1uh_gather_u64offset_u64(pg, base, offsets); } __forceinline svuint32_t svld1uh_gather_index_u32(svbool_t pg, const uint16_t *base, svint32_t indices) { return svld1uh_gather_s32index_u32(pg, base, indices); } __forceinline svuint64_t svld1uh_gather_index_u64(svbool_t pg, const uint16_t *base, svint64_t indices) { return svld1uh_gather_s64index_u64(pg, base, indices); } __forceinline svuint32_t svld1uh_gather_index_u32(svbool_t pg, const uint16_t *base, svuint32_t indices) { return svld1uh_gather_u32index_u32(pg, base, indices); } __forceinline svint64_t svld1uh_gather_index_s64(svbool_t pg, const uint16_t *base, svint64_t indices) { return svld1uh_gather_s64index_s64(pg, base, indices); } __forceinline svuint64_t svld1sw_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svld1sw_gather_u64base_index_u64(pg, bases, index); } __forceinline svint64_t svld1sw_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svld1sw_gather_u64base_index_s64(pg, bases, index); } __forceinline svuint64_t svld1sw_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1sw_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint64_t svld1sw_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1sw_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svint64_t svld1sw_gather_index_s64(svbool_t pg, const int32_t *base, svuint64_t indices) { return svld1sw_gather_u64index_s64(pg, base, indices); } __forceinline svuint64_t svld1sw_gather_index_u64(svbool_t pg, const int32_t *base, svint64_t indices) { return svld1sw_gather_s64index_u64(pg, base, indices); } __forceinline svuint64_t svld1sw_gather_index_u64(svbool_t pg, const int32_t *base, svuint64_t indices) { return svld1sw_gather_u64index_u64(pg, base, indices); } __forceinline svuint64_t svld1sw_gather_offset_u64(svbool_t pg, const int32_t *base, svuint64_t offsets) { return svld1sw_gather_u64offset_u64(pg, base, offsets); } __forceinline svint64_t svld1sw_gather_offset_s64(svbool_t pg, const int32_t *base, svuint64_t offsets) { return svld1sw_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svld1sw_gather_offset_u64(svbool_t pg, const int32_t *base, svint64_t offsets) { return svld1sw_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svld1sw_gather_offset_s64(svbool_t pg, const int32_t *base, svint64_t offsets) { return svld1sw_gather_s64offset_s64(pg, base, offsets); } __forceinline svuint64_t svld1sw_gather_u64(svbool_t pg, svuint64_t bases) { return svld1sw_gather_u64base_u64(pg, bases); } __forceinline svint64_t 
svld1sw_gather_s64(svbool_t pg, svuint64_t bases) { return svld1sw_gather_u64base_s64(pg, bases); } __forceinline svint64_t svld1sw_gather_index_s64(svbool_t pg, const int32_t *base, svint64_t indices) { return svld1sw_gather_s64index_s64(pg, base, indices); } __forceinline svint64_t svld1uw_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svld1uw_gather_u64base_index_s64(pg, bases, index); } __forceinline svuint64_t svld1uw_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svld1uw_gather_u64base_index_u64(pg, bases, index); } __forceinline svint64_t svld1uw_gather_s64(svbool_t pg, svuint64_t bases) { return svld1uw_gather_u64base_s64(pg, bases); } __forceinline svint64_t svld1uw_gather_offset_s64(svbool_t pg, const uint32_t *base, svint64_t offsets) { return svld1uw_gather_s64offset_s64(pg, base, offsets); } __forceinline svuint64_t svld1uw_gather_offset_u64(svbool_t pg, const uint32_t *base, svint64_t offsets) { return svld1uw_gather_s64offset_u64(pg, base, offsets); } __forceinline svuint64_t svld1uw_gather_u64(svbool_t pg, svuint64_t bases) { return svld1uw_gather_u64base_u64(pg, bases); } __forceinline svuint64_t svld1uw_gather_offset_u64(svbool_t pg, const uint32_t *base, svuint64_t offsets) { return svld1uw_gather_u64offset_u64(pg, base, offsets); } __forceinline svint64_t svld1uw_gather_index_s64(svbool_t pg, const uint32_t *base, svint64_t indices) { return svld1uw_gather_s64index_s64(pg, base, indices); } __forceinline svuint64_t svld1uw_gather_index_u64(svbool_t pg, const uint32_t *base, svint64_t indices) { return svld1uw_gather_s64index_u64(pg, base, indices); } __forceinline svint64_t svld1uw_gather_index_s64(svbool_t pg, const uint32_t *base, svuint64_t indices) { return svld1uw_gather_u64index_s64(pg, base, indices); } __forceinline svuint64_t svld1uw_gather_index_u64(svbool_t pg, const uint32_t *base, svuint64_t indices) { return svld1uw_gather_u64index_u64(pg, base, indices); } __forceinline svint64_t svld1uw_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1uw_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svint64_t svld1uw_gather_offset_s64(svbool_t pg, const uint32_t *base, svuint64_t offsets) { return svld1uw_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svld1uw_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1uw_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint64_t svld1sb_gather_s64(svbool_t pg, svuint64_t bases) { return svld1sb_gather_u64base_s64(pg, bases); } __forceinline svuint32_t svld1sb_gather_u32(svbool_t pg, svuint32_t bases) { return svld1sb_gather_u32base_u32(pg, bases); } __forceinline svint32_t svld1sb_gather_s32(svbool_t pg, svuint32_t bases) { return svld1sb_gather_u32base_s32(pg, bases); } __forceinline svint32_t svld1sb_gather_offset_s32(svbool_t pg, const int8_t *base, svint32_t offsets) { return svld1sb_gather_s32offset_s32(pg, base, offsets); } __forceinline svuint64_t svld1sb_gather_u64(svbool_t pg, svuint64_t bases) { return svld1sb_gather_u64base_u64(pg, bases); } __forceinline svuint32_t svld1sb_gather_offset_u32(svbool_t pg, const int8_t *base, svuint32_t offsets) { return svld1sb_gather_u32offset_u32(pg, base, offsets); } __forceinline svuint32_t svld1sb_gather_offset_u32(svbool_t pg, const int8_t *base, svint32_t offsets) { return svld1sb_gather_s32offset_u32(pg, base, offsets); } __forceinline svuint64_t svld1sb_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { 
return svld1sb_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint64_t svld1sb_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1sb_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svuint32_t svld1sb_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svld1sb_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svint32_t svld1sb_gather_offset_s32(svbool_t pg, const int8_t *base, svuint32_t offsets) { return svld1sb_gather_u32offset_s32(pg, base, offsets); } __forceinline svuint64_t svld1sb_gather_offset_u64(svbool_t pg, const int8_t *base, svuint64_t offsets) { return svld1sb_gather_u64offset_u64(pg, base, offsets); } __forceinline svint64_t svld1sb_gather_offset_s64(svbool_t pg, const int8_t *base, svuint64_t offsets) { return svld1sb_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svld1sb_gather_offset_u64(svbool_t pg, const int8_t *base, svint64_t offsets) { return svld1sb_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svld1sb_gather_offset_s64(svbool_t pg, const int8_t *base, svint64_t offsets) { return svld1sb_gather_s64offset_s64(pg, base, offsets); } __forceinline svint32_t svld1sb_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svld1sb_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svuint64_t svld1ub_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1ub_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint32_t svld1ub_gather_s32(svbool_t pg, svuint32_t bases) { return svld1ub_gather_u32base_s32(pg, bases); } __forceinline svint64_t svld1ub_gather_s64(svbool_t pg, svuint64_t bases) { return svld1ub_gather_u64base_s64(pg, bases); } __forceinline svuint64_t svld1ub_gather_u64(svbool_t pg, svuint64_t bases) { return svld1ub_gather_u64base_u64(pg, bases); } __forceinline svint32_t svld1ub_gather_offset_s32(svbool_t pg, const uint8_t *base, svint32_t offsets) { return svld1ub_gather_s32offset_s32(pg, base, offsets); } __forceinline svuint32_t svld1ub_gather_offset_u32(svbool_t pg, const uint8_t *base, svint32_t offsets) { return svld1ub_gather_s32offset_u32(pg, base, offsets); } __forceinline svint32_t svld1ub_gather_offset_s32(svbool_t pg, const uint8_t *base, svuint32_t offsets) { return svld1ub_gather_u32offset_s32(pg, base, offsets); } __forceinline svuint32_t svld1ub_gather_offset_u32(svbool_t pg, const uint8_t *base, svuint32_t offsets) { return svld1ub_gather_u32offset_u32(pg, base, offsets); } __forceinline svint64_t svld1ub_gather_offset_s64(svbool_t pg, const uint8_t *base, svint64_t offsets) { return svld1ub_gather_s64offset_s64(pg, base, offsets); } __forceinline svuint64_t svld1ub_gather_offset_u64(svbool_t pg, const uint8_t *base, svint64_t offsets) { return svld1ub_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svld1ub_gather_offset_s64(svbool_t pg, const uint8_t *base, svuint64_t offsets) { return svld1ub_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svld1ub_gather_offset_u64(svbool_t pg, const uint8_t *base, svuint64_t offsets) { return svld1ub_gather_u64offset_u64(pg, base, offsets); } __forceinline svint32_t svld1ub_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svld1ub_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svuint32_t svld1ub_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svld1ub_gather_u32base_offset_u32(pg, bases, offset); } __forceinline 
svint64_t svld1ub_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1ub_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svuint32_t svld1ub_gather_u32(svbool_t pg, svuint32_t bases) { return svld1ub_gather_u32base_u32(pg, bases); } __forceinline svuint64_t svld1_gather_index(svbool_t pg, const uint64_t *base, svuint64_t indices) { return svld1_gather_u64index_u64(pg, base, indices); } __forceinline svfloat32_t svld1_gather_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset) { return svld1_gather_u32base_offset_f32(pg, bases, offset); } __forceinline svfloat64_t svld1_gather_index_f64(svbool_t pg, svuint64_t bases, int64_t index) { return svld1_gather_u64base_index_f64(pg, bases, index); } __forceinline svuint32_t svld1_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svld1_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svfloat64_t svld1_gather_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1_gather_u64base_offset_f64(pg, bases, offset); } __forceinline svint64_t svld1_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svuint64_t svld1_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svld1_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svfloat32_t svld1_gather_index_f32(svbool_t pg, svuint32_t bases, int64_t index) { return svld1_gather_u32base_index_f32(pg, bases, index); } __forceinline svint32_t svld1_gather_index_s32(svbool_t pg, svuint32_t bases, int64_t index) { return svld1_gather_u32base_index_s32(pg, bases, index); } __forceinline svuint32_t svld1_gather_index_u32(svbool_t pg, svuint32_t bases, int64_t index) { return svld1_gather_u32base_index_u32(pg, bases, index); } __forceinline svint64_t svld1_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svld1_gather_u64base_index_s64(pg, bases, index); } __forceinline svuint64_t svld1_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svld1_gather_u64base_index_u64(pg, bases, index); } __forceinline svint64_t svld1_gather_index(svbool_t pg, const int64_t *base, svuint64_t indices) { return svld1_gather_u64index_s64(pg, base, indices); } __forceinline svint32_t svld1_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svld1_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svfloat64_t svld1_gather_index(svbool_t pg, const float64_t *base, svuint64_t indices) { return svld1_gather_u64index_f64(pg, base, indices); } __forceinline svint64_t svld1_gather_index(svbool_t pg, const int64_t *base, svint64_t indices) { return svld1_gather_s64index_s64(pg, base, indices); } __forceinline svint32_t svld1_gather_index(svbool_t pg, const int32_t *base, svint32_t indices) { return svld1_gather_s32index_s32(pg, base, indices); } __forceinline svfloat32_t svld1_gather_index(svbool_t pg, const float32_t *base, svint32_t indices) { return svld1_gather_s32index_f32(pg, base, indices); } __forceinline svuint64_t svld1_gather_offset(svbool_t pg, const uint64_t *base, svuint64_t offsets) { return svld1_gather_u64offset_u64(pg, base, offsets); } __forceinline svint64_t svld1_gather_offset(svbool_t pg, const int64_t *base, svuint64_t offsets) { return svld1_gather_u64offset_s64(pg, base, offsets); } __forceinline svfloat64_t svld1_gather_offset(svbool_t pg, const float64_t *base, svuint64_t offsets) { return svld1_gather_u64offset_f64(pg, base, 
offsets); } __forceinline svuint64_t svld1_gather_offset(svbool_t pg, const uint64_t *base, svint64_t offsets) { return svld1_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svld1_gather_offset(svbool_t pg, const int64_t *base, svint64_t offsets) { return svld1_gather_s64offset_s64(pg, base, offsets); } __forceinline svfloat64_t svld1_gather_offset(svbool_t pg, const float64_t *base, svint64_t offsets) { return svld1_gather_s64offset_f64(pg, base, offsets); } __forceinline svuint32_t svld1_gather_offset(svbool_t pg, const uint32_t *base, svuint32_t offsets) { return svld1_gather_u32offset_u32(pg, base, offsets); } __forceinline svint32_t svld1_gather_offset(svbool_t pg, const int32_t *base, svuint32_t offsets) { return svld1_gather_u32offset_s32(pg, base, offsets); } __forceinline svfloat32_t svld1_gather_offset(svbool_t pg, const float32_t *base, svuint32_t offsets) { return svld1_gather_u32offset_f32(pg, base, offsets); } __forceinline svuint32_t svld1_gather_offset(svbool_t pg, const uint32_t *base, svint32_t offsets) { return svld1_gather_s32offset_u32(pg, base, offsets); } __forceinline svfloat64_t svld1_gather_f64(svbool_t pg, svuint64_t bases) { return svld1_gather_u64base_f64(pg, bases); } __forceinline svuint32_t svld1_gather_index(svbool_t pg, const uint32_t *base, svint32_t indices) { return svld1_gather_s32index_u32(pg, base, indices); } __forceinline svint32_t svld1_gather_offset(svbool_t pg, const int32_t *base, svint32_t offsets) { return svld1_gather_s32offset_s32(pg, base, offsets); } __forceinline svfloat32_t svld1_gather_offset(svbool_t pg, const float32_t *base, svint32_t offsets) { return svld1_gather_s32offset_f32(pg, base, offsets); } __forceinline svuint64_t svld1_gather_u64(svbool_t pg, svuint64_t bases) { return svld1_gather_u64base_u64(pg, bases); } __forceinline svint64_t svld1_gather_s64(svbool_t pg, svuint64_t bases) { return svld1_gather_u64base_s64(pg, bases); } __forceinline svuint64_t svld1_gather_index(svbool_t pg, const uint64_t *base, svint64_t indices) { return svld1_gather_s64index_u64(pg, base, indices); } __forceinline svuint32_t svld1_gather_u32(svbool_t pg, svuint32_t bases) { return svld1_gather_u32base_u32(pg, bases); } __forceinline svfloat32_t svld1_gather_f32(svbool_t pg, svuint32_t bases) { return svld1_gather_u32base_f32(pg, bases); } __forceinline svint32_t svld1_gather_s32(svbool_t pg, svuint32_t bases) { return svld1_gather_u32base_s32(pg, bases); } __forceinline svint32_t svld1_gather_index(svbool_t pg, const int32_t *base, svuint32_t indices) { return svld1_gather_u32index_s32(pg, base, indices); } __forceinline svfloat32_t svld1_gather_index(svbool_t pg, const float32_t *base, svuint32_t indices) { return svld1_gather_u32index_f32(pg, base, indices); } __forceinline svuint32_t svld1_gather_index(svbool_t pg, const uint32_t *base, svuint32_t indices) { return svld1_gather_u32index_u32(pg, base, indices); } __forceinline svfloat64_t svld1_gather_index(svbool_t pg, const float64_t *base, svint64_t indices) { return svld1_gather_s64index_f64(pg, base, indices); } __forceinline svint8_t svld1rq(svbool_t pg, const int8_t *base) { return svld1rq_s8(pg, base); } __forceinline svuint32_t svld1rq(svbool_t pg, const uint32_t *base) { return svld1rq_u32(pg, base); } __forceinline svuint16_t svld1rq(svbool_t pg, const uint16_t *base) { return svld1rq_u16(pg, base); } __forceinline svuint8_t svld1rq(svbool_t pg, const uint8_t *base) { return svld1rq_u8(pg, base); } __forceinline svint64_t svld1rq(svbool_t pg, const int64_t *base) 
{ return svld1rq_s64(pg, base); } __forceinline svint32_t svld1rq(svbool_t pg, const int32_t *base) { return svld1rq_s32(pg, base); } __forceinline svint16_t svld1rq(svbool_t pg, const int16_t *base) { return svld1rq_s16(pg, base); } __forceinline svuint64_t svld1rq(svbool_t pg, const uint64_t *base) { return svld1rq_u64(pg, base); } __forceinline svfloat64_t svld1rq(svbool_t pg, const float64_t *base) { return svld1rq_f64(pg, base); } __forceinline svfloat32_t svld1rq(svbool_t pg, const float32_t *base) { return svld1rq_f32(pg, base); } __forceinline svint32_t svld1ro(svbool_t pg, const int32_t *base) { return svld1ro_s32(pg, base); } __forceinline svuint32_t svld1ro(svbool_t pg, const uint32_t *base) { return svld1ro_u32(pg, base); } __forceinline svuint64_t svld1ro(svbool_t pg, const uint64_t *base) { return svld1ro_u64(pg, base); } __forceinline svint16_t svld1ro(svbool_t pg, const int16_t *base) { return svld1ro_s16(pg, base); } __forceinline svint8_t svld1ro(svbool_t pg, const int8_t *base) { return svld1ro_s8(pg, base); } __forceinline svfloat64_t svld1ro(svbool_t pg, const float64_t *base) { return svld1ro_f64(pg, base); } __forceinline svfloat32_t svld1ro(svbool_t pg, const float32_t *base) { return svld1ro_f32(pg, base); } __forceinline svint64_t svld1ro(svbool_t pg, const int64_t *base) { return svld1ro_s64(pg, base); } __forceinline svuint8_t svld1ro(svbool_t pg, const uint8_t *base) { return svld1ro_u8(pg, base); } __forceinline svuint16_t svld1ro(svbool_t pg, const uint16_t *base) { return svld1ro_u16(pg, base); } __forceinline svuint64_t svbic_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svbic_u64_z(pg, op1, op2); } __forceinline svuint32_t svbic_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svbic_u32_z(pg, op1, op2); } __forceinline svuint16_t svbic_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svbic_u16_z(pg, op1, op2); } __forceinline svuint8_t svbic_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svbic_u8_z(pg, op1, op2); } __forceinline svint64_t svbic_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svbic_s64_z(pg, op1, op2); } __forceinline svint32_t svbic_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svbic_s32_z(pg, op1, op2); } __forceinline svint16_t svbic_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svbic_s16_z(pg, op1, op2); } __forceinline svint8_t svbic_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svbic_s8_z(pg, op1, op2); } __forceinline svuint64_t svbic_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svbic_u64_x(pg, op1, op2); } __forceinline svuint32_t svbic_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svbic_u32_x(pg, op1, op2); } __forceinline svuint16_t svbic_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svbic_u16_x(pg, op1, op2); } __forceinline svuint8_t svbic_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svbic_u8_x(pg, op1, op2); } __forceinline svint32_t svbic_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svbic_s32_x(pg, op1, op2); } __forceinline svint16_t svbic_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svbic_s16_x(pg, op1, op2); } __forceinline svint8_t svbic_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svbic_s8_x(pg, op1, op2); } __forceinline svuint64_t svbic_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svbic_u64_m(pg, op1, op2); } __forceinline svuint32_t svbic_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svbic_u32_m(pg, op1, op2); } __forceinline svuint16_t svbic_m(svbool_t pg, svuint16_t op1, 
svuint16_t op2) { return svbic_u16_m(pg, op1, op2); } __forceinline svuint8_t svbic_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svbic_u8_m(pg, op1, op2); } __forceinline svint64_t svbic_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svbic_s64_m(pg, op1, op2); } __forceinline svint32_t svbic_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svbic_s32_m(pg, op1, op2); } __forceinline svint64_t svbic_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svbic_s64_x(pg, op1, op2); } __forceinline svint8_t svbic_m(svbool_t pg, svint8_t op1, int8_t op2) { return svbic_n_s8_m(pg, op1, op2); } __forceinline svint16_t svbic_m(svbool_t pg, svint16_t op1, int16_t op2) { return svbic_n_s16_m(pg, op1, op2); } __forceinline svint32_t svbic_m(svbool_t pg, svint32_t op1, int32_t op2) { return svbic_n_s32_m(pg, op1, op2); } __forceinline svbool_t svbic_z(svbool_t pg, svbool_t op1, svbool_t op2) { return svbic_b_z(pg, op1, op2); } __forceinline svuint64_t svbic_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svbic_n_u64_z(pg, op1, op2); } __forceinline svuint32_t svbic_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svbic_n_u32_z(pg, op1, op2); } __forceinline svuint16_t svbic_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svbic_n_u16_z(pg, op1, op2); } __forceinline svuint8_t svbic_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svbic_n_u8_z(pg, op1, op2); } __forceinline svint64_t svbic_z(svbool_t pg, svint64_t op1, int64_t op2) { return svbic_n_s64_z(pg, op1, op2); } __forceinline svint32_t svbic_z(svbool_t pg, svint32_t op1, int32_t op2) { return svbic_n_s32_z(pg, op1, op2); } __forceinline svint16_t svbic_z(svbool_t pg, svint16_t op1, int16_t op2) { return svbic_n_s16_z(pg, op1, op2); } __forceinline svint8_t svbic_z(svbool_t pg, svint8_t op1, int8_t op2) { return svbic_n_s8_z(pg, op1, op2); } __forceinline svuint64_t svbic_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svbic_n_u64_x(pg, op1, op2); } __forceinline svint16_t svbic_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svbic_s16_m(pg, op1, op2); } __forceinline svuint32_t svbic_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svbic_n_u32_x(pg, op1, op2); } __forceinline svuint8_t svbic_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svbic_n_u8_x(pg, op1, op2); } __forceinline svint64_t svbic_x(svbool_t pg, svint64_t op1, int64_t op2) { return svbic_n_s64_x(pg, op1, op2); } __forceinline svint32_t svbic_x(svbool_t pg, svint32_t op1, int32_t op2) { return svbic_n_s32_x(pg, op1, op2); } __forceinline svint16_t svbic_x(svbool_t pg, svint16_t op1, int16_t op2) { return svbic_n_s16_x(pg, op1, op2); } __forceinline svint8_t svbic_x(svbool_t pg, svint8_t op1, int8_t op2) { return svbic_n_s8_x(pg, op1, op2); } __forceinline svuint64_t svbic_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svbic_n_u64_m(pg, op1, op2); } __forceinline svuint32_t svbic_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svbic_n_u32_m(pg, op1, op2); } __forceinline svuint16_t svbic_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svbic_n_u16_m(pg, op1, op2); } __forceinline svuint8_t svbic_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svbic_n_u8_m(pg, op1, op2); } __forceinline svint64_t svbic_m(svbool_t pg, svint64_t op1, int64_t op2) { return svbic_n_s64_m(pg, op1, op2); } __forceinline svuint16_t svbic_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svbic_n_u16_x(pg, op1, op2); } __forceinline svint8_t svbic_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svbic_s8_m(pg, op1, 
op2); } __forceinline svint8_t svand_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svand_s8_z(pg, op1, op2); } __forceinline svuint64_t svand_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svand_u64_x(pg, op1, op2); } __forceinline svuint32_t svand_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svand_u32_x(pg, op1, op2); } __forceinline svint64_t svand_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svand_s64_x(pg, op1, op2); } __forceinline svint32_t svand_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svand_s32_z(pg, op1, op2); } __forceinline svint64_t svand_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svand_s64_z(pg, op1, op2); } __forceinline svuint16_t svand_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svand_u16_x(pg, op1, op2); } __forceinline svuint8_t svand_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svand_u8_x(pg, op1, op2); } __forceinline svint16_t svand_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svand_s16_z(pg, op1, op2); } __forceinline svint32_t svand_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svand_s32_x(pg, op1, op2); } __forceinline svuint32_t svand_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svand_u32_z(pg, op1, op2); } __forceinline svint8_t svand_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svand_s8_x(pg, op1, op2); } __forceinline svuint64_t svand_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svand_u64_m(pg, op1, op2); } __forceinline svuint32_t svand_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svand_u32_m(pg, op1, op2); } __forceinline svuint8_t svand_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svand_u8_z(pg, op1, op2); } __forceinline svint8_t svand_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svand_s8_m(pg, op1, op2); } __forceinline svint16_t svand_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svand_s16_m(pg, op1, op2); } __forceinline svint32_t svand_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svand_s32_m(pg, op1, op2); } __forceinline svint64_t svand_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svand_s64_m(pg, op1, op2); } __forceinline svuint8_t svand_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svand_u8_m(pg, op1, op2); } __forceinline svuint16_t svand_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svand_u16_m(pg, op1, op2); } __forceinline svint16_t svand_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svand_s16_x(pg, op1, op2); } __forceinline svuint16_t svand_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svand_u16_z(pg, op1, op2); } __forceinline svint16_t svand_z(svbool_t pg, svint16_t op1, int16_t op2) { return svand_n_s16_z(pg, op1, op2); } __forceinline svuint64_t svand_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svand_u64_z(pg, op1, op2); } __forceinline svbool_t svand_z(svbool_t pg, svbool_t op1, svbool_t op2) { return svand_b_z(pg, op1, op2); } __forceinline svuint64_t svand_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svand_n_u64_z(pg, op1, op2); } __forceinline svuint32_t svand_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svand_n_u32_z(pg, op1, op2); } __forceinline svuint16_t svand_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svand_n_u16_z(pg, op1, op2); } __forceinline svuint8_t svand_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svand_n_u8_z(pg, op1, op2); } __forceinline svint64_t svand_z(svbool_t pg, svint64_t op1, int64_t op2) { return svand_n_s64_z(pg, op1, op2); } __forceinline svint32_t 
svand_z(svbool_t pg, svint32_t op1, int32_t op2) { return svand_n_s32_z(pg, op1, op2); } __forceinline svint8_t svand_z(svbool_t pg, svint8_t op1, int8_t op2) { return svand_n_s8_z(pg, op1, op2); } __forceinline svuint64_t svand_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svand_n_u64_x(pg, op1, op2); } __forceinline svuint16_t svand_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svand_n_u16_x(pg, op1, op2); } __forceinline svuint8_t svand_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svand_n_u8_x(pg, op1, op2); } __forceinline svuint32_t svand_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svand_n_u32_x(pg, op1, op2); } __forceinline svint32_t svand_x(svbool_t pg, svint32_t op1, int32_t op2) { return svand_n_s32_x(pg, op1, op2); } __forceinline svint8_t svand_m(svbool_t pg, svint8_t op1, int8_t op2) { return svand_n_s8_m(pg, op1, op2); } __forceinline svint16_t svand_m(svbool_t pg, svint16_t op1, int16_t op2) { return svand_n_s16_m(pg, op1, op2); } __forceinline svint32_t svand_m(svbool_t pg, svint32_t op1, int32_t op2) { return svand_n_s32_m(pg, op1, op2); } __forceinline svint64_t svand_x(svbool_t pg, svint64_t op1, int64_t op2) { return svand_n_s64_x(pg, op1, op2); } __forceinline svuint8_t svand_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svand_n_u8_m(pg, op1, op2); } __forceinline svint64_t svand_m(svbool_t pg, svint64_t op1, int64_t op2) { return svand_n_s64_m(pg, op1, op2); } __forceinline svuint32_t svand_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svand_n_u32_m(pg, op1, op2); } __forceinline svuint64_t svand_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svand_n_u64_m(pg, op1, op2); } __forceinline svint8_t svand_x(svbool_t pg, svint8_t op1, int8_t op2) { return svand_n_s8_x(pg, op1, op2); } __forceinline svint16_t svand_x(svbool_t pg, svint16_t op1, int16_t op2) { return svand_n_s16_x(pg, op1, op2); } __forceinline svuint16_t svand_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svand_n_u16_m(pg, op1, op2); } __forceinline int64_t svandv(svbool_t pg, svint64_t op) { return svandv_s64(pg, op); } __forceinline int8_t svandv(svbool_t pg, svint8_t op) { return svandv_s8(pg, op); } __forceinline int16_t svandv(svbool_t pg, svint16_t op) { return svandv_s16(pg, op); } __forceinline int32_t svandv(svbool_t pg, svint32_t op) { return svandv_s32(pg, op); } __forceinline uint16_t svandv(svbool_t pg, svuint16_t op) { return svandv_u16(pg, op); } __forceinline uint8_t svandv(svbool_t pg, svuint8_t op) { return svandv_u8(pg, op); } __forceinline uint64_t svandv(svbool_t pg, svuint64_t op) { return svandv_u64(pg, op); } __forceinline uint32_t svandv(svbool_t pg, svuint32_t op) { return svandv_u32(pg, op); } __forceinline svbool_t svmov_z(svbool_t pg, svbool_t op) { return svmov_b_z(pg, op); } __forceinline svuint32_t svnot_z(svbool_t pg, svuint32_t op) { return svnot_u32_z(pg, op); } __forceinline svuint64_t svnot_z(svbool_t pg, svuint64_t op) { return svnot_u64_z(pg, op); } __forceinline svbool_t svnot_z(svbool_t pg, svbool_t op) { return svnot_b_z(pg, op); } __forceinline svuint8_t svnot_z(svbool_t pg, svuint8_t op) { return svnot_u8_z(pg, op); } __forceinline svuint64_t svnot_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { return svnot_u64_m(inactive, pg, op); } __forceinline svint64_t svnot_z(svbool_t pg, svint64_t op) { return svnot_s64_z(pg, op); } __forceinline svint32_t svnot_z(svbool_t pg, svint32_t op) { return svnot_s32_z(pg, op); } __forceinline svint16_t svnot_z(svbool_t pg, svint16_t op) { return svnot_s16_z(pg, 
op); } __forceinline svint8_t svnot_z(svbool_t pg, svint8_t op) { return svnot_s8_z(pg, op); } __forceinline svuint64_t svnot_x(svbool_t pg, svuint64_t op) { return svnot_u64_x(pg, op); } __forceinline svuint32_t svnot_x(svbool_t pg, svuint32_t op) { return svnot_u32_x(pg, op); } __forceinline svuint16_t svnot_x(svbool_t pg, svuint16_t op) { return svnot_u16_x(pg, op); } __forceinline svuint8_t svnot_x(svbool_t pg, svuint8_t op) { return svnot_u8_x(pg, op); } __forceinline svint64_t svnot_x(svbool_t pg, svint64_t op) { return svnot_s64_x(pg, op); } __forceinline svint32_t svnot_x(svbool_t pg, svint32_t op) { return svnot_s32_x(pg, op); } __forceinline svint16_t svnot_x(svbool_t pg, svint16_t op) { return svnot_s16_x(pg, op); } __forceinline svint8_t svnot_x(svbool_t pg, svint8_t op) { return svnot_s8_x(pg, op); } __forceinline svuint16_t svnot_z(svbool_t pg, svuint16_t op) { return svnot_u16_z(pg, op); } __forceinline svuint32_t svnot_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { return svnot_u32_m(inactive, pg, op); } __forceinline svuint16_t svnot_m(svuint16_t inactive, svbool_t pg, svuint16_t op) { return svnot_u16_m(inactive, pg, op); } __forceinline svuint8_t svnot_m(svuint8_t inactive, svbool_t pg, svuint8_t op) { return svnot_u8_m(inactive, pg, op); } __forceinline svint64_t svnot_m(svint64_t inactive, svbool_t pg, svint64_t op) { return svnot_s64_m(inactive, pg, op); } __forceinline svint32_t svnot_m(svint32_t inactive, svbool_t pg, svint32_t op) { return svnot_s32_m(inactive, pg, op); } __forceinline svint16_t svnot_m(svint16_t inactive, svbool_t pg, svint16_t op) { return svnot_s16_m(inactive, pg, op); } __forceinline svint8_t svnot_m(svint8_t inactive, svbool_t pg, svint8_t op) { return svnot_s8_m(inactive, pg, op); } __forceinline svuint32_t sveor_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return sveor_n_u32_x(pg, op1, op2); } __forceinline svuint16_t sveor_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return sveor_n_u16_x(pg, op1, op2); } __forceinline svbool_t sveor_z(svbool_t pg, svbool_t op1, svbool_t op2) { return sveor_b_z(pg, op1, op2); } __forceinline svuint64_t sveor_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return sveor_u64_z(pg, op1, op2); } __forceinline svuint32_t sveor_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return sveor_u32_m(pg, op1, op2); } __forceinline svuint16_t sveor_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return sveor_u16_m(pg, op1, op2); } __forceinline svint8_t sveor_m(svbool_t pg, svint8_t op1, int8_t op2) { return sveor_n_s8_m(pg, op1, op2); } __forceinline svuint64_t sveor_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return sveor_u64_m(pg, op1, op2); } __forceinline svint8_t sveor_x(svbool_t pg, svint8_t op1, svint8_t op2) { return sveor_s8_x(pg, op1, op2); } __forceinline svint16_t sveor_x(svbool_t pg, svint16_t op1, svint16_t op2) { return sveor_s16_x(pg, op1, op2); } __forceinline svint32_t sveor_x(svbool_t pg, svint32_t op1, svint32_t op2) { return sveor_s32_x(pg, op1, op2); } __forceinline svint64_t sveor_x(svbool_t pg, svint64_t op1, svint64_t op2) { return sveor_s64_x(pg, op1, op2); } __forceinline svuint8_t sveor_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return sveor_u8_x(pg, op1, op2); } __forceinline svuint16_t sveor_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return sveor_u16_x(pg, op1, op2); } __forceinline svuint32_t sveor_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return sveor_u32_x(pg, op1, op2); } __forceinline svuint64_t sveor_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { 
return sveor_u64_x(pg, op1, op2); } __forceinline svint8_t sveor_z(svbool_t pg, svint8_t op1, svint8_t op2) { return sveor_s8_z(pg, op1, op2); } __forceinline svint8_t sveor_m(svbool_t pg, svint8_t op1, svint8_t op2) { return sveor_s8_m(pg, op1, op2); } __forceinline svint16_t sveor_z(svbool_t pg, svint16_t op1, svint16_t op2) { return sveor_s16_z(pg, op1, op2); } __forceinline svint32_t sveor_z(svbool_t pg, svint32_t op1, svint32_t op2) { return sveor_s32_z(pg, op1, op2); } __forceinline svint64_t sveor_z(svbool_t pg, svint64_t op1, svint64_t op2) { return sveor_s64_z(pg, op1, op2); } __forceinline svuint8_t sveor_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return sveor_u8_z(pg, op1, op2); } __forceinline svuint16_t sveor_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return sveor_u16_z(pg, op1, op2); } __forceinline svuint32_t sveor_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return sveor_u32_z(pg, op1, op2); } __forceinline svuint8_t sveor_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return sveor_u8_m(pg, op1, op2); } __forceinline svint32_t sveor_m(svbool_t pg, svint32_t op1, int32_t op2) { return sveor_n_s32_m(pg, op1, op2); } __forceinline svint16_t sveor_m(svbool_t pg, svint16_t op1, int16_t op2) { return sveor_n_s16_m(pg, op1, op2); } __forceinline svuint8_t sveor_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return sveor_n_u8_m(pg, op1, op2); } __forceinline svuint64_t sveor_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return sveor_n_u64_x(pg, op1, op2); } __forceinline svint8_t sveor_z(svbool_t pg, svint8_t op1, int8_t op2) { return sveor_n_s8_z(pg, op1, op2); } __forceinline svint16_t sveor_z(svbool_t pg, svint16_t op1, int16_t op2) { return sveor_n_s16_z(pg, op1, op2); } __forceinline svint32_t sveor_z(svbool_t pg, svint32_t op1, int32_t op2) { return sveor_n_s32_z(pg, op1, op2); } __forceinline svint64_t sveor_z(svbool_t pg, svint64_t op1, int64_t op2) { return sveor_n_s64_z(pg, op1, op2); } __forceinline svint16_t sveor_m(svbool_t pg, svint16_t op1, svint16_t op2) { return sveor_s16_m(pg, op1, op2); } __forceinline svint64_t sveor_m(svbool_t pg, svint64_t op1, int64_t op2) { return sveor_n_s64_m(pg, op1, op2); } __forceinline svuint8_t sveor_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return sveor_n_u8_z(pg, op1, op2); } __forceinline svuint16_t sveor_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return sveor_n_u16_z(pg, op1, op2); } __forceinline svint64_t sveor_m(svbool_t pg, svint64_t op1, svint64_t op2) { return sveor_s64_m(pg, op1, op2); } __forceinline svint32_t sveor_m(svbool_t pg, svint32_t op1, svint32_t op2) { return sveor_s32_m(pg, op1, op2); } __forceinline svuint8_t sveor_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return sveor_n_u8_x(pg, op1, op2); } __forceinline svint64_t sveor_x(svbool_t pg, svint64_t op1, int64_t op2) { return sveor_n_s64_x(pg, op1, op2); } __forceinline svuint64_t sveor_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return sveor_n_u64_z(pg, op1, op2); } __forceinline svuint16_t sveor_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return sveor_n_u16_m(pg, op1, op2); } __forceinline svuint32_t sveor_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return sveor_n_u32_m(pg, op1, op2); } __forceinline svuint64_t sveor_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return sveor_n_u64_m(pg, op1, op2); } __forceinline svint8_t sveor_x(svbool_t pg, svint8_t op1, int8_t op2) { return sveor_n_s8_x(pg, op1, op2); } __forceinline svint16_t sveor_x(svbool_t pg, svint16_t op1, int16_t op2) { return sveor_n_s16_x(pg, op1, op2); } 
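// Usage sketch (illustrative only, not part of the intrinsic surface): the overloaded
// bitwise forms dispatch on argument types, so vector and scalar operands and the
// _z/_m/_x predication variants can be mixed directly. clear_and_flip is a
// hypothetical helper written for this comment; it uses only overloads declared in
// this header.
//
//   svuint32_t clear_and_flip(svbool_t pg, svuint32_t v)
//   {
//       // svbic_x(pg, v, 0xFu) resolves to svbic_n_u32_x: clear the low four bits
//       // in active lanes; inactive lanes take an unspecified value (_x form).
//       svuint32_t t = svbic_x(pg, v, 0xFu);
//       // sveor_z(pg, t, 0x80000000u) resolves to sveor_n_u32_z: toggle the most
//       // significant bit in active lanes and zero the inactive lanes (_z form).
//       return sveor_z(pg, t, 0x80000000u);
//   }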
__forceinline svuint32_t sveor_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return sveor_n_u32_z(pg, op1, op2); } __forceinline svint32_t sveor_x(svbool_t pg, svint32_t op1, int32_t op2) { return sveor_n_s32_x(pg, op1, op2); } __forceinline uint32_t sveorv(svbool_t pg, svuint32_t op) { return sveorv_u32(pg, op); } __forceinline uint16_t sveorv(svbool_t pg, svuint16_t op) { return sveorv_u16(pg, op); } __forceinline uint8_t sveorv(svbool_t pg, svuint8_t op) { return sveorv_u8(pg, op); } __forceinline int64_t sveorv(svbool_t pg, svint64_t op) { return sveorv_s64(pg, op); } __forceinline int32_t sveorv(svbool_t pg, svint32_t op) { return sveorv_s32(pg, op); } __forceinline int16_t sveorv(svbool_t pg, svint16_t op) { return sveorv_s16(pg, op); } __forceinline uint64_t sveorv(svbool_t pg, svuint64_t op) { return sveorv_u64(pg, op); } __forceinline int8_t sveorv(svbool_t pg, svint8_t op) { return sveorv_s8(pg, op); } __forceinline svuint32_t svcnot_z(svbool_t pg, svuint32_t op) { return svcnot_u32_z(pg, op); } __forceinline svint16_t svcnot_z(svbool_t pg, svint16_t op) { return svcnot_s16_z(pg, op); } __forceinline svint8_t svcnot_m(svint8_t inactive, svbool_t pg, svint8_t op) { return svcnot_s8_m(inactive, pg, op); } __forceinline svint16_t svcnot_m(svint16_t inactive, svbool_t pg, svint16_t op) { return svcnot_s16_m(inactive, pg, op); } __forceinline svint32_t svcnot_m(svint32_t inactive, svbool_t pg, svint32_t op) { return svcnot_s32_m(inactive, pg, op); } __forceinline svint64_t svcnot_m(svint64_t inactive, svbool_t pg, svint64_t op) { return svcnot_s64_m(inactive, pg, op); } __forceinline svuint16_t svcnot_z(svbool_t pg, svuint16_t op) { return svcnot_u16_z(pg, op); } __forceinline svuint8_t svcnot_z(svbool_t pg, svuint8_t op) { return svcnot_u8_z(pg, op); } __forceinline svint64_t svcnot_z(svbool_t pg, svint64_t op) { return svcnot_s64_z(pg, op); } __forceinline svint32_t svcnot_z(svbool_t pg, svint32_t op) { return svcnot_s32_z(pg, op); } __forceinline svint8_t svcnot_z(svbool_t pg, svint8_t op) { return svcnot_s8_z(pg, op); } __forceinline svuint64_t svcnot_x(svbool_t pg, svuint64_t op) { return svcnot_u64_x(pg, op); } __forceinline svuint32_t svcnot_x(svbool_t pg, svuint32_t op) { return svcnot_u32_x(pg, op); } __forceinline svuint16_t svcnot_x(svbool_t pg, svuint16_t op) { return svcnot_u16_x(pg, op); } __forceinline svuint8_t svcnot_x(svbool_t pg, svuint8_t op) { return svcnot_u8_x(pg, op); } __forceinline svint64_t svcnot_x(svbool_t pg, svint64_t op) { return svcnot_s64_x(pg, op); } __forceinline svint32_t svcnot_x(svbool_t pg, svint32_t op) { return svcnot_s32_x(pg, op); } __forceinline svint16_t svcnot_x(svbool_t pg, svint16_t op) { return svcnot_s16_x(pg, op); } __forceinline svint8_t svcnot_x(svbool_t pg, svint8_t op) { return svcnot_s8_x(pg, op); } __forceinline svuint64_t svcnot_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { return svcnot_u64_m(inactive, pg, op); } __forceinline svuint32_t svcnot_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { return svcnot_u32_m(inactive, pg, op); } __forceinline svuint8_t svcnot_m(svuint8_t inactive, svbool_t pg, svuint8_t op) { return svcnot_u8_m(inactive, pg, op); } __forceinline svuint64_t svcnot_z(svbool_t pg, svuint64_t op) { return svcnot_u64_z(pg, op); } __forceinline svuint16_t svcnot_m(svuint16_t inactive, svbool_t pg, svuint16_t op) { return svcnot_u16_m(inactive, pg, op); } __forceinline svbool_t svnand_z(svbool_t pg, svbool_t op1, svbool_t op2) { return svnand_b_z(pg, op1, op2); } __forceinline svbool_t 
svnor_z(svbool_t pg, svbool_t op1, svbool_t op2) { return svnor_b_z(pg, op1, op2); } __forceinline svbool_t svorn_z(svbool_t pg, svbool_t op1, svbool_t op2) { return svorn_b_z(pg, op1, op2); } __forceinline svint8_t svorr_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svorr_s8_m(pg, op1, op2); } __forceinline svuint8_t svorr_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svorr_u8_z(pg, op1, op2); } __forceinline svint16_t svorr_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svorr_s16_m(pg, op1, op2); } __forceinline svint32_t svorr_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svorr_s32_m(pg, op1, op2); } __forceinline svint64_t svorr_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svorr_s64_m(pg, op1, op2); } __forceinline svuint8_t svorr_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svorr_u8_m(pg, op1, op2); } __forceinline svuint16_t svorr_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svorr_u16_m(pg, op1, op2); } __forceinline svuint32_t svorr_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svorr_u32_m(pg, op1, op2); } __forceinline svuint64_t svorr_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svorr_u64_m(pg, op1, op2); } __forceinline svint16_t svorr_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svorr_s16_x(pg, op1, op2); } __forceinline svuint16_t svorr_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svorr_u16_z(pg, op1, op2); } __forceinline svint32_t svorr_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svorr_s32_x(pg, op1, op2); } __forceinline svuint8_t svorr_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svorr_u8_x(pg, op1, op2); } __forceinline svuint16_t svorr_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svorr_u16_x(pg, op1, op2); } __forceinline svuint32_t svorr_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svorr_u32_x(pg, op1, op2); } __forceinline svuint64_t svorr_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svorr_u64_x(pg, op1, op2); } __forceinline svint8_t svorr_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svorr_s8_z(pg, op1, op2); } __forceinline svint16_t svorr_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svorr_s16_z(pg, op1, op2); } __forceinline svint32_t svorr_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svorr_s32_z(pg, op1, op2); } __forceinline svint64_t svorr_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svorr_s64_z(pg, op1, op2); } __forceinline svint64_t svorr_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svorr_s64_x(pg, op1, op2); } __forceinline svint8_t svorr_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svorr_s8_x(pg, op1, op2); } __forceinline svuint32_t svorr_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svorr_n_u32_z(pg, op1, op2); } __forceinline svbool_t svorr_z(svbool_t pg, svbool_t op1, svbool_t op2) { return svorr_b_z(pg, op1, op2); } __forceinline svuint16_t svorr_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svorr_n_u16_z(pg, op1, op2); } __forceinline svuint64_t svorr_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svorr_n_u64_z(pg, op1, op2); } __forceinline svint32_t svorr_z(svbool_t pg, svint32_t op1, int32_t op2) { return svorr_n_s32_z(pg, op1, op2); } __forceinline svint16_t svorr_z(svbool_t pg, svint16_t op1, int16_t op2) { return svorr_n_s16_z(pg, op1, op2); } __forceinline svint8_t svorr_z(svbool_t pg, svint8_t op1, int8_t op2) { return svorr_n_s8_z(pg, op1, op2); } __forceinline svuint64_t svorr_x(svbool_t pg, svuint64_t op1, uint64_t op2) { 
return svorr_n_u64_x(pg, op1, op2); } __forceinline svuint32_t svorr_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svorr_n_u32_x(pg, op1, op2); } __forceinline svint16_t svorr_m(svbool_t pg, svint16_t op1, int16_t op2) { return svorr_n_s16_m(pg, op1, op2); } __forceinline svint32_t svorr_m(svbool_t pg, svint32_t op1, int32_t op2) { return svorr_n_s32_m(pg, op1, op2); } __forceinline svint64_t svorr_m(svbool_t pg, svint64_t op1, int64_t op2) { return svorr_n_s64_m(pg, op1, op2); } __forceinline svuint8_t svorr_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svorr_n_u8_m(pg, op1, op2); } __forceinline svuint16_t svorr_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svorr_n_u16_m(pg, op1, op2); } __forceinline svuint32_t svorr_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svorr_n_u32_m(pg, op1, op2); } __forceinline svuint8_t svorr_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svorr_n_u8_z(pg, op1, op2); } __forceinline svint8_t svorr_x(svbool_t pg, svint8_t op1, int8_t op2) { return svorr_n_s8_x(pg, op1, op2); } __forceinline svuint64_t svorr_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svorr_n_u64_m(pg, op1, op2); } __forceinline svint32_t svorr_x(svbool_t pg, svint32_t op1, int32_t op2) { return svorr_n_s32_x(pg, op1, op2); } __forceinline svuint16_t svorr_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svorr_n_u16_x(pg, op1, op2); } __forceinline svuint8_t svorr_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svorr_n_u8_x(pg, op1, op2); } __forceinline svuint32_t svorr_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svorr_u32_z(pg, op1, op2); } __forceinline svuint64_t svorr_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svorr_u64_z(pg, op1, op2); } __forceinline svint8_t svorr_m(svbool_t pg, svint8_t op1, int8_t op2) { return svorr_n_s8_m(pg, op1, op2); } __forceinline svint16_t svorr_x(svbool_t pg, svint16_t op1, int16_t op2) { return svorr_n_s16_x(pg, op1, op2); } __forceinline svint64_t svorr_z(svbool_t pg, svint64_t op1, int64_t op2) { return svorr_n_s64_z(pg, op1, op2); } __forceinline svint64_t svorr_x(svbool_t pg, svint64_t op1, int64_t op2) { return svorr_n_s64_x(pg, op1, op2); } __forceinline int32_t svorv(svbool_t pg, svint32_t op) { return svorv_s32(pg, op); } __forceinline int64_t svorv(svbool_t pg, svint64_t op) { return svorv_s64(pg, op); } __forceinline uint16_t svorv(svbool_t pg, svuint16_t op) { return svorv_u16(pg, op); } __forceinline uint32_t svorv(svbool_t pg, svuint32_t op) { return svorv_u32(pg, op); } __forceinline uint64_t svorv(svbool_t pg, svuint64_t op) { return svorv_u64(pg, op); } __forceinline int8_t svorv(svbool_t pg, svint8_t op) { return svorv_s8(pg, op); } __forceinline int16_t svorv(svbool_t pg, svint16_t op) { return svorv_s16(pg, op); } __forceinline uint8_t svorv(svbool_t pg, svuint8_t op) { return svorv_u8(pg, op); } __forceinline svuint16_t svunpkhi(svuint8_t op) { return svunpkhi_u16(op); } __forceinline svint64_t svunpkhi(svint32_t op) { return svunpkhi_s64(op); } __forceinline svint16_t svunpkhi(svint8_t op) { return svunpkhi_s16(op); } __forceinline svint32_t svunpkhi(svint16_t op) { return svunpkhi_s32(op); } __forceinline svuint32_t svunpkhi(svuint16_t op) { return svunpkhi_u32(op); } __forceinline svuint64_t svunpkhi(svuint32_t op) { return svunpkhi_u64(op); } __forceinline svbool_t svunpkhi(svbool_t op) { return svunpkhi_b(op); } __forceinline svbool_t svunpklo(svbool_t op) { return svunpklo_b(op); } __forceinline svuint64_t svunpklo(svuint32_t op) { return 
svunpklo_u64(op); } __forceinline svint32_t svunpklo(svint16_t op) { return svunpklo_s32(op); } __forceinline svint64_t svunpklo(svint32_t op) { return svunpklo_s64(op); } __forceinline svuint16_t svunpklo(svuint8_t op) { return svunpklo_u16(op); } __forceinline svuint32_t svunpklo(svuint16_t op) { return svunpklo_u32(op); } __forceinline svint16_t svunpklo(svint8_t op) { return svunpklo_s16(op); } __forceinline uint64_t svqdecp_b64(uint64_t op, svbool_t pg) { return svqdecp_n_u64_b64(op, pg); } __forceinline uint32_t svqdecp_b32(uint32_t op, svbool_t pg) { return svqdecp_n_u32_b32(op, pg); } __forceinline uint32_t svqdecp_b64(uint32_t op, svbool_t pg) { return svqdecp_n_u32_b64(op, pg); } __forceinline svuint64_t svqdecp(svuint64_t op, svbool_t pg) { return svqdecp_u64(op, pg); } __forceinline uint32_t svqdecp_b16(uint32_t op, svbool_t pg) { return svqdecp_n_u32_b16(op, pg); } __forceinline uint64_t svqdecp_b8(uint64_t op, svbool_t pg) { return svqdecp_n_u64_b8(op, pg); } __forceinline uint64_t svqdecp_b16(uint64_t op, svbool_t pg) { return svqdecp_n_u64_b16(op, pg); } __forceinline uint64_t svqdecp_b32(uint64_t op, svbool_t pg) { return svqdecp_n_u64_b32(op, pg); } __forceinline svint32_t svqdecp(svint32_t op, svbool_t pg) { return svqdecp_s32(op, pg); } __forceinline svint64_t svqdecp(svint64_t op, svbool_t pg) { return svqdecp_s64(op, pg); } __forceinline int64_t svqdecp_b64(int64_t op, svbool_t pg) { return svqdecp_n_s64_b64(op, pg); } __forceinline int32_t svqdecp_b8(int32_t op, svbool_t pg) { return svqdecp_n_s32_b8(op, pg); } __forceinline int32_t svqdecp_b16(int32_t op, svbool_t pg) { return svqdecp_n_s32_b16(op, pg); } __forceinline svint16_t svqdecp(svint16_t op, svbool_t pg) { return svqdecp_s16(op, pg); } __forceinline int32_t svqdecp_b32(int32_t op, svbool_t pg) { return svqdecp_n_s32_b32(op, pg); } __forceinline int64_t svqdecp_b32(int64_t op, svbool_t pg) { return svqdecp_n_s64_b32(op, pg); } __forceinline int64_t svqdecp_b8(int64_t op, svbool_t pg) { return svqdecp_n_s64_b8(op, pg); } __forceinline int64_t svqdecp_b16(int64_t op, svbool_t pg) { return svqdecp_n_s64_b16(op, pg); } __forceinline uint32_t svqdecp_b8(uint32_t op, svbool_t pg) { return svqdecp_n_u32_b8(op, pg); } __forceinline svuint32_t svqdecp(svuint32_t op, svbool_t pg) { return svqdecp_u32(op, pg); } __forceinline svuint16_t svqdecp(svuint16_t op, svbool_t pg) { return svqdecp_u16(op, pg); } __forceinline int32_t svqdecp_b64(int32_t op, svbool_t pg) { return svqdecp_n_s32_b64(op, pg); } __forceinline int32_t svqincp_b8(int32_t op, svbool_t pg) { return svqincp_n_s32_b8(op, pg); } __forceinline int32_t svqincp_b16(int32_t op, svbool_t pg) { return svqincp_n_s32_b16(op, pg); } __forceinline int32_t svqincp_b32(int32_t op, svbool_t pg) { return svqincp_n_s32_b32(op, pg); } __forceinline int32_t svqincp_b64(int32_t op, svbool_t pg) { return svqincp_n_s32_b64(op, pg); } __forceinline int64_t svqincp_b8(int64_t op, svbool_t pg) { return svqincp_n_s64_b8(op, pg); } __forceinline int64_t svqincp_b16(int64_t op, svbool_t pg) { return svqincp_n_s64_b16(op, pg); } __forceinline int64_t svqincp_b32(int64_t op, svbool_t pg) { return svqincp_n_s64_b32(op, pg); } __forceinline uint32_t svqincp_b64(uint32_t op, svbool_t pg) { return svqincp_n_u32_b64(op, pg); } __forceinline uint32_t svqincp_b16(uint32_t op, svbool_t pg) { return svqincp_n_u32_b16(op, pg); } __forceinline svuint64_t svqincp(svuint64_t op, svbool_t pg) { return svqincp_u64(op, pg); } __forceinline int64_t svqincp_b64(int64_t op, svbool_t pg) { return 
svqincp_n_s64_b64(op, pg); } __forceinline svuint16_t svqincp(svuint16_t op, svbool_t pg) { return svqincp_u16(op, pg); } __forceinline svint64_t svqincp(svint64_t op, svbool_t pg) { return svqincp_s64(op, pg); } __forceinline svint32_t svqincp(svint32_t op, svbool_t pg) { return svqincp_s32(op, pg); } __forceinline svuint32_t svqincp(svuint32_t op, svbool_t pg) { return svqincp_u32(op, pg); } __forceinline uint64_t svqincp_b64(uint64_t op, svbool_t pg) { return svqincp_n_u64_b64(op, pg); } __forceinline uint64_t svqincp_b32(uint64_t op, svbool_t pg) { return svqincp_n_u64_b32(op, pg); } __forceinline uint64_t svqincp_b16(uint64_t op, svbool_t pg) { return svqincp_n_u64_b16(op, pg); } __forceinline uint64_t svqincp_b8(uint64_t op, svbool_t pg) { return svqincp_n_u64_b8(op, pg); } __forceinline uint32_t svqincp_b32(uint32_t op, svbool_t pg) { return svqincp_n_u32_b32(op, pg); } __forceinline svint16_t svqincp(svint16_t op, svbool_t pg) { return svqincp_s16(op, pg); } __forceinline uint32_t svqincp_b8(uint32_t op, svbool_t pg) { return svqincp_n_u32_b8(op, pg); } __forceinline svuint32_t svcompact(svbool_t pg, svuint32_t op) { return svcompact_u32(pg, op); } __forceinline svuint64_t svcompact(svbool_t pg, svuint64_t op) { return svcompact_u64(pg, op); } __forceinline svint64_t svcompact(svbool_t pg, svint64_t op) { return svcompact_s64(pg, op); } __forceinline svint32_t svcompact(svbool_t pg, svint32_t op) { return svcompact_s32(pg, op); } __forceinline svfloat64_t svcompact(svbool_t pg, svfloat64_t op) { return svcompact_f64(pg, op); } __forceinline svfloat32_t svcompact(svbool_t pg, svfloat32_t op) { return svcompact_f32(pg, op); } __forceinline svint64_t svclasta(svbool_t pg, svint64_t fallback, svint64_t data) { return svclasta_s64(pg, fallback, data); } __forceinline svuint8_t svclasta(svbool_t pg, svuint8_t fallback, svuint8_t data) { return svclasta_u8(pg, fallback, data); } __forceinline uint64_t svclasta(svbool_t pg, uint64_t fallback, svuint64_t data) { return svclasta_n_u64(pg, fallback, data); } __forceinline uint32_t svclasta(svbool_t pg, uint32_t fallback, svuint32_t data) { return svclasta_n_u32(pg, fallback, data); } __forceinline uint16_t svclasta(svbool_t pg, uint16_t fallback, svuint16_t data) { return svclasta_n_u16(pg, fallback, data); } __forceinline uint8_t svclasta(svbool_t pg, uint8_t fallback, svuint8_t data) { return svclasta_n_u8(pg, fallback, data); } __forceinline int64_t svclasta(svbool_t pg, int64_t fallback, svint64_t data) { return svclasta_n_s64(pg, fallback, data); } __forceinline int16_t svclasta(svbool_t pg, int16_t fallback, svint16_t data) { return svclasta_n_s16(pg, fallback, data); } __forceinline int8_t svclasta(svbool_t pg, int8_t fallback, svint8_t data) { return svclasta_n_s8(pg, fallback, data); } __forceinline float64_t svclasta(svbool_t pg, float64_t fallback, svfloat64_t data) { return svclasta_n_f64(pg, fallback, data); } __forceinline float32_t svclasta(svbool_t pg, float32_t fallback, svfloat32_t data) { return svclasta_n_f32(pg, fallback, data); } __forceinline int32_t svclasta(svbool_t pg, int32_t fallback, svint32_t data) { return svclasta_n_s32(pg, fallback, data); } __forceinline svuint64_t svclasta(svbool_t pg, svuint64_t fallback, svuint64_t data) { return svclasta_u64(pg, fallback, data); } __forceinline svint16_t svclasta(svbool_t pg, svint16_t fallback, svint16_t data) { return svclasta_s16(pg, fallback, data); } __forceinline svint8_t svclasta(svbool_t pg, svint8_t fallback, svint8_t data) { return svclasta_s8(pg, fallback, 
data); } __forceinline svfloat64_t svclasta(svbool_t pg, svfloat64_t fallback, svfloat64_t data) { return svclasta_f64(pg, fallback, data); } __forceinline svfloat32_t svclasta(svbool_t pg, svfloat32_t fallback, svfloat32_t data) { return svclasta_f32(pg, fallback, data); } __forceinline svint32_t svclasta(svbool_t pg, svint32_t fallback, svint32_t data) { return svclasta_s32(pg, fallback, data); } __forceinline svbfloat16_t svclasta(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) { return svclasta_bf16(pg, fallback, data); } __forceinline svuint16_t svclasta(svbool_t pg, svuint16_t fallback, svuint16_t data) { return svclasta_u16(pg, fallback, data); } __forceinline svuint32_t svclasta(svbool_t pg, svuint32_t fallback, svuint32_t data) { return svclasta_u32(pg, fallback, data); } __forceinline svfloat16_t svclasta(svbool_t pg, svfloat16_t fallback, svfloat16_t data) { return svclasta_f16(pg, fallback, data); } __forceinline int8_t svlasta(svbool_t pg, svint8_t op) { return svlasta_s8(pg, op); } __forceinline float32_t svlasta(svbool_t pg, svfloat32_t op) { return svlasta_f32(pg, op); } __forceinline float64_t svlasta(svbool_t pg, svfloat64_t op) { return svlasta_f64(pg, op); } __forceinline uint64_t svlasta(svbool_t pg, svuint64_t op) { return svlasta_u64(pg, op); } __forceinline int64_t svlasta(svbool_t pg, svint64_t op) { return svlasta_s64(pg, op); } __forceinline int16_t svlasta(svbool_t pg, svint16_t op) { return svlasta_s16(pg, op); } __forceinline int32_t svlasta(svbool_t pg, svint32_t op) { return svlasta_s32(pg, op); } __forceinline uint16_t svlasta(svbool_t pg, svuint16_t op) { return svlasta_u16(pg, op); } __forceinline uint32_t svlasta(svbool_t pg, svuint32_t op) { return svlasta_u32(pg, op); } __forceinline uint8_t svlasta(svbool_t pg, svuint8_t op) { return svlasta_u8(pg, op); } __forceinline float32_t svclastb(svbool_t pg, float32_t fallback, svfloat32_t data) { return svclastb_n_f32(pg, fallback, data); } __forceinline uint64_t svclastb(svbool_t pg, uint64_t fallback, svuint64_t data) { return svclastb_n_u64(pg, fallback, data); } __forceinline uint32_t svclastb(svbool_t pg, uint32_t fallback, svuint32_t data) { return svclastb_n_u32(pg, fallback, data); } __forceinline uint16_t svclastb(svbool_t pg, uint16_t fallback, svuint16_t data) { return svclastb_n_u16(pg, fallback, data); } __forceinline uint8_t svclastb(svbool_t pg, uint8_t fallback, svuint8_t data) { return svclastb_n_u8(pg, fallback, data); } __forceinline int64_t svclastb(svbool_t pg, int64_t fallback, svint64_t data) { return svclastb_n_s64(pg, fallback, data); } __forceinline int32_t svclastb(svbool_t pg, int32_t fallback, svint32_t data) { return svclastb_n_s32(pg, fallback, data); } __forceinline int16_t svclastb(svbool_t pg, int16_t fallback, svint16_t data) { return svclastb_n_s16(pg, fallback, data); } __forceinline int8_t svclastb(svbool_t pg, int8_t fallback, svint8_t data) { return svclastb_n_s8(pg, fallback, data); } __forceinline float64_t svclastb(svbool_t pg, float64_t fallback, svfloat64_t data) { return svclastb_n_f64(pg, fallback, data); } __forceinline svbfloat16_t svclastb(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) { return svclastb_bf16(pg, fallback, data); } __forceinline svuint64_t svclastb(svbool_t pg, svuint64_t fallback, svuint64_t data) { return svclastb_u64(pg, fallback, data); } __forceinline svuint32_t svclastb(svbool_t pg, svuint32_t fallback, svuint32_t data) { return svclastb_u32(pg, fallback, data); } __forceinline svuint8_t svclastb(svbool_t pg, svuint8_t 
fallback, svuint8_t data) { return svclastb_u8(pg, fallback, data); } __forceinline svint64_t svclastb(svbool_t pg, svint64_t fallback, svint64_t data) { return svclastb_s64(pg, fallback, data); } __forceinline svint32_t svclastb(svbool_t pg, svint32_t fallback, svint32_t data) { return svclastb_s32(pg, fallback, data); } __forceinline svint16_t svclastb(svbool_t pg, svint16_t fallback, svint16_t data) { return svclastb_s16(pg, fallback, data); } __forceinline svint8_t svclastb(svbool_t pg, svint8_t fallback, svint8_t data) { return svclastb_s8(pg, fallback, data); } __forceinline svfloat64_t svclastb(svbool_t pg, svfloat64_t fallback, svfloat64_t data) { return svclastb_f64(pg, fallback, data); } __forceinline svfloat32_t svclastb(svbool_t pg, svfloat32_t fallback, svfloat32_t data) { return svclastb_f32(pg, fallback, data); } __forceinline svfloat16_t svclastb(svbool_t pg, svfloat16_t fallback, svfloat16_t data) { return svclastb_f16(pg, fallback, data); } __forceinline svuint16_t svclastb(svbool_t pg, svuint16_t fallback, svuint16_t data) { return svclastb_u16(pg, fallback, data); } __forceinline int32_t svlastb(svbool_t pg, svint32_t op) { return svlastb_s32(pg, op); } __forceinline float32_t svlastb(svbool_t pg, svfloat32_t op) { return svlastb_f32(pg, op); } __forceinline int16_t svlastb(svbool_t pg, svint16_t op) { return svlastb_s16(pg, op); } __forceinline float64_t svlastb(svbool_t pg, svfloat64_t op) { return svlastb_f64(pg, op); } __forceinline int64_t svlastb(svbool_t pg, svint64_t op) { return svlastb_s64(pg, op); } __forceinline uint8_t svlastb(svbool_t pg, svuint8_t op) { return svlastb_u8(pg, op); } __forceinline int8_t svlastb(svbool_t pg, svint8_t op) { return svlastb_s8(pg, op); } __forceinline uint32_t svlastb(svbool_t pg, svuint32_t op) { return svlastb_u32(pg, op); } __forceinline uint64_t svlastb(svbool_t pg, svuint64_t op) { return svlastb_u64(pg, op); } __forceinline uint16_t svlastb(svbool_t pg, svuint16_t op) { return svlastb_u16(pg, op); } __forceinline svfloat32_t svsel(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svsel_f32(pg, op1, op2); } __forceinline svfloat64_t svsel(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svsel_f64(pg, op1, op2); } __forceinline svbfloat16_t svsel(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) { return svsel_bf16(pg, op1, op2); } __forceinline svfloat16_t svsel(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svsel_f16(pg, op1, op2); } __forceinline svint8_t svsel(svbool_t pg, svint8_t op1, svint8_t op2) { return svsel_s8(pg, op1, op2); } __forceinline svint32_t svsel(svbool_t pg, svint32_t op1, svint32_t op2) { return svsel_s32(pg, op1, op2); } __forceinline svint64_t svsel(svbool_t pg, svint64_t op1, svint64_t op2) { return svsel_s64(pg, op1, op2); } __forceinline svuint8_t svsel(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svsel_u8(pg, op1, op2); } __forceinline svuint16_t svsel(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svsel_u16(pg, op1, op2); } __forceinline svint16_t svsel(svbool_t pg, svint16_t op1, svint16_t op2) { return svsel_s16(pg, op1, op2); } __forceinline svuint64_t svsel(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svsel_u64(pg, op1, op2); } __forceinline svbool_t svsel(svbool_t pg, svbool_t op1, svbool_t op2) { return svsel_b(pg, op1, op2); } __forceinline svuint32_t svsel(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svsel_u32(pg, op1, op2); } __forceinline svbool_t svdupq_b8(bool x0, bool x1, bool x2, bool x3, bool x4, bool x5, bool x6, bool x7, 
bool x8, bool x9, bool x10, bool x11, bool x12, bool x13, bool x14, bool x15) { return svdupq_n_b8(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15); } __forceinline svbool_t svdupq_b16(bool x0, bool x1, bool x2, bool x3, bool x4, bool x5, bool x6, bool x7) { return svdupq_n_b16(x0, x1, x2, x3, x4, x5, x6, x7); } __forceinline svbool_t svdupq_b32(bool x0, bool x1, bool x2, bool x3) { return svdupq_n_b32(x0, x1, x2, x3); } __forceinline svbool_t svdupq_b64(bool x0, bool x1) { return svdupq_n_b64(x0, x1); } __forceinline svbool_t svdup_b8(bool op) { return svdup_n_b8(op); } __forceinline svbool_t svdup_b16(bool op) { return svdup_n_b16(op); } __forceinline svbool_t svdup_b32(bool op) { return svdup_n_b32(op); } __forceinline svbool_t svdup_b64(bool op) { return svdup_n_b64(op); } __forceinline svbool_t svpfalse() { return svpfalse_b(); } __forceinline svbool_t svwhilelt_b8(int32_t op1, int32_t op2) { return svwhilelt_b8_s32(op1, op2); } __forceinline svbool_t svwhilelt_b8(int64_t op1, int64_t op2) { return svwhilelt_b8_s64(op1, op2); } __forceinline svbool_t svwhilelt_b8(uint32_t op1, uint32_t op2) { return svwhilelt_b8_u32(op1, op2); } __forceinline svbool_t svwhilelt_b8(uint64_t op1, uint64_t op2) { return svwhilelt_b8_u64(op1, op2); } __forceinline svbool_t svwhilelt_b16(int32_t op1, int32_t op2) { return svwhilelt_b16_s32(op1, op2); } __forceinline svbool_t svwhilelt_b16(int64_t op1, int64_t op2) { return svwhilelt_b16_s64(op1, op2); } __forceinline svbool_t svwhilelt_b16(uint32_t op1, uint32_t op2) { return svwhilelt_b16_u32(op1, op2); } __forceinline svbool_t svwhilelt_b16(uint64_t op1, uint64_t op2) { return svwhilelt_b16_u64(op1, op2); } __forceinline svbool_t svwhilelt_b32(int32_t op1, int32_t op2) { return svwhilelt_b32_s32(op1, op2); } __forceinline svbool_t svwhilelt_b32(int64_t op1, int64_t op2) { return svwhilelt_b32_s64(op1, op2); } __forceinline svbool_t svwhilelt_b32(uint32_t op1, uint32_t op2) { return svwhilelt_b32_u32(op1, op2); } __forceinline svbool_t svwhilelt_b32(uint64_t op1, uint64_t op2) { return svwhilelt_b32_u64(op1, op2); } __forceinline svbool_t svwhilelt_b64(int32_t op1, int32_t op2) { return svwhilelt_b64_s32(op1, op2); } __forceinline svbool_t svwhilelt_b64(int64_t op1, int64_t op2) { return svwhilelt_b64_s64(op1, op2); } __forceinline svbool_t svwhilelt_b64(uint32_t op1, uint32_t op2) { return svwhilelt_b64_u32(op1, op2); } __forceinline svbool_t svwhilelt_b64(uint64_t op1, uint64_t op2) { return svwhilelt_b64_u64(op1, op2); } __forceinline svbool_t svwhilele_b8(int32_t op1, int32_t op2) { return svwhilele_b8_s32(op1, op2); } __forceinline svbool_t svwhilele_b8(int64_t op1, int64_t op2) { return svwhilele_b8_s64(op1, op2); } __forceinline svbool_t svwhilele_b8(uint32_t op1, uint32_t op2) { return svwhilele_b8_u32(op1, op2); } __forceinline svbool_t svwhilele_b8(uint64_t op1, uint64_t op2) { return svwhilele_b8_u64(op1, op2); } __forceinline svbool_t svwhilele_b16(int32_t op1, int32_t op2) { return svwhilele_b16_s32(op1, op2); } __forceinline svbool_t svwhilele_b16(int64_t op1, int64_t op2) { return svwhilele_b16_s64(op1, op2); } __forceinline svbool_t svwhilele_b16(uint32_t op1, uint32_t op2) { return svwhilele_b16_u32(op1, op2); } __forceinline svbool_t svwhilele_b16(uint64_t op1, uint64_t op2) { return svwhilele_b16_u64(op1, op2); } __forceinline svbool_t svwhilele_b32(int32_t op1, int32_t op2) { return svwhilele_b32_s32(op1, op2); } __forceinline svbool_t svwhilele_b32(int64_t op1, int64_t op2) { return svwhilele_b32_s64(op1, op2); } 
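// Usage sketch (illustrative only): the svwhilelt_b32 overloads above pick the
// _s32/_s64/_u32/_u64 variant from the operand types, which keeps the usual
// predicated strip-mining loop short. scale_f32 is a hypothetical helper; it assumes
// the contiguous svld1/svst1 and svmul_x overloads and svcntw() defined by the ACLE
// and declared elsewhere in this header.
//
//   void scale_f32(float32_t *dst, const float32_t *src, int64_t n, float32_t s)
//   {
//       for (int64_t i = 0; i < n; i += (int64_t)svcntw())
//       {
//           svbool_t pg = svwhilelt_b32(i, n);          // svwhilelt_b32_s64
//           svfloat32_t v = svld1(pg, src + i);         // contiguous load
//           svst1(pg, dst + i, svmul_x(pg, v, s));      // svmul_n_f32_x
//       }
//   }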
__forceinline svbool_t svwhilele_b32(uint32_t op1, uint32_t op2) { return svwhilele_b32_u32(op1, op2); } __forceinline svbool_t svwhilele_b32(uint64_t op1, uint64_t op2) { return svwhilele_b32_u64(op1, op2); } __forceinline svbool_t svwhilele_b64(int32_t op1, int32_t op2) { return svwhilele_b64_s32(op1, op2); } __forceinline svbool_t svwhilele_b64(int64_t op1, int64_t op2) { return svwhilele_b64_s64(op1, op2); } __forceinline svbool_t svwhilele_b64(uint32_t op1, uint32_t op2) { return svwhilele_b64_u32(op1, op2); } __forceinline svbool_t svwhilele_b64(uint64_t op1, uint64_t op2) { return svwhilele_b64_u64(op1, op2); } __forceinline svbool_t svbrka_m(svbool_t inactive, svbool_t pg, svbool_t op) { return svbrka_b_m(inactive, pg, op); } __forceinline svbool_t svbrka_z(svbool_t pg, svbool_t op) { return svbrka_b_z(pg, op); } __forceinline svbool_t svbrkpa_z(svbool_t pg, svbool_t op1, svbool_t op2) { return svbrkpa_b_z(pg, op1, op2); } __forceinline svbool_t svbrkb_m(svbool_t inactive, svbool_t pg, svbool_t op) { return svbrkb_b_m(inactive, pg, op); } __forceinline svbool_t svbrkb_z(svbool_t pg, svbool_t op) { return svbrkb_b_z(pg, op); } __forceinline svbool_t svbrkpb_z(svbool_t pg, svbool_t op1, svbool_t op2) { return svbrkpb_b_z(pg, op1, op2); } __forceinline svbool_t svbrkn_z(svbool_t pg, svbool_t op1, svbool_t op2) { return svbrkn_b_z(pg, op1, op2); } __forceinline svbool_t svpfirst(svbool_t pg, svbool_t op) { return svpfirst_b(pg, op); }
template <enum svprfop N, typename T1, typename T2> __forceinline void __svprfb_gather_offset(svbool_t pg, T1 base, T2 offsets) {
    if constexpr(::std::is_same_v<T2, svint32_t>) { return svprfb_gather_s32offset(pg, base, offsets, N); }
    else if constexpr(::std::is_same_v<T2, svint64_t>) { return svprfb_gather_s64offset(pg, base, offsets, N); }
    else if constexpr(::std::is_same_v<T2, svuint32_t>) { return svprfb_gather_u32offset(pg, base, offsets, N); }
    else if constexpr(::std::is_same_v<T2, svuint64_t>) { return svprfb_gather_u64offset(pg, base, offsets, N); }
    else if constexpr(::std::is_same_v<T1, svuint32_t> && ::std::is_same_v<T2, int64_t>) { return svprfb_gather_u32base_offset(pg, base, offsets, N); }
    else if constexpr(::std::is_same_v<T1, svuint64_t> && ::std::is_same_v<T2, int64_t>) { return svprfb_gather_u64base_offset(pg, base, offsets, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svprfb_gather_offset(pg, base, offsets, op) __svprfb_gather_offset<op>(pg, base, offsets)
template <enum svprfop N, typename T> __forceinline void __svprfb_gather(svbool_t pg, T bases) {
    if constexpr(::std::is_same_v<T, svuint32_t>) { return svprfb_gather_u32base(pg, bases, N); }
    else if constexpr(::std::is_same_v<T, svuint64_t>) { return svprfb_gather_u64base(pg, bases, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svprfb_gather(pg, bases, op) __svprfb_gather<op>(pg, bases)
template <enum svprfop N, typename T1, typename T2> __forceinline void __svprfd_gather_index(svbool_t pg, T1 base, T2 indices) {
    if constexpr(::std::is_same_v<T2, svint32_t>) { return svprfd_gather_s32index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T2, svint64_t>) { return svprfd_gather_s64index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T2, svuint32_t>) { return svprfd_gather_u32index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T2, svuint64_t>) { return svprfd_gather_u64index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T1, svuint32_t> && ::std::is_same_v<T2, int64_t>) { return svprfd_gather_u32base_index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T1, svuint64_t> && ::std::is_same_v<T2, int64_t>) { return svprfd_gather_u64base_index(pg, base, indices, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svprfd_gather_index(pg, base, indices, op) __svprfd_gather_index<op>(pg, base, indices)
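// Usage sketch (illustrative only): the svprf*_gather* spellings are macros so the
// svprfop operand can be forwarded as a template argument to the dispatch helpers
// above. prefetch_bytes is a hypothetical helper written for this comment.
//
//   void prefetch_bytes(svbool_t pg, const void *base, svuint64_t offsets)
//   {
//       // Expands to __svprfb_gather_offset<SV_PLDL1KEEP>(pg, base, offsets),
//       // which dispatches on the offset type to svprfb_gather_u64offset.
//       svprfb_gather_offset(pg, base, offsets, SV_PLDL1KEEP);
//   }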
template <enum svprfop N, typename T> __forceinline void __svprfd_gather(svbool_t pg, T bases) {
    if constexpr(::std::is_same_v<T, svuint32_t>) { return svprfd_gather_u32base(pg, bases, N); }
    else if constexpr(::std::is_same_v<T, svuint64_t>) { return svprfd_gather_u64base(pg, bases, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svprfd_gather(pg, bases, op) __svprfd_gather<op>(pg, bases)
template <enum svprfop N, typename T1, typename T2> __forceinline void __svprfh_gather_index(svbool_t pg, T1 base, T2 indices) {
    if constexpr(::std::is_same_v<T2, svint32_t>) { return svprfh_gather_s32index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T2, svint64_t>) { return svprfh_gather_s64index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T2, svuint32_t>) { return svprfh_gather_u32index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T2, svuint64_t>) { return svprfh_gather_u64index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T1, svuint32_t> && ::std::is_same_v<T2, int64_t>) { return svprfh_gather_u32base_index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T1, svuint64_t> && ::std::is_same_v<T2, int64_t>) { return svprfh_gather_u64base_index(pg, base, indices, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svprfh_gather_index(pg, base, indices, op) __svprfh_gather_index<op>(pg, base, indices)
template <enum svprfop N, typename T> __forceinline void __svprfh_gather(svbool_t pg, T bases) {
    if constexpr(::std::is_same_v<T, svuint32_t>) { return svprfh_gather_u32base(pg, bases, N); }
    else if constexpr(::std::is_same_v<T, svuint64_t>) { return svprfh_gather_u64base(pg, bases, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svprfh_gather(pg, bases, op) __svprfh_gather<op>(pg, bases)
template <enum svprfop N, typename T1, typename T2> __forceinline void __svprfw_gather_index(svbool_t pg, T1 base, T2 indices) {
    if constexpr(::std::is_same_v<T2, svint32_t>) { return svprfw_gather_s32index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T2, svint64_t>) { return svprfw_gather_s64index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T2, svuint32_t>) { return svprfw_gather_u32index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T2, svuint64_t>) { return svprfw_gather_u64index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T1, svuint32_t> && ::std::is_same_v<T2, int64_t>) { return svprfw_gather_u32base_index(pg, base, indices, N); }
    else if constexpr(::std::is_same_v<T1, svuint64_t> && ::std::is_same_v<T2, int64_t>) { return svprfw_gather_u64base_index(pg, base, indices, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svprfw_gather_index(pg, base, indices, op) __svprfw_gather_index<op>(pg, base, indices)
template <enum svprfop N, typename T> __forceinline void __svprfw_gather(svbool_t pg, T bases) {
    if constexpr(::std::is_same_v<T, svuint32_t>) { return svprfw_gather_u32base(pg, bases, N); }
    else if constexpr(::std::is_same_v<T, svuint64_t>) { return svprfw_gather_u64base(pg, bases, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svprfw_gather(pg, bases, op) __svprfw_gather<op>(pg, bases)
__forceinline svint16_t svlsl_m(svbool_t pg, svint16_t op1, uint16_t op2) { return svlsl_n_s16_m(pg, op1, op2); }
__forceinline svint16_t svlsl_x(svbool_t pg, svint16_t op1, uint16_t op2) { return svlsl_n_s16_x(pg, op1, op2); }
__forceinline svint16_t svlsl_z(svbool_t pg, svint16_t op1, uint16_t op2) { return svlsl_n_s16_z(pg, op1, op2); }
__forceinline svint16_t svlsl_m(svbool_t pg, svint16_t op1, svuint16_t op2) { return svlsl_s16_m(pg, op1, op2); }
__forceinline svint16_t svlsl_x(svbool_t pg, svint16_t op1, svuint16_t op2) { return svlsl_s16_x(pg, op1, op2); }
__forceinline svint16_t svlsl_z(svbool_t pg, svint16_t op1, svuint16_t op2) { return
svlsl_s16_z(pg, op1, op2); } __forceinline svint16_t svlsl_wide_m(svbool_t pg, svint16_t op1, uint64_t op2) { return svlsl_wide_n_s16_m(pg, op1, op2); } __forceinline svint16_t svlsl_wide_x(svbool_t pg, svint16_t op1, uint64_t op2) { return svlsl_wide_n_s16_x(pg, op1, op2); } __forceinline svint16_t svlsl_wide_z(svbool_t pg, svint16_t op1, uint64_t op2) { return svlsl_wide_n_s16_z(pg, op1, op2); } __forceinline svint16_t svlsl_wide_m(svbool_t pg, svint16_t op1, svuint64_t op2) { return svlsl_wide_s16_m(pg, op1, op2); } __forceinline svint16_t svlsl_wide_x(svbool_t pg, svint16_t op1, svuint64_t op2) { return svlsl_wide_s16_x(pg, op1, op2); } __forceinline svint16_t svlsl_wide_z(svbool_t pg, svint16_t op1, svuint64_t op2) { return svlsl_wide_s16_z(pg, op1, op2); } __forceinline svint32_t svlsl_m(svbool_t pg, svint32_t op1, uint32_t op2) { return svlsl_n_s32_m(pg, op1, op2); } __forceinline svint32_t svlsl_x(svbool_t pg, svint32_t op1, uint32_t op2) { return svlsl_n_s32_x(pg, op1, op2); } __forceinline svint32_t svlsl_z(svbool_t pg, svint32_t op1, uint32_t op2) { return svlsl_n_s32_z(pg, op1, op2); } __forceinline svint32_t svlsl_m(svbool_t pg, svint32_t op1, svuint32_t op2) { return svlsl_s32_m(pg, op1, op2); } __forceinline svint32_t svlsl_x(svbool_t pg, svint32_t op1, svuint32_t op2) { return svlsl_s32_x(pg, op1, op2); } __forceinline svint32_t svlsl_z(svbool_t pg, svint32_t op1, svuint32_t op2) { return svlsl_s32_z(pg, op1, op2); } __forceinline svint32_t svlsl_wide_m(svbool_t pg, svint32_t op1, uint64_t op2) { return svlsl_wide_n_s32_m(pg, op1, op2); } __forceinline svint32_t svlsl_wide_x(svbool_t pg, svint32_t op1, uint64_t op2) { return svlsl_wide_n_s32_x(pg, op1, op2); } __forceinline svint32_t svlsl_wide_z(svbool_t pg, svint32_t op1, uint64_t op2) { return svlsl_wide_n_s32_z(pg, op1, op2); } __forceinline svint32_t svlsl_wide_m(svbool_t pg, svint32_t op1, svuint64_t op2) { return svlsl_wide_s32_m(pg, op1, op2); } __forceinline svint32_t svlsl_wide_x(svbool_t pg, svint32_t op1, svuint64_t op2) { return svlsl_wide_s32_x(pg, op1, op2); } __forceinline svint32_t svlsl_wide_z(svbool_t pg, svint32_t op1, svuint64_t op2) { return svlsl_wide_s32_z(pg, op1, op2); } __forceinline svint64_t svlsl_m(svbool_t pg, svint64_t op1, uint64_t op2) { return svlsl_n_s64_m(pg, op1, op2); } __forceinline svint64_t svlsl_x(svbool_t pg, svint64_t op1, uint64_t op2) { return svlsl_n_s64_x(pg, op1, op2); } __forceinline svint64_t svlsl_z(svbool_t pg, svint64_t op1, uint64_t op2) { return svlsl_n_s64_z(pg, op1, op2); } __forceinline svint64_t svlsl_m(svbool_t pg, svint64_t op1, svuint64_t op2) { return svlsl_s64_m(pg, op1, op2); } __forceinline svint64_t svlsl_x(svbool_t pg, svint64_t op1, svuint64_t op2) { return svlsl_s64_x(pg, op1, op2); } __forceinline svint64_t svlsl_z(svbool_t pg, svint64_t op1, svuint64_t op2) { return svlsl_s64_z(pg, op1, op2); } __forceinline svint8_t svlsl_m(svbool_t pg, svint8_t op1, uint8_t op2) { return svlsl_n_s8_m(pg, op1, op2); } __forceinline svint8_t svlsl_x(svbool_t pg, svint8_t op1, uint8_t op2) { return svlsl_n_s8_x(pg, op1, op2); } __forceinline svint8_t svlsl_z(svbool_t pg, svint8_t op1, uint8_t op2) { return svlsl_n_s8_z(pg, op1, op2); } __forceinline svint8_t svlsl_m(svbool_t pg, svint8_t op1, svuint8_t op2) { return svlsl_s8_m(pg, op1, op2); } __forceinline svint8_t svlsl_x(svbool_t pg, svint8_t op1, svuint8_t op2) { return svlsl_s8_x(pg, op1, op2); } __forceinline svint8_t svlsl_z(svbool_t pg, svint8_t op1, svuint8_t op2) { return svlsl_s8_z(pg, op1, op2); } 
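// Predication-suffix convention (per the Arm C Language Extensions for SVE):
// _m forms merge, taking inactive elements from the first vector operand (or
// an explicit 'inactive' argument); _z forms zero inactive elements; _x forms
// leave inactive elements unspecified. The typeless overloads forward to the
// matching suffixed intrinsic based on the argument types, for example
// (illustrative, given svbool_t pg, svint32_t v, svuint32_t shifts):
//
//     svint32_t r = svlsl_x(pg, v, 3u);      // resolves to svlsl_n_s32_x
//     svint32_t s = svlsl_z(pg, v, shifts);  // resolves to svlsl_s32_z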
__forceinline svint8_t svlsl_wide_m(svbool_t pg, svint8_t op1, uint64_t op2) { return svlsl_wide_n_s8_m(pg, op1, op2); } __forceinline svint8_t svlsl_wide_x(svbool_t pg, svint8_t op1, uint64_t op2) { return svlsl_wide_n_s8_x(pg, op1, op2); } __forceinline svint8_t svlsl_wide_z(svbool_t pg, svint8_t op1, uint64_t op2) { return svlsl_wide_n_s8_z(pg, op1, op2); } __forceinline svint8_t svlsl_wide_m(svbool_t pg, svint8_t op1, svuint64_t op2) { return svlsl_wide_s8_m(pg, op1, op2); } __forceinline svint8_t svlsl_wide_x(svbool_t pg, svint8_t op1, svuint64_t op2) { return svlsl_wide_s8_x(pg, op1, op2); } __forceinline svint8_t svlsl_wide_z(svbool_t pg, svint8_t op1, svuint64_t op2) { return svlsl_wide_s8_z(pg, op1, op2); } __forceinline svuint16_t svlsl_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svlsl_n_u16_m(pg, op1, op2); } __forceinline svuint16_t svlsl_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svlsl_n_u16_x(pg, op1, op2); } __forceinline svuint16_t svlsl_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svlsl_n_u16_z(pg, op1, op2); } __forceinline svuint16_t svlsl_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svlsl_u16_m(pg, op1, op2); } __forceinline svuint16_t svlsl_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svlsl_u16_x(pg, op1, op2); } __forceinline svuint16_t svlsl_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svlsl_u16_z(pg, op1, op2); } __forceinline svuint16_t svlsl_wide_m(svbool_t pg, svuint16_t op1, uint64_t op2) { return svlsl_wide_n_u16_m(pg, op1, op2); } __forceinline svuint16_t svlsl_wide_x(svbool_t pg, svuint16_t op1, uint64_t op2) { return svlsl_wide_n_u16_x(pg, op1, op2); } __forceinline svuint16_t svlsl_wide_z(svbool_t pg, svuint16_t op1, uint64_t op2) { return svlsl_wide_n_u16_z(pg, op1, op2); } __forceinline svuint16_t svlsl_wide_m(svbool_t pg, svuint16_t op1, svuint64_t op2) { return svlsl_wide_u16_m(pg, op1, op2); } __forceinline svuint16_t svlsl_wide_x(svbool_t pg, svuint16_t op1, svuint64_t op2) { return svlsl_wide_u16_x(pg, op1, op2); } __forceinline svuint16_t svlsl_wide_z(svbool_t pg, svuint16_t op1, svuint64_t op2) { return svlsl_wide_u16_z(pg, op1, op2); } __forceinline svuint32_t svlsl_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svlsl_n_u32_m(pg, op1, op2); } __forceinline svuint32_t svlsl_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svlsl_n_u32_x(pg, op1, op2); } __forceinline svuint32_t svlsl_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svlsl_n_u32_z(pg, op1, op2); } __forceinline svuint32_t svlsl_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svlsl_u32_m(pg, op1, op2); } __forceinline svuint32_t svlsl_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svlsl_u32_x(pg, op1, op2); } __forceinline svuint32_t svlsl_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svlsl_u32_z(pg, op1, op2); } __forceinline svuint32_t svlsl_wide_m(svbool_t pg, svuint32_t op1, uint64_t op2) { return svlsl_wide_n_u32_m(pg, op1, op2); } __forceinline svuint32_t svlsl_wide_x(svbool_t pg, svuint32_t op1, uint64_t op2) { return svlsl_wide_n_u32_x(pg, op1, op2); } __forceinline svuint32_t svlsl_wide_z(svbool_t pg, svuint32_t op1, uint64_t op2) { return svlsl_wide_n_u32_z(pg, op1, op2); } __forceinline svuint32_t svlsl_wide_m(svbool_t pg, svuint32_t op1, svuint64_t op2) { return svlsl_wide_u32_m(pg, op1, op2); } __forceinline svuint32_t svlsl_wide_x(svbool_t pg, svuint32_t op1, svuint64_t op2) { return svlsl_wide_u32_x(pg, op1, op2); } __forceinline svuint32_t svlsl_wide_z(svbool_t pg, 
svuint32_t op1, svuint64_t op2) { return svlsl_wide_u32_z(pg, op1, op2); } __forceinline svuint64_t svlsl_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svlsl_n_u64_m(pg, op1, op2); } __forceinline svuint64_t svlsl_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svlsl_n_u64_x(pg, op1, op2); } __forceinline svuint64_t svlsl_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svlsl_n_u64_z(pg, op1, op2); } __forceinline svuint64_t svlsl_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svlsl_u64_m(pg, op1, op2); } __forceinline svuint64_t svlsl_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svlsl_u64_x(pg, op1, op2); } __forceinline svuint64_t svlsl_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svlsl_u64_z(pg, op1, op2); } __forceinline svuint8_t svlsl_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svlsl_n_u8_m(pg, op1, op2); } __forceinline svuint8_t svlsl_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svlsl_n_u8_x(pg, op1, op2); } __forceinline svuint8_t svlsl_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svlsl_n_u8_z(pg, op1, op2); } __forceinline svuint8_t svlsl_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svlsl_u8_m(pg, op1, op2); } __forceinline svuint8_t svlsl_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svlsl_u8_x(pg, op1, op2); } __forceinline svuint8_t svlsl_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svlsl_u8_z(pg, op1, op2); } __forceinline svuint8_t svlsl_wide_m(svbool_t pg, svuint8_t op1, uint64_t op2) { return svlsl_wide_n_u8_m(pg, op1, op2); } __forceinline svuint8_t svlsl_wide_x(svbool_t pg, svuint8_t op1, uint64_t op2) { return svlsl_wide_n_u8_x(pg, op1, op2); } __forceinline svuint8_t svlsl_wide_z(svbool_t pg, svuint8_t op1, uint64_t op2) { return svlsl_wide_n_u8_z(pg, op1, op2); } __forceinline svuint8_t svlsl_wide_m(svbool_t pg, svuint8_t op1, svuint64_t op2) { return svlsl_wide_u8_m(pg, op1, op2); } __forceinline svuint8_t svlsl_wide_x(svbool_t pg, svuint8_t op1, svuint64_t op2) { return svlsl_wide_u8_x(pg, op1, op2); } __forceinline svuint8_t svlsl_wide_z(svbool_t pg, svuint8_t op1, svuint64_t op2) { return svlsl_wide_u8_z(pg, op1, op2); } __forceinline svint16_t svasr_m(svbool_t pg, svint16_t op1, uint16_t op2) { return svasr_n_s16_m(pg, op1, op2); } __forceinline svint16_t svasr_x(svbool_t pg, svint16_t op1, uint16_t op2) { return svasr_n_s16_x(pg, op1, op2); } __forceinline svint16_t svasr_z(svbool_t pg, svint16_t op1, uint16_t op2) { return svasr_n_s16_z(pg, op1, op2); } __forceinline svint16_t svasr_m(svbool_t pg, svint16_t op1, svuint16_t op2) { return svasr_s16_m(pg, op1, op2); } __forceinline svint16_t svasr_x(svbool_t pg, svint16_t op1, svuint16_t op2) { return svasr_s16_x(pg, op1, op2); } __forceinline svint16_t svasr_z(svbool_t pg, svint16_t op1, svuint16_t op2) { return svasr_s16_z(pg, op1, op2); } __forceinline svint16_t svasr_wide_m(svbool_t pg, svint16_t op1, uint64_t op2) { return svasr_wide_n_s16_m(pg, op1, op2); } __forceinline svint16_t svasr_wide_x(svbool_t pg, svint16_t op1, uint64_t op2) { return svasr_wide_n_s16_x(pg, op1, op2); } __forceinline svint16_t svasr_wide_z(svbool_t pg, svint16_t op1, uint64_t op2) { return svasr_wide_n_s16_z(pg, op1, op2); } __forceinline svint16_t svasr_wide_m(svbool_t pg, svint16_t op1, svuint64_t op2) { return svasr_wide_s16_m(pg, op1, op2); } __forceinline svint16_t svasr_wide_x(svbool_t pg, svint16_t op1, svuint64_t op2) { return svasr_wide_s16_x(pg, op1, op2); } __forceinline svint16_t svasr_wide_z(svbool_t pg, svint16_t op1, 
svuint64_t op2) { return svasr_wide_s16_z(pg, op1, op2); } __forceinline svint32_t svasr_m(svbool_t pg, svint32_t op1, uint32_t op2) { return svasr_n_s32_m(pg, op1, op2); } __forceinline svint32_t svasr_x(svbool_t pg, svint32_t op1, uint32_t op2) { return svasr_n_s32_x(pg, op1, op2); } __forceinline svint32_t svasr_z(svbool_t pg, svint32_t op1, uint32_t op2) { return svasr_n_s32_z(pg, op1, op2); } __forceinline svint32_t svasr_m(svbool_t pg, svint32_t op1, svuint32_t op2) { return svasr_s32_m(pg, op1, op2); } __forceinline svint32_t svasr_x(svbool_t pg, svint32_t op1, svuint32_t op2) { return svasr_s32_x(pg, op1, op2); } __forceinline svint32_t svasr_z(svbool_t pg, svint32_t op1, svuint32_t op2) { return svasr_s32_z(pg, op1, op2); } __forceinline svint32_t svasr_wide_m(svbool_t pg, svint32_t op1, uint64_t op2) { return svasr_wide_n_s32_m(pg, op1, op2); } __forceinline svint32_t svasr_wide_x(svbool_t pg, svint32_t op1, uint64_t op2) { return svasr_wide_n_s32_x(pg, op1, op2); } __forceinline svint32_t svasr_wide_z(svbool_t pg, svint32_t op1, uint64_t op2) { return svasr_wide_n_s32_z(pg, op1, op2); } __forceinline svint32_t svasr_wide_m(svbool_t pg, svint32_t op1, svuint64_t op2) { return svasr_wide_s32_m(pg, op1, op2); } __forceinline svint32_t svasr_wide_x(svbool_t pg, svint32_t op1, svuint64_t op2) { return svasr_wide_s32_x(pg, op1, op2); } __forceinline svint32_t svasr_wide_z(svbool_t pg, svint32_t op1, svuint64_t op2) { return svasr_wide_s32_z(pg, op1, op2); } __forceinline svint64_t svasr_m(svbool_t pg, svint64_t op1, uint64_t op2) { return svasr_n_s64_m(pg, op1, op2); } __forceinline svint64_t svasr_x(svbool_t pg, svint64_t op1, uint64_t op2) { return svasr_n_s64_x(pg, op1, op2); } __forceinline svint64_t svasr_z(svbool_t pg, svint64_t op1, uint64_t op2) { return svasr_n_s64_z(pg, op1, op2); } __forceinline svint64_t svasr_m(svbool_t pg, svint64_t op1, svuint64_t op2) { return svasr_s64_m(pg, op1, op2); } __forceinline svint64_t svasr_x(svbool_t pg, svint64_t op1, svuint64_t op2) { return svasr_s64_x(pg, op1, op2); } __forceinline svint64_t svasr_z(svbool_t pg, svint64_t op1, svuint64_t op2) { return svasr_s64_z(pg, op1, op2); } __forceinline svint8_t svasr_m(svbool_t pg, svint8_t op1, uint8_t op2) { return svasr_n_s8_m(pg, op1, op2); } __forceinline svint8_t svasr_x(svbool_t pg, svint8_t op1, uint8_t op2) { return svasr_n_s8_x(pg, op1, op2); } __forceinline svint8_t svasr_z(svbool_t pg, svint8_t op1, uint8_t op2) { return svasr_n_s8_z(pg, op1, op2); } __forceinline svint8_t svasr_m(svbool_t pg, svint8_t op1, svuint8_t op2) { return svasr_s8_m(pg, op1, op2); } __forceinline svint8_t svasr_x(svbool_t pg, svint8_t op1, svuint8_t op2) { return svasr_s8_x(pg, op1, op2); } __forceinline svint8_t svasr_z(svbool_t pg, svint8_t op1, svuint8_t op2) { return svasr_s8_z(pg, op1, op2); } __forceinline svint8_t svasr_wide_m(svbool_t pg, svint8_t op1, uint64_t op2) { return svasr_wide_n_s8_m(pg, op1, op2); } __forceinline svint8_t svasr_wide_x(svbool_t pg, svint8_t op1, uint64_t op2) { return svasr_wide_n_s8_x(pg, op1, op2); } __forceinline svint8_t svasr_wide_z(svbool_t pg, svint8_t op1, uint64_t op2) { return svasr_wide_n_s8_z(pg, op1, op2); } __forceinline svint8_t svasr_wide_m(svbool_t pg, svint8_t op1, svuint64_t op2) { return svasr_wide_s8_m(pg, op1, op2); } __forceinline svint8_t svasr_wide_x(svbool_t pg, svint8_t op1, svuint64_t op2) { return svasr_wide_s8_x(pg, op1, op2); } __forceinline svint8_t svasr_wide_z(svbool_t pg, svint8_t op1, svuint64_t op2) { return svasr_wide_s8_z(pg, op1, op2); 
}
// Type-generic ASRD helpers: the shift immediate is supplied as a non-type
// template argument by the matching macro below, and the vector type selects
// the correct type-suffixed intrinsic.
template <uint64_t N, typename T> __forceinline T __svasrd_m(svbool_t pg, T op1) {
    if constexpr(::std::is_same_v<T, svint8_t>) { return svasrd_n_s8_m(pg, op1, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svasrd_n_s16_m(pg, op1, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svasrd_n_s32_m(pg, op1, N); }
    else if constexpr(::std::is_same_v<T, svint64_t>) { return svasrd_n_s64_m(pg, op1, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svasrd_m(pg, op1, imm2) __svasrd_m<imm2>(pg, op1)
template <uint64_t N, typename T> __forceinline T __svasrd_z(svbool_t pg, T op1) {
    if constexpr(::std::is_same_v<T, svint8_t>) { return svasrd_n_s8_z(pg, op1, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svasrd_n_s16_z(pg, op1, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svasrd_n_s32_z(pg, op1, N); }
    else if constexpr(::std::is_same_v<T, svint64_t>) { return svasrd_n_s64_z(pg, op1, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svasrd_z(pg, op1, imm2) __svasrd_z<imm2>(pg, op1)
template <uint64_t N, typename T> __forceinline T __svasrd_x(svbool_t pg, T op1) {
    if constexpr(::std::is_same_v<T, svint8_t>) { return svasrd_n_s8_x(pg, op1, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svasrd_n_s16_x(pg, op1, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svasrd_n_s32_x(pg, op1, N); }
    else if constexpr(::std::is_same_v<T, svint64_t>) { return svasrd_n_s64_x(pg, op1, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svasrd_x(pg, op1, imm2) __svasrd_x<imm2>(pg, op1)
__forceinline svuint16_t svlsr_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svlsr_n_u16_m(pg, op1, op2); }
__forceinline svuint16_t svlsr_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svlsr_n_u16_x(pg, op1, op2); }
__forceinline svuint16_t svlsr_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svlsr_n_u16_z(pg, op1, op2); }
__forceinline svuint16_t svlsr_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svlsr_u16_m(pg, op1, op2); }
__forceinline svuint16_t svlsr_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svlsr_u16_x(pg, op1, op2); }
__forceinline svuint16_t svlsr_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svlsr_u16_z(pg, op1, op2); }
__forceinline svuint16_t svlsr_wide_m(svbool_t pg, svuint16_t op1, uint64_t op2) { return svlsr_wide_n_u16_m(pg, op1, op2); }
__forceinline svuint16_t svlsr_wide_x(svbool_t pg, svuint16_t op1, uint64_t op2) { return svlsr_wide_n_u16_x(pg, op1, op2); }
__forceinline svuint16_t svlsr_wide_z(svbool_t pg, svuint16_t op1, uint64_t op2) { return svlsr_wide_n_u16_z(pg, op1, op2); }
__forceinline svuint16_t svlsr_wide_m(svbool_t pg, svuint16_t op1, svuint64_t op2) { return svlsr_wide_u16_m(pg, op1, op2); }
__forceinline svuint16_t svlsr_wide_x(svbool_t pg, svuint16_t op1, svuint64_t op2) { return svlsr_wide_u16_x(pg, op1, op2); }
__forceinline svuint16_t svlsr_wide_z(svbool_t pg, svuint16_t op1, svuint64_t op2) { return svlsr_wide_u16_z(pg, op1, op2); }
__forceinline svuint32_t svlsr_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svlsr_n_u32_m(pg, op1, op2); }
__forceinline svuint32_t svlsr_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svlsr_n_u32_x(pg, op1, op2); }
__forceinline svuint32_t svlsr_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svlsr_n_u32_z(pg, op1, op2); }
__forceinline svuint32_t svlsr_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svlsr_u32_m(pg, op1, op2); }
__forceinline svuint32_t svlsr_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svlsr_u32_x(pg, op1, op2); }
__forceinline svuint32_t svlsr_z(svbool_t pg, svuint32_t op1,
svuint32_t op2) { return svlsr_u32_z(pg, op1, op2); } __forceinline svuint32_t svlsr_wide_m(svbool_t pg, svuint32_t op1, uint64_t op2) { return svlsr_wide_n_u32_m(pg, op1, op2); } __forceinline svuint32_t svlsr_wide_x(svbool_t pg, svuint32_t op1, uint64_t op2) { return svlsr_wide_n_u32_x(pg, op1, op2); } __forceinline svuint32_t svlsr_wide_z(svbool_t pg, svuint32_t op1, uint64_t op2) { return svlsr_wide_n_u32_z(pg, op1, op2); } __forceinline svuint32_t svlsr_wide_m(svbool_t pg, svuint32_t op1, svuint64_t op2) { return svlsr_wide_u32_m(pg, op1, op2); } __forceinline svuint32_t svlsr_wide_x(svbool_t pg, svuint32_t op1, svuint64_t op2) { return svlsr_wide_u32_x(pg, op1, op2); } __forceinline svuint32_t svlsr_wide_z(svbool_t pg, svuint32_t op1, svuint64_t op2) { return svlsr_wide_u32_z(pg, op1, op2); } __forceinline svuint64_t svlsr_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svlsr_n_u64_m(pg, op1, op2); } __forceinline svuint64_t svlsr_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svlsr_n_u64_x(pg, op1, op2); } __forceinline svuint64_t svlsr_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svlsr_n_u64_z(pg, op1, op2); } __forceinline svuint64_t svlsr_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svlsr_u64_m(pg, op1, op2); } __forceinline svuint64_t svlsr_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svlsr_u64_x(pg, op1, op2); } __forceinline svuint64_t svlsr_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svlsr_u64_z(pg, op1, op2); } __forceinline svuint8_t svlsr_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svlsr_n_u8_m(pg, op1, op2); } __forceinline svuint8_t svlsr_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svlsr_n_u8_x(pg, op1, op2); } __forceinline svuint8_t svlsr_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svlsr_n_u8_z(pg, op1, op2); } __forceinline svuint8_t svlsr_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svlsr_u8_m(pg, op1, op2); } __forceinline svuint8_t svlsr_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svlsr_u8_x(pg, op1, op2); } __forceinline svuint8_t svlsr_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svlsr_u8_z(pg, op1, op2); } __forceinline svuint8_t svlsr_wide_m(svbool_t pg, svuint8_t op1, uint64_t op2) { return svlsr_wide_n_u8_m(pg, op1, op2); } __forceinline svuint8_t svlsr_wide_x(svbool_t pg, svuint8_t op1, uint64_t op2) { return svlsr_wide_n_u8_x(pg, op1, op2); } __forceinline svuint8_t svlsr_wide_z(svbool_t pg, svuint8_t op1, uint64_t op2) { return svlsr_wide_n_u8_z(pg, op1, op2); } __forceinline svuint8_t svlsr_wide_m(svbool_t pg, svuint8_t op1, svuint64_t op2) { return svlsr_wide_u8_m(pg, op1, op2); } __forceinline svuint8_t svlsr_wide_x(svbool_t pg, svuint8_t op1, svuint64_t op2) { return svlsr_wide_u8_x(pg, op1, op2); } __forceinline svuint8_t svlsr_wide_z(svbool_t pg, svuint8_t op1, svuint64_t op2) { return svlsr_wide_u8_z(pg, op1, op2); } __forceinline void svst1(svbool_t pg, float32_t *base, svfloat32_t data) { return svst1_f32(pg, base, data); } __forceinline void svst1(svbool_t pg, float64_t *base, svfloat64_t data) { return svst1_f64(pg, base, data); } __forceinline void svst1(svbool_t pg, int16_t *base, svint16_t data) { return svst1_s16(pg, base, data); } __forceinline void svst1(svbool_t pg, int32_t *base, svint32_t data) { return svst1_s32(pg, base, data); } __forceinline void svst1(svbool_t pg, int64_t *base, svint64_t data) { return svst1_s64(pg, base, data); } __forceinline void svst1(svbool_t pg, int8_t *base, svint8_t data) { return svst1_s8(pg, base, 
data); } __forceinline void svst1(svbool_t pg, uint16_t *base, svuint16_t data) { return svst1_u16(pg, base, data); } __forceinline void svst1(svbool_t pg, uint32_t *base, svuint32_t data) { return svst1_u32(pg, base, data); } __forceinline void svst1(svbool_t pg, uint64_t *base, svuint64_t data) { return svst1_u64(pg, base, data); } __forceinline void svst1(svbool_t pg, uint8_t *base, svuint8_t data) { return svst1_u8(pg, base, data); } __forceinline void svst1_vnum(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data) { return svst1_vnum_f32(pg, base, vnum, data); } __forceinline void svst1_vnum(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data) { return svst1_vnum_f64(pg, base, vnum, data); } __forceinline void svst1_vnum(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data) { return svst1_vnum_s16(pg, base, vnum, data); } __forceinline void svst1_vnum(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data) { return svst1_vnum_s32(pg, base, vnum, data); } __forceinline void svst1_vnum(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data) { return svst1_vnum_s64(pg, base, vnum, data); } __forceinline void svst1_vnum(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data) { return svst1_vnum_s8(pg, base, vnum, data); } __forceinline void svst1_vnum(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data) { return svst1_vnum_u16(pg, base, vnum, data); } __forceinline void svst1_vnum(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data) { return svst1_vnum_u32(pg, base, vnum, data); } __forceinline void svst1_vnum(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data) { return svst1_vnum_u64(pg, base, vnum, data); } __forceinline void svst1_vnum(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data) { return svst1_vnum_u8(pg, base, vnum, data); } __forceinline void svstnt1(svbool_t pg, uint8_t *base, svuint8_t data) { return svstnt1_u8(pg, base, data); } __forceinline void svstnt1_vnum(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data) { return svstnt1_vnum_u8(pg, base, vnum, data); } __forceinline void svstnt1_vnum(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data) { return svstnt1_vnum_s64(pg, base, vnum, data); } __forceinline void svstnt1_vnum(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data) { return svstnt1_vnum_s32(pg, base, vnum, data); } __forceinline void svstnt1_vnum(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data) { return svstnt1_vnum_s16(pg, base, vnum, data); } __forceinline void svstnt1_vnum(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data) { return svstnt1_vnum_s8(pg, base, vnum, data); } __forceinline void svstnt1_vnum(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data) { return svstnt1_vnum_f64(pg, base, vnum, data); } __forceinline void svstnt1_vnum(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data) { return svstnt1_vnum_f32(pg, base, vnum, data); } __forceinline void svstnt1_vnum(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data) { return svstnt1_vnum_u16(pg, base, vnum, data); } __forceinline void svstnt1(svbool_t pg, uint64_t *base, svuint64_t data) { return svstnt1_u64(pg, base, data); } __forceinline void svstnt1(svbool_t pg, uint16_t *base, svuint16_t data) { return svstnt1_u16(pg, base, data); } __forceinline void svstnt1(svbool_t pg, int64_t *base, svint64_t data) { return svstnt1_s64(pg, base, data); } __forceinline void svstnt1(svbool_t pg, int32_t *base, svint32_t data) { return svstnt1_s32(pg, base, data); } __forceinline void svstnt1(svbool_t pg, 
int16_t *base, svint16_t data) { return svstnt1_s16(pg, base, data); } __forceinline void svstnt1(svbool_t pg, int8_t *base, svint8_t data) { return svstnt1_s8(pg, base, data); } __forceinline void svstnt1(svbool_t pg, float64_t *base, svfloat64_t data) { return svstnt1_f64(pg, base, data); } __forceinline void svstnt1(svbool_t pg, float32_t *base, svfloat32_t data) { return svstnt1_f32(pg, base, data); } __forceinline void svstnt1(svbool_t pg, uint32_t *base, svuint32_t data) { return svstnt1_u32(pg, base, data); } __forceinline void svstnt1_vnum(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data) { return svstnt1_vnum_u32(pg, base, vnum, data); } __forceinline void svstnt1_vnum(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data) { return svstnt1_vnum_u64(pg, base, vnum, data); } __forceinline void svst1h_vnum(svbool_t pg, int16_t *base, int64_t vnum, svint32_t data) { return svst1h_vnum_s32(pg, base, vnum, data); } __forceinline void svst1h_vnum(svbool_t pg, int16_t *base, int64_t vnum, svint64_t data) { return svst1h_vnum_s64(pg, base, vnum, data); } __forceinline void svst1h(svbool_t pg, uint16_t *base, svuint64_t data) { return svst1h_u64(pg, base, data); } __forceinline void svst1h(svbool_t pg, uint16_t *base, svuint32_t data) { return svst1h_u32(pg, base, data); } __forceinline void svst1h(svbool_t pg, int16_t *base, svint64_t data) { return svst1h_s64(pg, base, data); } __forceinline void svst1h(svbool_t pg, int16_t *base, svint32_t data) { return svst1h_s32(pg, base, data); } __forceinline void svst1h_vnum(svbool_t pg, uint16_t *base, int64_t vnum, svuint64_t data) { return svst1h_vnum_u64(pg, base, vnum, data); } __forceinline void svst1h_vnum(svbool_t pg, uint16_t *base, int64_t vnum, svuint32_t data) { return svst1h_vnum_u32(pg, base, vnum, data); } __forceinline void svst1w_vnum(svbool_t pg, int32_t *base, int64_t vnum, svint64_t data) { return svst1w_vnum_s64(pg, base, vnum, data); } __forceinline void svst1w(svbool_t pg, uint32_t *base, svuint64_t data) { return svst1w_u64(pg, base, data); } __forceinline void svst1w(svbool_t pg, int32_t *base, svint64_t data) { return svst1w_s64(pg, base, data); } __forceinline void svst1w_vnum(svbool_t pg, uint32_t *base, int64_t vnum, svuint64_t data) { return svst1w_vnum_u64(pg, base, vnum, data); } __forceinline void svst1b(svbool_t pg, uint8_t *base, svuint64_t data) { return svst1b_u64(pg, base, data); } __forceinline void svst1b(svbool_t pg, int8_t *base, svint16_t data) { return svst1b_s16(pg, base, data); } __forceinline void svst1b(svbool_t pg, int8_t *base, svint32_t data) { return svst1b_s32(pg, base, data); } __forceinline void svst1b(svbool_t pg, int8_t *base, svint64_t data) { return svst1b_s64(pg, base, data); } __forceinline void svst1b(svbool_t pg, uint8_t *base, svuint16_t data) { return svst1b_u16(pg, base, data); } __forceinline void svst1b(svbool_t pg, uint8_t *base, svuint32_t data) { return svst1b_u32(pg, base, data); } __forceinline void svst1b_vnum(svbool_t pg, int8_t *base, int64_t vnum, svint16_t data) { return svst1b_vnum_s16(pg, base, vnum, data); } __forceinline void svst1b_vnum(svbool_t pg, int8_t *base, int64_t vnum, svint64_t data) { return svst1b_vnum_s64(pg, base, vnum, data); } __forceinline void svst1b_vnum(svbool_t pg, int8_t *base, int64_t vnum, svint32_t data) { return svst1b_vnum_s32(pg, base, vnum, data); } __forceinline void svst1b_vnum(svbool_t pg, uint8_t *base, int64_t vnum, svuint64_t data) { return svst1b_vnum_u64(pg, base, vnum, data); } __forceinline void 
svst1b_vnum(svbool_t pg, uint8_t *base, int64_t vnum, svuint32_t data) { return svst1b_vnum_u32(pg, base, vnum, data); } __forceinline void svst1b_vnum(svbool_t pg, uint8_t *base, int64_t vnum, svuint16_t data) { return svst1b_vnum_u16(pg, base, vnum, data); } __forceinline void svst1_scatter_offset(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data) { return svst1_scatter_u32base_offset_u32(pg, bases, offset, data); } __forceinline void svst1_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data) { return svst1_scatter_u64base_index_u64(pg, bases, index, data); } __forceinline void svst1_scatter_offset(svbool_t pg, uint64_t *base, svint64_t offsets, svuint64_t data) { return svst1_scatter_s64offset_u64(pg, base, offsets, data); } __forceinline void svst1_scatter_offset(svbool_t pg, float64_t *base, svuint64_t offsets, svfloat64_t data) { return svst1_scatter_u64offset_f64(pg, base, offsets, data); } __forceinline void svst1_scatter_offset(svbool_t pg, int64_t *base, svuint64_t offsets, svint64_t data) { return svst1_scatter_u64offset_s64(pg, base, offsets, data); } __forceinline void svst1_scatter_offset(svbool_t pg, uint64_t *base, svuint64_t offsets, svuint64_t data) { return svst1_scatter_u64offset_u64(pg, base, offsets, data); } __forceinline void svst1_scatter_index(svbool_t pg, float32_t *base, svint32_t indices, svfloat32_t data) { return svst1_scatter_s32index_f32(pg, base, indices, data); } __forceinline void svst1_scatter_index(svbool_t pg, int32_t *base, svint32_t indices, svint32_t data) { return svst1_scatter_s32index_s32(pg, base, indices, data); } __forceinline void svst1_scatter_index(svbool_t pg, uint32_t *base, svint32_t indices, svuint32_t data) { return svst1_scatter_s32index_u32(pg, base, indices, data); } __forceinline void svst1_scatter_index(svbool_t pg, float32_t *base, svuint32_t indices, svfloat32_t data) { return svst1_scatter_u32index_f32(pg, base, indices, data); } __forceinline void svst1_scatter_index(svbool_t pg, int32_t *base, svuint32_t indices, svint32_t data) { return svst1_scatter_u32index_s32(pg, base, indices, data); } __forceinline void svst1_scatter_index(svbool_t pg, uint32_t *base, svuint32_t indices, svuint32_t data) { return svst1_scatter_u32index_u32(pg, base, indices, data); } __forceinline void svst1_scatter_index(svbool_t pg, float64_t *base, svint64_t indices, svfloat64_t data) { return svst1_scatter_s64index_f64(pg, base, indices, data); } __forceinline void svst1_scatter_index(svbool_t pg, int64_t *base, svint64_t indices, svint64_t data) { return svst1_scatter_s64index_s64(pg, base, indices, data); } __forceinline void svst1_scatter_offset(svbool_t pg, int64_t *base, svint64_t offsets, svint64_t data) { return svst1_scatter_s64offset_s64(pg, base, offsets, data); } __forceinline void svst1_scatter_index(svbool_t pg, uint64_t *base, svint64_t indices, svuint64_t data) { return svst1_scatter_s64index_u64(pg, base, indices, data); } __forceinline void svst1_scatter_index(svbool_t pg, int64_t *base, svuint64_t indices, svint64_t data) { return svst1_scatter_u64index_s64(pg, base, indices, data); } __forceinline void svst1_scatter_index(svbool_t pg, uint64_t *base, svuint64_t indices, svuint64_t data) { return svst1_scatter_u64index_u64(pg, base, indices, data); } __forceinline void svst1_scatter_offset(svbool_t pg, svuint32_t bases, int64_t offset, svfloat32_t data) { return svst1_scatter_u32base_offset_f32(pg, bases, offset, data); } __forceinline void svst1_scatter_index(svbool_t pg, svuint32_t bases, 
int64_t index, svuint32_t data) { return svst1_scatter_u32base_index_u32(pg, bases, index, data); } __forceinline void svst1_scatter_index(svbool_t pg, svuint32_t bases, int64_t index, svint32_t data) { return svst1_scatter_u32base_index_s32(pg, bases, index, data); } __forceinline void svst1_scatter_index(svbool_t pg, svuint32_t bases, int64_t index, svfloat32_t data) { return svst1_scatter_u32base_index_f32(pg, bases, index, data); } __forceinline void svst1_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) { return svst1_scatter_u64base_offset_u64(pg, bases, offset, data); } __forceinline void svst1_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) { return svst1_scatter_u64base_offset_s64(pg, bases, offset, data); } __forceinline void svst1_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svfloat64_t data) { return svst1_scatter_u64base_offset_f64(pg, bases, offset, data); } __forceinline void svst1_scatter_index(svbool_t pg, float64_t *base, svuint64_t indices, svfloat64_t data) { return svst1_scatter_u64index_f64(pg, base, indices, data); } __forceinline void svst1_scatter_offset(svbool_t pg, float64_t *base, svint64_t offsets, svfloat64_t data) { return svst1_scatter_s64offset_f64(pg, base, offsets, data); } __forceinline void svst1_scatter_offset(svbool_t pg, uint32_t *base, svuint32_t offsets, svuint32_t data) { return svst1_scatter_u32offset_u32(pg, base, offsets, data); } __forceinline void svst1_scatter_offset(svbool_t pg, int32_t *base, svuint32_t offsets, svint32_t data) { return svst1_scatter_u32offset_s32(pg, base, offsets, data); } __forceinline void svst1_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data) { return svst1_scatter_u64base_index_s64(pg, bases, index, data); } __forceinline void svst1_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, svfloat64_t data) { return svst1_scatter_u64base_index_f64(pg, bases, index, data); } __forceinline void svst1_scatter(svbool_t pg, svuint32_t bases, svfloat32_t data) { return svst1_scatter_u32base_f32(pg, bases, data); } __forceinline void svst1_scatter(svbool_t pg, svuint32_t bases, svint32_t data) { return svst1_scatter_u32base_s32(pg, bases, data); } __forceinline void svst1_scatter(svbool_t pg, svuint32_t bases, svuint32_t data) { return svst1_scatter_u32base_u32(pg, bases, data); } __forceinline void svst1_scatter(svbool_t pg, svuint64_t bases, svfloat64_t data) { return svst1_scatter_u64base_f64(pg, bases, data); } __forceinline void svst1_scatter(svbool_t pg, svuint64_t bases, svint64_t data) { return svst1_scatter_u64base_s64(pg, bases, data); } __forceinline void svst1_scatter(svbool_t pg, svuint64_t bases, svuint64_t data) { return svst1_scatter_u64base_u64(pg, bases, data); } __forceinline void svst1_scatter_offset(svbool_t pg, float32_t *base, svint32_t offsets, svfloat32_t data) { return svst1_scatter_s32offset_f32(pg, base, offsets, data); } __forceinline void svst1_scatter_offset(svbool_t pg, int32_t *base, svint32_t offsets, svint32_t data) { return svst1_scatter_s32offset_s32(pg, base, offsets, data); } __forceinline void svst1_scatter_offset(svbool_t pg, uint32_t *base, svint32_t offsets, svuint32_t data) { return svst1_scatter_s32offset_u32(pg, base, offsets, data); } __forceinline void svst1_scatter_offset(svbool_t pg, float32_t *base, svuint32_t offsets, svfloat32_t data) { return svst1_scatter_u32offset_f32(pg, base, offsets, data); } __forceinline void svst1_scatter_offset(svbool_t pg, svuint32_t bases, 
int64_t offset, svint32_t data) { return svst1_scatter_u32base_offset_s32(pg, bases, offset, data); } __forceinline void svst1h_scatter_offset(svbool_t pg, uint16_t *base, svint32_t offsets, svuint32_t data) { return svst1h_scatter_s32offset_u32(pg, base, offsets, data); } __forceinline void svst1h_scatter_offset(svbool_t pg, uint16_t *base, svuint32_t offsets, svuint32_t data) { return svst1h_scatter_u32offset_u32(pg, base, offsets, data); } __forceinline void svst1h_scatter_offset(svbool_t pg, int16_t *base, svint64_t offsets, svint64_t data) { return svst1h_scatter_s64offset_s64(pg, base, offsets, data); } __forceinline void svst1h_scatter_offset(svbool_t pg, int16_t *base, svuint32_t offsets, svint32_t data) { return svst1h_scatter_u32offset_s32(pg, base, offsets, data); } __forceinline void svst1h_scatter_offset(svbool_t pg, uint16_t *base, svint64_t offsets, svuint64_t data) { return svst1h_scatter_s64offset_u64(pg, base, offsets, data); } __forceinline void svst1h_scatter_offset(svbool_t pg, int16_t *base, svuint64_t offsets, svint64_t data) { return svst1h_scatter_u64offset_s64(pg, base, offsets, data); } __forceinline void svst1h_scatter_offset(svbool_t pg, uint16_t *base, svuint64_t offsets, svuint64_t data) { return svst1h_scatter_u64offset_u64(pg, base, offsets, data); } __forceinline void svst1h_scatter(svbool_t pg, svuint32_t bases, svint32_t data) { return svst1h_scatter_u32base_s32(pg, bases, data); } __forceinline void svst1h_scatter_index(svbool_t pg, uint16_t *base, svint32_t indices, svuint32_t data) { return svst1h_scatter_s32index_u32(pg, base, indices, data); } __forceinline void svst1h_scatter(svbool_t pg, svuint32_t bases, svuint32_t data) { return svst1h_scatter_u32base_u32(pg, bases, data); } __forceinline void svst1h_scatter_index(svbool_t pg, int16_t *base, svint32_t indices, svint32_t data) { return svst1h_scatter_s32index_s32(pg, base, indices, data); } __forceinline void svst1h_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data) { return svst1h_scatter_u64base_index_s64(pg, bases, index, data); } __forceinline void svst1h_scatter_index(svbool_t pg, svuint32_t bases, int64_t index, svuint32_t data) { return svst1h_scatter_u32base_index_u32(pg, bases, index, data); } __forceinline void svst1h_scatter(svbool_t pg, svuint64_t bases, svint64_t data) { return svst1h_scatter_u64base_s64(pg, bases, data); } __forceinline void svst1h_scatter(svbool_t pg, svuint64_t bases, svuint64_t data) { return svst1h_scatter_u64base_u64(pg, bases, data); } __forceinline void svst1h_scatter_offset(svbool_t pg, int16_t *base, svint32_t offsets, svint32_t data) { return svst1h_scatter_s32offset_s32(pg, base, offsets, data); } __forceinline void svst1h_scatter_index(svbool_t pg, svuint32_t bases, int64_t index, svint32_t data) { return svst1h_scatter_u32base_index_s32(pg, bases, index, data); } __forceinline void svst1h_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) { return svst1h_scatter_u64base_offset_u64(pg, bases, offset, data); } __forceinline void svst1h_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data) { return svst1h_scatter_u64base_index_u64(pg, bases, index, data); } __forceinline void svst1h_scatter_offset(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data) { return svst1h_scatter_u32base_offset_u32(pg, bases, offset, data); } __forceinline void svst1h_scatter_index(svbool_t pg, uint16_t *base, svuint32_t indices, svuint32_t data) { return svst1h_scatter_u32index_u32(pg, base, 
indices, data); } __forceinline void svst1h_scatter_index(svbool_t pg, int16_t *base, svint64_t indices, svint64_t data) { return svst1h_scatter_s64index_s64(pg, base, indices, data); } __forceinline void svst1h_scatter_index(svbool_t pg, uint16_t *base, svint64_t indices, svuint64_t data) { return svst1h_scatter_s64index_u64(pg, base, indices, data); } __forceinline void svst1h_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) { return svst1h_scatter_u64base_offset_s64(pg, bases, offset, data); } __forceinline void svst1h_scatter_index(svbool_t pg, uint16_t *base, svuint64_t indices, svuint64_t data) { return svst1h_scatter_u64index_u64(pg, base, indices, data); } __forceinline void svst1h_scatter_index(svbool_t pg, int16_t *base, svuint64_t indices, svint64_t data) { return svst1h_scatter_u64index_s64(pg, base, indices, data); } __forceinline void svst1h_scatter_offset(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data) { return svst1h_scatter_u32base_offset_s32(pg, bases, offset, data); } __forceinline void svst1h_scatter_index(svbool_t pg, int16_t *base, svuint32_t indices, svint32_t data) { return svst1h_scatter_u32index_s32(pg, base, indices, data); } __forceinline void svst1w_scatter_index(svbool_t pg, uint32_t *base, svuint64_t indices, svuint64_t data) { return svst1w_scatter_u64index_u64(pg, base, indices, data); } __forceinline void svst1w_scatter_index(svbool_t pg, int32_t *base, svuint64_t indices, svint64_t data) { return svst1w_scatter_u64index_s64(pg, base, indices, data); } __forceinline void svst1w_scatter_index(svbool_t pg, uint32_t *base, svint64_t indices, svuint64_t data) { return svst1w_scatter_s64index_u64(pg, base, indices, data); } __forceinline void svst1w_scatter_index(svbool_t pg, int32_t *base, svint64_t indices, svint64_t data) { return svst1w_scatter_s64index_s64(pg, base, indices, data); } __forceinline void svst1w_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) { return svst1w_scatter_u64base_offset_s64(pg, bases, offset, data); } __forceinline void svst1w_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) { return svst1w_scatter_u64base_offset_u64(pg, bases, offset, data); } __forceinline void svst1w_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data) { return svst1w_scatter_u64base_index_s64(pg, bases, index, data); } __forceinline void svst1w_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data) { return svst1w_scatter_u64base_index_u64(pg, bases, index, data); } __forceinline void svst1w_scatter_offset(svbool_t pg, uint32_t *base, svuint64_t offsets, svuint64_t data) { return svst1w_scatter_u64offset_u64(pg, base, offsets, data); } __forceinline void svst1w_scatter(svbool_t pg, svuint64_t bases, svint64_t data) { return svst1w_scatter_u64base_s64(pg, bases, data); } __forceinline void svst1w_scatter_offset(svbool_t pg, int32_t *base, svuint64_t offsets, svint64_t data) { return svst1w_scatter_u64offset_s64(pg, base, offsets, data); } __forceinline void svst1w_scatter_offset(svbool_t pg, uint32_t *base, svint64_t offsets, svuint64_t data) { return svst1w_scatter_s64offset_u64(pg, base, offsets, data); } __forceinline void svst1w_scatter_offset(svbool_t pg, int32_t *base, svint64_t offsets, svint64_t data) { return svst1w_scatter_s64offset_s64(pg, base, offsets, data); } __forceinline void svst1w_scatter(svbool_t pg, svuint64_t bases, svuint64_t data) { return svst1w_scatter_u64base_u64(pg, bases, data); } 
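// Usage sketch for the predicate and contiguous-store wrappers above (a
// minimal illustration; svdup_n_s32 and svcntw are standard SVE intrinsics
// assumed to be declared elsewhere in this header):
//
//     void fill_s32(int32_t *dst, int64_t n, int32_t value) {
//         svint32_t v = svdup_n_s32(value);
//         for (int64_t i = 0; i < n; i += (int64_t)svcntw()) {
//             svbool_t pg = svwhilelt_b32(i, n);  // resolves to svwhilelt_b32_s64
//             svst1(pg, dst + i, v);              // resolves to svst1_s32
//         }
//     }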
__forceinline void svst1b_scatter_offset(svbool_t pg, int8_t *base, svuint64_t offsets, svint64_t data) { return svst1b_scatter_u64offset_s64(pg, base, offsets, data); } __forceinline void svst1b_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) { return svst1b_scatter_u64base_offset_u64(pg, bases, offset, data); } __forceinline void svst1b_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) { return svst1b_scatter_u64base_offset_s64(pg, bases, offset, data); } __forceinline void svst1b_scatter_offset(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data) { return svst1b_scatter_u32base_offset_u32(pg, bases, offset, data); } __forceinline void svst1b_scatter_offset(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data) { return svst1b_scatter_u32base_offset_s32(pg, bases, offset, data); } __forceinline void svst1b_scatter_offset(svbool_t pg, uint8_t *base, svuint64_t offsets, svuint64_t data) { return svst1b_scatter_u64offset_u64(pg, base, offsets, data); } __forceinline void svst1b_scatter_offset(svbool_t pg, uint8_t *base, svint64_t offsets, svuint64_t data) { return svst1b_scatter_s64offset_u64(pg, base, offsets, data); } __forceinline void svst1b_scatter_offset(svbool_t pg, int8_t *base, svint64_t offsets, svint64_t data) { return svst1b_scatter_s64offset_s64(pg, base, offsets, data); } __forceinline void svst1b_scatter_offset(svbool_t pg, int8_t *base, svuint32_t offsets, svint32_t data) { return svst1b_scatter_u32offset_s32(pg, base, offsets, data); } __forceinline void svst1b_scatter_offset(svbool_t pg, uint8_t *base, svuint32_t offsets, svuint32_t data) { return svst1b_scatter_u32offset_u32(pg, base, offsets, data); } __forceinline void svst1b_scatter_offset(svbool_t pg, int8_t *base, svint32_t offsets, svint32_t data) { return svst1b_scatter_s32offset_s32(pg, base, offsets, data); } __forceinline void svst1b_scatter_offset(svbool_t pg, uint8_t *base, svint32_t offsets, svuint32_t data) { return svst1b_scatter_s32offset_u32(pg, base, offsets, data); } __forceinline void svst1b_scatter(svbool_t pg, svuint64_t bases, svuint64_t data) { return svst1b_scatter_u64base_u64(pg, bases, data); } __forceinline void svst1b_scatter(svbool_t pg, svuint64_t bases, svint64_t data) { return svst1b_scatter_u64base_s64(pg, bases, data); } __forceinline void svst1b_scatter(svbool_t pg, svuint32_t bases, svuint32_t data) { return svst1b_scatter_u32base_u32(pg, bases, data); } __forceinline void svst1b_scatter(svbool_t pg, svuint32_t bases, svint32_t data) { return svst1b_scatter_u32base_s32(pg, bases, data); } __forceinline svfloat64_t svtbl(svfloat64_t data, svuint64_t indices) { return svtbl_f64(data, indices); } __forceinline svbfloat16_t svtbl(svbfloat16_t data, svuint16_t indices) { return svtbl_bf16(data, indices); } __forceinline svint16_t svtbl(svint16_t data, svuint16_t indices) { return svtbl_s16(data, indices); } __forceinline svint32_t svtbl(svint32_t data, svuint32_t indices) { return svtbl_s32(data, indices); } __forceinline svint64_t svtbl(svint64_t data, svuint64_t indices) { return svtbl_s64(data, indices); } __forceinline svuint8_t svtbl(svuint8_t data, svuint8_t indices) { return svtbl_u8(data, indices); } __forceinline svuint16_t svtbl(svuint16_t data, svuint16_t indices) { return svtbl_u16(data, indices); } __forceinline svuint64_t svtbl(svuint64_t data, svuint64_t indices) { return svtbl_u64(data, indices); } __forceinline svint8_t svtbl(svint8_t data, svuint8_t indices) { return svtbl_s8(data, indices); } 
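// svtbl performs a table lookup: each result element is taken from 'data' at
// the position given by the corresponding element of 'indices'; out-of-range
// indices yield zero.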
__forceinline svuint32_t svtbl(svuint32_t data, svuint32_t indices) { return svtbl_u32(data, indices); } __forceinline svfloat32_t svtbl(svfloat32_t data, svuint32_t indices) { return svtbl_f32(data, indices); } __forceinline svfloat16_t svtbl(svfloat16_t data, svuint16_t indices) { return svtbl_f16(data, indices); } __forceinline svuint64_t svabd_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svabd_n_u64_m(pg, op1, op2); } __forceinline svuint32_t svabd_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svabd_n_u32_m(pg, op1, op2); } __forceinline svuint16_t svabd_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svabd_n_u16_m(pg, op1, op2); } __forceinline svuint8_t svabd_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svabd_n_u8_m(pg, op1, op2); } __forceinline svint64_t svabd_m(svbool_t pg, svint64_t op1, int64_t op2) { return svabd_n_s64_m(pg, op1, op2); } __forceinline svint32_t svabd_m(svbool_t pg, svint32_t op1, int32_t op2) { return svabd_n_s32_m(pg, op1, op2); } __forceinline svint16_t svabd_m(svbool_t pg, svint16_t op1, int16_t op2) { return svabd_n_s16_m(pg, op1, op2); } __forceinline svfloat32_t svabd_m(svbool_t pg, svfloat32_t op1, float32_t op2) { return svabd_n_f32_m(pg, op1, op2); } __forceinline svfloat64_t svabd_m(svbool_t pg, svfloat64_t op1, float64_t op2) { return svabd_n_f64_m(pg, op1, op2); } __forceinline svuint64_t svabd_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svabd_u64_z(pg, op1, op2); } __forceinline svuint32_t svabd_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svabd_u32_z(pg, op1, op2); } __forceinline svuint16_t svabd_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svabd_u16_z(pg, op1, op2); } __forceinline svuint8_t svabd_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svabd_u8_z(pg, op1, op2); } __forceinline svint64_t svabd_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svabd_s64_z(pg, op1, op2); } __forceinline svint32_t svabd_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svabd_s32_z(pg, op1, op2); } __forceinline svint8_t svabd_m(svbool_t pg, svint8_t op1, int8_t op2) { return svabd_n_s8_m(pg, op1, op2); } __forceinline svfloat32_t svabd_x(svbool_t pg, svfloat32_t op1, float32_t op2) { return svabd_n_f32_x(pg, op1, op2); } __forceinline svfloat64_t svabd_z(svbool_t pg, svfloat64_t op1, float64_t op2) { return svabd_n_f64_z(pg, op1, op2); } __forceinline svint16_t svabd_x(svbool_t pg, svint16_t op1, int16_t op2) { return svabd_n_s16_x(pg, op1, op2); } __forceinline svuint64_t svabd_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svabd_n_u64_z(pg, op1, op2); } __forceinline svuint32_t svabd_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svabd_n_u32_z(pg, op1, op2); } __forceinline svuint16_t svabd_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svabd_n_u16_z(pg, op1, op2); } __forceinline svuint8_t svabd_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svabd_n_u8_z(pg, op1, op2); } __forceinline svint64_t svabd_z(svbool_t pg, svint64_t op1, int64_t op2) { return svabd_n_s64_z(pg, op1, op2); } __forceinline svint32_t svabd_z(svbool_t pg, svint32_t op1, int32_t op2) { return svabd_n_s32_z(pg, op1, op2); } __forceinline svint16_t svabd_z(svbool_t pg, svint16_t op1, int16_t op2) { return svabd_n_s16_z(pg, op1, op2); } __forceinline svint8_t svabd_z(svbool_t pg, svint8_t op1, int8_t op2) { return svabd_n_s8_z(pg, op1, op2); } __forceinline svint16_t svabd_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svabd_s16_z(pg, op1, op2); } __forceinline svfloat32_t 
svabd_z(svbool_t pg, svfloat32_t op1, float32_t op2) { return svabd_n_f32_z(pg, op1, op2); } __forceinline svuint64_t svabd_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svabd_n_u64_x(pg, op1, op2); } __forceinline svuint32_t svabd_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svabd_n_u32_x(pg, op1, op2); } __forceinline svuint16_t svabd_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svabd_n_u16_x(pg, op1, op2); } __forceinline svuint8_t svabd_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svabd_n_u8_x(pg, op1, op2); } __forceinline svint64_t svabd_x(svbool_t pg, svint64_t op1, int64_t op2) { return svabd_n_s64_x(pg, op1, op2); } __forceinline svint32_t svabd_x(svbool_t pg, svint32_t op1, int32_t op2) { return svabd_n_s32_x(pg, op1, op2); } __forceinline svfloat64_t svabd_x(svbool_t pg, svfloat64_t op1, float64_t op2) { return svabd_n_f64_x(pg, op1, op2); } __forceinline svint8_t svabd_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svabd_s8_z(pg, op1, op2); } __forceinline svint8_t svabd_x(svbool_t pg, svint8_t op1, int8_t op2) { return svabd_n_s8_x(pg, op1, op2); } __forceinline svfloat32_t svabd_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svabd_f32_z(pg, op1, op2); } __forceinline svfloat64_t svabd_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svabd_f64_z(pg, op1, op2); } __forceinline svfloat16_t svabd_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svabd_f16_m(pg, op1, op2); } __forceinline svfloat32_t svabd_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svabd_f32_m(pg, op1, op2); } __forceinline svint8_t svabd_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svabd_s8_m(pg, op1, op2); } __forceinline svint16_t svabd_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svabd_s16_m(pg, op1, op2); } __forceinline svint32_t svabd_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svabd_s32_m(pg, op1, op2); } __forceinline svint64_t svabd_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svabd_s64_m(pg, op1, op2); } __forceinline svuint8_t svabd_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svabd_u8_m(pg, op1, op2); } __forceinline svuint16_t svabd_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svabd_u16_m(pg, op1, op2); } __forceinline svuint32_t svabd_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svabd_u32_m(pg, op1, op2); } __forceinline svuint64_t svabd_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svabd_u64_m(pg, op1, op2); } __forceinline svfloat64_t svabd_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svabd_f64_m(pg, op1, op2); } __forceinline svfloat32_t svabd_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svabd_f32_x(pg, op1, op2); } __forceinline svfloat16_t svabd_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svabd_f16_z(pg, op1, op2); } __forceinline svfloat16_t svabd_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svabd_f16_x(pg, op1, op2); } __forceinline svuint64_t svabd_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svabd_u64_x(pg, op1, op2); } __forceinline svuint16_t svabd_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svabd_u16_x(pg, op1, op2); } __forceinline svuint8_t svabd_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svabd_u8_x(pg, op1, op2); } __forceinline svuint32_t svabd_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svabd_u32_x(pg, op1, op2); } __forceinline svint32_t svabd_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svabd_s32_x(pg, op1, op2); } 
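// svabd computes the element-wise absolute difference |op1 - op2|; the forms
// taking a scalar second operand (the _n_ intrinsics) broadcast it to all
// elements first.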
__forceinline svint16_t svabd_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svabd_s16_x(pg, op1, op2); } __forceinline svint8_t svabd_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svabd_s8_x(pg, op1, op2); } __forceinline svfloat64_t svabd_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svabd_f64_x(pg, op1, op2); } __forceinline svint64_t svabd_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svabd_s64_x(pg, op1, op2); } __forceinline svint16_t svabs_z(svbool_t pg, svint16_t op) { return svabs_s16_z(pg, op); } __forceinline svint32_t svabs_x(svbool_t pg, svint32_t op) { return svabs_s32_x(pg, op); } __forceinline svint8_t svabs_z(svbool_t pg, svint8_t op) { return svabs_s8_z(pg, op); } __forceinline svfloat64_t svabs_z(svbool_t pg, svfloat64_t op) { return svabs_f64_z(pg, op); } __forceinline svfloat32_t svabs_z(svbool_t pg, svfloat32_t op) { return svabs_f32_z(pg, op); } __forceinline svfloat16_t svabs_z(svbool_t pg, svfloat16_t op) { return svabs_f16_z(pg, op); } __forceinline svint64_t svabs_x(svbool_t pg, svint64_t op) { return svabs_s64_x(pg, op); } __forceinline svint16_t svabs_x(svbool_t pg, svint16_t op) { return svabs_s16_x(pg, op); } __forceinline svint64_t svabs_m(svint64_t inactive, svbool_t pg, svint64_t op) { return svabs_s64_m(inactive, pg, op); } __forceinline svfloat64_t svabs_x(svbool_t pg, svfloat64_t op) { return svabs_f64_x(pg, op); } __forceinline svfloat32_t svabs_x(svbool_t pg, svfloat32_t op) { return svabs_f32_x(pg, op); } __forceinline svfloat16_t svabs_x(svbool_t pg, svfloat16_t op) { return svabs_f16_x(pg, op); } __forceinline svint32_t svabs_m(svint32_t inactive, svbool_t pg, svint32_t op) { return svabs_s32_m(inactive, pg, op); } __forceinline svint16_t svabs_m(svint16_t inactive, svbool_t pg, svint16_t op) { return svabs_s16_m(inactive, pg, op); } __forceinline svfloat16_t svabs_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) { return svabs_f16_m(inactive, pg, op); } __forceinline svfloat32_t svabs_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) { return svabs_f32_m(inactive, pg, op); } __forceinline svint32_t svabs_z(svbool_t pg, svint32_t op) { return svabs_s32_z(pg, op); } __forceinline svint8_t svabs_x(svbool_t pg, svint8_t op) { return svabs_s8_x(pg, op); } __forceinline svint64_t svabs_z(svbool_t pg, svint64_t op) { return svabs_s64_z(pg, op); } __forceinline svint8_t svabs_m(svint8_t inactive, svbool_t pg, svint8_t op) { return svabs_s8_m(inactive, pg, op); } __forceinline svfloat64_t svabs_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) { return svabs_f64_m(inactive, pg, op); } __forceinline int64_t svaddv(svbool_t pg, svint32_t op) { return svaddv_s32(pg, op); } __forceinline int64_t svaddv(svbool_t pg, svint16_t op) { return svaddv_s16(pg, op); } __forceinline uint64_t svaddv(svbool_t pg, svuint8_t op) { return svaddv_u8(pg, op); } __forceinline uint64_t svaddv(svbool_t pg, svuint16_t op) { return svaddv_u16(pg, op); } __forceinline uint64_t svaddv(svbool_t pg, svuint32_t op) { return svaddv_u32(pg, op); } __forceinline int64_t svaddv(svbool_t pg, svint8_t op) { return svaddv_s8(pg, op); } __forceinline int64_t svaddv(svbool_t pg, svint64_t op) { return svaddv_s64(pg, op); } __forceinline uint64_t svaddv(svbool_t pg, svuint64_t op) { return svaddv_u64(pg, op); } __forceinline float32_t svaddv(svbool_t pg, svfloat32_t op) { return svaddv_f32(pg, op); } __forceinline float64_t svaddv(svbool_t pg, svfloat64_t op) { return svaddv_f64(pg, op); } __forceinline float64_t svadda(svbool_t pg, float64_t initial, 
svfloat64_t op) { return svadda_f64(pg, initial, op); } __forceinline float32_t svadda(svbool_t pg, float32_t initial, svfloat32_t op) { return svadda_f32(pg, initial, op); } __forceinline float64_t svmaxnmv(svbool_t pg, svfloat64_t op) { return svmaxnmv_f64(pg, op); } __forceinline float32_t svmaxnmv(svbool_t pg, svfloat32_t op) { return svmaxnmv_f32(pg, op); } __forceinline int32_t svmaxv(svbool_t pg, svint32_t op) { return svmaxv_s32(pg, op); } __forceinline int64_t svmaxv(svbool_t pg, svint64_t op) { return svmaxv_s64(pg, op); } __forceinline float32_t svmaxv(svbool_t pg, svfloat32_t op) { return svmaxv_f32(pg, op); } __forceinline float64_t svmaxv(svbool_t pg, svfloat64_t op) { return svmaxv_f64(pg, op); } __forceinline int8_t svmaxv(svbool_t pg, svint8_t op) { return svmaxv_s8(pg, op); } __forceinline int16_t svmaxv(svbool_t pg, svint16_t op) { return svmaxv_s16(pg, op); } __forceinline uint64_t svmaxv(svbool_t pg, svuint64_t op) { return svmaxv_u64(pg, op); } __forceinline uint8_t svmaxv(svbool_t pg, svuint8_t op) { return svmaxv_u8(pg, op); } __forceinline uint16_t svmaxv(svbool_t pg, svuint16_t op) { return svmaxv_u16(pg, op); } __forceinline uint32_t svmaxv(svbool_t pg, svuint32_t op) { return svmaxv_u32(pg, op); } __forceinline float64_t svminnmv(svbool_t pg, svfloat64_t op) { return svminnmv_f64(pg, op); } __forceinline float32_t svminnmv(svbool_t pg, svfloat32_t op) { return svminnmv_f32(pg, op); } __forceinline uint64_t svminv(svbool_t pg, svuint64_t op) { return svminv_u64(pg, op); } __forceinline uint32_t svminv(svbool_t pg, svuint32_t op) { return svminv_u32(pg, op); } __forceinline uint16_t svminv(svbool_t pg, svuint16_t op) { return svminv_u16(pg, op); } __forceinline uint8_t svminv(svbool_t pg, svuint8_t op) { return svminv_u8(pg, op); } __forceinline float32_t svminv(svbool_t pg, svfloat32_t op) { return svminv_f32(pg, op); } __forceinline int32_t svminv(svbool_t pg, svint32_t op) { return svminv_s32(pg, op); } __forceinline int16_t svminv(svbool_t pg, svint16_t op) { return svminv_s16(pg, op); } __forceinline int8_t svminv(svbool_t pg, svint8_t op) { return svminv_s8(pg, op); } __forceinline float64_t svminv(svbool_t pg, svfloat64_t op) { return svminv_f64(pg, op); } __forceinline int64_t svminv(svbool_t pg, svint64_t op) { return svminv_s64(pg, op); } __forceinline svuint64_t svadd_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svadd_n_u64_z(pg, op1, op2); } __forceinline svuint32_t svadd_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svadd_n_u32_z(pg, op1, op2); } __forceinline svuint16_t svadd_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svadd_n_u16_z(pg, op1, op2); } __forceinline svuint8_t svadd_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svadd_n_u8_z(pg, op1, op2); } __forceinline svint64_t svadd_z(svbool_t pg, svint64_t op1, int64_t op2) { return svadd_n_s64_z(pg, op1, op2); } __forceinline svint32_t svadd_z(svbool_t pg, svint32_t op1, int32_t op2) { return svadd_n_s32_z(pg, op1, op2); } __forceinline svint16_t svadd_z(svbool_t pg, svint16_t op1, int16_t op2) { return svadd_n_s16_z(pg, op1, op2); } __forceinline svint8_t svadd_z(svbool_t pg, svint8_t op1, int8_t op2) { return svadd_n_s8_z(pg, op1, op2); } __forceinline svfloat64_t svadd_z(svbool_t pg, svfloat64_t op1, float64_t op2) { return svadd_n_f64_z(pg, op1, op2); } __forceinline svfloat32_t svadd_z(svbool_t pg, svfloat32_t op1, float32_t op2) { return svadd_n_f32_z(pg, op1, op2); } __forceinline svfloat32_t svadd_x(svbool_t pg, svfloat32_t op1, float32_t op2) { return 
svadd_n_f32_x(pg, op1, op2); } __forceinline svuint64_t svadd_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svadd_n_u64_x(pg, op1, op2); } __forceinline svuint32_t svadd_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svadd_n_u32_x(pg, op1, op2); } __forceinline svuint16_t svadd_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svadd_n_u16_x(pg, op1, op2); } __forceinline svuint8_t svadd_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svadd_n_u8_x(pg, op1, op2); } __forceinline svint64_t svadd_x(svbool_t pg, svint64_t op1, int64_t op2) { return svadd_n_s64_x(pg, op1, op2); } __forceinline svint32_t svadd_x(svbool_t pg, svint32_t op1, int32_t op2) { return svadd_n_s32_x(pg, op1, op2); } __forceinline svint16_t svadd_x(svbool_t pg, svint16_t op1, int16_t op2) { return svadd_n_s16_x(pg, op1, op2); } __forceinline svint8_t svadd_x(svbool_t pg, svint8_t op1, int8_t op2) { return svadd_n_s8_x(pg, op1, op2); } __forceinline svfloat64_t svadd_x(svbool_t pg, svfloat64_t op1, float64_t op2) { return svadd_n_f64_x(pg, op1, op2); } __forceinline svfloat32_t svadd_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svadd_f32_m(pg, op1, op2); } __forceinline svfloat16_t svadd_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svadd_f16_x(pg, op1, op2); } __forceinline svint8_t svadd_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svadd_s8_m(pg, op1, op2); } __forceinline svuint64_t svadd_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svadd_n_u64_m(pg, op1, op2); } __forceinline svuint32_t svadd_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svadd_n_u32_m(pg, op1, op2); } __forceinline svuint16_t svadd_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svadd_n_u16_m(pg, op1, op2); } __forceinline svuint8_t svadd_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svadd_n_u8_m(pg, op1, op2); } __forceinline svint64_t svadd_m(svbool_t pg, svint64_t op1, int64_t op2) { return svadd_n_s64_m(pg, op1, op2); } __forceinline svint32_t svadd_m(svbool_t pg, svint32_t op1, int32_t op2) { return svadd_n_s32_m(pg, op1, op2); } __forceinline svint16_t svadd_m(svbool_t pg, svint16_t op1, int16_t op2) { return svadd_n_s16_m(pg, op1, op2); } __forceinline svint8_t svadd_m(svbool_t pg, svint8_t op1, int8_t op2) { return svadd_n_s8_m(pg, op1, op2); } __forceinline svfloat64_t svadd_m(svbool_t pg, svfloat64_t op1, float64_t op2) { return svadd_n_f64_m(pg, op1, op2); } __forceinline svfloat32_t svadd_m(svbool_t pg, svfloat32_t op1, float32_t op2) { return svadd_n_f32_m(pg, op1, op2); } __forceinline svuint64_t svadd_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svadd_u64_z(pg, op1, op2); } __forceinline svuint32_t svadd_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svadd_u32_z(pg, op1, op2); } __forceinline svuint16_t svadd_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svadd_u16_z(pg, op1, op2); } __forceinline svuint8_t svadd_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svadd_u8_z(pg, op1, op2); } __forceinline svint64_t svadd_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svadd_s64_z(pg, op1, op2); } __forceinline svint16_t svadd_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svadd_s16_z(pg, op1, op2); } __forceinline svint8_t svadd_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svadd_s8_z(pg, op1, op2); } __forceinline svfloat64_t svadd_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svadd_f64_m(pg, op1, op2); } __forceinline svfloat64_t svadd_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return 
svadd_f64_z(pg, op1, op2); } __forceinline svfloat16_t svadd_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svadd_f16_z(pg, op1, op2); } __forceinline svint16_t svadd_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svadd_s16_m(pg, op1, op2); } __forceinline svint32_t svadd_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svadd_s32_m(pg, op1, op2); } __forceinline svint64_t svadd_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svadd_s64_m(pg, op1, op2); } __forceinline svuint8_t svadd_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svadd_u8_m(pg, op1, op2); } __forceinline svuint16_t svadd_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svadd_u16_m(pg, op1, op2); } __forceinline svuint32_t svadd_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svadd_u32_m(pg, op1, op2); } __forceinline svuint64_t svadd_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svadd_u64_m(pg, op1, op2); } __forceinline svfloat32_t svadd_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svadd_f32_x(pg, op1, op2); } __forceinline svfloat64_t svadd_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svadd_f64_x(pg, op1, op2); } __forceinline svint8_t svadd_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svadd_s8_x(pg, op1, op2); } __forceinline svint16_t svadd_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svadd_s16_x(pg, op1, op2); } __forceinline svint32_t svadd_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svadd_s32_x(pg, op1, op2); } __forceinline svint64_t svadd_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svadd_s64_x(pg, op1, op2); } __forceinline svuint8_t svadd_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svadd_u8_x(pg, op1, op2); } __forceinline svuint16_t svadd_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svadd_u16_x(pg, op1, op2); } __forceinline svuint32_t svadd_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svadd_u32_x(pg, op1, op2); } __forceinline svuint64_t svadd_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svadd_u64_x(pg, op1, op2); } __forceinline svfloat32_t svadd_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svadd_f32_z(pg, op1, op2); } __forceinline svfloat16_t svadd_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svadd_f16_m(pg, op1, op2); } __forceinline svint32_t svadd_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svadd_s32_z(pg, op1, op2); } __forceinline svint32_t svqadd(svint32_t op1, int32_t op2) { return svqadd_n_s32(op1, op2); } __forceinline svint16_t svqadd(svint16_t op1, int16_t op2) { return svqadd_n_s16(op1, op2); } __forceinline svint64_t svqadd(svint64_t op1, int64_t op2) { return svqadd_n_s64(op1, op2); } __forceinline svint8_t svqadd(svint8_t op1, svint8_t op2) { return svqadd_s8(op1, op2); } __forceinline svint16_t svqadd(svint16_t op1, svint16_t op2) { return svqadd_s16(op1, op2); } __forceinline svuint32_t svqadd(svuint32_t op1, uint32_t op2) { return svqadd_n_u32(op1, op2); } __forceinline svint32_t svqadd(svint32_t op1, svint32_t op2) { return svqadd_s32(op1, op2); } __forceinline svuint16_t svqadd(svuint16_t op1, uint16_t op2) { return svqadd_n_u16(op1, op2); } __forceinline svuint64_t svqadd(svuint64_t op1, uint64_t op2) { return svqadd_n_u64(op1, op2); } __forceinline svuint8_t svqadd(svuint8_t op1, svuint8_t op2) { return svqadd_u8(op1, op2); } __forceinline svuint16_t svqadd(svuint16_t op1, svuint16_t op2) { return svqadd_u16(op1, op2); } __forceinline svuint32_t svqadd(svuint32_t op1, svuint32_t op2) { 
return svqadd_u32(op1, op2); } __forceinline svuint64_t svqadd(svuint64_t op1, svuint64_t op2) { return svqadd_u64(op1, op2); } __forceinline svint8_t svqadd(svint8_t op1, int8_t op2) { return svqadd_n_s8(op1, op2); } __forceinline svint64_t svqadd(svint64_t op1, svint64_t op2) { return svqadd_s64(op1, op2); } __forceinline svuint8_t svqadd(svuint8_t op1, uint8_t op2) { return svqadd_n_u8(op1, op2); } __forceinline svuint64_t svadrh_index(svuint64_t bases, svuint64_t indices) { return svadrh_u64base_u64index(bases, indices); } __forceinline svuint64_t svadrh_index(svuint64_t bases, svint64_t indices) { return svadrh_u64base_s64index(bases, indices); } __forceinline svuint32_t svadrh_index(svuint32_t bases, svuint32_t indices) { return svadrh_u32base_u32index(bases, indices); } __forceinline svuint32_t svadrh_index(svuint32_t bases, svint32_t indices) { return svadrh_u32base_s32index(bases, indices); } __forceinline svuint64_t svadrw_index(svuint64_t bases, svuint64_t indices) { return svadrw_u64base_u64index(bases, indices); } __forceinline svuint64_t svadrw_index(svuint64_t bases, svint64_t indices) { return svadrw_u64base_s64index(bases, indices); } __forceinline svuint32_t svadrw_index(svuint32_t bases, svuint32_t indices) { return svadrw_u32base_u32index(bases, indices); } __forceinline svuint32_t svadrw_index(svuint32_t bases, svint32_t indices) { return svadrw_u32base_s32index(bases, indices); } __forceinline svuint64_t svadrd_index(svuint64_t bases, svuint64_t indices) { return svadrd_u64base_u64index(bases, indices); } __forceinline svuint64_t svadrd_index(svuint64_t bases, svint64_t indices) { return svadrd_u64base_s64index(bases, indices); } __forceinline svuint32_t svadrd_index(svuint32_t bases, svuint32_t indices) { return svadrd_u32base_u32index(bases, indices); } __forceinline svuint32_t svadrd_index(svuint32_t bases, svint32_t indices) { return svadrd_u32base_s32index(bases, indices); } __forceinline svuint64_t svadrb_offset(svuint64_t bases, svuint64_t offsets) { return svadrb_u64base_u64offset(bases, offsets); } __forceinline svuint64_t svadrb_offset(svuint64_t bases, svint64_t offsets) { return svadrb_u64base_s64offset(bases, offsets); } __forceinline svuint32_t svadrb_offset(svuint32_t bases, svuint32_t offsets) { return svadrb_u32base_u32offset(bases, offsets); } __forceinline svuint32_t svadrb_offset(svuint32_t bases, svint32_t offsets) { return svadrb_u32base_s32offset(bases, offsets); } __forceinline svuint32_t svdiv_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svdiv_u32_x(pg, op1, op2); } __forceinline svuint64_t svdiv_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svdiv_u64_z(pg, op1, op2); } __forceinline svuint32_t svdiv_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svdiv_u32_z(pg, op1, op2); } __forceinline svint64_t svdiv_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svdiv_s64_z(pg, op1, op2); } __forceinline svint32_t svdiv_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svdiv_s32_z(pg, op1, op2); } __forceinline svfloat64_t svdiv_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svdiv_f64_z(pg, op1, op2); } __forceinline svfloat32_t svdiv_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svdiv_f32_z(pg, op1, op2); } __forceinline svfloat16_t svdiv_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svdiv_f16_z(pg, op1, op2); } __forceinline svfloat16_t svdiv_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svdiv_f16_x(pg, op1, op2); } __forceinline svfloat32_t svdiv_x(svbool_t pg, 
svfloat32_t op1, svfloat32_t op2) { return svdiv_f32_x(pg, op1, op2); } __forceinline svfloat64_t svdiv_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svdiv_f64_x(pg, op1, op2); } __forceinline svfloat32_t svdiv_m(svbool_t pg, svfloat32_t op1, float32_t op2) { return svdiv_n_f32_m(pg, op1, op2); } __forceinline svint32_t svdiv_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svdiv_s32_x(pg, op1, op2); } __forceinline svint64_t svdiv_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svdiv_s64_x(pg, op1, op2); } __forceinline svuint64_t svdiv_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svdiv_u64_x(pg, op1, op2); } __forceinline svfloat64_t svdiv_m(svbool_t pg, svfloat64_t op1, float64_t op2) { return svdiv_n_f64_m(pg, op1, op2); } __forceinline svfloat32_t svdiv_x(svbool_t pg, svfloat32_t op1, float32_t op2) { return svdiv_n_f32_x(pg, op1, op2); } __forceinline svint64_t svdiv_m(svbool_t pg, svint64_t op1, int64_t op2) { return svdiv_n_s64_m(pg, op1, op2); } __forceinline svuint64_t svdiv_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svdiv_n_u64_z(pg, op1, op2); } __forceinline svuint32_t svdiv_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svdiv_n_u32_z(pg, op1, op2); } __forceinline svint64_t svdiv_z(svbool_t pg, svint64_t op1, int64_t op2) { return svdiv_n_s64_z(pg, op1, op2); } __forceinline svint32_t svdiv_z(svbool_t pg, svint32_t op1, int32_t op2) { return svdiv_n_s32_z(pg, op1, op2); } __forceinline svfloat32_t svdiv_z(svbool_t pg, svfloat32_t op1, float32_t op2) { return svdiv_n_f32_z(pg, op1, op2); } __forceinline svuint64_t svdiv_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svdiv_n_u64_x(pg, op1, op2); } __forceinline svuint32_t svdiv_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svdiv_n_u32_x(pg, op1, op2); } __forceinline svint64_t svdiv_x(svbool_t pg, svint64_t op1, int64_t op2) { return svdiv_n_s64_x(pg, op1, op2); } __forceinline svint32_t svdiv_x(svbool_t pg, svint32_t op1, int32_t op2) { return svdiv_n_s32_x(pg, op1, op2); } __forceinline svfloat64_t svdiv_x(svbool_t pg, svfloat64_t op1, float64_t op2) { return svdiv_n_f64_x(pg, op1, op2); } __forceinline svuint64_t svdiv_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svdiv_u64_m(pg, op1, op2); } __forceinline svuint64_t svdiv_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svdiv_n_u64_m(pg, op1, op2); } __forceinline svuint32_t svdiv_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svdiv_n_u32_m(pg, op1, op2); } __forceinline svint32_t svdiv_m(svbool_t pg, svint32_t op1, int32_t op2) { return svdiv_n_s32_m(pg, op1, op2); } __forceinline svuint32_t svdiv_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svdiv_u32_m(pg, op1, op2); } __forceinline svfloat64_t svdiv_z(svbool_t pg, svfloat64_t op1, float64_t op2) { return svdiv_n_f64_z(pg, op1, op2); } __forceinline svint32_t svdiv_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svdiv_s32_m(pg, op1, op2); } __forceinline svfloat64_t svdiv_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svdiv_f64_m(pg, op1, op2); } __forceinline svfloat32_t svdiv_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svdiv_f32_m(pg, op1, op2); } __forceinline svfloat16_t svdiv_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svdiv_f16_m(pg, op1, op2); } __forceinline svint64_t svdiv_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svdiv_s64_m(pg, op1, op2); } __forceinline svuint64_t svdivr_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svdivr_u64_z(pg, op1, op2); } 
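// Usage sketch (illustrative comment only): the svdiv_* overloads divide op1 by op2
// element-wise under the predicate; pg, x, n and d are assumed caller-supplied values.
//
//   svfloat32_t q = svdiv_m(pg, x, 3.0f);   // x / 3 via svdiv_n_f32_m; inactive lanes keep x
//   svint32_t   s = svdiv_z(pg, n, d);      // signed n / d via svdiv_s32_z; inactive lanes zeroed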
__forceinline svint64_t svdivr_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svdivr_s64_z(pg, op1, op2); } __forceinline svint32_t svdivr_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svdivr_s32_z(pg, op1, op2); } __forceinline svfloat64_t svdivr_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svdivr_f64_z(pg, op1, op2); } __forceinline svfloat32_t svdivr_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svdivr_f32_z(pg, op1, op2); } __forceinline svuint64_t svdivr_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svdivr_u64_x(pg, op1, op2); } __forceinline svfloat32_t svdivr_m(svbool_t pg, svfloat32_t op1, float32_t op2) { return svdivr_n_f32_m(pg, op1, op2); } __forceinline svuint32_t svdivr_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svdivr_u32_x(pg, op1, op2); } __forceinline svint64_t svdivr_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svdivr_s64_x(pg, op1, op2); } __forceinline svint32_t svdivr_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svdivr_s32_x(pg, op1, op2); } __forceinline svfloat64_t svdivr_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svdivr_f64_x(pg, op1, op2); } __forceinline svfloat32_t svdivr_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svdivr_f32_x(pg, op1, op2); } __forceinline svfloat16_t svdivr_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svdivr_f16_x(pg, op1, op2); } __forceinline svfloat16_t svdivr_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svdivr_f16_z(pg, op1, op2); } __forceinline svfloat64_t svdivr_m(svbool_t pg, svfloat64_t op1, float64_t op2) { return svdivr_n_f64_m(pg, op1, op2); } __forceinline svint64_t svdivr_m(svbool_t pg, svint64_t op1, int64_t op2) { return svdivr_n_s64_m(pg, op1, op2); } __forceinline svuint64_t svdivr_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svdivr_u64_m(pg, op1, op2); } __forceinline svuint64_t svdivr_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svdivr_n_u64_z(pg, op1, op2); } __forceinline svuint32_t svdivr_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svdivr_n_u32_z(pg, op1, op2); } __forceinline svint64_t svdivr_z(svbool_t pg, svint64_t op1, int64_t op2) { return svdivr_n_s64_z(pg, op1, op2); } __forceinline svint32_t svdivr_z(svbool_t pg, svint32_t op1, int32_t op2) { return svdivr_n_s32_z(pg, op1, op2); } __forceinline svfloat64_t svdivr_z(svbool_t pg, svfloat64_t op1, float64_t op2) { return svdivr_n_f64_z(pg, op1, op2); } __forceinline svfloat32_t svdivr_z(svbool_t pg, svfloat32_t op1, float32_t op2) { return svdivr_n_f32_z(pg, op1, op2); } __forceinline svint32_t svdivr_m(svbool_t pg, svint32_t op1, int32_t op2) { return svdivr_n_s32_m(pg, op1, op2); } __forceinline svuint64_t svdivr_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svdivr_n_u64_x(pg, op1, op2); } __forceinline svint64_t svdivr_x(svbool_t pg, svint64_t op1, int64_t op2) { return svdivr_n_s64_x(pg, op1, op2); } __forceinline svint32_t svdivr_x(svbool_t pg, svint32_t op1, int32_t op2) { return svdivr_n_s32_x(pg, op1, op2); } __forceinline svfloat64_t svdivr_x(svbool_t pg, svfloat64_t op1, float64_t op2) { return svdivr_n_f64_x(pg, op1, op2); } __forceinline svfloat32_t svdivr_x(svbool_t pg, svfloat32_t op1, float32_t op2) { return svdivr_n_f32_x(pg, op1, op2); } __forceinline svuint64_t svdivr_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svdivr_n_u64_m(pg, op1, op2); } __forceinline svuint32_t svdivr_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svdivr_n_u32_m(pg, op1, op2); } 
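// Usage sketch (illustrative comment only): svdivr_* is reversed division, op2 / op1,
// which is convenient when the dividend is a scalar; pg and x are assumed caller-supplied.
//
//   svfloat64_t r = svdivr_x(pg, x, 1.0);   // 1.0 / x via svdivr_n_f64_x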
__forceinline svuint32_t svdivr_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svdivr_n_u32_x(pg, op1, op2); }
__forceinline svuint32_t svdivr_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svdivr_u32_m(pg, op1, op2); }
__forceinline svuint32_t svdivr_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svdivr_u32_z(pg, op1, op2); }
__forceinline svint32_t svdivr_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svdivr_s32_m(pg, op1, op2); }
__forceinline svfloat64_t svdivr_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svdivr_f64_m(pg, op1, op2); }
__forceinline svfloat32_t svdivr_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svdivr_f32_m(pg, op1, op2); }
__forceinline svfloat16_t svdivr_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svdivr_f16_m(pg, op1, op2); }
__forceinline svint64_t svdivr_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svdivr_s64_m(pg, op1, op2); }
template <int N, typename T> __forceinline T __svbfdot_lane(T op1, svbfloat16_t op2, svbfloat16_t op3) {
    if constexpr(::std::is_same_v<T, svfloat32_t>) { return svbfdot_lane_f32(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svbfdot_lane(op1, op2, op3, imm_index) __svbfdot_lane<imm_index>(op1, op2, op3)
__forceinline svfloat32_t svbfdot(svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3) { return svbfdot_f32(op1, op2, op3); }
__forceinline svuint32_t svdot(svuint32_t op1, svuint8_t op2, svuint8_t op3) { return svdot_u32(op1, op2, op3); }
__forceinline svuint64_t svdot(svuint64_t op1, svuint16_t op2, svuint16_t op3) { return svdot_u64(op1, op2, op3); }
__forceinline svint32_t svdot(svint32_t op1, svint8_t op2, int8_t op3) { return svdot_n_s32(op1, op2, op3); }
__forceinline svint64_t svdot(svint64_t op1, svint16_t op2, int16_t op3) { return svdot_n_s64(op1, op2, op3); }
__forceinline svuint32_t svdot(svuint32_t op1, svuint8_t op2, uint8_t op3) { return svdot_n_u32(op1, op2, op3); }
__forceinline svuint64_t svdot(svuint64_t op1, svuint16_t op2, uint16_t op3) { return svdot_n_u64(op1, op2, op3); }
template <int N, typename T1, typename T2> __forceinline T1 __svdot_lane(T1 op1, T2 op2, T2 op3) {
    if constexpr(::std::is_same_v<T1, svint32_t>) { return svdot_lane_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svuint32_t>) { return svdot_lane_u32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint64_t>) { return svdot_lane_s64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svuint64_t>) { return svdot_lane_u64(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svdot_lane(op1, op2, op3, imm_index) __svdot_lane<imm_index>(op1, op2, op3)
__forceinline svint64_t svdot(svint64_t op1, svint16_t op2, svint16_t op3) { return svdot_s64(op1, op2, op3); }
__forceinline svint32_t svdot(svint32_t op1, svint8_t op2, svint8_t op3) { return svdot_s32(op1, op2, op3); }
__forceinline svint32_t svsudot(svint32_t op1, svint8_t op2, uint8_t op3) { return svsudot_n_s32(op1, op2, op3); }
__forceinline svint32_t svsudot(svint32_t op1, svint8_t op2, svuint8_t op3) { return svsudot_s32(op1, op2, op3); }
template <int N, typename T> __forceinline T __svsudot_lane(T op1, svint8_t op2, svuint8_t op3) {
    if constexpr(::std::is_same_v<T, svint32_t>) { return svsudot_lane_s32(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svsudot_lane(op1, op2, op3, imm_index) __svsudot_lane<imm_index>(op1, op2, op3)
template <int N, typename T> __forceinline T __svusdot_lane(T op1, svuint8_t op2, svint8_t op3) {
    if constexpr(::std::is_same_v<T, svint32_t>) { return svusdot_lane_s32(op1, op2, op3, N); }
    else {
static_assert(false, "unexpected type during SVE intrinsic overload"); } } #define svusdot_lane(op1, op2, op3, imm_index) __svusdot_lane(op1, op2, op3) __forceinline svint32_t svusdot(svint32_t op1, svuint8_t op2, int8_t op3) { return svusdot_n_s32(op1, op2, op3); } __forceinline svint32_t svusdot(svint32_t op1, svuint8_t op2, svint8_t op3) { return svusdot_s32(op1, op2, op3); } __forceinline svfloat32_t svscale_x(svbool_t pg, svfloat32_t op1, int32_t op2) { return svscale_n_f32_x(pg, op1, op2); } __forceinline svfloat64_t svscale_x(svbool_t pg, svfloat64_t op1, int64_t op2) { return svscale_n_f64_x(pg, op1, op2); } __forceinline svfloat16_t svscale_m(svbool_t pg, svfloat16_t op1, int16_t op2) { return svscale_n_f16_m(pg, op1, op2); } __forceinline svfloat16_t svscale_z(svbool_t pg, svfloat16_t op1, int16_t op2) { return svscale_n_f16_z(pg, op1, op2); } __forceinline svfloat32_t svscale_z(svbool_t pg, svfloat32_t op1, int32_t op2) { return svscale_n_f32_z(pg, op1, op2); } __forceinline svfloat16_t svscale_x(svbool_t pg, svfloat16_t op1, int16_t op2) { return svscale_n_f16_x(pg, op1, op2); } __forceinline svfloat16_t svscale_m(svbool_t pg, svfloat16_t op1, svint16_t op2) { return svscale_f16_m(pg, op1, op2); } __forceinline svfloat64_t svscale_z(svbool_t pg, svfloat64_t op1, int64_t op2) { return svscale_n_f64_z(pg, op1, op2); } __forceinline svfloat64_t svscale_m(svbool_t pg, svfloat64_t op1, svint64_t op2) { return svscale_f64_m(pg, op1, op2); } __forceinline svfloat16_t svscale_x(svbool_t pg, svfloat16_t op1, svint16_t op2) { return svscale_f16_x(pg, op1, op2); } __forceinline svfloat32_t svscale_m(svbool_t pg, svfloat32_t op1, int32_t op2) { return svscale_n_f32_m(pg, op1, op2); } __forceinline svfloat32_t svscale_x(svbool_t pg, svfloat32_t op1, svint32_t op2) { return svscale_f32_x(pg, op1, op2); } __forceinline svfloat64_t svscale_x(svbool_t pg, svfloat64_t op1, svint64_t op2) { return svscale_f64_x(pg, op1, op2); } __forceinline svfloat16_t svscale_z(svbool_t pg, svfloat16_t op1, svint16_t op2) { return svscale_f16_z(pg, op1, op2); } __forceinline svfloat32_t svscale_z(svbool_t pg, svfloat32_t op1, svint32_t op2) { return svscale_f32_z(pg, op1, op2); } __forceinline svfloat64_t svscale_z(svbool_t pg, svfloat64_t op1, svint64_t op2) { return svscale_f64_z(pg, op1, op2); } __forceinline svfloat32_t svscale_m(svbool_t pg, svfloat32_t op1, svint32_t op2) { return svscale_f32_m(pg, op1, op2); } __forceinline svfloat64_t svscale_m(svbool_t pg, svfloat64_t op1, int64_t op2) { return svscale_n_f64_m(pg, op1, op2); } __forceinline svfloat64_t svexpa(svuint64_t op) { return svexpa_f64(op); } __forceinline svfloat16_t svexpa(svuint16_t op) { return svexpa_f16(op); } __forceinline svfloat32_t svexpa(svuint32_t op) { return svexpa_f32(op); } __forceinline svfloat32_t svbfmmla(svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3) { return svbfmmla_f32(op1, op2, op3); } __forceinline svint32_t svmmla(svint32_t op1, svint8_t op2, svint8_t op3) { return svmmla_s32(op1, op2, op3); } __forceinline svuint32_t svmmla(svuint32_t op1, svuint8_t op2, svuint8_t op3) { return svmmla_u32(op1, op2, op3); } __forceinline svfloat32_t svmmla(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svmmla_f32(op1, op2, op3); } __forceinline svfloat64_t svmmla(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svmmla_f64(op1, op2, op3); } __forceinline svint32_t svusmmla(svint32_t op1, svuint8_t op2, svint8_t op3) { return svusmmla_s32(op1, op2, op3); } __forceinline svuint32_t svmax_m(svbool_t pg, 
svuint32_t op1, uint32_t op2) { return svmax_n_u32_m(pg, op1, op2); } __forceinline svuint16_t svmax_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svmax_n_u16_m(pg, op1, op2); } __forceinline svuint8_t svmax_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svmax_n_u8_m(pg, op1, op2); } __forceinline svint64_t svmax_m(svbool_t pg, svint64_t op1, int64_t op2) { return svmax_n_s64_m(pg, op1, op2); } __forceinline svint32_t svmax_m(svbool_t pg, svint32_t op1, int32_t op2) { return svmax_n_s32_m(pg, op1, op2); } __forceinline svint16_t svmax_m(svbool_t pg, svint16_t op1, int16_t op2) { return svmax_n_s16_m(pg, op1, op2); } __forceinline svint8_t svmax_m(svbool_t pg, svint8_t op1, int8_t op2) { return svmax_n_s8_m(pg, op1, op2); } __forceinline svfloat64_t svmax_m(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmax_n_f64_m(pg, op1, op2); } __forceinline svfloat32_t svmax_m(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmax_n_f32_m(pg, op1, op2); } __forceinline svfloat32_t svmax_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmax_f32_z(pg, op1, op2); } __forceinline svuint32_t svmax_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmax_u32_z(pg, op1, op2); } __forceinline svuint16_t svmax_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmax_u16_z(pg, op1, op2); } __forceinline svuint8_t svmax_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmax_u8_z(pg, op1, op2); } __forceinline svint64_t svmax_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svmax_s64_z(pg, op1, op2); } __forceinline svint32_t svmax_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svmax_s32_z(pg, op1, op2); } __forceinline svint16_t svmax_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svmax_s16_z(pg, op1, op2); } __forceinline svint8_t svmax_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svmax_s8_z(pg, op1, op2); } __forceinline svfloat64_t svmax_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmax_f64_z(pg, op1, op2); } __forceinline svuint64_t svmax_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svmax_u64_z(pg, op1, op2); } __forceinline svfloat16_t svmax_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmax_f16_z(pg, op1, op2); } __forceinline svint32_t svmax_x(svbool_t pg, svint32_t op1, int32_t op2) { return svmax_n_s32_x(pg, op1, op2); } __forceinline svuint16_t svmax_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmax_u16_m(pg, op1, op2); } __forceinline svuint64_t svmax_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svmax_u64_x(pg, op1, op2); } __forceinline svuint8_t svmax_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmax_u8_x(pg, op1, op2); } __forceinline svint64_t svmax_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svmax_s64_x(pg, op1, op2); } __forceinline svint32_t svmax_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svmax_s32_x(pg, op1, op2); } __forceinline svint16_t svmax_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svmax_s16_x(pg, op1, op2); } __forceinline svint8_t svmax_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svmax_s8_x(pg, op1, op2); } __forceinline svfloat64_t svmax_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmax_f64_x(pg, op1, op2); } __forceinline svfloat32_t svmax_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmax_f32_x(pg, op1, op2); } __forceinline svfloat16_t svmax_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmax_f16_x(pg, op1, op2); } __forceinline svuint64_t svmax_m(svbool_t 
pg, svuint64_t op1, svuint64_t op2) { return svmax_u64_m(pg, op1, op2); } __forceinline svfloat16_t svmax_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmax_f16_m(pg, op1, op2); } __forceinline svfloat32_t svmax_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmax_f32_m(pg, op1, op2); } __forceinline svfloat64_t svmax_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmax_f64_m(pg, op1, op2); } __forceinline svint8_t svmax_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svmax_s8_m(pg, op1, op2); } __forceinline svint16_t svmax_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svmax_s16_m(pg, op1, op2); } __forceinline svint32_t svmax_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svmax_s32_m(pg, op1, op2); } __forceinline svint64_t svmax_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svmax_s64_m(pg, op1, op2); } __forceinline svuint8_t svmax_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmax_u8_m(pg, op1, op2); } __forceinline svuint32_t svmax_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmax_u32_m(pg, op1, op2); } __forceinline svuint64_t svmax_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svmax_n_u64_m(pg, op1, op2); } __forceinline svuint16_t svmax_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmax_u16_x(pg, op1, op2); } __forceinline svuint64_t svmax_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svmax_n_u64_z(pg, op1, op2); } __forceinline svuint32_t svmax_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svmax_n_u32_z(pg, op1, op2); } __forceinline svuint16_t svmax_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svmax_n_u16_z(pg, op1, op2); } __forceinline svuint8_t svmax_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svmax_n_u8_z(pg, op1, op2); } __forceinline svint64_t svmax_z(svbool_t pg, svint64_t op1, int64_t op2) { return svmax_n_s64_z(pg, op1, op2); } __forceinline svint32_t svmax_z(svbool_t pg, svint32_t op1, int32_t op2) { return svmax_n_s32_z(pg, op1, op2); } __forceinline svint8_t svmax_z(svbool_t pg, svint8_t op1, int8_t op2) { return svmax_n_s8_z(pg, op1, op2); } __forceinline svfloat64_t svmax_z(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmax_n_f64_z(pg, op1, op2); } __forceinline svfloat32_t svmax_z(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmax_n_f32_z(pg, op1, op2); } __forceinline svint16_t svmax_z(svbool_t pg, svint16_t op1, int16_t op2) { return svmax_n_s16_z(pg, op1, op2); } __forceinline svuint32_t svmax_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svmax_n_u32_x(pg, op1, op2); } __forceinline svuint64_t svmax_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svmax_n_u64_x(pg, op1, op2); } __forceinline svint8_t svmax_x(svbool_t pg, svint8_t op1, int8_t op2) { return svmax_n_s8_x(pg, op1, op2); } __forceinline svint16_t svmax_x(svbool_t pg, svint16_t op1, int16_t op2) { return svmax_n_s16_x(pg, op1, op2); } __forceinline svuint32_t svmax_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmax_u32_x(pg, op1, op2); } __forceinline svfloat64_t svmax_x(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmax_n_f64_x(pg, op1, op2); } __forceinline svuint8_t svmax_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svmax_n_u8_x(pg, op1, op2); } __forceinline svuint16_t svmax_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svmax_n_u16_x(pg, op1, op2); } __forceinline svint64_t svmax_x(svbool_t pg, svint64_t op1, int64_t op2) { return svmax_n_s64_x(pg, op1, op2); } __forceinline svfloat32_t 
svmax_x(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmax_n_f32_x(pg, op1, op2); } __forceinline svfloat64_t svmaxnm_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmaxnm_f64_x(pg, op1, op2); } __forceinline svfloat64_t svmaxnm_z(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmaxnm_n_f64_z(pg, op1, op2); } __forceinline svfloat32_t svmaxnm_z(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmaxnm_n_f32_z(pg, op1, op2); } __forceinline svfloat32_t svmaxnm_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmaxnm_f32_m(pg, op1, op2); } __forceinline svfloat64_t svmaxnm_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmaxnm_f64_m(pg, op1, op2); } __forceinline svfloat16_t svmaxnm_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmaxnm_f16_x(pg, op1, op2); } __forceinline svfloat32_t svmaxnm_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmaxnm_f32_x(pg, op1, op2); } __forceinline svfloat64_t svmaxnm_x(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmaxnm_n_f64_x(pg, op1, op2); } __forceinline svfloat32_t svmaxnm_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmaxnm_f32_z(pg, op1, op2); } __forceinline svfloat64_t svmaxnm_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmaxnm_f64_z(pg, op1, op2); } __forceinline svfloat32_t svmaxnm_m(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmaxnm_n_f32_m(pg, op1, op2); } __forceinline svfloat64_t svmaxnm_m(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmaxnm_n_f64_m(pg, op1, op2); } __forceinline svfloat32_t svmaxnm_x(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmaxnm_n_f32_x(pg, op1, op2); } __forceinline svfloat16_t svmaxnm_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmaxnm_f16_m(pg, op1, op2); } __forceinline svfloat16_t svmaxnm_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmaxnm_f16_z(pg, op1, op2); } __forceinline svuint32_t svmin_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svmin_n_u32_z(pg, op1, op2); } __forceinline svuint16_t svmin_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svmin_n_u16_z(pg, op1, op2); } __forceinline svuint8_t svmin_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svmin_n_u8_z(pg, op1, op2); } __forceinline svfloat32_t svmin_z(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmin_n_f32_z(pg, op1, op2); } __forceinline svint16_t svmin_z(svbool_t pg, svint16_t op1, int16_t op2) { return svmin_n_s16_z(pg, op1, op2); } __forceinline svint8_t svmin_z(svbool_t pg, svint8_t op1, int8_t op2) { return svmin_n_s8_z(pg, op1, op2); } __forceinline svfloat64_t svmin_z(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmin_n_f64_z(pg, op1, op2); } __forceinline svuint64_t svmin_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svmin_n_u64_z(pg, op1, op2); } __forceinline svint64_t svmin_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svmin_s64_z(pg, op1, op2); } __forceinline svuint32_t svmin_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svmin_n_u32_x(pg, op1, op2); } __forceinline svuint8_t svmin_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmin_u8_z(pg, op1, op2); } __forceinline svint32_t svmin_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svmin_s32_z(pg, op1, op2); } __forceinline svint16_t svmin_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svmin_s16_z(pg, op1, op2); } __forceinline svint8_t svmin_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svmin_s8_z(pg, op1, op2); } __forceinline 
svfloat64_t svmin_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmin_f64_z(pg, op1, op2); } __forceinline svfloat32_t svmin_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmin_f32_z(pg, op1, op2); } __forceinline svfloat16_t svmin_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmin_f16_z(pg, op1, op2); } __forceinline svuint64_t svmin_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svmin_u64_x(pg, op1, op2); } __forceinline svuint32_t svmin_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmin_u32_x(pg, op1, op2); } __forceinline svuint16_t svmin_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmin_u16_x(pg, op1, op2); } __forceinline svint32_t svmin_z(svbool_t pg, svint32_t op1, int32_t op2) { return svmin_n_s32_z(pg, op1, op2); } __forceinline svuint8_t svmin_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmin_u8_x(pg, op1, op2); } __forceinline svint64_t svmin_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svmin_s64_x(pg, op1, op2); } __forceinline svint32_t svmin_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svmin_s32_x(pg, op1, op2); } __forceinline svuint16_t svmin_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmin_u16_z(pg, op1, op2); } __forceinline svint16_t svmin_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svmin_s16_x(pg, op1, op2); } __forceinline svfloat64_t svmin_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmin_f64_x(pg, op1, op2); } __forceinline svfloat32_t svmin_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmin_f32_x(pg, op1, op2); } __forceinline svfloat16_t svmin_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmin_f16_x(pg, op1, op2); } __forceinline svuint64_t svmin_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svmin_u64_m(pg, op1, op2); } __forceinline svuint32_t svmin_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmin_u32_m(pg, op1, op2); } __forceinline svuint16_t svmin_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmin_u16_m(pg, op1, op2); } __forceinline svuint8_t svmin_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmin_u8_m(pg, op1, op2); } __forceinline svint64_t svmin_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svmin_s64_m(pg, op1, op2); } __forceinline svint32_t svmin_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svmin_s32_m(pg, op1, op2); } __forceinline svint16_t svmin_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svmin_s16_m(pg, op1, op2); } __forceinline svint8_t svmin_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svmin_s8_m(pg, op1, op2); } __forceinline svfloat64_t svmin_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmin_f64_m(pg, op1, op2); } __forceinline svfloat32_t svmin_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmin_f32_m(pg, op1, op2); } __forceinline svfloat16_t svmin_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmin_f16_m(pg, op1, op2); } __forceinline svint8_t svmin_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svmin_s8_x(pg, op1, op2); } __forceinline svuint64_t svmin_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svmin_n_u64_x(pg, op1, op2); } __forceinline svint64_t svmin_z(svbool_t pg, svint64_t op1, int64_t op2) { return svmin_n_s64_z(pg, op1, op2); } __forceinline svint8_t svmin_m(svbool_t pg, svint8_t op1, int8_t op2) { return svmin_n_s8_m(pg, op1, op2); } __forceinline svint16_t svmin_m(svbool_t pg, svint16_t op1, int16_t op2) { return svmin_n_s16_m(pg, op1, op2); } 
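// Usage sketch (illustrative comment only): the svmax_* and svmin_* overloads compose
// into a per-lane clamp; pg and v are assumed to be a caller-supplied predicate and
// svint32_t vector.
//
//   svint32_t c = svmin_x(pg, svmax_x(pg, v, 0), 255);   // clamp each active lane of v to [0, 255]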
__forceinline svint32_t svmin_m(svbool_t pg, svint32_t op1, int32_t op2) { return svmin_n_s32_m(pg, op1, op2); } __forceinline svint64_t svmin_m(svbool_t pg, svint64_t op1, int64_t op2) { return svmin_n_s64_m(pg, op1, op2); } __forceinline svuint8_t svmin_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svmin_n_u8_m(pg, op1, op2); } __forceinline svuint16_t svmin_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svmin_n_u16_m(pg, op1, op2); } __forceinline svuint32_t svmin_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svmin_n_u32_m(pg, op1, op2); } __forceinline svuint64_t svmin_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svmin_n_u64_m(pg, op1, op2); } __forceinline svfloat32_t svmin_x(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmin_n_f32_x(pg, op1, op2); } __forceinline svfloat64_t svmin_x(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmin_n_f64_x(pg, op1, op2); } __forceinline svuint64_t svmin_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svmin_u64_z(pg, op1, op2); } __forceinline svuint32_t svmin_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmin_u32_z(pg, op1, op2); } __forceinline svint8_t svmin_x(svbool_t pg, svint8_t op1, int8_t op2) { return svmin_n_s8_x(pg, op1, op2); } __forceinline svint16_t svmin_x(svbool_t pg, svint16_t op1, int16_t op2) { return svmin_n_s16_x(pg, op1, op2); } __forceinline svint32_t svmin_x(svbool_t pg, svint32_t op1, int32_t op2) { return svmin_n_s32_x(pg, op1, op2); } __forceinline svint64_t svmin_x(svbool_t pg, svint64_t op1, int64_t op2) { return svmin_n_s64_x(pg, op1, op2); } __forceinline svuint8_t svmin_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svmin_n_u8_x(pg, op1, op2); } __forceinline svuint16_t svmin_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svmin_n_u16_x(pg, op1, op2); } __forceinline svfloat64_t svmin_m(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmin_n_f64_m(pg, op1, op2); } __forceinline svfloat32_t svmin_m(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmin_n_f32_m(pg, op1, op2); } __forceinline svfloat32_t svminnm_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svminnm_f32_z(pg, op1, op2); } __forceinline svfloat16_t svminnm_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svminnm_f16_m(pg, op1, op2); } __forceinline svfloat16_t svminnm_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svminnm_f16_z(pg, op1, op2); } __forceinline svfloat64_t svminnm_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svminnm_f64_x(pg, op1, op2); } __forceinline svfloat64_t svminnm_z(svbool_t pg, svfloat64_t op1, float64_t op2) { return svminnm_n_f64_z(pg, op1, op2); } __forceinline svfloat32_t svminnm_m(svbool_t pg, svfloat32_t op1, float32_t op2) { return svminnm_n_f32_m(pg, op1, op2); } __forceinline svfloat64_t svminnm_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svminnm_f64_z(pg, op1, op2); } __forceinline svfloat64_t svminnm_m(svbool_t pg, svfloat64_t op1, float64_t op2) { return svminnm_n_f64_m(pg, op1, op2); } __forceinline svfloat64_t svminnm_x(svbool_t pg, svfloat64_t op1, float64_t op2) { return svminnm_n_f64_x(pg, op1, op2); } __forceinline svfloat32_t svminnm_z(svbool_t pg, svfloat32_t op1, float32_t op2) { return svminnm_n_f32_z(pg, op1, op2); } __forceinline svfloat16_t svminnm_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svminnm_f16_x(pg, op1, op2); } __forceinline svfloat64_t svminnm_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svminnm_f64_m(pg, op1, op2); } 
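// Usage sketch (illustrative comment only): svminnm_*/svmaxnm_* follow the IEEE 754
// minNum/maxNum rule, so a lane where exactly one operand is a quiet NaN yields the
// numeric operand rather than NaN; pg, x and y are assumed caller-supplied.
//
//   svfloat64_t m = svminnm_x(pg, x, y);   // NaN-tolerant per-lane minimum via svminnm_f64_x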
__forceinline svfloat32_t svminnm_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svminnm_f32_m(pg, op1, op2); }
__forceinline svfloat32_t svminnm_x(svbool_t pg, svfloat32_t op1, float32_t op2) { return svminnm_n_f32_x(pg, op1, op2); }
__forceinline svfloat32_t svminnm_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svminnm_f32_x(pg, op1, op2); }
__forceinline svuint64_t svmul_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svmul_n_u64_x(pg, op1, op2); }
__forceinline svuint32_t svmul_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svmul_n_u32_x(pg, op1, op2); }
__forceinline svuint16_t svmul_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svmul_n_u16_x(pg, op1, op2); }
__forceinline svuint8_t svmul_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svmul_n_u8_x(pg, op1, op2); }
__forceinline svint64_t svmul_x(svbool_t pg, svint64_t op1, int64_t op2) { return svmul_n_s64_x(pg, op1, op2); }
__forceinline svint16_t svmul_x(svbool_t pg, svint16_t op1, int16_t op2) { return svmul_n_s16_x(pg, op1, op2); }
__forceinline svfloat32_t svmul_z(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmul_n_f32_z(pg, op1, op2); }
__forceinline svint8_t svmul_x(svbool_t pg, svint8_t op1, int8_t op2) { return svmul_n_s8_x(pg, op1, op2); }
__forceinline svfloat64_t svmul_x(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmul_n_f64_x(pg, op1, op2); }
__forceinline svfloat32_t svmul_x(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmul_n_f32_x(pg, op1, op2); }
__forceinline svuint64_t svmul_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svmul_n_u64_m(pg, op1, op2); }
__forceinline svint32_t svmul_x(svbool_t pg, svint32_t op1, int32_t op2) { return svmul_n_s32_x(pg, op1, op2); }
__forceinline svfloat64_t svmul_z(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmul_n_f64_z(pg, op1, op2); }
__forceinline svint32_t svmul_z(svbool_t pg, svint32_t op1, int32_t op2) { return svmul_n_s32_z(pg, op1, op2); }
__forceinline svint16_t svmul_z(svbool_t pg, svint16_t op1, int16_t op2) { return svmul_n_s16_z(pg, op1, op2); }
__forceinline svint64_t svmul_z(svbool_t pg, svint64_t op1, int64_t op2) { return svmul_n_s64_z(pg, op1, op2); }
__forceinline svuint8_t svmul_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svmul_n_u8_z(pg, op1, op2); }
__forceinline svuint16_t svmul_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svmul_n_u16_z(pg, op1, op2); }
__forceinline svuint32_t svmul_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svmul_n_u32_z(pg, op1, op2); }
__forceinline svuint64_t svmul_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svmul_n_u64_z(pg, op1, op2); }
template <int N, typename T> __forceinline T __svmul_lane(T op1, T op2) {
    if constexpr(::std::is_same_v<T, svfloat16_t>) { return svmul_lane_f16(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svfloat32_t>) { return svmul_lane_f32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svfloat64_t>) { return svmul_lane_f64(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svuint64_t>) { return svmul_lane_u64(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svuint32_t>) { return svmul_lane_u32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svuint16_t>) { return svmul_lane_u16(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint64_t>) { return svmul_lane_s64(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svmul_lane_s32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svmul_lane_s16(op1, op2, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svmul_lane(op1, op2, imm_index) __svmul_lane<imm_index>(op1, op2)
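// Usage sketch (illustrative comment only): svmul_lane multiplies each element of op1 by
// the op2 element selected by imm_index within the matching 128-bit segment; because
// imm_index becomes the template argument N in the helper above, it must be a constant
// expression. v and w are assumed to be caller-supplied svfloat32_t vectors.
//
//   svfloat32_t p = svmul_lane(v, w, 1);   // expands to __svmul_lane<1>(v, w), i.e. svmul_lane_f32(v, w, 1)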
__forceinline svint16_t svmul_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svmul_s16_m(pg, op1, op2); } __forceinline svint8_t svmul_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svmul_s8_m(pg, op1, op2); } __forceinline svfloat64_t svmul_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmul_f64_m(pg, op1, op2); } __forceinline svfloat32_t svmul_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmul_f32_m(pg, op1, op2); } __forceinline svfloat16_t svmul_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmul_f16_m(pg, op1, op2); } __forceinline svint8_t svmul_z(svbool_t pg, svint8_t op1, int8_t op2) { return svmul_n_s8_z(pg, op1, op2); } __forceinline svuint32_t svmul_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svmul_n_u32_m(pg, op1, op2); } __forceinline svint64_t svmul_m(svbool_t pg, svint64_t op1, int64_t op2) { return svmul_n_s64_m(pg, op1, op2); } __forceinline svuint8_t svmul_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svmul_n_u8_m(pg, op1, op2); } __forceinline svuint32_t svmul_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmul_u32_x(pg, op1, op2); } __forceinline svuint16_t svmul_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmul_u16_x(pg, op1, op2); } __forceinline svuint8_t svmul_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmul_u8_x(pg, op1, op2); } __forceinline svint64_t svmul_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svmul_s64_x(pg, op1, op2); } __forceinline svint32_t svmul_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svmul_s32_x(pg, op1, op2); } __forceinline svint16_t svmul_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svmul_s16_x(pg, op1, op2); } __forceinline svuint64_t svmul_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svmul_u64_x(pg, op1, op2); } __forceinline svint8_t svmul_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svmul_s8_x(pg, op1, op2); } __forceinline svfloat32_t svmul_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmul_f32_x(pg, op1, op2); } __forceinline svfloat16_t svmul_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmul_f16_x(pg, op1, op2); } __forceinline svuint64_t svmul_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svmul_u64_m(pg, op1, op2); } __forceinline svuint32_t svmul_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmul_u32_m(pg, op1, op2); } __forceinline svuint16_t svmul_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmul_u16_m(pg, op1, op2); } __forceinline svuint8_t svmul_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmul_u8_m(pg, op1, op2); } __forceinline svfloat64_t svmul_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmul_f64_x(pg, op1, op2); } __forceinline svuint16_t svmul_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svmul_n_u16_m(pg, op1, op2); } __forceinline svfloat16_t svmul_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmul_f16_z(pg, op1, op2); } __forceinline svfloat64_t svmul_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmul_f64_z(pg, op1, op2); } __forceinline svint32_t svmul_m(svbool_t pg, svint32_t op1, int32_t op2) { return svmul_n_s32_m(pg, op1, op2); } __forceinline svint16_t svmul_m(svbool_t pg, svint16_t op1, int16_t op2) { return svmul_n_s16_m(pg, op1, op2); } __forceinline svint64_t svmul_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svmul_s64_m(pg, op1, op2); } __forceinline svfloat64_t svmul_m(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmul_n_f64_m(pg, 
op1, op2); } __forceinline svfloat32_t svmul_m(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmul_n_f32_m(pg, op1, op2); } __forceinline svfloat32_t svmul_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmul_f32_z(pg, op1, op2); } __forceinline svuint64_t svmul_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svmul_u64_z(pg, op1, op2); } __forceinline svuint16_t svmul_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmul_u16_z(pg, op1, op2); } __forceinline svuint8_t svmul_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmul_u8_z(pg, op1, op2); } __forceinline svint64_t svmul_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svmul_s64_z(pg, op1, op2); } __forceinline svint32_t svmul_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svmul_s32_z(pg, op1, op2); } __forceinline svint16_t svmul_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svmul_s16_z(pg, op1, op2); } __forceinline svint8_t svmul_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svmul_s8_z(pg, op1, op2); } __forceinline svuint32_t svmul_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmul_u32_z(pg, op1, op2); } __forceinline svint8_t svmul_m(svbool_t pg, svint8_t op1, int8_t op2) { return svmul_n_s8_m(pg, op1, op2); } __forceinline svint32_t svmul_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svmul_s32_m(pg, op1, op2); } __forceinline svuint16_t svmulh_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmulh_u16_z(pg, op1, op2); } __forceinline svuint32_t svmulh_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmulh_u32_z(pg, op1, op2); } __forceinline svuint16_t svmulh_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmulh_u16_x(pg, op1, op2); } __forceinline svuint8_t svmulh_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmulh_u8_z(pg, op1, op2); } __forceinline svint64_t svmulh_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svmulh_s64_z(pg, op1, op2); } __forceinline svint32_t svmulh_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svmulh_s32_z(pg, op1, op2); } __forceinline svint16_t svmulh_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svmulh_s16_z(pg, op1, op2); } __forceinline svint8_t svmulh_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svmulh_s8_z(pg, op1, op2); } __forceinline svuint64_t svmulh_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svmulh_u64_x(pg, op1, op2); } __forceinline svuint32_t svmulh_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmulh_u32_x(pg, op1, op2); } __forceinline svuint64_t svmulh_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svmulh_u64_m(pg, op1, op2); } __forceinline svint64_t svmulh_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svmulh_s64_x(pg, op1, op2); } __forceinline svint16_t svmulh_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svmulh_s16_x(pg, op1, op2); } __forceinline svint8_t svmulh_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svmulh_s8_x(pg, op1, op2); } __forceinline svuint32_t svmulh_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmulh_u32_m(pg, op1, op2); } __forceinline svuint16_t svmulh_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmulh_u16_m(pg, op1, op2); } __forceinline svuint8_t svmulh_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmulh_u8_m(pg, op1, op2); } __forceinline svint64_t svmulh_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svmulh_s64_m(pg, op1, op2); } __forceinline svuint64_t svmulh_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return 
svmulh_u64_z(pg, op1, op2); } __forceinline svint32_t svmulh_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svmulh_s32_m(pg, op1, op2); } __forceinline svuint8_t svmulh_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmulh_u8_x(pg, op1, op2); } __forceinline svint8_t svmulh_m(svbool_t pg, svint8_t op1, int8_t op2) { return svmulh_n_s8_m(pg, op1, op2); } __forceinline svuint32_t svmulh_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svmulh_n_u32_x(pg, op1, op2); } __forceinline svint32_t svmulh_m(svbool_t pg, svint32_t op1, int32_t op2) { return svmulh_n_s32_m(pg, op1, op2); } __forceinline svint16_t svmulh_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svmulh_s16_m(pg, op1, op2); } __forceinline svuint64_t svmulh_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svmulh_n_u64_x(pg, op1, op2); } __forceinline svint8_t svmulh_z(svbool_t pg, svint8_t op1, int8_t op2) { return svmulh_n_s8_z(pg, op1, op2); } __forceinline svint16_t svmulh_z(svbool_t pg, svint16_t op1, int16_t op2) { return svmulh_n_s16_z(pg, op1, op2); } __forceinline svint32_t svmulh_z(svbool_t pg, svint32_t op1, int32_t op2) { return svmulh_n_s32_z(pg, op1, op2); } __forceinline svint64_t svmulh_z(svbool_t pg, svint64_t op1, int64_t op2) { return svmulh_n_s64_z(pg, op1, op2); } __forceinline svuint8_t svmulh_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svmulh_n_u8_z(pg, op1, op2); } __forceinline svuint16_t svmulh_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svmulh_n_u16_z(pg, op1, op2); } __forceinline svuint32_t svmulh_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svmulh_n_u32_z(pg, op1, op2); } __forceinline svuint64_t svmulh_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svmulh_n_u64_z(pg, op1, op2); } __forceinline svuint16_t svmulh_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svmulh_n_u16_x(pg, op1, op2); } __forceinline svuint8_t svmulh_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svmulh_n_u8_x(pg, op1, op2); } __forceinline svint64_t svmulh_x(svbool_t pg, svint64_t op1, int64_t op2) { return svmulh_n_s64_x(pg, op1, op2); } __forceinline svint32_t svmulh_x(svbool_t pg, svint32_t op1, int32_t op2) { return svmulh_n_s32_x(pg, op1, op2); } __forceinline svint16_t svmulh_x(svbool_t pg, svint16_t op1, int16_t op2) { return svmulh_n_s16_x(pg, op1, op2); } __forceinline svint8_t svmulh_x(svbool_t pg, svint8_t op1, int8_t op2) { return svmulh_n_s8_x(pg, op1, op2); } __forceinline svuint64_t svmulh_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svmulh_n_u64_m(pg, op1, op2); } __forceinline svuint32_t svmulh_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svmulh_n_u32_m(pg, op1, op2); } __forceinline svuint16_t svmulh_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svmulh_n_u16_m(pg, op1, op2); } __forceinline svuint8_t svmulh_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svmulh_n_u8_m(pg, op1, op2); } __forceinline svint64_t svmulh_m(svbool_t pg, svint64_t op1, int64_t op2) { return svmulh_n_s64_m(pg, op1, op2); } __forceinline svint16_t svmulh_m(svbool_t pg, svint16_t op1, int16_t op2) { return svmulh_n_s16_m(pg, op1, op2); } __forceinline svint8_t svmulh_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svmulh_s8_m(pg, op1, op2); } __forceinline svint32_t svmulh_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svmulh_s32_x(pg, op1, op2); } __forceinline svfloat64_t svmulx_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmulx_f64_x(pg, op1, op2); } __forceinline svfloat16_t svmulx_m(svbool_t pg, svfloat16_t 
op1, svfloat16_t op2) { return svmulx_f16_m(pg, op1, op2); }
__forceinline svfloat32_t svmulx_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmulx_f32_m(pg, op1, op2); }
__forceinline svfloat64_t svmulx_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmulx_f64_m(pg, op1, op2); }
__forceinline svfloat16_t svmulx_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmulx_f16_x(pg, op1, op2); }
__forceinline svfloat32_t svmulx_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmulx_f32_x(pg, op1, op2); }
__forceinline svfloat16_t svmulx_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmulx_f16_z(pg, op1, op2); }
__forceinline svfloat32_t svmulx_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmulx_f32_z(pg, op1, op2); }
__forceinline svfloat64_t svmulx_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmulx_f64_z(pg, op1, op2); }
__forceinline svfloat32_t svmulx_m(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmulx_n_f32_m(pg, op1, op2); }
__forceinline svfloat64_t svmulx_m(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmulx_n_f64_m(pg, op1, op2); }
__forceinline svfloat32_t svmulx_x(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmulx_n_f32_x(pg, op1, op2); }
__forceinline svfloat64_t svmulx_x(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmulx_n_f64_x(pg, op1, op2); }
__forceinline svfloat64_t svmulx_z(svbool_t pg, svfloat64_t op1, float64_t op2) { return svmulx_n_f64_z(pg, op1, op2); }
__forceinline svfloat32_t svmulx_z(svbool_t pg, svfloat32_t op1, float32_t op2) { return svmulx_n_f32_z(pg, op1, op2); }
__forceinline svfloat16_t svmla_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svmla_f16_m(pg, op1, op2, op3); }
__forceinline svfloat32_t svmla_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svmla_f32_m(pg, op1, op2, op3); }
__forceinline svfloat64_t svmla_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svmla_f64_m(pg, op1, op2, op3); }
__forceinline svfloat16_t svmla_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svmla_f16_x(pg, op1, op2, op3); }
__forceinline svfloat32_t svmla_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svmla_f32_x(pg, op1, op2, op3); }
__forceinline svfloat64_t svmla_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svmla_f64_x(pg, op1, op2, op3); }
__forceinline svfloat16_t svmla_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svmla_f16_z(pg, op1, op2, op3); }
__forceinline svfloat64_t svmla_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svmla_f64_z(pg, op1, op2, op3); }
__forceinline svfloat32_t svmla_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svmla_f32_z(pg, op1, op2, op3); }
template <int N, typename T> __forceinline T __svmla_lane(T op1, T op2, T op3) {
    if constexpr(::std::is_same_v<T, svfloat16_t>) { return svmla_lane_f16(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svfloat32_t>) { return svmla_lane_f32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svfloat64_t>) { return svmla_lane_f64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svmla_lane_s16(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svmla_lane_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint64_t>) { return svmla_lane_s64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svuint16_t>) { return svmla_lane_u16(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svuint32_t>) { return svmla_lane_u32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svuint64_t>) { return svmla_lane_u64(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svmla_lane(op1, op2, op3, imm_index) __svmla_lane<imm_index>(op1, op2, op3)
__forceinline svfloat32_t svmla_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svmla_n_f32_m(pg, op1, op2, op3); }
__forceinline svfloat64_t svmla_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svmla_n_f64_m(pg, op1, op2, op3); }
__forceinline svfloat32_t svmla_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svmla_n_f32_x(pg, op1, op2, op3); }
__forceinline svfloat64_t svmla_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svmla_n_f64_x(pg, op1, op2, op3); }
__forceinline svfloat32_t svmla_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svmla_n_f32_z(pg, op1, op2, op3); }
__forceinline svfloat64_t svmla_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svmla_n_f64_z(pg, op1, op2, op3); }
__forceinline svint32_t svmla_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) { return svmla_n_s32_x(pg, op1, op2, op3); }
__forceinline svint16_t svmla_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) { return svmla_n_s16_x(pg, op1, op2, op3); }
__forceinline svint8_t svmla_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) { return svmla_n_s8_m(pg, op1, op2, op3); }
__forceinline svint16_t svmla_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) { return svmla_n_s16_m(pg, op1, op2, op3); }
__forceinline svint32_t svmla_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) { return svmla_n_s32_m(pg, op1, op2, op3); }
__forceinline svint64_t svmla_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) { return svmla_n_s64_m(pg, op1, op2, op3); }
__forceinline svuint8_t svmla_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) { return svmla_n_u8_m(pg, op1, op2, op3); }
__forceinline svuint16_t svmla_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) { return svmla_n_u16_m(pg, op1, op2, op3); }
__forceinline svuint32_t svmla_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) { return svmla_n_u32_m(pg, op1, op2, op3); }
__forceinline svuint64_t svmla_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) { return svmla_n_u64_m(pg, op1, op2, op3); }
__forceinline svint8_t svmla_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) { return svmla_n_s8_x(pg, op1, op2, op3); }
__forceinline svint64_t svmla_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) { return svmla_n_s64_x(pg, op1, op2, op3); }
__forceinline svuint16_t svmla_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) { return svmla_n_u16_x(pg, op1, op2, op3); }
__forceinline svuint8_t svmla_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) { return svmla_n_u8_z(pg, op1, op2, op3); }
__forceinline svuint16_t svmla_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) { return svmla_n_u16_z(pg, op1, op2, op3); }
__forceinline svuint32_t svmla_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) { return svmla_n_u32_z(pg, op1, op2, op3); }
__forceinline svuint64_t svmla_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) { return svmla_n_u64_z(pg, op1, op2, op3); }
__forceinline svuint8_t svmla_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) { return svmla_n_u8_x(pg, op1,
op2, op3); } __forceinline svint16_t svmla_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) { return svmla_n_s16_z(pg, op1, op2, op3); } __forceinline svuint64_t svmla_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) { return svmla_n_u64_x(pg, op1, op2, op3); } __forceinline svuint32_t svmla_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) { return svmla_n_u32_x(pg, op1, op2, op3); } __forceinline svint8_t svmla_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) { return svmla_n_s8_z(pg, op1, op2, op3); } __forceinline svint32_t svmla_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) { return svmla_n_s32_z(pg, op1, op2, op3); } __forceinline svuint32_t svmla_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svmla_u32_z(pg, op1, op2, op3); } __forceinline svuint64_t svmla_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svmla_u64_m(pg, op1, op2, op3); } __forceinline svuint32_t svmla_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svmla_u32_m(pg, op1, op2, op3); } __forceinline svuint16_t svmla_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svmla_u16_m(pg, op1, op2, op3); } __forceinline svint8_t svmla_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) { return svmla_s8_x(pg, op1, op2, op3); } __forceinline svuint8_t svmla_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svmla_u8_m(pg, op1, op2, op3); } __forceinline svint32_t svmla_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) { return svmla_s32_m(pg, op1, op2, op3); } __forceinline svint16_t svmla_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) { return svmla_s16_m(pg, op1, op2, op3); } __forceinline svint8_t svmla_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) { return svmla_s8_m(pg, op1, op2, op3); } __forceinline svint64_t svmla_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) { return svmla_s64_m(pg, op1, op2, op3); } __forceinline svuint64_t svmla_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svmla_u64_z(pg, op1, op2, op3); } __forceinline svint32_t svmla_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) { return svmla_s32_x(pg, op1, op2, op3); } __forceinline svuint16_t svmla_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svmla_u16_z(pg, op1, op2, op3); } __forceinline svuint8_t svmla_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svmla_u8_z(pg, op1, op2, op3); } __forceinline svint64_t svmla_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) { return svmla_s64_z(pg, op1, op2, op3); } __forceinline svint32_t svmla_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) { return svmla_s32_z(pg, op1, op2, op3); } __forceinline svint16_t svmla_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) { return svmla_s16_z(pg, op1, op2, op3); } __forceinline svint8_t svmla_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) { return svmla_s8_z(pg, op1, op2, op3); } __forceinline svint16_t svmla_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) { return svmla_s16_x(pg, op1, op2, op3); } __forceinline svuint64_t svmla_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svmla_u64_x(pg, op1, op2, op3); } __forceinline svuint32_t svmla_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svmla_u32_x(pg, op1, op2, op3); } __forceinline svuint16_t svmla_x(svbool_t pg, svuint16_t op1, 
svuint16_t op2, svuint16_t op3) { return svmla_u16_x(pg, op1, op2, op3); } __forceinline svuint8_t svmla_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svmla_u8_x(pg, op1, op2, op3); } __forceinline svint64_t svmla_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) { return svmla_s64_x(pg, op1, op2, op3); } __forceinline svint64_t svmla_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) { return svmla_n_s64_z(pg, op1, op2, op3); } __forceinline svfloat64_t svmad_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svmad_f64_z(pg, op1, op2, op3); } __forceinline svfloat32_t svmad_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svmad_f32_z(pg, op1, op2, op3); } __forceinline svfloat16_t svmad_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svmad_f16_z(pg, op1, op2, op3); } __forceinline svfloat64_t svmad_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svmad_f64_m(pg, op1, op2, op3); } __forceinline svfloat64_t svmad_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svmad_f64_x(pg, op1, op2, op3); } __forceinline svfloat32_t svmad_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svmad_f32_x(pg, op1, op2, op3); } __forceinline svfloat16_t svmad_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svmad_f16_x(pg, op1, op2, op3); } __forceinline svfloat64_t svmad_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svmad_n_f64_m(pg, op1, op2, op3); } __forceinline svfloat64_t svmad_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svmad_n_f64_z(pg, op1, op2, op3); } __forceinline svfloat32_t svmad_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svmad_n_f32_z(pg, op1, op2, op3); } __forceinline svfloat64_t svmad_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svmad_n_f64_x(pg, op1, op2, op3); } __forceinline svfloat32_t svmad_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svmad_n_f32_x(pg, op1, op2, op3); } __forceinline svfloat32_t svmad_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svmad_n_f32_m(pg, op1, op2, op3); } __forceinline svfloat16_t svmad_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svmad_f16_m(pg, op1, op2, op3); } __forceinline svfloat32_t svmad_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svmad_f32_m(pg, op1, op2, op3); } __forceinline svint16_t svmad_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) { return svmad_s16_m(pg, op1, op2, op3); } __forceinline svint16_t svmad_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) { return svmad_n_s16_z(pg, op1, op2, op3); } __forceinline svuint16_t svmad_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) { return svmad_n_u16_z(pg, op1, op2, op3); } __forceinline svuint32_t svmad_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svmad_u32_z(pg, op1, op2, op3); } __forceinline svuint16_t svmad_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svmad_u16_z(pg, op1, op2, op3); } __forceinline svuint8_t svmad_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svmad_u8_z(pg, op1, op2, op3); } __forceinline svint64_t svmad_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) { return svmad_s64_z(pg, op1, op2, op3); } __forceinline svint32_t 
svmad_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) { return svmad_s32_z(pg, op1, op2, op3); } __forceinline svint16_t svmad_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) { return svmad_s16_z(pg, op1, op2, op3); } __forceinline svint8_t svmad_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) { return svmad_s8_z(pg, op1, op2, op3); } __forceinline svuint64_t svmad_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svmad_u64_x(pg, op1, op2, op3); } __forceinline svuint32_t svmad_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svmad_u32_x(pg, op1, op2, op3); } __forceinline svuint8_t svmad_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svmad_u8_x(pg, op1, op2, op3); } __forceinline svint64_t svmad_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) { return svmad_s64_x(pg, op1, op2, op3); } __forceinline svint32_t svmad_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) { return svmad_s32_x(pg, op1, op2, op3); } __forceinline svint16_t svmad_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) { return svmad_s16_x(pg, op1, op2, op3); } __forceinline svint8_t svmad_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) { return svmad_s8_x(pg, op1, op2, op3); } __forceinline svuint64_t svmad_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svmad_u64_m(pg, op1, op2, op3); } __forceinline svuint32_t svmad_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svmad_u32_m(pg, op1, op2, op3); } __forceinline svuint16_t svmad_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svmad_u16_m(pg, op1, op2, op3); } __forceinline svuint8_t svmad_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svmad_u8_m(pg, op1, op2, op3); } __forceinline svint64_t svmad_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) { return svmad_s64_m(pg, op1, op2, op3); } __forceinline svint32_t svmad_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) { return svmad_s32_m(pg, op1, op2, op3); } __forceinline svuint64_t svmad_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svmad_u64_z(pg, op1, op2, op3); } __forceinline svuint32_t svmad_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) { return svmad_n_u32_z(pg, op1, op2, op3); } __forceinline svuint8_t svmad_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) { return svmad_n_u8_z(pg, op1, op2, op3); } __forceinline svint64_t svmad_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) { return svmad_n_s64_z(pg, op1, op2, op3); } __forceinline svint32_t svmad_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) { return svmad_n_s32_z(pg, op1, op2, op3); } __forceinline svint8_t svmad_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) { return svmad_s8_m(pg, op1, op2, op3); } __forceinline svint8_t svmad_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) { return svmad_n_s8_z(pg, op1, op2, op3); } __forceinline svuint64_t svmad_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) { return svmad_n_u64_x(pg, op1, op2, op3); } __forceinline svuint32_t svmad_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) { return svmad_n_u32_x(pg, op1, op2, op3); } __forceinline svuint16_t svmad_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) { return svmad_n_u16_x(pg, op1, op2, op3); } __forceinline svuint8_t svmad_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) { return 
svmad_n_u8_x(pg, op1, op2, op3); }
__forceinline svint64_t svmad_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) { return svmad_n_s64_x(pg, op1, op2, op3); }
__forceinline svint32_t svmad_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) { return svmad_n_s32_x(pg, op1, op2, op3); }
__forceinline svint16_t svmad_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) { return svmad_n_s16_x(pg, op1, op2, op3); }
__forceinline svint8_t svmad_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) { return svmad_n_s8_x(pg, op1, op2, op3); }
__forceinline svuint64_t svmad_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) { return svmad_n_u64_m(pg, op1, op2, op3); }
__forceinline svuint32_t svmad_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) { return svmad_n_u32_m(pg, op1, op2, op3); }
__forceinline svuint16_t svmad_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) { return svmad_n_u16_m(pg, op1, op2, op3); }
__forceinline svuint8_t svmad_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) { return svmad_n_u8_m(pg, op1, op2, op3); }
__forceinline svint64_t svmad_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) { return svmad_n_s64_m(pg, op1, op2, op3); }
__forceinline svint32_t svmad_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) { return svmad_n_s32_m(pg, op1, op2, op3); }
__forceinline svint16_t svmad_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) { return svmad_n_s16_m(pg, op1, op2, op3); }
__forceinline svint8_t svmad_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) { return svmad_n_s8_m(pg, op1, op2, op3); }
__forceinline svuint16_t svmad_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svmad_u16_x(pg, op1, op2, op3); }
__forceinline svuint64_t svmad_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) { return svmad_n_u64_z(pg, op1, op2, op3); }
template <int N, typename T> __forceinline T __svmls_lane(T op1, T op2, T op3) {
    if constexpr(::std::is_same_v<T, svfloat64_t>) { return svmls_lane_f64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svfloat32_t>) { return svmls_lane_f32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svfloat16_t>) { return svmls_lane_f16(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint64_t>) { return svmls_lane_s64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svmls_lane_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svmls_lane_s16(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svuint64_t>) { return svmls_lane_u64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svuint32_t>) { return svmls_lane_u32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svuint16_t>) { return svmls_lane_u16(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svmls_lane(op1, op2, op3, imm_index) __svmls_lane<imm_index>(op1, op2, op3)
__forceinline svfloat16_t svmls_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svmls_f16_z(pg, op1, op2, op3); }
__forceinline svfloat32_t svmls_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svmls_f32_z(pg, op1, op2, op3); }
__forceinline svfloat64_t svmls_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svmls_f64_z(pg, op1, op2, op3); }
__forceinline svfloat16_t svmls_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svmls_f16_m(pg, op1, op2, op3); }
__forceinline svfloat32_t svmls_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svmls_f32_m(pg, op1, op2, op3); }
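// The type-generic svmla_*/svmls_* overloads above select the underlying
// intrinsic from the vector element type, and the svmla_lane/svmls_lane macros
// forward the immediate lane index as a template argument so it stays a
// compile-time constant. Usage sketch (helper names here are hypothetical,
// for illustration only):
//
//     // acc[i] += x[i] * (element 0 of y's 128-bit segment), for every lane
//     svfloat32_t fma_lane0(svfloat32_t acc, svfloat32_t x, svfloat32_t y) {
//         return svmla_lane(acc, x, y, 0);   // expands to __svmla_lane<0>(acc, x, y)
//     }
//
//     // acc[i] += x[i] * scale where pg is active; inactive lanes keep acc[i]
//     svfloat32_t axpy(svbool_t pg, svfloat32_t acc, svfloat32_t x, float32_t scale) {
//         return svmla_m(pg, acc, x, scale); // resolves to svmla_n_f32_m
//     }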
__forceinline svfloat64_t svmls_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svmls_f64_m(pg, op1, op2, op3); } __forceinline svfloat16_t svmls_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svmls_f16_x(pg, op1, op2, op3); } __forceinline svfloat32_t svmls_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svmls_f32_x(pg, op1, op2, op3); } __forceinline svfloat64_t svmls_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svmls_f64_x(pg, op1, op2, op3); } __forceinline svfloat32_t svmls_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svmls_n_f32_m(pg, op1, op2, op3); } __forceinline svfloat64_t svmls_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svmls_n_f64_z(pg, op1, op2, op3); } __forceinline svfloat32_t svmls_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svmls_n_f32_z(pg, op1, op2, op3); } __forceinline svfloat64_t svmls_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svmls_n_f64_m(pg, op1, op2, op3); } __forceinline svfloat32_t svmls_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svmls_n_f32_x(pg, op1, op2, op3); } __forceinline svfloat64_t svmls_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svmls_n_f64_x(pg, op1, op2, op3); } __forceinline svuint8_t svmls_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svmls_u8_x(pg, op1, op2, op3); } __forceinline svuint16_t svmls_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svmls_u16_x(pg, op1, op2, op3); } __forceinline svuint32_t svmls_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svmls_u32_x(pg, op1, op2, op3); } __forceinline svuint64_t svmls_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svmls_u64_x(pg, op1, op2, op3); } __forceinline svint64_t svmls_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) { return svmls_s64_x(pg, op1, op2, op3); } __forceinline svint8_t svmls_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) { return svmls_s8_z(pg, op1, op2, op3); } __forceinline svint16_t svmls_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) { return svmls_s16_z(pg, op1, op2, op3); } __forceinline svint32_t svmls_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) { return svmls_s32_z(pg, op1, op2, op3); } __forceinline svint64_t svmls_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) { return svmls_s64_z(pg, op1, op2, op3); } __forceinline svuint8_t svmls_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svmls_u8_z(pg, op1, op2, op3); } __forceinline svuint16_t svmls_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svmls_u16_z(pg, op1, op2, op3); } __forceinline svint32_t svmls_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) { return svmls_s32_x(pg, op1, op2, op3); } __forceinline svint16_t svmls_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) { return svmls_s16_x(pg, op1, op2, op3); } __forceinline svint8_t svmls_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) { return svmls_s8_x(pg, op1, op2, op3); } __forceinline svint8_t svmls_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) { return svmls_s8_m(pg, op1, op2, op3); } __forceinline svint16_t svmls_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) { return svmls_s16_m(pg, op1, op2, op3); } __forceinline 
svint32_t svmls_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) { return svmls_s32_m(pg, op1, op2, op3); } __forceinline svuint32_t svmls_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svmls_u32_z(pg, op1, op2, op3); } __forceinline svint64_t svmls_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) { return svmls_s64_m(pg, op1, op2, op3); } __forceinline svuint16_t svmls_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svmls_u16_m(pg, op1, op2, op3); } __forceinline svuint32_t svmls_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svmls_u32_m(pg, op1, op2, op3); } __forceinline svuint64_t svmls_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svmls_u64_m(pg, op1, op2, op3); } __forceinline svuint8_t svmls_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svmls_u8_m(pg, op1, op2, op3); } __forceinline svuint64_t svmls_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) { return svmls_n_u64_z(pg, op1, op2, op3); } __forceinline svuint32_t svmls_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) { return svmls_n_u32_z(pg, op1, op2, op3); } __forceinline svuint16_t svmls_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) { return svmls_n_u16_z(pg, op1, op2, op3); } __forceinline svuint8_t svmls_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) { return svmls_n_u8_z(pg, op1, op2, op3); } __forceinline svint64_t svmls_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) { return svmls_n_s64_z(pg, op1, op2, op3); } __forceinline svint32_t svmls_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) { return svmls_n_s32_z(pg, op1, op2, op3); } __forceinline svint16_t svmls_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) { return svmls_n_s16_z(pg, op1, op2, op3); } __forceinline svint8_t svmls_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) { return svmls_n_s8_z(pg, op1, op2, op3); } __forceinline svuint64_t svmls_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) { return svmls_n_u64_x(pg, op1, op2, op3); } __forceinline svuint16_t svmls_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) { return svmls_n_u16_x(pg, op1, op2, op3); } __forceinline svuint8_t svmls_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) { return svmls_n_u8_x(pg, op1, op2, op3); } __forceinline svint64_t svmls_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) { return svmls_n_s64_x(pg, op1, op2, op3); } __forceinline svint32_t svmls_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) { return svmls_n_s32_x(pg, op1, op2, op3); } __forceinline svint8_t svmls_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) { return svmls_n_s8_m(pg, op1, op2, op3); } __forceinline svint16_t svmls_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) { return svmls_n_s16_m(pg, op1, op2, op3); } __forceinline svint32_t svmls_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) { return svmls_n_s32_m(pg, op1, op2, op3); } __forceinline svint64_t svmls_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) { return svmls_n_s64_m(pg, op1, op2, op3); } __forceinline svuint8_t svmls_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) { return svmls_n_u8_m(pg, op1, op2, op3); } __forceinline svuint64_t svmls_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svmls_u64_z(pg, op1, op2, op3); } __forceinline svuint16_t svmls_m(svbool_t pg, svuint16_t op1, svuint16_t op2, 
uint16_t op3) { return svmls_n_u16_m(pg, op1, op2, op3); } __forceinline svuint64_t svmls_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) { return svmls_n_u64_m(pg, op1, op2, op3); } __forceinline svint8_t svmls_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) { return svmls_n_s8_x(pg, op1, op2, op3); } __forceinline svint16_t svmls_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) { return svmls_n_s16_x(pg, op1, op2, op3); } __forceinline svuint32_t svmls_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) { return svmls_n_u32_m(pg, op1, op2, op3); } __forceinline svuint32_t svmls_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) { return svmls_n_u32_x(pg, op1, op2, op3); } __forceinline svfloat32_t svmsb_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svmsb_n_f32_x(pg, op1, op2, op3); } __forceinline svfloat64_t svmsb_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svmsb_n_f64_m(pg, op1, op2, op3); } __forceinline svfloat32_t svmsb_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svmsb_n_f32_m(pg, op1, op2, op3); } __forceinline svfloat64_t svmsb_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svmsb_n_f64_x(pg, op1, op2, op3); } __forceinline svfloat64_t svmsb_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svmsb_n_f64_z(pg, op1, op2, op3); } __forceinline svfloat32_t svmsb_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svmsb_n_f32_z(pg, op1, op2, op3); } __forceinline svfloat16_t svmsb_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svmsb_f16_m(pg, op1, op2, op3); } __forceinline svfloat32_t svmsb_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svmsb_f32_m(pg, op1, op2, op3); } __forceinline svfloat64_t svmsb_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svmsb_f64_m(pg, op1, op2, op3); } __forceinline svfloat16_t svmsb_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svmsb_f16_x(pg, op1, op2, op3); } __forceinline svfloat32_t svmsb_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svmsb_f32_x(pg, op1, op2, op3); } __forceinline svfloat64_t svmsb_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svmsb_f64_x(pg, op1, op2, op3); } __forceinline svfloat32_t svmsb_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svmsb_f32_z(pg, op1, op2, op3); } __forceinline svfloat16_t svmsb_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svmsb_f16_z(pg, op1, op2, op3); } __forceinline svfloat64_t svmsb_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svmsb_f64_z(pg, op1, op2, op3); } __forceinline svint8_t svmsb_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) { return svmsb_s8_x(pg, op1, op2, op3); } __forceinline svuint64_t svmsb_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) { return svmsb_n_u64_m(pg, op1, op2, op3); } __forceinline svuint32_t svmsb_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) { return svmsb_n_u32_m(pg, op1, op2, op3); } __forceinline svuint16_t svmsb_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) { return svmsb_n_u16_m(pg, op1, op2, op3); } __forceinline svuint8_t svmsb_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) { return svmsb_n_u8_m(pg, op1, op2, op3); } __forceinline svint64_t 
svmsb_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) { return svmsb_n_s64_m(pg, op1, op2, op3); } __forceinline svint32_t svmsb_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) { return svmsb_n_s32_m(pg, op1, op2, op3); } __forceinline svint16_t svmsb_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) { return svmsb_n_s16_m(pg, op1, op2, op3); } __forceinline svint8_t svmsb_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) { return svmsb_n_s8_m(pg, op1, op2, op3); } __forceinline svuint64_t svmsb_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svmsb_u64_z(pg, op1, op2, op3); } __forceinline svuint32_t svmsb_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svmsb_u32_z(pg, op1, op2, op3); } __forceinline svuint16_t svmsb_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svmsb_u16_z(pg, op1, op2, op3); } __forceinline svint16_t svmsb_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) { return svmsb_n_s16_x(pg, op1, op2, op3); } __forceinline svint64_t svmsb_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) { return svmsb_n_s64_x(pg, op1, op2, op3); } __forceinline svuint64_t svmsb_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) { return svmsb_n_u64_z(pg, op1, op2, op3); } __forceinline svuint32_t svmsb_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) { return svmsb_n_u32_z(pg, op1, op2, op3); } __forceinline svuint16_t svmsb_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) { return svmsb_n_u16_z(pg, op1, op2, op3); } __forceinline svuint8_t svmsb_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) { return svmsb_n_u8_z(pg, op1, op2, op3); } __forceinline svint64_t svmsb_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) { return svmsb_n_s64_z(pg, op1, op2, op3); } __forceinline svint32_t svmsb_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) { return svmsb_n_s32_z(pg, op1, op2, op3); } __forceinline svint16_t svmsb_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) { return svmsb_n_s16_z(pg, op1, op2, op3); } __forceinline svint8_t svmsb_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) { return svmsb_n_s8_z(pg, op1, op2, op3); } __forceinline svuint64_t svmsb_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) { return svmsb_n_u64_x(pg, op1, op2, op3); } __forceinline svuint32_t svmsb_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) { return svmsb_n_u32_x(pg, op1, op2, op3); } __forceinline svuint16_t svmsb_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) { return svmsb_n_u16_x(pg, op1, op2, op3); } __forceinline svuint8_t svmsb_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) { return svmsb_n_u8_x(pg, op1, op2, op3); } __forceinline svint32_t svmsb_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) { return svmsb_n_s32_x(pg, op1, op2, op3); } __forceinline svuint8_t svmsb_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svmsb_u8_z(pg, op1, op2, op3); } __forceinline svint8_t svmsb_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) { return svmsb_n_s8_x(pg, op1, op2, op3); } __forceinline svint32_t svmsb_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) { return svmsb_s32_z(pg, op1, op2, op3); } __forceinline svint64_t svmsb_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) { return svmsb_s64_z(pg, op1, op2, op3); } __forceinline svint16_t svmsb_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) { return 
svmsb_s16_m(pg, op1, op2, op3); } __forceinline svint32_t svmsb_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) { return svmsb_s32_m(pg, op1, op2, op3); } __forceinline svint64_t svmsb_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) { return svmsb_s64_m(pg, op1, op2, op3); } __forceinline svuint8_t svmsb_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svmsb_u8_m(pg, op1, op2, op3); } __forceinline svuint16_t svmsb_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svmsb_u16_m(pg, op1, op2, op3); } __forceinline svuint32_t svmsb_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svmsb_u32_m(pg, op1, op2, op3); } __forceinline svuint64_t svmsb_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svmsb_u64_m(pg, op1, op2, op3); } __forceinline svint8_t svmsb_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) { return svmsb_s8_m(pg, op1, op2, op3); } __forceinline svint16_t svmsb_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) { return svmsb_s16_x(pg, op1, op2, op3); } __forceinline svint8_t svmsb_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) { return svmsb_s8_z(pg, op1, op2, op3); } __forceinline svuint64_t svmsb_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svmsb_u64_x(pg, op1, op2, op3); } __forceinline svuint16_t svmsb_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svmsb_u16_x(pg, op1, op2, op3); } __forceinline svuint8_t svmsb_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svmsb_u8_x(pg, op1, op2, op3); } __forceinline svint64_t svmsb_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) { return svmsb_s64_x(pg, op1, op2, op3); } __forceinline svint32_t svmsb_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) { return svmsb_s32_x(pg, op1, op2, op3); } __forceinline svuint32_t svmsb_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svmsb_u32_x(pg, op1, op2, op3); } __forceinline svint16_t svmsb_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) { return svmsb_s16_z(pg, op1, op2, op3); } __forceinline svfloat64_t svnmla_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svnmla_n_f64_x(pg, op1, op2, op3); } __forceinline svfloat32_t svnmla_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svnmla_n_f32_x(pg, op1, op2, op3); } __forceinline svfloat64_t svnmla_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svnmla_n_f64_m(pg, op1, op2, op3); } __forceinline svfloat32_t svnmla_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svnmla_n_f32_m(pg, op1, op2, op3); } __forceinline svfloat32_t svnmla_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svnmla_f32_z(pg, op1, op2, op3); } __forceinline svfloat64_t svnmla_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svnmla_f64_m(pg, op1, op2, op3); } __forceinline svfloat64_t svnmla_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svnmla_f64_x(pg, op1, op2, op3); } __forceinline svfloat32_t svnmla_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svnmla_f32_x(pg, op1, op2, op3); } __forceinline svfloat16_t svnmla_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svnmla_f16_x(pg, op1, op2, op3); } __forceinline svfloat32_t svnmla_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t 
op3) { return svnmla_f32_m(pg, op1, op2, op3); } __forceinline svfloat16_t svnmla_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svnmla_f16_m(pg, op1, op2, op3); } __forceinline svfloat16_t svnmla_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svnmla_f16_z(pg, op1, op2, op3); } __forceinline svfloat32_t svnmla_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svnmla_n_f32_z(pg, op1, op2, op3); } __forceinline svfloat64_t svnmla_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svnmla_f64_z(pg, op1, op2, op3); } __forceinline svfloat64_t svnmla_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svnmla_n_f64_z(pg, op1, op2, op3); } __forceinline svfloat64_t svnmad_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svnmad_f64_z(pg, op1, op2, op3); } __forceinline svfloat32_t svnmad_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svnmad_n_f32_z(pg, op1, op2, op3); } __forceinline svfloat64_t svnmad_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svnmad_n_f64_z(pg, op1, op2, op3); } __forceinline svfloat32_t svnmad_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svnmad_n_f32_x(pg, op1, op2, op3); } __forceinline svfloat64_t svnmad_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svnmad_n_f64_m(pg, op1, op2, op3); } __forceinline svfloat32_t svnmad_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svnmad_n_f32_m(pg, op1, op2, op3); } __forceinline svfloat16_t svnmad_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svnmad_f16_m(pg, op1, op2, op3); } __forceinline svfloat32_t svnmad_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svnmad_f32_m(pg, op1, op2, op3); } __forceinline svfloat64_t svnmad_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svnmad_f64_m(pg, op1, op2, op3); } __forceinline svfloat16_t svnmad_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svnmad_f16_x(pg, op1, op2, op3); } __forceinline svfloat32_t svnmad_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svnmad_f32_x(pg, op1, op2, op3); } __forceinline svfloat64_t svnmad_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svnmad_f64_x(pg, op1, op2, op3); } __forceinline svfloat16_t svnmad_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svnmad_f16_z(pg, op1, op2, op3); } __forceinline svfloat32_t svnmad_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svnmad_f32_z(pg, op1, op2, op3); } __forceinline svfloat64_t svnmad_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svnmad_n_f64_x(pg, op1, op2, op3); } __forceinline svfloat64_t svnmls_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svnmls_n_f64_z(pg, op1, op2, op3); } __forceinline svfloat32_t svnmls_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svnmls_n_f32_z(pg, op1, op2, op3); } __forceinline svfloat16_t svnmls_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svnmls_f16_m(pg, op1, op2, op3); } __forceinline svfloat64_t svnmls_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svnmls_n_f64_x(pg, op1, op2, op3); } __forceinline svfloat32_t svnmls_m(svbool_t pg, svfloat32_t op1, 
svfloat32_t op2, svfloat32_t op3) { return svnmls_f32_m(pg, op1, op2, op3); } __forceinline svfloat64_t svnmls_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svnmls_f64_m(pg, op1, op2, op3); } __forceinline svfloat16_t svnmls_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svnmls_f16_x(pg, op1, op2, op3); } __forceinline svfloat32_t svnmls_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svnmls_f32_x(pg, op1, op2, op3); } __forceinline svfloat64_t svnmls_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svnmls_f64_x(pg, op1, op2, op3); } __forceinline svfloat16_t svnmls_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svnmls_f16_z(pg, op1, op2, op3); } __forceinline svfloat32_t svnmls_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svnmls_f32_z(pg, op1, op2, op3); } __forceinline svfloat64_t svnmls_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svnmls_f64_z(pg, op1, op2, op3); } __forceinline svfloat64_t svnmls_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svnmls_n_f64_m(pg, op1, op2, op3); } __forceinline svfloat32_t svnmls_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svnmls_n_f32_m(pg, op1, op2, op3); } __forceinline svfloat32_t svnmls_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svnmls_n_f32_x(pg, op1, op2, op3); } __forceinline svfloat64_t svnmsb_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svnmsb_n_f64_m(pg, op1, op2, op3); } __forceinline svfloat16_t svnmsb_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svnmsb_f16_m(pg, op1, op2, op3); } __forceinline svfloat32_t svnmsb_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svnmsb_f32_m(pg, op1, op2, op3); } __forceinline svfloat64_t svnmsb_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svnmsb_f64_m(pg, op1, op2, op3); } __forceinline svfloat32_t svnmsb_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svnmsb_f32_x(pg, op1, op2, op3); } __forceinline svfloat64_t svnmsb_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svnmsb_f64_x(pg, op1, op2, op3); } __forceinline svfloat64_t svnmsb_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svnmsb_n_f64_z(pg, op1, op2, op3); } __forceinline svfloat32_t svnmsb_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svnmsb_n_f32_z(pg, op1, op2, op3); } __forceinline svfloat64_t svnmsb_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) { return svnmsb_n_f64_x(pg, op1, op2, op3); } __forceinline svfloat32_t svnmsb_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svnmsb_n_f32_x(pg, op1, op2, op3); } __forceinline svfloat16_t svnmsb_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svnmsb_f16_z(pg, op1, op2, op3); } __forceinline svfloat32_t svnmsb_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svnmsb_f32_z(pg, op1, op2, op3); } __forceinline svfloat64_t svnmsb_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svnmsb_f64_z(pg, op1, op2, op3); } __forceinline svfloat32_t svnmsb_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) { return svnmsb_n_f32_m(pg, op1, op2, op3); } __forceinline svfloat16_t svnmsb_x(svbool_t 
pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svnmsb_f16_x(pg, op1, op2, op3); }
__forceinline svfloat32_t svbfmlalb(svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3) { return svbfmlalb_f32(op1, op2, op3); }
__forceinline svfloat32_t svbfmlalt(svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3) { return svbfmlalt_f32(op1, op2, op3); }
template <int N, typename T> __forceinline T __svbfmlalb_lane(T op1, svbfloat16_t op2, svbfloat16_t op3) {
    if constexpr(::std::is_same_v<T, svfloat32_t>) { return svbfmlalb_lane_f32(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svbfmlalb_lane(op1, op2, op3, imm_index) __svbfmlalb_lane<imm_index>(op1, op2, op3)
template <int N, typename T> __forceinline T __svbfmlalt_lane(T op1, svbfloat16_t op2, svbfloat16_t op3) {
    if constexpr(::std::is_same_v<T, svfloat32_t>) { return svbfmlalt_lane_f32(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svbfmlalt_lane(op1, op2, op3, imm_index) __svbfmlalt_lane<imm_index>(op1, op2, op3)
__forceinline svfloat32_t svneg_z(svbool_t pg, svfloat32_t op) { return svneg_f32_z(pg, op); }
__forceinline svfloat16_t svneg_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) { return svneg_f16_m(inactive, pg, op); }
__forceinline svfloat32_t svneg_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) { return svneg_f32_m(inactive, pg, op); }
__forceinline svint8_t svneg_m(svint8_t inactive, svbool_t pg, svint8_t op) { return svneg_s8_m(inactive, pg, op); }
__forceinline svint16_t svneg_m(svint16_t inactive, svbool_t pg, svint16_t op) { return svneg_s16_m(inactive, pg, op); }
__forceinline svint32_t svneg_m(svint32_t inactive, svbool_t pg, svint32_t op) { return svneg_s32_m(inactive, pg, op); }
__forceinline svint64_t svneg_m(svint64_t inactive, svbool_t pg, svint64_t op) { return svneg_s64_m(inactive, pg, op); }
__forceinline svfloat16_t svneg_x(svbool_t pg, svfloat16_t op) { return svneg_f16_x(pg, op); }
__forceinline svfloat32_t svneg_x(svbool_t pg, svfloat32_t op) { return svneg_f32_x(pg, op); }
__forceinline svfloat64_t svneg_x(svbool_t pg, svfloat64_t op) { return svneg_f64_x(pg, op); }
__forceinline svfloat64_t svneg_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) { return svneg_f64_m(inactive, pg, op); }
__forceinline svint16_t svneg_x(svbool_t pg, svint16_t op) { return svneg_s16_x(pg, op); }
__forceinline svint8_t svneg_x(svbool_t pg, svint8_t op) { return svneg_s8_x(pg, op); }
__forceinline svint64_t svneg_z(svbool_t pg, svint64_t op) { return svneg_s64_z(pg, op); }
__forceinline svint16_t svneg_z(svbool_t pg, svint16_t op) { return svneg_s16_z(pg, op); }
__forceinline svint8_t svneg_z(svbool_t pg, svint8_t op) { return svneg_s8_z(pg, op); }
__forceinline svint32_t svneg_z(svbool_t pg, svint32_t op) { return svneg_s32_z(pg, op); }
__forceinline svfloat16_t svneg_z(svbool_t pg, svfloat16_t op) { return svneg_f16_z(pg, op); }
__forceinline svint64_t svneg_x(svbool_t pg, svint64_t op) { return svneg_s64_x(pg, op); }
__forceinline svint32_t svneg_x(svbool_t pg, svint32_t op) { return svneg_s32_x(pg, op); }
__forceinline svfloat64_t svneg_z(svbool_t pg, svfloat64_t op) { return svneg_f64_z(pg, op); }
__forceinline svfloat32_t svrecpe(svfloat32_t op) { return svrecpe_f32(op); }
__forceinline svfloat64_t svrecpe(svfloat64_t op) { return svrecpe_f64(op); }
__forceinline svfloat16_t svrecpe(svfloat16_t op) { return svrecpe_f16(op); }
__forceinline svfloat16_t svrecpx_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) { return svrecpx_f16_m(inactive, pg, op); }
__forceinline
svfloat64_t svrecpx_z(svbool_t pg, svfloat64_t op) { return svrecpx_f64_z(pg, op); } __forceinline svfloat32_t svrecpx_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) { return svrecpx_f32_m(inactive, pg, op); } __forceinline svfloat16_t svrecpx_z(svbool_t pg, svfloat16_t op) { return svrecpx_f16_z(pg, op); } __forceinline svfloat64_t svrecpx_x(svbool_t pg, svfloat64_t op) { return svrecpx_f64_x(pg, op); } __forceinline svfloat32_t svrecpx_x(svbool_t pg, svfloat32_t op) { return svrecpx_f32_x(pg, op); } __forceinline svfloat64_t svrecpx_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) { return svrecpx_f64_m(inactive, pg, op); } __forceinline svfloat32_t svrecpx_z(svbool_t pg, svfloat32_t op) { return svrecpx_f32_z(pg, op); } __forceinline svfloat16_t svrecpx_x(svbool_t pg, svfloat16_t op) { return svrecpx_f16_x(pg, op); } __forceinline svfloat16_t svrsqrte(svfloat16_t op) { return svrsqrte_f16(op); } __forceinline svfloat32_t svrsqrte(svfloat32_t op) { return svrsqrte_f32(op); } __forceinline svfloat64_t svrsqrte(svfloat64_t op) { return svrsqrte_f64(op); } __forceinline svfloat32_t svrsqrts(svfloat32_t op1, svfloat32_t op2) { return svrsqrts_f32(op1, op2); } __forceinline svfloat64_t svrsqrts(svfloat64_t op1, svfloat64_t op2) { return svrsqrts_f64(op1, op2); } __forceinline svfloat16_t svrsqrts(svfloat16_t op1, svfloat16_t op2) { return svrsqrts_f16(op1, op2); } __forceinline svfloat16_t svrecps(svfloat16_t op1, svfloat16_t op2) { return svrecps_f16(op1, op2); } __forceinline svfloat64_t svrecps(svfloat64_t op1, svfloat64_t op2) { return svrecps_f64(op1, op2); } __forceinline svfloat32_t svrecps(svfloat32_t op1, svfloat32_t op2) { return svrecps_f32(op1, op2); } __forceinline svfloat64_t svrinta_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) { return svrinta_f64_m(inactive, pg, op); } __forceinline svfloat16_t svrinta_x(svbool_t pg, svfloat16_t op) { return svrinta_f16_x(pg, op); } __forceinline svfloat32_t svrinta_x(svbool_t pg, svfloat32_t op) { return svrinta_f32_x(pg, op); } __forceinline svfloat64_t svrinta_x(svbool_t pg, svfloat64_t op) { return svrinta_f64_x(pg, op); } __forceinline svfloat16_t svrinta_z(svbool_t pg, svfloat16_t op) { return svrinta_f16_z(pg, op); } __forceinline svfloat32_t svrinta_z(svbool_t pg, svfloat32_t op) { return svrinta_f32_z(pg, op); } __forceinline svfloat64_t svrinta_z(svbool_t pg, svfloat64_t op) { return svrinta_f64_z(pg, op); } __forceinline svfloat16_t svrinta_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) { return svrinta_f16_m(inactive, pg, op); } __forceinline svfloat32_t svrinta_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) { return svrinta_f32_m(inactive, pg, op); } __forceinline svfloat32_t svrintn_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) { return svrintn_f32_m(inactive, pg, op); } __forceinline svfloat64_t svrintn_z(svbool_t pg, svfloat64_t op) { return svrintn_f64_z(pg, op); } __forceinline svfloat32_t svrintn_z(svbool_t pg, svfloat32_t op) { return svrintn_f32_z(pg, op); } __forceinline svfloat16_t svrintn_z(svbool_t pg, svfloat16_t op) { return svrintn_f16_z(pg, op); } __forceinline svfloat64_t svrintn_x(svbool_t pg, svfloat64_t op) { return svrintn_f64_x(pg, op); } __forceinline svfloat32_t svrintn_x(svbool_t pg, svfloat32_t op) { return svrintn_f32_x(pg, op); } __forceinline svfloat16_t svrintn_x(svbool_t pg, svfloat16_t op) { return svrintn_f16_x(pg, op); } __forceinline svfloat64_t svrintn_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) { return svrintn_f64_m(inactive, pg, op); } 
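// The svrint* wrappers above and below differ only in rounding mode: svrinta
// rounds to nearest with ties away from zero, svrintn to nearest with ties to
// even, svrintm toward minus infinity (floor), svrintp toward plus infinity
// (ceil), svrintz toward zero (truncate), svrintx uses the current rounding
// mode and raises the inexact exception, and svrinti uses the current mode
// without raising inexact. Usage sketch (hypothetical helper):
//
//     // floor of every active lane; inactive lanes are zeroed by the _z form
//     svfloat32_t floor_z(svbool_t pg, svfloat32_t v) { return svrintm_z(pg, v); }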
__forceinline svfloat16_t svrintn_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) { return svrintn_f16_m(inactive, pg, op); } __forceinline svfloat64_t svrintm_x(svbool_t pg, svfloat64_t op) { return svrintm_f64_x(pg, op); } __forceinline svfloat32_t svrintm_z(svbool_t pg, svfloat32_t op) { return svrintm_f32_z(pg, op); } __forceinline svfloat16_t svrintm_z(svbool_t pg, svfloat16_t op) { return svrintm_f16_z(pg, op); } __forceinline svfloat32_t svrintm_x(svbool_t pg, svfloat32_t op) { return svrintm_f32_x(pg, op); } __forceinline svfloat16_t svrintm_x(svbool_t pg, svfloat16_t op) { return svrintm_f16_x(pg, op); } __forceinline svfloat64_t svrintm_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) { return svrintm_f64_m(inactive, pg, op); } __forceinline svfloat32_t svrintm_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) { return svrintm_f32_m(inactive, pg, op); } __forceinline svfloat16_t svrintm_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) { return svrintm_f16_m(inactive, pg, op); } __forceinline svfloat64_t svrintm_z(svbool_t pg, svfloat64_t op) { return svrintm_f64_z(pg, op); } __forceinline svfloat16_t svrintp_x(svbool_t pg, svfloat16_t op) { return svrintp_f16_x(pg, op); } __forceinline svfloat32_t svrintp_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) { return svrintp_f32_m(inactive, pg, op); } __forceinline svfloat64_t svrintp_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) { return svrintp_f64_m(inactive, pg, op); } __forceinline svfloat32_t svrintp_x(svbool_t pg, svfloat32_t op) { return svrintp_f32_x(pg, op); } __forceinline svfloat16_t svrintp_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) { return svrintp_f16_m(inactive, pg, op); } __forceinline svfloat16_t svrintp_z(svbool_t pg, svfloat16_t op) { return svrintp_f16_z(pg, op); } __forceinline svfloat32_t svrintp_z(svbool_t pg, svfloat32_t op) { return svrintp_f32_z(pg, op); } __forceinline svfloat64_t svrintp_z(svbool_t pg, svfloat64_t op) { return svrintp_f64_z(pg, op); } __forceinline svfloat64_t svrintp_x(svbool_t pg, svfloat64_t op) { return svrintp_f64_x(pg, op); } __forceinline svfloat64_t svrintz_z(svbool_t pg, svfloat64_t op) { return svrintz_f64_z(pg, op); } __forceinline svfloat32_t svrintz_z(svbool_t pg, svfloat32_t op) { return svrintz_f32_z(pg, op); } __forceinline svfloat16_t svrintz_z(svbool_t pg, svfloat16_t op) { return svrintz_f16_z(pg, op); } __forceinline svfloat64_t svrintz_x(svbool_t pg, svfloat64_t op) { return svrintz_f64_x(pg, op); } __forceinline svfloat32_t svrintz_x(svbool_t pg, svfloat32_t op) { return svrintz_f32_x(pg, op); } __forceinline svfloat16_t svrintz_x(svbool_t pg, svfloat16_t op) { return svrintz_f16_x(pg, op); } __forceinline svfloat32_t svrintz_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) { return svrintz_f32_m(inactive, pg, op); } __forceinline svfloat16_t svrintz_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) { return svrintz_f16_m(inactive, pg, op); } __forceinline svfloat64_t svrintz_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) { return svrintz_f64_m(inactive, pg, op); } __forceinline svfloat32_t svrintx_z(svbool_t pg, svfloat32_t op) { return svrintx_f32_z(pg, op); } __forceinline svfloat64_t svrintx_z(svbool_t pg, svfloat64_t op) { return svrintx_f64_z(pg, op); } __forceinline svfloat32_t svrintx_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) { return svrintx_f32_m(inactive, pg, op); } __forceinline svfloat64_t svrintx_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) { return svrintx_f64_m(inactive, pg, op); } 
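// Reminder on the predication suffixes used throughout these overloads: _m
// (merging) leaves inactive lanes unchanged (taken from the first vector
// argument or the explicit "inactive" argument), _z (zeroing) clears inactive
// lanes to zero, and _x ("don't care") leaves inactive lanes unspecified,
// which gives the compiler the most freedom. Usage sketch (hypothetical
// helper, assuming every lane is active anyway):
//
//     svfloat64_t sqrt_all(svfloat64_t v) { return svsqrt_x(svptrue_b64(), v); }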
__forceinline svfloat16_t svrintx_x(svbool_t pg, svfloat16_t op) { return svrintx_f16_x(pg, op); } __forceinline svfloat16_t svrintx_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) { return svrintx_f16_m(inactive, pg, op); } __forceinline svfloat32_t svrintx_x(svbool_t pg, svfloat32_t op) { return svrintx_f32_x(pg, op); } __forceinline svfloat64_t svrintx_x(svbool_t pg, svfloat64_t op) { return svrintx_f64_x(pg, op); } __forceinline svfloat16_t svrintx_z(svbool_t pg, svfloat16_t op) { return svrintx_f16_z(pg, op); } __forceinline svfloat32_t svrinti_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) { return svrinti_f32_m(inactive, pg, op); } __forceinline svfloat64_t svrinti_z(svbool_t pg, svfloat64_t op) { return svrinti_f64_z(pg, op); } __forceinline svfloat32_t svrinti_z(svbool_t pg, svfloat32_t op) { return svrinti_f32_z(pg, op); } __forceinline svfloat16_t svrinti_z(svbool_t pg, svfloat16_t op) { return svrinti_f16_z(pg, op); } __forceinline svfloat64_t svrinti_x(svbool_t pg, svfloat64_t op) { return svrinti_f64_x(pg, op); } __forceinline svfloat32_t svrinti_x(svbool_t pg, svfloat32_t op) { return svrinti_f32_x(pg, op); } __forceinline svfloat16_t svrinti_x(svbool_t pg, svfloat16_t op) { return svrinti_f16_x(pg, op); } __forceinline svfloat64_t svrinti_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) { return svrinti_f64_m(inactive, pg, op); } __forceinline svfloat16_t svrinti_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) { return svrinti_f16_m(inactive, pg, op); } __forceinline svfloat64_t svsqrt_z(svbool_t pg, svfloat64_t op) { return svsqrt_f64_z(pg, op); } __forceinline svfloat32_t svsqrt_z(svbool_t pg, svfloat32_t op) { return svsqrt_f32_z(pg, op); } __forceinline svfloat16_t svsqrt_z(svbool_t pg, svfloat16_t op) { return svsqrt_f16_z(pg, op); } __forceinline svfloat64_t svsqrt_x(svbool_t pg, svfloat64_t op) { return svsqrt_f64_x(pg, op); } __forceinline svfloat64_t svsqrt_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) { return svsqrt_f64_m(inactive, pg, op); } __forceinline svfloat16_t svsqrt_x(svbool_t pg, svfloat16_t op) { return svsqrt_f16_x(pg, op); } __forceinline svfloat32_t svsqrt_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) { return svsqrt_f32_m(inactive, pg, op); } __forceinline svfloat16_t svsqrt_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) { return svsqrt_f16_m(inactive, pg, op); } __forceinline svfloat32_t svsqrt_x(svbool_t pg, svfloat32_t op) { return svsqrt_f32_x(pg, op); } __forceinline svuint64_t svqsub(svuint64_t op1, uint64_t op2) { return svqsub_n_u64(op1, op2); } __forceinline svuint32_t svqsub(svuint32_t op1, uint32_t op2) { return svqsub_n_u32(op1, op2); } __forceinline svuint16_t svqsub(svuint16_t op1, uint16_t op2) { return svqsub_n_u16(op1, op2); } __forceinline svuint8_t svqsub(svuint8_t op1, uint8_t op2) { return svqsub_n_u8(op1, op2); } __forceinline svint64_t svqsub(svint64_t op1, int64_t op2) { return svqsub_n_s64(op1, op2); } __forceinline svint8_t svqsub(svint8_t op1, svint8_t op2) { return svqsub_s8(op1, op2); } __forceinline svint16_t svqsub(svint16_t op1, svint16_t op2) { return svqsub_s16(op1, op2); } __forceinline svint64_t svqsub(svint64_t op1, svint64_t op2) { return svqsub_s64(op1, op2); } __forceinline svint32_t svqsub(svint32_t op1, svint32_t op2) { return svqsub_s32(op1, op2); } __forceinline svuint16_t svqsub(svuint16_t op1, svuint16_t op2) { return svqsub_u16(op1, op2); } __forceinline svuint8_t svqsub(svuint8_t op1, svuint8_t op2) { return svqsub_u8(op1, op2); } __forceinline svint16_t 
svqsub(svint16_t op1, int16_t op2) { return svqsub_n_s16(op1, op2); } __forceinline svint8_t svqsub(svint8_t op1, int8_t op2) { return svqsub_n_s8(op1, op2); } __forceinline svint32_t svqsub(svint32_t op1, int32_t op2) { return svqsub_n_s32(op1, op2); } __forceinline svuint64_t svqsub(svuint64_t op1, svuint64_t op2) { return svqsub_u64(op1, op2); } __forceinline svuint32_t svqsub(svuint32_t op1, svuint32_t op2) { return svqsub_u32(op1, op2); } __forceinline svint32_t svsub_m(svbool_t pg, svint32_t op1, int32_t op2) { return svsub_n_s32_m(pg, op1, op2); } __forceinline svint64_t svsub_m(svbool_t pg, svint64_t op1, int64_t op2) { return svsub_n_s64_m(pg, op1, op2); } __forceinline svuint64_t svsub_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svsub_n_u64_m(pg, op1, op2); } __forceinline svuint16_t svsub_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svsub_n_u16_m(pg, op1, op2); } __forceinline svuint32_t svsub_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svsub_n_u32_m(pg, op1, op2); } __forceinline svint16_t svsub_m(svbool_t pg, svint16_t op1, int16_t op2) { return svsub_n_s16_m(pg, op1, op2); } __forceinline svuint8_t svsub_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svsub_n_u8_m(pg, op1, op2); } __forceinline svint8_t svsub_m(svbool_t pg, svint8_t op1, int8_t op2) { return svsub_n_s8_m(pg, op1, op2); } __forceinline svuint16_t svsub_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svsub_u16_z(pg, op1, op2); } __forceinline svfloat32_t svsub_m(svbool_t pg, svfloat32_t op1, float32_t op2) { return svsub_n_f32_m(pg, op1, op2); } __forceinline svuint64_t svsub_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svsub_u64_z(pg, op1, op2); } __forceinline svuint32_t svsub_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svsub_u32_z(pg, op1, op2); } __forceinline svuint8_t svsub_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svsub_u8_z(pg, op1, op2); } __forceinline svint64_t svsub_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svsub_s64_z(pg, op1, op2); } __forceinline svint32_t svsub_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svsub_s32_z(pg, op1, op2); } __forceinline svfloat64_t svsub_m(svbool_t pg, svfloat64_t op1, float64_t op2) { return svsub_n_f64_m(pg, op1, op2); } __forceinline svfloat32_t svsub_x(svbool_t pg, svfloat32_t op1, float32_t op2) { return svsub_n_f32_x(pg, op1, op2); } __forceinline svuint16_t svsub_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svsub_n_u16_x(pg, op1, op2); } __forceinline svint8_t svsub_x(svbool_t pg, svint8_t op1, int8_t op2) { return svsub_n_s8_x(pg, op1, op2); } __forceinline svuint64_t svsub_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svsub_n_u64_z(pg, op1, op2); } __forceinline svuint32_t svsub_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svsub_n_u32_z(pg, op1, op2); } __forceinline svuint16_t svsub_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svsub_n_u16_z(pg, op1, op2); } __forceinline svuint8_t svsub_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svsub_n_u8_z(pg, op1, op2); } __forceinline svint64_t svsub_z(svbool_t pg, svint64_t op1, int64_t op2) { return svsub_n_s64_z(pg, op1, op2); } __forceinline svint32_t svsub_z(svbool_t pg, svint32_t op1, int32_t op2) { return svsub_n_s32_z(pg, op1, op2); } __forceinline svint16_t svsub_z(svbool_t pg, svint16_t op1, int16_t op2) { return svsub_n_s16_z(pg, op1, op2); } __forceinline svint8_t svsub_z(svbool_t pg, svint8_t op1, int8_t op2) { return svsub_n_s8_z(pg, op1, op2); } __forceinline 
svfloat64_t svsub_z(svbool_t pg, svfloat64_t op1, float64_t op2) { return svsub_n_f64_z(pg, op1, op2); } __forceinline svfloat32_t svsub_z(svbool_t pg, svfloat32_t op1, float32_t op2) { return svsub_n_f32_z(pg, op1, op2); } __forceinline svuint64_t svsub_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svsub_n_u64_x(pg, op1, op2); } __forceinline svuint32_t svsub_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svsub_n_u32_x(pg, op1, op2); } __forceinline svuint8_t svsub_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svsub_n_u8_x(pg, op1, op2); } __forceinline svint64_t svsub_x(svbool_t pg, svint64_t op1, int64_t op2) { return svsub_n_s64_x(pg, op1, op2); } __forceinline svint32_t svsub_x(svbool_t pg, svint32_t op1, int32_t op2) { return svsub_n_s32_x(pg, op1, op2); } __forceinline svint16_t svsub_x(svbool_t pg, svint16_t op1, int16_t op2) { return svsub_n_s16_x(pg, op1, op2); } __forceinline svfloat64_t svsub_x(svbool_t pg, svfloat64_t op1, float64_t op2) { return svsub_n_f64_x(pg, op1, op2); } __forceinline svfloat64_t svsub_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svsub_f64_z(pg, op1, op2); } __forceinline svint16_t svsub_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svsub_s16_z(pg, op1, op2); } __forceinline svfloat64_t svsub_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svsub_f64_m(pg, op1, op2); } __forceinline svuint8_t svsub_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svsub_u8_x(pg, op1, op2); } __forceinline svint8_t svsub_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svsub_s8_m(pg, op1, op2); } __forceinline svint16_t svsub_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svsub_s16_m(pg, op1, op2); } __forceinline svint32_t svsub_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svsub_s32_m(pg, op1, op2); } __forceinline svint64_t svsub_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svsub_s64_m(pg, op1, op2); } __forceinline svuint8_t svsub_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svsub_u8_m(pg, op1, op2); } __forceinline svuint32_t svsub_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svsub_u32_m(pg, op1, op2); } __forceinline svuint64_t svsub_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svsub_u64_m(pg, op1, op2); } __forceinline svfloat16_t svsub_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svsub_f16_x(pg, op1, op2); } __forceinline svfloat32_t svsub_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svsub_f32_x(pg, op1, op2); } __forceinline svfloat64_t svsub_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svsub_f64_x(pg, op1, op2); } __forceinline svint8_t svsub_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svsub_s8_x(pg, op1, op2); } __forceinline svint16_t svsub_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svsub_s16_x(pg, op1, op2); } __forceinline svint32_t svsub_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svsub_s32_x(pg, op1, op2); } __forceinline svint64_t svsub_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svsub_s64_x(pg, op1, op2); } __forceinline svfloat32_t svsub_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svsub_f32_m(pg, op1, op2); } __forceinline svfloat16_t svsub_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svsub_f16_m(pg, op1, op2); } __forceinline svuint16_t svsub_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svsub_u16_m(pg, op1, op2); } __forceinline svuint16_t svsub_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svsub_u16_x(pg, op1, op2); 
} __forceinline svfloat16_t svsub_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svsub_f16_z(pg, op1, op2); } __forceinline svuint64_t svsub_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svsub_u64_x(pg, op1, op2); } __forceinline svuint32_t svsub_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svsub_u32_x(pg, op1, op2); } __forceinline svfloat32_t svsub_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svsub_f32_z(pg, op1, op2); } __forceinline svint8_t svsub_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svsub_s8_z(pg, op1, op2); } __forceinline svuint32_t svsubr_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svsubr_u32_z(pg, op1, op2); } __forceinline svuint16_t svsubr_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svsubr_u16_z(pg, op1, op2); } __forceinline svuint8_t svsubr_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svsubr_u8_z(pg, op1, op2); } __forceinline svint64_t svsubr_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svsubr_s64_z(pg, op1, op2); } __forceinline svuint32_t svsubr_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svsubr_u32_m(pg, op1, op2); } __forceinline svfloat32_t svsubr_m(svbool_t pg, svfloat32_t op1, float32_t op2) { return svsubr_n_f32_m(pg, op1, op2); } __forceinline svint32_t svsubr_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svsubr_s32_z(pg, op1, op2); } __forceinline svint16_t svsubr_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svsubr_s16_z(pg, op1, op2); } __forceinline svint8_t svsubr_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svsubr_s8_z(pg, op1, op2); } __forceinline svfloat64_t svsubr_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svsubr_f64_z(pg, op1, op2); } __forceinline svuint64_t svsubr_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svsubr_u64_z(pg, op1, op2); } __forceinline svuint8_t svsubr_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svsubr_u8_x(pg, op1, op2); } __forceinline svfloat16_t svsubr_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svsubr_f16_z(pg, op1, op2); } __forceinline svfloat32_t svsubr_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svsubr_f32_z(pg, op1, op2); } __forceinline svuint16_t svsubr_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svsubr_u16_x(pg, op1, op2); } __forceinline svuint64_t svsubr_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svsubr_u64_x(pg, op1, op2); } __forceinline svuint32_t svsubr_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svsubr_u32_x(pg, op1, op2); } __forceinline svfloat16_t svsubr_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svsubr_f16_x(pg, op1, op2); } __forceinline svfloat32_t svsubr_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svsubr_f32_x(pg, op1, op2); } __forceinline svfloat64_t svsubr_m(svbool_t pg, svfloat64_t op1, float64_t op2) { return svsubr_n_f64_m(pg, op1, op2); } __forceinline svfloat64_t svsubr_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svsubr_f64_x(pg, op1, op2); } __forceinline svint8_t svsubr_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svsubr_s8_x(pg, op1, op2); } __forceinline svint16_t svsubr_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svsubr_s16_x(pg, op1, op2); } __forceinline svint32_t svsubr_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svsubr_s32_x(pg, op1, op2); } __forceinline svint64_t svsubr_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svsubr_s64_x(pg, op1, op2); } __forceinline svuint64_t 
svsubr_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svsubr_u64_m(pg, op1, op2); } __forceinline svint64_t svsubr_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svsubr_s64_m(pg, op1, op2); } __forceinline svint32_t svsubr_m(svbool_t pg, svint32_t op1, int32_t op2) { return svsubr_n_s32_m(pg, op1, op2); } __forceinline svint16_t svsubr_m(svbool_t pg, svint16_t op1, int16_t op2) { return svsubr_n_s16_m(pg, op1, op2); } __forceinline svuint8_t svsubr_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svsubr_n_u8_x(pg, op1, op2); } __forceinline svuint16_t svsubr_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svsubr_n_u16_x(pg, op1, op2); } __forceinline svuint32_t svsubr_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svsubr_n_u32_x(pg, op1, op2); } __forceinline svint8_t svsubr_m(svbool_t pg, svint8_t op1, int8_t op2) { return svsubr_n_s8_m(pg, op1, op2); } __forceinline svfloat32_t svsubr_z(svbool_t pg, svfloat32_t op1, float32_t op2) { return svsubr_n_f32_z(pg, op1, op2); } __forceinline svfloat64_t svsubr_z(svbool_t pg, svfloat64_t op1, float64_t op2) { return svsubr_n_f64_z(pg, op1, op2); } __forceinline svint8_t svsubr_z(svbool_t pg, svint8_t op1, int8_t op2) { return svsubr_n_s8_z(pg, op1, op2); } __forceinline svint16_t svsubr_z(svbool_t pg, svint16_t op1, int16_t op2) { return svsubr_n_s16_z(pg, op1, op2); } __forceinline svint32_t svsubr_z(svbool_t pg, svint32_t op1, int32_t op2) { return svsubr_n_s32_z(pg, op1, op2); } __forceinline svint64_t svsubr_z(svbool_t pg, svint64_t op1, int64_t op2) { return svsubr_n_s64_z(pg, op1, op2); } __forceinline svuint8_t svsubr_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svsubr_n_u8_z(pg, op1, op2); } __forceinline svuint16_t svsubr_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svsubr_n_u16_z(pg, op1, op2); } __forceinline svuint32_t svsubr_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svsubr_n_u32_z(pg, op1, op2); } __forceinline svuint64_t svsubr_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svsubr_n_u64_z(pg, op1, op2); } __forceinline svint64_t svsubr_x(svbool_t pg, svint64_t op1, int64_t op2) { return svsubr_n_s64_x(pg, op1, op2); } __forceinline svint32_t svsubr_x(svbool_t pg, svint32_t op1, int32_t op2) { return svsubr_n_s32_x(pg, op1, op2); } __forceinline svuint64_t svsubr_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svsubr_n_u64_x(pg, op1, op2); } __forceinline svint8_t svsubr_x(svbool_t pg, svint8_t op1, int8_t op2) { return svsubr_n_s8_x(pg, op1, op2); } __forceinline svuint16_t svsubr_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svsubr_u16_m(pg, op1, op2); } __forceinline svint64_t svsubr_m(svbool_t pg, svint64_t op1, int64_t op2) { return svsubr_n_s64_m(pg, op1, op2); } __forceinline svint32_t svsubr_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svsubr_s32_m(pg, op1, op2); } __forceinline svint16_t svsubr_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svsubr_s16_m(pg, op1, op2); } __forceinline svint16_t svsubr_x(svbool_t pg, svint16_t op1, int16_t op2) { return svsubr_n_s16_x(pg, op1, op2); } __forceinline svint8_t svsubr_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svsubr_s8_m(pg, op1, op2); } __forceinline svfloat64_t svsubr_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svsubr_f64_m(pg, op1, op2); } __forceinline svfloat32_t svsubr_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svsubr_f32_m(pg, op1, op2); } __forceinline svuint8_t svsubr_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return 
svsubr_u8_m(pg, op1, op2); }
__forceinline svuint8_t svsubr_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svsubr_n_u8_m(pg, op1, op2); }
__forceinline svuint16_t svsubr_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svsubr_n_u16_m(pg, op1, op2); }
__forceinline svuint32_t svsubr_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svsubr_n_u32_m(pg, op1, op2); }
__forceinline svfloat64_t svsubr_x(svbool_t pg, svfloat64_t op1, float64_t op2) { return svsubr_n_f64_x(pg, op1, op2); }
__forceinline svfloat32_t svsubr_x(svbool_t pg, svfloat32_t op1, float32_t op2) { return svsubr_n_f32_x(pg, op1, op2); }
__forceinline svfloat16_t svsubr_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svsubr_f16_m(pg, op1, op2); }
__forceinline svuint64_t svsubr_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svsubr_n_u64_m(pg, op1, op2); }
template <uint64_t N, typename T> __forceinline T __svtmad(T op1, T op2) { if constexpr(::std::is_same_v<T, svfloat16_t>) { return svtmad_f16(op1, op2, N); } else if constexpr(::std::is_same_v<T, svfloat32_t>) { return svtmad_f32(op1, op2, N); } else if constexpr(::std::is_same_v<T, svfloat64_t>) { return svtmad_f64(op1, op2, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svtmad(op1, op2, imm3) __svtmad<imm3>(op1, op2)
__forceinline svfloat16_t svtssel(svfloat16_t op1, svuint16_t op2) { return svtssel_f16(op1, op2); }
__forceinline svfloat32_t svtssel(svfloat32_t op1, svuint32_t op2) { return svtssel_f32(op1, op2); }
__forceinline svfloat64_t svtssel(svfloat64_t op1, svuint64_t op2) { return svtssel_f64(op1, op2); }
__forceinline svfloat16_t svtsmul(svfloat16_t op1, svuint16_t op2) { return svtsmul_f16(op1, op2); }
__forceinline svfloat32_t svtsmul(svfloat32_t op1, svuint32_t op2) { return svtsmul_f32(op1, op2); }
__forceinline svfloat64_t svtsmul(svfloat64_t op1, svuint64_t op2) { return svtsmul_f64(op1, op2); }
__forceinline uint64_t svlen(svfloat32_t op) { return svlen_f32(op); }
__forceinline uint64_t svlen(svfloat16_t op) { return svlen_f16(op); }
__forceinline uint64_t svlen(svbfloat16_t op) { return svlen_bf16(op); }
__forceinline uint64_t svlen(svuint8_t op) { return svlen_u8(op); }
__forceinline uint64_t svlen(svfloat64_t op) { return svlen_f64(op); }
__forceinline uint64_t svlen(svint8_t op) { return svlen_s8(op); }
__forceinline uint64_t svlen(svint16_t op) { return svlen_s16(op); }
__forceinline uint64_t svlen(svint32_t op) { return svlen_s32(op); }
__forceinline uint64_t svlen(svuint64_t op) { return svlen_u64(op); }
__forceinline uint64_t svlen(svuint32_t op) { return svlen_u32(op); }
__forceinline uint64_t svlen(svuint16_t op) { return svlen_u16(op); }
__forceinline uint64_t svlen(svint64_t op) { return svlen_s64(op); }
template <enum svpattern N1, uint64_t N2, typename T> __forceinline T __svqdecb_pat(T op) { if constexpr(::std::is_same_v<T, uint64_t>) { return svqdecb_pat_n_u64(op, N1, N2); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqdecb_pat_n_u32(op, N1, N2); } else if constexpr(::std::is_same_v<T, int64_t>) { return svqdecb_pat_n_s64(op, N1, N2); } else if constexpr(::std::is_same_v<T, int32_t>) { return svqdecb_pat_n_s32(op, N1, N2); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqdecb_pat(op, pattern, imm_factor) __svqdecb_pat<pattern, imm_factor>(op)
template <uint64_t N, typename T> __forceinline T __svqdecb(T op) { if constexpr(::std::is_same_v<T, uint64_t>) { return svqdecb_n_u64(op, N); } else if constexpr(::std::is_same_v<T, int64_t>) { return svqdecb_n_s64(op, N); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqdecb_n_u32(op, N); } else if constexpr(::std::is_same_v<T, int32_t>) { return svqdecb_n_s32(op, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqdecb(op, imm_factor) __svqdecb<imm_factor>(op)
template <uint64_t N, typename T> __forceinline T __svqdecd(T op) { if constexpr(::std::is_same_v<T, uint64_t>) { return svqdecd_n_u64(op, N); } else if constexpr(::std::is_same_v<T, int64_t>) { return svqdecd_n_s64(op, N); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqdecd_n_u32(op, N); } else if constexpr(::std::is_same_v<T, int32_t>) { return svqdecd_n_s32(op, N); } else if constexpr(::std::is_same_v<T, svint64_t>) { return svqdecd_s64(op, N); } else if constexpr(::std::is_same_v<T, svuint64_t>) { return svqdecd_u64(op, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqdecd(op, imm_factor) __svqdecd<imm_factor>(op)
template <enum svpattern N1, uint64_t N2, typename T> __forceinline T __svqdecd_pat(T op) { if constexpr(::std::is_same_v<T, int32_t>) { return svqdecd_pat_n_s32(op, N1, N2); } else if constexpr(::std::is_same_v<T, int64_t>) { return svqdecd_pat_n_s64(op, N1, N2); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqdecd_pat_n_u32(op, N1, N2); } else if constexpr(::std::is_same_v<T, uint64_t>) { return svqdecd_pat_n_u64(op, N1, N2); } else if constexpr(::std::is_same_v<T, svint64_t>) { return svqdecd_pat_s64(op, N1, N2); } else if constexpr(::std::is_same_v<T, svuint64_t>) { return svqdecd_pat_u64(op, N1, N2); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqdecd_pat(op, pattern, imm_factor) __svqdecd_pat<pattern, imm_factor>(op)
template <uint64_t N, typename T> __forceinline T __svqdech(T op) { if constexpr(::std::is_same_v<T, svint16_t>) { return svqdech_s16(op, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svqdech_u16(op, N); } else if constexpr(::std::is_same_v<T, int32_t>) { return svqdech_n_s32(op, N); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqdech_n_u32(op, N); } else if constexpr(::std::is_same_v<T, int64_t>) { return svqdech_n_s64(op, N); } else if constexpr(::std::is_same_v<T, uint64_t>) { return svqdech_n_u64(op, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqdech(op, imm_factor) __svqdech<imm_factor>(op)
template <enum svpattern N1, uint64_t N2, typename T> __forceinline T __svqdech_pat(T op) { if constexpr(::std::is_same_v<T, int32_t>) { return svqdech_pat_n_s32(op, N1, N2); } else if constexpr(::std::is_same_v<T, int64_t>) { return svqdech_pat_n_s64(op, N1, N2); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqdech_pat_n_u32(op, N1, N2); } else if constexpr(::std::is_same_v<T, uint64_t>) { return svqdech_pat_n_u64(op, N1, N2); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svqdech_pat_s16(op, N1, N2); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svqdech_pat_u16(op, N1, N2); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqdech_pat(op, pattern, imm_factor) __svqdech_pat<pattern, imm_factor>(op)
template <uint64_t N, typename T> __forceinline T __svqdecw(T op) { if constexpr(::std::is_same_v<T, int64_t>) { return svqdecw_n_s64(op, N); } else if constexpr(::std::is_same_v<T, uint64_t>) { return svqdecw_n_u64(op, N); } else if constexpr(::std::is_same_v<T, int32_t>) { return svqdecw_n_s32(op, N); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqdecw_n_u32(op, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svqdecw_s32(op, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svqdecw_u32(op, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqdecw(op, imm_factor) __svqdecw<imm_factor>(op)
template <enum svpattern N1, uint64_t N2, typename T> __forceinline T __svqdecw_pat(T op) { if constexpr(::std::is_same_v<T, uint64_t>) { return svqdecw_pat_n_u64(op, N1, N2); } else if constexpr(::std::is_same_v<T, int64_t>) { return svqdecw_pat_n_s64(op, N1, N2); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqdecw_pat_n_u32(op, N1, N2); } else if constexpr(::std::is_same_v<T, int32_t>) { return svqdecw_pat_n_s32(op, N1, N2); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svqdecw_pat_s32(op, N1, N2); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svqdecw_pat_u32(op, N1, N2); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqdecw_pat(op, pattern, imm_factor) __svqdecw_pat<pattern, imm_factor>(op)
template <enum svpattern N1, uint64_t N2, typename T> __forceinline T __svqincb_pat(T op) { if constexpr(::std::is_same_v<T, uint64_t>) { return svqincb_pat_n_u64(op, N1, N2); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqincb_pat_n_u32(op, N1, N2); } else if constexpr(::std::is_same_v<T, int64_t>) { return svqincb_pat_n_s64(op, N1, N2); } else if constexpr(::std::is_same_v<T, int32_t>) { return svqincb_pat_n_s32(op, N1, N2); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqincb_pat(op, pattern, imm_factor) __svqincb_pat<pattern, imm_factor>(op)
template <uint64_t N, typename T> __forceinline T __svqincb(T op) { if constexpr(::std::is_same_v<T, int64_t>) { return svqincb_n_s64(op, N); } else if constexpr(::std::is_same_v<T, int32_t>) { return svqincb_n_s32(op, N); } else if constexpr(::std::is_same_v<T, uint64_t>) { return svqincb_n_u64(op, N); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqincb_n_u32(op, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqincb(op, imm_factor) __svqincb<imm_factor>(op)
template <enum svpattern N1, uint64_t N2, typename T> __forceinline T __svqincd_pat(T op) { if constexpr(::std::is_same_v<T, int32_t>) { return svqincd_pat_n_s32(op, N1, N2); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqincd_pat_n_u32(op, N1, N2); } else if constexpr(::std::is_same_v<T, int64_t>) { return svqincd_pat_n_s64(op, N1, N2); } else if constexpr(::std::is_same_v<T, uint64_t>) { return svqincd_pat_n_u64(op, N1, N2); } else if constexpr(::std::is_same_v<T, svint64_t>) { return svqincd_pat_s64(op, N1, N2); } else if constexpr(::std::is_same_v<T, svuint64_t>) { return svqincd_pat_u64(op, N1, N2); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqincd_pat(op, pattern, imm_factor) __svqincd_pat<pattern, imm_factor>(op)
template <uint64_t N, typename T> __forceinline T __svqincd(T op) { if constexpr(::std::is_same_v<T, int64_t>) { return svqincd_n_s64(op, N); } else if constexpr(::std::is_same_v<T, uint64_t>) { return svqincd_n_u64(op, N); } else if constexpr(::std::is_same_v<T, int32_t>) { return svqincd_n_s32(op, N); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqincd_n_u32(op, N); } else if constexpr(::std::is_same_v<T, svint64_t>) { return svqincd_s64(op, N); } else if constexpr(::std::is_same_v<T, svuint64_t>) { return svqincd_u64(op, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqincd(op, imm_factor) __svqincd<imm_factor>(op)
template <enum svpattern N1, uint64_t N2, typename T> __forceinline T __svqinch_pat(T op) { if constexpr(::std::is_same_v<T, int32_t>) { return svqinch_pat_n_s32(op, N1, N2); } else if constexpr(::std::is_same_v<T, int64_t>) { return svqinch_pat_n_s64(op, N1, N2); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqinch_pat_n_u32(op, N1, N2); } else if constexpr(::std::is_same_v<T, uint64_t>) { return svqinch_pat_n_u64(op, N1, N2); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svqinch_pat_s16(op, N1, N2); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svqinch_pat_u16(op, N1, N2); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqinch_pat(op, pattern, imm_factor) __svqinch_pat<pattern, imm_factor>(op)
template <uint64_t N, typename T> __forceinline T __svqinch(T op) { if constexpr(::std::is_same_v<T, int64_t>) { return svqinch_n_s64(op, N); } else if constexpr(::std::is_same_v<T, int32_t>) { return svqinch_n_s32(op, N); } else if constexpr(::std::is_same_v<T, uint64_t>) { return svqinch_n_u64(op, N); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqinch_n_u32(op, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svqinch_u16(op, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svqinch_s16(op, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqinch(op, imm_factor) __svqinch<imm_factor>(op)
template <enum svpattern N1, uint64_t N2, typename T> __forceinline T __svqincw_pat(T op) { if constexpr(::std::is_same_v<T, int64_t>) { return svqincw_pat_n_s64(op, N1, N2); } else if constexpr(::std::is_same_v<T, uint64_t>) { return svqincw_pat_n_u64(op, N1, N2); } else if constexpr(::std::is_same_v<T, int32_t>) { return svqincw_pat_n_s32(op, N1, N2); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqincw_pat_n_u32(op, N1, N2); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svqincw_pat_s32(op, N1, N2); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svqincw_pat_u32(op, N1, N2); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqincw_pat(op, pattern, imm_factor) __svqincw_pat<pattern, imm_factor>(op)
template <uint64_t N, typename T> __forceinline T __svqincw(T op) { if constexpr(::std::is_same_v<T, int32_t>) { return svqincw_n_s32(op, N); } else if constexpr(::std::is_same_v<T, uint32_t>) { return svqincw_n_u32(op, N); } else if constexpr(::std::is_same_v<T, int64_t>) { return svqincw_n_s64(op, N); } else if constexpr(::std::is_same_v<T, uint64_t>) { return svqincw_n_u64(op, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svqincw_s32(op, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svqincw_u32(op, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqincw(op, imm_factor) __svqincw<imm_factor>(op)
__forceinline svint8_t svindex(int8_t base, int8_t step) { return svindex_s8(base, step); }
__forceinline svint16_t svindex(int16_t base, int16_t step) { return svindex_s16(base, step); }
__forceinline svint32_t svindex(int32_t base, int32_t step) { return svindex_s32(base, step); }
__forceinline svuint8_t svindex(uint8_t base, uint8_t step) { return svindex_u8(base, step); }
__forceinline svuint16_t svindex(uint16_t base, uint16_t step) { return svindex_u16(base, step); }
__forceinline svuint32_t svindex(uint32_t base, uint32_t step) { return svindex_u32(base, step); }
__forceinline svint64_t svindex(int64_t base, int64_t step) { return svindex_s64(base, step); }
__forceinline svuint64_t svindex(uint64_t base, uint64_t step) { return svindex_u64(base, step); }
__forceinline svint64_t svdupq_lane(svint64_t data, uint64_t index) { return svdupq_lane_s64(data, index); }
__forceinline svint32_t svdupq_lane(svint32_t data, uint64_t index) { return svdupq_lane_s32(data, index); }
__forceinline svint16_t svdupq_lane(svint16_t data, uint64_t index) { return svdupq_lane_s16(data, index); }
__forceinline svint8_t svdupq_lane(svint8_t data, uint64_t index) { return svdupq_lane_s8(data, index); }
__forceinline svfloat64_t svdupq_lane(svfloat64_t data, uint64_t index) { return svdupq_lane_f64(data, index); }
__forceinline svfloat32_t svdupq_lane(svfloat32_t data, uint64_t index) { return svdupq_lane_f32(data, index); }
__forceinline svfloat16_t svdupq_lane(svfloat16_t data, uint64_t index) { return svdupq_lane_f16(data, index); }
__forceinline svbfloat16_t svdupq_lane(svbfloat16_t data, uint64_t index) { return svdupq_lane_bf16(data, index); }
__forceinline svuint64_t svdupq_u64(uint64_t x0, uint64_t x1) { return svdupq_n_u64(x0, x1); }
__forceinline svint64_t svdupq_s64(int64_t x0, int64_t x1) { return svdupq_n_s64(x0, x1); }
__forceinline svfloat32_t svdupq_f32(float32_t x0, float32_t x1, float32_t x2, float32_t x3) { return svdupq_n_f32(x0, x1, x2, x3); }
__forceinline svuint16_t svdupq_lane(svuint16_t data, uint64_t index) { return svdupq_lane_u16(data, index); }
__forceinline svuint16_t svdupq_u16(uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3, uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7) { return svdupq_n_u16(x0, x1, x2, x3, x4, x5, x6, x7); }
__forceinline svint16_t svdupq_s16(int16_t x0, int16_t x1, int16_t x2, int16_t x3, int16_t x4, int16_t x5, int16_t x6, int16_t x7) { return svdupq_n_s16(x0, x1, x2, x3, x4, x5, x6, x7); }
__forceinline svuint8_t svdupq_lane(svuint8_t data, uint64_t index) { return svdupq_lane_u8(data, index); }
__forceinline svuint8_t svdupq_u8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8, uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15) { return svdupq_n_u8(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15); }
__forceinline svfloat64_t svdupq_f64(float64_t x0, float64_t x1) { return svdupq_n_f64(x0, x1); }
__forceinline svuint64_t svdupq_lane(svuint64_t data, uint64_t index) { return svdupq_lane_u64(data, index); }
__forceinline svuint32_t svdupq_lane(svuint32_t data, uint64_t index) { return svdupq_lane_u32(data, index); }
__forceinline svint32_t svdupq_s32(int32_t x0, int32_t x1, int32_t x2, int32_t x3) { return svdupq_n_s32(x0, x1, x2, x3); }
__forceinline svint8_t svdupq_s8(int8_t x0, int8_t x1, int8_t x2, int8_t x3, int8_t x4, int8_t x5, int8_t x6, int8_t x7, int8_t x8, int8_t x9, int8_t x10, int8_t x11, int8_t x12, int8_t x13, int8_t x14, int8_t x15) { return svdupq_n_s8(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15); }
__forceinline svuint32_t svdupq_u32(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) { return svdupq_n_u32(x0, x1, x2, x3); }
template <uint64_t N, typename T> __forceinline T __svext(T op1, T op2) { if constexpr(::std::is_same_v<T, svfloat64_t>) { return svext_f64(op1, op2, N); } else if constexpr(::std::is_same_v<T, svfloat32_t>) { return svext_f32(op1, op2, N); } else if constexpr(::std::is_same_v<T, svfloat16_t>) { return svext_f16(op1, op2, N); } else if constexpr(::std::is_same_v<T, svbfloat16_t>) { return svext_bf16(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint8_t>) { return svext_s8(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svext_s16(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svext_s32(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint64_t>) { return svext_s64(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint8_t>) { return svext_u8(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svext_u16(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svext_u32(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint64_t>) { return svext_u64(op1, op2, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svext(op1, op2, imm3) __svext<imm3>(op1, op2)
__forceinline svfloat32_t svsplice(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svsplice_f32(pg, op1, op2); }
__forceinline svfloat64_t svsplice(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svsplice_f64(pg, op1, op2); }
__forceinline svint8_t svsplice(svbool_t pg, svint8_t op1, svint8_t op2) { return svsplice_s8(pg, op1, op2); }
__forceinline svint16_t svsplice(svbool_t pg, svint16_t op1, svint16_t op2) { return svsplice_s16(pg, op1, op2); }
__forceinline svfloat16_t svsplice(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svsplice_f16(pg, op1, op2); }
__forceinline svbfloat16_t svsplice(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) { return svsplice_bf16(pg, op1, op2); }
__forceinline svint64_t svsplice(svbool_t pg, svint64_t op1, svint64_t op2) { return svsplice_s64(pg, op1, op2); }
__forceinline svuint8_t svsplice(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svsplice_u8(pg, op1, op2); }
__forceinline svuint16_t svsplice(svbool_t pg, svuint16_t
op1, svuint16_t op2) { return svsplice_u16(pg, op1, op2); } __forceinline svuint32_t svsplice(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svsplice_u32(pg, op1, op2); } __forceinline svuint64_t svsplice(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svsplice_u64(pg, op1, op2); } __forceinline svint32_t svsplice(svbool_t pg, svint32_t op1, svint32_t op2) { return svsplice_s32(pg, op1, op2); } __forceinline svuint32_t svinsr(svuint32_t op1, uint32_t op2) { return svinsr_n_u32(op1, op2); } __forceinline svuint16_t svinsr(svuint16_t op1, uint16_t op2) { return svinsr_n_u16(op1, op2); } __forceinline svint32_t svinsr(svint32_t op1, int32_t op2) { return svinsr_n_s32(op1, op2); } __forceinline svint64_t svinsr(svint64_t op1, int64_t op2) { return svinsr_n_s64(op1, op2); } __forceinline svint16_t svinsr(svint16_t op1, int16_t op2) { return svinsr_n_s16(op1, op2); } __forceinline svint8_t svinsr(svint8_t op1, int8_t op2) { return svinsr_n_s8(op1, op2); } __forceinline svfloat64_t svinsr(svfloat64_t op1, float64_t op2) { return svinsr_n_f64(op1, op2); } __forceinline svfloat32_t svinsr(svfloat32_t op1, float32_t op2) { return svinsr_n_f32(op1, op2); } __forceinline svuint8_t svinsr(svuint8_t op1, uint8_t op2) { return svinsr_n_u8(op1, op2); } __forceinline svuint64_t svinsr(svuint64_t op1, uint64_t op2) { return svinsr_n_u64(op1, op2); } __forceinline svuint32_t svrbit_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { return svrbit_u32_m(inactive, pg, op); } __forceinline svuint64_t svrbit_z(svbool_t pg, svuint64_t op) { return svrbit_u64_z(pg, op); } __forceinline svuint32_t svrbit_z(svbool_t pg, svuint32_t op) { return svrbit_u32_z(pg, op); } __forceinline svuint16_t svrbit_z(svbool_t pg, svuint16_t op) { return svrbit_u16_z(pg, op); } __forceinline svuint8_t svrbit_z(svbool_t pg, svuint8_t op) { return svrbit_u8_z(pg, op); } __forceinline svint64_t svrbit_z(svbool_t pg, svint64_t op) { return svrbit_s64_z(pg, op); } __forceinline svint32_t svrbit_z(svbool_t pg, svint32_t op) { return svrbit_s32_z(pg, op); } __forceinline svint16_t svrbit_z(svbool_t pg, svint16_t op) { return svrbit_s16_z(pg, op); } __forceinline svint8_t svrbit_z(svbool_t pg, svint8_t op) { return svrbit_s8_z(pg, op); } __forceinline svuint64_t svrbit_x(svbool_t pg, svuint64_t op) { return svrbit_u64_x(pg, op); } __forceinline svuint64_t svrbit_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { return svrbit_u64_m(inactive, pg, op); } __forceinline svuint32_t svrbit_x(svbool_t pg, svuint32_t op) { return svrbit_u32_x(pg, op); } __forceinline svuint8_t svrbit_x(svbool_t pg, svuint8_t op) { return svrbit_u8_x(pg, op); } __forceinline svint64_t svrbit_x(svbool_t pg, svint64_t op) { return svrbit_s64_x(pg, op); } __forceinline svint32_t svrbit_x(svbool_t pg, svint32_t op) { return svrbit_s32_x(pg, op); } __forceinline svint8_t svrbit_m(svint8_t inactive, svbool_t pg, svint8_t op) { return svrbit_s8_m(inactive, pg, op); } __forceinline svint16_t svrbit_m(svint16_t inactive, svbool_t pg, svint16_t op) { return svrbit_s16_m(inactive, pg, op); } __forceinline svint32_t svrbit_m(svint32_t inactive, svbool_t pg, svint32_t op) { return svrbit_s32_m(inactive, pg, op); } __forceinline svint64_t svrbit_m(svint64_t inactive, svbool_t pg, svint64_t op) { return svrbit_s64_m(inactive, pg, op); } __forceinline svint16_t svrbit_x(svbool_t pg, svint16_t op) { return svrbit_s16_x(pg, op); } __forceinline svuint16_t svrbit_m(svuint16_t inactive, svbool_t pg, svuint16_t op) { return svrbit_u16_m(inactive, pg, op); } __forceinline 
svuint16_t svrbit_x(svbool_t pg, svuint16_t op) { return svrbit_u16_x(pg, op); } __forceinline svuint8_t svrbit_m(svuint8_t inactive, svbool_t pg, svuint8_t op) { return svrbit_u8_m(inactive, pg, op); } __forceinline svint8_t svrbit_x(svbool_t pg, svint8_t op) { return svrbit_s8_x(pg, op); } __forceinline svfloat32_t svrev(svfloat32_t op) { return svrev_f32(op); } __forceinline svfloat64_t svrev(svfloat64_t op) { return svrev_f64(op); } __forceinline svint8_t svrev(svint8_t op) { return svrev_s8(op); } __forceinline svint16_t svrev(svint16_t op) { return svrev_s16(op); } __forceinline svint32_t svrev(svint32_t op) { return svrev_s32(op); } __forceinline svint64_t svrev(svint64_t op) { return svrev_s64(op); } __forceinline svuint8_t svrev(svuint8_t op) { return svrev_u8(op); } __forceinline svuint16_t svrev(svuint16_t op) { return svrev_u16(op); } __forceinline svuint32_t svrev(svuint32_t op) { return svrev_u32(op); } __forceinline svuint64_t svrev(svuint64_t op) { return svrev_u64(op); } __forceinline svfloat16_t svrev(svfloat16_t op) { return svrev_f16(op); } __forceinline svbfloat16_t svrev(svbfloat16_t op) { return svrev_bf16(op); } __forceinline svuint32_t svrevb_z(svbool_t pg, svuint32_t op) { return svrevb_u32_z(pg, op); } __forceinline svuint16_t svrevb_z(svbool_t pg, svuint16_t op) { return svrevb_u16_z(pg, op); } __forceinline svint64_t svrevb_z(svbool_t pg, svint64_t op) { return svrevb_s64_z(pg, op); } __forceinline svint32_t svrevb_z(svbool_t pg, svint32_t op) { return svrevb_s32_z(pg, op); } __forceinline svint16_t svrevb_z(svbool_t pg, svint16_t op) { return svrevb_s16_z(pg, op); } __forceinline svuint64_t svrevb_x(svbool_t pg, svuint64_t op) { return svrevb_u64_x(pg, op); } __forceinline svuint32_t svrevb_x(svbool_t pg, svuint32_t op) { return svrevb_u32_x(pg, op); } __forceinline svuint64_t svrevb_z(svbool_t pg, svuint64_t op) { return svrevb_u64_z(pg, op); } __forceinline svuint16_t svrevb_x(svbool_t pg, svuint16_t op) { return svrevb_u16_x(pg, op); } __forceinline svint32_t svrevb_x(svbool_t pg, svint32_t op) { return svrevb_s32_x(pg, op); } __forceinline svint16_t svrevb_x(svbool_t pg, svint16_t op) { return svrevb_s16_x(pg, op); } __forceinline svuint64_t svrevb_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { return svrevb_u64_m(inactive, pg, op); } __forceinline svuint32_t svrevb_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { return svrevb_u32_m(inactive, pg, op); } __forceinline svuint16_t svrevb_m(svuint16_t inactive, svbool_t pg, svuint16_t op) { return svrevb_u16_m(inactive, pg, op); } __forceinline svint64_t svrevb_m(svint64_t inactive, svbool_t pg, svint64_t op) { return svrevb_s64_m(inactive, pg, op); } __forceinline svint64_t svrevb_x(svbool_t pg, svint64_t op) { return svrevb_s64_x(pg, op); } __forceinline svint32_t svrevb_m(svint32_t inactive, svbool_t pg, svint32_t op) { return svrevb_s32_m(inactive, pg, op); } __forceinline svint16_t svrevb_m(svint16_t inactive, svbool_t pg, svint16_t op) { return svrevb_s16_m(inactive, pg, op); } __forceinline svint32_t svrevh_z(svbool_t pg, svint32_t op) { return svrevh_s32_z(pg, op); } __forceinline svint64_t svrevh_z(svbool_t pg, svint64_t op) { return svrevh_s64_z(pg, op); } __forceinline svuint64_t svrevh_z(svbool_t pg, svuint64_t op) { return svrevh_u64_z(pg, op); } __forceinline svint32_t svrevh_m(svint32_t inactive, svbool_t pg, svint32_t op) { return svrevh_s32_m(inactive, pg, op); } __forceinline svint64_t svrevh_m(svint64_t inactive, svbool_t pg, svint64_t op) { return svrevh_s64_m(inactive, pg, op); 
} __forceinline svuint32_t svrevh_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { return svrevh_u32_m(inactive, pg, op); } __forceinline svint64_t svrevh_x(svbool_t pg, svint64_t op) { return svrevh_s64_x(pg, op); } __forceinline svint32_t svrevh_x(svbool_t pg, svint32_t op) { return svrevh_s32_x(pg, op); } __forceinline svuint32_t svrevh_z(svbool_t pg, svuint32_t op) { return svrevh_u32_z(pg, op); } __forceinline svuint32_t svrevh_x(svbool_t pg, svuint32_t op) { return svrevh_u32_x(pg, op); } __forceinline svuint64_t svrevh_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { return svrevh_u64_m(inactive, pg, op); } __forceinline svuint64_t svrevh_x(svbool_t pg, svuint64_t op) { return svrevh_u64_x(pg, op); } __forceinline svuint64_t svrevw_z(svbool_t pg, svuint64_t op) { return svrevw_u64_z(pg, op); } __forceinline svint64_t svrevw_z(svbool_t pg, svint64_t op) { return svrevw_s64_z(pg, op); } __forceinline svint64_t svrevw_x(svbool_t pg, svint64_t op) { return svrevw_s64_x(pg, op); } __forceinline svuint64_t svrevw_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { return svrevw_u64_m(inactive, pg, op); } __forceinline svint64_t svrevw_m(svint64_t inactive, svbool_t pg, svint64_t op) { return svrevw_s64_m(inactive, pg, op); } __forceinline svuint64_t svrevw_x(svbool_t pg, svuint64_t op) { return svrevw_u64_x(pg, op); } __forceinline svfloat32_t svdup_f32(float32_t op) { return svdup_n_f32(op); } __forceinline svuint16_t svdup_u16(uint16_t op) { return svdup_n_u16(op); } __forceinline svint8_t svdup_s8(int8_t op) { return svdup_n_s8(op); } __forceinline svuint8_t svdup_u8_m(svuint8_t inactive, svbool_t pg, uint8_t op) { return svdup_n_u8_m(inactive, pg, op); } __forceinline svint64_t svdup_s64_m(svint64_t inactive, svbool_t pg, int64_t op) { return svdup_n_s64_m(inactive, pg, op); } __forceinline svint32_t svdup_s32_m(svint32_t inactive, svbool_t pg, int32_t op) { return svdup_n_s32_m(inactive, pg, op); } __forceinline svint16_t svdup_s16_m(svint16_t inactive, svbool_t pg, int16_t op) { return svdup_n_s16_m(inactive, pg, op); } __forceinline svint8_t svdup_s8_m(svint8_t inactive, svbool_t pg, int8_t op) { return svdup_n_s8_m(inactive, pg, op); } __forceinline svfloat64_t svdup_f64_m(svfloat64_t inactive, svbool_t pg, float64_t op) { return svdup_n_f64_m(inactive, pg, op); } __forceinline svfloat32_t svdup_f32_m(svfloat32_t inactive, svbool_t pg, float32_t op) { return svdup_n_f32_m(inactive, pg, op); } __forceinline svuint64_t svdup_u64(uint64_t op) { return svdup_n_u64(op); } __forceinline svuint32_t svdup_u32(uint32_t op) { return svdup_n_u32(op); } __forceinline svuint8_t svdup_u8(uint8_t op) { return svdup_n_u8(op); } __forceinline svint64_t svdup_s64(int64_t op) { return svdup_n_s64(op); } __forceinline svint32_t svdup_s32(int32_t op) { return svdup_n_s32(op); } __forceinline svint16_t svdup_s16(int16_t op) { return svdup_n_s16(op); } __forceinline svfloat64_t svdup_f64(float64_t op) { return svdup_n_f64(op); } __forceinline svuint64_t svdup_u64_m(svuint64_t inactive, svbool_t pg, uint64_t op) { return svdup_n_u64_m(inactive, pg, op); } __forceinline svint8_t svdup_s8_x(svbool_t pg, int8_t op) { return svdup_n_s8_x(pg, op); } __forceinline svbfloat16_t svdup_lane(svbfloat16_t data, uint16_t index) { return svdup_lane_bf16(data, index); } __forceinline svfloat16_t svdup_lane(svfloat16_t data, uint16_t index) { return svdup_lane_f16(data, index); } __forceinline svfloat32_t svdup_lane(svfloat32_t data, uint32_t index) { return svdup_lane_f32(data, index); } __forceinline 
svuint64_t svdup_u64_z(svbool_t pg, uint64_t op) { return svdup_n_u64_z(pg, op); } __forceinline svfloat64_t svdup_lane(svfloat64_t data, uint64_t index) { return svdup_lane_f64(data, index); } __forceinline svint16_t svdup_lane(svint16_t data, uint16_t index) { return svdup_lane_s16(data, index); } __forceinline svuint16_t svdup_u16_m(svuint16_t inactive, svbool_t pg, uint16_t op) { return svdup_n_u16_m(inactive, pg, op); } __forceinline svint32_t svdup_lane(svint32_t data, uint32_t index) { return svdup_lane_s32(data, index); } __forceinline svint64_t svdup_lane(svint64_t data, uint64_t index) { return svdup_lane_s64(data, index); } __forceinline svuint16_t svdup_lane(svuint16_t data, uint16_t index) { return svdup_lane_u16(data, index); } __forceinline svuint32_t svdup_lane(svuint32_t data, uint32_t index) { return svdup_lane_u32(data, index); } __forceinline svuint64_t svdup_lane(svuint64_t data, uint64_t index) { return svdup_lane_u64(data, index); } __forceinline svint8_t svdup_lane(svint8_t data, uint8_t index) { return svdup_lane_s8(data, index); } __forceinline svuint32_t svdup_u32_z(svbool_t pg, uint32_t op) { return svdup_n_u32_z(pg, op); } __forceinline svuint16_t svdup_u16_z(svbool_t pg, uint16_t op) { return svdup_n_u16_z(pg, op); } __forceinline svuint8_t svdup_u8_z(svbool_t pg, uint8_t op) { return svdup_n_u8_z(pg, op); } __forceinline svfloat32_t svdup_f32_x(svbool_t pg, float32_t op) { return svdup_n_f32_x(pg, op); } __forceinline svfloat64_t svdup_f64_x(svbool_t pg, float64_t op) { return svdup_n_f64_x(pg, op); } __forceinline svint16_t svdup_s16_x(svbool_t pg, int16_t op) { return svdup_n_s16_x(pg, op); } __forceinline svint32_t svdup_s32_x(svbool_t pg, int32_t op) { return svdup_n_s32_x(pg, op); } __forceinline svint64_t svdup_s64_x(svbool_t pg, int64_t op) { return svdup_n_s64_x(pg, op); } __forceinline svuint8_t svdup_u8_x(svbool_t pg, uint8_t op) { return svdup_n_u8_x(pg, op); } __forceinline svuint16_t svdup_u16_x(svbool_t pg, uint16_t op) { return svdup_n_u16_x(pg, op); } __forceinline svuint32_t svdup_u32_x(svbool_t pg, uint32_t op) { return svdup_n_u32_x(pg, op); } __forceinline svuint64_t svdup_u64_x(svbool_t pg, uint64_t op) { return svdup_n_u64_x(pg, op); } __forceinline svfloat32_t svdup_f32_z(svbool_t pg, float32_t op) { return svdup_n_f32_z(pg, op); } __forceinline svfloat64_t svdup_f64_z(svbool_t pg, float64_t op) { return svdup_n_f64_z(pg, op); } __forceinline svint8_t svdup_s8_z(svbool_t pg, int8_t op) { return svdup_n_s8_z(pg, op); } __forceinline svint16_t svdup_s16_z(svbool_t pg, int16_t op) { return svdup_n_s16_z(pg, op); } __forceinline svint32_t svdup_s32_z(svbool_t pg, int32_t op) { return svdup_n_s32_z(pg, op); } __forceinline svint64_t svdup_s64_z(svbool_t pg, int64_t op) { return svdup_n_s64_z(pg, op); } __forceinline svuint8_t svdup_lane(svuint8_t data, uint8_t index) { return svdup_lane_u8(data, index); } __forceinline svuint32_t svdup_u32_m(svuint32_t inactive, svbool_t pg, uint32_t op) { return svdup_n_u32_m(inactive, pg, op); } __forceinline svuint32_t svtrn1(svuint32_t op1, svuint32_t op2) { return svtrn1_u32(op1, op2); } __forceinline svint32_t svtrn1(svint32_t op1, svint32_t op2) { return svtrn1_s32(op1, op2); } __forceinline svint64_t svtrn1(svint64_t op1, svint64_t op2) { return svtrn1_s64(op1, op2); } __forceinline svuint8_t svtrn1(svuint8_t op1, svuint8_t op2) { return svtrn1_u8(op1, op2); } __forceinline svuint16_t svtrn1(svuint16_t op1, svuint16_t op2) { return svtrn1_u16(op1, op2); } __forceinline svbfloat16_t 
svtrn1(svbfloat16_t op1, svbfloat16_t op2) { return svtrn1_bf16(op1, op2); } __forceinline svfloat16_t svtrn1(svfloat16_t op1, svfloat16_t op2) { return svtrn1_f16(op1, op2); } __forceinline svint8_t svtrn1(svint8_t op1, svint8_t op2) { return svtrn1_s8(op1, op2); } __forceinline svfloat32_t svtrn1(svfloat32_t op1, svfloat32_t op2) { return svtrn1_f32(op1, op2); } __forceinline svfloat64_t svtrn1(svfloat64_t op1, svfloat64_t op2) { return svtrn1_f64(op1, op2); } __forceinline svuint64_t svtrn1(svuint64_t op1, svuint64_t op2) { return svtrn1_u64(op1, op2); } __forceinline svint16_t svtrn1(svint16_t op1, svint16_t op2) { return svtrn1_s16(op1, op2); } __forceinline svint16_t svtrn1q(svint16_t op1, svint16_t op2) { return svtrn1q_s16(op1, op2); } __forceinline svint32_t svtrn1q(svint32_t op1, svint32_t op2) { return svtrn1q_s32(op1, op2); } __forceinline svfloat64_t svtrn1q(svfloat64_t op1, svfloat64_t op2) { return svtrn1q_f64(op1, op2); } __forceinline svuint8_t svtrn1q(svuint8_t op1, svuint8_t op2) { return svtrn1q_u8(op1, op2); } __forceinline svuint16_t svtrn1q(svuint16_t op1, svuint16_t op2) { return svtrn1q_u16(op1, op2); } __forceinline svuint32_t svtrn1q(svuint32_t op1, svuint32_t op2) { return svtrn1q_u32(op1, op2); } __forceinline svuint64_t svtrn1q(svuint64_t op1, svuint64_t op2) { return svtrn1q_u64(op1, op2); } __forceinline svfloat32_t svtrn1q(svfloat32_t op1, svfloat32_t op2) { return svtrn1q_f32(op1, op2); } __forceinline svfloat16_t svtrn1q(svfloat16_t op1, svfloat16_t op2) { return svtrn1q_f16(op1, op2); } __forceinline svbfloat16_t svtrn1q(svbfloat16_t op1, svbfloat16_t op2) { return svtrn1q_bf16(op1, op2); } __forceinline svint8_t svtrn1q(svint8_t op1, svint8_t op2) { return svtrn1q_s8(op1, op2); } __forceinline svint64_t svtrn1q(svint64_t op1, svint64_t op2) { return svtrn1q_s64(op1, op2); } __forceinline svfloat32_t svtrn2(svfloat32_t op1, svfloat32_t op2) { return svtrn2_f32(op1, op2); } __forceinline svbfloat16_t svtrn2(svbfloat16_t op1, svbfloat16_t op2) { return svtrn2_bf16(op1, op2); } __forceinline svfloat16_t svtrn2(svfloat16_t op1, svfloat16_t op2) { return svtrn2_f16(op1, op2); } __forceinline svfloat64_t svtrn2(svfloat64_t op1, svfloat64_t op2) { return svtrn2_f64(op1, op2); } __forceinline svint8_t svtrn2(svint8_t op1, svint8_t op2) { return svtrn2_s8(op1, op2); } __forceinline svint16_t svtrn2(svint16_t op1, svint16_t op2) { return svtrn2_s16(op1, op2); } __forceinline svint32_t svtrn2(svint32_t op1, svint32_t op2) { return svtrn2_s32(op1, op2); } __forceinline svint64_t svtrn2(svint64_t op1, svint64_t op2) { return svtrn2_s64(op1, op2); } __forceinline svuint8_t svtrn2(svuint8_t op1, svuint8_t op2) { return svtrn2_u8(op1, op2); } __forceinline svuint16_t svtrn2(svuint16_t op1, svuint16_t op2) { return svtrn2_u16(op1, op2); } __forceinline svuint32_t svtrn2(svuint32_t op1, svuint32_t op2) { return svtrn2_u32(op1, op2); } __forceinline svuint64_t svtrn2(svuint64_t op1, svuint64_t op2) { return svtrn2_u64(op1, op2); } __forceinline svuint32_t svtrn2q(svuint32_t op1, svuint32_t op2) { return svtrn2q_u32(op1, op2); } __forceinline svbfloat16_t svtrn2q(svbfloat16_t op1, svbfloat16_t op2) { return svtrn2q_bf16(op1, op2); } __forceinline svfloat16_t svtrn2q(svfloat16_t op1, svfloat16_t op2) { return svtrn2q_f16(op1, op2); } __forceinline svfloat32_t svtrn2q(svfloat32_t op1, svfloat32_t op2) { return svtrn2q_f32(op1, op2); } __forceinline svint8_t svtrn2q(svint8_t op1, svint8_t op2) { return svtrn2q_s8(op1, op2); } __forceinline svint16_t svtrn2q(svint16_t op1, 
svint16_t op2) { return svtrn2q_s16(op1, op2); } __forceinline svint32_t svtrn2q(svint32_t op1, svint32_t op2) { return svtrn2q_s32(op1, op2); } __forceinline svint64_t svtrn2q(svint64_t op1, svint64_t op2) { return svtrn2q_s64(op1, op2); } __forceinline svuint8_t svtrn2q(svuint8_t op1, svuint8_t op2) { return svtrn2q_u8(op1, op2); } __forceinline svuint16_t svtrn2q(svuint16_t op1, svuint16_t op2) { return svtrn2q_u16(op1, op2); } __forceinline svuint64_t svtrn2q(svuint64_t op1, svuint64_t op2) { return svtrn2q_u64(op1, op2); } __forceinline svfloat64_t svtrn2q(svfloat64_t op1, svfloat64_t op2) { return svtrn2q_f64(op1, op2); } __forceinline svuint64_t svuzp1(svuint64_t op1, svuint64_t op2) { return svuzp1_u64(op1, op2); } __forceinline svuint32_t svuzp1(svuint32_t op1, svuint32_t op2) { return svuzp1_u32(op1, op2); } __forceinline svfloat32_t svuzp1(svfloat32_t op1, svfloat32_t op2) { return svuzp1_f32(op1, op2); } __forceinline svfloat16_t svuzp1(svfloat16_t op1, svfloat16_t op2) { return svuzp1_f16(op1, op2); } __forceinline svuint16_t svuzp1(svuint16_t op1, svuint16_t op2) { return svuzp1_u16(op1, op2); } __forceinline svfloat64_t svuzp1(svfloat64_t op1, svfloat64_t op2) { return svuzp1_f64(op1, op2); } __forceinline svint8_t svuzp1(svint8_t op1, svint8_t op2) { return svuzp1_s8(op1, op2); } __forceinline svint16_t svuzp1(svint16_t op1, svint16_t op2) { return svuzp1_s16(op1, op2); } __forceinline svint32_t svuzp1(svint32_t op1, svint32_t op2) { return svuzp1_s32(op1, op2); } __forceinline svint64_t svuzp1(svint64_t op1, svint64_t op2) { return svuzp1_s64(op1, op2); } __forceinline svuint8_t svuzp1(svuint8_t op1, svuint8_t op2) { return svuzp1_u8(op1, op2); } __forceinline svbfloat16_t svuzp1(svbfloat16_t op1, svbfloat16_t op2) { return svuzp1_bf16(op1, op2); } __forceinline svbfloat16_t svuzp1q(svbfloat16_t op1, svbfloat16_t op2) { return svuzp1q_bf16(op1, op2); } __forceinline svuint64_t svuzp1q(svuint64_t op1, svuint64_t op2) { return svuzp1q_u64(op1, op2); } __forceinline svuint32_t svuzp1q(svuint32_t op1, svuint32_t op2) { return svuzp1q_u32(op1, op2); } __forceinline svuint16_t svuzp1q(svuint16_t op1, svuint16_t op2) { return svuzp1q_u16(op1, op2); } __forceinline svuint8_t svuzp1q(svuint8_t op1, svuint8_t op2) { return svuzp1q_u8(op1, op2); } __forceinline svint64_t svuzp1q(svint64_t op1, svint64_t op2) { return svuzp1q_s64(op1, op2); } __forceinline svint32_t svuzp1q(svint32_t op1, svint32_t op2) { return svuzp1q_s32(op1, op2); } __forceinline svint16_t svuzp1q(svint16_t op1, svint16_t op2) { return svuzp1q_s16(op1, op2); } __forceinline svint8_t svuzp1q(svint8_t op1, svint8_t op2) { return svuzp1q_s8(op1, op2); } __forceinline svfloat64_t svuzp1q(svfloat64_t op1, svfloat64_t op2) { return svuzp1q_f64(op1, op2); } __forceinline svfloat32_t svuzp1q(svfloat32_t op1, svfloat32_t op2) { return svuzp1q_f32(op1, op2); } __forceinline svfloat16_t svuzp1q(svfloat16_t op1, svfloat16_t op2) { return svuzp1q_f16(op1, op2); } __forceinline svuint64_t svuzp2(svuint64_t op1, svuint64_t op2) { return svuzp2_u64(op1, op2); } __forceinline svfloat16_t svuzp2(svfloat16_t op1, svfloat16_t op2) { return svuzp2_f16(op1, op2); } __forceinline svuint16_t svuzp2(svuint16_t op1, svuint16_t op2) { return svuzp2_u16(op1, op2); } __forceinline svbfloat16_t svuzp2(svbfloat16_t op1, svbfloat16_t op2) { return svuzp2_bf16(op1, op2); } __forceinline svfloat32_t svuzp2(svfloat32_t op1, svfloat32_t op2) { return svuzp2_f32(op1, op2); } __forceinline svfloat64_t svuzp2(svfloat64_t op1, svfloat64_t op2) { return 
svuzp2_f64(op1, op2); } __forceinline svint8_t svuzp2(svint8_t op1, svint8_t op2) { return svuzp2_s8(op1, op2); } __forceinline svint16_t svuzp2(svint16_t op1, svint16_t op2) { return svuzp2_s16(op1, op2); } __forceinline svint32_t svuzp2(svint32_t op1, svint32_t op2) { return svuzp2_s32(op1, op2); } __forceinline svint64_t svuzp2(svint64_t op1, svint64_t op2) { return svuzp2_s64(op1, op2); } __forceinline svuint8_t svuzp2(svuint8_t op1, svuint8_t op2) { return svuzp2_u8(op1, op2); } __forceinline svuint32_t svuzp2(svuint32_t op1, svuint32_t op2) { return svuzp2_u32(op1, op2); } __forceinline svfloat16_t svuzp2q(svfloat16_t op1, svfloat16_t op2) { return svuzp2q_f16(op1, op2); } __forceinline svuint32_t svuzp2q(svuint32_t op1, svuint32_t op2) { return svuzp2q_u32(op1, op2); } __forceinline svuint16_t svuzp2q(svuint16_t op1, svuint16_t op2) { return svuzp2q_u16(op1, op2); } __forceinline svint64_t svuzp2q(svint64_t op1, svint64_t op2) { return svuzp2q_s64(op1, op2); } __forceinline svint32_t svuzp2q(svint32_t op1, svint32_t op2) { return svuzp2q_s32(op1, op2); } __forceinline svint16_t svuzp2q(svint16_t op1, svint16_t op2) { return svuzp2q_s16(op1, op2); } __forceinline svint8_t svuzp2q(svint8_t op1, svint8_t op2) { return svuzp2q_s8(op1, op2); } __forceinline svfloat64_t svuzp2q(svfloat64_t op1, svfloat64_t op2) { return svuzp2q_f64(op1, op2); } __forceinline svfloat32_t svuzp2q(svfloat32_t op1, svfloat32_t op2) { return svuzp2q_f32(op1, op2); } __forceinline svbfloat16_t svuzp2q(svbfloat16_t op1, svbfloat16_t op2) { return svuzp2q_bf16(op1, op2); } __forceinline svuint8_t svuzp2q(svuint8_t op1, svuint8_t op2) { return svuzp2q_u8(op1, op2); } __forceinline svuint64_t svuzp2q(svuint64_t op1, svuint64_t op2) { return svuzp2q_u64(op1, op2); } __forceinline svuint32_t svzip2(svuint32_t op1, svuint32_t op2) { return svzip2_u32(op1, op2); } __forceinline svuint64_t svzip2(svuint64_t op1, svuint64_t op2) { return svzip2_u64(op1, op2); } __forceinline svuint16_t svzip2(svuint16_t op1, svuint16_t op2) { return svzip2_u16(op1, op2); } __forceinline svint64_t svzip2(svint64_t op1, svint64_t op2) { return svzip2_s64(op1, op2); } __forceinline svint32_t svzip2(svint32_t op1, svint32_t op2) { return svzip2_s32(op1, op2); } __forceinline svint16_t svzip2(svint16_t op1, svint16_t op2) { return svzip2_s16(op1, op2); } __forceinline svint8_t svzip2(svint8_t op1, svint8_t op2) { return svzip2_s8(op1, op2); } __forceinline svfloat64_t svzip2(svfloat64_t op1, svfloat64_t op2) { return svzip2_f64(op1, op2); } __forceinline svfloat32_t svzip2(svfloat32_t op1, svfloat32_t op2) { return svzip2_f32(op1, op2); } __forceinline svfloat16_t svzip2(svfloat16_t op1, svfloat16_t op2) { return svzip2_f16(op1, op2); } __forceinline svuint8_t svzip2(svuint8_t op1, svuint8_t op2) { return svzip2_u8(op1, op2); } __forceinline svbfloat16_t svzip2(svbfloat16_t op1, svbfloat16_t op2) { return svzip2_bf16(op1, op2); } __forceinline svbfloat16_t svzip1(svbfloat16_t op1, svbfloat16_t op2) { return svzip1_bf16(op1, op2); } __forceinline svfloat16_t svzip1(svfloat16_t op1, svfloat16_t op2) { return svzip1_f16(op1, op2); } __forceinline svfloat32_t svzip1(svfloat32_t op1, svfloat32_t op2) { return svzip1_f32(op1, op2); } __forceinline svfloat64_t svzip1(svfloat64_t op1, svfloat64_t op2) { return svzip1_f64(op1, op2); } __forceinline svint32_t svzip1(svint32_t op1, svint32_t op2) { return svzip1_s32(op1, op2); } __forceinline svint8_t svzip1(svint8_t op1, svint8_t op2) { return svzip1_s8(op1, op2); } __forceinline svuint8_t 
svzip1(svuint8_t op1, svuint8_t op2) { return svzip1_u8(op1, op2); } __forceinline svuint16_t svzip1(svuint16_t op1, svuint16_t op2) { return svzip1_u16(op1, op2); } __forceinline svuint32_t svzip1(svuint32_t op1, svuint32_t op2) { return svzip1_u32(op1, op2); } __forceinline svuint64_t svzip1(svuint64_t op1, svuint64_t op2) { return svzip1_u64(op1, op2); } __forceinline svint64_t svzip1(svint64_t op1, svint64_t op2) { return svzip1_s64(op1, op2); } __forceinline svint16_t svzip1(svint16_t op1, svint16_t op2) { return svzip1_s16(op1, op2); } __forceinline svuint64_t svzip2q(svuint64_t op1, svuint64_t op2) { return svzip2q_u64(op1, op2); } __forceinline svint16_t svzip2q(svint16_t op1, svint16_t op2) { return svzip2q_s16(op1, op2); } __forceinline svbfloat16_t svzip2q(svbfloat16_t op1, svbfloat16_t op2) { return svzip2q_bf16(op1, op2); } __forceinline svfloat16_t svzip2q(svfloat16_t op1, svfloat16_t op2) { return svzip2q_f16(op1, op2); } __forceinline svuint32_t svzip2q(svuint32_t op1, svuint32_t op2) { return svzip2q_u32(op1, op2); } __forceinline svfloat32_t svzip2q(svfloat32_t op1, svfloat32_t op2) { return svzip2q_f32(op1, op2); } __forceinline svfloat64_t svzip2q(svfloat64_t op1, svfloat64_t op2) { return svzip2q_f64(op1, op2); } __forceinline svint8_t svzip2q(svint8_t op1, svint8_t op2) { return svzip2q_s8(op1, op2); } __forceinline svint32_t svzip2q(svint32_t op1, svint32_t op2) { return svzip2q_s32(op1, op2); } __forceinline svint64_t svzip2q(svint64_t op1, svint64_t op2) { return svzip2q_s64(op1, op2); } __forceinline svuint8_t svzip2q(svuint8_t op1, svuint8_t op2) { return svzip2q_u8(op1, op2); } __forceinline svuint16_t svzip2q(svuint16_t op1, svuint16_t op2) { return svzip2q_u16(op1, op2); } __forceinline svuint32_t svzip1q(svuint32_t op1, svuint32_t op2) { return svzip1q_u32(op1, op2); } __forceinline svuint64_t svzip1q(svuint64_t op1, svuint64_t op2) { return svzip1q_u64(op1, op2); } __forceinline svbfloat16_t svzip1q(svbfloat16_t op1, svbfloat16_t op2) { return svzip1q_bf16(op1, op2); } __forceinline svfloat32_t svzip1q(svfloat32_t op1, svfloat32_t op2) { return svzip1q_f32(op1, op2); } __forceinline svfloat16_t svzip1q(svfloat16_t op1, svfloat16_t op2) { return svzip1q_f16(op1, op2); } __forceinline svuint16_t svzip1q(svuint16_t op1, svuint16_t op2) { return svzip1q_u16(op1, op2); } __forceinline svuint8_t svzip1q(svuint8_t op1, svuint8_t op2) { return svzip1q_u8(op1, op2); } __forceinline svint64_t svzip1q(svint64_t op1, svint64_t op2) { return svzip1q_s64(op1, op2); } __forceinline svint32_t svzip1q(svint32_t op1, svint32_t op2) { return svzip1q_s32(op1, op2); } __forceinline svint16_t svzip1q(svint16_t op1, svint16_t op2) { return svzip1q_s16(op1, op2); } __forceinline svint8_t svzip1q(svint8_t op1, svint8_t op2) { return svzip1q_s8(op1, op2); } __forceinline svfloat64_t svzip1q(svfloat64_t op1, svfloat64_t op2) { return svzip1q_f64(op1, op2); } __forceinline svuint64_t svbext(svuint64_t op1, svuint64_t op2) { return svbext_u64(op1, op2); } __forceinline svuint32_t svbext(svuint32_t op1, uint32_t op2) { return svbext_n_u32(op1, op2); } __forceinline svuint64_t svbext(svuint64_t op1, uint64_t op2) { return svbext_n_u64(op1, op2); } __forceinline svuint8_t svbext(svuint8_t op1, uint8_t op2) { return svbext_n_u8(op1, op2); } __forceinline svuint32_t svbext(svuint32_t op1, svuint32_t op2) { return svbext_u32(op1, op2); } __forceinline svuint16_t svbext(svuint16_t op1, svuint16_t op2) { return svbext_u16(op1, op2); } __forceinline svuint8_t svbext(svuint8_t op1, svuint8_t op2) 
{ return svbext_u8(op1, op2); }
__forceinline svuint16_t svbext(svuint16_t op1, uint16_t op2) { return svbext_n_u16(op1, op2); }
__forceinline svuint16_t svbgrp(svuint16_t op1, svuint16_t op2) { return svbgrp_u16(op1, op2); }
__forceinline svuint32_t svbgrp(svuint32_t op1, svuint32_t op2) { return svbgrp_u32(op1, op2); }
__forceinline svuint8_t svbgrp(svuint8_t op1, uint8_t op2) { return svbgrp_n_u8(op1, op2); }
__forceinline svuint16_t svbgrp(svuint16_t op1, uint16_t op2) { return svbgrp_n_u16(op1, op2); }
__forceinline svuint32_t svbgrp(svuint32_t op1, uint32_t op2) { return svbgrp_n_u32(op1, op2); }
__forceinline svuint8_t svbgrp(svuint8_t op1, svuint8_t op2) { return svbgrp_u8(op1, op2); }
__forceinline svuint64_t svbgrp(svuint64_t op1, uint64_t op2) { return svbgrp_n_u64(op1, op2); }
__forceinline svuint64_t svbgrp(svuint64_t op1, svuint64_t op2) { return svbgrp_u64(op1, op2); }
__forceinline svuint64_t svbdep(svuint64_t op1, uint64_t op2) { return svbdep_n_u64(op1, op2); }
__forceinline svuint32_t svbdep(svuint32_t op1, uint32_t op2) { return svbdep_n_u32(op1, op2); }
__forceinline svuint16_t svbdep(svuint16_t op1, uint16_t op2) { return svbdep_n_u16(op1, op2); }
__forceinline svuint8_t svbdep(svuint8_t op1, uint8_t op2) { return svbdep_n_u8(op1, op2); }
__forceinline svuint32_t svbdep(svuint32_t op1, svuint32_t op2) { return svbdep_u32(op1, op2); }
__forceinline svuint16_t svbdep(svuint16_t op1, svuint16_t op2) { return svbdep_u16(op1, op2); }
__forceinline svuint8_t svbdep(svuint8_t op1, svuint8_t op2) { return svbdep_u8(op1, op2); }
__forceinline svuint64_t svbdep(svuint64_t op1, svuint64_t op2) { return svbdep_u64(op1, op2); }
__forceinline svuint32_t svhistcnt_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svhistcnt_s32_z(pg, op1, op2); }
__forceinline svuint32_t svhistcnt_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svhistcnt_u32_z(pg, op1, op2); }
__forceinline svuint64_t svhistcnt_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svhistcnt_u64_z(pg, op1, op2); }
__forceinline svuint64_t svhistcnt_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svhistcnt_s64_z(pg, op1, op2); }
__forceinline svuint8_t svhistseg(svuint8_t op1, svuint8_t op2) { return svhistseg_u8(op1, op2); }
__forceinline svuint8_t svhistseg(svint8_t op1, svint8_t op2) { return svhistseg_s8(op1, op2); }
__forceinline svbool_t svmatch(svbool_t pg, svint16_t op1, svint16_t op2) { return svmatch_s16(pg, op1, op2); }
__forceinline svbool_t svmatch(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmatch_u8(pg, op1, op2); }
__forceinline svbool_t svmatch(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmatch_u16(pg, op1, op2); }
__forceinline svbool_t svmatch(svbool_t pg, svint8_t op1, svint8_t op2) { return svmatch_s8(pg, op1, op2); }
__forceinline svbool_t svnmatch(svbool_t pg, svint8_t op1, svint8_t op2) { return svnmatch_s8(pg, op1, op2); }
__forceinline svbool_t svnmatch(svbool_t pg, svint16_t op1, svint16_t op2) { return svnmatch_s16(pg, op1, op2); }
__forceinline svbool_t svnmatch(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svnmatch_u8(pg, op1, op2); }
__forceinline svbool_t svnmatch(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svnmatch_u16(pg, op1, op2); }
template <uint64_t N, typename T> __forceinline T __svcadd(T op1, T op2) {
    if constexpr(::std::is_same_v<T, svuint64_t>) { return svcadd_u64(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svuint32_t>) { return svcadd_u32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svuint16_t>) { return svcadd_u16(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svuint8_t>) { return svcadd_u8(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint64_t>) { return svcadd_s64(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svcadd_s32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svcadd_s16(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint8_t>) { return svcadd_s8(op1, op2, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svcadd(op1, op2, imm_rotation) __svcadd<imm_rotation>(op1, op2)
template <uint64_t N, typename T1, typename T2> __forceinline T1 __svcdot(T1 op1, T2 op2, T2 op3) {
    if constexpr(::std::is_same_v<T1, svint32_t>) { return svcdot_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint64_t>) { return svcdot_s64(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svcdot(op1, op2, op3, imm_rotation) __svcdot<imm_rotation>(op1, op2, op3)
template <uint64_t N1, uint64_t N2, typename T1, typename T2> __forceinline T1 __svcdot_lane(T1 op1, T2 op2, T2 op3) {
    if constexpr(::std::is_same_v<T1, svint32_t>) { return svcdot_lane_s32(op1, op2, op3, N1, N2); }
    else if constexpr(::std::is_same_v<T1, svint64_t>) { return svcdot_lane_s64(op1, op2, op3, N1, N2); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svcdot_lane(op1, op2, op3, imm_index, imm_rotation) __svcdot_lane<imm_index, imm_rotation>(op1, op2, op3)
template <uint64_t N, typename T> __forceinline T __svqcadd(T op1, T op2) {
    if constexpr(::std::is_same_v<T, svint8_t>) { return svqcadd_s8(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svqcadd_s16(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svqcadd_s32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint64_t>) { return svqcadd_s64(op1, op2, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svqcadd(op1, op2, imm_rotation) __svqcadd<imm_rotation>(op1, op2)
template <uint64_t N, typename T> __forceinline T __svqrdcmlah(T op1, T op2, T op3) {
    if constexpr(::std::is_same_v<T, svint64_t>) { return svqrdcmlah_s64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svqrdcmlah_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svqrdcmlah_s16(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint8_t>) { return svqrdcmlah_s8(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svqrdcmlah(op1, op2, op3, imm_rotation) __svqrdcmlah<imm_rotation>(op1, op2, op3)
template <uint64_t N1, uint64_t N2, typename T> __forceinline T __svqrdcmlah_lane(T op1, T op2, T op3) {
    if constexpr(::std::is_same_v<T, svint16_t>) { return svqrdcmlah_lane_s16(op1, op2, op3, N1, N2); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svqrdcmlah_lane_s32(op1, op2, op3, N1, N2); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svqrdcmlah_lane(op1, op2, op3, imm_index, imm_rotation) __svqrdcmlah_lane<imm_index, imm_rotation>(op1, op2, op3)
__forceinline svuint8_t svaesimc(svuint8_t op) { return svaesimc_u8(op); }
__forceinline svuint8_t svaesmc(svuint8_t op) { return svaesmc_u8(op); }
__forceinline svuint8_t svaesd(svuint8_t op1, svuint8_t op2) { return svaesd_u8(op1, op2); }
__forceinline svuint8_t svaese(svuint8_t op1, svuint8_t op2) { return svaese_u8(op1, op2); }
__forceinline svuint32_t svsm4e(svuint32_t op1, svuint32_t op2) { return svsm4e_u32(op1, op2); }
__forceinline svuint32_t svsm4ekey(svuint32_t op1, svuint32_t op2) { return svsm4ekey_u32(op1, op2); }
__forceinline svfloat16_t svcvtnt_f16_m(svfloat16_t even, svbool_t pg, svfloat32_t op) { return svcvtnt_f16_f32_m(even, pg, op); }
__forceinline svfloat32_t svcvtnt_f32_x(svfloat32_t even, svbool_t pg, svfloat64_t op) { return svcvtnt_f32_f64_x(even, pg, op); }
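// Usage sketch (illustrative only, not part of the declarations above): the rotation
// immediate passed to the svcadd/svqcadd/svqrdcmlah macros is forwarded as a template
// argument, while the element type is still deduced from the vector operands.
// Assuming op1 and op2 are svint32_t vectors:
//
//     svint32_t r = svcadd(op1, op2, 90);   // expands to __svcadd<90>(op1, op2)
//                                           // and selects svcadd_s32(op1, op2, 90)
__forceinline svfloat16_t svcvtnt_f16_x(svfloat16_t even, svbool_t pg, svfloat32_t op) {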
return svcvtnt_f16_f32_x(even, pg, op); } __forceinline svfloat32_t svcvtnt_f32_m(svfloat32_t even, svbool_t pg, svfloat64_t op) { return svcvtnt_f32_f64_m(even, pg, op); } __forceinline svfloat32_t svcvtx_f32_z(svbool_t pg, svfloat64_t op) { return svcvtx_f32_f64_z(pg, op); } __forceinline svfloat32_t svcvtx_f32_x(svbool_t pg, svfloat64_t op) { return svcvtx_f32_f64_x(pg, op); } __forceinline svfloat32_t svcvtx_f32_m(svfloat32_t inactive, svbool_t pg, svfloat64_t op) { return svcvtx_f32_f64_m(inactive, pg, op); } __forceinline svfloat32_t svcvtxnt_f32_x(svfloat32_t even, svbool_t pg, svfloat64_t op) { return svcvtxnt_f32_f64_x(even, pg, op); } __forceinline svfloat32_t svcvtxnt_f32_m(svfloat32_t even, svbool_t pg, svfloat64_t op) { return svcvtxnt_f32_f64_m(even, pg, op); } __forceinline svfloat64_t svcvtlt_f64_x(svbool_t pg, svfloat32_t op) { return svcvtlt_f64_f32_x(pg, op); } __forceinline svfloat64_t svcvtlt_f64_m(svfloat64_t inactive, svbool_t pg, svfloat32_t op) { return svcvtlt_f64_f32_m(inactive, pg, op); } __forceinline svfloat32_t svcvtlt_f32_m(svfloat32_t inactive, svbool_t pg, svfloat16_t op) { return svcvtlt_f32_f16_m(inactive, pg, op); } __forceinline svfloat32_t svcvtlt_f32_x(svbool_t pg, svfloat16_t op) { return svcvtlt_f32_f16_x(pg, op); } __forceinline svint64_t svldnt1sh_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1sh_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svuint64_t svldnt1sh_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svldnt1sh_gather_u64base_index_u64(pg, bases, index); } __forceinline svuint64_t svldnt1sh_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1sh_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svuint32_t svldnt1sh_gather_index_u32(svbool_t pg, svuint32_t bases, int64_t index) { return svldnt1sh_gather_u32base_index_u32(pg, bases, index); } __forceinline svint32_t svldnt1sh_gather_index_s32(svbool_t pg, svuint32_t bases, int64_t index) { return svldnt1sh_gather_u32base_index_s32(pg, bases, index); } __forceinline svint32_t svldnt1sh_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldnt1sh_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svint64_t svldnt1sh_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svldnt1sh_gather_u64base_index_s64(pg, bases, index); } __forceinline svint32_t svldnt1sh_gather_s32(svbool_t pg, svuint32_t bases) { return svldnt1sh_gather_u32base_s32(pg, bases); } __forceinline svuint32_t svldnt1sh_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldnt1sh_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svint64_t svldnt1sh_gather_s64(svbool_t pg, svuint64_t bases) { return svldnt1sh_gather_u64base_s64(pg, bases); } __forceinline svuint64_t svldnt1sh_gather_u64(svbool_t pg, svuint64_t bases) { return svldnt1sh_gather_u64base_u64(pg, bases); } __forceinline svint32_t svldnt1sh_gather_offset_s32(svbool_t pg, const int16_t *base, svuint32_t offsets) { return svldnt1sh_gather_u32offset_s32(pg, base, offsets); } __forceinline svuint32_t svldnt1sh_gather_offset_u32(svbool_t pg, const int16_t *base, svuint32_t offsets) { return svldnt1sh_gather_u32offset_u32(pg, base, offsets); } __forceinline svuint32_t svldnt1sh_gather_u32(svbool_t pg, svuint32_t bases) { return svldnt1sh_gather_u32base_u32(pg, bases); } __forceinline svuint64_t svldnt1sh_gather_offset_u64(svbool_t pg, const int16_t *base, svint64_t offsets) { 
return svldnt1sh_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svldnt1sh_gather_offset_s64(svbool_t pg, const int16_t *base, svuint64_t offsets) { return svldnt1sh_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldnt1sh_gather_offset_u64(svbool_t pg, const int16_t *base, svuint64_t offsets) { return svldnt1sh_gather_u64offset_u64(pg, base, offsets); } __forceinline svint64_t svldnt1sh_gather_index_s64(svbool_t pg, const int16_t *base, svint64_t indices) { return svldnt1sh_gather_s64index_s64(pg, base, indices); } __forceinline svuint64_t svldnt1sh_gather_index_u64(svbool_t pg, const int16_t *base, svuint64_t indices) { return svldnt1sh_gather_u64index_u64(pg, base, indices); } __forceinline svint64_t svldnt1sh_gather_index_s64(svbool_t pg, const int16_t *base, svuint64_t indices) { return svldnt1sh_gather_u64index_s64(pg, base, indices); } __forceinline svint64_t svldnt1sh_gather_offset_s64(svbool_t pg, const int16_t *base, svint64_t offsets) { return svldnt1sh_gather_s64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldnt1sh_gather_index_u64(svbool_t pg, const int16_t *base, svint64_t indices) { return svldnt1sh_gather_s64index_u64(pg, base, indices); } __forceinline svint64_t svldnt1uh_gather_index_s64(svbool_t pg, const uint16_t *base, svuint64_t indices) { return svldnt1uh_gather_u64index_s64(pg, base, indices); } __forceinline svuint64_t svldnt1uh_gather_index_u64(svbool_t pg, const uint16_t *base, svuint64_t indices) { return svldnt1uh_gather_u64index_u64(pg, base, indices); } __forceinline svint32_t svldnt1uh_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldnt1uh_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svint64_t svldnt1uh_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1uh_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svuint64_t svldnt1uh_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1uh_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint32_t svldnt1uh_gather_index_s32(svbool_t pg, svuint32_t bases, int64_t index) { return svldnt1uh_gather_u32base_index_s32(pg, bases, index); } __forceinline svuint32_t svldnt1uh_gather_index_u32(svbool_t pg, svuint32_t bases, int64_t index) { return svldnt1uh_gather_u32base_index_u32(pg, bases, index); } __forceinline svuint64_t svldnt1uh_gather_index_u64(svbool_t pg, const uint16_t *base, svint64_t indices) { return svldnt1uh_gather_s64index_u64(pg, base, indices); } __forceinline svuint32_t svldnt1uh_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldnt1uh_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svint64_t svldnt1uh_gather_index_s64(svbool_t pg, const uint16_t *base, svint64_t indices) { return svldnt1uh_gather_s64index_s64(pg, base, indices); } __forceinline svint32_t svldnt1uh_gather_offset_s32(svbool_t pg, const uint16_t *base, svuint32_t offsets) { return svldnt1uh_gather_u32offset_s32(pg, base, offsets); } __forceinline svint64_t svldnt1uh_gather_offset_s64(svbool_t pg, const uint16_t *base, svuint64_t offsets) { return svldnt1uh_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldnt1uh_gather_offset_u64(svbool_t pg, const uint16_t *base, svint64_t offsets) { return svldnt1uh_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svldnt1uh_gather_offset_s64(svbool_t pg, const uint16_t *base, svint64_t offsets) { return svldnt1uh_gather_s64offset_s64(pg, base, 
offsets); } __forceinline svuint32_t svldnt1uh_gather_offset_u32(svbool_t pg, const uint16_t *base, svuint32_t offsets) { return svldnt1uh_gather_u32offset_u32(pg, base, offsets); } __forceinline svuint64_t svldnt1uh_gather_u64(svbool_t pg, svuint64_t bases) { return svldnt1uh_gather_u64base_u64(pg, bases); } __forceinline svint64_t svldnt1uh_gather_s64(svbool_t pg, svuint64_t bases) { return svldnt1uh_gather_u64base_s64(pg, bases); } __forceinline svuint32_t svldnt1uh_gather_u32(svbool_t pg, svuint32_t bases) { return svldnt1uh_gather_u32base_u32(pg, bases); } __forceinline svint32_t svldnt1uh_gather_s32(svbool_t pg, svuint32_t bases) { return svldnt1uh_gather_u32base_s32(pg, bases); } __forceinline svint64_t svldnt1uh_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svldnt1uh_gather_u64base_index_s64(pg, bases, index); } __forceinline svuint64_t svldnt1uh_gather_offset_u64(svbool_t pg, const uint16_t *base, svuint64_t offsets) { return svldnt1uh_gather_u64offset_u64(pg, base, offsets); } __forceinline svuint64_t svldnt1uh_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svldnt1uh_gather_u64base_index_u64(pg, bases, index); } __forceinline svuint64_t svldnt1sw_gather_index_u64(svbool_t pg, const int32_t *base, svuint64_t indices) { return svldnt1sw_gather_u64index_u64(pg, base, indices); } __forceinline svint64_t svldnt1sw_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1sw_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svint64_t svldnt1sw_gather_offset_s64(svbool_t pg, const int32_t *base, svint64_t offsets) { return svldnt1sw_gather_s64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldnt1sw_gather_u64(svbool_t pg, svuint64_t bases) { return svldnt1sw_gather_u64base_u64(pg, bases); } __forceinline svint64_t svldnt1sw_gather_s64(svbool_t pg, svuint64_t bases) { return svldnt1sw_gather_u64base_s64(pg, bases); } __forceinline svuint64_t svldnt1sw_gather_offset_u64(svbool_t pg, const int32_t *base, svuint64_t offsets) { return svldnt1sw_gather_u64offset_u64(pg, base, offsets); } __forceinline svint64_t svldnt1sw_gather_index_s64(svbool_t pg, const int32_t *base, svint64_t indices) { return svldnt1sw_gather_s64index_s64(pg, base, indices); } __forceinline svuint64_t svldnt1sw_gather_index_u64(svbool_t pg, const int32_t *base, svint64_t indices) { return svldnt1sw_gather_s64index_u64(pg, base, indices); } __forceinline svint64_t svldnt1sw_gather_index_s64(svbool_t pg, const int32_t *base, svuint64_t indices) { return svldnt1sw_gather_u64index_s64(pg, base, indices); } __forceinline svint64_t svldnt1sw_gather_offset_s64(svbool_t pg, const int32_t *base, svuint64_t offsets) { return svldnt1sw_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldnt1sw_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1sw_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint64_t svldnt1sw_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svldnt1sw_gather_u64base_index_s64(pg, bases, index); } __forceinline svuint64_t svldnt1sw_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svldnt1sw_gather_u64base_index_u64(pg, bases, index); } __forceinline svuint64_t svldnt1sw_gather_offset_u64(svbool_t pg, const int32_t *base, svint64_t offsets) { return svldnt1sw_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svldnt1uw_gather_s64(svbool_t pg, svuint64_t bases) { return 
svldnt1uw_gather_u64base_s64(pg, bases); } __forceinline svuint64_t svldnt1uw_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1uw_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint64_t svldnt1uw_gather_index_s64(svbool_t pg, svuint64_t bases, int64_t index) { return svldnt1uw_gather_u64base_index_s64(pg, bases, index); } __forceinline svuint64_t svldnt1uw_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svldnt1uw_gather_u64base_index_u64(pg, bases, index); } __forceinline svuint64_t svldnt1uw_gather_u64(svbool_t pg, svuint64_t bases) { return svldnt1uw_gather_u64base_u64(pg, bases); } __forceinline svuint64_t svldnt1uw_gather_offset_u64(svbool_t pg, const uint32_t *base, svint64_t offsets) { return svldnt1uw_gather_s64offset_u64(pg, base, offsets); } __forceinline svuint64_t svldnt1uw_gather_offset_u64(svbool_t pg, const uint32_t *base, svuint64_t offsets) { return svldnt1uw_gather_u64offset_u64(pg, base, offsets); } __forceinline svint64_t svldnt1uw_gather_offset_s64(svbool_t pg, const uint32_t *base, svint64_t offsets) { return svldnt1uw_gather_s64offset_s64(pg, base, offsets); } __forceinline svint64_t svldnt1uw_gather_index_s64(svbool_t pg, const uint32_t *base, svint64_t indices) { return svldnt1uw_gather_s64index_s64(pg, base, indices); } __forceinline svuint64_t svldnt1uw_gather_index_u64(svbool_t pg, const uint32_t *base, svint64_t indices) { return svldnt1uw_gather_s64index_u64(pg, base, indices); } __forceinline svint64_t svldnt1uw_gather_offset_s64(svbool_t pg, const uint32_t *base, svuint64_t offsets) { return svldnt1uw_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldnt1uw_gather_index_u64(svbool_t pg, const uint32_t *base, svuint64_t indices) { return svldnt1uw_gather_u64index_u64(pg, base, indices); } __forceinline svint64_t svldnt1uw_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1uw_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svint64_t svldnt1uw_gather_index_s64(svbool_t pg, const uint32_t *base, svuint64_t indices) { return svldnt1uw_gather_u64index_s64(pg, base, indices); } __forceinline svuint64_t svldnt1sb_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1sb_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint64_t svldnt1sb_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1sb_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svuint32_t svldnt1sb_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldnt1sb_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svint32_t svldnt1sb_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldnt1sb_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svuint64_t svldnt1sb_gather_offset_u64(svbool_t pg, const int8_t *base, svuint64_t offsets) { return svldnt1sb_gather_u64offset_u64(pg, base, offsets); } __forceinline svint64_t svldnt1sb_gather_offset_s64(svbool_t pg, const int8_t *base, svuint64_t offsets) { return svldnt1sb_gather_u64offset_s64(pg, base, offsets); } __forceinline svint64_t svldnt1sb_gather_offset_s64(svbool_t pg, const int8_t *base, svint64_t offsets) { return svldnt1sb_gather_s64offset_s64(pg, base, offsets); } __forceinline svuint32_t svldnt1sb_gather_offset_u32(svbool_t pg, const int8_t *base, svuint32_t offsets) { return svldnt1sb_gather_u32offset_u32(pg, base, offsets); } __forceinline svint32_t 
svldnt1sb_gather_s32(svbool_t pg, svuint32_t bases) { return svldnt1sb_gather_u32base_s32(pg, bases); } __forceinline svint32_t svldnt1sb_gather_offset_s32(svbool_t pg, const int8_t *base, svuint32_t offsets) { return svldnt1sb_gather_u32offset_s32(pg, base, offsets); } __forceinline svuint64_t svldnt1sb_gather_u64(svbool_t pg, svuint64_t bases) { return svldnt1sb_gather_u64base_u64(pg, bases); } __forceinline svint64_t svldnt1sb_gather_s64(svbool_t pg, svuint64_t bases) { return svldnt1sb_gather_u64base_s64(pg, bases); } __forceinline svuint32_t svldnt1sb_gather_u32(svbool_t pg, svuint32_t bases) { return svldnt1sb_gather_u32base_u32(pg, bases); } __forceinline svuint64_t svldnt1sb_gather_offset_u64(svbool_t pg, const int8_t *base, svint64_t offsets) { return svldnt1sb_gather_s64offset_u64(pg, base, offsets); } __forceinline svuint32_t svldnt1ub_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldnt1ub_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svint64_t svldnt1ub_gather_s64(svbool_t pg, svuint64_t bases) { return svldnt1ub_gather_u64base_s64(pg, bases); } __forceinline svuint32_t svldnt1ub_gather_offset_u32(svbool_t pg, const uint8_t *base, svuint32_t offsets) { return svldnt1ub_gather_u32offset_u32(pg, base, offsets); } __forceinline svint64_t svldnt1ub_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1ub_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svint32_t svldnt1ub_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldnt1ub_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svuint64_t svldnt1ub_gather_offset_u64(svbool_t pg, const uint8_t *base, svuint64_t offsets) { return svldnt1ub_gather_u64offset_u64(pg, base, offsets); } __forceinline svint64_t svldnt1ub_gather_offset_s64(svbool_t pg, const uint8_t *base, svuint64_t offsets) { return svldnt1ub_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldnt1ub_gather_offset_u64(svbool_t pg, const uint8_t *base, svint64_t offsets) { return svldnt1ub_gather_s64offset_u64(pg, base, offsets); } __forceinline svint64_t svldnt1ub_gather_offset_s64(svbool_t pg, const uint8_t *base, svint64_t offsets) { return svldnt1ub_gather_s64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldnt1ub_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1ub_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint32_t svldnt1ub_gather_offset_s32(svbool_t pg, const uint8_t *base, svuint32_t offsets) { return svldnt1ub_gather_u32offset_s32(pg, base, offsets); } __forceinline svuint64_t svldnt1ub_gather_u64(svbool_t pg, svuint64_t bases) { return svldnt1ub_gather_u64base_u64(pg, bases); } __forceinline svuint32_t svldnt1ub_gather_u32(svbool_t pg, svuint32_t bases) { return svldnt1ub_gather_u32base_u32(pg, bases); } __forceinline svint32_t svldnt1ub_gather_s32(svbool_t pg, svuint32_t bases) { return svldnt1ub_gather_u32base_s32(pg, bases); } __forceinline svint64_t svldnt1_gather_index(svbool_t pg, const int64_t *base, svuint64_t indices) { return svldnt1_gather_u64index_s64(pg, base, indices); } __forceinline svuint32_t svldnt1_gather_index_u32(svbool_t pg, svuint32_t bases, int64_t index) { return svldnt1_gather_u32base_index_u32(pg, bases, index); } __forceinline svfloat64_t svldnt1_gather_index_f64(svbool_t pg, svuint64_t bases, int64_t index) { return svldnt1_gather_u64base_index_f64(pg, bases, index); } __forceinline svint64_t svldnt1_gather_index_s64(svbool_t pg, 
svuint64_t bases, int64_t index) { return svldnt1_gather_u64base_index_s64(pg, bases, index); } __forceinline svuint64_t svldnt1_gather_index_u64(svbool_t pg, svuint64_t bases, int64_t index) { return svldnt1_gather_u64base_index_u64(pg, bases, index); } __forceinline svfloat64_t svldnt1_gather_offset(svbool_t pg, const float64_t *base, svuint64_t offsets) { return svldnt1_gather_u64offset_f64(pg, base, offsets); } __forceinline svint64_t svldnt1_gather_offset(svbool_t pg, const int64_t *base, svuint64_t offsets) { return svldnt1_gather_u64offset_s64(pg, base, offsets); } __forceinline svuint64_t svldnt1_gather_index(svbool_t pg, const uint64_t *base, svuint64_t indices) { return svldnt1_gather_u64index_u64(pg, base, indices); } __forceinline svfloat32_t svldnt1_gather_offset(svbool_t pg, const float32_t *base, svuint32_t offsets) { return svldnt1_gather_u32offset_f32(pg, base, offsets); } __forceinline svfloat64_t svldnt1_gather_index(svbool_t pg, const float64_t *base, svint64_t indices) { return svldnt1_gather_s64index_f64(pg, base, indices); } __forceinline svuint64_t svldnt1_gather_u64(svbool_t pg, svuint64_t bases) { return svldnt1_gather_u64base_u64(pg, bases); } __forceinline svint64_t svldnt1_gather_s64(svbool_t pg, svuint64_t bases) { return svldnt1_gather_u64base_s64(pg, bases); } __forceinline svfloat64_t svldnt1_gather_f64(svbool_t pg, svuint64_t bases) { return svldnt1_gather_u64base_f64(pg, bases); } __forceinline svuint32_t svldnt1_gather_u32(svbool_t pg, svuint32_t bases) { return svldnt1_gather_u32base_u32(pg, bases); } __forceinline svint32_t svldnt1_gather_s32(svbool_t pg, svuint32_t bases) { return svldnt1_gather_u32base_s32(pg, bases); } __forceinline svfloat32_t svldnt1_gather_f32(svbool_t pg, svuint32_t bases) { return svldnt1_gather_u32base_f32(pg, bases); } __forceinline svint32_t svldnt1_gather_index_s32(svbool_t pg, svuint32_t bases, int64_t index) { return svldnt1_gather_u32base_index_s32(pg, bases, index); } __forceinline svfloat32_t svldnt1_gather_index_f32(svbool_t pg, svuint32_t bases, int64_t index) { return svldnt1_gather_u32base_index_f32(pg, bases, index); } __forceinline svint32_t svldnt1_gather_offset(svbool_t pg, const int32_t *base, svuint32_t offsets) { return svldnt1_gather_u32offset_s32(pg, base, offsets); } __forceinline svint64_t svldnt1_gather_offset_s64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1_gather_u64base_offset_s64(pg, bases, offset); } __forceinline svuint64_t svldnt1_gather_offset(svbool_t pg, const uint64_t *base, svint64_t offsets) { return svldnt1_gather_s64offset_u64(pg, base, offsets); } __forceinline svuint64_t svldnt1_gather_index(svbool_t pg, const uint64_t *base, svint64_t indices) { return svldnt1_gather_s64index_u64(pg, base, indices); } __forceinline svint64_t svldnt1_gather_index(svbool_t pg, const int64_t *base, svint64_t indices) { return svldnt1_gather_s64index_s64(pg, base, indices); } __forceinline svuint64_t svldnt1_gather_offset(svbool_t pg, const uint64_t *base, svuint64_t offsets) { return svldnt1_gather_u64offset_u64(pg, base, offsets); } __forceinline svuint64_t svldnt1_gather_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1_gather_u64base_offset_u64(pg, bases, offset); } __forceinline svint64_t svldnt1_gather_offset(svbool_t pg, const int64_t *base, svint64_t offsets) { return svldnt1_gather_s64offset_s64(pg, base, offsets); } __forceinline svfloat64_t svldnt1_gather_offset(svbool_t pg, const float64_t *base, svint64_t offsets) { return 
svldnt1_gather_s64offset_f64(pg, base, offsets); } __forceinline svfloat64_t svldnt1_gather_index(svbool_t pg, const float64_t *base, svuint64_t indices) { return svldnt1_gather_u64index_f64(pg, base, indices); } __forceinline svfloat64_t svldnt1_gather_offset_f64(svbool_t pg, svuint64_t bases, int64_t offset) { return svldnt1_gather_u64base_offset_f64(pg, bases, offset); } __forceinline svuint32_t svldnt1_gather_offset_u32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldnt1_gather_u32base_offset_u32(pg, bases, offset); } __forceinline svint32_t svldnt1_gather_offset_s32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldnt1_gather_u32base_offset_s32(pg, bases, offset); } __forceinline svfloat32_t svldnt1_gather_offset_f32(svbool_t pg, svuint32_t bases, int64_t offset) { return svldnt1_gather_u32base_offset_f32(pg, bases, offset); } __forceinline svuint32_t svldnt1_gather_offset(svbool_t pg, const uint32_t *base, svuint32_t offsets) { return svldnt1_gather_u32offset_u32(pg, base, offsets); } __forceinline svuint8_t svbcax(svuint8_t op1, svuint8_t op2, uint8_t op3) { return svbcax_n_u8(op1, op2, op3); } __forceinline svuint16_t svbcax(svuint16_t op1, svuint16_t op2, uint16_t op3) { return svbcax_n_u16(op1, op2, op3); } __forceinline svuint32_t svbcax(svuint32_t op1, svuint32_t op2, uint32_t op3) { return svbcax_n_u32(op1, op2, op3); } __forceinline svint64_t svbcax(svint64_t op1, svint64_t op2, svint64_t op3) { return svbcax_s64(op1, op2, op3); } __forceinline svint8_t svbcax(svint8_t op1, svint8_t op2, svint8_t op3) { return svbcax_s8(op1, op2, op3); } __forceinline svint16_t svbcax(svint16_t op1, svint16_t op2, svint16_t op3) { return svbcax_s16(op1, op2, op3); } __forceinline svint32_t svbcax(svint32_t op1, svint32_t op2, svint32_t op3) { return svbcax_s32(op1, op2, op3); } __forceinline svint32_t svbcax(svint32_t op1, svint32_t op2, int32_t op3) { return svbcax_n_s32(op1, op2, op3); } __forceinline svuint64_t svbcax(svuint64_t op1, svuint64_t op2, uint64_t op3) { return svbcax_n_u64(op1, op2, op3); } __forceinline svint16_t svbcax(svint16_t op1, svint16_t op2, int16_t op3) { return svbcax_n_s16(op1, op2, op3); } __forceinline svint64_t svbcax(svint64_t op1, svint64_t op2, int64_t op3) { return svbcax_n_s64(op1, op2, op3); } __forceinline svuint64_t svbcax(svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svbcax_u64(op1, op2, op3); } __forceinline svuint32_t svbcax(svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svbcax_u32(op1, op2, op3); } __forceinline svuint16_t svbcax(svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svbcax_u16(op1, op2, op3); } __forceinline svuint8_t svbcax(svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svbcax_u8(op1, op2, op3); } __forceinline svint8_t svbcax(svint8_t op1, svint8_t op2, int8_t op3) { return svbcax_n_s8(op1, op2, op3); } __forceinline svint16_t svbsl(svint16_t op1, svint16_t op2, svint16_t op3) { return svbsl_s16(op1, op2, op3); } __forceinline svuint16_t svnbsl(svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svnbsl_u16(op1, op2, op3); } __forceinline svuint32_t svnbsl(svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svnbsl_u32(op1, op2, op3); } __forceinline svuint64_t svnbsl(svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svnbsl_u64(op1, op2, op3); } __forceinline svint8_t svnbsl(svint8_t op1, svint8_t op2, int8_t op3) { return svnbsl_n_s8(op1, op2, op3); } __forceinline svint16_t svnbsl(svint16_t op1, svint16_t op2, int16_t op3) { return svnbsl_n_s16(op1, op2, op3); } 
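// Usage sketch (illustrative only): the svbcax overloads above resolve on their argument
// types, picking the vector or immediate ("_n_") form of the underlying intrinsic.
// Assuming a, b, c are svuint32_t vectors and k is a uint32_t scalar:
//
//     svuint32_t r0 = svbcax(a, b, c);   // resolves to svbcax_u32(a, b, c)
//     svuint32_t r1 = svbcax(a, b, k);   // resolves to svbcax_n_u32(a, b, k)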
__forceinline svint64_t svnbsl(svint64_t op1, svint64_t op2, int64_t op3) { return svnbsl_n_s64(op1, op2, op3); } __forceinline svint64_t svbsl(svint64_t op1, svint64_t op2, int64_t op3) { return svbsl_n_s64(op1, op2, op3); } __forceinline svuint8_t svnbsl(svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svnbsl_u8(op1, op2, op3); } __forceinline svuint8_t svbsl(svuint8_t op1, svuint8_t op2, uint8_t op3) { return svbsl_n_u8(op1, op2, op3); } __forceinline svuint16_t svbsl(svuint16_t op1, svuint16_t op2, uint16_t op3) { return svbsl_n_u16(op1, op2, op3); } __forceinline svuint32_t svbsl(svuint32_t op1, svuint32_t op2, uint32_t op3) { return svbsl_n_u32(op1, op2, op3); } __forceinline svuint64_t svbsl(svuint64_t op1, svuint64_t op2, uint64_t op3) { return svbsl_n_u64(op1, op2, op3); } __forceinline svint8_t svbsl(svint8_t op1, svint8_t op2, svint8_t op3) { return svbsl_s8(op1, op2, op3); } __forceinline svint32_t svnbsl(svint32_t op1, svint32_t op2, int32_t op3) { return svnbsl_n_s32(op1, op2, op3); } __forceinline svuint8_t svnbsl(svuint8_t op1, svuint8_t op2, uint8_t op3) { return svnbsl_n_u8(op1, op2, op3); } __forceinline svuint16_t svnbsl(svuint16_t op1, svuint16_t op2, uint16_t op3) { return svnbsl_n_u16(op1, op2, op3); } __forceinline svuint32_t svnbsl(svuint32_t op1, svuint32_t op2, uint32_t op3) { return svnbsl_n_u32(op1, op2, op3); } __forceinline svint32_t svbsl(svint32_t op1, svint32_t op2, svint32_t op3) { return svbsl_s32(op1, op2, op3); } __forceinline svint64_t svbsl(svint64_t op1, svint64_t op2, svint64_t op3) { return svbsl_s64(op1, op2, op3); } __forceinline svuint8_t svbsl(svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svbsl_u8(op1, op2, op3); } __forceinline svint64_t svnbsl(svint64_t op1, svint64_t op2, svint64_t op3) { return svnbsl_s64(op1, op2, op3); } __forceinline svint32_t svnbsl(svint32_t op1, svint32_t op2, svint32_t op3) { return svnbsl_s32(op1, op2, op3); } __forceinline svint16_t svnbsl(svint16_t op1, svint16_t op2, svint16_t op3) { return svnbsl_s16(op1, op2, op3); } __forceinline svint8_t svnbsl(svint8_t op1, svint8_t op2, svint8_t op3) { return svnbsl_s8(op1, op2, op3); } __forceinline svuint32_t svbsl(svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svbsl_u32(op1, op2, op3); } __forceinline svuint64_t svbsl(svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svbsl_u64(op1, op2, op3); } __forceinline svint8_t svbsl(svint8_t op1, svint8_t op2, int8_t op3) { return svbsl_n_s8(op1, op2, op3); } __forceinline svint16_t svbsl(svint16_t op1, svint16_t op2, int16_t op3) { return svbsl_n_s16(op1, op2, op3); } __forceinline svint32_t svbsl(svint32_t op1, svint32_t op2, int32_t op3) { return svbsl_n_s32(op1, op2, op3); } __forceinline svuint64_t svnbsl(svuint64_t op1, svuint64_t op2, uint64_t op3) { return svnbsl_n_u64(op1, op2, op3); } __forceinline svuint16_t svbsl(svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svbsl_u16(op1, op2, op3); } __forceinline svint16_t svbsl1n(svint16_t op1, svint16_t op2, svint16_t op3) { return svbsl1n_s16(op1, op2, op3); } __forceinline svint8_t svbsl1n(svint8_t op1, svint8_t op2, svint8_t op3) { return svbsl1n_s8(op1, op2, op3); } __forceinline svint32_t svbsl1n(svint32_t op1, svint32_t op2, svint32_t op3) { return svbsl1n_s32(op1, op2, op3); } __forceinline svint8_t svbsl1n(svint8_t op1, svint8_t op2, int8_t op3) { return svbsl1n_n_s8(op1, op2, op3); } __forceinline svuint8_t svbsl1n(svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svbsl1n_u8(op1, op2, op3); } __forceinline svint64_t 
svbsl1n(svint64_t op1, svint64_t op2, svint64_t op3) { return svbsl1n_s64(op1, op2, op3); }
__forceinline svuint64_t svbsl1n(svuint64_t op1, svuint64_t op2, uint64_t op3) { return svbsl1n_n_u64(op1, op2, op3); }
__forceinline svuint32_t svbsl1n(svuint32_t op1, svuint32_t op2, uint32_t op3) { return svbsl1n_n_u32(op1, op2, op3); }
__forceinline svuint8_t svbsl1n(svuint8_t op1, svuint8_t op2, uint8_t op3) { return svbsl1n_n_u8(op1, op2, op3); }
__forceinline svint64_t svbsl1n(svint64_t op1, svint64_t op2, int64_t op3) { return svbsl1n_n_s64(op1, op2, op3); }
__forceinline svuint16_t svbsl1n(svuint16_t op1, svuint16_t op2, uint16_t op3) { return svbsl1n_n_u16(op1, op2, op3); }
__forceinline svint16_t svbsl1n(svint16_t op1, svint16_t op2, int16_t op3) { return svbsl1n_n_s16(op1, op2, op3); }
__forceinline svuint64_t svbsl1n(svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svbsl1n_u64(op1, op2, op3); }
__forceinline svuint32_t svbsl1n(svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svbsl1n_u32(op1, op2, op3); }
__forceinline svuint16_t svbsl1n(svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svbsl1n_u16(op1, op2, op3); }
__forceinline svint32_t svbsl1n(svint32_t op1, svint32_t op2, int32_t op3) { return svbsl1n_n_s32(op1, op2, op3); }
__forceinline svuint8_t svbsl2n(svuint8_t op1, svuint8_t op2, uint8_t op3) { return svbsl2n_n_u8(op1, op2, op3); }
__forceinline svint8_t svbsl2n(svint8_t op1, svint8_t op2, int8_t op3) { return svbsl2n_n_s8(op1, op2, op3); }
__forceinline svuint16_t svbsl2n(svuint16_t op1, svuint16_t op2, uint16_t op3) { return svbsl2n_n_u16(op1, op2, op3); }
__forceinline svuint32_t svbsl2n(svuint32_t op1, svuint32_t op2, uint32_t op3) { return svbsl2n_n_u32(op1, op2, op3); }
__forceinline svuint64_t svbsl2n(svuint64_t op1, svuint64_t op2, uint64_t op3) { return svbsl2n_n_u64(op1, op2, op3); }
__forceinline svint16_t svbsl2n(svint16_t op1, svint16_t op2, int16_t op3) { return svbsl2n_n_s16(op1, op2, op3); }
__forceinline svuint64_t svbsl2n(svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svbsl2n_u64(op1, op2, op3); }
__forceinline svint16_t svbsl2n(svint16_t op1, svint16_t op2, svint16_t op3) { return svbsl2n_s16(op1, op2, op3); }
__forceinline svuint16_t svbsl2n(svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svbsl2n_u16(op1, op2, op3); }
__forceinline svuint8_t svbsl2n(svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svbsl2n_u8(op1, op2, op3); }
__forceinline svint64_t svbsl2n(svint64_t op1, svint64_t op2, svint64_t op3) { return svbsl2n_s64(op1, op2, op3); }
__forceinline svint32_t svbsl2n(svint32_t op1, svint32_t op2, svint32_t op3) { return svbsl2n_s32(op1, op2, op3); }
__forceinline svint8_t svbsl2n(svint8_t op1, svint8_t op2, svint8_t op3) { return svbsl2n_s8(op1, op2, op3); }
__forceinline svint32_t svbsl2n(svint32_t op1, svint32_t op2, int32_t op3) { return svbsl2n_n_s32(op1, op2, op3); }
__forceinline svuint32_t svbsl2n(svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svbsl2n_u32(op1, op2, op3); }
__forceinline svint64_t svbsl2n(svint64_t op1, svint64_t op2, int64_t op3) { return svbsl2n_n_s64(op1, op2, op3); }
template <uint64_t N, typename T> __forceinline T __svxar(T op1, T op2) {
    if constexpr(::std::is_same_v<T, svuint64_t>) { return svxar_n_u64(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint64_t>) { return svxar_n_s64(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svuint32_t>) { return svxar_n_u32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svxar_n_s32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svuint16_t>) { return svxar_n_u16(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svxar_n_s16(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svuint8_t>) { return svxar_n_u8(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint8_t>) { return svxar_n_s8(op1, op2, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svxar(op1, op2, imm3) __svxar<imm3>(op1, op2)
__forceinline svint8_t sveor3(svint8_t op1, svint8_t op2, svint8_t op3) { return sveor3_s8(op1, op2, op3); }
__forceinline svuint64_t sveor3(svuint64_t op1, svuint64_t op2, uint64_t op3) { return sveor3_n_u64(op1, op2, op3); }
__forceinline svint32_t sveor3(svint32_t op1, svint32_t op2, svint32_t op3) { return sveor3_s32(op1, op2, op3); }
__forceinline svint64_t sveor3(svint64_t op1, svint64_t op2, svint64_t op3) { return sveor3_s64(op1, op2, op3); }
__forceinline svint8_t sveor3(svint8_t op1, svint8_t op2, int8_t op3) { return sveor3_n_s8(op1, op2, op3); }
__forceinline svint16_t sveor3(svint16_t op1, svint16_t op2, int16_t op3) { return sveor3_n_s16(op1, op2, op3); }
__forceinline svuint8_t sveor3(svuint8_t op1, svuint8_t op2, svuint8_t op3) { return sveor3_u8(op1, op2, op3); }
__forceinline svuint32_t sveor3(svuint32_t op1, svuint32_t op2, uint32_t op3) { return sveor3_n_u32(op1, op2, op3); }
__forceinline svuint16_t sveor3(svuint16_t op1, svuint16_t op2, uint16_t op3) { return sveor3_n_u16(op1, op2, op3); }
__forceinline svuint8_t sveor3(svuint8_t op1, svuint8_t op2, uint8_t op3) { return sveor3_n_u8(op1, op2, op3); }
__forceinline svint64_t sveor3(svint64_t op1, svint64_t op2, int64_t op3) { return sveor3_n_s64(op1, op2, op3); }
__forceinline svint32_t sveor3(svint32_t op1, svint32_t op2, int32_t op3) { return sveor3_n_s32(op1, op2, op3); }
__forceinline svuint64_t sveor3(svuint64_t op1, svuint64_t op2, svuint64_t op3) { return sveor3_u64(op1, op2, op3); }
__forceinline svuint16_t sveor3(svuint16_t op1, svuint16_t op2, svuint16_t op3) { return sveor3_u16(op1, op2, op3); }
__forceinline svuint32_t sveor3(svuint32_t op1, svuint32_t op2, svuint32_t op3) { return sveor3_u32(op1, op2, op3); }
__forceinline svint16_t sveor3(svint16_t op1, svint16_t op2, svint16_t op3) { return sveor3_s16(op1, op2, op3); }
__forceinline svint32_t sveorbt(svint32_t odd, svint32_t op1, int32_t op2) { return sveorbt_n_s32(odd, op1, op2); }
__forceinline svuint32_t sveorbt(svuint32_t odd, svuint32_t op1, svuint32_t op2) { return sveorbt_u32(odd, op1, op2); }
__forceinline svuint64_t sveorbt(svuint64_t odd, svuint64_t op1, svuint64_t op2) { return sveorbt_u64(odd, op1, op2); }
__forceinline svint8_t sveorbt(svint8_t odd, svint8_t op1, int8_t op2) { return sveorbt_n_s8(odd, op1, op2); }
__forceinline svint16_t sveorbt(svint16_t odd, svint16_t op1, int16_t op2) { return sveorbt_n_s16(odd, op1, op2); }
__forceinline svint64_t sveorbt(svint64_t odd, svint64_t op1, int64_t op2) { return sveorbt_n_s64(odd, op1, op2); }
__forceinline svuint8_t sveorbt(svuint8_t odd, svuint8_t op1, svuint8_t op2) { return sveorbt_u8(odd, op1, op2); }
__forceinline svuint64_t sveorbt(svuint64_t odd, svuint64_t op1, uint64_t op2) { return sveorbt_n_u64(odd, op1, op2); }
__forceinline svuint16_t sveorbt(svuint16_t odd, svuint16_t op1, svuint16_t op2) { return sveorbt_u16(odd, op1, op2); }
__forceinline svuint16_t sveorbt(svuint16_t odd, svuint16_t op1, uint16_t op2) { return sveorbt_n_u16(odd, op1, op2); }
__forceinline svint64_t sveorbt(svint64_t odd, svint64_t op1, svint64_t op2) { return sveorbt_s64(odd, op1, op2); }
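// Usage sketch (illustrative only): svxar encodes its rotate amount as an immediate,
// which the macro forwards as a template argument. Assuming a and b are svuint32_t vectors:
//
//     svuint32_t r = svxar(a, b, 3);   // expands to __svxar<3>(a, b)
//                                      // and selects svxar_n_u32(a, b, 3)
__forceinline svint32_t sveorbt(svint32_t odd, svint32_t op1, svint32_t op2) {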
return sveorbt_s32(odd, op1, op2); } __forceinline svint16_t sveorbt(svint16_t odd, svint16_t op1, svint16_t op2) { return sveorbt_s16(odd, op1, op2); } __forceinline svint8_t sveorbt(svint8_t odd, svint8_t op1, svint8_t op2) { return sveorbt_s8(odd, op1, op2); } __forceinline svuint32_t sveorbt(svuint32_t odd, svuint32_t op1, uint32_t op2) { return sveorbt_n_u32(odd, op1, op2); } __forceinline svuint8_t sveorbt(svuint8_t odd, svuint8_t op1, uint8_t op2) { return sveorbt_n_u8(odd, op1, op2); } __forceinline svint8_t sveortb(svint8_t even, svint8_t op1, svint8_t op2) { return sveortb_s8(even, op1, op2); } __forceinline svint16_t sveortb(svint16_t even, svint16_t op1, svint16_t op2) { return sveortb_s16(even, op1, op2); } __forceinline svint32_t sveortb(svint32_t even, svint32_t op1, svint32_t op2) { return sveortb_s32(even, op1, op2); } __forceinline svint64_t sveortb(svint64_t even, svint64_t op1, svint64_t op2) { return sveortb_s64(even, op1, op2); } __forceinline svint8_t sveortb(svint8_t even, svint8_t op1, int8_t op2) { return sveortb_n_s8(even, op1, op2); } __forceinline svuint64_t sveortb(svuint64_t even, svuint64_t op1, svuint64_t op2) { return sveortb_u64(even, op1, op2); } __forceinline svuint32_t sveortb(svuint32_t even, svuint32_t op1, svuint32_t op2) { return sveortb_u32(even, op1, op2); } __forceinline svint64_t sveortb(svint64_t even, svint64_t op1, int64_t op2) { return sveortb_n_s64(even, op1, op2); } __forceinline svint16_t sveortb(svint16_t even, svint16_t op1, int16_t op2) { return sveortb_n_s16(even, op1, op2); } __forceinline svint32_t sveortb(svint32_t even, svint32_t op1, int32_t op2) { return sveortb_n_s32(even, op1, op2); } __forceinline svuint16_t sveortb(svuint16_t even, svuint16_t op1, svuint16_t op2) { return sveortb_u16(even, op1, op2); } __forceinline svuint8_t sveortb(svuint8_t even, svuint8_t op1, uint8_t op2) { return sveortb_n_u8(even, op1, op2); } __forceinline svuint16_t sveortb(svuint16_t even, svuint16_t op1, uint16_t op2) { return sveortb_n_u16(even, op1, op2); } __forceinline svuint32_t sveortb(svuint32_t even, svuint32_t op1, uint32_t op2) { return sveortb_n_u32(even, op1, op2); } __forceinline svuint64_t sveortb(svuint64_t even, svuint64_t op1, uint64_t op2) { return sveortb_n_u64(even, op1, op2); } __forceinline svuint8_t sveortb(svuint8_t even, svuint8_t op1, svuint8_t op2) { return sveortb_u8(even, op1, op2); } __forceinline svuint64_t svrax1(svuint64_t op1, svuint64_t op2) { return svrax1_u64(op1, op2); } __forceinline svint64_t svrax1(svint64_t op1, svint64_t op2) { return svrax1_s64(op1, op2); } __forceinline svuint8_t svqxtnb(svuint16_t op) { return svqxtnb_u16(op); } __forceinline svint32_t svqxtnb(svint64_t op) { return svqxtnb_s64(op); } __forceinline svuint16_t svqxtnb(svuint32_t op) { return svqxtnb_u32(op); } __forceinline svuint32_t svqxtnb(svuint64_t op) { return svqxtnb_u64(op); } __forceinline svint16_t svqxtnb(svint32_t op) { return svqxtnb_s32(op); } __forceinline svint8_t svqxtnb(svint16_t op) { return svqxtnb_s16(op); } __forceinline svint8_t svqxtnt(svint8_t even, svint16_t op) { return svqxtnt_s16(even, op); } __forceinline svint16_t svqxtnt(svint16_t even, svint32_t op) { return svqxtnt_s32(even, op); } __forceinline svint32_t svqxtnt(svint32_t even, svint64_t op) { return svqxtnt_s64(even, op); } __forceinline svuint8_t svqxtnt(svuint8_t even, svuint16_t op) { return svqxtnt_u16(even, op); } __forceinline svuint16_t svqxtnt(svuint16_t even, svuint32_t op) { return svqxtnt_u32(even, op); } __forceinline svuint32_t 
svqxtnt(svuint32_t even, svuint64_t op) { return svqxtnt_u64(even, op); } __forceinline svuint32_t svqxtunb(svint64_t op) { return svqxtunb_s64(op); } __forceinline svuint8_t svqxtunb(svint16_t op) { return svqxtunb_s16(op); } __forceinline svuint16_t svqxtunb(svint32_t op) { return svqxtunb_s32(op); } __forceinline svuint32_t svqxtunt(svuint32_t even, svint64_t op) { return svqxtunt_s64(even, op); } __forceinline svuint16_t svqxtunt(svuint16_t even, svint32_t op) { return svqxtunt_s32(even, op); } __forceinline svuint8_t svqxtunt(svuint8_t even, svint16_t op) { return svqxtunt_s16(even, op); } __forceinline svint16_t svmovlb(svint8_t op) { return svmovlb_s16(op); } __forceinline svint32_t svmovlb(svint16_t op) { return svmovlb_s32(op); } __forceinline svint64_t svmovlb(svint32_t op) { return svmovlb_s64(op); } __forceinline svuint16_t svmovlb(svuint8_t op) { return svmovlb_u16(op); } __forceinline svuint32_t svmovlb(svuint16_t op) { return svmovlb_u32(op); } __forceinline svuint64_t svmovlb(svuint32_t op) { return svmovlb_u64(op); } __forceinline svint16_t svmovlt(svint8_t op) { return svmovlt_s16(op); } __forceinline svint32_t svmovlt(svint16_t op) { return svmovlt_s32(op); } __forceinline svint64_t svmovlt(svint32_t op) { return svmovlt_s64(op); } __forceinline svuint16_t svmovlt(svuint8_t op) { return svmovlt_u16(op); } __forceinline svuint32_t svmovlt(svuint16_t op) { return svmovlt_u32(op); } __forceinline svuint64_t svmovlt(svuint32_t op) { return svmovlt_u64(op); } __forceinline svbool_t svwhilerw(const float32_t *op1, const float32_t *op2) { return svwhilerw_f32(op1, op2); } __forceinline svbool_t svwhilerw(const uint16_t *op1, const uint16_t *op2) { return svwhilerw_u16(op1, op2); } __forceinline svbool_t svwhilerw(const int16_t *op1, const int16_t *op2) { return svwhilerw_s16(op1, op2); } __forceinline svbool_t svwhilerw(const int32_t *op1, const int32_t *op2) { return svwhilerw_s32(op1, op2); } __forceinline svbool_t svwhilerw(const int64_t *op1, const int64_t *op2) { return svwhilerw_s64(op1, op2); } __forceinline svbool_t svwhilerw(const uint8_t *op1, const uint8_t *op2) { return svwhilerw_u8(op1, op2); } __forceinline svbool_t svwhilerw(const uint32_t *op1, const uint32_t *op2) { return svwhilerw_u32(op1, op2); } __forceinline svbool_t svwhilerw(const uint64_t *op1, const uint64_t *op2) { return svwhilerw_u64(op1, op2); } __forceinline svbool_t svwhilerw(const int8_t *op1, const int8_t *op2) { return svwhilerw_s8(op1, op2); } __forceinline svbool_t svwhilerw(const float64_t *op1, const float64_t *op2) { return svwhilerw_f64(op1, op2); } __forceinline svbool_t svwhilewr(const uint32_t *op1, const uint32_t *op2) { return svwhilewr_u32(op1, op2); } __forceinline svbool_t svwhilewr(const float32_t *op1, const float32_t *op2) { return svwhilewr_f32(op1, op2); } __forceinline svbool_t svwhilewr(const float64_t *op1, const float64_t *op2) { return svwhilewr_f64(op1, op2); } __forceinline svbool_t svwhilewr(const int8_t *op1, const int8_t *op2) { return svwhilewr_s8(op1, op2); } __forceinline svbool_t svwhilewr(const int32_t *op1, const int32_t *op2) { return svwhilewr_s32(op1, op2); } __forceinline svbool_t svwhilewr(const int64_t *op1, const int64_t *op2) { return svwhilewr_s64(op1, op2); } __forceinline svbool_t svwhilewr(const uint8_t *op1, const uint8_t *op2) { return svwhilewr_u8(op1, op2); } __forceinline svbool_t svwhilewr(const uint16_t *op1, const uint16_t *op2) { return svwhilewr_u16(op1, op2); } __forceinline svbool_t svwhilewr(const uint64_t *op1, const uint64_t *op2) { 
return svwhilewr_u64(op1, op2); } __forceinline svbool_t svwhilewr(const int16_t *op1, const int16_t *op2) { return svwhilewr_s16(op1, op2); } __forceinline svbool_t svwhilegt_b8(int32_t op1, int32_t op2) { return svwhilegt_b8_s32(op1, op2); } __forceinline svbool_t svwhilegt_b8(int64_t op1, int64_t op2) { return svwhilegt_b8_s64(op1, op2); } __forceinline svbool_t svwhilegt_b8(uint32_t op1, uint32_t op2) { return svwhilegt_b8_u32(op1, op2); } __forceinline svbool_t svwhilegt_b8(uint64_t op1, uint64_t op2) { return svwhilegt_b8_u64(op1, op2); } __forceinline svbool_t svwhilegt_b16(int32_t op1, int32_t op2) { return svwhilegt_b16_s32(op1, op2); } __forceinline svbool_t svwhilegt_b16(int64_t op1, int64_t op2) { return svwhilegt_b16_s64(op1, op2); } __forceinline svbool_t svwhilegt_b16(uint32_t op1, uint32_t op2) { return svwhilegt_b16_u32(op1, op2); } __forceinline svbool_t svwhilegt_b16(uint64_t op1, uint64_t op2) { return svwhilegt_b16_u64(op1, op2); } __forceinline svbool_t svwhilegt_b32(int32_t op1, int32_t op2) { return svwhilegt_b32_s32(op1, op2); } __forceinline svbool_t svwhilegt_b32(int64_t op1, int64_t op2) { return svwhilegt_b32_s64(op1, op2); } __forceinline svbool_t svwhilegt_b32(uint32_t op1, uint32_t op2) { return svwhilegt_b32_u32(op1, op2); } __forceinline svbool_t svwhilegt_b32(uint64_t op1, uint64_t op2) { return svwhilegt_b32_u64(op1, op2); } __forceinline svbool_t svwhilegt_b64(int32_t op1, int32_t op2) { return svwhilegt_b64_s32(op1, op2); } __forceinline svbool_t svwhilegt_b64(int64_t op1, int64_t op2) { return svwhilegt_b64_s64(op1, op2); } __forceinline svbool_t svwhilegt_b64(uint32_t op1, uint32_t op2) { return svwhilegt_b64_u32(op1, op2); } __forceinline svbool_t svwhilegt_b64(uint64_t op1, uint64_t op2) { return svwhilegt_b64_u64(op1, op2); } __forceinline svbool_t svwhilege_b8(int32_t op1, int32_t op2) { return svwhilege_b8_s32(op1, op2); } __forceinline svbool_t svwhilege_b8(int64_t op1, int64_t op2) { return svwhilege_b8_s64(op1, op2); } __forceinline svbool_t svwhilege_b8(uint32_t op1, uint32_t op2) { return svwhilege_b8_u32(op1, op2); } __forceinline svbool_t svwhilege_b8(uint64_t op1, uint64_t op2) { return svwhilege_b8_u64(op1, op2); } __forceinline svbool_t svwhilege_b16(int32_t op1, int32_t op2) { return svwhilege_b16_s32(op1, op2); } __forceinline svbool_t svwhilege_b16(int64_t op1, int64_t op2) { return svwhilege_b16_s64(op1, op2); } __forceinline svbool_t svwhilege_b16(uint32_t op1, uint32_t op2) { return svwhilege_b16_u32(op1, op2); } __forceinline svbool_t svwhilege_b16(uint64_t op1, uint64_t op2) { return svwhilege_b16_u64(op1, op2); } __forceinline svbool_t svwhilege_b32(int32_t op1, int32_t op2) { return svwhilege_b32_s32(op1, op2); } __forceinline svbool_t svwhilege_b32(int64_t op1, int64_t op2) { return svwhilege_b32_s64(op1, op2); } __forceinline svbool_t svwhilege_b32(uint32_t op1, uint32_t op2) { return svwhilege_b32_u32(op1, op2); } __forceinline svbool_t svwhilege_b32(uint64_t op1, uint64_t op2) { return svwhilege_b32_u64(op1, op2); } __forceinline svbool_t svwhilege_b64(int32_t op1, int32_t op2) { return svwhilege_b64_s32(op1, op2); } __forceinline svbool_t svwhilege_b64(int64_t op1, int64_t op2) { return svwhilege_b64_s64(op1, op2); } __forceinline svbool_t svwhilege_b64(uint32_t op1, uint32_t op2) { return svwhilege_b64_u32(op1, op2); } __forceinline svbool_t svwhilege_b64(uint64_t op1, uint64_t op2) { return svwhilege_b64_u64(op1, op2); } __forceinline svuint16_t svrshl_z(svbool_t pg, svuint16_t op1, svint16_t op2) { return 
svrshl_u16_z(pg, op1, op2); } __forceinline svint32_t svrshl_m(svbool_t pg, svint32_t op1, int32_t op2) { return svrshl_n_s32_m(pg, op1, op2); } __forceinline svint16_t svrshl_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svrshl_s16_m(pg, op1, op2); } __forceinline svint32_t svrshl_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svrshl_s32_m(pg, op1, op2); } __forceinline svint64_t svrshl_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svrshl_s64_m(pg, op1, op2); } __forceinline svuint8_t svrshl_m(svbool_t pg, svuint8_t op1, svint8_t op2) { return svrshl_u8_m(pg, op1, op2); } __forceinline svuint16_t svrshl_m(svbool_t pg, svuint16_t op1, svint16_t op2) { return svrshl_u16_m(pg, op1, op2); } __forceinline svuint32_t svrshl_m(svbool_t pg, svuint32_t op1, svint32_t op2) { return svrshl_u32_m(pg, op1, op2); } __forceinline svuint64_t svrshl_m(svbool_t pg, svuint64_t op1, svint64_t op2) { return svrshl_u64_m(pg, op1, op2); } __forceinline svint8_t svrshl_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svrshl_s8_x(pg, op1, op2); } __forceinline svint16_t svrshl_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svrshl_s16_x(pg, op1, op2); } __forceinline svint32_t svrshl_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svrshl_s32_x(pg, op1, op2); } __forceinline svint64_t svrshl_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svrshl_s64_x(pg, op1, op2); } __forceinline svuint8_t svrshl_x(svbool_t pg, svuint8_t op1, svint8_t op2) { return svrshl_u8_x(pg, op1, op2); } __forceinline svuint16_t svrshl_x(svbool_t pg, svuint16_t op1, svint16_t op2) { return svrshl_u16_x(pg, op1, op2); } __forceinline svuint32_t svrshl_x(svbool_t pg, svuint32_t op1, svint32_t op2) { return svrshl_u32_x(pg, op1, op2); } __forceinline svuint64_t svrshl_x(svbool_t pg, svuint64_t op1, svint64_t op2) { return svrshl_u64_x(pg, op1, op2); } __forceinline svint8_t svrshl_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svrshl_s8_z(pg, op1, op2); } __forceinline svint16_t svrshl_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svrshl_s16_z(pg, op1, op2); } __forceinline svint32_t svrshl_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svrshl_s32_z(pg, op1, op2); } __forceinline svint64_t svrshl_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svrshl_s64_z(pg, op1, op2); } __forceinline svuint8_t svrshl_z(svbool_t pg, svuint8_t op1, svint8_t op2) { return svrshl_u8_z(pg, op1, op2); } __forceinline svuint32_t svrshl_z(svbool_t pg, svuint32_t op1, svint32_t op2) { return svrshl_u32_z(pg, op1, op2); } __forceinline svuint64_t svrshl_z(svbool_t pg, svuint64_t op1, svint64_t op2) { return svrshl_u64_z(pg, op1, op2); } __forceinline svint8_t svrshl_m(svbool_t pg, svint8_t op1, int8_t op2) { return svrshl_n_s8_m(pg, op1, op2); } __forceinline svint16_t svrshl_m(svbool_t pg, svint16_t op1, int16_t op2) { return svrshl_n_s16_m(pg, op1, op2); } __forceinline svuint64_t svrshl_m(svbool_t pg, svuint64_t op1, int64_t op2) { return svrshl_n_u64_m(pg, op1, op2); } __forceinline svuint32_t svrshl_m(svbool_t pg, svuint32_t op1, int32_t op2) { return svrshl_n_u32_m(pg, op1, op2); } __forceinline svint64_t svrshl_m(svbool_t pg, svint64_t op1, int64_t op2) { return svrshl_n_s64_m(pg, op1, op2); } __forceinline svint8_t svrshl_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svrshl_s8_m(pg, op1, op2); } __forceinline svuint8_t svrshl_m(svbool_t pg, svuint8_t op1, int8_t op2) { return svrshl_n_u8_m(pg, op1, op2); } __forceinline svint8_t svrshl_x(svbool_t pg, svint8_t op1, int8_t op2) { 
return svrshl_n_s8_x(pg, op1, op2); } __forceinline svint32_t svrshl_x(svbool_t pg, svint32_t op1, int32_t op2) { return svrshl_n_s32_x(pg, op1, op2); } __forceinline svint16_t svrshl_x(svbool_t pg, svint16_t op1, int16_t op2) { return svrshl_n_s16_x(pg, op1, op2); } __forceinline svuint64_t svrshl_z(svbool_t pg, svuint64_t op1, int64_t op2) { return svrshl_n_u64_z(pg, op1, op2); } __forceinline svuint32_t svrshl_z(svbool_t pg, svuint32_t op1, int32_t op2) { return svrshl_n_u32_z(pg, op1, op2); } __forceinline svuint16_t svrshl_z(svbool_t pg, svuint16_t op1, int16_t op2) { return svrshl_n_u16_z(pg, op1, op2); } __forceinline svuint8_t svrshl_z(svbool_t pg, svuint8_t op1, int8_t op2) { return svrshl_n_u8_z(pg, op1, op2); } __forceinline svint64_t svrshl_z(svbool_t pg, svint64_t op1, int64_t op2) { return svrshl_n_s64_z(pg, op1, op2); } __forceinline svint32_t svrshl_z(svbool_t pg, svint32_t op1, int32_t op2) { return svrshl_n_s32_z(pg, op1, op2); } __forceinline svuint16_t svrshl_m(svbool_t pg, svuint16_t op1, int16_t op2) { return svrshl_n_u16_m(pg, op1, op2); } __forceinline svint8_t svrshl_z(svbool_t pg, svint8_t op1, int8_t op2) { return svrshl_n_s8_z(pg, op1, op2); } __forceinline svuint64_t svrshl_x(svbool_t pg, svuint64_t op1, int64_t op2) { return svrshl_n_u64_x(pg, op1, op2); } __forceinline svuint32_t svrshl_x(svbool_t pg, svuint32_t op1, int32_t op2) { return svrshl_n_u32_x(pg, op1, op2); } __forceinline svuint16_t svrshl_x(svbool_t pg, svuint16_t op1, int16_t op2) { return svrshl_n_u16_x(pg, op1, op2); } __forceinline svuint8_t svrshl_x(svbool_t pg, svuint8_t op1, int8_t op2) { return svrshl_n_u8_x(pg, op1, op2); } __forceinline svint64_t svrshl_x(svbool_t pg, svint64_t op1, int64_t op2) { return svrshl_n_s64_x(pg, op1, op2); } __forceinline svint16_t svrshl_z(svbool_t pg, svint16_t op1, int16_t op2) { return svrshl_n_s16_z(pg, op1, op2); } __forceinline svint16_t svqrshl_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svqrshl_s16_x(pg, op1, op2); } __forceinline svuint16_t svqrshl_z(svbool_t pg, svuint16_t op1, int16_t op2) { return svqrshl_n_u16_z(pg, op1, op2); } __forceinline svint64_t svqrshl_x(svbool_t pg, svint64_t op1, int64_t op2) { return svqrshl_n_s64_x(pg, op1, op2); } __forceinline svuint8_t svqrshl_x(svbool_t pg, svuint8_t op1, int8_t op2) { return svqrshl_n_u8_x(pg, op1, op2); } __forceinline svuint16_t svqrshl_x(svbool_t pg, svuint16_t op1, int16_t op2) { return svqrshl_n_u16_x(pg, op1, op2); } __forceinline svuint32_t svqrshl_x(svbool_t pg, svuint32_t op1, int32_t op2) { return svqrshl_n_u32_x(pg, op1, op2); } __forceinline svuint64_t svqrshl_x(svbool_t pg, svuint64_t op1, int64_t op2) { return svqrshl_n_u64_x(pg, op1, op2); } __forceinline svint8_t svqrshl_z(svbool_t pg, svint8_t op1, int8_t op2) { return svqrshl_n_s8_z(pg, op1, op2); } __forceinline svint16_t svqrshl_z(svbool_t pg, svint16_t op1, int16_t op2) { return svqrshl_n_s16_z(pg, op1, op2); } __forceinline svint32_t svqrshl_z(svbool_t pg, svint32_t op1, int32_t op2) { return svqrshl_n_s32_z(pg, op1, op2); } __forceinline svint64_t svqrshl_z(svbool_t pg, svint64_t op1, int64_t op2) { return svqrshl_n_s64_z(pg, op1, op2); } __forceinline svint32_t svqrshl_x(svbool_t pg, svint32_t op1, int32_t op2) { return svqrshl_n_s32_x(pg, op1, op2); } __forceinline svuint8_t svqrshl_z(svbool_t pg, svuint8_t op1, int8_t op2) { return svqrshl_n_u8_z(pg, op1, op2); } __forceinline svuint64_t svqrshl_z(svbool_t pg, svuint64_t op1, int64_t op2) { return svqrshl_n_u64_z(pg, op1, op2); } __forceinline svint8_t 
svqrshl_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svqrshl_s8_m(pg, op1, op2); } __forceinline svint16_t svqrshl_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svqrshl_s16_m(pg, op1, op2); } __forceinline svint32_t svqrshl_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svqrshl_s32_m(pg, op1, op2); } __forceinline svint64_t svqrshl_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svqrshl_s64_m(pg, op1, op2); } __forceinline svuint8_t svqrshl_m(svbool_t pg, svuint8_t op1, svint8_t op2) { return svqrshl_u8_m(pg, op1, op2); } __forceinline svuint16_t svqrshl_m(svbool_t pg, svuint16_t op1, svint16_t op2) { return svqrshl_u16_m(pg, op1, op2); } __forceinline svuint32_t svqrshl_m(svbool_t pg, svuint32_t op1, svint32_t op2) { return svqrshl_u32_m(pg, op1, op2); } __forceinline svint8_t svqrshl_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svqrshl_s8_x(pg, op1, op2); } __forceinline svuint32_t svqrshl_z(svbool_t pg, svuint32_t op1, int32_t op2) { return svqrshl_n_u32_z(pg, op1, op2); } __forceinline svint16_t svqrshl_x(svbool_t pg, svint16_t op1, int16_t op2) { return svqrshl_n_s16_x(pg, op1, op2); } __forceinline svuint64_t svqrshl_m(svbool_t pg, svuint64_t op1, svint64_t op2) { return svqrshl_u64_m(pg, op1, op2); } __forceinline svuint64_t svqrshl_m(svbool_t pg, svuint64_t op1, int64_t op2) { return svqrshl_n_u64_m(pg, op1, op2); } __forceinline svint8_t svqrshl_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svqrshl_s8_z(pg, op1, op2); } __forceinline svuint64_t svqrshl_x(svbool_t pg, svuint64_t op1, svint64_t op2) { return svqrshl_u64_x(pg, op1, op2); } __forceinline svuint32_t svqrshl_x(svbool_t pg, svuint32_t op1, svint32_t op2) { return svqrshl_u32_x(pg, op1, op2); } __forceinline svuint16_t svqrshl_x(svbool_t pg, svuint16_t op1, svint16_t op2) { return svqrshl_u16_x(pg, op1, op2); } __forceinline svuint8_t svqrshl_x(svbool_t pg, svuint8_t op1, svint8_t op2) { return svqrshl_u8_x(pg, op1, op2); } __forceinline svint64_t svqrshl_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svqrshl_s64_x(pg, op1, op2); } __forceinline svint32_t svqrshl_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svqrshl_s32_z(pg, op1, op2); } __forceinline svint64_t svqrshl_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svqrshl_s64_z(pg, op1, op2); } __forceinline svint32_t svqrshl_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svqrshl_s32_x(pg, op1, op2); } __forceinline svint16_t svqrshl_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svqrshl_s16_z(pg, op1, op2); } __forceinline svuint8_t svqrshl_z(svbool_t pg, svuint8_t op1, svint8_t op2) { return svqrshl_u8_z(pg, op1, op2); } __forceinline svuint32_t svqrshl_z(svbool_t pg, svuint32_t op1, svint32_t op2) { return svqrshl_u32_z(pg, op1, op2); } __forceinline svuint64_t svqrshl_z(svbool_t pg, svuint64_t op1, svint64_t op2) { return svqrshl_u64_z(pg, op1, op2); } __forceinline svint8_t svqrshl_m(svbool_t pg, svint8_t op1, int8_t op2) { return svqrshl_n_s8_m(pg, op1, op2); } __forceinline svint16_t svqrshl_m(svbool_t pg, svint16_t op1, int16_t op2) { return svqrshl_n_s16_m(pg, op1, op2); } __forceinline svint32_t svqrshl_m(svbool_t pg, svint32_t op1, int32_t op2) { return svqrshl_n_s32_m(pg, op1, op2); } __forceinline svint64_t svqrshl_m(svbool_t pg, svint64_t op1, int64_t op2) { return svqrshl_n_s64_m(pg, op1, op2); } __forceinline svuint8_t svqrshl_m(svbool_t pg, svuint8_t op1, int8_t op2) { return svqrshl_n_u8_m(pg, op1, op2); } __forceinline svuint16_t svqrshl_m(svbool_t pg, svuint16_t op1, int16_t 
op2) { return svqrshl_n_u16_m(pg, op1, op2); } __forceinline svuint32_t svqrshl_m(svbool_t pg, svuint32_t op1, int32_t op2) { return svqrshl_n_u32_m(pg, op1, op2); } __forceinline svuint16_t svqrshl_z(svbool_t pg, svuint16_t op1, svint16_t op2) { return svqrshl_u16_z(pg, op1, op2); } __forceinline svint8_t svqrshl_x(svbool_t pg, svint8_t op1, int8_t op2) { return svqrshl_n_s8_x(pg, op1, op2); } __forceinline svuint32_t svqshl_x(svbool_t pg, svuint32_t op1, int32_t op2) { return svqshl_n_u32_x(pg, op1, op2); } __forceinline svuint8_t svqshl_x(svbool_t pg, svuint8_t op1, int8_t op2) { return svqshl_n_u8_x(pg, op1, op2); } __forceinline svint64_t svqshl_x(svbool_t pg, svint64_t op1, int64_t op2) { return svqshl_n_s64_x(pg, op1, op2); } __forceinline svuint64_t svqshl_m(svbool_t pg, svuint64_t op1, int64_t op2) { return svqshl_n_u64_m(pg, op1, op2); } __forceinline svint16_t svqshl_x(svbool_t pg, svint16_t op1, int16_t op2) { return svqshl_n_s16_x(pg, op1, op2); } __forceinline svint8_t svqshl_x(svbool_t pg, svint8_t op1, int8_t op2) { return svqshl_n_s8_x(pg, op1, op2); } __forceinline svuint64_t svqshl_x(svbool_t pg, svuint64_t op1, int64_t op2) { return svqshl_n_u64_x(pg, op1, op2); } __forceinline svint32_t svqshl_x(svbool_t pg, svint32_t op1, int32_t op2) { return svqshl_n_s32_x(pg, op1, op2); } __forceinline svuint32_t svqshl_m(svbool_t pg, svuint32_t op1, int32_t op2) { return svqshl_n_u32_m(pg, op1, op2); } __forceinline svint8_t svqshl_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svqshl_s8_z(pg, op1, op2); } __forceinline svint16_t svqshl_z(svbool_t pg, svint16_t op1, int16_t op2) { return svqshl_n_s16_z(pg, op1, op2); } __forceinline svint32_t svqshl_z(svbool_t pg, svint32_t op1, int32_t op2) { return svqshl_n_s32_z(pg, op1, op2); } __forceinline svint64_t svqshl_z(svbool_t pg, svint64_t op1, int64_t op2) { return svqshl_n_s64_z(pg, op1, op2); } __forceinline svuint8_t svqshl_z(svbool_t pg, svuint8_t op1, int8_t op2) { return svqshl_n_u8_z(pg, op1, op2); } __forceinline svuint16_t svqshl_z(svbool_t pg, svuint16_t op1, int16_t op2) { return svqshl_n_u16_z(pg, op1, op2); } __forceinline svuint32_t svqshl_z(svbool_t pg, svuint32_t op1, int32_t op2) { return svqshl_n_u32_z(pg, op1, op2); } __forceinline svuint64_t svqshl_z(svbool_t pg, svuint64_t op1, int64_t op2) { return svqshl_n_u64_z(pg, op1, op2); } __forceinline svuint16_t svqshl_m(svbool_t pg, svuint16_t op1, int16_t op2) { return svqshl_n_u16_m(pg, op1, op2); } __forceinline svint8_t svqshl_z(svbool_t pg, svint8_t op1, int8_t op2) { return svqshl_n_s8_z(pg, op1, op2); } __forceinline svuint8_t svqshl_m(svbool_t pg, svuint8_t op1, int8_t op2) { return svqshl_n_u8_m(pg, op1, op2); } __forceinline svuint16_t svqshl_x(svbool_t pg, svuint16_t op1, int16_t op2) { return svqshl_n_u16_x(pg, op1, op2); } __forceinline svint32_t svqshl_m(svbool_t pg, svint32_t op1, int32_t op2) { return svqshl_n_s32_m(pg, op1, op2); } __forceinline svint8_t svqshl_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svqshl_s8_m(pg, op1, op2); } __forceinline svint16_t svqshl_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svqshl_s16_m(pg, op1, op2); } __forceinline svint32_t svqshl_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svqshl_s32_m(pg, op1, op2); } __forceinline svint64_t svqshl_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svqshl_s64_m(pg, op1, op2); } __forceinline svuint8_t svqshl_m(svbool_t pg, svuint8_t op1, svint8_t op2) { return svqshl_u8_m(pg, op1, op2); } __forceinline svint64_t svqshl_m(svbool_t pg, svint64_t 
op1, int64_t op2) { return svqshl_n_s64_m(pg, op1, op2); }
__forceinline svuint32_t svqshl_m(svbool_t pg, svuint32_t op1, svint32_t op2) { return svqshl_u32_m(pg, op1, op2); }
__forceinline svuint64_t svqshl_m(svbool_t pg, svuint64_t op1, svint64_t op2) { return svqshl_u64_m(pg, op1, op2); }
__forceinline svint8_t svqshl_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svqshl_s8_x(pg, op1, op2); }
__forceinline svint16_t svqshl_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svqshl_s16_x(pg, op1, op2); }
__forceinline svint32_t svqshl_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svqshl_s32_x(pg, op1, op2); }
__forceinline svint64_t svqshl_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svqshl_s64_x(pg, op1, op2); }
__forceinline svuint8_t svqshl_x(svbool_t pg, svuint8_t op1, svint8_t op2) { return svqshl_u8_x(pg, op1, op2); }
__forceinline svuint16_t svqshl_m(svbool_t pg, svuint16_t op1, svint16_t op2) { return svqshl_u16_m(pg, op1, op2); }
__forceinline svuint32_t svqshl_x(svbool_t pg, svuint32_t op1, svint32_t op2) { return svqshl_u32_x(pg, op1, op2); }
__forceinline svuint16_t svqshl_z(svbool_t pg, svuint16_t op1, svint16_t op2) { return svqshl_u16_z(pg, op1, op2); }
__forceinline svuint32_t svqshl_z(svbool_t pg, svuint32_t op1, svint32_t op2) { return svqshl_u32_z(pg, op1, op2); }
__forceinline svuint64_t svqshl_z(svbool_t pg, svuint64_t op1, svint64_t op2) { return svqshl_u64_z(pg, op1, op2); }
__forceinline svint8_t svqshl_m(svbool_t pg, svint8_t op1, int8_t op2) { return svqshl_n_s8_m(pg, op1, op2); }
__forceinline svint16_t svqshl_m(svbool_t pg, svint16_t op1, int16_t op2) { return svqshl_n_s16_m(pg, op1, op2); }
__forceinline svuint8_t svqshl_z(svbool_t pg, svuint8_t op1, svint8_t op2) { return svqshl_u8_z(pg, op1, op2); }
__forceinline svuint16_t svqshl_x(svbool_t pg, svuint16_t op1, svint16_t op2) { return svqshl_u16_x(pg, op1, op2); }
__forceinline svint64_t svqshl_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svqshl_s64_z(pg, op1, op2); }
__forceinline svuint64_t svqshl_x(svbool_t pg, svuint64_t op1, svint64_t op2) { return svqshl_u64_x(pg, op1, op2); }
__forceinline svint16_t svqshl_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svqshl_s16_z(pg, op1, op2); }
__forceinline svint32_t svqshl_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svqshl_s32_z(pg, op1, op2); }
template <uint64_t N, typename T> __forceinline __svehdr_utype<T> __svqshlu_x(svbool_t pg, T op1) { if constexpr(::std::is_same_v<T, svint64_t>) { return svqshlu_n_s64_x(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svqshlu_n_s32_x(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svqshlu_n_s16_x(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint8_t>) { return svqshlu_n_s8_x(pg, op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqshlu_x(pg, op1, imm2) __svqshlu_x<imm2>(pg, op1)
template <uint64_t N, typename T> __forceinline __svehdr_utype<T> __svqshlu_z(svbool_t pg, T op1) { if constexpr(::std::is_same_v<T, svint64_t>) { return svqshlu_n_s64_z(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svqshlu_n_s32_z(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svqshlu_n_s16_z(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint8_t>) { return svqshlu_n_s8_z(pg, op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqshlu_z(pg, op1, imm2) __svqshlu_z<imm2>(pg, op1)
template <uint64_t N, typename T> __forceinline __svehdr_utype<T> __svqshlu_m(svbool_t pg, T op1) { if constexpr(::std::is_same_v<T, svint64_t>) { return svqshlu_n_s64_m(pg, op1, N); } else if
constexpr(::std::is_same_v<T, svint32_t>) { return svqshlu_n_s32_m(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svqshlu_n_s16_m(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint8_t>) { return svqshlu_n_s8_m(pg, op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqshlu_m(pg, op1, imm2) __svqshlu_m<imm2>(pg, op1)
template <uint64_t N, typename T> __forceinline T __svsli(T op1, T op2) { if constexpr(::std::is_same_v<T, svint64_t>) { return svsli_n_s64(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svsli_n_s32(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svsli_n_s16(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint8_t>) { return svsli_n_s8(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint64_t>) { return svsli_n_u64(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svsli_n_u32(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svsli_n_u16(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint8_t>) { return svsli_n_u8(op1, op2, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svsli(op1, op2, imm3) __svsli<imm3>(op1, op2)
template <uint64_t N, typename T> __forceinline __svehdr_twice_type<T> __svshllb(T op1) { if constexpr(::std::is_same_v<T, svint8_t>) { return svshllb_n_s16(op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svshllb_n_s32(op1, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svshllb_n_s64(op1, N); } else if constexpr(::std::is_same_v<T, svuint8_t>) { return svshllb_n_u16(op1, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svshllb_n_u32(op1, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svshllb_n_u64(op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svshllb(op1, imm2) __svshllb<imm2>(op1)
template <uint64_t N, typename T> __forceinline __svehdr_twice_type<T> __svshllt(T op1) { if constexpr(::std::is_same_v<T, svint32_t>) { return svshllt_n_s64(op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svshllt_n_s32(op1, N); } else if constexpr(::std::is_same_v<T, svint8_t>) { return svshllt_n_s16(op1, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svshllt_n_u64(op1, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svshllt_n_u32(op1, N); } else if constexpr(::std::is_same_v<T, svuint8_t>) { return svshllt_n_u16(op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svshllt(op1, imm2) __svshllt<imm2>(op1)
template <uint64_t N, typename T> __forceinline T __svrsra(T op1, T op2) { if constexpr(::std::is_same_v<T, svuint8_t>) { return svrsra_n_u8(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svrsra_n_u16(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svrsra_n_u32(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint64_t>) { return svrsra_n_u64(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint8_t>) { return svrsra_n_s8(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svrsra_n_s16(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svrsra_n_s32(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint64_t>) { return svrsra_n_s64(op1, op2, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svrsra(op1, op2, imm3) __svrsra<imm3>(op1, op2)
template <uint64_t N, typename T> __forceinline __svehdr_half_type<T> __svrshrnb(T op1) { if constexpr(::std::is_same_v<T, svint64_t>) { return svrshrnb_n_s64(op1, N); } else if constexpr(::std::is_same_v<T, svuint64_t>) { return svrshrnb_n_u64(op1, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svrshrnb_n_s32(op1, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svrshrnb_n_u32(op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svrshrnb_n_s16(op1, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return
svrshrnb_n_u16(op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svrshrnb(op1, imm2) __svrshrnb<imm2>(op1)
template <uint64_t N, typename T1, typename T2> __forceinline T1 __svrshrnt(T1 even, T2 op1) { if constexpr(::std::is_same_v<T2, svuint16_t>) { return svrshrnt_n_u16(even, op1, N); } else if constexpr(::std::is_same_v<T2, svuint32_t>) { return svrshrnt_n_u32(even, op1, N); } else if constexpr(::std::is_same_v<T2, svuint64_t>) { return svrshrnt_n_u64(even, op1, N); } else if constexpr(::std::is_same_v<T2, svint16_t>) { return svrshrnt_n_s16(even, op1, N); } else if constexpr(::std::is_same_v<T2, svint32_t>) { return svrshrnt_n_s32(even, op1, N); } else if constexpr(::std::is_same_v<T2, svint64_t>) { return svrshrnt_n_s64(even, op1, N); } }
#define svrshrnt(even, op1, imm2) __svrshrnt<imm2>(even, op1)
template <uint64_t N, typename T> __forceinline T __svrshr_m(svbool_t pg, T op1) { if constexpr(::std::is_same_v<T, svuint64_t>) { return svrshr_n_u64_m(pg, op1, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svrshr_n_u32_m(pg, op1, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svrshr_n_u16_m(pg, op1, N); } else if constexpr(::std::is_same_v<T, svuint8_t>) { return svrshr_n_u8_m(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint64_t>) { return svrshr_n_s64_m(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svrshr_n_s32_m(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svrshr_n_s16_m(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint8_t>) { return svrshr_n_s8_m(pg, op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svrshr_m(pg, op1, imm2) __svrshr_m<imm2>(pg, op1)
template <uint64_t N, typename T> __forceinline T __svrshr_x(svbool_t pg, T op1) { if constexpr(::std::is_same_v<T, svuint64_t>) { return svrshr_n_u64_x(pg, op1, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svrshr_n_u32_x(pg, op1, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svrshr_n_u16_x(pg, op1, N); } else if constexpr(::std::is_same_v<T, svuint8_t>) { return svrshr_n_u8_x(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint64_t>) { return svrshr_n_s64_x(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svrshr_n_s32_x(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svrshr_n_s16_x(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint8_t>) { return svrshr_n_s8_x(pg, op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svrshr_x(pg, op1, imm2) __svrshr_x<imm2>(pg, op1)
template <uint64_t N, typename T> __forceinline T __svrshr_z(svbool_t pg, T op1) { if constexpr(::std::is_same_v<T, svuint64_t>) { return svrshr_n_u64_z(pg, op1, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svrshr_n_u32_z(pg, op1, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svrshr_n_u16_z(pg, op1, N); } else if constexpr(::std::is_same_v<T, svuint8_t>) { return svrshr_n_u8_z(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint64_t>) { return svrshr_n_s64_z(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svrshr_n_s32_z(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svrshr_n_s16_z(pg, op1, N); } else if constexpr(::std::is_same_v<T, svint8_t>) { return svrshr_n_s8_z(pg, op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svrshr_z(pg, op1, imm2) __svrshr_z<imm2>(pg, op1)
template <uint64_t N, typename T> __forceinline __svehdr_half_type<T> __svqrshrnb(T op1) { if constexpr(::std::is_same_v<T, svuint16_t>) { return svqrshrnb_n_u16(op1, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svqrshrnb_n_u32(op1, N); } else if constexpr(::std::is_same_v<T, svuint64_t>) { return svqrshrnb_n_u64(op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svqrshrnb_n_s16(op1, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svqrshrnb_n_s32(op1, N); } else if constexpr(::std::is_same_v<T, svint64_t>) { return
svqrshrnb_n_s64(op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqrshrnb(op1, imm2) __svqrshrnb<imm2>(op1)
template <uint64_t N, typename T1, typename T2> __forceinline T1 __svqrshrnt(T1 even, T2 op1) { if constexpr(::std::is_same_v<T2, svint64_t>) { return svqrshrnt_n_s64(even, op1, N); } else if constexpr(::std::is_same_v<T2, svint32_t>) { return svqrshrnt_n_s32(even, op1, N); } else if constexpr(::std::is_same_v<T2, svint16_t>) { return svqrshrnt_n_s16(even, op1, N); } else if constexpr(::std::is_same_v<T2, svuint64_t>) { return svqrshrnt_n_u64(even, op1, N); } else if constexpr(::std::is_same_v<T2, svuint32_t>) { return svqrshrnt_n_u32(even, op1, N); } else if constexpr(::std::is_same_v<T2, svuint16_t>) { return svqrshrnt_n_u16(even, op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqrshrnt(even, op1, imm2) __svqrshrnt<imm2>(even, op1)
template <uint64_t N, typename T> __forceinline __svehdr_half_utype<T> __svqrshrunb(T op1) { if constexpr(::std::is_same_v<T, svint64_t>) { return svqrshrunb_n_s64(op1, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svqrshrunb_n_s32(op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svqrshrunb_n_s16(op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqrshrunb(op1, imm2) __svqrshrunb<imm2>(op1)
template <uint64_t N, typename T1, typename T2> __forceinline T1 __svqrshrunt(T1 even, T2 op1) { if constexpr(::std::is_same_v<T2, svint64_t>) { return svqrshrunt_n_s64(even, op1, N); } else if constexpr(::std::is_same_v<T2, svint32_t>) { return svqrshrunt_n_s32(even, op1, N); } else if constexpr(::std::is_same_v<T2, svint16_t>) { return svqrshrunt_n_s16(even, op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqrshrunt(even, op1, imm2) __svqrshrunt<imm2>(even, op1)
template <uint64_t N, typename T> __forceinline __svehdr_half_type<T> __svqshrnb(T op1) { if constexpr(::std::is_same_v<T, svuint64_t>) { return svqshrnb_n_u64(op1, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svqshrnb_n_u32(op1, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svqshrnb_n_u16(op1, N); } else if constexpr(::std::is_same_v<T, svint64_t>) { return svqshrnb_n_s64(op1, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svqshrnb_n_s32(op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svqshrnb_n_s16(op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqshrnb(op1, imm2) __svqshrnb<imm2>(op1)
template <uint64_t N, typename T1, typename T2> __forceinline T1 __svqshrnt(T1 even, T2 op1) { if constexpr(::std::is_same_v<T2, svint64_t>) { return svqshrnt_n_s64(even, op1, N); } else if constexpr(::std::is_same_v<T2, svint32_t>) { return svqshrnt_n_s32(even, op1, N); } else if constexpr(::std::is_same_v<T2, svint16_t>) { return svqshrnt_n_s16(even, op1, N); } else if constexpr(::std::is_same_v<T2, svuint64_t>) { return svqshrnt_n_u64(even, op1, N); } else if constexpr(::std::is_same_v<T2, svuint32_t>) { return svqshrnt_n_u32(even, op1, N); } else if constexpr(::std::is_same_v<T2, svuint16_t>) { return svqshrnt_n_u16(even, op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqshrnt(even, op1, imm2) __svqshrnt<imm2>(even, op1)
template <uint64_t N, typename T> __forceinline __svehdr_half_utype<T> __svqshrunb(T op1) { if constexpr(::std::is_same_v<T, svint64_t>) { return svqshrunb_n_s64(op1, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svqshrunb_n_s32(op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svqshrunb_n_s16(op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqshrunb(op1, imm2) __svqshrunb<imm2>(op1)
template <uint64_t N, typename T1, typename T2> __forceinline T1 __svqshrunt(T1 even, T2 op1) { if constexpr(::std::is_same_v<T2, svint64_t>) { return svqshrunt_n_s64(even, op1, N); } else if constexpr(::std::is_same_v<T2, svint32_t>) { return svqshrunt_n_s32(even, op1, N); } else if constexpr(::std::is_same_v<T2, svint16_t>) { return svqshrunt_n_s16(even, op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svqshrunt(even, op1, imm2) __svqshrunt<imm2>(even, op1)
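// Illustrative usage sketch (comment only, not part of the header's declarations): the
// shift-by-immediate wrappers above are macros that forward the immediate operand as a
// template argument, so the call site reads like an ordinary type-generic intrinsic call.
// The locals `pg` and `values` below are hypothetical, assuming an svint32_t input:
//
//   svbool_t   pg     = svptrue_b32();
//   svint32_t  values = svdup_n_s32(-100);
//   svuint32_t usat   = svqshlu_x(pg, values, 3); // forwards 3 as the template argument and
//                                                 // dispatches to svqshlu_n_s32_x
//   svint32_t rounded = svrshr_x(pg, values, 2);  // dispatches to svrshr_n_s32_x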
template <uint64_t N, typename T> __forceinline T __svsra(T op1, T op2) { if constexpr(::std::is_same_v<T, svint64_t>) { return svsra_n_s64(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svsra_n_s32(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svsra_n_s16(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint8_t>) { return svsra_n_s8(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint64_t>) { return svsra_n_u64(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svsra_n_u32(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svsra_n_u16(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint8_t>) { return svsra_n_u8(op1, op2, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svsra(op1, op2, imm3) __svsra<imm3>(op1, op2)
template <uint64_t N, typename T> __forceinline T __svsri(T op1, T op2) { if constexpr(::std::is_same_v<T, svint64_t>) { return svsri_n_s64(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svsri_n_s32(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svsri_n_s16(op1, op2, N); } else if constexpr(::std::is_same_v<T, svint8_t>) { return svsri_n_s8(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint64_t>) { return svsri_n_u64(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svsri_n_u32(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svsri_n_u16(op1, op2, N); } else if constexpr(::std::is_same_v<T, svuint8_t>) { return svsri_n_u8(op1, op2, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svsri(op1, op2, imm3) __svsri<imm3>(op1, op2)
template <uint64_t N, typename T> __forceinline __svehdr_half_type<T> __svshrnb(T op1) { if constexpr(::std::is_same_v<T, svuint64_t>) { return svshrnb_n_u64(op1, N); } else if constexpr(::std::is_same_v<T, svuint32_t>) { return svshrnb_n_u32(op1, N); } else if constexpr(::std::is_same_v<T, svuint16_t>) { return svshrnb_n_u16(op1, N); } else if constexpr(::std::is_same_v<T, svint64_t>) { return svshrnb_n_s64(op1, N); } else if constexpr(::std::is_same_v<T, svint32_t>) { return svshrnb_n_s32(op1, N); } else if constexpr(::std::is_same_v<T, svint16_t>) { return svshrnb_n_s16(op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svshrnb(op1, imm2) __svshrnb<imm2>(op1)
template <uint64_t N, typename T1, typename T2> __forceinline T1 __svshrnt(T1 even, T2 op1) { if constexpr(::std::is_same_v<T2, svint64_t>) { return svshrnt_n_s64(even, op1, N); } else if constexpr(::std::is_same_v<T2, svint32_t>) { return svshrnt_n_s32(even, op1, N); } else if constexpr(::std::is_same_v<T2, svint16_t>) { return svshrnt_n_s16(even, op1, N); } else if constexpr(::std::is_same_v<T2, svuint64_t>) { return svshrnt_n_u64(even, op1, N); } else if constexpr(::std::is_same_v<T2, svuint32_t>) { return svshrnt_n_u32(even, op1, N); } else if constexpr(::std::is_same_v<T2, svuint16_t>) { return svshrnt_n_u16(even, op1, N); } else { static_assert(false, "unexpected type during SVE intrinsic overload"); } }
#define svshrnt(even, op1, imm2) __svshrnt<imm2>(even, op1)
__forceinline void svstnt1_scatter_index(svbool_t pg, int64_t *base, svuint64_t indices, svint64_t data) { return svstnt1_scatter_u64index_s64(pg, base, indices, data); }
__forceinline void svstnt1_scatter_index(svbool_t pg, uint64_t *base, svuint64_t indices, svuint64_t data) { return svstnt1_scatter_u64index_u64(pg, base, indices, data); }
__forceinline void svstnt1_scatter_offset(svbool_t pg, svuint32_t bases, int64_t offset, svfloat32_t data) { return svstnt1_scatter_u32base_offset_f32(pg, bases, offset, data); }
__forceinline void svstnt1_scatter_offset(svbool_t pg, svuint32_t
bases, int64_t offset, svint32_t data) { return svstnt1_scatter_u32base_offset_s32(pg, bases, offset, data); } __forceinline void svstnt1_scatter_offset(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data) { return svstnt1_scatter_u32base_offset_u32(pg, bases, offset, data); } __forceinline void svstnt1_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svfloat64_t data) { return svstnt1_scatter_u64base_offset_f64(pg, bases, offset, data); } __forceinline void svstnt1_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) { return svstnt1_scatter_u64base_offset_u64(pg, bases, offset, data); } __forceinline void svstnt1_scatter_index(svbool_t pg, float64_t *base, svuint64_t indices, svfloat64_t data) { return svstnt1_scatter_u64index_f64(pg, base, indices, data); } __forceinline void svstnt1_scatter_index(svbool_t pg, svuint32_t bases, int64_t index, svfloat32_t data) { return svstnt1_scatter_u32base_index_f32(pg, bases, index, data); } __forceinline void svstnt1_scatter_index(svbool_t pg, svuint32_t bases, int64_t index, svint32_t data) { return svstnt1_scatter_u32base_index_s32(pg, bases, index, data); } __forceinline void svstnt1_scatter_index(svbool_t pg, svuint32_t bases, int64_t index, svuint32_t data) { return svstnt1_scatter_u32base_index_u32(pg, bases, index, data); } __forceinline void svstnt1_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, svfloat64_t data) { return svstnt1_scatter_u64base_index_f64(pg, bases, index, data); } __forceinline void svstnt1_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data) { return svstnt1_scatter_u64base_index_s64(pg, bases, index, data); } __forceinline void svstnt1_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) { return svstnt1_scatter_u64base_offset_s64(pg, bases, offset, data); } __forceinline void svstnt1_scatter_index(svbool_t pg, uint64_t *base, svint64_t indices, svuint64_t data) { return svstnt1_scatter_s64index_u64(pg, base, indices, data); } __forceinline void svstnt1_scatter_offset(svbool_t pg, uint64_t *base, svuint64_t offsets, svuint64_t data) { return svstnt1_scatter_u64offset_u64(pg, base, offsets, data); } __forceinline void svstnt1_scatter_index(svbool_t pg, float64_t *base, svint64_t indices, svfloat64_t data) { return svstnt1_scatter_s64index_f64(pg, base, indices, data); } __forceinline void svstnt1_scatter_offset(svbool_t pg, int64_t *base, svuint64_t offsets, svint64_t data) { return svstnt1_scatter_u64offset_s64(pg, base, offsets, data); } __forceinline void svstnt1_scatter_offset(svbool_t pg, float64_t *base, svuint64_t offsets, svfloat64_t data) { return svstnt1_scatter_u64offset_f64(pg, base, offsets, data); } __forceinline void svstnt1_scatter_offset(svbool_t pg, uint64_t *base, svint64_t offsets, svuint64_t data) { return svstnt1_scatter_s64offset_u64(pg, base, offsets, data); } __forceinline void svstnt1_scatter_offset(svbool_t pg, int64_t *base, svint64_t offsets, svint64_t data) { return svstnt1_scatter_s64offset_s64(pg, base, offsets, data); } __forceinline void svstnt1_scatter_offset(svbool_t pg, float64_t *base, svint64_t offsets, svfloat64_t data) { return svstnt1_scatter_s64offset_f64(pg, base, offsets, data); } __forceinline void svstnt1_scatter_offset(svbool_t pg, uint32_t *base, svuint32_t offsets, svuint32_t data) { return svstnt1_scatter_u32offset_u32(pg, base, offsets, data); } __forceinline void svstnt1_scatter_offset(svbool_t pg, int32_t *base, svuint32_t offsets, svint32_t data) { return 
svstnt1_scatter_u32offset_s32(pg, base, offsets, data); } __forceinline void svstnt1_scatter_offset(svbool_t pg, float32_t *base, svuint32_t offsets, svfloat32_t data) { return svstnt1_scatter_u32offset_f32(pg, base, offsets, data); } __forceinline void svstnt1_scatter(svbool_t pg, svuint64_t bases, svuint64_t data) { return svstnt1_scatter_u64base_u64(pg, bases, data); } __forceinline void svstnt1_scatter(svbool_t pg, svuint64_t bases, svint64_t data) { return svstnt1_scatter_u64base_s64(pg, bases, data); } __forceinline void svstnt1_scatter(svbool_t pg, svuint64_t bases, svfloat64_t data) { return svstnt1_scatter_u64base_f64(pg, bases, data); } __forceinline void svstnt1_scatter(svbool_t pg, svuint32_t bases, svuint32_t data) { return svstnt1_scatter_u32base_u32(pg, bases, data); } __forceinline void svstnt1_scatter(svbool_t pg, svuint32_t bases, svint32_t data) { return svstnt1_scatter_u32base_s32(pg, bases, data); } __forceinline void svstnt1_scatter_index(svbool_t pg, int64_t *base, svint64_t indices, svint64_t data) { return svstnt1_scatter_s64index_s64(pg, base, indices, data); } __forceinline void svstnt1_scatter(svbool_t pg, svuint32_t bases, svfloat32_t data) { return svstnt1_scatter_u32base_f32(pg, bases, data); } __forceinline void svstnt1_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data) { return svstnt1_scatter_u64base_index_u64(pg, bases, index, data); } __forceinline void svstnt1h_scatter(svbool_t pg, svuint64_t bases, svint64_t data) { return svstnt1h_scatter_u64base_s64(pg, bases, data); } __forceinline void svstnt1h_scatter(svbool_t pg, svuint64_t bases, svuint64_t data) { return svstnt1h_scatter_u64base_u64(pg, bases, data); } __forceinline void svstnt1h_scatter_offset(svbool_t pg, int16_t *base, svuint32_t offsets, svint32_t data) { return svstnt1h_scatter_u32offset_s32(pg, base, offsets, data); } __forceinline void svstnt1h_scatter(svbool_t pg, svuint32_t bases, svint32_t data) { return svstnt1h_scatter_u32base_s32(pg, bases, data); } __forceinline void svstnt1h_scatter(svbool_t pg, svuint32_t bases, svuint32_t data) { return svstnt1h_scatter_u32base_u32(pg, bases, data); } __forceinline void svstnt1h_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data) { return svstnt1h_scatter_u64base_index_u64(pg, bases, index, data); } __forceinline void svstnt1h_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, svint64_t data) { return svstnt1h_scatter_u64base_index_s64(pg, bases, index, data); } __forceinline void svstnt1h_scatter_index(svbool_t pg, svuint32_t bases, int64_t index, svint32_t data) { return svstnt1h_scatter_u32base_index_s32(pg, bases, index, data); } __forceinline void svstnt1h_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) { return svstnt1h_scatter_u64base_offset_u64(pg, bases, offset, data); } __forceinline void svstnt1h_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) { return svstnt1h_scatter_u64base_offset_s64(pg, bases, offset, data); } __forceinline void svstnt1h_scatter_offset(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data) { return svstnt1h_scatter_u32base_offset_u32(pg, bases, offset, data); } __forceinline void svstnt1h_scatter_index(svbool_t pg, svuint32_t bases, int64_t index, svuint32_t data) { return svstnt1h_scatter_u32base_index_u32(pg, bases, index, data); } __forceinline void svstnt1h_scatter_index(svbool_t pg, uint16_t *base, svuint64_t indices, svuint64_t data) { return svstnt1h_scatter_u64index_u64(pg, 
base, indices, data); } __forceinline void svstnt1h_scatter_index(svbool_t pg, int16_t *base, svuint64_t indices, svint64_t data) { return svstnt1h_scatter_u64index_s64(pg, base, indices, data); } __forceinline void svstnt1h_scatter_index(svbool_t pg, uint16_t *base, svint64_t indices, svuint64_t data) { return svstnt1h_scatter_s64index_u64(pg, base, indices, data); } __forceinline void svstnt1h_scatter_index(svbool_t pg, int16_t *base, svint64_t indices, svint64_t data) { return svstnt1h_scatter_s64index_s64(pg, base, indices, data); } __forceinline void svstnt1h_scatter_offset(svbool_t pg, uint16_t *base, svuint64_t offsets, svuint64_t data) { return svstnt1h_scatter_u64offset_u64(pg, base, offsets, data); } __forceinline void svstnt1h_scatter_offset(svbool_t pg, int16_t *base, svuint64_t offsets, svint64_t data) { return svstnt1h_scatter_u64offset_s64(pg, base, offsets, data); } __forceinline void svstnt1h_scatter_offset(svbool_t pg, uint16_t *base, svuint32_t offsets, svuint32_t data) { return svstnt1h_scatter_u32offset_u32(pg, base, offsets, data); } __forceinline void svstnt1h_scatter_offset(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data) { return svstnt1h_scatter_u32base_offset_s32(pg, bases, offset, data); } __forceinline void svstnt1h_scatter_offset(svbool_t pg, uint16_t *base, svint64_t offsets, svuint64_t data) { return svstnt1h_scatter_s64offset_u64(pg, base, offsets, data); } __forceinline void svstnt1h_scatter_offset(svbool_t pg, int16_t *base, svint64_t offsets, svint64_t data) { return svstnt1h_scatter_s64offset_s64(pg, base, offsets, data); } __forceinline void svstnt1w_scatter_offset(svbool_t pg, uint32_t *base, svuint64_t offsets, svuint64_t data) { return svstnt1w_scatter_u64offset_u64(pg, base, offsets, data); } __forceinline void svstnt1w_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) { return svstnt1w_scatter_u64base_offset_u64(pg, bases, offset, data); } __forceinline void svstnt1w_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) { return svstnt1w_scatter_u64base_offset_s64(pg, bases, offset, data); } __forceinline void svstnt1w_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, svuint64_t data) { return svstnt1w_scatter_u64base_index_u64(pg, bases, index, data); } __forceinline void svstnt1w_scatter_offset(svbool_t pg, uint32_t *base, svint64_t offsets, svuint64_t data) { return svstnt1w_scatter_s64offset_u64(pg, base, offsets, data); } __forceinline void svstnt1w_scatter_offset(svbool_t pg, int32_t *base, svint64_t offsets, svint64_t data) { return svstnt1w_scatter_s64offset_s64(pg, base, offsets, data); } __forceinline void svstnt1w_scatter(svbool_t pg, svuint64_t bases, svuint64_t data) { return svstnt1w_scatter_u64base_u64(pg, bases, data); } __forceinline void svstnt1w_scatter(svbool_t pg, svuint64_t bases, svint64_t data) { return svstnt1w_scatter_u64base_s64(pg, bases, data); } __forceinline void svstnt1w_scatter_index(svbool_t pg, int32_t *base, svint64_t indices, svint64_t data) { return svstnt1w_scatter_s64index_s64(pg, base, indices, data); } __forceinline void svstnt1w_scatter_offset(svbool_t pg, int32_t *base, svuint64_t offsets, svint64_t data) { return svstnt1w_scatter_u64offset_s64(pg, base, offsets, data); } __forceinline void svstnt1w_scatter_index(svbool_t pg, uint32_t *base, svint64_t indices, svuint64_t data) { return svstnt1w_scatter_s64index_u64(pg, base, indices, data); } __forceinline void svstnt1w_scatter_index(svbool_t pg, svuint64_t bases, int64_t index, 
svint64_t data) { return svstnt1w_scatter_u64base_index_s64(pg, bases, index, data); } __forceinline void svstnt1w_scatter_index(svbool_t pg, uint32_t *base, svuint64_t indices, svuint64_t data) { return svstnt1w_scatter_u64index_u64(pg, base, indices, data); } __forceinline void svstnt1w_scatter_index(svbool_t pg, int32_t *base, svuint64_t indices, svint64_t data) { return svstnt1w_scatter_u64index_s64(pg, base, indices, data); } __forceinline void svstnt1b_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svuint64_t data) { return svstnt1b_scatter_u64base_offset_u64(pg, bases, offset, data); } __forceinline void svstnt1b_scatter_offset(svbool_t pg, svuint64_t bases, int64_t offset, svint64_t data) { return svstnt1b_scatter_u64base_offset_s64(pg, bases, offset, data); } __forceinline void svstnt1b_scatter_offset(svbool_t pg, svuint32_t bases, int64_t offset, svuint32_t data) { return svstnt1b_scatter_u32base_offset_u32(pg, bases, offset, data); } __forceinline void svstnt1b_scatter_offset(svbool_t pg, svuint32_t bases, int64_t offset, svint32_t data) { return svstnt1b_scatter_u32base_offset_s32(pg, bases, offset, data); } __forceinline void svstnt1b_scatter_offset(svbool_t pg, uint8_t *base, svuint64_t offsets, svuint64_t data) { return svstnt1b_scatter_u64offset_u64(pg, base, offsets, data); } __forceinline void svstnt1b_scatter_offset(svbool_t pg, int8_t *base, svuint64_t offsets, svint64_t data) { return svstnt1b_scatter_u64offset_s64(pg, base, offsets, data); } __forceinline void svstnt1b_scatter_offset(svbool_t pg, uint8_t *base, svint64_t offsets, svuint64_t data) { return svstnt1b_scatter_s64offset_u64(pg, base, offsets, data); } __forceinline void svstnt1b_scatter_offset(svbool_t pg, uint8_t *base, svuint32_t offsets, svuint32_t data) { return svstnt1b_scatter_u32offset_u32(pg, base, offsets, data); } __forceinline void svstnt1b_scatter_offset(svbool_t pg, int8_t *base, svuint32_t offsets, svint32_t data) { return svstnt1b_scatter_u32offset_s32(pg, base, offsets, data); } __forceinline void svstnt1b_scatter(svbool_t pg, svuint64_t bases, svuint64_t data) { return svstnt1b_scatter_u64base_u64(pg, bases, data); } __forceinline void svstnt1b_scatter(svbool_t pg, svuint64_t bases, svint64_t data) { return svstnt1b_scatter_u64base_s64(pg, bases, data); } __forceinline void svstnt1b_scatter(svbool_t pg, svuint32_t bases, svuint32_t data) { return svstnt1b_scatter_u32base_u32(pg, bases, data); } __forceinline void svstnt1b_scatter(svbool_t pg, svuint32_t bases, svint32_t data) { return svstnt1b_scatter_u32base_s32(pg, bases, data); } __forceinline void svstnt1b_scatter_offset(svbool_t pg, int8_t *base, svint64_t offsets, svint64_t data) { return svstnt1b_scatter_s64offset_s64(pg, base, offsets, data); } __forceinline svbfloat16_t svtbx(svbfloat16_t fallback, svbfloat16_t data, svuint16_t indices) { return svtbx_bf16(fallback, data, indices); } __forceinline svint8_t svtbx(svint8_t fallback, svint8_t data, svuint8_t indices) { return svtbx_s8(fallback, data, indices); } __forceinline svuint32_t svtbx(svuint32_t fallback, svuint32_t data, svuint32_t indices) { return svtbx_u32(fallback, data, indices); } __forceinline svfloat16_t svtbx(svfloat16_t fallback, svfloat16_t data, svuint16_t indices) { return svtbx_f16(fallback, data, indices); } __forceinline svfloat32_t svtbx(svfloat32_t fallback, svfloat32_t data, svuint32_t indices) { return svtbx_f32(fallback, data, indices); } __forceinline svfloat64_t svtbx(svfloat64_t fallback, svfloat64_t data, svuint64_t indices) { return 
svtbx_f64(fallback, data, indices); } __forceinline svint16_t svtbx(svint16_t fallback, svint16_t data, svuint16_t indices) { return svtbx_s16(fallback, data, indices); } __forceinline svint32_t svtbx(svint32_t fallback, svint32_t data, svuint32_t indices) { return svtbx_s32(fallback, data, indices); } __forceinline svint64_t svtbx(svint64_t fallback, svint64_t data, svuint64_t indices) { return svtbx_s64(fallback, data, indices); } __forceinline svuint64_t svtbx(svuint64_t fallback, svuint64_t data, svuint64_t indices) { return svtbx_u64(fallback, data, indices); } __forceinline svuint16_t svtbx(svuint16_t fallback, svuint16_t data, svuint16_t indices) { return svtbx_u16(fallback, data, indices); } __forceinline svuint8_t svtbx(svuint8_t fallback, svuint8_t data, svuint8_t indices) { return svtbx_u8(fallback, data, indices); } __forceinline svuint64_t svaba(svuint64_t op1, svuint64_t op2, uint64_t op3) { return svaba_n_u64(op1, op2, op3); } __forceinline svuint32_t svaba(svuint32_t op1, svuint32_t op2, uint32_t op3) { return svaba_n_u32(op1, op2, op3); } __forceinline svuint16_t svaba(svuint16_t op1, svuint16_t op2, uint16_t op3) { return svaba_n_u16(op1, op2, op3); } __forceinline svuint8_t svaba(svuint8_t op1, svuint8_t op2, uint8_t op3) { return svaba_n_u8(op1, op2, op3); } __forceinline svint64_t svaba(svint64_t op1, svint64_t op2, int64_t op3) { return svaba_n_s64(op1, op2, op3); } __forceinline svint32_t svaba(svint32_t op1, svint32_t op2, int32_t op3) { return svaba_n_s32(op1, op2, op3); } __forceinline svint16_t svaba(svint16_t op1, svint16_t op2, int16_t op3) { return svaba_n_s16(op1, op2, op3); } __forceinline svint8_t svaba(svint8_t op1, svint8_t op2, int8_t op3) { return svaba_n_s8(op1, op2, op3); } __forceinline svuint64_t svaba(svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svaba_u64(op1, op2, op3); } __forceinline svuint32_t svaba(svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svaba_u32(op1, op2, op3); } __forceinline svuint16_t svaba(svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svaba_u16(op1, op2, op3); } __forceinline svuint8_t svaba(svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svaba_u8(op1, op2, op3); } __forceinline svint64_t svaba(svint64_t op1, svint64_t op2, svint64_t op3) { return svaba_s64(op1, op2, op3); } __forceinline svint16_t svaba(svint16_t op1, svint16_t op2, svint16_t op3) { return svaba_s16(op1, op2, op3); } __forceinline svint32_t svaba(svint32_t op1, svint32_t op2, svint32_t op3) { return svaba_s32(op1, op2, op3); } __forceinline svint8_t svaba(svint8_t op1, svint8_t op2, svint8_t op3) { return svaba_s8(op1, op2, op3); } __forceinline svint64_t svqabs_x(svbool_t pg, svint64_t op) { return svqabs_s64_x(pg, op); } __forceinline svint32_t svqabs_x(svbool_t pg, svint32_t op) { return svqabs_s32_x(pg, op); } __forceinline svint16_t svqabs_x(svbool_t pg, svint16_t op) { return svqabs_s16_x(pg, op); } __forceinline svint8_t svqabs_x(svbool_t pg, svint8_t op) { return svqabs_s8_x(pg, op); } __forceinline svint64_t svqabs_m(svint64_t inactive, svbool_t pg, svint64_t op) { return svqabs_s64_m(inactive, pg, op); } __forceinline svint32_t svqabs_m(svint32_t inactive, svbool_t pg, svint32_t op) { return svqabs_s32_m(inactive, pg, op); } __forceinline svint16_t svqabs_m(svint16_t inactive, svbool_t pg, svint16_t op) { return svqabs_s16_m(inactive, pg, op); } __forceinline svint8_t svqabs_m(svint8_t inactive, svbool_t pg, svint8_t op) { return svqabs_s8_m(inactive, pg, op); } __forceinline svint8_t svqabs_z(svbool_t pg, svint8_t 
op) { return svqabs_s8_z(pg, op); } __forceinline svint16_t svqabs_z(svbool_t pg, svint16_t op) { return svqabs_s16_z(pg, op); } __forceinline svint32_t svqabs_z(svbool_t pg, svint32_t op) { return svqabs_s32_z(pg, op); } __forceinline svint64_t svqabs_z(svbool_t pg, svint64_t op) { return svqabs_s64_z(pg, op); } __forceinline svint32_t svabalb(svint32_t op1, svint16_t op2, int16_t op3) { return svabalb_n_s32(op1, op2, op3); } __forceinline svint16_t svabalb(svint16_t op1, svint8_t op2, svint8_t op3) { return svabalb_s16(op1, op2, op3); } __forceinline svint32_t svabalb(svint32_t op1, svint16_t op2, svint16_t op3) { return svabalb_s32(op1, op2, op3); } __forceinline svint64_t svabalb(svint64_t op1, svint32_t op2, svint32_t op3) { return svabalb_s64(op1, op2, op3); } __forceinline svuint16_t svabalb(svuint16_t op1, svuint8_t op2, svuint8_t op3) { return svabalb_u16(op1, op2, op3); } __forceinline svuint32_t svabalb(svuint32_t op1, svuint16_t op2, svuint16_t op3) { return svabalb_u32(op1, op2, op3); } __forceinline svuint64_t svabalb(svuint64_t op1, svuint32_t op2, svuint32_t op3) { return svabalb_u64(op1, op2, op3); } __forceinline svint16_t svabalb(svint16_t op1, svint8_t op2, int8_t op3) { return svabalb_n_s16(op1, op2, op3); } __forceinline svint64_t svabalb(svint64_t op1, svint32_t op2, int32_t op3) { return svabalb_n_s64(op1, op2, op3); } __forceinline svuint16_t svabalb(svuint16_t op1, svuint8_t op2, uint8_t op3) { return svabalb_n_u16(op1, op2, op3); } __forceinline svuint32_t svabalb(svuint32_t op1, svuint16_t op2, uint16_t op3) { return svabalb_n_u32(op1, op2, op3); } __forceinline svuint64_t svabalb(svuint64_t op1, svuint32_t op2, uint32_t op3) { return svabalb_n_u64(op1, op2, op3); } __forceinline svint64_t svabalt(svint64_t op1, svint32_t op2, int32_t op3) { return svabalt_n_s64(op1, op2, op3); } __forceinline svuint16_t svabalt(svuint16_t op1, svuint8_t op2, uint8_t op3) { return svabalt_n_u16(op1, op2, op3); } __forceinline svuint32_t svabalt(svuint32_t op1, svuint16_t op2, uint16_t op3) { return svabalt_n_u32(op1, op2, op3); } __forceinline svint32_t svabalt(svint32_t op1, svint16_t op2, int16_t op3) { return svabalt_n_s32(op1, op2, op3); } __forceinline svuint64_t svabalt(svuint64_t op1, svuint32_t op2, uint32_t op3) { return svabalt_n_u64(op1, op2, op3); } __forceinline svint16_t svabalt(svint16_t op1, svint8_t op2, int8_t op3) { return svabalt_n_s16(op1, op2, op3); } __forceinline svint32_t svabalt(svint32_t op1, svint16_t op2, svint16_t op3) { return svabalt_s32(op1, op2, op3); } __forceinline svuint32_t svabalt(svuint32_t op1, svuint16_t op2, svuint16_t op3) { return svabalt_u32(op1, op2, op3); } __forceinline svuint16_t svabalt(svuint16_t op1, svuint8_t op2, svuint8_t op3) { return svabalt_u16(op1, op2, op3); } __forceinline svint64_t svabalt(svint64_t op1, svint32_t op2, svint32_t op3) { return svabalt_s64(op1, op2, op3); } __forceinline svint16_t svabalt(svint16_t op1, svint8_t op2, svint8_t op3) { return svabalt_s16(op1, op2, op3); } __forceinline svuint64_t svabalt(svuint64_t op1, svuint32_t op2, svuint32_t op3) { return svabalt_u64(op1, op2, op3); } __forceinline svint32_t svabdlb(svint16_t op1, int16_t op2) { return svabdlb_n_s32(op1, op2); } __forceinline svuint64_t svabdlb(svuint32_t op1, uint32_t op2) { return svabdlb_n_u64(op1, op2); } __forceinline svuint32_t svabdlb(svuint16_t op1, uint16_t op2) { return svabdlb_n_u32(op1, op2); } __forceinline svuint16_t svabdlb(svuint8_t op1, uint8_t op2) { return svabdlb_n_u16(op1, op2); } __forceinline svint64_t 
svabdlb(svint32_t op1, int32_t op2) { return svabdlb_n_s64(op1, op2); } __forceinline svint16_t svabdlb(svint8_t op1, int8_t op2) { return svabdlb_n_s16(op1, op2); } __forceinline svuint32_t svabdlb(svuint16_t op1, svuint16_t op2) { return svabdlb_u32(op1, op2); } __forceinline svuint16_t svabdlb(svuint8_t op1, svuint8_t op2) { return svabdlb_u16(op1, op2); } __forceinline svint64_t svabdlb(svint32_t op1, svint32_t op2) { return svabdlb_s64(op1, op2); } __forceinline svint32_t svabdlb(svint16_t op1, svint16_t op2) { return svabdlb_s32(op1, op2); } __forceinline svint16_t svabdlb(svint8_t op1, svint8_t op2) { return svabdlb_s16(op1, op2); } __forceinline svuint64_t svabdlb(svuint32_t op1, svuint32_t op2) { return svabdlb_u64(op1, op2); } __forceinline svuint16_t svabdlt(svuint8_t op1, svuint8_t op2) { return svabdlt_u16(op1, op2); } __forceinline svuint16_t svabdlt(svuint8_t op1, uint8_t op2) { return svabdlt_n_u16(op1, op2); } __forceinline svint64_t svabdlt(svint32_t op1, int32_t op2) { return svabdlt_n_s64(op1, op2); } __forceinline svint32_t svabdlt(svint16_t op1, int16_t op2) { return svabdlt_n_s32(op1, op2); } __forceinline svint16_t svabdlt(svint8_t op1, int8_t op2) { return svabdlt_n_s16(op1, op2); } __forceinline svint64_t svabdlt(svint32_t op1, svint32_t op2) { return svabdlt_s64(op1, op2); } __forceinline svuint32_t svabdlt(svuint16_t op1, svuint16_t op2) { return svabdlt_u32(op1, op2); } __forceinline svint32_t svabdlt(svint16_t op1, svint16_t op2) { return svabdlt_s32(op1, op2); } __forceinline svint16_t svabdlt(svint8_t op1, svint8_t op2) { return svabdlt_s16(op1, op2); } __forceinline svuint32_t svabdlt(svuint16_t op1, uint16_t op2) { return svabdlt_n_u32(op1, op2); } __forceinline svuint64_t svabdlt(svuint32_t op1, svuint32_t op2) { return svabdlt_u64(op1, op2); } __forceinline svuint64_t svabdlt(svuint32_t op1, uint32_t op2) { return svabdlt_n_u64(op1, op2); } __forceinline svuint32_t svadclb(svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svadclb_u32(op1, op2, op3); } __forceinline svuint64_t svadclb(svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svadclb_u64(op1, op2, op3); } __forceinline svuint32_t svadclb(svuint32_t op1, svuint32_t op2, uint32_t op3) { return svadclb_n_u32(op1, op2, op3); } __forceinline svuint64_t svadclb(svuint64_t op1, svuint64_t op2, uint64_t op3) { return svadclb_n_u64(op1, op2, op3); } __forceinline svuint32_t svadclt(svuint32_t op1, svuint32_t op2, uint32_t op3) { return svadclt_n_u32(op1, op2, op3); } __forceinline svuint64_t svadclt(svuint64_t op1, svuint64_t op2, uint64_t op3) { return svadclt_n_u64(op1, op2, op3); } __forceinline svuint32_t svadclt(svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svadclt_u32(op1, op2, op3); } __forceinline svuint64_t svadclt(svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svadclt_u64(op1, op2, op3); } __forceinline svint16_t svhadd_z(svbool_t pg, svint16_t op1, int16_t op2) { return svhadd_n_s16_z(pg, op1, op2); } __forceinline svint8_t svhadd_z(svbool_t pg, svint8_t op1, int8_t op2) { return svhadd_n_s8_z(pg, op1, op2); } __forceinline svint32_t svhadd_z(svbool_t pg, svint32_t op1, int32_t op2) { return svhadd_n_s32_z(pg, op1, op2); } __forceinline svuint16_t svhadd_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svhadd_n_u16_x(pg, op1, op2); } __forceinline svuint64_t svhadd_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svhadd_n_u64_x(pg, op1, op2); } __forceinline svuint8_t svhadd_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svhadd_n_u8_z(pg, op1, 
op2); } __forceinline svuint16_t svhadd_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svhadd_n_u16_z(pg, op1, op2); } __forceinline svuint32_t svhadd_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svhadd_n_u32_z(pg, op1, op2); } __forceinline svuint64_t svhadd_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svhadd_n_u64_z(pg, op1, op2); } __forceinline svuint32_t svhadd_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svhadd_u32_z(pg, op1, op2); } __forceinline svint64_t svhadd_z(svbool_t pg, svint64_t op1, int64_t op2) { return svhadd_n_s64_z(pg, op1, op2); } __forceinline svuint32_t svhadd_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svhadd_n_u32_x(pg, op1, op2); } __forceinline svint16_t svhadd_x(svbool_t pg, svint16_t op1, int16_t op2) { return svhadd_n_s16_x(pg, op1, op2); } __forceinline svuint8_t svhadd_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svhadd_n_u8_x(pg, op1, op2); } __forceinline svint64_t svhadd_x(svbool_t pg, svint64_t op1, int64_t op2) { return svhadd_n_s64_x(pg, op1, op2); } __forceinline svint32_t svhadd_x(svbool_t pg, svint32_t op1, int32_t op2) { return svhadd_n_s32_x(pg, op1, op2); } __forceinline svuint16_t svhadd_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svhadd_u16_z(pg, op1, op2); } __forceinline svint8_t svhadd_x(svbool_t pg, svint8_t op1, int8_t op2) { return svhadd_n_s8_x(pg, op1, op2); } __forceinline svuint64_t svhadd_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svhadd_n_u64_m(pg, op1, op2); } __forceinline svuint32_t svhadd_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svhadd_n_u32_m(pg, op1, op2); } __forceinline svuint16_t svhadd_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svhadd_n_u16_m(pg, op1, op2); } __forceinline svuint8_t svhadd_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svhadd_n_u8_m(pg, op1, op2); } __forceinline svint64_t svhadd_m(svbool_t pg, svint64_t op1, int64_t op2) { return svhadd_n_s64_m(pg, op1, op2); } __forceinline svint32_t svhadd_m(svbool_t pg, svint32_t op1, int32_t op2) { return svhadd_n_s32_m(pg, op1, op2); } __forceinline svuint64_t svhadd_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svhadd_u64_z(pg, op1, op2); } __forceinline svuint8_t svhadd_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svhadd_u8_z(pg, op1, op2); } __forceinline svuint16_t svhadd_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svhadd_u16_x(pg, op1, op2); } __forceinline svint32_t svhadd_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svhadd_s32_z(pg, op1, op2); } __forceinline svint8_t svhadd_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svhadd_s8_m(pg, op1, op2); } __forceinline svint16_t svhadd_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svhadd_s16_m(pg, op1, op2); } __forceinline svint32_t svhadd_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svhadd_s32_m(pg, op1, op2); } __forceinline svint64_t svhadd_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svhadd_s64_m(pg, op1, op2); } __forceinline svuint8_t svhadd_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svhadd_u8_m(pg, op1, op2); } __forceinline svint64_t svhadd_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svhadd_s64_z(pg, op1, op2); } __forceinline svuint32_t svhadd_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svhadd_u32_m(pg, op1, op2); } __forceinline svuint64_t svhadd_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svhadd_u64_m(pg, op1, op2); } __forceinline svint8_t svhadd_x(svbool_t pg, svint8_t op1, svint8_t 
op2) { return svhadd_s8_x(pg, op1, op2); } __forceinline svuint16_t svhadd_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svhadd_u16_m(pg, op1, op2); } __forceinline svint32_t svhadd_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svhadd_s32_x(pg, op1, op2); } __forceinline svint16_t svhadd_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svhadd_s16_x(pg, op1, op2); } __forceinline svint16_t svhadd_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svhadd_s16_z(pg, op1, op2); } __forceinline svint8_t svhadd_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svhadd_s8_z(pg, op1, op2); } __forceinline svuint64_t svhadd_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svhadd_u64_x(pg, op1, op2); } __forceinline svint8_t svhadd_m(svbool_t pg, svint8_t op1, int8_t op2) { return svhadd_n_s8_m(pg, op1, op2); } __forceinline svint16_t svhadd_m(svbool_t pg, svint16_t op1, int16_t op2) { return svhadd_n_s16_m(pg, op1, op2); } __forceinline svuint8_t svhadd_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svhadd_u8_x(pg, op1, op2); } __forceinline svint64_t svhadd_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svhadd_s64_x(pg, op1, op2); } __forceinline svuint32_t svhadd_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svhadd_u32_x(pg, op1, op2); } __forceinline svint32_t svrhadd_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svrhadd_s32_m(pg, op1, op2); } __forceinline svint16_t svrhadd_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svrhadd_s16_m(pg, op1, op2); } __forceinline svint8_t svrhadd_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svrhadd_s8_m(pg, op1, op2); } __forceinline svint64_t svrhadd_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svrhadd_s64_m(pg, op1, op2); } __forceinline svuint16_t svrhadd_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svrhadd_u16_z(pg, op1, op2); } __forceinline svint64_t svrhadd_z(svbool_t pg, svint64_t op1, int64_t op2) { return svrhadd_n_s64_z(pg, op1, op2); } __forceinline svuint32_t svrhadd_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svrhadd_n_u32_z(pg, op1, op2); } __forceinline svuint8_t svrhadd_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svrhadd_u8_m(pg, op1, op2); } __forceinline svuint16_t svrhadd_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svrhadd_u16_m(pg, op1, op2); } __forceinline svuint32_t svrhadd_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svrhadd_u32_m(pg, op1, op2); } __forceinline svuint64_t svrhadd_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svrhadd_u64_m(pg, op1, op2); } __forceinline svint8_t svrhadd_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svrhadd_s8_x(pg, op1, op2); } __forceinline svint16_t svrhadd_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svrhadd_s16_x(pg, op1, op2); } __forceinline svint32_t svrhadd_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svrhadd_s32_x(pg, op1, op2); } __forceinline svint64_t svrhadd_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svrhadd_s64_x(pg, op1, op2); } __forceinline svuint8_t svrhadd_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svrhadd_u8_x(pg, op1, op2); } __forceinline svuint64_t svrhadd_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svrhadd_u64_z(pg, op1, op2); } __forceinline svuint32_t svrhadd_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svrhadd_u32_x(pg, op1, op2); } __forceinline svuint64_t svrhadd_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svrhadd_u64_x(pg, op1, op2); } 
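// Illustrative usage sketch (comment only, not part of the header's declarations): the
// svhadd_* / svrhadd_* overloads above resolve purely on argument types, so one spelling
// covers every element width and both vector and scalar second operands. The locals below
// are hypothetical, assuming two svuint16_t inputs:
//
//   svbool_t   pg = svptrue_b16();
//   svuint16_t a  = svdup_n_u16(40000);
//   svuint16_t b  = svdup_n_u16(30000);
//   svuint16_t avg  = svrhadd_x(pg, a, b);             // resolves to svrhadd_u16_x
//                                                      // (rounding halving add, no overflow)
//   svuint16_t half = svhadd_x(pg, a, (uint16_t)10);   // scalar op2 resolves to svhadd_n_u16_x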
__forceinline svint8_t svrhadd_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svrhadd_s8_z(pg, op1, op2); } __forceinline svint16_t svrhadd_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svrhadd_s16_z(pg, op1, op2); } __forceinline svint32_t svrhadd_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svrhadd_s32_z(pg, op1, op2); } __forceinline svint64_t svrhadd_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svrhadd_s64_z(pg, op1, op2); } __forceinline svuint8_t svrhadd_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svrhadd_u8_z(pg, op1, op2); } __forceinline svint8_t svrhadd_m(svbool_t pg, svint8_t op1, int8_t op2) { return svrhadd_n_s8_m(pg, op1, op2); } __forceinline svint16_t svrhadd_m(svbool_t pg, svint16_t op1, int16_t op2) { return svrhadd_n_s16_m(pg, op1, op2); } __forceinline svint32_t svrhadd_m(svbool_t pg, svint32_t op1, int32_t op2) { return svrhadd_n_s32_m(pg, op1, op2); } __forceinline svint64_t svrhadd_m(svbool_t pg, svint64_t op1, int64_t op2) { return svrhadd_n_s64_m(pg, op1, op2); } __forceinline svuint16_t svrhadd_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svrhadd_n_u16_z(pg, op1, op2); } __forceinline svuint8_t svrhadd_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svrhadd_n_u8_z(pg, op1, op2); } __forceinline svuint32_t svrhadd_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svrhadd_u32_z(pg, op1, op2); } __forceinline svint32_t svrhadd_z(svbool_t pg, svint32_t op1, int32_t op2) { return svrhadd_n_s32_z(pg, op1, op2); } __forceinline svint16_t svrhadd_z(svbool_t pg, svint16_t op1, int16_t op2) { return svrhadd_n_s16_z(pg, op1, op2); } __forceinline svint8_t svrhadd_z(svbool_t pg, svint8_t op1, int8_t op2) { return svrhadd_n_s8_z(pg, op1, op2); } __forceinline svuint64_t svrhadd_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svrhadd_n_u64_x(pg, op1, op2); } __forceinline svuint32_t svrhadd_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svrhadd_n_u32_x(pg, op1, op2); } __forceinline svuint64_t svrhadd_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svrhadd_n_u64_z(pg, op1, op2); } __forceinline svuint16_t svrhadd_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svrhadd_n_u16_x(pg, op1, op2); } __forceinline svint64_t svrhadd_x(svbool_t pg, svint64_t op1, int64_t op2) { return svrhadd_n_s64_x(pg, op1, op2); } __forceinline svint32_t svrhadd_x(svbool_t pg, svint32_t op1, int32_t op2) { return svrhadd_n_s32_x(pg, op1, op2); } __forceinline svint16_t svrhadd_x(svbool_t pg, svint16_t op1, int16_t op2) { return svrhadd_n_s16_x(pg, op1, op2); } __forceinline svint8_t svrhadd_x(svbool_t pg, svint8_t op1, int8_t op2) { return svrhadd_n_s8_x(pg, op1, op2); } __forceinline svuint64_t svrhadd_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svrhadd_n_u64_m(pg, op1, op2); } __forceinline svuint32_t svrhadd_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svrhadd_n_u32_m(pg, op1, op2); } __forceinline svuint16_t svrhadd_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svrhadd_n_u16_m(pg, op1, op2); } __forceinline svuint8_t svrhadd_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svrhadd_n_u8_m(pg, op1, op2); } __forceinline svuint8_t svrhadd_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svrhadd_n_u8_x(pg, op1, op2); } __forceinline svuint16_t svrhadd_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svrhadd_u16_x(pg, op1, op2); } __forceinline svint32_t svaddhnb(svint64_t op1, int64_t op2) { return svaddhnb_n_s64(op1, op2); } __forceinline svuint8_t svaddhnb(svuint16_t op1, 
uint16_t op2) { return svaddhnb_n_u16(op1, op2); } __forceinline svuint16_t svaddhnb(svuint32_t op1, uint32_t op2) { return svaddhnb_n_u32(op1, op2); } __forceinline svint16_t svaddhnb(svint32_t op1, int32_t op2) { return svaddhnb_n_s32(op1, op2); } __forceinline svuint32_t svaddhnb(svuint64_t op1, uint64_t op2) { return svaddhnb_n_u64(op1, op2); } __forceinline svuint32_t svaddhnb(svuint64_t op1, svuint64_t op2) { return svaddhnb_u64(op1, op2); } __forceinline svint8_t svaddhnb(svint16_t op1, int16_t op2) { return svaddhnb_n_s16(op1, op2); } __forceinline svuint8_t svaddhnb(svuint16_t op1, svuint16_t op2) { return svaddhnb_u16(op1, op2); } __forceinline svint32_t svaddhnb(svint64_t op1, svint64_t op2) { return svaddhnb_s64(op1, op2); } __forceinline svint16_t svaddhnb(svint32_t op1, svint32_t op2) { return svaddhnb_s32(op1, op2); } __forceinline svint8_t svaddhnb(svint16_t op1, svint16_t op2) { return svaddhnb_s16(op1, op2); } __forceinline svuint16_t svaddhnb(svuint32_t op1, svuint32_t op2) { return svaddhnb_u32(op1, op2); } __forceinline svuint32_t svaddhnt(svuint32_t even, svuint64_t op1, uint64_t op2) { return svaddhnt_n_u64(even, op1, op2); } __forceinline svuint16_t svaddhnt(svuint16_t even, svuint32_t op1, uint32_t op2) { return svaddhnt_n_u32(even, op1, op2); } __forceinline svuint8_t svaddhnt(svuint8_t even, svuint16_t op1, uint16_t op2) { return svaddhnt_n_u16(even, op1, op2); } __forceinline svint32_t svaddhnt(svint32_t even, svint64_t op1, int64_t op2) { return svaddhnt_n_s64(even, op1, op2); } __forceinline svint16_t svaddhnt(svint16_t even, svint32_t op1, int32_t op2) { return svaddhnt_n_s32(even, op1, op2); } __forceinline svint8_t svaddhnt(svint8_t even, svint16_t op1, int16_t op2) { return svaddhnt_n_s16(even, op1, op2); } __forceinline svuint32_t svaddhnt(svuint32_t even, svuint64_t op1, svuint64_t op2) { return svaddhnt_u64(even, op1, op2); } __forceinline svuint16_t svaddhnt(svuint16_t even, svuint32_t op1, svuint32_t op2) { return svaddhnt_u32(even, op1, op2); } __forceinline svint32_t svaddhnt(svint32_t even, svint64_t op1, svint64_t op2) { return svaddhnt_s64(even, op1, op2); } __forceinline svint16_t svaddhnt(svint16_t even, svint32_t op1, svint32_t op2) { return svaddhnt_s32(even, op1, op2); } __forceinline svint8_t svaddhnt(svint8_t even, svint16_t op1, svint16_t op2) { return svaddhnt_s16(even, op1, op2); } __forceinline svuint8_t svaddhnt(svuint8_t even, svuint16_t op1, svuint16_t op2) { return svaddhnt_u16(even, op1, op2); } __forceinline svint32_t svraddhnb(svint64_t op1, svint64_t op2) { return svraddhnb_s64(op1, op2); } __forceinline svint16_t svraddhnb(svint32_t op1, svint32_t op2) { return svraddhnb_s32(op1, op2); } __forceinline svuint8_t svraddhnb(svuint16_t op1, svuint16_t op2) { return svraddhnb_u16(op1, op2); } __forceinline svuint16_t svraddhnb(svuint32_t op1, svuint32_t op2) { return svraddhnb_u32(op1, op2); } __forceinline svuint32_t svraddhnb(svuint64_t op1, svuint64_t op2) { return svraddhnb_u64(op1, op2); } __forceinline svint8_t svraddhnb(svint16_t op1, int16_t op2) { return svraddhnb_n_s16(op1, op2); } __forceinline svuint8_t svraddhnb(svuint16_t op1, uint16_t op2) { return svraddhnb_n_u16(op1, op2); } __forceinline svuint32_t svraddhnb(svuint64_t op1, uint64_t op2) { return svraddhnb_n_u64(op1, op2); } __forceinline svint16_t svraddhnb(svint32_t op1, int32_t op2) { return svraddhnb_n_s32(op1, op2); } __forceinline svuint16_t svraddhnb(svuint32_t op1, uint32_t op2) { return svraddhnb_n_u32(op1, op2); } __forceinline svint32_t 
svraddhnb(svint64_t op1, int64_t op2) { return svraddhnb_n_s64(op1, op2); } __forceinline svint8_t svraddhnb(svint16_t op1, svint16_t op2) { return svraddhnb_s16(op1, op2); } __forceinline svint8_t svraddhnt(svint8_t even, svint16_t op1, svint16_t op2) { return svraddhnt_s16(even, op1, op2); } __forceinline svint16_t svraddhnt(svint16_t even, svint32_t op1, svint32_t op2) { return svraddhnt_s32(even, op1, op2); } __forceinline svint32_t svraddhnt(svint32_t even, svint64_t op1, svint64_t op2) { return svraddhnt_s64(even, op1, op2); } __forceinline svuint8_t svraddhnt(svuint8_t even, svuint16_t op1, svuint16_t op2) { return svraddhnt_u16(even, op1, op2); } __forceinline svuint16_t svraddhnt(svuint16_t even, svuint32_t op1, svuint32_t op2) { return svraddhnt_u32(even, op1, op2); } __forceinline svuint16_t svraddhnt(svuint16_t even, svuint32_t op1, uint32_t op2) { return svraddhnt_n_u32(even, op1, op2); } __forceinline svuint32_t svraddhnt(svuint32_t even, svuint64_t op1, uint64_t op2) { return svraddhnt_n_u64(even, op1, op2); } __forceinline svuint8_t svraddhnt(svuint8_t even, svuint16_t op1, uint16_t op2) { return svraddhnt_n_u16(even, op1, op2); } __forceinline svuint32_t svraddhnt(svuint32_t even, svuint64_t op1, svuint64_t op2) { return svraddhnt_u64(even, op1, op2); } __forceinline svint8_t svraddhnt(svint8_t even, svint16_t op1, int16_t op2) { return svraddhnt_n_s16(even, op1, op2); } __forceinline svint16_t svraddhnt(svint16_t even, svint32_t op1, int32_t op2) { return svraddhnt_n_s32(even, op1, op2); } __forceinline svint32_t svraddhnt(svint32_t even, svint64_t op1, int64_t op2) { return svraddhnt_n_s64(even, op1, op2); } __forceinline svuint8_t svqadd_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svqadd_n_u8_m(pg, op1, op2); } __forceinline svuint64_t svqadd_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svqadd_u64_x(pg, op1, op2); } __forceinline svint8_t svqadd_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svqadd_s8_z(pg, op1, op2); } __forceinline svint16_t svqadd_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svqadd_s16_z(pg, op1, op2); } __forceinline svint32_t svqadd_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svqadd_s32_z(pg, op1, op2); } __forceinline svint64_t svqadd_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svqadd_s64_z(pg, op1, op2); } __forceinline svint8_t svqadd_m(svbool_t pg, svint8_t op1, int8_t op2) { return svqadd_n_s8_m(pg, op1, op2); } __forceinline svuint16_t svqadd_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svqadd_u16_z(pg, op1, op2); } __forceinline svuint32_t svqadd_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svqadd_u32_z(pg, op1, op2); } __forceinline svuint64_t svqadd_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svqadd_u64_z(pg, op1, op2); } __forceinline svuint32_t svqadd_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svqadd_u32_x(pg, op1, op2); } __forceinline svuint8_t svqadd_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svqadd_u8_z(pg, op1, op2); } __forceinline svuint16_t svqadd_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svqadd_u16_x(pg, op1, op2); } __forceinline svuint64_t svqadd_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svqadd_u64_m(pg, op1, op2); } __forceinline svint64_t svqadd_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svqadd_s64_x(pg, op1, op2); } __forceinline svint32_t svqadd_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svqadd_s32_x(pg, op1, op2); } __forceinline svint16_t svqadd_x(svbool_t pg, 
svint16_t op1, svint16_t op2) { return svqadd_s16_x(pg, op1, op2); } __forceinline svint8_t svqadd_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svqadd_s8_x(pg, op1, op2); } __forceinline svint16_t svqadd_m(svbool_t pg, svint16_t op1, int16_t op2) { return svqadd_n_s16_m(pg, op1, op2); } __forceinline svint64_t svqadd_m(svbool_t pg, svint64_t op1, int64_t op2) { return svqadd_n_s64_m(pg, op1, op2); } __forceinline svuint16_t svqadd_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svqadd_u16_m(pg, op1, op2); } __forceinline svuint8_t svqadd_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svqadd_u8_m(pg, op1, op2); } __forceinline svint64_t svqadd_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svqadd_s64_m(pg, op1, op2); } __forceinline svint32_t svqadd_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svqadd_s32_m(pg, op1, op2); } __forceinline svint16_t svqadd_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svqadd_s16_m(pg, op1, op2); } __forceinline svint8_t svqadd_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svqadd_s8_m(pg, op1, op2); } __forceinline svuint8_t svqadd_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svqadd_u8_x(pg, op1, op2); } __forceinline svint32_t svqadd_m(svbool_t pg, svint32_t op1, int32_t op2) { return svqadd_n_s32_m(pg, op1, op2); } __forceinline svuint32_t svqadd_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svqadd_u32_m(pg, op1, op2); } __forceinline svuint32_t svqadd_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svqadd_n_u32_z(pg, op1, op2); } __forceinline svuint16_t svqadd_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svqadd_n_u16_m(pg, op1, op2); } __forceinline svuint64_t svqadd_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svqadd_n_u64_m(pg, op1, op2); } __forceinline svint8_t svqadd_x(svbool_t pg, svint8_t op1, int8_t op2) { return svqadd_n_s8_x(pg, op1, op2); } __forceinline svint16_t svqadd_x(svbool_t pg, svint16_t op1, int16_t op2) { return svqadd_n_s16_x(pg, op1, op2); } __forceinline svint32_t svqadd_x(svbool_t pg, svint32_t op1, int32_t op2) { return svqadd_n_s32_x(pg, op1, op2); } __forceinline svuint64_t svqadd_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svqadd_n_u64_z(pg, op1, op2); } __forceinline svuint16_t svqadd_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svqadd_n_u16_z(pg, op1, op2); } __forceinline svuint8_t svqadd_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svqadd_n_u8_z(pg, op1, op2); } __forceinline svuint32_t svqadd_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svqadd_n_u32_m(pg, op1, op2); } __forceinline svint32_t svqadd_z(svbool_t pg, svint32_t op1, int32_t op2) { return svqadd_n_s32_z(pg, op1, op2); } __forceinline svint16_t svqadd_z(svbool_t pg, svint16_t op1, int16_t op2) { return svqadd_n_s16_z(pg, op1, op2); } __forceinline svint8_t svqadd_z(svbool_t pg, svint8_t op1, int8_t op2) { return svqadd_n_s8_z(pg, op1, op2); } __forceinline svuint64_t svqadd_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svqadd_n_u64_x(pg, op1, op2); } __forceinline svuint32_t svqadd_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svqadd_n_u32_x(pg, op1, op2); } __forceinline svuint16_t svqadd_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svqadd_n_u16_x(pg, op1, op2); } __forceinline svuint8_t svqadd_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svqadd_n_u8_x(pg, op1, op2); } __forceinline svint64_t svqadd_x(svbool_t pg, svint64_t op1, int64_t op2) { return svqadd_n_s64_x(pg, op1, op2); } __forceinline 
svint64_t svqadd_z(svbool_t pg, svint64_t op1, int64_t op2) { return svqadd_n_s64_z(pg, op1, op2); } __forceinline svuint8_t svsqadd_m(svbool_t pg, svuint8_t op1, svint8_t op2) { return svsqadd_u8_m(pg, op1, op2); } __forceinline svuint32_t svsqadd_m(svbool_t pg, svuint32_t op1, svint32_t op2) { return svsqadd_u32_m(pg, op1, op2); } __forceinline svuint64_t svsqadd_m(svbool_t pg, svuint64_t op1, svint64_t op2) { return svsqadd_u64_m(pg, op1, op2); } __forceinline svuint8_t svsqadd_x(svbool_t pg, svuint8_t op1, svint8_t op2) { return svsqadd_u8_x(pg, op1, op2); } __forceinline svuint16_t svsqadd_x(svbool_t pg, svuint16_t op1, svint16_t op2) { return svsqadd_u16_x(pg, op1, op2); } __forceinline svuint64_t svsqadd_x(svbool_t pg, svuint64_t op1, svint64_t op2) { return svsqadd_u64_x(pg, op1, op2); } __forceinline svuint32_t svsqadd_x(svbool_t pg, svuint32_t op1, svint32_t op2) { return svsqadd_u32_x(pg, op1, op2); } __forceinline svuint16_t svsqadd_m(svbool_t pg, svuint16_t op1, svint16_t op2) { return svsqadd_u16_m(pg, op1, op2); } __forceinline svuint32_t svsqadd_x(svbool_t pg, svuint32_t op1, int32_t op2) { return svsqadd_n_u32_x(pg, op1, op2); } __forceinline svuint16_t svsqadd_z(svbool_t pg, svuint16_t op1, int16_t op2) { return svsqadd_n_u16_z(pg, op1, op2); } __forceinline svuint8_t svsqadd_z(svbool_t pg, svuint8_t op1, int8_t op2) { return svsqadd_n_u8_z(pg, op1, op2); } __forceinline svuint64_t svsqadd_x(svbool_t pg, svuint64_t op1, int64_t op2) { return svsqadd_n_u64_x(pg, op1, op2); } __forceinline svuint16_t svsqadd_x(svbool_t pg, svuint16_t op1, int16_t op2) { return svsqadd_n_u16_x(pg, op1, op2); } __forceinline svuint8_t svsqadd_x(svbool_t pg, svuint8_t op1, int8_t op2) { return svsqadd_n_u8_x(pg, op1, op2); } __forceinline svuint64_t svsqadd_z(svbool_t pg, svuint64_t op1, int64_t op2) { return svsqadd_n_u64_z(pg, op1, op2); } __forceinline svuint64_t svsqadd_m(svbool_t pg, svuint64_t op1, int64_t op2) { return svsqadd_n_u64_m(pg, op1, op2); } __forceinline svuint16_t svsqadd_m(svbool_t pg, svuint16_t op1, int16_t op2) { return svsqadd_n_u16_m(pg, op1, op2); } __forceinline svuint8_t svsqadd_m(svbool_t pg, svuint8_t op1, int8_t op2) { return svsqadd_n_u8_m(pg, op1, op2); } __forceinline svuint64_t svsqadd_z(svbool_t pg, svuint64_t op1, svint64_t op2) { return svsqadd_u64_z(pg, op1, op2); } __forceinline svuint32_t svsqadd_z(svbool_t pg, svuint32_t op1, svint32_t op2) { return svsqadd_u32_z(pg, op1, op2); } __forceinline svuint16_t svsqadd_z(svbool_t pg, svuint16_t op1, svint16_t op2) { return svsqadd_u16_z(pg, op1, op2); } __forceinline svuint8_t svsqadd_z(svbool_t pg, svuint8_t op1, svint8_t op2) { return svsqadd_u8_z(pg, op1, op2); } __forceinline svuint32_t svsqadd_m(svbool_t pg, svuint32_t op1, int32_t op2) { return svsqadd_n_u32_m(pg, op1, op2); } __forceinline svuint32_t svsqadd_z(svbool_t pg, svuint32_t op1, int32_t op2) { return svsqadd_n_u32_z(pg, op1, op2); } __forceinline svint16_t svuqadd_z(svbool_t pg, svint16_t op1, uint16_t op2) { return svuqadd_n_s16_z(pg, op1, op2); } __forceinline svint64_t svuqadd_x(svbool_t pg, svint64_t op1, svuint64_t op2) { return svuqadd_s64_x(pg, op1, op2); } __forceinline svint64_t svuqadd_x(svbool_t pg, svint64_t op1, uint64_t op2) { return svuqadd_n_s64_x(pg, op1, op2); } __forceinline svint16_t svuqadd_z(svbool_t pg, svint16_t op1, svuint16_t op2) { return svuqadd_s16_z(pg, op1, op2); } __forceinline svint32_t svuqadd_x(svbool_t pg, svint32_t op1, uint32_t op2) { return svuqadd_n_s32_x(pg, op1, op2); } __forceinline svint32_t 
svuqadd_z(svbool_t pg, svint32_t op1, uint32_t op2) { return svuqadd_n_s32_z(pg, op1, op2); } __forceinline svint8_t svuqadd_z(svbool_t pg, svint8_t op1, svuint8_t op2) { return svuqadd_s8_z(pg, op1, op2); } __forceinline svint8_t svuqadd_m(svbool_t pg, svint8_t op1, svuint8_t op2) { return svuqadd_s8_m(pg, op1, op2); } __forceinline svint16_t svuqadd_x(svbool_t pg, svint16_t op1, svuint16_t op2) { return svuqadd_s16_x(pg, op1, op2); } __forceinline svint32_t svuqadd_m(svbool_t pg, svint32_t op1, svuint32_t op2) { return svuqadd_s32_m(pg, op1, op2); } __forceinline svint64_t svuqadd_m(svbool_t pg, svint64_t op1, svuint64_t op2) { return svuqadd_s64_m(pg, op1, op2); } __forceinline svint64_t svuqadd_z(svbool_t pg, svint64_t op1, uint64_t op2) { return svuqadd_n_s64_z(pg, op1, op2); } __forceinline svint8_t svuqadd_x(svbool_t pg, svint8_t op1, svuint8_t op2) { return svuqadd_s8_x(pg, op1, op2); } __forceinline svint64_t svuqadd_z(svbool_t pg, svint64_t op1, svuint64_t op2) { return svuqadd_s64_z(pg, op1, op2); } __forceinline svint32_t svuqadd_x(svbool_t pg, svint32_t op1, svuint32_t op2) { return svuqadd_s32_x(pg, op1, op2); } __forceinline svint16_t svuqadd_m(svbool_t pg, svint16_t op1, svuint16_t op2) { return svuqadd_s16_m(pg, op1, op2); } __forceinline svint8_t svuqadd_m(svbool_t pg, svint8_t op1, uint8_t op2) { return svuqadd_n_s8_m(pg, op1, op2); } __forceinline svint32_t svuqadd_z(svbool_t pg, svint32_t op1, svuint32_t op2) { return svuqadd_s32_z(pg, op1, op2); } __forceinline svint32_t svuqadd_m(svbool_t pg, svint32_t op1, uint32_t op2) { return svuqadd_n_s32_m(pg, op1, op2); } __forceinline svint64_t svuqadd_m(svbool_t pg, svint64_t op1, uint64_t op2) { return svuqadd_n_s64_m(pg, op1, op2); } __forceinline svint8_t svuqadd_x(svbool_t pg, svint8_t op1, uint8_t op2) { return svuqadd_n_s8_x(pg, op1, op2); } __forceinline svint16_t svuqadd_x(svbool_t pg, svint16_t op1, uint16_t op2) { return svuqadd_n_s16_x(pg, op1, op2); } __forceinline svint16_t svuqadd_m(svbool_t pg, svint16_t op1, uint16_t op2) { return svuqadd_n_s16_m(pg, op1, op2); } __forceinline svint8_t svuqadd_z(svbool_t pg, svint8_t op1, uint8_t op2) { return svuqadd_n_s8_z(pg, op1, op2); } __forceinline svint32_t svaddlbt(svint16_t op1, svint16_t op2) { return svaddlbt_s32(op1, op2); } __forceinline svint64_t svaddlbt(svint32_t op1, svint32_t op2) { return svaddlbt_s64(op1, op2); } __forceinline svint16_t svaddlbt(svint8_t op1, svint8_t op2) { return svaddlbt_s16(op1, op2); } __forceinline svint32_t svaddlbt(svint16_t op1, int16_t op2) { return svaddlbt_n_s32(op1, op2); } __forceinline svint64_t svaddlbt(svint32_t op1, int32_t op2) { return svaddlbt_n_s64(op1, op2); } __forceinline svint16_t svaddlbt(svint8_t op1, int8_t op2) { return svaddlbt_n_s16(op1, op2); } __forceinline svint16_t svaddlb(svint8_t op1, svint8_t op2) { return svaddlb_s16(op1, op2); } __forceinline svuint64_t svaddlb(svuint32_t op1, uint32_t op2) { return svaddlb_n_u64(op1, op2); } __forceinline svuint32_t svaddlb(svuint16_t op1, uint16_t op2) { return svaddlb_n_u32(op1, op2); } __forceinline svuint16_t svaddlb(svuint8_t op1, uint8_t op2) { return svaddlb_n_u16(op1, op2); } __forceinline svint64_t svaddlb(svint32_t op1, int32_t op2) { return svaddlb_n_s64(op1, op2); } __forceinline svint32_t svaddlb(svint16_t op1, int16_t op2) { return svaddlb_n_s32(op1, op2); } __forceinline svint32_t svaddlb(svint16_t op1, svint16_t op2) { return svaddlb_s32(op1, op2); } __forceinline svint64_t svaddlb(svint32_t op1, svint32_t op2) { return svaddlb_s64(op1, op2); } 
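// --- Usage sketch (illustrative only; not part of this header's API). svaddlb
// adds the even-numbered (bottom) lanes of its two narrow operands and widens the
// result to twice the element width; the scalar-operand overload maps to the
// corresponding _n_ intrinsic. The sketch assumes svptrue_b64() is declared
// earlier in this header.
__forceinline svint64_t __sve_example_widening_add(svint32_t a, svint32_t b)
{
    svint64_t bottom   = svaddlb(a, b);               // even lanes of a + even lanes of b, widened to 64 bits
    svint64_t plus_one = svaddlb(a, (int32_t)1);      // vector-plus-scalar form, maps to svaddlb_n_s64
    return svqadd_x(svptrue_b64(), bottom, plus_one); // saturating combine via the svqadd_x overload above
}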
__forceinline svuint64_t svaddlb(svuint32_t op1, svuint32_t op2) { return svaddlb_u64(op1, op2); } __forceinline svuint32_t svaddlb(svuint16_t op1, svuint16_t op2) { return svaddlb_u32(op1, op2); } __forceinline svuint16_t svaddlb(svuint8_t op1, svuint8_t op2) { return svaddlb_u16(op1, op2); } __forceinline svint16_t svaddlb(svint8_t op1, int8_t op2) { return svaddlb_n_s16(op1, op2); } __forceinline svint32_t svaddlt(svint16_t op1, int16_t op2) { return svaddlt_n_s32(op1, op2); } __forceinline svint16_t svaddlt(svint8_t op1, svint8_t op2) { return svaddlt_s16(op1, op2); } __forceinline svint32_t svaddlt(svint16_t op1, svint16_t op2) { return svaddlt_s32(op1, op2); } __forceinline svint64_t svaddlt(svint32_t op1, svint32_t op2) { return svaddlt_s64(op1, op2); } __forceinline svuint16_t svaddlt(svuint8_t op1, svuint8_t op2) { return svaddlt_u16(op1, op2); } __forceinline svuint32_t svaddlt(svuint16_t op1, svuint16_t op2) { return svaddlt_u32(op1, op2); } __forceinline svuint64_t svaddlt(svuint32_t op1, svuint32_t op2) { return svaddlt_u64(op1, op2); } __forceinline svint16_t svaddlt(svint8_t op1, int8_t op2) { return svaddlt_n_s16(op1, op2); } __forceinline svint64_t svaddlt(svint32_t op1, int32_t op2) { return svaddlt_n_s64(op1, op2); } __forceinline svuint16_t svaddlt(svuint8_t op1, uint8_t op2) { return svaddlt_n_u16(op1, op2); } __forceinline svuint32_t svaddlt(svuint16_t op1, uint16_t op2) { return svaddlt_n_u32(op1, op2); } __forceinline svuint64_t svaddlt(svuint32_t op1, uint32_t op2) { return svaddlt_n_u64(op1, op2); } __forceinline svuint64_t svaddwb(svuint64_t op1, uint32_t op2) { return svaddwb_n_u64(op1, op2); } __forceinline svuint64_t svaddwb(svuint64_t op1, svuint32_t op2) { return svaddwb_u64(op1, op2); } __forceinline svint16_t svaddwb(svint16_t op1, int8_t op2) { return svaddwb_n_s16(op1, op2); } __forceinline svint32_t svaddwb(svint32_t op1, int16_t op2) { return svaddwb_n_s32(op1, op2); } __forceinline svuint16_t svaddwb(svuint16_t op1, uint8_t op2) { return svaddwb_n_u16(op1, op2); } __forceinline svint64_t svaddwb(svint64_t op1, int32_t op2) { return svaddwb_n_s64(op1, op2); } __forceinline svuint32_t svaddwb(svuint32_t op1, svuint16_t op2) { return svaddwb_u32(op1, op2); } __forceinline svuint32_t svaddwb(svuint32_t op1, uint16_t op2) { return svaddwb_n_u32(op1, op2); } __forceinline svint64_t svaddwb(svint64_t op1, svint32_t op2) { return svaddwb_s64(op1, op2); } __forceinline svint32_t svaddwb(svint32_t op1, svint16_t op2) { return svaddwb_s32(op1, op2); } __forceinline svint16_t svaddwb(svint16_t op1, svint8_t op2) { return svaddwb_s16(op1, op2); } __forceinline svuint16_t svaddwb(svuint16_t op1, svuint8_t op2) { return svaddwb_u16(op1, op2); } __forceinline svint64_t svaddwt(svint64_t op1, int32_t op2) { return svaddwt_n_s64(op1, op2); } __forceinline svuint16_t svaddwt(svuint16_t op1, uint8_t op2) { return svaddwt_n_u16(op1, op2); } __forceinline svuint32_t svaddwt(svuint32_t op1, uint16_t op2) { return svaddwt_n_u32(op1, op2); } __forceinline svuint64_t svaddwt(svuint64_t op1, uint32_t op2) { return svaddwt_n_u64(op1, op2); } __forceinline svint32_t svaddwt(svint32_t op1, int16_t op2) { return svaddwt_n_s32(op1, op2); } __forceinline svint16_t svaddwt(svint16_t op1, int8_t op2) { return svaddwt_n_s16(op1, op2); } __forceinline svuint64_t svaddwt(svuint64_t op1, svuint32_t op2) { return svaddwt_u64(op1, op2); } __forceinline svuint32_t svaddwt(svuint32_t op1, svuint16_t op2) { return svaddwt_u32(op1, op2); } __forceinline svuint16_t svaddwt(svuint16_t op1, 
svuint8_t op2) { return svaddwt_u16(op1, op2); }
__forceinline svint32_t svaddwt(svint32_t op1, svint16_t op2) { return svaddwt_s32(op1, op2); }
__forceinline svint16_t svaddwt(svint16_t op1, svint8_t op2) { return svaddwt_s16(op1, op2); }
__forceinline svint64_t svaddwt(svint64_t op1, svint32_t op2) { return svaddwt_s64(op1, op2); }
__forceinline svint16_t svlogb_x(svbool_t pg, svfloat16_t op) { return svlogb_f16_x(pg, op); }
__forceinline svint64_t svlogb_x(svbool_t pg, svfloat64_t op) { return svlogb_f64_x(pg, op); }
__forceinline svint16_t svlogb_z(svbool_t pg, svfloat16_t op) { return svlogb_f16_z(pg, op); }
__forceinline svint32_t svlogb_z(svbool_t pg, svfloat32_t op) { return svlogb_f32_z(pg, op); }
__forceinline svint64_t svlogb_z(svbool_t pg, svfloat64_t op) { return svlogb_f64_z(pg, op); }
__forceinline svint64_t svlogb_m(svint64_t inactive, svbool_t pg, svfloat64_t op) { return svlogb_f64_m(inactive, pg, op); }
__forceinline svint32_t svlogb_m(svint32_t inactive, svbool_t pg, svfloat32_t op) { return svlogb_f32_m(inactive, pg, op); }
__forceinline svint16_t svlogb_m(svint16_t inactive, svbool_t pg, svfloat16_t op) { return svlogb_f16_m(inactive, pg, op); }
__forceinline svint32_t svlogb_x(svbool_t pg, svfloat32_t op) { return svlogb_f32_x(pg, op); }
__forceinline svint32_t svqdmullb(svint16_t op1, svint16_t op2) { return svqdmullb_s32(op1, op2); }
__forceinline svint64_t svqdmullb(svint32_t op1, svint32_t op2) { return svqdmullb_s64(op1, op2); }
__forceinline svint16_t svqdmullb(svint8_t op1, svint8_t op2) { return svqdmullb_s16(op1, op2); }
__forceinline svint16_t svqdmullb(svint8_t op1, int8_t op2) { return svqdmullb_n_s16(op1, op2); }
__forceinline svint32_t svqdmullb(svint16_t op1, int16_t op2) { return svqdmullb_n_s32(op1, op2); }
__forceinline svint64_t svqdmullb(svint32_t op1, int32_t op2) { return svqdmullb_n_s64(op1, op2); }
template <uint64_t N, typename T>
__forceinline __svehdr_twice_type<T> __svqdmullb_lane(T op1, T op2) {
    if constexpr(::std::is_same_v<T, svint16_t>) { return svqdmullb_lane_s32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svqdmullb_lane_s64(op1, op2, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svqdmullb_lane(op1, op2, imm_index) __svqdmullb_lane<imm_index>(op1, op2)
template <uint64_t N, typename T>
__forceinline __svehdr_twice_type<T> __svqdmullt_lane(T op1, T op2) {
    if constexpr(::std::is_same_v<T, svint16_t>) { return svqdmullt_lane_s32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svqdmullt_lane_s64(op1, op2, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svqdmullt_lane(op1, op2, imm_index) __svqdmullt_lane<imm_index>(op1, op2)
__forceinline svint64_t svqdmullt(svint32_t op1, svint32_t op2) { return svqdmullt_s64(op1, op2); }
__forceinline svint64_t svqdmullt(svint32_t op1, int32_t op2) { return svqdmullt_n_s64(op1, op2); }
__forceinline svint32_t svqdmullt(svint16_t op1, int16_t op2) { return svqdmullt_n_s32(op1, op2); }
__forceinline svint16_t svqdmullt(svint8_t op1, int8_t op2) { return svqdmullt_n_s16(op1, op2); }
__forceinline svint32_t svqdmullt(svint16_t op1, svint16_t op2) { return svqdmullt_s32(op1, op2); }
__forceinline svint16_t svqdmullt(svint8_t op1, svint8_t op2) { return svqdmullt_s16(op1, op2); }
template <uint64_t N, typename T>
__forceinline T __svqdmulh_lane(T op1, T op2) {
    if constexpr(::std::is_same_v<T, svint64_t>) { return svqdmulh_lane_s64(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svqdmulh_lane_s32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svqdmulh_lane_s16(op1, op2, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svqdmulh_lane(op1, op2, imm_index) __svqdmulh_lane<imm_index>(op1, op2)
__forceinline svint32_t svqdmulh(svint32_t op1, int32_t op2) { return svqdmulh_n_s32(op1, op2); }
__forceinline svint64_t svqdmulh(svint64_t op1, int64_t op2) { return svqdmulh_n_s64(op1, op2); }
__forceinline svint64_t svqdmulh(svint64_t op1, svint64_t op2) { return svqdmulh_s64(op1, op2); }
__forceinline svint32_t svqdmulh(svint32_t op1, svint32_t op2) { return svqdmulh_s32(op1, op2); }
__forceinline svint16_t svqdmulh(svint16_t op1, svint16_t op2) { return svqdmulh_s16(op1, op2); }
__forceinline svint16_t svqdmulh(svint16_t op1, int16_t op2) { return svqdmulh_n_s16(op1, op2); }
__forceinline svint8_t svqdmulh(svint8_t op1, svint8_t op2) { return svqdmulh_s8(op1, op2); }
__forceinline svint8_t svqdmulh(svint8_t op1, int8_t op2) { return svqdmulh_n_s8(op1, op2); }
__forceinline svint16_t svqrdmulh(svint16_t op1, svint16_t op2) { return svqrdmulh_s16(op1, op2); }
template <uint64_t N, typename T>
__forceinline T __svqrdmulh_lane(T op1, T op2) {
    if constexpr(::std::is_same_v<T, svint64_t>) { return svqrdmulh_lane_s64(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svqrdmulh_lane_s32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svqrdmulh_lane_s16(op1, op2, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svqrdmulh_lane(op1, op2, imm_index) __svqrdmulh_lane<imm_index>(op1, op2)
__forceinline svint64_t svqrdmulh(svint64_t op1, int64_t op2) { return svqrdmulh_n_s64(op1, op2); }
__forceinline svint32_t svqrdmulh(svint32_t op1, int32_t op2) { return svqrdmulh_n_s32(op1, op2); }
__forceinline svint8_t svqrdmulh(svint8_t op1, svint8_t op2) { return svqrdmulh_s8(op1, op2); }
__forceinline svint8_t svqrdmulh(svint8_t op1, int8_t op2) { return svqrdmulh_n_s8(op1, op2); }
__forceinline svint64_t svqrdmulh(svint64_t op1, svint64_t op2) { return svqrdmulh_s64(op1, op2); }
__forceinline svint32_t svqrdmulh(svint32_t op1, svint32_t op2) { return svqrdmulh_s32(op1, op2); }
__forceinline svint16_t svqrdmulh(svint16_t op1, int16_t op2) { return svqrdmulh_n_s16(op1, op2); }
__forceinline svuint32_t svmullb(svuint16_t op1, uint16_t op2) { return svmullb_n_u32(op1, op2); }
template <uint64_t N, typename T>
__forceinline __svehdr_twice_type<T> __svmullb_lane(T op1, T op2) {
    if constexpr(::std::is_same_v<T, svuint32_t>) { return svmullb_lane_u64(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svuint16_t>) { return svmullb_lane_u32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svmullb_lane_s64(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svmullb_lane_s32(op1, op2, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svmullb_lane(op1, op2, imm_index) __svmullb_lane<imm_index>(op1, op2)
__forceinline svuint64_t svmullb(svuint32_t op1, uint32_t op2) { return svmullb_n_u64(op1, op2); }
__forceinline svuint16_t svmullb(svuint8_t op1, uint8_t op2) { return svmullb_n_u16(op1, op2); }
__forceinline svint32_t svmullb(svint16_t op1, int16_t op2) { return svmullb_n_s32(op1, op2); }
__forceinline svint64_t svmullb(svint32_t op1, int32_t op2) { return svmullb_n_s64(op1, op2); }
__forceinline svuint64_t svmullb(svuint32_t op1, svuint32_t op2) { return svmullb_u64(op1, op2); }
__forceinline svuint32_t svmullb(svuint16_t op1, svuint16_t op2) { return svmullb_u32(op1, op2); }
__forceinline svuint16_t svmullb(svuint8_t op1, svuint8_t op2) { return svmullb_u16(op1, op2); }
__forceinline svint64_t svmullb(svint32_t op1, svint32_t op2) { return svmullb_s64(op1, op2); }
__forceinline svint32_t svmullb(svint16_t op1, svint16_t op2) { return svmullb_s32(op1, op2); }
__forceinline svint16_t svmullb(svint8_t op1, svint8_t op2) { return svmullb_s16(op1, op2); }
__forceinline svint16_t svmullb(svint8_t op1, int8_t op2) { return svmullb_n_s16(op1, op2); }
__forceinline svint32_t svmullt(svint16_t op1, int16_t op2) { return svmullt_n_s32(op1, op2); }
__forceinline svint64_t svmullt(svint32_t op1, int32_t op2) { return svmullt_n_s64(op1, op2); }
template <uint64_t N, typename T>
__forceinline __svehdr_twice_type<T> __svmullt_lane(T op1, T op2) {
    if constexpr(::std::is_same_v<T, svuint32_t>) { return svmullt_lane_u64(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svuint16_t>) { return svmullt_lane_u32(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svmullt_lane_s64(op1, op2, N); }
    else if constexpr(::std::is_same_v<T, svint16_t>) { return svmullt_lane_s32(op1, op2, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svmullt_lane(op1, op2, imm_index) __svmullt_lane<imm_index>(op1, op2)
__forceinline svuint16_t svmullt(svuint8_t op1, uint8_t op2) { return svmullt_n_u16(op1, op2); }
__forceinline svint16_t svmullt(svint8_t op1, int8_t op2) { return svmullt_n_s16(op1, op2); }
__forceinline svuint64_t svmullt(svuint32_t op1, svuint32_t op2) { return svmullt_u64(op1, op2); }
__forceinline svuint32_t svmullt(svuint16_t op1, svuint16_t op2) { return svmullt_u32(op1, op2); }
__forceinline svuint16_t svmullt(svuint8_t op1, svuint8_t op2) { return svmullt_u16(op1, op2); }
__forceinline svuint32_t svmullt(svuint16_t op1, uint16_t op2) { return svmullt_n_u32(op1, op2); }
__forceinline svuint64_t svmullt(svuint32_t op1, uint32_t op2) { return svmullt_n_u64(op1, op2); }
__forceinline svint64_t svmullt(svint32_t op1, svint32_t op2) { return svmullt_s64(op1, op2); }
__forceinline svint32_t svmullt(svint16_t op1, svint16_t op2) { return svmullt_s32(op1, op2); }
__forceinline svint16_t svmullt(svint8_t op1, svint8_t op2) { return svmullt_s16(op1, op2); }
template <uint64_t N, typename T1, typename T2>
__forceinline T1 __svmlalb_lane(T1 op1, T2 op2, T2 op3) {
    if constexpr(::std::is_same_v<T1, svuint32_t>) { return svmlalb_lane_u32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svuint64_t>) { return svmlalb_lane_u64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint32_t>) { return svmlalb_lane_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint64_t>) { return svmlalb_lane_s64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svfloat32_t>) { return svmlalb_lane_f32(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svmlalb_lane(op1, op2, op3, imm_index) __svmlalb_lane<imm_index>(op1, op2, op3)
__forceinline svfloat32_t svmlalb(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3) { return svmlalb_f32(op1, op2, op3); }
__forceinline svuint64_t svmlalb(svuint64_t op1, svuint32_t op2, uint32_t op3) { return svmlalb_n_u64(op1, op2, op3); }
__forceinline svuint32_t svmlalb(svuint32_t op1, svuint16_t op2, uint16_t op3) { return svmlalb_n_u32(op1, op2, op3); }
__forceinline svint16_t svmlalb(svint16_t op1, svint8_t op2, int8_t op3) { return svmlalb_n_s16(op1, op2, op3); }
__forceinline svint64_t svmlalb(svint64_t op1, svint32_t op2, int32_t op3) { return svmlalb_n_s64(op1, op2, op3); }
__forceinline svint32_t svmlalb(svint32_t op1, svint16_t op2, int16_t op3) { return svmlalb_n_s32(op1, op2, op3); }
__forceinline svuint64_t svmlalb(svuint64_t op1, svuint32_t op2, svuint32_t op3) { return svmlalb_u64(op1, op2, op3); }
__forceinline svuint32_t svmlalb(svuint32_t op1, svuint16_t op2, svuint16_t op3) { return svmlalb_u32(op1, op2, op3); }
__forceinline svuint16_t svmlalb(svuint16_t op1, svuint8_t op2, svuint8_t op3) { return svmlalb_u16(op1, op2, op3); }
__forceinline svint64_t svmlalb(svint64_t op1, svint32_t op2, svint32_t op3) { return svmlalb_s64(op1, op2, op3); }
__forceinline svuint16_t svmlalb(svuint16_t op1, svuint8_t op2, uint8_t op3) { return svmlalb_n_u16(op1, op2, op3); }
__forceinline svint16_t svmlalb(svint16_t op1, svint8_t op2, svint8_t op3) { return svmlalb_s16(op1, op2, op3); }
__forceinline svint32_t svmlalb(svint32_t op1, svint16_t op2, svint16_t op3) { return svmlalb_s32(op1, op2, op3); }
__forceinline svint32_t svmlalt(svint32_t op1, svint16_t op2, svint16_t op3) { return svmlalt_s32(op1, op2, op3); }
template <uint64_t N, typename T1, typename T2>
__forceinline T1 __svmlalt_lane(T1 op1, T2 op2, T2 op3) {
    if constexpr(::std::is_same_v<T1, svuint32_t>) { return svmlalt_lane_u32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svuint64_t>) { return svmlalt_lane_u64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint32_t>) { return svmlalt_lane_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint64_t>) { return svmlalt_lane_s64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svfloat32_t>) { return svmlalt_lane_f32(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svmlalt_lane(op1, op2, op3, imm_index) __svmlalt_lane<imm_index>(op1, op2, op3)
__forceinline svfloat32_t svmlalt(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3) { return svmlalt_f32(op1, op2, op3); }
__forceinline svint16_t svmlalt(svint16_t op1, svint8_t op2, svint8_t op3) { return svmlalt_s16(op1, op2, op3); }
__forceinline svuint64_t svmlalt(svuint64_t op1, svuint32_t op2, uint32_t op3) { return svmlalt_n_u64(op1, op2, op3); }
__forceinline svuint16_t svmlalt(svuint16_t op1, svuint8_t op2, uint8_t op3) { return svmlalt_n_u16(op1, op2, op3); }
__forceinline svint64_t svmlalt(svint64_t op1, svint32_t op2, int32_t op3) { return svmlalt_n_s64(op1, op2, op3); }
__forceinline svint32_t svmlalt(svint32_t op1, svint16_t op2, int16_t op3) { return svmlalt_n_s32(op1, op2, op3); }
__forceinline svint16_t svmlalt(svint16_t op1, svint8_t op2, int8_t op3) { return svmlalt_n_s16(op1, op2, op3); }
__forceinline svuint64_t svmlalt(svuint64_t op1, svuint32_t op2, svuint32_t op3) { return svmlalt_u64(op1, op2, op3); }
__forceinline svuint32_t svmlalt(svuint32_t op1, svuint16_t op2, svuint16_t op3) { return svmlalt_u32(op1, op2, op3); }
__forceinline svint64_t svmlalt(svint64_t op1, svint32_t op2, svint32_t op3) { return svmlalt_s64(op1, op2, op3); }
__forceinline svuint32_t svmlalt(svuint32_t op1, svuint16_t op2, uint16_t op3) { return svmlalt_n_u32(op1, op2, op3); }
__forceinline svuint16_t svmlalt(svuint16_t op1, svuint8_t op2, svuint8_t op3) { return svmlalt_u16(op1, op2, op3); }
template <uint64_t N, typename T1, typename T2>
__forceinline T1 __svmlslb_lane(T1 op1, T2 op2, T2 op3) {
    if constexpr(::std::is_same_v<T1, svuint32_t>) { return svmlslb_lane_u32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svuint64_t>) { return svmlslb_lane_u64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint32_t>) { return svmlslb_lane_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint64_t>) { return svmlslb_lane_s64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svfloat32_t>) { return svmlslb_lane_f32(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svmlslb_lane(op1, op2, op3, imm_index) __svmlslb_lane<imm_index>(op1, op2, op3)
__forceinline svfloat32_t svmlslb(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3) { return svmlslb_f32(op1, op2, op3); }
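// --- Usage sketch (illustrative only; not part of this header's API). svmlalb and
// svmlalt multiply the even (bottom) or odd (top) lanes of the two narrow operands,
// widen the products, and accumulate them into the wide vector op1; the svfloat32_t
// overloads widen from svfloat16_t in the same way.
__forceinline svfloat32_t __sve_example_widening_fmla(svfloat32_t acc, svfloat16_t a, svfloat16_t b)
{
    acc = svmlalb(acc, a, b); // acc += widen(even lanes of a) * widen(even lanes of b)
    acc = svmlalt(acc, a, b); // acc += widen(odd lanes of a)  * widen(odd lanes of b)
    return acc;
}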
__forceinline svuint64_t svmlslb(svuint64_t op1, svuint32_t op2, uint32_t op3) { return svmlslb_n_u64(op1, op2, op3); }
__forceinline svuint32_t svmlslb(svuint32_t op1, svuint16_t op2, uint16_t op3) { return svmlslb_n_u32(op1, op2, op3); }
__forceinline svint32_t svmlslb(svint32_t op1, svint16_t op2, int16_t op3) { return svmlslb_n_s32(op1, op2, op3); }
__forceinline svint64_t svmlslb(svint64_t op1, svint32_t op2, int32_t op3) { return svmlslb_n_s64(op1, op2, op3); }
__forceinline svint16_t svmlslb(svint16_t op1, svint8_t op2, int8_t op3) { return svmlslb_n_s16(op1, op2, op3); }
__forceinline svuint64_t svmlslb(svuint64_t op1, svuint32_t op2, svuint32_t op3) { return svmlslb_u64(op1, op2, op3); }
__forceinline svuint32_t svmlslb(svuint32_t op1, svuint16_t op2, svuint16_t op3) { return svmlslb_u32(op1, op2, op3); }
__forceinline svuint16_t svmlslb(svuint16_t op1, svuint8_t op2, svuint8_t op3) { return svmlslb_u16(op1, op2, op3); }
__forceinline svint64_t svmlslb(svint64_t op1, svint32_t op2, svint32_t op3) { return svmlslb_s64(op1, op2, op3); }
__forceinline svuint16_t svmlslb(svuint16_t op1, svuint8_t op2, uint8_t op3) { return svmlslb_n_u16(op1, op2, op3); }
__forceinline svint16_t svmlslb(svint16_t op1, svint8_t op2, svint8_t op3) { return svmlslb_s16(op1, op2, op3); }
__forceinline svint32_t svmlslb(svint32_t op1, svint16_t op2, svint16_t op3) { return svmlslb_s32(op1, op2, op3); }
template <uint64_t N, typename T1, typename T2>
__forceinline T1 __svmlslt_lane(T1 op1, T2 op2, T2 op3) {
    if constexpr(::std::is_same_v<T1, svuint32_t>) { return svmlslt_lane_u32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svuint64_t>) { return svmlslt_lane_u64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint32_t>) { return svmlslt_lane_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint64_t>) { return svmlslt_lane_s64(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svfloat32_t>) { return svmlslt_lane_f32(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svmlslt_lane(op1, op2, op3, imm_index) __svmlslt_lane<imm_index>(op1, op2, op3)
__forceinline svfloat32_t svmlslt(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3) { return svmlslt_f32(op1, op2, op3); }
__forceinline svuint64_t svmlslt(svuint64_t op1, svuint32_t op2, uint32_t op3) { return svmlslt_n_u64(op1, op2, op3); }
__forceinline svint16_t svmlslt(svint16_t op1, svint8_t op2, svint8_t op3) { return svmlslt_s16(op1, op2, op3); }
__forceinline svuint32_t svmlslt(svuint32_t op1, svuint16_t op2, uint16_t op3) { return svmlslt_n_u32(op1, op2, op3); }
__forceinline svint64_t svmlslt(svint64_t op1, svint32_t op2, int32_t op3) { return svmlslt_n_s64(op1, op2, op3); }
__forceinline svint32_t svmlslt(svint32_t op1, svint16_t op2, int16_t op3) { return svmlslt_n_s32(op1, op2, op3); }
__forceinline svint16_t svmlslt(svint16_t op1, svint8_t op2, int8_t op3) { return svmlslt_n_s16(op1, op2, op3); }
__forceinline svuint64_t svmlslt(svuint64_t op1, svuint32_t op2, svuint32_t op3) { return svmlslt_u64(op1, op2, op3); }
__forceinline svuint32_t svmlslt(svuint32_t op1, svuint16_t op2, svuint16_t op3) { return svmlslt_u32(op1, op2, op3); }
__forceinline svuint16_t svmlslt(svuint16_t op1, svuint8_t op2, svuint8_t op3) { return svmlslt_u16(op1, op2, op3); }
__forceinline svint64_t svmlslt(svint64_t op1, svint32_t op2, svint32_t op3) { return svmlslt_s64(op1, op2, op3); }
__forceinline svuint16_t svmlslt(svuint16_t op1, svuint8_t op2, uint8_t op3) { return svmlslt_n_u16(op1, op2, op3); }
__forceinline svint32_t svmlslt(svint32_t op1, svint16_t op2, svint16_t op3) { return svmlslt_s32(op1, op2, op3); }
__forceinline svint32_t svqdmlalbt(svint32_t op1, svint16_t op2, int16_t op3) { return svqdmlalbt_n_s32(op1, op2, op3); }
__forceinline svint32_t svqdmlalbt(svint32_t op1, svint16_t op2, svint16_t op3) { return svqdmlalbt_s32(op1, op2, op3); }
__forceinline svint64_t svqdmlalbt(svint64_t op1, svint32_t op2, int32_t op3) { return svqdmlalbt_n_s64(op1, op2, op3); }
__forceinline svint16_t svqdmlalbt(svint16_t op1, svint8_t op2, int8_t op3) { return svqdmlalbt_n_s16(op1, op2, op3); }
__forceinline svint64_t svqdmlalbt(svint64_t op1, svint32_t op2, svint32_t op3) { return svqdmlalbt_s64(op1, op2, op3); }
__forceinline svint16_t svqdmlalbt(svint16_t op1, svint8_t op2, svint8_t op3) { return svqdmlalbt_s16(op1, op2, op3); }
__forceinline svint16_t svqdmlalb(svint16_t op1, svint8_t op2, svint8_t op3) { return svqdmlalb_s16(op1, op2, op3); }
template <uint64_t N, typename T1, typename T2>
__forceinline T1 __svqdmlalb_lane(T1 op1, T2 op2, T2 op3) {
    if constexpr(::std::is_same_v<T1, svint32_t>) { return svqdmlalb_lane_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint64_t>) { return svqdmlalb_lane_s64(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svqdmlalb_lane(op1, op2, op3, imm_index) __svqdmlalb_lane<imm_index>(op1, op2, op3)
__forceinline svint32_t svqdmlalb(svint32_t op1, svint16_t op2, svint16_t op3) { return svqdmlalb_s32(op1, op2, op3); }
__forceinline svint64_t svqdmlalb(svint64_t op1, svint32_t op2, int32_t op3) { return svqdmlalb_n_s64(op1, op2, op3); }
__forceinline svint32_t svqdmlalb(svint32_t op1, svint16_t op2, int16_t op3) { return svqdmlalb_n_s32(op1, op2, op3); }
__forceinline svint16_t svqdmlalb(svint16_t op1, svint8_t op2, int8_t op3) { return svqdmlalb_n_s16(op1, op2, op3); }
__forceinline svint64_t svqdmlalb(svint64_t op1, svint32_t op2, svint32_t op3) { return svqdmlalb_s64(op1, op2, op3); }
__forceinline svint16_t svqdmlalt(svint16_t op1, svint8_t op2, int8_t op3) { return svqdmlalt_n_s16(op1, op2, op3); }
__forceinline svint32_t svqdmlalt(svint32_t op1, svint16_t op2, int16_t op3) { return svqdmlalt_n_s32(op1, op2, op3); }
__forceinline svint64_t svqdmlalt(svint64_t op1, svint32_t op2, int32_t op3) { return svqdmlalt_n_s64(op1, op2, op3); }
template <uint64_t N, typename T1, typename T2>
__forceinline T1 __svqdmlalt_lane(T1 op1, T2 op2, T2 op3) {
    if constexpr(::std::is_same_v<T1, svint32_t>) { return svqdmlalt_lane_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint64_t>) { return svqdmlalt_lane_s64(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svqdmlalt_lane(op1, op2, op3, imm_index) __svqdmlalt_lane<imm_index>(op1, op2, op3)
__forceinline svint64_t svqdmlalt(svint64_t op1, svint32_t op2, svint32_t op3) { return svqdmlalt_s64(op1, op2, op3); }
__forceinline svint32_t svqdmlalt(svint32_t op1, svint16_t op2, svint16_t op3) { return svqdmlalt_s32(op1, op2, op3); }
__forceinline svint16_t svqdmlalt(svint16_t op1, svint8_t op2, svint8_t op3) { return svqdmlalt_s16(op1, op2, op3); }
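// --- Usage sketch (illustrative only; not part of this header's API). svqdmlalb and
// svqdmlalt compute op1 + saturate(2 * op2 * op3) on the widened bottom/top lanes,
// while svqdmlalbt pairs the even (bottom) lanes of op2 with the odd (top) lanes of op3.
__forceinline svint32_t __sve_example_qdmlal(svint32_t acc, svint16_t a, svint16_t b)
{
    acc = svqdmlalb(acc, a, b); // even lanes of a and b
    acc = svqdmlalt(acc, a, b); // odd lanes of a and b
    return acc;
}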
__forceinline svint16_t svqdmlslbt(svint16_t op1, svint8_t op2, svint8_t op3) { return svqdmlslbt_s16(op1, op2, op3); }
__forceinline svint64_t svqdmlslbt(svint64_t op1, svint32_t op2, svint32_t op3) { return svqdmlslbt_s64(op1, op2, op3); }
__forceinline svint16_t svqdmlslbt(svint16_t op1, svint8_t op2, int8_t op3) { return svqdmlslbt_n_s16(op1, op2, op3); }
__forceinline svint32_t svqdmlslbt(svint32_t op1, svint16_t op2, int16_t op3) { return svqdmlslbt_n_s32(op1, op2, op3); }
__forceinline svint64_t svqdmlslbt(svint64_t op1, svint32_t op2, int32_t op3) { return svqdmlslbt_n_s64(op1, op2, op3); }
__forceinline svint32_t svqdmlslbt(svint32_t op1, svint16_t op2, svint16_t op3) { return svqdmlslbt_s32(op1, op2, op3); }
template <uint64_t N, typename T1, typename T2>
__forceinline T1 __svqdmlslb_lane(T1 op1, T2 op2, T2 op3) {
    if constexpr(::std::is_same_v<T1, svint32_t>) { return svqdmlslb_lane_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint64_t>) { return svqdmlslb_lane_s64(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svqdmlslb_lane(op1, op2, op3, imm_index) __svqdmlslb_lane<imm_index>(op1, op2, op3)
__forceinline svint64_t svqdmlslb(svint64_t op1, svint32_t op2, int32_t op3) { return svqdmlslb_n_s64(op1, op2, op3); }
__forceinline svint32_t svqdmlslb(svint32_t op1, svint16_t op2, int16_t op3) { return svqdmlslb_n_s32(op1, op2, op3); }
__forceinline svint16_t svqdmlslb(svint16_t op1, svint8_t op2, int8_t op3) { return svqdmlslb_n_s16(op1, op2, op3); }
__forceinline svint64_t svqdmlslb(svint64_t op1, svint32_t op2, svint32_t op3) { return svqdmlslb_s64(op1, op2, op3); }
__forceinline svint32_t svqdmlslb(svint32_t op1, svint16_t op2, svint16_t op3) { return svqdmlslb_s32(op1, op2, op3); }
__forceinline svint16_t svqdmlslb(svint16_t op1, svint8_t op2, svint8_t op3) { return svqdmlslb_s16(op1, op2, op3); }
__forceinline svint16_t svqdmlslt(svint16_t op1, svint8_t op2, svint8_t op3) { return svqdmlslt_s16(op1, op2, op3); }
__forceinline svint32_t svqdmlslt(svint32_t op1, svint16_t op2, svint16_t op3) { return svqdmlslt_s32(op1, op2, op3); }
__forceinline svint64_t svqdmlslt(svint64_t op1, svint32_t op2, svint32_t op3) { return svqdmlslt_s64(op1, op2, op3); }
__forceinline svint16_t svqdmlslt(svint16_t op1, svint8_t op2, int8_t op3) { return svqdmlslt_n_s16(op1, op2, op3); }
__forceinline svint32_t svqdmlslt(svint32_t op1, svint16_t op2, int16_t op3) { return svqdmlslt_n_s32(op1, op2, op3); }
__forceinline svint64_t svqdmlslt(svint64_t op1, svint32_t op2, int32_t op3) { return svqdmlslt_n_s64(op1, op2, op3); }
template <uint64_t N, typename T1, typename T2>
__forceinline T1 __svqdmlslt_lane(T1 op1, T2 op2, T2 op3) {
    if constexpr(::std::is_same_v<T1, svint32_t>) { return svqdmlslt_lane_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T1, svint64_t>) { return svqdmlslt_lane_s64(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svqdmlslt_lane(op1, op2, op3, imm_index) __svqdmlslt_lane<imm_index>(op1, op2, op3)
__forceinline svint64_t svqrdmlah(svint64_t op1, svint64_t op2, int64_t op3) { return svqrdmlah_n_s64(op1, op2, op3); }
__forceinline svint8_t svqrdmlah(svint8_t op1, svint8_t op2, svint8_t op3) { return svqrdmlah_s8(op1, op2, op3); }
template <uint64_t N, typename T>
__forceinline T __svqrdmlah_lane(T op1, T op2, T op3) {
    if constexpr(::std::is_same_v<T, svint16_t>) { return svqrdmlah_lane_s16(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svqrdmlah_lane_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint64_t>) { return svqrdmlah_lane_s64(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svqrdmlah_lane(op1, op2, op3, imm_index) __svqrdmlah_lane<imm_index>(op1, op2, op3)
__forceinline svint8_t svqrdmlah(svint8_t op1, svint8_t op2, int8_t op3) { return svqrdmlah_n_s8(op1, op2, op3); }
__forceinline svint16_t svqrdmlah(svint16_t op1, svint16_t op2, int16_t op3) { return svqrdmlah_n_s16(op1, op2, op3); }
__forceinline svint32_t svqrdmlah(svint32_t op1, svint32_t op2, svint32_t op3) { return svqrdmlah_s32(op1, op2, op3); }
__forceinline svint32_t svqrdmlah(svint32_t op1, svint32_t op2, int32_t op3) { return svqrdmlah_n_s32(op1, op2, op3); }
__forceinline svint64_t svqrdmlah(svint64_t op1, svint64_t op2, svint64_t op3) { return svqrdmlah_s64(op1, op2, op3); }
__forceinline svint16_t svqrdmlah(svint16_t op1, svint16_t op2, svint16_t op3) { return svqrdmlah_s16(op1, op2, op3); }
__forceinline svint8_t svqrdmlsh(svint8_t op1, svint8_t op2, svint8_t op3) { return svqrdmlsh_s8(op1, op2, op3); }
__forceinline svint16_t svqrdmlsh(svint16_t op1, svint16_t op2, svint16_t op3) { return svqrdmlsh_s16(op1, op2, op3); }
__forceinline svint32_t svqrdmlsh(svint32_t op1, svint32_t op2, svint32_t op3) { return svqrdmlsh_s32(op1, op2, op3); }
__forceinline svint64_t svqrdmlsh(svint64_t op1, svint64_t op2, svint64_t op3) { return svqrdmlsh_s64(op1, op2, op3); }
__forceinline svint16_t svqrdmlsh(svint16_t op1, svint16_t op2, int16_t op3) { return svqrdmlsh_n_s16(op1, op2, op3); }
__forceinline svint32_t svqrdmlsh(svint32_t op1, svint32_t op2, int32_t op3) { return svqrdmlsh_n_s32(op1, op2, op3); }
__forceinline svint64_t svqrdmlsh(svint64_t op1, svint64_t op2, int64_t op3) { return svqrdmlsh_n_s64(op1, op2, op3); }
template <uint64_t N, typename T>
__forceinline T __svqrdmlsh_lane(T op1, T op2, T op3) {
    if constexpr(::std::is_same_v<T, svint16_t>) { return svqrdmlsh_lane_s16(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint32_t>) { return svqrdmlsh_lane_s32(op1, op2, op3, N); }
    else if constexpr(::std::is_same_v<T, svint64_t>) { return svqrdmlsh_lane_s64(op1, op2, op3, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svqrdmlsh_lane(op1, op2, op3, imm_index) __svqrdmlsh_lane<imm_index>(op1, op2, op3)
__forceinline svint8_t svqrdmlsh(svint8_t op1, svint8_t op2, int8_t op3) { return svqrdmlsh_n_s8(op1, op2, op3); }
__forceinline svint8_t svqneg_m(svint8_t inactive, svbool_t pg, svint8_t op) { return svqneg_s8_m(inactive, pg, op); }
__forceinline svint16_t svqneg_m(svint16_t inactive, svbool_t pg, svint16_t op) { return svqneg_s16_m(inactive, pg, op); }
__forceinline svint32_t svqneg_m(svint32_t inactive, svbool_t pg, svint32_t op) { return svqneg_s32_m(inactive, pg, op); }
__forceinline svint16_t svqneg_x(svbool_t pg, svint16_t op) { return svqneg_s16_x(pg, op); }
__forceinline svint64_t svqneg_m(svint64_t inactive, svbool_t pg, svint64_t op) { return svqneg_s64_m(inactive, pg, op); }
__forceinline svint64_t svqneg_z(svbool_t pg, svint64_t op) { return svqneg_s64_z(pg, op); }
__forceinline svint32_t svqneg_z(svbool_t pg, svint32_t op) { return svqneg_s32_z(pg, op); }
__forceinline svint16_t svqneg_z(svbool_t pg, svint16_t op) { return svqneg_s16_z(pg, op); }
__forceinline svint8_t svqneg_z(svbool_t pg, svint8_t op) { return svqneg_s8_z(pg, op); }
__forceinline svint64_t svqneg_x(svbool_t pg, svint64_t op) { return svqneg_s64_x(pg, op); }
__forceinline svint32_t svqneg_x(svbool_t pg, svint32_t op) { return svqneg_s32_x(pg, op); }
__forceinline svint8_t svqneg_x(svbool_t pg, svint8_t op) { return svqneg_s8_x(pg, op); }
__forceinline svuint16_t svadalp_z(svbool_t pg, svuint16_t op1, svuint8_t op2) { return svadalp_u16_z(pg, op1, op2); }
__forceinline svint32_t svadalp_m(svbool_t pg, svint32_t op1, svint16_t op2) { return svadalp_s32_m(pg, op1, op2); }
__forceinline svint64_t svadalp_m(svbool_t pg, svint64_t op1, svint32_t op2) { return svadalp_s64_m(pg, op1, op2); }
__forceinline svuint16_t svadalp_m(svbool_t pg, svuint16_t op1, svuint8_t op2) { return svadalp_u16_m(pg, op1, op2); }
__forceinline svuint32_t svadalp_m(svbool_t pg, svuint32_t op1, svuint16_t op2) { return svadalp_u32_m(pg, op1, op2); }
__forceinline svuint64_t
svadalp_m(svbool_t pg, svuint64_t op1, svuint32_t op2) { return svadalp_u64_m(pg, op1, op2); } __forceinline svint16_t svadalp_x(svbool_t pg, svint16_t op1, svint8_t op2) { return svadalp_s16_x(pg, op1, op2); } __forceinline svint32_t svadalp_x(svbool_t pg, svint32_t op1, svint16_t op2) { return svadalp_s32_x(pg, op1, op2); } __forceinline svint64_t svadalp_x(svbool_t pg, svint64_t op1, svint32_t op2) { return svadalp_s64_x(pg, op1, op2); } __forceinline svuint64_t svadalp_x(svbool_t pg, svuint64_t op1, svuint32_t op2) { return svadalp_u64_x(pg, op1, op2); } __forceinline svint16_t svadalp_m(svbool_t pg, svint16_t op1, svint8_t op2) { return svadalp_s16_m(pg, op1, op2); } __forceinline svint16_t svadalp_z(svbool_t pg, svint16_t op1, svint8_t op2) { return svadalp_s16_z(pg, op1, op2); } __forceinline svuint32_t svadalp_x(svbool_t pg, svuint32_t op1, svuint16_t op2) { return svadalp_u32_x(pg, op1, op2); } __forceinline svint64_t svadalp_z(svbool_t pg, svint64_t op1, svint32_t op2) { return svadalp_s64_z(pg, op1, op2); } __forceinline svuint16_t svadalp_x(svbool_t pg, svuint16_t op1, svuint8_t op2) { return svadalp_u16_x(pg, op1, op2); } __forceinline svuint32_t svadalp_z(svbool_t pg, svuint32_t op1, svuint16_t op2) { return svadalp_u32_z(pg, op1, op2); } __forceinline svuint64_t svadalp_z(svbool_t pg, svuint64_t op1, svuint32_t op2) { return svadalp_u64_z(pg, op1, op2); } __forceinline svint32_t svadalp_z(svbool_t pg, svint32_t op1, svint16_t op2) { return svadalp_s32_z(pg, op1, op2); } __forceinline svuint64_t svaddp_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svaddp_u64_m(pg, op1, op2); } __forceinline svuint32_t svaddp_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svaddp_u32_m(pg, op1, op2); } __forceinline svint64_t svaddp_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svaddp_s64_m(pg, op1, op2); } __forceinline svuint16_t svaddp_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svaddp_u16_m(pg, op1, op2); } __forceinline svuint8_t svaddp_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svaddp_u8_m(pg, op1, op2); } __forceinline svint32_t svaddp_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svaddp_s32_m(pg, op1, op2); } __forceinline svint64_t svaddp_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svaddp_s64_x(pg, op1, op2); } __forceinline svint8_t svaddp_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svaddp_s8_m(pg, op1, op2); } __forceinline svfloat64_t svaddp_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svaddp_f64_m(pg, op1, op2); } __forceinline svfloat32_t svaddp_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svaddp_f32_m(pg, op1, op2); } __forceinline svfloat16_t svaddp_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svaddp_f16_x(pg, op1, op2); } __forceinline svfloat16_t svaddp_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svaddp_f16_m(pg, op1, op2); } __forceinline svint16_t svaddp_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svaddp_s16_m(pg, op1, op2); } __forceinline svfloat32_t svaddp_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svaddp_f32_x(pg, op1, op2); } __forceinline svuint64_t svaddp_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svaddp_u64_x(pg, op1, op2); } __forceinline svint8_t svaddp_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svaddp_s8_x(pg, op1, op2); } __forceinline svint16_t svaddp_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svaddp_s16_x(pg, op1, op2); } __forceinline svint32_t svaddp_x(svbool_t pg, svint32_t 
op1, svint32_t op2) { return svaddp_s32_x(pg, op1, op2); } __forceinline svuint8_t svaddp_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svaddp_u8_x(pg, op1, op2); } __forceinline svuint16_t svaddp_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svaddp_u16_x(pg, op1, op2); } __forceinline svuint32_t svaddp_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svaddp_u32_x(pg, op1, op2); } __forceinline svfloat64_t svaddp_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svaddp_f64_x(pg, op1, op2); } __forceinline svfloat32_t svmaxnmp_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmaxnmp_f32_m(pg, op1, op2); } __forceinline svfloat32_t svmaxnmp_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmaxnmp_f32_x(pg, op1, op2); } __forceinline svfloat16_t svmaxnmp_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmaxnmp_f16_m(pg, op1, op2); } __forceinline svfloat16_t svmaxnmp_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmaxnmp_f16_x(pg, op1, op2); } __forceinline svfloat64_t svmaxnmp_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmaxnmp_f64_x(pg, op1, op2); } __forceinline svfloat64_t svmaxnmp_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmaxnmp_f64_m(pg, op1, op2); } __forceinline svuint64_t svmaxp_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svmaxp_u64_m(pg, op1, op2); } __forceinline svfloat16_t svmaxp_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmaxp_f16_m(pg, op1, op2); } __forceinline svuint64_t svmaxp_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svmaxp_u64_x(pg, op1, op2); } __forceinline svuint16_t svmaxp_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmaxp_u16_x(pg, op1, op2); } __forceinline svuint8_t svmaxp_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmaxp_u8_x(pg, op1, op2); } __forceinline svint64_t svmaxp_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svmaxp_s64_x(pg, op1, op2); } __forceinline svint32_t svmaxp_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svmaxp_s32_x(pg, op1, op2); } __forceinline svint16_t svmaxp_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svmaxp_s16_x(pg, op1, op2); } __forceinline svint8_t svmaxp_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svmaxp_s8_x(pg, op1, op2); } __forceinline svfloat64_t svmaxp_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svmaxp_f64_x(pg, op1, op2); } __forceinline svfloat32_t svmaxp_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmaxp_f32_x(pg, op1, op2); } __forceinline svuint32_t svmaxp_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmaxp_u32_x(pg, op1, op2); } __forceinline svuint32_t svmaxp_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svmaxp_u32_m(pg, op1, op2); } __forceinline svuint16_t svmaxp_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svmaxp_u16_m(pg, op1, op2); } __forceinline svuint8_t svmaxp_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svmaxp_u8_m(pg, op1, op2); } __forceinline svint64_t svmaxp_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svmaxp_s64_m(pg, op1, op2); } __forceinline svint32_t svmaxp_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svmaxp_s32_m(pg, op1, op2); } __forceinline svint16_t svmaxp_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svmaxp_s16_m(pg, op1, op2); } __forceinline svint8_t svmaxp_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svmaxp_s8_m(pg, op1, op2); } __forceinline svfloat64_t svmaxp_m(svbool_t pg, svfloat64_t 
op1, svfloat64_t op2) { return svmaxp_f64_m(pg, op1, op2); } __forceinline svfloat32_t svmaxp_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svmaxp_f32_m(pg, op1, op2); } __forceinline svfloat16_t svmaxp_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svmaxp_f16_x(pg, op1, op2); } __forceinline svfloat64_t svminnmp_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svminnmp_f64_x(pg, op1, op2); } __forceinline svfloat32_t svminnmp_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svminnmp_f32_x(pg, op1, op2); } __forceinline svfloat16_t svminnmp_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svminnmp_f16_x(pg, op1, op2); } __forceinline svfloat32_t svminnmp_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svminnmp_f32_m(pg, op1, op2); } __forceinline svfloat16_t svminnmp_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svminnmp_f16_m(pg, op1, op2); } __forceinline svfloat64_t svminnmp_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svminnmp_f64_m(pg, op1, op2); } __forceinline svuint32_t svminp_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svminp_u32_m(pg, op1, op2); } __forceinline svint8_t svminp_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svminp_s8_x(pg, op1, op2); } __forceinline svint16_t svminp_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svminp_s16_x(pg, op1, op2); } __forceinline svint32_t svminp_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svminp_s32_x(pg, op1, op2); } __forceinline svint64_t svminp_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svminp_s64_x(pg, op1, op2); } __forceinline svuint8_t svminp_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svminp_u8_x(pg, op1, op2); } __forceinline svuint16_t svminp_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svminp_u16_x(pg, op1, op2); } __forceinline svuint32_t svminp_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svminp_u32_x(pg, op1, op2); } __forceinline svuint64_t svminp_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svminp_u64_x(pg, op1, op2); } __forceinline svfloat16_t svminp_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svminp_f16_x(pg, op1, op2); } __forceinline svuint64_t svminp_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svminp_u64_m(pg, op1, op2); } __forceinline svuint16_t svminp_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svminp_u16_m(pg, op1, op2); } __forceinline svint32_t svminp_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svminp_s32_m(pg, op1, op2); } __forceinline svfloat64_t svminp_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svminp_f64_x(pg, op1, op2); } __forceinline svfloat16_t svminp_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) { return svminp_f16_m(pg, op1, op2); } __forceinline svfloat32_t svminp_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svminp_f32_m(pg, op1, op2); } __forceinline svfloat64_t svminp_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svminp_f64_m(pg, op1, op2); } __forceinline svint8_t svminp_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svminp_s8_m(pg, op1, op2); } __forceinline svint16_t svminp_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svminp_s16_m(pg, op1, op2); } __forceinline svfloat32_t svminp_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svminp_f32_x(pg, op1, op2); } __forceinline svint64_t svminp_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svminp_s64_m(pg, op1, op2); } __forceinline svuint8_t svminp_m(svbool_t 
pg, svuint8_t op1, svuint8_t op2) { return svminp_u8_m(pg, op1, op2); } __forceinline svuint8_t svpmul(svuint8_t op1, uint8_t op2) { return svpmul_n_u8(op1, op2); } __forceinline svuint8_t svpmul(svuint8_t op1, svuint8_t op2) { return svpmul_u8(op1, op2); } __forceinline svuint8_t svpmullb_pair(svuint8_t op1, uint8_t op2) { return svpmullb_pair_n_u8(op1, op2); } __forceinline svuint64_t svpmullb_pair(svuint64_t op1, svuint64_t op2) { return svpmullb_pair_u64(op1, op2); } __forceinline svuint64_t svpmullb_pair(svuint64_t op1, uint64_t op2) { return svpmullb_pair_n_u64(op1, op2); } __forceinline svuint32_t svpmullb_pair(svuint32_t op1, svuint32_t op2) { return svpmullb_pair_u32(op1, op2); } __forceinline svuint8_t svpmullb_pair(svuint8_t op1, svuint8_t op2) { return svpmullb_pair_u8(op1, op2); } __forceinline svuint64_t svpmullb(svuint32_t op1, uint32_t op2) { return svpmullb_n_u64(op1, op2); } __forceinline svuint16_t svpmullb(svuint8_t op1, uint8_t op2) { return svpmullb_n_u16(op1, op2); } __forceinline svuint64_t svpmullb(svuint32_t op1, svuint32_t op2) { return svpmullb_u64(op1, op2); } __forceinline svuint16_t svpmullb(svuint8_t op1, svuint8_t op2) { return svpmullb_u16(op1, op2); } __forceinline svuint32_t svpmullb_pair(svuint32_t op1, uint32_t op2) { return svpmullb_pair_n_u32(op1, op2); } __forceinline svuint16_t svpmullt(svuint8_t op1, svuint8_t op2) { return svpmullt_u16(op1, op2); } __forceinline svuint16_t svpmullt(svuint8_t op1, uint8_t op2) { return svpmullt_n_u16(op1, op2); } __forceinline svuint64_t svpmullt(svuint32_t op1, uint32_t op2) { return svpmullt_n_u64(op1, op2); } __forceinline svuint64_t svpmullt(svuint32_t op1, svuint32_t op2) { return svpmullt_u64(op1, op2); } __forceinline svuint32_t svpmullt_pair(svuint32_t op1, svuint32_t op2) { return svpmullt_pair_u32(op1, op2); } __forceinline svuint8_t svpmullt_pair(svuint8_t op1, svuint8_t op2) { return svpmullt_pair_u8(op1, op2); } __forceinline svuint64_t svpmullt_pair(svuint64_t op1, svuint64_t op2) { return svpmullt_pair_u64(op1, op2); } __forceinline svuint32_t svpmullt_pair(svuint32_t op1, uint32_t op2) { return svpmullt_pair_n_u32(op1, op2); } __forceinline svuint64_t svpmullt_pair(svuint64_t op1, uint64_t op2) { return svpmullt_pair_n_u64(op1, op2); } __forceinline svuint8_t svpmullt_pair(svuint8_t op1, uint8_t op2) { return svpmullt_pair_n_u8(op1, op2); } __forceinline svuint32_t svrecpe_z(svbool_t pg, svuint32_t op) { return svrecpe_u32_z(pg, op); } __forceinline svuint32_t svrecpe_x(svbool_t pg, svuint32_t op) { return svrecpe_u32_x(pg, op); } __forceinline svuint32_t svrecpe_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { return svrecpe_u32_m(inactive, pg, op); } __forceinline svuint32_t svrsqrte_x(svbool_t pg, svuint32_t op) { return svrsqrte_u32_x(pg, op); } __forceinline svuint32_t svrsqrte_z(svbool_t pg, svuint32_t op) { return svrsqrte_u32_z(pg, op); } __forceinline svuint32_t svrsqrte_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { return svrsqrte_u32_m(inactive, pg, op); } __forceinline svuint32_t svrsubhnb(svuint64_t op1, uint64_t op2) { return svrsubhnb_n_u64(op1, op2); } __forceinline svint8_t svrsubhnb(svint16_t op1, int16_t op2) { return svrsubhnb_n_s16(op1, op2); } __forceinline svuint8_t svrsubhnb(svuint16_t op1, uint16_t op2) { return svrsubhnb_n_u16(op1, op2); } __forceinline svint32_t svrsubhnb(svint64_t op1, svint64_t op2) { return svrsubhnb_s64(op1, op2); } __forceinline svint16_t svrsubhnb(svint32_t op1, svint32_t op2) { return svrsubhnb_s32(op1, op2); } __forceinline svint8_t 
svrsubhnb(svint16_t op1, svint16_t op2) { return svrsubhnb_s16(op1, op2); } __forceinline svuint8_t svrsubhnb(svuint16_t op1, svuint16_t op2) { return svrsubhnb_u16(op1, op2); } __forceinline svuint16_t svrsubhnb(svuint32_t op1, svuint32_t op2) { return svrsubhnb_u32(op1, op2); } __forceinline svuint32_t svrsubhnb(svuint64_t op1, svuint64_t op2) { return svrsubhnb_u64(op1, op2); } __forceinline svint16_t svrsubhnb(svint32_t op1, int32_t op2) { return svrsubhnb_n_s32(op1, op2); } __forceinline svint32_t svrsubhnb(svint64_t op1, int64_t op2) { return svrsubhnb_n_s64(op1, op2); } __forceinline svuint16_t svrsubhnb(svuint32_t op1, uint32_t op2) { return svrsubhnb_n_u32(op1, op2); } __forceinline svuint32_t svrsubhnt(svuint32_t even, svuint64_t op1, uint64_t op2) { return svrsubhnt_n_u64(even, op1, op2); } __forceinline svuint16_t svrsubhnt(svuint16_t even, svuint32_t op1, uint32_t op2) { return svrsubhnt_n_u32(even, op1, op2); } __forceinline svint32_t svrsubhnt(svint32_t even, svint64_t op1, int64_t op2) { return svrsubhnt_n_s64(even, op1, op2); } __forceinline svint16_t svrsubhnt(svint16_t even, svint32_t op1, int32_t op2) { return svrsubhnt_n_s32(even, op1, op2); } __forceinline svint8_t svrsubhnt(svint8_t even, svint16_t op1, int16_t op2) { return svrsubhnt_n_s16(even, op1, op2); } __forceinline svuint32_t svrsubhnt(svuint32_t even, svuint64_t op1, svuint64_t op2) { return svrsubhnt_u64(even, op1, op2); } __forceinline svuint16_t svrsubhnt(svuint16_t even, svuint32_t op1, svuint32_t op2) { return svrsubhnt_u32(even, op1, op2); } __forceinline svuint8_t svrsubhnt(svuint8_t even, svuint16_t op1, svuint16_t op2) { return svrsubhnt_u16(even, op1, op2); } __forceinline svint32_t svrsubhnt(svint32_t even, svint64_t op1, svint64_t op2) { return svrsubhnt_s64(even, op1, op2); } __forceinline svint16_t svrsubhnt(svint16_t even, svint32_t op1, svint32_t op2) { return svrsubhnt_s32(even, op1, op2); } __forceinline svint8_t svrsubhnt(svint8_t even, svint16_t op1, svint16_t op2) { return svrsubhnt_s16(even, op1, op2); } __forceinline svuint8_t svrsubhnt(svuint8_t even, svuint16_t op1, uint16_t op2) { return svrsubhnt_n_u16(even, op1, op2); } __forceinline svuint8_t svsubhnb(svuint16_t op1, svuint16_t op2) { return svsubhnb_u16(op1, op2); } __forceinline svint8_t svsubhnb(svint16_t op1, svint16_t op2) { return svsubhnb_s16(op1, op2); } __forceinline svint32_t svsubhnb(svint64_t op1, svint64_t op2) { return svsubhnb_s64(op1, op2); } __forceinline svint16_t svsubhnb(svint32_t op1, svint32_t op2) { return svsubhnb_s32(op1, op2); } __forceinline svuint32_t svsubhnb(svuint64_t op1, svuint64_t op2) { return svsubhnb_u64(op1, op2); } __forceinline svuint16_t svsubhnb(svuint32_t op1, svuint32_t op2) { return svsubhnb_u32(op1, op2); } __forceinline svint8_t svsubhnb(svint16_t op1, int16_t op2) { return svsubhnb_n_s16(op1, op2); } __forceinline svint16_t svsubhnb(svint32_t op1, int32_t op2) { return svsubhnb_n_s32(op1, op2); } __forceinline svint32_t svsubhnb(svint64_t op1, int64_t op2) { return svsubhnb_n_s64(op1, op2); } __forceinline svuint8_t svsubhnb(svuint16_t op1, uint16_t op2) { return svsubhnb_n_u16(op1, op2); } __forceinline svuint16_t svsubhnb(svuint32_t op1, uint32_t op2) { return svsubhnb_n_u32(op1, op2); } __forceinline svuint32_t svsubhnb(svuint64_t op1, uint64_t op2) { return svsubhnb_n_u64(op1, op2); } __forceinline svint8_t svsubhnt(svint8_t even, svint16_t op1, int16_t op2) { return svsubhnt_n_s16(even, op1, op2); } __forceinline svint32_t svsubhnt(svint32_t even, svint64_t op1, int64_t op2) { 
return svsubhnt_n_s64(even, op1, op2); } __forceinline svint8_t svsubhnt(svint8_t even, svint16_t op1, svint16_t op2) { return svsubhnt_s16(even, op1, op2); } __forceinline svint16_t svsubhnt(svint16_t even, svint32_t op1, svint32_t op2) { return svsubhnt_s32(even, op1, op2); } __forceinline svuint32_t svsubhnt(svuint32_t even, svuint64_t op1, svuint64_t op2) { return svsubhnt_u64(even, op1, op2); } __forceinline svint16_t svsubhnt(svint16_t even, svint32_t op1, int32_t op2) { return svsubhnt_n_s32(even, op1, op2); } __forceinline svuint8_t svsubhnt(svuint8_t even, svuint16_t op1, uint16_t op2) { return svsubhnt_n_u16(even, op1, op2); } __forceinline svint32_t svsubhnt(svint32_t even, svint64_t op1, svint64_t op2) { return svsubhnt_s64(even, op1, op2); } __forceinline svuint32_t svsubhnt(svuint32_t even, svuint64_t op1, uint64_t op2) { return svsubhnt_n_u64(even, op1, op2); } __forceinline svuint8_t svsubhnt(svuint8_t even, svuint16_t op1, svuint16_t op2) { return svsubhnt_u16(even, op1, op2); } __forceinline svuint16_t svsubhnt(svuint16_t even, svuint32_t op1, uint32_t op2) { return svsubhnt_n_u32(even, op1, op2); } __forceinline svuint16_t svsubhnt(svuint16_t even, svuint32_t op1, svuint32_t op2) { return svsubhnt_u32(even, op1, op2); } __forceinline svint16_t svqsub_m(svbool_t pg, svint16_t op1, int16_t op2) { return svqsub_n_s16_m(pg, op1, op2); } __forceinline svint32_t svqsub_m(svbool_t pg, svint32_t op1, int32_t op2) { return svqsub_n_s32_m(pg, op1, op2); } __forceinline svint64_t svqsub_m(svbool_t pg, svint64_t op1, int64_t op2) { return svqsub_n_s64_m(pg, op1, op2); } __forceinline svuint8_t svqsub_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svqsub_n_u8_m(pg, op1, op2); } __forceinline svuint16_t svqsub_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svqsub_n_u16_m(pg, op1, op2); } __forceinline svuint32_t svqsub_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svqsub_n_u32_m(pg, op1, op2); } __forceinline svuint64_t svqsub_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svqsub_n_u64_m(pg, op1, op2); } __forceinline svint8_t svqsub_x(svbool_t pg, svint8_t op1, int8_t op2) { return svqsub_n_s8_x(pg, op1, op2); } __forceinline svint16_t svqsub_x(svbool_t pg, svint16_t op1, int16_t op2) { return svqsub_n_s16_x(pg, op1, op2); } __forceinline svint32_t svqsub_x(svbool_t pg, svint32_t op1, int32_t op2) { return svqsub_n_s32_x(pg, op1, op2); } __forceinline svuint8_t svqsub_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svqsub_n_u8_x(pg, op1, op2); } __forceinline svuint16_t svqsub_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svqsub_n_u16_x(pg, op1, op2); } __forceinline svuint32_t svqsub_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svqsub_n_u32_x(pg, op1, op2); } __forceinline svuint64_t svqsub_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svqsub_n_u64_x(pg, op1, op2); } __forceinline svint8_t svqsub_z(svbool_t pg, svint8_t op1, int8_t op2) { return svqsub_n_s8_z(pg, op1, op2); } __forceinline svint16_t svqsub_z(svbool_t pg, svint16_t op1, int16_t op2) { return svqsub_n_s16_z(pg, op1, op2); } __forceinline svuint8_t svqsub_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svqsub_n_u8_z(pg, op1, op2); } __forceinline svint32_t svqsub_z(svbool_t pg, svint32_t op1, int32_t op2) { return svqsub_n_s32_z(pg, op1, op2); } __forceinline svint64_t svqsub_z(svbool_t pg, svint64_t op1, int64_t op2) { return svqsub_n_s64_z(pg, op1, op2); } __forceinline svint8_t svqsub_m(svbool_t pg, svint8_t op1, int8_t op2) { return svqsub_n_s8_m(pg, op1, 
op2); } __forceinline svint64_t svqsub_x(svbool_t pg, svint64_t op1, int64_t op2) { return svqsub_n_s64_x(pg, op1, op2); } __forceinline svuint64_t svqsub_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svqsub_u64_z(pg, op1, op2); } __forceinline svint32_t svqsub_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svqsub_s32_m(pg, op1, op2); } __forceinline svuint16_t svqsub_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svqsub_u16_z(pg, op1, op2); } __forceinline svuint16_t svqsub_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svqsub_n_u16_z(pg, op1, op2); } __forceinline svint8_t svqsub_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svqsub_s8_m(pg, op1, op2); } __forceinline svint16_t svqsub_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svqsub_s16_m(pg, op1, op2); } __forceinline svint64_t svqsub_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svqsub_s64_m(pg, op1, op2); } __forceinline svuint8_t svqsub_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svqsub_u8_m(pg, op1, op2); } __forceinline svuint16_t svqsub_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svqsub_u16_m(pg, op1, op2); } __forceinline svuint32_t svqsub_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svqsub_u32_m(pg, op1, op2); } __forceinline svuint64_t svqsub_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svqsub_u64_m(pg, op1, op2); } __forceinline svint8_t svqsub_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svqsub_s8_x(pg, op1, op2); } __forceinline svint16_t svqsub_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svqsub_s16_x(pg, op1, op2); } __forceinline svint32_t svqsub_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svqsub_s32_x(pg, op1, op2); } __forceinline svint64_t svqsub_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svqsub_s64_x(pg, op1, op2); } __forceinline svuint8_t svqsub_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svqsub_u8_x(pg, op1, op2); } __forceinline svuint16_t svqsub_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svqsub_u16_x(pg, op1, op2); } __forceinline svuint32_t svqsub_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svqsub_u32_x(pg, op1, op2); } __forceinline svuint64_t svqsub_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svqsub_u64_x(pg, op1, op2); } __forceinline svint8_t svqsub_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svqsub_s8_z(pg, op1, op2); } __forceinline svint16_t svqsub_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svqsub_s16_z(pg, op1, op2); } __forceinline svint32_t svqsub_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svqsub_s32_z(pg, op1, op2); } __forceinline svint64_t svqsub_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svqsub_s64_z(pg, op1, op2); } __forceinline svuint8_t svqsub_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svqsub_u8_z(pg, op1, op2); } __forceinline svuint32_t svqsub_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svqsub_u32_z(pg, op1, op2); } __forceinline svuint64_t svqsub_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svqsub_n_u64_z(pg, op1, op2); } __forceinline svuint32_t svqsub_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svqsub_n_u32_z(pg, op1, op2); } __forceinline svuint16_t svqsubr_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svqsubr_u16_z(pg, op1, op2); } __forceinline svint16_t svqsubr_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svqsubr_s16_z(pg, op1, op2); } __forceinline svint32_t svqsubr_z(svbool_t pg, svint32_t op1, 
svint32_t op2) { return svqsubr_s32_z(pg, op1, op2); } __forceinline svint64_t svqsubr_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svqsubr_s64_z(pg, op1, op2); } __forceinline svuint8_t svqsubr_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svqsubr_u8_z(pg, op1, op2); } __forceinline svuint64_t svqsubr_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svqsubr_n_u64_z(pg, op1, op2); } __forceinline svuint32_t svqsubr_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svqsubr_n_u32_z(pg, op1, op2); } __forceinline svuint16_t svqsubr_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svqsubr_n_u16_z(pg, op1, op2); } __forceinline svuint8_t svqsubr_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svqsubr_n_u8_z(pg, op1, op2); } __forceinline svint64_t svqsubr_z(svbool_t pg, svint64_t op1, int64_t op2) { return svqsubr_n_s64_z(pg, op1, op2); } __forceinline svint32_t svqsubr_z(svbool_t pg, svint32_t op1, int32_t op2) { return svqsubr_n_s32_z(pg, op1, op2); } __forceinline svint16_t svqsubr_z(svbool_t pg, svint16_t op1, int16_t op2) { return svqsubr_n_s16_z(pg, op1, op2); } __forceinline svint8_t svqsubr_z(svbool_t pg, svint8_t op1, int8_t op2) { return svqsubr_n_s8_z(pg, op1, op2); } __forceinline svuint64_t svqsubr_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svqsubr_n_u64_x(pg, op1, op2); } __forceinline svint8_t svqsubr_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svqsubr_s8_z(pg, op1, op2); } __forceinline svuint32_t svqsubr_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svqsubr_n_u32_x(pg, op1, op2); } __forceinline svuint8_t svqsubr_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svqsubr_n_u8_x(pg, op1, op2); } __forceinline svint64_t svqsubr_x(svbool_t pg, svint64_t op1, int64_t op2) { return svqsubr_n_s64_x(pg, op1, op2); } __forceinline svint32_t svqsubr_x(svbool_t pg, svint32_t op1, int32_t op2) { return svqsubr_n_s32_x(pg, op1, op2); } __forceinline svint16_t svqsubr_x(svbool_t pg, svint16_t op1, int16_t op2) { return svqsubr_n_s16_x(pg, op1, op2); } __forceinline svint8_t svqsubr_x(svbool_t pg, svint8_t op1, int8_t op2) { return svqsubr_n_s8_x(pg, op1, op2); } __forceinline svuint64_t svqsubr_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svqsubr_n_u64_m(pg, op1, op2); } __forceinline svuint32_t svqsubr_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svqsubr_n_u32_m(pg, op1, op2); } __forceinline svuint16_t svqsubr_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svqsubr_n_u16_m(pg, op1, op2); } __forceinline svuint8_t svqsubr_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svqsubr_n_u8_m(pg, op1, op2); } __forceinline svint64_t svqsubr_m(svbool_t pg, svint64_t op1, int64_t op2) { return svqsubr_n_s64_m(pg, op1, op2); } __forceinline svint32_t svqsubr_m(svbool_t pg, svint32_t op1, int32_t op2) { return svqsubr_n_s32_m(pg, op1, op2); } __forceinline svint16_t svqsubr_m(svbool_t pg, svint16_t op1, int16_t op2) { return svqsubr_n_s16_m(pg, op1, op2); } __forceinline svint8_t svqsubr_m(svbool_t pg, svint8_t op1, int8_t op2) { return svqsubr_n_s8_m(pg, op1, op2); } __forceinline svuint16_t svqsubr_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svqsubr_n_u16_x(pg, op1, op2); } __forceinline svuint64_t svqsubr_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svqsubr_u64_z(pg, op1, op2); } __forceinline svuint64_t svqsubr_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svqsubr_u64_x(pg, op1, op2); } __forceinline svuint16_t svqsubr_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return 
svqsubr_u16_x(pg, op1, op2); } __forceinline svuint32_t svqsubr_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svqsubr_u32_x(pg, op1, op2); } __forceinline svint8_t svqsubr_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svqsubr_s8_m(pg, op1, op2); } __forceinline svint16_t svqsubr_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svqsubr_s16_m(pg, op1, op2); } __forceinline svint32_t svqsubr_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svqsubr_s32_m(pg, op1, op2); } __forceinline svint64_t svqsubr_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svqsubr_s64_m(pg, op1, op2); } __forceinline svuint8_t svqsubr_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svqsubr_u8_m(pg, op1, op2); } __forceinline svuint16_t svqsubr_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svqsubr_u16_m(pg, op1, op2); } __forceinline svuint32_t svqsubr_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svqsubr_u32_z(pg, op1, op2); } __forceinline svuint64_t svqsubr_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svqsubr_u64_m(pg, op1, op2); } __forceinline svint8_t svqsubr_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svqsubr_s8_x(pg, op1, op2); } __forceinline svint16_t svqsubr_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svqsubr_s16_x(pg, op1, op2); } __forceinline svint32_t svqsubr_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svqsubr_s32_x(pg, op1, op2); } __forceinline svint64_t svqsubr_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svqsubr_s64_x(pg, op1, op2); } __forceinline svuint8_t svqsubr_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svqsubr_u8_x(pg, op1, op2); } __forceinline svuint32_t svqsubr_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svqsubr_u32_m(pg, op1, op2); } __forceinline svuint64_t svsbclb(svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svsbclb_u64(op1, op2, op3); } __forceinline svuint64_t svsbclb(svuint64_t op1, svuint64_t op2, uint64_t op3) { return svsbclb_n_u64(op1, op2, op3); } __forceinline svuint32_t svsbclb(svuint32_t op1, svuint32_t op2, uint32_t op3) { return svsbclb_n_u32(op1, op2, op3); } __forceinline svuint32_t svsbclb(svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svsbclb_u32(op1, op2, op3); } __forceinline svuint32_t svsbclt(svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svsbclt_u32(op1, op2, op3); } __forceinline svuint64_t svsbclt(svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svsbclt_u64(op1, op2, op3); } __forceinline svuint32_t svsbclt(svuint32_t op1, svuint32_t op2, uint32_t op3) { return svsbclt_n_u32(op1, op2, op3); } __forceinline svuint64_t svsbclt(svuint64_t op1, svuint64_t op2, uint64_t op3) { return svsbclt_n_u64(op1, op2, op3); } __forceinline svint8_t svhsub_x(svbool_t pg, svint8_t op1, int8_t op2) { return svhsub_n_s8_x(pg, op1, op2); } __forceinline svuint64_t svhsub_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svhsub_n_u64_m(pg, op1, op2); } __forceinline svuint32_t svhsub_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svhsub_n_u32_m(pg, op1, op2); } __forceinline svuint16_t svhsub_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svhsub_n_u16_m(pg, op1, op2); } __forceinline svuint8_t svhsub_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svhsub_n_u8_m(pg, op1, op2); } __forceinline svint64_t svhsub_m(svbool_t pg, svint64_t op1, int64_t op2) { return svhsub_n_s64_m(pg, op1, op2); } __forceinline svint16_t svhsub_x(svbool_t pg, svint16_t op1, int16_t op2) { return svhsub_n_s16_x(pg, op1, 
op2); } __forceinline svint32_t svhsub_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svhsub_s32_z(pg, op1, op2); } __forceinline svint32_t svhsub_x(svbool_t pg, svint32_t op1, int32_t op2) { return svhsub_n_s32_x(pg, op1, op2); } __forceinline svint8_t svhsub_z(svbool_t pg, svint8_t op1, int8_t op2) { return svhsub_n_s8_z(pg, op1, op2); } __forceinline svint16_t svhsub_z(svbool_t pg, svint16_t op1, int16_t op2) { return svhsub_n_s16_z(pg, op1, op2); } __forceinline svint32_t svhsub_z(svbool_t pg, svint32_t op1, int32_t op2) { return svhsub_n_s32_z(pg, op1, op2); } __forceinline svint32_t svhsub_m(svbool_t pg, svint32_t op1, int32_t op2) { return svhsub_n_s32_m(pg, op1, op2); } __forceinline svint16_t svhsub_m(svbool_t pg, svint16_t op1, int16_t op2) { return svhsub_n_s16_m(pg, op1, op2); } __forceinline svint8_t svhsub_m(svbool_t pg, svint8_t op1, int8_t op2) { return svhsub_n_s8_m(pg, op1, op2); } __forceinline svuint64_t svhsub_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svhsub_u64_z(pg, op1, op2); } __forceinline svint8_t svhsub_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svhsub_s8_z(pg, op1, op2); } __forceinline svuint32_t svhsub_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svhsub_u32_z(pg, op1, op2); } __forceinline svuint8_t svhsub_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svhsub_u8_z(pg, op1, op2); } __forceinline svint64_t svhsub_z(svbool_t pg, svint64_t op1, int64_t op2) { return svhsub_n_s64_z(pg, op1, op2); } __forceinline svuint8_t svhsub_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svhsub_n_u8_z(pg, op1, op2); } __forceinline svuint16_t svhsub_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svhsub_n_u16_z(pg, op1, op2); } __forceinline svint64_t svhsub_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svhsub_s64_z(pg, op1, op2); } __forceinline svuint64_t svhsub_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svhsub_n_u64_z(pg, op1, op2); } __forceinline svuint32_t svhsub_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svhsub_n_u32_z(pg, op1, op2); } __forceinline svuint16_t svhsub_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svhsub_u16_z(pg, op1, op2); } __forceinline svint16_t svhsub_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svhsub_s16_z(pg, op1, op2); } __forceinline svuint64_t svhsub_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svhsub_u64_x(pg, op1, op2); } __forceinline svuint16_t svhsub_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svhsub_u16_x(pg, op1, op2); } __forceinline svint64_t svhsub_x(svbool_t pg, svint64_t op1, int64_t op2) { return svhsub_n_s64_x(pg, op1, op2); } __forceinline svuint8_t svhsub_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svhsub_n_u8_x(pg, op1, op2); } __forceinline svuint16_t svhsub_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svhsub_n_u16_x(pg, op1, op2); } __forceinline svuint32_t svhsub_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svhsub_n_u32_x(pg, op1, op2); } __forceinline svint8_t svhsub_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svhsub_s8_m(pg, op1, op2); } __forceinline svint16_t svhsub_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svhsub_s16_m(pg, op1, op2); } __forceinline svint32_t svhsub_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svhsub_s32_m(pg, op1, op2); } __forceinline svint64_t svhsub_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svhsub_s64_m(pg, op1, op2); } __forceinline svuint32_t svhsub_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { 
return svhsub_u32_x(pg, op1, op2); } __forceinline svuint8_t svhsub_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svhsub_u8_m(pg, op1, op2); } __forceinline svuint32_t svhsub_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svhsub_u32_m(pg, op1, op2); } __forceinline svuint64_t svhsub_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svhsub_n_u64_x(pg, op1, op2); } __forceinline svint8_t svhsub_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svhsub_s8_x(pg, op1, op2); } __forceinline svint16_t svhsub_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svhsub_s16_x(pg, op1, op2); } __forceinline svint32_t svhsub_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svhsub_s32_x(pg, op1, op2); } __forceinline svint64_t svhsub_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svhsub_s64_x(pg, op1, op2); } __forceinline svuint8_t svhsub_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svhsub_u8_x(pg, op1, op2); } __forceinline svuint16_t svhsub_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svhsub_u16_m(pg, op1, op2); } __forceinline svuint64_t svhsub_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svhsub_u64_m(pg, op1, op2); } __forceinline svuint64_t svhsubr_x(svbool_t pg, svuint64_t op1, uint64_t op2) { return svhsubr_n_u64_x(pg, op1, op2); } __forceinline svint8_t svhsubr_z(svbool_t pg, svint8_t op1, int8_t op2) { return svhsubr_n_s8_z(pg, op1, op2); } __forceinline svuint64_t svhsubr_m(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svhsubr_u64_m(pg, op1, op2); } __forceinline svuint8_t svhsubr_m(svbool_t pg, svuint8_t op1, uint8_t op2) { return svhsubr_n_u8_m(pg, op1, op2); } __forceinline svint64_t svhsubr_m(svbool_t pg, svint64_t op1, int64_t op2) { return svhsubr_n_s64_m(pg, op1, op2); } __forceinline svint32_t svhsubr_m(svbool_t pg, svint32_t op1, int32_t op2) { return svhsubr_n_s32_m(pg, op1, op2); } __forceinline svint16_t svhsubr_m(svbool_t pg, svint16_t op1, int16_t op2) { return svhsubr_n_s16_m(pg, op1, op2); } __forceinline svuint32_t svhsubr_m(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svhsubr_u32_m(pg, op1, op2); } __forceinline svint8_t svhsubr_x(svbool_t pg, svint8_t op1, svint8_t op2) { return svhsubr_s8_x(pg, op1, op2); } __forceinline svint16_t svhsubr_x(svbool_t pg, svint16_t op1, svint16_t op2) { return svhsubr_s16_x(pg, op1, op2); } __forceinline svint32_t svhsubr_x(svbool_t pg, svint32_t op1, svint32_t op2) { return svhsubr_s32_x(pg, op1, op2); } __forceinline svint64_t svhsubr_x(svbool_t pg, svint64_t op1, svint64_t op2) { return svhsubr_s64_x(pg, op1, op2); } __forceinline svint16_t svhsubr_z(svbool_t pg, svint16_t op1, svint16_t op2) { return svhsubr_s16_z(pg, op1, op2); } __forceinline svuint8_t svhsubr_x(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svhsubr_u8_x(pg, op1, op2); } __forceinline svuint32_t svhsubr_x(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svhsubr_u32_x(pg, op1, op2); } __forceinline svuint64_t svhsubr_x(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svhsubr_u64_x(pg, op1, op2); } __forceinline svint8_t svhsubr_m(svbool_t pg, svint8_t op1, int8_t op2) { return svhsubr_n_s8_m(pg, op1, op2); } __forceinline svuint64_t svhsubr_z(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svhsubr_u64_z(pg, op1, op2); } __forceinline svuint32_t svhsubr_z(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svhsubr_u32_z(pg, op1, op2); } __forceinline svuint16_t svhsubr_z(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svhsubr_u16_z(pg, op1, op2); } 
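// The predicated wrapper overloads in this section (svqsub*, svhsub*, svhsubr*, and friends)
// resolve to the type-suffixed intrinsic from the argument types; the _m/_x/_z suffix selects
// the predication mode: _m merges inactive lanes from op1, _z zeroes them, _x leaves them
// unspecified. A minimal usage sketch (illustrative only; the function names below are
// assumptions for this comment, not part of this header):
//
//   svint32_t halving_diff(svbool_t pg, svint32_t a, svint32_t b) {
//       return svhsub_z(pg, a, b);    // resolves to svhsub_s32_z(pg, a, b)
//   }
//   svuint16_t reverse_halving_diff(svbool_t pg, svuint16_t a, uint16_t b) {
//       return svhsubr_m(pg, a, b);   // scalar operand resolves to svhsubr_n_u16_m(pg, a, b)
//   }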
__forceinline svuint8_t svhsubr_z(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svhsubr_u8_z(pg, op1, op2); } __forceinline svint64_t svhsubr_z(svbool_t pg, svint64_t op1, svint64_t op2) { return svhsubr_s64_z(pg, op1, op2); } __forceinline svint32_t svhsubr_z(svbool_t pg, svint32_t op1, svint32_t op2) { return svhsubr_s32_z(pg, op1, op2); } __forceinline svuint16_t svhsubr_x(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svhsubr_u16_x(pg, op1, op2); } __forceinline svuint32_t svhsubr_m(svbool_t pg, svuint32_t op1, uint32_t op2) { return svhsubr_n_u32_m(pg, op1, op2); } __forceinline svuint16_t svhsubr_m(svbool_t pg, svuint16_t op1, uint16_t op2) { return svhsubr_n_u16_m(pg, op1, op2); } __forceinline svint8_t svhsubr_x(svbool_t pg, svint8_t op1, int8_t op2) { return svhsubr_n_s8_x(pg, op1, op2); } __forceinline svuint16_t svhsubr_m(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svhsubr_u16_m(pg, op1, op2); } __forceinline svuint8_t svhsubr_m(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svhsubr_u8_m(pg, op1, op2); } __forceinline svint64_t svhsubr_m(svbool_t pg, svint64_t op1, svint64_t op2) { return svhsubr_s64_m(pg, op1, op2); } __forceinline svint32_t svhsubr_m(svbool_t pg, svint32_t op1, svint32_t op2) { return svhsubr_s32_m(pg, op1, op2); } __forceinline svint16_t svhsubr_m(svbool_t pg, svint16_t op1, svint16_t op2) { return svhsubr_s16_m(pg, op1, op2); } __forceinline svint8_t svhsubr_m(svbool_t pg, svint8_t op1, svint8_t op2) { return svhsubr_s8_m(pg, op1, op2); } __forceinline svint16_t svhsubr_z(svbool_t pg, svint16_t op1, int16_t op2) { return svhsubr_n_s16_z(pg, op1, op2); } __forceinline svint32_t svhsubr_z(svbool_t pg, svint32_t op1, int32_t op2) { return svhsubr_n_s32_z(pg, op1, op2); } __forceinline svint64_t svhsubr_z(svbool_t pg, svint64_t op1, int64_t op2) { return svhsubr_n_s64_z(pg, op1, op2); } __forceinline svuint8_t svhsubr_z(svbool_t pg, svuint8_t op1, uint8_t op2) { return svhsubr_n_u8_z(pg, op1, op2); } __forceinline svuint16_t svhsubr_z(svbool_t pg, svuint16_t op1, uint16_t op2) { return svhsubr_n_u16_z(pg, op1, op2); } __forceinline svuint64_t svhsubr_m(svbool_t pg, svuint64_t op1, uint64_t op2) { return svhsubr_n_u64_m(pg, op1, op2); } __forceinline svuint32_t svhsubr_z(svbool_t pg, svuint32_t op1, uint32_t op2) { return svhsubr_n_u32_z(pg, op1, op2); } __forceinline svuint32_t svhsubr_x(svbool_t pg, svuint32_t op1, uint32_t op2) { return svhsubr_n_u32_x(pg, op1, op2); } __forceinline svuint16_t svhsubr_x(svbool_t pg, svuint16_t op1, uint16_t op2) { return svhsubr_n_u16_x(pg, op1, op2); } __forceinline svuint8_t svhsubr_x(svbool_t pg, svuint8_t op1, uint8_t op2) { return svhsubr_n_u8_x(pg, op1, op2); } __forceinline svint64_t svhsubr_x(svbool_t pg, svint64_t op1, int64_t op2) { return svhsubr_n_s64_x(pg, op1, op2); } __forceinline svint32_t svhsubr_x(svbool_t pg, svint32_t op1, int32_t op2) { return svhsubr_n_s32_x(pg, op1, op2); } __forceinline svint16_t svhsubr_x(svbool_t pg, svint16_t op1, int16_t op2) { return svhsubr_n_s16_x(pg, op1, op2); } __forceinline svuint64_t svhsubr_z(svbool_t pg, svuint64_t op1, uint64_t op2) { return svhsubr_n_u64_z(pg, op1, op2); } __forceinline svint8_t svhsubr_z(svbool_t pg, svint8_t op1, svint8_t op2) { return svhsubr_s8_z(pg, op1, op2); } __forceinline svint16_t svsublbt(svint8_t op1, svint8_t op2) { return svsublbt_s16(op1, op2); } __forceinline svint32_t svsublbt(svint16_t op1, svint16_t op2) { return svsublbt_s32(op1, op2); } __forceinline svint64_t svsublbt(svint32_t op1, svint32_t op2) { 
return svsublbt_s64(op1, op2); } __forceinline svint16_t svsublbt(svint8_t op1, int8_t op2) { return svsublbt_n_s16(op1, op2); } __forceinline svint32_t svsublbt(svint16_t op1, int16_t op2) { return svsublbt_n_s32(op1, op2); } __forceinline svint64_t svsublbt(svint32_t op1, int32_t op2) { return svsublbt_n_s64(op1, op2); } __forceinline svint16_t svsublb(svint8_t op1, svint8_t op2) { return svsublb_s16(op1, op2); } __forceinline svuint64_t svsublb(svuint32_t op1, uint32_t op2) { return svsublb_n_u64(op1, op2); } __forceinline svuint32_t svsublb(svuint16_t op1, uint16_t op2) { return svsublb_n_u32(op1, op2); } __forceinline svuint16_t svsublb(svuint8_t op1, uint8_t op2) { return svsublb_n_u16(op1, op2); } __forceinline svint64_t svsublb(svint32_t op1, int32_t op2) { return svsublb_n_s64(op1, op2); } __forceinline svint32_t svsublb(svint16_t op1, int16_t op2) { return svsublb_n_s32(op1, op2); } __forceinline svint16_t svsublb(svint8_t op1, int8_t op2) { return svsublb_n_s16(op1, op2); } __forceinline svuint64_t svsublb(svuint32_t op1, svuint32_t op2) { return svsublb_u64(op1, op2); } __forceinline svuint32_t svsublb(svuint16_t op1, svuint16_t op2) { return svsublb_u32(op1, op2); } __forceinline svuint16_t svsublb(svuint8_t op1, svuint8_t op2) { return svsublb_u16(op1, op2); } __forceinline svint64_t svsublb(svint32_t op1, svint32_t op2) { return svsublb_s64(op1, op2); } __forceinline svint32_t svsublb(svint16_t op1, svint16_t op2) { return svsublb_s32(op1, op2); } __forceinline svint32_t svsubltb(svint16_t op1, svint16_t op2) { return svsubltb_s32(op1, op2); } __forceinline svint16_t svsubltb(svint8_t op1, svint8_t op2) { return svsubltb_s16(op1, op2); } __forceinline svint16_t svsubltb(svint8_t op1, int8_t op2) { return svsubltb_n_s16(op1, op2); } __forceinline svint64_t svsubltb(svint32_t op1, svint32_t op2) { return svsubltb_s64(op1, op2); } __forceinline svint64_t svsubltb(svint32_t op1, int32_t op2) { return svsubltb_n_s64(op1, op2); } __forceinline svint32_t svsubltb(svint16_t op1, int16_t op2) { return svsubltb_n_s32(op1, op2); } __forceinline svint32_t svsublt(svint16_t op1, svint16_t op2) { return svsublt_s32(op1, op2); } __forceinline svuint32_t svsublt(svuint16_t op1, svuint16_t op2) { return svsublt_u32(op1, op2); } __forceinline svuint16_t svsublt(svuint8_t op1, svuint8_t op2) { return svsublt_u16(op1, op2); } __forceinline svint64_t svsublt(svint32_t op1, svint32_t op2) { return svsublt_s64(op1, op2); } __forceinline svint16_t svsublt(svint8_t op1, svint8_t op2) { return svsublt_s16(op1, op2); } __forceinline svuint64_t svsublt(svuint32_t op1, svuint32_t op2) { return svsublt_u64(op1, op2); } __forceinline svint16_t svsublt(svint8_t op1, int8_t op2) { return svsublt_n_s16(op1, op2); } __forceinline svint32_t svsublt(svint16_t op1, int16_t op2) { return svsublt_n_s32(op1, op2); } __forceinline svint64_t svsublt(svint32_t op1, int32_t op2) { return svsublt_n_s64(op1, op2); } __forceinline svuint16_t svsublt(svuint8_t op1, uint8_t op2) { return svsublt_n_u16(op1, op2); } __forceinline svuint32_t svsublt(svuint16_t op1, uint16_t op2) { return svsublt_n_u32(op1, op2); } __forceinline svuint64_t svsublt(svuint32_t op1, uint32_t op2) { return svsublt_n_u64(op1, op2); } __forceinline svint16_t svsubwb(svint16_t op1, svint8_t op2) { return svsubwb_s16(op1, op2); } __forceinline svint32_t svsubwb(svint32_t op1, svint16_t op2) { return svsubwb_s32(op1, op2); } __forceinline svint64_t svsubwb(svint64_t op1, svint32_t op2) { return svsubwb_s64(op1, op2); } __forceinline svuint16_t 
svsubwb(svuint16_t op1, svuint8_t op2) { return svsubwb_u16(op1, op2); } __forceinline svuint32_t svsubwb(svuint32_t op1, svuint16_t op2) { return svsubwb_u32(op1, op2); } __forceinline svuint64_t svsubwb(svuint64_t op1, svuint32_t op2) { return svsubwb_u64(op1, op2); } __forceinline svint16_t svsubwb(svint16_t op1, int8_t op2) { return svsubwb_n_s16(op1, op2); } __forceinline svint32_t svsubwb(svint32_t op1, int16_t op2) { return svsubwb_n_s32(op1, op2); } __forceinline svint64_t svsubwb(svint64_t op1, int32_t op2) { return svsubwb_n_s64(op1, op2); } __forceinline svuint16_t svsubwb(svuint16_t op1, uint8_t op2) { return svsubwb_n_u16(op1, op2); } __forceinline svuint32_t svsubwb(svuint32_t op1, uint16_t op2) { return svsubwb_n_u32(op1, op2); } __forceinline svuint64_t svsubwb(svuint64_t op1, uint32_t op2) { return svsubwb_n_u64(op1, op2); } __forceinline svuint64_t svsubwt(svuint64_t op1, svuint32_t op2) { return svsubwt_u64(op1, op2); } __forceinline svint16_t svsubwt(svint16_t op1, svint8_t op2) { return svsubwt_s16(op1, op2); } __forceinline svuint64_t svsubwt(svuint64_t op1, uint32_t op2) { return svsubwt_n_u64(op1, op2); } __forceinline svuint32_t svsubwt(svuint32_t op1, uint16_t op2) { return svsubwt_n_u32(op1, op2); } __forceinline svuint16_t svsubwt(svuint16_t op1, uint8_t op2) { return svsubwt_n_u16(op1, op2); } __forceinline svint64_t svsubwt(svint64_t op1, int32_t op2) { return svsubwt_n_s64(op1, op2); } __forceinline svint32_t svsubwt(svint32_t op1, int16_t op2) { return svsubwt_n_s32(op1, op2); } __forceinline svint32_t svsubwt(svint32_t op1, svint16_t op2) { return svsubwt_s32(op1, op2); } __forceinline svint16_t svsubwt(svint16_t op1, int8_t op2) { return svsubwt_n_s16(op1, op2); } __forceinline svuint16_t svsubwt(svuint16_t op1, svuint8_t op2) { return svsubwt_u16(op1, op2); } __forceinline svuint32_t svsubwt(svuint32_t op1, svuint16_t op2) { return svsubwt_u32(op1, op2); } __forceinline svint64_t svsubwt(svint64_t op1, svint32_t op2) { return svsubwt_s64(op1, op2); } __forceinline svint32x2_t svcreate2(svint32_t op1, svint32_t op2) { return svcreate2_s32(op1, op2); } __forceinline svint16x2_t svcreate2(svint16_t op1, svint16_t op2) { return svcreate2_s16(op1, op2); } __forceinline svuint8x2_t svcreate2(svuint8_t op1, svuint8_t op2) { return svcreate2_u8(op1, op2); } __forceinline svuint16x2_t svcreate2(svuint16_t op1, svuint16_t op2) { return svcreate2_u16(op1, op2); } __forceinline svuint32x2_t svcreate2(svuint32_t op1, svuint32_t op2) { return svcreate2_u32(op1, op2); } __forceinline svint8x2_t svcreate2(svint8_t op1, svint8_t op2) { return svcreate2_s8(op1, op2); } __forceinline svint64x2_t svcreate2(svint64_t op1, svint64_t op2) { return svcreate2_s64(op1, op2); } __forceinline svuint64x2_t svcreate2(svuint64_t op1, svuint64_t op2) { return svcreate2_u64(op1, op2); } __forceinline svfloat16x2_t svcreate2(svfloat16_t op1, svfloat16_t op2) { return svcreate2_f16(op1, op2); } __forceinline svfloat32x2_t svcreate2(svfloat32_t op1, svfloat32_t op2) { return svcreate2_f32(op1, op2); } __forceinline svfloat64x2_t svcreate2(svfloat64_t op1, svfloat64_t op2) { return svcreate2_f64(op1, op2); } __forceinline svint32x3_t svcreate3(svint32_t op1, svint32_t op2, svint32_t op3) { return svcreate3_s32(op1, op2, op3); } __forceinline svint16x3_t svcreate3(svint16_t op1, svint16_t op2, svint16_t op3) { return svcreate3_s16(op1, op2, op3); } __forceinline svuint8x3_t svcreate3(svuint8_t op1, svuint8_t op2, svuint8_t op3) { return svcreate3_u8(op1, op2, op3); } __forceinline 
svuint16x3_t svcreate3(svuint16_t op1, svuint16_t op2, svuint16_t op3) { return svcreate3_u16(op1, op2, op3); } __forceinline svuint32x3_t svcreate3(svuint32_t op1, svuint32_t op2, svuint32_t op3) { return svcreate3_u32(op1, op2, op3); } __forceinline svint8x3_t svcreate3(svint8_t op1, svint8_t op2, svint8_t op3) { return svcreate3_s8(op1, op2, op3); } __forceinline svint64x3_t svcreate3(svint64_t op1, svint64_t op2, svint64_t op3) { return svcreate3_s64(op1, op2, op3); } __forceinline svuint64x3_t svcreate3(svuint64_t op1, svuint64_t op2, svuint64_t op3) { return svcreate3_u64(op1, op2, op3); } __forceinline svfloat16x3_t svcreate3(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) { return svcreate3_f16(op1, op2, op3); } __forceinline svfloat32x3_t svcreate3(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) { return svcreate3_f32(op1, op2, op3); } __forceinline svfloat64x3_t svcreate3(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) { return svcreate3_f64(op1, op2, op3); } __forceinline svint32x4_t svcreate4(svint32_t op1, svint32_t op2, svint32_t op3, svint32_t op4) { return svcreate4_s32(op1, op2, op3, op4); } __forceinline svint16x4_t svcreate4(svint16_t op1, svint16_t op2, svint16_t op3, svint16_t op4) { return svcreate4_s16(op1, op2, op3, op4); } __forceinline svuint8x4_t svcreate4(svuint8_t op1, svuint8_t op2, svuint8_t op3, svuint8_t op4) { return svcreate4_u8(op1, op2, op3, op4); } __forceinline svuint16x4_t svcreate4(svuint16_t op1, svuint16_t op2, svuint16_t op3, svuint16_t op4) { return svcreate4_u16(op1, op2, op3, op4); } __forceinline svuint32x4_t svcreate4(svuint32_t op1, svuint32_t op2, svuint32_t op3, svuint32_t op4) { return svcreate4_u32(op1, op2, op3, op4); } __forceinline svint8x4_t svcreate4(svint8_t op1, svint8_t op2, svint8_t op3, svint8_t op4) { return svcreate4_s8(op1, op2, op3, op4); } __forceinline svint64x4_t svcreate4(svint64_t op1, svint64_t op2, svint64_t op3, svint64_t op4) { return svcreate4_s64(op1, op2, op3, op4); } __forceinline svuint64x4_t svcreate4(svuint64_t op1, svuint64_t op2, svuint64_t op3, svuint64_t op4) { return svcreate4_u64(op1, op2, op3, op4); } __forceinline svfloat16x4_t svcreate4(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3, svfloat16_t op4) { return svcreate4_f16(op1, op2, op3, op4); } __forceinline svfloat32x4_t svcreate4(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, svfloat32_t op4) { return svcreate4_f32(op1, op2, op3, op4); } __forceinline svfloat64x4_t svcreate4(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, svfloat64_t op4) { return svcreate4_f64(op1, op2, op3, op4); }
template <uint64_t N, typename T> __forceinline __svehdr_hfa2_type<T> __svget2(T op1) {
    if constexpr(::std::is_same_v<T, svfloat64x2_t>) { return svget2_f64(op1, N); }
    else if constexpr(::std::is_same_v<T, svfloat32x2_t>) { return svget2_f32(op1, N); }
    else if constexpr(::std::is_same_v<T, svfloat16x2_t>) { return svget2_f16(op1, N); }
    else if constexpr(::std::is_same_v<T, svint64x2_t>) { return svget2_s64(op1, N); }
    else if constexpr(::std::is_same_v<T, svint32x2_t>) { return svget2_s32(op1, N); }
    else if constexpr(::std::is_same_v<T, svint16x2_t>) { return svget2_s16(op1, N); }
    else if constexpr(::std::is_same_v<T, svint8x2_t>) { return svget2_s8(op1, N); }
    else if constexpr(::std::is_same_v<T, svuint64x2_t>) { return svget2_u64(op1, N); }
    else if constexpr(::std::is_same_v<T, svuint32x2_t>) { return svget2_u32(op1, N); }
    else if constexpr(::std::is_same_v<T, svuint16x2_t>) { return svget2_u16(op1, N); }
    else if constexpr(::std::is_same_v<T, svuint8x2_t>) { return svget2_u8(op1, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svget2(op1, imm_index) __svget2<imm_index>(op1)
template <uint64_t N, typename T> __forceinline __svehdr_hfa3_type<T> __svget3(T op1) {
    if constexpr(::std::is_same_v<T, svfloat64x3_t>) { return svget3_f64(op1, N); }
    else if constexpr(::std::is_same_v<T, svfloat32x3_t>) { return svget3_f32(op1, N); }
    else if constexpr(::std::is_same_v<T, svfloat16x3_t>) { return svget3_f16(op1, N); }
    else if constexpr(::std::is_same_v<T, svint64x3_t>) { return svget3_s64(op1, N); }
    else if constexpr(::std::is_same_v<T, svint32x3_t>) { return svget3_s32(op1, N); }
    else if constexpr(::std::is_same_v<T, svint16x3_t>) { return svget3_s16(op1, N); }
    else if constexpr(::std::is_same_v<T, svint8x3_t>) { return svget3_s8(op1, N); }
    else if constexpr(::std::is_same_v<T, svuint64x3_t>) { return svget3_u64(op1, N); }
    else if constexpr(::std::is_same_v<T, svuint32x3_t>) { return svget3_u32(op1, N); }
    else if constexpr(::std::is_same_v<T, svuint16x3_t>) { return svget3_u16(op1, N); }
    else if constexpr(::std::is_same_v<T, svuint8x3_t>) { return svget3_u8(op1, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svget3(op1, imm_index) __svget3<imm_index>(op1)
template <uint64_t N, typename T> __forceinline __svehdr_hfa4_type<T> __svget4(T op1) {
    if constexpr(::std::is_same_v<T, svfloat64x4_t>) { return svget4_f64(op1, N); }
    else if constexpr(::std::is_same_v<T, svfloat32x4_t>) { return svget4_f32(op1, N); }
    else if constexpr(::std::is_same_v<T, svfloat16x4_t>) { return svget4_f16(op1, N); }
    else if constexpr(::std::is_same_v<T, svint64x4_t>) { return svget4_s64(op1, N); }
    else if constexpr(::std::is_same_v<T, svint32x4_t>) { return svget4_s32(op1, N); }
    else if constexpr(::std::is_same_v<T, svint16x4_t>) { return svget4_s16(op1, N); }
    else if constexpr(::std::is_same_v<T, svint8x4_t>) { return svget4_s8(op1, N); }
    else if constexpr(::std::is_same_v<T, svuint64x4_t>) { return svget4_u64(op1, N); }
    else if constexpr(::std::is_same_v<T, svuint32x4_t>) { return svget4_u32(op1, N); }
    else if constexpr(::std::is_same_v<T, svuint16x4_t>) { return svget4_u16(op1, N); }
    else if constexpr(::std::is_same_v<T, svuint8x4_t>) { return svget4_u8(op1, N); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svget4(op1, imm_index) __svget4<imm_index>(op1)
template <uint64_t N, typename T1, typename T2> __forceinline T1 __svset2(T1 op1, T2 op2) {
    if constexpr(::std::is_same_v<T1, svfloat64x2_t>) { return svset2_f64(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svfloat32x2_t>) { return svset2_f32(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svfloat16x2_t>) { return svset2_f16(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svint64x2_t>) { return svset2_s64(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svint32x2_t>) { return svset2_s32(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svint16x2_t>) { return svset2_s16(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svint8x2_t>) { return svset2_s8(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svuint64x2_t>) { return svset2_u64(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svuint32x2_t>) { return svset2_u32(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svuint16x2_t>) { return svset2_u16(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svuint8x2_t>) { return svset2_u8(op1, N, op2); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svset2(op1, imm_index, op2) __svset2<imm_index>(op1, op2)
template <uint64_t N, typename T1, typename T2> __forceinline T1 __svset3(T1 op1, T2 op2) {
    if constexpr(::std::is_same_v<T1, svfloat64x3_t>) { return svset3_f64(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svfloat32x3_t>) { return svset3_f32(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svfloat16x3_t>) { return svset3_f16(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svint64x3_t>) { return svset3_s64(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svint32x3_t>) { return svset3_s32(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svint16x3_t>) { return svset3_s16(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svint8x3_t>) { return svset3_s8(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svuint64x3_t>) { return svset3_u64(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svuint32x3_t>) { return svset3_u32(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svuint16x3_t>) { return svset3_u16(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svuint8x3_t>) { return svset3_u8(op1, N, op2); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svset3(op1, imm_index, op2) __svset3<imm_index>(op1, op2)
template <uint64_t N, typename T1, typename T2> __forceinline T1 __svset4(T1 op1, T2 op2) {
    if constexpr(::std::is_same_v<T1, svfloat64x4_t>) { return svset4_f64(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svfloat32x4_t>) { return svset4_f32(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svfloat16x4_t>) { return svset4_f16(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svint64x4_t>) { return svset4_s64(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svint32x4_t>) { return svset4_s32(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svint16x4_t>) { return svset4_s16(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svint8x4_t>) { return svset4_s8(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svuint64x4_t>) { return svset4_u64(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svuint32x4_t>) { return svset4_u32(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svuint16x4_t>) { return svset4_u16(op1, N, op2); }
    else if constexpr(::std::is_same_v<T1, svuint8x4_t>) { return svset4_u8(op1, N, op2); }
    else { static_assert(false, "unexpected type during SVE intrinsic overload"); }
}
#define svset4(op1, imm_index, op2) __svset4<imm_index>(op1, op2)
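// The svcreate2/3/4 overloads and the svget/svset macros above provide a type-generic way to
// build and access SVE vector tuples from C++; the tuple index must be a compile-time constant,
// which the macro forwards as a template argument. A minimal usage sketch (illustrative only;
// lo, hi and updated are assumed svfloat32_t values, not names defined by this header):
//
//   svfloat32x2_t pair  = svcreate2(lo, hi);         // resolves to svcreate2_f32(lo, hi)
//   svfloat32_t   first = svget2(pair, 0);           // dispatches to svget2_f32(pair, 0)
//   pair                = svset2(pair, 1, updated);  // dispatches to svset2_f32(pair, 1, updated)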
#pragma warning(pop)
#endif // defined(__cplusplus)