/*******************************************************************************
 * Copyright:  (c) 2007-2011 by Apple, Inc., All Rights Reserved.
 ******************************************************************************/

#ifndef __CL_KERNEL_H
#define __CL_KERNEL_H

// FIXME: Temporary hack to get double working until cl_khr_fp64 is handled correctly in the compiler.
// When cl_khr_fp64 has not been defined already, define it to 1 for Intel targets
// (__i386__ or __x86_64__) and for ARM targets on which __SOFT_FP__ is not defined.
#if ! defined( cl_khr_fp64 )                                            &&  \
        /* Intel */ (defined( __i386__ ) || defined( __x86_64__ )       ||  \
        /* ARM   */ (defined( __arm__ ) && ! defined(__SOFT_FP__) ))
    #define cl_khr_fp64 1
#endif

// FIXME: Temporary hack: define cl_ext_q13_APPLE to 1 on ARM targets (__arm__)
// whenever it has not been defined already.
// NOTE(review): presumably meant to go away once the compiler defines this
// extension macro itself -- confirm.
#if !defined(cl_ext_q13_APPLE) && defined( __arm__ )
    #define cl_ext_q13_APPLE 1
#endif

// 5.4 Variable Type Qualifiers
// Address-space qualifiers, implemented with the address_space attribute:
//   private  -> no attribute (the default)   -> ptx .local
//   global   -> address_space(1)             -> ptx .global
//   constant -> const + address_space(2)     -> ptx .constant
//   local    -> address_space(3)             -> ptx .shared
// Double-underscore spellings:
#define __private
#define __global    __attribute__((address_space(1)))
#define __constant  const __attribute__((address_space(2)))
#define __local     __attribute__((address_space(3)))

// Plain spellings; each expands to the same tokens as its __-prefixed twin.
#define private
#define global    __attribute__((address_space(1)))
#define constant  const __attribute__((address_space(2)))
#define local     __attribute__((address_space(3)))

// 5.1.2 OpenCL Vector Data Types
// Each element type gets 2-, 3-, 4-, 8- and 16-wide vector typedefs built with
// ext_vector_type. The typedefs carry a leading "__"; the unprefixed names
// (char2, float4, ...) are provided separately as macros. The scalar
// shorthands uchar, ushort, uint and ulong are also declared here.
typedef __attribute__(( ext_vector_type(2) ))  char __char2;
typedef __attribute__(( ext_vector_type(3) ))  char __char3;
typedef __attribute__(( ext_vector_type(4) ))  char __char4;
typedef __attribute__(( ext_vector_type(8) ))  char __char8;
typedef __attribute__(( ext_vector_type(16) )) char __char16;
typedef unsigned char uchar;
typedef __attribute__(( ext_vector_type(2) ))  unsigned char __uchar2;
typedef __attribute__(( ext_vector_type(3) ))  unsigned char __uchar3;
typedef __attribute__(( ext_vector_type(4) ))  unsigned char __uchar4;
typedef __attribute__(( ext_vector_type(8) ))  unsigned char __uchar8;
typedef __attribute__(( ext_vector_type(16) )) unsigned char __uchar16;
typedef __attribute__(( ext_vector_type(2) ))  short __short2;
typedef __attribute__(( ext_vector_type(3) ))  short __short3;
typedef __attribute__(( ext_vector_type(4) ))  short __short4;
typedef __attribute__(( ext_vector_type(8) ))  short __short8;
typedef __attribute__(( ext_vector_type(16) )) short __short16;
typedef unsigned short ushort;
typedef __attribute__(( ext_vector_type(2) ))  unsigned short __ushort2;
typedef __attribute__(( ext_vector_type(3) ))  unsigned short __ushort3;
typedef __attribute__(( ext_vector_type(4) ))  unsigned short __ushort4;
typedef __attribute__(( ext_vector_type(8) ))  unsigned short __ushort8;
typedef __attribute__(( ext_vector_type(16) )) unsigned short __ushort16;
typedef __attribute__(( ext_vector_type(2) ))  int __int2;
typedef __attribute__(( ext_vector_type(3) ))  int __int3;          //  This type is UNSUPPORTED!!!  It is for Apple internal use only. It will go away without warning and break your app.  PLEASE do not add a regular int3 type. 3-wide types are performance hostile.
typedef __attribute__(( ext_vector_type(4) ))  int __int4;
typedef __attribute__(( ext_vector_type(8) ))  int __int8;
typedef __attribute__(( ext_vector_type(16) )) int __int16;
typedef unsigned int uint;
typedef __attribute__(( ext_vector_type(2) ))  unsigned int __uint2;
typedef __attribute__(( ext_vector_type(3) ))  unsigned int __uint3;
typedef __attribute__(( ext_vector_type(4) ))  unsigned int __uint4;
typedef __attribute__(( ext_vector_type(8) ))  unsigned int __uint8;
typedef __attribute__(( ext_vector_type(16) )) unsigned int __uint16;
typedef __attribute__(( ext_vector_type(2) ))  long __long2;
typedef __attribute__(( ext_vector_type(3) ))  long __long3;
typedef __attribute__(( ext_vector_type(4) ))  long __long4;
typedef __attribute__(( ext_vector_type(8) ))  long __long8;
typedef __attribute__(( ext_vector_type(16) )) long __long16;
typedef unsigned long ulong;
typedef __attribute__(( ext_vector_type(2) ))  unsigned long __ulong2;
typedef __attribute__(( ext_vector_type(3) ))  unsigned long __ulong3;
typedef __attribute__(( ext_vector_type(4) ))  unsigned long __ulong4;
typedef __attribute__(( ext_vector_type(8) ))  unsigned long __ulong8;
typedef __attribute__(( ext_vector_type(16) )) unsigned long __ulong16;
typedef __attribute__(( ext_vector_type(2) ))  float __float2;
typedef __attribute__(( ext_vector_type(3) ))  float __float3;   //  This type is UNSUPPORTED!!!   It is for Apple internal use only. It will go away without warning and break your app.   PLEASE do not add a regular float3 type.  3-wide types are performance hostile.
typedef __attribute__(( ext_vector_type(4) ))  float __float4;
typedef __attribute__(( ext_vector_type(8) ))  float __float8;
typedef __attribute__(( ext_vector_type(16) )) float __float16;
// Double-precision vector types. Declared only when cl_khr_fp64 is defined;
// the extension pragma is issued first, before the double typedefs.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
    typedef __attribute__(( ext_vector_type(2) ))  double __double2;
    typedef __attribute__(( ext_vector_type(3) ))  double __double3;
    typedef __attribute__(( ext_vector_type(4) ))  double __double4;
    typedef __attribute__(( ext_vector_type(8) ))  double __double8;
    typedef __attribute__(( ext_vector_type(16) )) double __double16;
#endif

// Vector type common names. These are macros (not typedefs), each mapping the
// public name onto the "__"-prefixed vector typedef, so they can be #undef'd
// to avoid collisions with similar names in imported C code.
#define char2			__char2
#define char3			__char3
#define char4			__char4
#define char8			__char8
#define char16			__char16
#define uchar2			__uchar2
#define uchar3			__uchar3
#define uchar4			__uchar4
#define uchar8			__uchar8
#define uchar16			__uchar16
#define short2			__short2
#define short3			__short3
#define short4			__short4
#define short8			__short8
#define short16			__short16
#define ushort2			__ushort2
#define ushort3			__ushort3
#define ushort4			__ushort4
#define ushort8			__ushort8
#define ushort16		__ushort16
#define int2			__int2
#define int3			__int3
#define int4			__int4
#define int8			__int8
#define int16			__int16
#define uint2			__uint2
#define uint3			__uint3
#define uint4			__uint4
#define uint8			__uint8
#define uint16			__uint16
#define long2			__long2
#define long3			__long3
#define long4			__long4
#define long8			__long8
#define long16			__long16
#define ulong2			__ulong2
#define ulong3			__ulong3
#define ulong4			__ulong4
#define ulong8			__ulong8
#define ulong16			__ulong16
#define float2			__float2
#define float3			__float3
#define float4			__float4
#define float8			__float8
#define float16			__float16

// Common names for the double vector types; provided only when cl_khr_fp64
// is defined, mirroring the condition on the __doubleN typedefs.
#ifdef cl_khr_fp64
	#define double2			__double2
	#define double3			__double3
	#define double4			__double4
	#define double8			__double8
	#define double16		__double16
#endif

// Legacy SPI interfaces: older "_SPI"-suffixed spellings kept as aliases for
// the 3-wide int and float vector typedefs.
#define __int3_SPI		__int3
#define __float3_SPI	__float3


// Half data type. In this header `half` is only an alias for unsigned short;
// no arithmetic half type is declared here.
typedef unsigned short half;

// Defend reserved types: every reserved name is typedef'd to a struct whose
// tag spells out that the name must not be used. No definition of these
// struct tags is visible in this part of the file.
// NOTE(review): bool3, float3x3 and double3x3 do not appear in this list even
// though the neighbouring sizes do -- confirm whether that is intentional.
typedef struct __Reserved_Name__Do_not_use_bool2        bool2;
typedef struct __Reserved_Name__Do_not_use_bool4        bool4;
typedef struct __Reserved_Name__Do_not_use_bool8        bool8;
typedef struct __Reserved_Name__Do_not_use_bool16       bool16;
typedef struct __Reserved_Name__Do_not_use_quad         quad;
typedef struct __Reserved_Name__Do_not_use_quad2        quad2;
typedef struct __Reserved_Name__Do_not_use_quad3        quad3;
typedef struct __Reserved_Name__Do_not_use_quad4        quad4;
typedef struct __Reserved_Name__Do_not_use_quad8        quad8;
typedef struct __Reserved_Name__Do_not_use_quad16       quad16;
typedef struct __Reserved_Name__Do_not_use_complex      complex;
typedef struct __Reserved_Name__Do_not_use_imaginary    imaginary;
typedef struct __Reserved_Name__Do_not_use_float2x2     float2x2;
typedef struct __Reserved_Name__Do_not_use_float2x3     float2x3;
typedef struct __Reserved_Name__Do_not_use_float3x2     float3x2;
typedef struct __Reserved_Name__Do_not_use_float2x4     float2x4;
typedef struct __Reserved_Name__Do_not_use_float4x2     float4x2;
typedef struct __Reserved_Name__Do_not_use_float3x4     float3x4;
typedef struct __Reserved_Name__Do_not_use_float4x3     float4x3;
typedef struct __Reserved_Name__Do_not_use_float4x4     float4x4;
typedef struct __Reserved_Name__Do_not_use_float8x8     float8x8;
typedef struct __Reserved_Name__Do_not_use_float16x16   float16x16;
typedef struct __Reserved_Name__Do_not_use_double2x2    double2x2;
typedef struct __Reserved_Name__Do_not_use_double2x3    double2x3;
typedef struct __Reserved_Name__Do_not_use_double3x2    double3x2;
typedef struct __Reserved_Name__Do_not_use_double2x4    double2x4;
typedef struct __Reserved_Name__Do_not_use_double4x2    double4x2;
typedef struct __Reserved_Name__Do_not_use_double3x4    double3x4;
typedef struct __Reserved_Name__Do_not_use_double4x3    double4x3;
typedef struct __Reserved_Name__Do_not_use_double4x4    double4x4;
typedef struct __Reserved_Name__Do_not_use_double8x8    double8x8;
typedef struct __Reserved_Name__Do_not_use_double16x16  double16x16;
typedef struct __Reserved_Name__Do_not_use_half2        half2;
typedef struct __Reserved_Name__Do_not_use_half3        half3;
typedef struct __Reserved_Name__Do_not_use_half4        half4;
typedef struct __Reserved_Name__Do_not_use_half8        half8;
typedef struct __Reserved_Name__Do_not_use_half16       half16;
typedef struct __Reserved_Name__Do_not_use_float5       float5;
typedef struct __Reserved_Name__Do_not_use_float6       float6;
typedef struct __Reserved_Name__Do_not_use_float7       float7;
typedef struct __Reserved_Name__Do_not_use_float9       float9;
typedef struct __Reserved_Name__Do_not_use_float10      float10;
typedef struct __Reserved_Name__Do_not_use_float11      float11;
typedef struct __Reserved_Name__Do_not_use_float12      float12;
typedef struct __Reserved_Name__Do_not_use_float13      float13;
typedef struct __Reserved_Name__Do_not_use_float14      float14;
typedef struct __Reserved_Name__Do_not_use_float15      float15;
typedef struct __Reserved_Name__Do_not_use_float32      float32;
// When cl_khr_fp64 is NOT defined, the double vector names are still declared,
// but as typedefs of structs whose tags say the type is unsupported on this
// device (presumably so that a use of the name yields a diagnostic that
// mentions the tag -- no definition of these structs is visible here).
#ifndef cl_khr_fp64
typedef struct __Unsupported_Type__Do_not_use_double2_on_this_device      double2;
typedef struct __Unsupported_Type__Do_not_use_double3_on_this_device      double3;
typedef struct __Unsupported_Type__Do_not_use_double4_on_this_device      double4;
typedef struct __Unsupported_Type__Do_not_use_double8_on_this_device      double8;
typedef struct __Unsupported_Type__Do_not_use_double16_on_this_device     double16;
#endif
// Reserved vector widths: for each element type, the widths 5, 6, 7, 9-15 and
// 32 are typedef'd to "do not use" struct tags, in the same way as the other
// reserved names.
typedef struct __Reserved_Name__Do_not_use_double5      double5;
typedef struct __Reserved_Name__Do_not_use_double6      double6;
typedef struct __Reserved_Name__Do_not_use_double7      double7;
typedef struct __Reserved_Name__Do_not_use_double9      double9;
typedef struct __Reserved_Name__Do_not_use_double10     double10;
typedef struct __Reserved_Name__Do_not_use_double11     double11;
typedef struct __Reserved_Name__Do_not_use_double12     double12;
typedef struct __Reserved_Name__Do_not_use_double13     double13;
typedef struct __Reserved_Name__Do_not_use_double14     double14;
typedef struct __Reserved_Name__Do_not_use_double15     double15;
typedef struct __Reserved_Name__Do_not_use_double32     double32;
typedef struct __Reserved_Name__Do_not_use_char5        char5;
typedef struct __Reserved_Name__Do_not_use_char6        char6;
typedef struct __Reserved_Name__Do_not_use_char7        char7;
typedef struct __Reserved_Name__Do_not_use_char9        char9;
typedef struct __Reserved_Name__Do_not_use_char10       char10;
typedef struct __Reserved_Name__Do_not_use_char11       char11;
typedef struct __Reserved_Name__Do_not_use_char12       char12;
typedef struct __Reserved_Name__Do_not_use_char13       char13;
typedef struct __Reserved_Name__Do_not_use_char14       char14;
typedef struct __Reserved_Name__Do_not_use_char15       char15;
typedef struct __Reserved_Name__Do_not_use_char32       char32;
typedef struct __Reserved_Name__Do_not_use_uchar5       uchar5;
typedef struct __Reserved_Name__Do_not_use_uchar6       uchar6;
typedef struct __Reserved_Name__Do_not_use_uchar7       uchar7;
typedef struct __Reserved_Name__Do_not_use_uchar9       uchar9;
typedef struct __Reserved_Name__Do_not_use_uchar10      uchar10;
typedef struct __Reserved_Name__Do_not_use_uchar11      uchar11;
typedef struct __Reserved_Name__Do_not_use_uchar12      uchar12;
typedef struct __Reserved_Name__Do_not_use_uchar13      uchar13;
typedef struct __Reserved_Name__Do_not_use_uchar14      uchar14;
typedef struct __Reserved_Name__Do_not_use_uchar15      uchar15;
typedef struct __Reserved_Name__Do_not_use_uchar32      uchar32;
typedef struct __Reserved_Name__Do_not_use_short5       short5;
typedef struct __Reserved_Name__Do_not_use_short6       short6;
typedef struct __Reserved_Name__Do_not_use_short7       short7;
typedef struct __Reserved_Name__Do_not_use_short9       short9;
typedef struct __Reserved_Name__Do_not_use_short10      short10;
typedef struct __Reserved_Name__Do_not_use_short11      short11;
typedef struct __Reserved_Name__Do_not_use_short12      short12;
typedef struct __Reserved_Name__Do_not_use_short13      short13;
typedef struct __Reserved_Name__Do_not_use_short14      short14;
typedef struct __Reserved_Name__Do_not_use_short15      short15;
typedef struct __Reserved_Name__Do_not_use_short32      short32;
typedef struct __Reserved_Name__Do_not_use_ushort5      ushort5;
typedef struct __Reserved_Name__Do_not_use_ushort6      ushort6;
typedef struct __Reserved_Name__Do_not_use_ushort7      ushort7;
typedef struct __Reserved_Name__Do_not_use_ushort9      ushort9;
typedef struct __Reserved_Name__Do_not_use_ushort10     ushort10;
typedef struct __Reserved_Name__Do_not_use_ushort11     ushort11;
typedef struct __Reserved_Name__Do_not_use_ushort12     ushort12;
typedef struct __Reserved_Name__Do_not_use_ushort13     ushort13;
typedef struct __Reserved_Name__Do_not_use_ushort14     ushort14;
typedef struct __Reserved_Name__Do_not_use_ushort15     ushort15;
typedef struct __Reserved_Name__Do_not_use_ushort32     ushort32;
typedef struct __Reserved_Name__Do_not_use_int5         int5;
typedef struct __Reserved_Name__Do_not_use_int6         int6;
typedef struct __Reserved_Name__Do_not_use_int7         int7;
typedef struct __Reserved_Name__Do_not_use_int9         int9;
typedef struct __Reserved_Name__Do_not_use_int10        int10;
typedef struct __Reserved_Name__Do_not_use_int11        int11;
typedef struct __Reserved_Name__Do_not_use_int12        int12;
typedef struct __Reserved_Name__Do_not_use_int13        int13;
typedef struct __Reserved_Name__Do_not_use_int14        int14;
typedef struct __Reserved_Name__Do_not_use_int15        int15;
typedef struct __Reserved_Name__Do_not_use_int32        int32;
typedef struct __Reserved_Name__Do_not_use_uint5        uint5;
typedef struct __Reserved_Name__Do_not_use_uint6        uint6;
typedef struct __Reserved_Name__Do_not_use_uint7        uint7;
typedef struct __Reserved_Name__Do_not_use_uint9        uint9;
typedef struct __Reserved_Name__Do_not_use_uint10       uint10;
typedef struct __Reserved_Name__Do_not_use_uint11       uint11;
typedef struct __Reserved_Name__Do_not_use_uint12       uint12;
typedef struct __Reserved_Name__Do_not_use_uint13       uint13;
typedef struct __Reserved_Name__Do_not_use_uint14       uint14;
typedef struct __Reserved_Name__Do_not_use_uint15       uint15;
typedef struct __Reserved_Name__Do_not_use_uint32       uint32;
typedef struct __Reserved_Name__Do_not_use_long5        long5;
typedef struct __Reserved_Name__Do_not_use_long6        long6;
typedef struct __Reserved_Name__Do_not_use_long7        long7;
typedef struct __Reserved_Name__Do_not_use_long9        long9;
typedef struct __Reserved_Name__Do_not_use_long10       long10;
typedef struct __Reserved_Name__Do_not_use_long11       long11;
typedef struct __Reserved_Name__Do_not_use_long12       long12;
typedef struct __Reserved_Name__Do_not_use_long13       long13;
typedef struct __Reserved_Name__Do_not_use_long14       long14;
typedef struct __Reserved_Name__Do_not_use_long15       long15;
typedef struct __Reserved_Name__Do_not_use_long32       long32;
typedef struct __Reserved_Name__Do_not_use_ulong5       ulong5;
typedef struct __Reserved_Name__Do_not_use_ulong6       ulong6;
typedef struct __Reserved_Name__Do_not_use_ulong7       ulong7;
typedef struct __Reserved_Name__Do_not_use_ulong9       ulong9;
typedef struct __Reserved_Name__Do_not_use_ulong10      ulong10;
typedef struct __Reserved_Name__Do_not_use_ulong11      ulong11;
typedef struct __Reserved_Name__Do_not_use_ulong12      ulong12;
typedef struct __Reserved_Name__Do_not_use_ulong13      ulong13;
typedef struct __Reserved_Name__Do_not_use_ulong14      ulong14;
typedef struct __Reserved_Name__Do_not_use_ulong15      ulong15;
typedef struct __Reserved_Name__Do_not_use_ulong32      ulong32;
typedef struct __Reserved_Name__Do_not_use_quad5        quad5;
typedef struct __Reserved_Name__Do_not_use_quad6        quad6;
typedef struct __Reserved_Name__Do_not_use_quad7        quad7;
typedef struct __Reserved_Name__Do_not_use_quad9        quad9;
typedef struct __Reserved_Name__Do_not_use_quad10       quad10;
typedef struct __Reserved_Name__Do_not_use_quad11       quad11;
typedef struct __Reserved_Name__Do_not_use_quad12       quad12;
typedef struct __Reserved_Name__Do_not_use_quad13       quad13;
typedef struct __Reserved_Name__Do_not_use_quad14       quad14;
typedef struct __Reserved_Name__Do_not_use_quad15       quad15;
typedef struct __Reserved_Name__Do_not_use_quad32       quad32;
typedef struct __Reserved_Name__Do_not_use_half5        half5;
typedef struct __Reserved_Name__Do_not_use_half6        half6;
typedef struct __Reserved_Name__Do_not_use_half7        half7;
typedef struct __Reserved_Name__Do_not_use_half9        half9;
typedef struct __Reserved_Name__Do_not_use_half10       half10;
typedef struct __Reserved_Name__Do_not_use_half11       half11;
typedef struct __Reserved_Name__Do_not_use_half12       half12;
typedef struct __Reserved_Name__Do_not_use_half13       half13;
typedef struct __Reserved_Name__Do_not_use_half14       half14;
typedef struct __Reserved_Name__Do_not_use_half15       half15;
typedef struct __Reserved_Name__Do_not_use_half32       half32;



// Size- and pointer-related integer typedefs. They are built from compiler
// predefined macros (and, for ptrdiff_t, from the type of a pointer
// subtraction) instead of being pulled from a system header.
typedef __SIZE_TYPE__ size_t;
typedef __typeof__((int *)0 - (int *)0) ptrdiff_t;
typedef __PTRDIFF_TYPE__ intptr_t;
typedef __SIZE_TYPE__ uintptr_t;
// event_t is declared as a plain size_t.
typedef size_t event_t;

// Attribute shorthands used on the builtin prototypes in this header:
//   __OVERLOAD_INLINE__ - overloadable and always_inline
//   __OVERLOAD__        - overloadable only
#define __OVERLOAD_INLINE__ __attribute__((__overloadable__, __always_inline__))
#define __OVERLOAD__ __attribute__((__overloadable__))

// Macro machinery for C-based type overloading of builtin functions

// __CLFN_FD_1FD(name): declares overloaded prototypes of `name` taking one
// float or double argument (scalar, or 2-, 3-, 4-, 8-, 16-wide vector) and
// returning the same type as the argument.
#define __CLFN_FD_1FD(name) \
    float __OVERLOAD__ name(float x); \
    float2 __OVERLOAD__ name(float2 x); \
    float3 __OVERLOAD__ name(float3 x); \
    float4 __OVERLOAD__ name(float4 x); \
    float8 __OVERLOAD__ name(float8 x); \
    float16 __OVERLOAD__ name(float16 x); \
    double __OVERLOAD__ name(double x); \
    double2 __OVERLOAD__ name(double2 x); \
    double3 __OVERLOAD__ name(double3 x); \
    double4 __OVERLOAD__ name(double4 x); \
    double8 __OVERLOAD__ name(double8 x); \
    double16 __OVERLOAD__ name(double16 x);

// __CLFN_FD_1FD_MODD(name): declares overloaded prototypes of `name` taking
// one float or double argument of any width (1, 2, 3, 4, 8, 16). The result is
// always scalar: float for float arguments, double for double arguments.
#define __CLFN_FD_1FD_MODD(name) \
    float __OVERLOAD__ name(float x); \
    float __OVERLOAD__ name(float2 x); \
    float __OVERLOAD__ name(float3 x); \
    float __OVERLOAD__ name(float4 x); \
    float __OVERLOAD__ name(float8 x); \
    float __OVERLOAD__ name(float16 x); \
    double __OVERLOAD__ name(double x); \
    double __OVERLOAD__ name(double2 x); \
    double __OVERLOAD__ name(double3 x); \
    double __OVERLOAD__ name(double4 x); \
    double __OVERLOAD__ name(double8 x); \
    double __OVERLOAD__ name(double16 x);

// __CLFN_I_1FD(name): declares overloaded prototypes of `name` taking one
// float or double argument (widths 1, 2, 3, 4, 8, 16) and returning an int
// vector of the same width (plain int for the scalar forms).
#define __CLFN_I_1FD(name) \
    int __OVERLOAD__ name(float x); \
    int2 __OVERLOAD__ name(float2 x); \
    int3 __OVERLOAD__ name(float3 x); \
    int4 __OVERLOAD__ name(float4 x); \
    int8 __OVERLOAD__ name(float8 x); \
    int16 __OVERLOAD__ name(float16 x); \
    int __OVERLOAD__ name(double x); \
    int2 __OVERLOAD__ name(double2 x); \
    int3 __OVERLOAD__ name(double3 x); \
    int4 __OVERLOAD__ name(double4 x); \
    int8 __OVERLOAD__ name(double8 x); \
    int16 __OVERLOAD__ name(double16 x);

// __CLFN_1F_2F(name): float-only prototypes of `name` with two arguments of
// the same type, for widths 1 to 4; the result has the same type as the
// arguments.
#define __CLFN_1F_2F(name) \
    float __OVERLOAD__ name(float x, float y); \
    float2 __OVERLOAD__ name(float2 x, float2 y); \
    float3 __OVERLOAD__ name(float3 x, float3 y); \
    float4 __OVERLOAD__ name(float4 x, float4 y);

// __CLFN_1F_2F_MODD(name): float-only prototypes of `name` with two arguments
// of the same type, for widths 1 to 4; the result is always a scalar float.
#define __CLFN_1F_2F_MODD(name) \
    float __OVERLOAD__ name(float x, float y); \
    float __OVERLOAD__ name(float2 x, float2 y); \
    float __OVERLOAD__ name(float3 x, float3 y); \
    float __OVERLOAD__ name(float4 x, float4 y);

// __CLFN_1FD_2FD_MODD(name): prototypes of `name` with two float or two double
// arguments of the same type, for widths 1 to 4. The result is always scalar:
// float for float arguments, double for double arguments.
#define __CLFN_1FD_2FD_MODD(name) \
    float __OVERLOAD__ name(float x, float y); \
    float __OVERLOAD__ name(float2 x, float2 y); \
    float __OVERLOAD__ name(float3 x, float3 y); \
    float __OVERLOAD__ name(float4 x, float4 y); \
    double __OVERLOAD__ name(double x, double y); \
    double __OVERLOAD__ name(double2 x, double2 y); \
    double __OVERLOAD__ name(double3 x, double3 y); \
    double __OVERLOAD__ name(double4 x, double4 y);

// __CLFN_F_1F_MODD4(name): float-only prototypes of `name` with a single
// argument of width 1 to 4; the result is always a scalar float.
#define __CLFN_F_1F_MODD4(name) \
    float __OVERLOAD__ name(float x); \
    float __OVERLOAD__ name(float2 x); \
    float __OVERLOAD__ name(float3 x); \
    float __OVERLOAD__ name(float4 x);

// __CLFN_FD_2FD_MODD4(name): prototypes of `name` with two float or two double
// arguments of the same type, only for widths 1 to 4; the result has the same
// type as the arguments.
#define __CLFN_FD_2FD_MODD4(name) \
    float __OVERLOAD__ name(float x, float y); \
    float2 __OVERLOAD__ name(float2 x, float2 y); \
    float3 __OVERLOAD__ name(float3 x, float3 y); \
    float4 __OVERLOAD__ name(float4 x, float4 y); \
    double __OVERLOAD__ name(double x, double y); \
    double2 __OVERLOAD__ name(double2 x, double2 y); \
    double3 __OVERLOAD__ name(double3 x, double3 y); \
    double4 __OVERLOAD__ name(double4 x, double4 y);

// __CLFN_FD_1FD_MODD4(name): prototypes of `name` with a single float or
// double argument, only for widths 1 to 4; the result has the same type as
// the argument.
#define __CLFN_FD_1FD_MODD4(name) \
    float __OVERLOAD__ name(float x); \
    float2 __OVERLOAD__ name(float2 x); \
    float3 __OVERLOAD__ name(float3 x); \
    float4 __OVERLOAD__ name(float4 x); \
    double __OVERLOAD__ name(double x); \
    double2 __OVERLOAD__ name(double2 x); \
    double3 __OVERLOAD__ name(double3 x); \
    double4 __OVERLOAD__ name(double4 x);

// __CLFN_FD_1UIL(name): prototypes of `name` with a single unsigned integer
// argument (widths 1, 2, 3, 4, 8, 16): uint arguments yield a float result of
// the same width, ulong arguments yield a double result of the same width.
#define __CLFN_FD_1UIL(name) \
    float __OVERLOAD__ name(uint x); \
    float2 __OVERLOAD__ name(uint2 x); \
    float3 __OVERLOAD__ name(uint3 x); \
    float4 __OVERLOAD__ name(uint4 x); \
    float8 __OVERLOAD__ name(uint8 x); \
    float16 __OVERLOAD__ name(uint16 x); \
    double __OVERLOAD__ name(ulong x); \
    double2 __OVERLOAD__ name(ulong2 x); \
    double3 __OVERLOAD__ name(ulong3 x); \
    double4 __OVERLOAD__ name(ulong4 x); \
    double8 __OVERLOAD__ name(ulong8 x); \
    double16 __OVERLOAD__ name(ulong16 x);

// __CLFN_IL_1FD(name): prototypes of `name` with a single float or double
// argument (widths 1, 2, 3, 4, 8, 16): float arguments yield an int result of
// the same width, double arguments yield a long result of the same width.
#define __CLFN_IL_1FD(name) \
    int __OVERLOAD__ name(float x); \
    int2 __OVERLOAD__ name(float2 x); \
    int3 __OVERLOAD__ name(float3 x); \
    int4 __OVERLOAD__ name(float4 x); \
    int8 __OVERLOAD__ name(float8 x); \
    int16 __OVERLOAD__ name(float16 x); \
    long __OVERLOAD__ name(double x); \
    long2 __OVERLOAD__ name(double2 x); \
    long3 __OVERLOAD__ name(double3 x); \
    long4 __OVERLOAD__ name(double4 x); \
    long8 __OVERLOAD__ name(double8 x); \
    long16 __OVERLOAD__ name(double16 x);

// __CLFN_IL_1FD_MODD(name): same shape as __CLFN_IL_1FD (int results for float
// arguments, long vector results for double vector arguments), except that
// the scalar double form returns int rather than long.
#define __CLFN_IL_1FD_MODD(name) \
    int __OVERLOAD__ name(float x); \
    int2 __OVERLOAD__ name(float2 x); \
    int3 __OVERLOAD__ name(float3 x); \
    int4 __OVERLOAD__ name(float4 x); \
    int8 __OVERLOAD__ name(float8 x); \
    int16 __OVERLOAD__ name(float16 x); \
    int __OVERLOAD__ name(double x); \
    long2 __OVERLOAD__ name(double2 x); \
    long3 __OVERLOAD__ name(double3 x); \
    long4 __OVERLOAD__ name(double4 x); \
    long8 __OVERLOAD__ name(double8 x); \
    long16 __OVERLOAD__ name(double16 x);



// __CLFN_CSIL_1CSIL(name): prototypes of `name` with a single integer
// argument, returning the same type as the argument. Covers char, uchar, int,
// uint, short, ushort, long and ulong, each in widths 1, 2, 3, 4, 8 and 16.
#define __CLFN_CSIL_1CSIL(name) \
    char __OVERLOAD__ name(char x); \
    char2 __OVERLOAD__ name(char2 x); \
    char3 __OVERLOAD__ name(char3 x); \
    char4 __OVERLOAD__ name(char4 x); \
    char8 __OVERLOAD__ name(char8 x); \
    char16 __OVERLOAD__ name(char16 x); \
    uchar __OVERLOAD__ name(uchar x); \
    uchar2 __OVERLOAD__ name(uchar2 x); \
    uchar3 __OVERLOAD__ name(uchar3 x); \
    uchar4 __OVERLOAD__ name(uchar4 x); \
    uchar8 __OVERLOAD__ name(uchar8 x); \
    uchar16 __OVERLOAD__ name(uchar16 x); \
    int __OVERLOAD__ name(int x); \
    int2 __OVERLOAD__ name(int2 x); \
    int3 __OVERLOAD__ name(int3 x); \
    int4 __OVERLOAD__ name(int4 x); \
    int8 __OVERLOAD__ name(int8 x); \
    int16 __OVERLOAD__ name(int16 x); \
    uint __OVERLOAD__ name(uint x); \
    uint2 __OVERLOAD__ name(uint2 x); \
    uint3 __OVERLOAD__ name(uint3 x); \
    uint4 __OVERLOAD__ name(uint4 x); \
    uint8 __OVERLOAD__ name(uint8 x); \
    uint16 __OVERLOAD__ name(uint16 x); \
    short __OVERLOAD__ name(short x); \
    short2 __OVERLOAD__ name(short2 x); \
    short3 __OVERLOAD__ name(short3 x); \
    short4 __OVERLOAD__ name(short4 x); \
    short8 __OVERLOAD__ name(short8 x); \
    short16 __OVERLOAD__ name(short16 x); \
    ushort __OVERLOAD__ name(ushort x); \
    ushort2 __OVERLOAD__ name(ushort2 x); \
    ushort3 __OVERLOAD__ name(ushort3 x); \
    ushort4 __OVERLOAD__ name(ushort4 x); \
    ushort8 __OVERLOAD__ name(ushort8 x); \
    ushort16 __OVERLOAD__ name(ushort16 x); \
    long __OVERLOAD__ name(long x); \
    long2 __OVERLOAD__ name(long2 x); \
    long3 __OVERLOAD__ name(long3 x); \
    long4 __OVERLOAD__ name(long4 x); \
    long8 __OVERLOAD__ name(long8 x); \
    long16 __OVERLOAD__ name(long16 x); \
    ulong __OVERLOAD__ name(ulong x); \
    ulong2 __OVERLOAD__ name(ulong2 x); \
    ulong3 __OVERLOAD__ name(ulong3 x); \
    ulong4 __OVERLOAD__ name(ulong4 x); \
    ulong8 __OVERLOAD__ name(ulong8 x); \
    ulong16 __OVERLOAD__ name(ulong16 x);

// __CLFN_R2_A(typer, typea, name): one-argument prototypes of `name` in which
// the argument is a typea vector of width 2, 4, 8 or 16 and the result is a
// typer value of half that width (typer, typer2, typer4, typer8).
// Intended for ARM builtins with the "long" suffix, where the result type is
// twice the size of the input type; the macro itself just pairs the two type
// names it is given.
#define __CLFN_R2_A(typer, typea, name) \
    typer __OVERLOAD__ name(typea##2 x); \
    typer##2 __OVERLOAD__ name(typea##4 x); \
    typer##4 __OVERLOAD__ name(typea##8 x); \
    typer##8 __OVERLOAD__ name(typea##16 x);

// __CLFN_R2_A2(typer, typea, name): two-argument prototypes of `name`. Both
// arguments are typea values of the same width (1, 2, 4, 8 or 16) and the
// result is a typer value of that same width.
// Intended for ARM builtins with the "long" suffix, where the result type is
// twice the size of the input type.
#define __CLFN_R2_A2(typer, typea, name) \
    typer __OVERLOAD__ name(typea x, typea y); \
    typer##2 __OVERLOAD__ name(typea##2 x, typea##2 y); \
    typer##4 __OVERLOAD__ name(typea##4 x, typea##4 y); \
    typer##8 __OVERLOAD__ name(typea##8 x, typea##8 y); \
    typer##16 __OVERLOAD__ name(typea##16 x, typea##16 y);

// __CLFN_R2_A3(typer, typea, name): three-argument prototypes of `name` for
// widths 1, 2, 4, 8 and 16. The first argument and the result are typer
// values; the second and third arguments are typea values of the same width.
// Intended for ARM builtins with the "long" suffix, where the result type is
// twice the size of the input type.
#define __CLFN_R2_A3(typer, typea, name) \
    typer __OVERLOAD__ name(typer x, typea y, typea z); \
    typer##2 __OVERLOAD__ name(typer##2 x, typea##2 y, typea##2 z); \
    typer##4 __OVERLOAD__ name(typer##4 x, typea##4 y, typea##4 z); \
    typer##8 __OVERLOAD__ name(typer##8 x, typea##8 y, typea##8 z); \
    typer##16 __OVERLOAD__ name(typer##16 x, typea##16 y, typea##16 z);

// __CLFN_R2_A3R(typer, typea, name): three-argument prototypes of `name` for
// widths 1, 2, 4, 8 and 16, returning a typer value. In the vector forms the
// first two arguments are typea values and the third is a typer value.
// Intended for ARM builtins with the "long" suffix, where the result type is
// twice the size of the input type.
// NOTE(review): the scalar prototype takes (typer, typea, typea) while every
// vector prototype takes (typea, typea, typer); the scalar line matches
// __CLFN_R2_A3 rather than the vector forms here -- confirm which order is
// intended.
#define __CLFN_R2_A3R(typer, typea, name) \
    typer __OVERLOAD__ name(typer x, typea y, typea z); \
    typer##2 __OVERLOAD__ name(typea##2 x, typea##2 y, typer##2 z); \
    typer##4 __OVERLOAD__ name(typea##4 x, typea##4 y, typer##4 z); \
    typer##8 __OVERLOAD__ name(typea##8 x, typea##8 y, typer##8 z); \
    typer##16 __OVERLOAD__ name(typea##16 x, typea##16 y, typer##16 z);


// __CLFN_UCSIL_1CSIL(name): prototypes of `name` with a single integer
// argument, returning the unsigned type of the same element size and width.
// Accepts char, uchar, int, uint, short, ushort, long and ulong, each in
// widths 1, 2, 3, 4, 8 and 16; unsigned arguments map to their own type.
#define __CLFN_UCSIL_1CSIL(name) \
    uchar __OVERLOAD__ name(char x); \
    uchar2 __OVERLOAD__ name(char2 x); \
    uchar3 __OVERLOAD__ name(char3 x); \
    uchar4 __OVERLOAD__ name(char4 x); \
    uchar8 __OVERLOAD__ name(char8 x); \
    uchar16 __OVERLOAD__ name(char16 x); \
    uchar __OVERLOAD__ name(uchar x); \
    uchar2 __OVERLOAD__ name(uchar2 x); \
    uchar3 __OVERLOAD__ name(uchar3 x); \
    uchar4 __OVERLOAD__ name(uchar4 x); \
    uchar8 __OVERLOAD__ name(uchar8 x); \
    uchar16 __OVERLOAD__ name(uchar16 x); \
    uint __OVERLOAD__ name(int x); \
    uint2 __OVERLOAD__ name(int2 x); \
    uint3 __OVERLOAD__ name(int3 x); \
    uint4 __OVERLOAD__ name(int4 x); \
    uint8 __OVERLOAD__ name(int8 x); \
    uint16 __OVERLOAD__ name(int16 x); \
    uint __OVERLOAD__ name(uint x); \
    uint2 __OVERLOAD__ name(uint2 x); \
    uint3 __OVERLOAD__ name(uint3 x); \
    uint4 __OVERLOAD__ name(uint4 x); \
    uint8 __OVERLOAD__ name(uint8 x); \
    uint16 __OVERLOAD__ name(uint16 x); \
    ushort __OVERLOAD__ name(short x); \
    ushort2 __OVERLOAD__ name(short2 x); \
    ushort3 __OVERLOAD__ name(short3 x); \
    ushort4 __OVERLOAD__ name(short4 x); \
    ushort8 __OVERLOAD__ name(short8 x); \
    ushort16 __OVERLOAD__ name(short16 x); \
    ushort __OVERLOAD__ name(ushort x); \
    ushort2 __OVERLOAD__ name(ushort2 x); \
    ushort3 __OVERLOAD__ name(ushort3 x); \
    ushort4 __OVERLOAD__ name(ushort4 x); \
    ushort8 __OVERLOAD__ name(ushort8 x); \
    ushort16 __OVERLOAD__ name(ushort16 x); \
    ulong __OVERLOAD__ name(long x); \
    ulong2 __OVERLOAD__ name(long2 x); \
    ulong3 __OVERLOAD__ name(long3 x); \
    ulong4 __OVERLOAD__ name(long4 x); \
    ulong8 __OVERLOAD__ name(long8 x); \
    ulong16 __OVERLOAD__ name(long16 x); \
    ulong __OVERLOAD__ name(ulong x); \
    ulong2 __OVERLOAD__ name(ulong2 x); \
    ulong3 __OVERLOAD__ name(ulong3 x); \
    ulong4 __OVERLOAD__ name(ulong4 x); \
    ulong8 __OVERLOAD__ name(ulong8 x); \
    ulong16 __OVERLOAD__ name(ulong16 x);

// __CLFN_FD_1F(name): prototypes of `name` with two float or two double
// arguments of the same type (widths 1, 2, 3, 4, 8, 16); the result has the
// same type as the arguments.
#define __CLFN_FD_1F(name) \
    float __OVERLOAD__ name(float x, float y); \
    float2 __OVERLOAD__ name(float2 x, float2 y); \
    float3 __OVERLOAD__ name(float3 x, float3 y); \
    float4 __OVERLOAD__ name(float4 x, float4 y); \
    float8 __OVERLOAD__ name(float8 x, float8 y); \
    float16 __OVERLOAD__ name(float16 x, float16 y); \
    double __OVERLOAD__ name(double x, double y); \
    double2 __OVERLOAD__ name(double2 x, double2 y); \
    double3 __OVERLOAD__ name(double3 x, double3 y); \
    double4 __OVERLOAD__ name(double4 x, double4 y); \
    double8 __OVERLOAD__ name(double8 x, double8 y); \
    double16 __OVERLOAD__ name(double16 x, double16 y);

// __CLFN_FD_1FD_1I(name): prototypes of `name` taking a float or double first
// argument (widths 1, 2, 3, 4, 8, 16) and an int second argument; the result
// has the type of the first argument. For each vector width two forms are
// declared: one whose second argument is an int vector of the same width and
// one whose second argument is a scalar int.
#define __CLFN_FD_1FD_1I(name) \
    float __OVERLOAD__ name(float x, int y); \
    float2 __OVERLOAD__ name(float2 x, int2 y); \
    float3 __OVERLOAD__ name(float3 x, int3 y); \
    float4 __OVERLOAD__ name(float4 x, int4 y); \
    float8 __OVERLOAD__ name(float8 x, int8 y); \
    float16 __OVERLOAD__ name(float16 x, int16 y); \
    float2 __OVERLOAD__ name(float2 x, int y); \
    float3 __OVERLOAD__ name(float3 x, int y); \
    float4 __OVERLOAD__ name(float4 x, int y); \
    float8 __OVERLOAD__ name(float8 x, int y); \
    float16 __OVERLOAD__ name(float16 x, int y); \
    double __OVERLOAD__ name(double x, int y); \
    double2 __OVERLOAD__ name(double2 x, int2 y); \
    double3 __OVERLOAD__ name(double3 x, int3 y); \
    double4 __OVERLOAD__ name(double4 x, int4 y); \
    double8 __OVERLOAD__ name(double8 x, int8 y); \
    double16 __OVERLOAD__ name(double16 x, int16 y); \
    double2 __OVERLOAD__ name(double2 x, int y); \
    double3 __OVERLOAD__ name(double3 x, int y); \
    double4 __OVERLOAD__ name(double4 x, int y); \
    double8 __OVERLOAD__ name(double8 x, int y); \
    double16 __OVERLOAD__ name(double16 x, int y);

// __CLFN_FD_1FD_1PFD(name): prototypes of `name` taking a float or double
// value (widths 1, 2, 3, 4, 8, 16) plus a pointer to the same type, and
// returning that type. Each width is declared three times: with an
// unqualified pointer, a __global pointer and a __local pointer.
#define __CLFN_FD_1FD_1PFD(name) \
    float __OVERLOAD__ name(float x, float* y); \
    float2 __OVERLOAD__ name(float2 x, float2* y); \
    float3 __OVERLOAD__ name(float3 x, float3* y); \
    float4 __OVERLOAD__ name(float4 x, float4* y); \
    float8 __OVERLOAD__ name(float8 x, float8* y); \
    float16 __OVERLOAD__ name(float16 x, float16* y); \
    float __OVERLOAD__ name(float x, __global float* y); \
    float2 __OVERLOAD__ name(float2 x, __global float2* y); \
    float3 __OVERLOAD__ name(float3 x, __global float3* y); \
    float4 __OVERLOAD__ name(float4 x, __global float4* y); \
    float8 __OVERLOAD__ name(float8 x, __global float8* y); \
    float16 __OVERLOAD__ name(float16 x, __global float16* y); \
    float __OVERLOAD__ name(float x, __local float* y); \
    float2 __OVERLOAD__ name(float2 x, __local float2* y); \
    float3 __OVERLOAD__ name(float3 x, __local float3* y); \
    float4 __OVERLOAD__ name(float4 x, __local float4* y); \
    float8 __OVERLOAD__ name(float8 x, __local float8* y); \
    float16 __OVERLOAD__ name(float16 x, __local float16* y); \
    double __OVERLOAD__ name(double x, double* y); \
    double2 __OVERLOAD__ name(double2 x, double2* y); \
    double3 __OVERLOAD__ name(double3 x, double3* y); \
    double4 __OVERLOAD__ name(double4 x, double4* y); \
    double8 __OVERLOAD__ name(double8 x, double8* y); \
    double16 __OVERLOAD__ name(double16 x, double16* y); \
    double __OVERLOAD__ name(double x, __global double* y); \
    double2 __OVERLOAD__ name(double2 x, __global double2* y); \
    double3 __OVERLOAD__ name(double3 x, __global double3* y); \
    double4 __OVERLOAD__ name(double4 x, __global double4* y); \
    double8 __OVERLOAD__ name(double8 x, __global double8* y); \
    double16 __OVERLOAD__ name(double16 x, __global double16* y); \
    double __OVERLOAD__ name(double x, __local double* y); \
    double2 __OVERLOAD__ name(double2 x, __local double2* y); \
    double3 __OVERLOAD__ name(double3 x, __local double3* y); \
    double4 __OVERLOAD__ name(double4 x, __local double4* y); \
    double8 __OVERLOAD__ name(double8 x, __local double8* y); \
    double16 __OVERLOAD__ name(double16 x, __local double16* y);

// __CLFN_FD_1FD_1PI(name): declares overloads of `name` that take a
// floating-point value x plus a pointer y to an int of matching vector width,
// and return the type of x.  Covers float and double at scalar width and
// vector widths 2/3/4/8/16, with y unqualified, __global, or __local
// (36 prototypes).
#define __CLFN_FD_1FD_1PI(name) \
    /* float, unqualified pointer */ \
    float __OVERLOAD__ name(float x, int *y); \
    float2 __OVERLOAD__ name(float2 x, int2 *y); \
    float3 __OVERLOAD__ name(float3 x, int3 *y); \
    float4 __OVERLOAD__ name(float4 x, int4 *y); \
    float8 __OVERLOAD__ name(float8 x, int8 *y); \
    float16 __OVERLOAD__ name(float16 x, int16 *y); \
    /* float, __global pointer */ \
    float __OVERLOAD__ name(float x, __global int *y); \
    float2 __OVERLOAD__ name(float2 x, __global int2 *y); \
    float3 __OVERLOAD__ name(float3 x, __global int3 *y); \
    float4 __OVERLOAD__ name(float4 x, __global int4 *y); \
    float8 __OVERLOAD__ name(float8 x, __global int8 *y); \
    float16 __OVERLOAD__ name(float16 x, __global int16 *y); \
    /* float, __local pointer */ \
    float __OVERLOAD__ name(float x, __local int *y); \
    float2 __OVERLOAD__ name(float2 x, __local int2 *y); \
    float3 __OVERLOAD__ name(float3 x, __local int3 *y); \
    float4 __OVERLOAD__ name(float4 x, __local int4 *y); \
    float8 __OVERLOAD__ name(float8 x, __local int8 *y); \
    float16 __OVERLOAD__ name(float16 x, __local int16 *y); \
    /* double, unqualified pointer */ \
    double __OVERLOAD__ name(double x, int *y); \
    double2 __OVERLOAD__ name(double2 x, int2 *y); \
    double3 __OVERLOAD__ name(double3 x, int3 *y); \
    double4 __OVERLOAD__ name(double4 x, int4 *y); \
    double8 __OVERLOAD__ name(double8 x, int8 *y); \
    double16 __OVERLOAD__ name(double16 x, int16 *y); \
    /* double, __global pointer */ \
    double __OVERLOAD__ name(double x, __global int *y); \
    double2 __OVERLOAD__ name(double2 x, __global int2 *y); \
    double3 __OVERLOAD__ name(double3 x, __global int3 *y); \
    double4 __OVERLOAD__ name(double4 x, __global int4 *y); \
    double8 __OVERLOAD__ name(double8 x, __global int8 *y); \
    double16 __OVERLOAD__ name(double16 x, __global int16 *y); \
    /* double, __local pointer */ \
    double __OVERLOAD__ name(double x, __local int *y); \
    double2 __OVERLOAD__ name(double2 x, __local int2 *y); \
    double3 __OVERLOAD__ name(double3 x, __local int3 *y); \
    double4 __OVERLOAD__ name(double4 x, __local int4 *y); \
    double8 __OVERLOAD__ name(double8 x, __local int8 *y); \
    double16 __OVERLOAD__ name(double16 x, __local int16 *y);

// __CLFN_FD_2FD_1PI(name): declares overloads of `name` that take two
// floating-point values x and y of the same type plus a pointer z to an int of
// matching vector width, and return the type of x.  Covers float and double at
// scalar width and vector widths 2/3/4/8/16, with z unqualified, __global, or
// __local (36 prototypes).
#define __CLFN_FD_2FD_1PI(name) \
    /* float, unqualified pointer */ \
    float __OVERLOAD__ name(float x, float y, int *z); \
    float2 __OVERLOAD__ name(float2 x, float2 y, int2 *z); \
    float3 __OVERLOAD__ name(float3 x, float3 y, int3 *z); \
    float4 __OVERLOAD__ name(float4 x, float4 y, int4 *z); \
    float8 __OVERLOAD__ name(float8 x, float8 y, int8 *z); \
    float16 __OVERLOAD__ name(float16 x, float16 y, int16 *z); \
    /* float, __global pointer */ \
    float __OVERLOAD__ name(float x, float y, __global int *z); \
    float2 __OVERLOAD__ name(float2 x, float2 y, __global int2 *z); \
    float3 __OVERLOAD__ name(float3 x, float3 y, __global int3 *z); \
    float4 __OVERLOAD__ name(float4 x, float4 y, __global int4 *z); \
    float8 __OVERLOAD__ name(float8 x, float8 y, __global int8 *z); \
    float16 __OVERLOAD__ name(float16 x, float16 y, __global int16 *z); \
    /* float, __local pointer */ \
    float __OVERLOAD__ name(float x, float y, __local int *z); \
    float2 __OVERLOAD__ name(float2 x, float2 y, __local int2 *z); \
    float3 __OVERLOAD__ name(float3 x, float3 y, __local int3 *z); \
    float4 __OVERLOAD__ name(float4 x, float4 y, __local int4 *z); \
    float8 __OVERLOAD__ name(float8 x, float8 y, __local int8 *z); \
    float16 __OVERLOAD__ name(float16 x, float16 y, __local int16 *z); \
    /* double, unqualified pointer */ \
    double __OVERLOAD__ name(double x, double y, int *z); \
    double2 __OVERLOAD__ name(double2 x, double2 y, int2 *z); \
    double3 __OVERLOAD__ name(double3 x, double3 y, int3 *z); \
    double4 __OVERLOAD__ name(double4 x, double4 y, int4 *z); \
    double8 __OVERLOAD__ name(double8 x, double8 y, int8 *z); \
    double16 __OVERLOAD__ name(double16 x, double16 y, int16 *z); \
    /* double, __global pointer */ \
    double __OVERLOAD__ name(double x, double y, __global int *z); \
    double2 __OVERLOAD__ name(double2 x, double2 y, __global int2 *z); \
    double3 __OVERLOAD__ name(double3 x, double3 y, __global int3 *z); \
    double4 __OVERLOAD__ name(double4 x, double4 y, __global int4 *z); \
    double8 __OVERLOAD__ name(double8 x, double8 y, __global int8 *z); \
    double16 __OVERLOAD__ name(double16 x, double16 y, __global int16 *z); \
    /* double, __local pointer */ \
    double __OVERLOAD__ name(double x, double y, __local int *z); \
    double2 __OVERLOAD__ name(double2 x, double2 y, __local int2 *z); \
    double3 __OVERLOAD__ name(double3 x, double3 y, __local int3 *z); \
    double4 __OVERLOAD__ name(double4 x, double4 y, __local int4 *z); \
    double8 __OVERLOAD__ name(double8 x, double8 y, __local int8 *z); \
    double16 __OVERLOAD__ name(double16 x, double16 y, __local int16 *z);


// __CLFN_FD_3FD_MOD(name): declares three-argument overloads of `name` where
// x and y share one floating-point type (scalar or vector width 2/3/4/8/16)
// and z is always the scalar of that element type.  The return type matches x.
// Covers float and double (12 prototypes).
#define __CLFN_FD_3FD_MOD(name) \
    /* float, scalar z */ \
    float __OVERLOAD__ name(float x, float y, float z); \
    float2 __OVERLOAD__ name(float2 x, float2 y, float z); \
    float3 __OVERLOAD__ name(float3 x, float3 y, float z); \
    float4 __OVERLOAD__ name(float4 x, float4 y, float z); \
    float8 __OVERLOAD__ name(float8 x, float8 y, float z); \
    float16 __OVERLOAD__ name(float16 x, float16 y, float z); \
    /* double, scalar z */ \
    double __OVERLOAD__ name(double x, double y, double z); \
    double2 __OVERLOAD__ name(double2 x, double2 y, double z); \
    double3 __OVERLOAD__ name(double3 x, double3 y, double z); \
    double4 __OVERLOAD__ name(double4 x, double4 y, double z); \
    double8 __OVERLOAD__ name(double8 x, double8 y, double z); \
    double16 __OVERLOAD__ name(double16 x, double16 y, double z);

// __CLFN_FD_3FD(name): declares three-argument overloads of `name` where x, y
// and z all share one floating-point type and the return type is that same
// type.  Covers float and double at scalar width and vector widths
// 2/3/4/8/16 (12 prototypes).
#define __CLFN_FD_3FD(name) \
    /* float */ \
    float __OVERLOAD__ name(float x, float y, float z); \
    float2 __OVERLOAD__ name(float2 x, float2 y, float2 z); \
    float3 __OVERLOAD__ name(float3 x, float3 y, float3 z); \
    float4 __OVERLOAD__ name(float4 x, float4 y, float4 z); \
    float8 __OVERLOAD__ name(float8 x, float8 y, float8 z); \
    float16 __OVERLOAD__ name(float16 x, float16 y, float16 z); \
    /* double */ \
    double __OVERLOAD__ name(double x, double y, double z); \
    double2 __OVERLOAD__ name(double2 x, double2 y, double2 z); \
    double3 __OVERLOAD__ name(double3 x, double3 y, double3 z); \
    double4 __OVERLOAD__ name(double4 x, double4 y, double4 z); \
    double8 __OVERLOAD__ name(double8 x, double8 y, double8 z); \
    double16 __OVERLOAD__ name(double16 x, double16 y, double16 z);


// __CLFN_FD_2FD(name): declares two-argument overloads of `name` where x and y
// share one floating-point type and the return type is that same type.
// Covers float and double at scalar width and vector widths 2/3/4/8/16
// (12 prototypes).
#define __CLFN_FD_2FD(name) \
    /* float */ \
    float __OVERLOAD__ name(float x, float y); \
    float2 __OVERLOAD__ name(float2 x, float2 y); \
    float3 __OVERLOAD__ name(float3 x, float3 y); \
    float4 __OVERLOAD__ name(float4 x, float4 y); \
    float8 __OVERLOAD__ name(float8 x, float8 y); \
    float16 __OVERLOAD__ name(float16 x, float16 y); \
    /* double */ \
    double __OVERLOAD__ name(double x, double y); \
    double2 __OVERLOAD__ name(double2 x, double2 y); \
    double3 __OVERLOAD__ name(double3 x, double3 y); \
    double4 __OVERLOAD__ name(double4 x, double4 y); \
    double8 __OVERLOAD__ name(double8 x, double8 y); \
    double16 __OVERLOAD__ name(double16 x, double16 y);

// __CLFN_FD_2FD_EXT(name): declares two-argument overloads of `name` where x
// is a floating-point vector (width 2/3/4/8/16) and y is the scalar of the
// same element type.  The return type matches x.  Covers float and double;
// there is no scalar/scalar form here (10 prototypes).
#define __CLFN_FD_2FD_EXT(name) \
    /* float vector, float scalar */ \
    float2 __OVERLOAD__ name(float2 x, float y); \
    float3 __OVERLOAD__ name(float3 x, float y); \
    float4 __OVERLOAD__ name(float4 x, float y); \
    float8 __OVERLOAD__ name(float8 x, float y); \
    float16 __OVERLOAD__ name(float16 x, float y); \
    /* double vector, double scalar */ \
    double2 __OVERLOAD__ name(double2 x, double y); \
    double3 __OVERLOAD__ name(double3 x, double y); \
    double4 __OVERLOAD__ name(double4 x, double y); \
    double8 __OVERLOAD__ name(double8 x, double y); \
    double16 __OVERLOAD__ name(double16 x, double y);

// __CLFN_IL_2FD(name): declares two-argument overloads of `name` that take a
// pair of same-typed floating-point values and return an integer type.
//   float  inputs: int for scalar, intN for vector width N.
//   double inputs: int for scalar, longN for vector width N.
// Vector widths are 2/3/4/8/16 (12 prototypes).
#define __CLFN_IL_2FD(name) \
    /* float inputs -> int / intN */ \
    int __OVERLOAD__ name(float x, float y); \
    int2 __OVERLOAD__ name(float2 x, float2 y); \
    int3 __OVERLOAD__ name(float3 x, float3 y); \
    int4 __OVERLOAD__ name(float4 x, float4 y); \
    int8 __OVERLOAD__ name(float8 x, float8 y); \
    int16 __OVERLOAD__ name(float16 x, float16 y); \
    /* double inputs -> int (scalar) / longN (vector) */ \
    int __OVERLOAD__ name(double x, double y); \
    long2 __OVERLOAD__ name(double2 x, double2 y); \
    long3 __OVERLOAD__ name(double3 x, double3 y); \
    long4 __OVERLOAD__ name(double4 x, double4 y); \
    long8 __OVERLOAD__ name(double8 x, double8 y); \
    long16 __OVERLOAD__ name(double16 x, double16 y);

// These macros are for the operations that support SPI types

// __CLFN_F_1F(name): declares float-only, one-argument overloads of `name`
// whose return type equals the argument type, at scalar width and vector
// widths 2/3/4/8/16 (6 prototypes).
#define __CLFN_F_1F(name) \
    float __OVERLOAD__ name(float x); \
    float2 __OVERLOAD__ name(float2 x); \
    float3 __OVERLOAD__ name(float3 x); \
    float4 __OVERLOAD__ name(float4 x); \
    float8 __OVERLOAD__ name(float8 x); \
    float16 __OVERLOAD__ name(float16 x);

// __CLFN_F_2F(name): declares float-only, two-argument overloads of `name`
// where x, y and the return value share one type, at scalar width and vector
// widths 2/3/4/8/16.  No double forms are declared (6 prototypes).
#define __CLFN_F_2F(name) \
    float __OVERLOAD__ name(float x, float y); \
    float2 __OVERLOAD__ name(float2 x, float2 y); \
    float3 __OVERLOAD__ name(float3 x, float3 y); \
    float4 __OVERLOAD__ name(float4 x, float4 y); \
    float8 __OVERLOAD__ name(float8 x, float8 y); \
    float16 __OVERLOAD__ name(float16 x, float16 y);

// __CLFN_UCSIL_2CSIL(name): declares two-argument integer overloads of `name`
// whose return type is the unsigned counterpart of the argument type
// (char/uchar -> uchar, short/ushort -> ushort, int/uint -> uint,
// long/ulong -> ulong).  Both arguments share one type; scalar width and
// vector widths 2/3/4/8/16 are covered (48 prototypes).
#define __CLFN_UCSIL_2CSIL(name) \
    /* char -> uchar */ \
    uchar __OVERLOAD__ name(char x, char y); \
    uchar2 __OVERLOAD__ name(char2 x, char2 y); \
    uchar3 __OVERLOAD__ name(char3 x, char3 y); \
    uchar4 __OVERLOAD__ name(char4 x, char4 y); \
    uchar8 __OVERLOAD__ name(char8 x, char8 y); \
    uchar16 __OVERLOAD__ name(char16 x, char16 y); \
    /* uchar -> uchar */ \
    uchar __OVERLOAD__ name(uchar x, uchar y); \
    uchar2 __OVERLOAD__ name(uchar2 x, uchar2 y); \
    uchar3 __OVERLOAD__ name(uchar3 x, uchar3 y); \
    uchar4 __OVERLOAD__ name(uchar4 x, uchar4 y); \
    uchar8 __OVERLOAD__ name(uchar8 x, uchar8 y); \
    uchar16 __OVERLOAD__ name(uchar16 x, uchar16 y); \
    /* short -> ushort */ \
    ushort __OVERLOAD__ name(short x, short y); \
    ushort2 __OVERLOAD__ name(short2 x, short2 y); \
    ushort3 __OVERLOAD__ name(short3 x, short3 y); \
    ushort4 __OVERLOAD__ name(short4 x, short4 y); \
    ushort8 __OVERLOAD__ name(short8 x, short8 y); \
    ushort16 __OVERLOAD__ name(short16 x, short16 y); \
    /* ushort -> ushort */ \
    ushort __OVERLOAD__ name(ushort x, ushort y); \
    ushort2 __OVERLOAD__ name(ushort2 x, ushort2 y); \
    ushort3 __OVERLOAD__ name(ushort3 x, ushort3 y); \
    ushort4 __OVERLOAD__ name(ushort4 x, ushort4 y); \
    ushort8 __OVERLOAD__ name(ushort8 x, ushort8 y); \
    ushort16 __OVERLOAD__ name(ushort16 x, ushort16 y); \
    /* int -> uint */ \
    uint __OVERLOAD__ name(int x, int y); \
    uint2 __OVERLOAD__ name(int2 x, int2 y); \
    uint3 __OVERLOAD__ name(int3 x, int3 y); \
    uint4 __OVERLOAD__ name(int4 x, int4 y); \
    uint8 __OVERLOAD__ name(int8 x, int8 y); \
    uint16 __OVERLOAD__ name(int16 x, int16 y); \
    /* uint -> uint */ \
    uint __OVERLOAD__ name(uint x, uint y); \
    uint2 __OVERLOAD__ name(uint2 x, uint2 y); \
    uint3 __OVERLOAD__ name(uint3 x, uint3 y); \
    uint4 __OVERLOAD__ name(uint4 x, uint4 y); \
    uint8 __OVERLOAD__ name(uint8 x, uint8 y); \
    uint16 __OVERLOAD__ name(uint16 x, uint16 y); \
    /* long -> ulong */ \
    ulong __OVERLOAD__ name(long x, long y); \
    ulong2 __OVERLOAD__ name(long2 x, long2 y); \
    ulong3 __OVERLOAD__ name(long3 x, long3 y); \
    ulong4 __OVERLOAD__ name(long4 x, long4 y); \
    ulong8 __OVERLOAD__ name(long8 x, long8 y); \
    ulong16 __OVERLOAD__ name(long16 x, long16 y); \
    /* ulong -> ulong */ \
    ulong __OVERLOAD__ name(ulong x, ulong y); \
    ulong2 __OVERLOAD__ name(ulong2 x, ulong2 y); \
    ulong3 __OVERLOAD__ name(ulong3 x, ulong3 y); \
    ulong4 __OVERLOAD__ name(ulong4 x, ulong4 y); \
    ulong8 __OVERLOAD__ name(ulong8 x, ulong8 y); \
    ulong16 __OVERLOAD__ name(ulong16 x, ulong16 y);

// __CLFN_ALL(name): declares two-argument integer overloads of `name` where
// x, y and the return value all share one type.  Covers char, uchar, short,
// ushort, int, uint, long and ulong at scalar width and vector widths
// 2/3/4/8/16 (48 prototypes).
#define __CLFN_ALL(name) \
    /* char */ \
    char __OVERLOAD__ name(char x, char y); \
    char2 __OVERLOAD__ name(char2 x, char2 y); \
    char3 __OVERLOAD__ name(char3 x, char3 y); \
    char4 __OVERLOAD__ name(char4 x, char4 y); \
    char8 __OVERLOAD__ name(char8 x, char8 y); \
    char16 __OVERLOAD__ name(char16 x, char16 y); \
    /* uchar */ \
    uchar __OVERLOAD__ name(uchar x, uchar y); \
    uchar2 __OVERLOAD__ name(uchar2 x, uchar2 y); \
    uchar3 __OVERLOAD__ name(uchar3 x, uchar3 y); \
    uchar4 __OVERLOAD__ name(uchar4 x, uchar4 y); \
    uchar8 __OVERLOAD__ name(uchar8 x, uchar8 y); \
    uchar16 __OVERLOAD__ name(uchar16 x, uchar16 y); \
    /* short */ \
    short __OVERLOAD__ name(short x, short y); \
    short2 __OVERLOAD__ name(short2 x, short2 y); \
    short3 __OVERLOAD__ name(short3 x, short3 y); \
    short4 __OVERLOAD__ name(short4 x, short4 y); \
    short8 __OVERLOAD__ name(short8 x, short8 y); \
    short16 __OVERLOAD__ name(short16 x, short16 y); \
    /* ushort */ \
    ushort __OVERLOAD__ name(ushort x, ushort y); \
    ushort2 __OVERLOAD__ name(ushort2 x, ushort2 y); \
    ushort3 __OVERLOAD__ name(ushort3 x, ushort3 y); \
    ushort4 __OVERLOAD__ name(ushort4 x, ushort4 y); \
    ushort8 __OVERLOAD__ name(ushort8 x, ushort8 y); \
    ushort16 __OVERLOAD__ name(ushort16 x, ushort16 y); \
    /* int */ \
    int __OVERLOAD__ name(int x, int y); \
    int2 __OVERLOAD__ name(int2 x, int2 y); \
    int3 __OVERLOAD__ name(int3 x, int3 y); \
    int4 __OVERLOAD__ name(int4 x, int4 y); \
    int8 __OVERLOAD__ name(int8 x, int8 y); \
    int16 __OVERLOAD__ name(int16 x, int16 y); \
    /* uint */ \
    uint __OVERLOAD__ name(uint x, uint y); \
    uint2 __OVERLOAD__ name(uint2 x, uint2 y); \
    uint3 __OVERLOAD__ name(uint3 x, uint3 y); \
    uint4 __OVERLOAD__ name(uint4 x, uint4 y); \
    uint8 __OVERLOAD__ name(uint8 x, uint8 y); \
    uint16 __OVERLOAD__ name(uint16 x, uint16 y); \
    /* long */ \
    long __OVERLOAD__ name(long x, long y); \
    long2 __OVERLOAD__ name(long2 x, long2 y); \
    long3 __OVERLOAD__ name(long3 x, long3 y); \
    long4 __OVERLOAD__ name(long4 x, long4 y); \
    long8 __OVERLOAD__ name(long8 x, long8 y); \
    long16 __OVERLOAD__ name(long16 x, long16 y); \
    /* ulong */ \
    ulong __OVERLOAD__ name(ulong x, ulong y); \
    ulong2 __OVERLOAD__ name(ulong2 x, ulong2 y); \
    ulong3 __OVERLOAD__ name(ulong3 x, ulong3 y); \
    ulong4 __OVERLOAD__ name(ulong4 x, ulong4 y); \
    ulong8 __OVERLOAD__ name(ulong8 x, ulong8 y); \
    ulong16 __OVERLOAD__ name(ulong16 x, ulong16 y);

// __CLFN_ALL_MODD(name): declares two-argument overloads of `name` where x is
// a scalar or a vector (width 2/3/4/8/16) and y is always the scalar of the
// same element type.  The return type matches x.  Covers char, uchar, short,
// ushort, int, uint, long, ulong, float and double (60 prototypes).
#define __CLFN_ALL_MODD(name) \
    /* char */ \
    char __OVERLOAD__ name(char x, char y); \
    char2 __OVERLOAD__ name(char2 x, char y); \
    char3 __OVERLOAD__ name(char3 x, char y); \
    char4 __OVERLOAD__ name(char4 x, char y); \
    char8 __OVERLOAD__ name(char8 x, char y); \
    char16 __OVERLOAD__ name(char16 x, char y); \
    /* uchar */ \
    uchar __OVERLOAD__ name(uchar x, uchar y); \
    uchar2 __OVERLOAD__ name(uchar2 x, uchar y); \
    uchar3 __OVERLOAD__ name(uchar3 x, uchar y); \
    uchar4 __OVERLOAD__ name(uchar4 x, uchar y); \
    uchar8 __OVERLOAD__ name(uchar8 x, uchar y); \
    uchar16 __OVERLOAD__ name(uchar16 x, uchar y); \
    /* short */ \
    short __OVERLOAD__ name(short x, short y); \
    short2 __OVERLOAD__ name(short2 x, short y); \
    short3 __OVERLOAD__ name(short3 x, short y); \
    short4 __OVERLOAD__ name(short4 x, short y); \
    short8 __OVERLOAD__ name(short8 x, short y); \
    short16 __OVERLOAD__ name(short16 x, short y); \
    /* ushort */ \
    ushort __OVERLOAD__ name(ushort x, ushort y); \
    ushort2 __OVERLOAD__ name(ushort2 x, ushort y); \
    ushort3 __OVERLOAD__ name(ushort3 x, ushort y); \
    ushort4 __OVERLOAD__ name(ushort4 x, ushort y); \
    ushort8 __OVERLOAD__ name(ushort8 x, ushort y); \
    ushort16 __OVERLOAD__ name(ushort16 x, ushort y); \
    /* int */ \
    int __OVERLOAD__ name(int x, int y); \
    int2 __OVERLOAD__ name(int2 x, int y); \
    int3 __OVERLOAD__ name(int3 x, int y); \
    int4 __OVERLOAD__ name(int4 x, int y); \
    int8 __OVERLOAD__ name(int8 x, int y); \
    int16 __OVERLOAD__ name(int16 x, int y); \
    /* uint */ \
    uint __OVERLOAD__ name(uint x, uint y); \
    uint2 __OVERLOAD__ name(uint2 x, uint y); \
    uint3 __OVERLOAD__ name(uint3 x, uint y); \
    uint4 __OVERLOAD__ name(uint4 x, uint y); \
    uint8 __OVERLOAD__ name(uint8 x, uint y); \
    uint16 __OVERLOAD__ name(uint16 x, uint y); \
    /* long */ \
    long __OVERLOAD__ name(long x, long y); \
    long2 __OVERLOAD__ name(long2 x, long y); \
    long3 __OVERLOAD__ name(long3 x, long y); \
    long4 __OVERLOAD__ name(long4 x, long y); \
    long8 __OVERLOAD__ name(long8 x, long y); \
    long16 __OVERLOAD__ name(long16 x, long y); \
    /* ulong */ \
    ulong __OVERLOAD__ name(ulong x, ulong y); \
    ulong2 __OVERLOAD__ name(ulong2 x, ulong y); \
    ulong3 __OVERLOAD__ name(ulong3 x, ulong y); \
    ulong4 __OVERLOAD__ name(ulong4 x, ulong y); \
    ulong8 __OVERLOAD__ name(ulong8 x, ulong y); \
    ulong16 __OVERLOAD__ name(ulong16 x, ulong y); \
    /* float */ \
    float __OVERLOAD__ name(float x, float y); \
    float2 __OVERLOAD__ name(float2 x, float y); \
    float3 __OVERLOAD__ name(float3 x, float y); \
    float4 __OVERLOAD__ name(float4 x, float y); \
    float8 __OVERLOAD__ name(float8 x, float y); \
    float16 __OVERLOAD__ name(float16 x, float y); \
    /* double */ \
    double __OVERLOAD__ name(double x, double y); \
    double2 __OVERLOAD__ name(double2 x, double y); \
    double3 __OVERLOAD__ name(double3 x, double y); \
    double4 __OVERLOAD__ name(double4 x, double y); \
    double8 __OVERLOAD__ name(double8 x, double y); \
    double16 __OVERLOAD__ name(double16 x, double y);

// __CLFN_I3(name): declares three-argument integer overloads of `name` where
// x, y, z and the return value all share one type.  Covers char, short, int,
// long and their unsigned forms at scalar width and vector widths 2/3/4/8/16
// (48 prototypes, listed width by width).
#define __CLFN_I3(name) \
    /* scalar */ \
    char __OVERLOAD__ name(char x, char y, char z); \
    short __OVERLOAD__ name(short x, short y, short z); \
    int __OVERLOAD__ name(int x, int y, int z); \
    long __OVERLOAD__ name(long x, long y, long z); \
    uchar __OVERLOAD__ name(uchar x, uchar y, uchar z); \
    ushort __OVERLOAD__ name(ushort x, ushort y, ushort z); \
    uint __OVERLOAD__ name(uint x, uint y, uint z); \
    ulong __OVERLOAD__ name(ulong x, ulong y, ulong z); \
    /* 2-element vectors */ \
    char2 __OVERLOAD__ name(char2 x, char2 y, char2 z); \
    short2 __OVERLOAD__ name(short2 x, short2 y, short2 z); \
    int2 __OVERLOAD__ name(int2 x, int2 y, int2 z); \
    long2 __OVERLOAD__ name(long2 x, long2 y, long2 z); \
    uchar2 __OVERLOAD__ name(uchar2 x, uchar2 y, uchar2 z); \
    ushort2 __OVERLOAD__ name(ushort2 x, ushort2 y, ushort2 z); \
    uint2 __OVERLOAD__ name(uint2 x, uint2 y, uint2 z); \
    ulong2 __OVERLOAD__ name(ulong2 x, ulong2 y, ulong2 z); \
    /* 3-element vectors */ \
    char3 __OVERLOAD__ name(char3 x, char3 y, char3 z); \
    short3 __OVERLOAD__ name(short3 x, short3 y, short3 z); \
    int3 __OVERLOAD__ name(int3 x, int3 y, int3 z); \
    long3 __OVERLOAD__ name(long3 x, long3 y, long3 z); \
    uchar3 __OVERLOAD__ name(uchar3 x, uchar3 y, uchar3 z); \
    ushort3 __OVERLOAD__ name(ushort3 x, ushort3 y, ushort3 z); \
    uint3 __OVERLOAD__ name(uint3 x, uint3 y, uint3 z); \
    ulong3 __OVERLOAD__ name(ulong3 x, ulong3 y, ulong3 z); \
    /* 4-element vectors */ \
    char4 __OVERLOAD__ name(char4 x, char4 y, char4 z); \
    short4 __OVERLOAD__ name(short4 x, short4 y, short4 z); \
    int4 __OVERLOAD__ name(int4 x, int4 y, int4 z); \
    long4 __OVERLOAD__ name(long4 x, long4 y, long4 z); \
    uchar4 __OVERLOAD__ name(uchar4 x, uchar4 y, uchar4 z); \
    ushort4 __OVERLOAD__ name(ushort4 x, ushort4 y, ushort4 z); \
    uint4 __OVERLOAD__ name(uint4 x, uint4 y, uint4 z); \
    ulong4 __OVERLOAD__ name(ulong4 x, ulong4 y, ulong4 z); \
    /* 8-element vectors */ \
    char8 __OVERLOAD__ name(char8 x, char8 y, char8 z); \
    short8 __OVERLOAD__ name(short8 x, short8 y, short8 z); \
    int8 __OVERLOAD__ name(int8 x, int8 y, int8 z); \
    long8 __OVERLOAD__ name(long8 x, long8 y, long8 z); \
    uchar8 __OVERLOAD__ name(uchar8 x, uchar8 y, uchar8 z); \
    ushort8 __OVERLOAD__ name(ushort8 x, ushort8 y, ushort8 z); \
    uint8 __OVERLOAD__ name(uint8 x, uint8 y, uint8 z); \
    ulong8 __OVERLOAD__ name(ulong8 x, ulong8 y, ulong8 z); \
    /* 16-element vectors */ \
    char16 __OVERLOAD__ name(char16 x, char16 y, char16 z); \
    short16 __OVERLOAD__ name(short16 x, short16 y, short16 z); \
    int16 __OVERLOAD__ name(int16 x, int16 y, int16 z); \
    long16 __OVERLOAD__ name(long16 x, long16 y, long16 z); \
    uchar16 __OVERLOAD__ name(uchar16 x, uchar16 y, uchar16 z); \
    ushort16 __OVERLOAD__ name(ushort16 x, ushort16 y, ushort16 z); \
    uint16 __OVERLOAD__ name(uint16 x, uint16 y, uint16 z); \
    ulong16 __OVERLOAD__ name(ulong16 x, ulong16 y, ulong16 z);

// __CLFN_I_3CSIL(name): declares three-argument overloads of `name` that take
// two 16-bit integer vectors a and b (2, 4, 8 or 16 elements) and a 32-bit
// integer c with half as many elements; the return type matches c.
// For each width there are three forms:
//   (short, short, int) -> int, (short, ushort, int) -> int,
//   (ushort, ushort, uint) -> uint.
#define __CLFN_I_3CSIL(name) \
    /* 2-element a/b, scalar c */ \
    int __OVERLOAD__ name(short2 a, short2 b, int c); \
    int __OVERLOAD__ name(short2 a, ushort2 b, int c); \
    uint __OVERLOAD__ name(ushort2 a, ushort2 b, uint c); \
    /* 4-element a/b, 2-element c */ \
    int2 __OVERLOAD__ name(short4 a, short4 b, int2 c); \
    int2 __OVERLOAD__ name(short4 a, ushort4 b, int2 c); \
    uint2 __OVERLOAD__ name(ushort4 a, ushort4 b, uint2 c); \
    /* 8-element a/b, 4-element c */ \
    int4 __OVERLOAD__ name(short8 a, short8 b, int4 c); \
    int4 __OVERLOAD__ name(short8 a, ushort8 b, int4 c); \
    uint4 __OVERLOAD__ name(ushort8 a, ushort8 b, uint4 c); \
    /* 16-element a/b, 8-element c */ \
    int8 __OVERLOAD__ name(short16 a, short16 b, int8 c); \
    int8 __OVERLOAD__ name(short16 a, ushort16 b, int8 c); \
    uint8 __OVERLOAD__ name(ushort16 a, ushort16 b, uint8 c);

// __CLFN_I_3I(name): declares three-argument overloads of `name` for int and
// uint only, where a, b, c and the return value share one type, at scalar
// width and vector widths 2/3/4/8/16 (12 prototypes).
#define __CLFN_I_3I(name) \
    /* int */ \
    int __OVERLOAD__ name(int a, int b, int c); \
    int2 __OVERLOAD__ name(int2 a, int2 b, int2 c); \
    int3 __OVERLOAD__ name(int3 a, int3 b, int3 c); \
    int4 __OVERLOAD__ name(int4 a, int4 b, int4 c); \
    int8 __OVERLOAD__ name(int8 a, int8 b, int8 c); \
    int16 __OVERLOAD__ name(int16 a, int16 b, int16 c); \
    /* uint */ \
    uint __OVERLOAD__ name(uint a, uint b, uint c); \
    uint2 __OVERLOAD__ name(uint2 a, uint2 b, uint2 c); \
    uint3 __OVERLOAD__ name(uint3 a, uint3 b, uint3 c); \
    uint4 __OVERLOAD__ name(uint4 a, uint4 b, uint4 c); \
    uint8 __OVERLOAD__ name(uint8 a, uint8 b, uint8 c); \
    uint16 __OVERLOAD__ name(uint16 a, uint16 b, uint16 c);

// __CLFN_I_2I(name): declares two-argument overloads of `name` for int and
// uint only, where a, b and the return value share one type, at scalar width
// and vector widths 2/3/4/8/16 (12 prototypes).
#define __CLFN_I_2I(name) \
    /* int */ \
    int __OVERLOAD__ name(int a, int b); \
    int2 __OVERLOAD__ name(int2 a, int2 b); \
    int3 __OVERLOAD__ name(int3 a, int3 b); \
    int4 __OVERLOAD__ name(int4 a, int4 b); \
    int8 __OVERLOAD__ name(int8 a, int8 b); \
    int16 __OVERLOAD__ name(int16 a, int16 b); \
    /* uint */ \
    uint __OVERLOAD__ name(uint a, uint b); \
    uint2 __OVERLOAD__ name(uint2 a, uint2 b); \
    uint3 __OVERLOAD__ name(uint3 a, uint3 b); \
    uint4 __OVERLOAD__ name(uint4 a, uint4 b); \
    uint8 __OVERLOAD__ name(uint8 a, uint8 b); \
    uint16 __OVERLOAD__ name(uint16 a, uint16 b);


// __CLFN_I_ALL(name): declares one-argument overloads of `name` that always
// return a scalar int.  The argument x may be uchar, char, ushort, short, int,
// uint, long, ulong, float or double, at scalar width or vector widths
// 2/3/4/8/16 (60 prototypes).
#define __CLFN_I_ALL(name) \
    /* uchar */ \
    int __OVERLOAD__ name(uchar x); \
    int __OVERLOAD__ name(uchar2 x); \
    int __OVERLOAD__ name(uchar3 x); \
    int __OVERLOAD__ name(uchar4 x); \
    int __OVERLOAD__ name(uchar8 x); \
    int __OVERLOAD__ name(uchar16 x); \
    /* char */ \
    int __OVERLOAD__ name(char x); \
    int __OVERLOAD__ name(char2 x); \
    int __OVERLOAD__ name(char3 x); \
    int __OVERLOAD__ name(char4 x); \
    int __OVERLOAD__ name(char8 x); \
    int __OVERLOAD__ name(char16 x); \
    /* ushort */ \
    int __OVERLOAD__ name(ushort x); \
    int __OVERLOAD__ name(ushort2 x); \
    int __OVERLOAD__ name(ushort3 x); \
    int __OVERLOAD__ name(ushort4 x); \
    int __OVERLOAD__ name(ushort8 x); \
    int __OVERLOAD__ name(ushort16 x); \
    /* short */ \
    int __OVERLOAD__ name(short x); \
    int __OVERLOAD__ name(short2 x); \
    int __OVERLOAD__ name(short3 x); \
    int __OVERLOAD__ name(short4 x); \
    int __OVERLOAD__ name(short8 x); \
    int __OVERLOAD__ name(short16 x); \
    /* int */ \
    int __OVERLOAD__ name(int x); \
    int __OVERLOAD__ name(int2 x); \
    int __OVERLOAD__ name(int3 x); \
    int __OVERLOAD__ name(int4 x); \
    int __OVERLOAD__ name(int8 x); \
    int __OVERLOAD__ name(int16 x); \
    /* uint */ \
    int __OVERLOAD__ name(uint x); \
    int __OVERLOAD__ name(uint2 x); \
    int __OVERLOAD__ name(uint3 x); \
    int __OVERLOAD__ name(uint4 x); \
    int __OVERLOAD__ name(uint8 x); \
    int __OVERLOAD__ name(uint16 x); \
    /* long */ \
    int __OVERLOAD__ name(long x); \
    int __OVERLOAD__ name(long2 x); \
    int __OVERLOAD__ name(long3 x); \
    int __OVERLOAD__ name(long4 x); \
    int __OVERLOAD__ name(long8 x); \
    int __OVERLOAD__ name(long16 x); \
    /* ulong */ \
    int __OVERLOAD__ name(ulong x); \
    int __OVERLOAD__ name(ulong2 x); \
    int __OVERLOAD__ name(ulong3 x); \
    int __OVERLOAD__ name(ulong4 x); \
    int __OVERLOAD__ name(ulong8 x); \
    int __OVERLOAD__ name(ulong16 x); \
    /* float */ \
    int __OVERLOAD__ name(float x); \
    int __OVERLOAD__ name(float2 x); \
    int __OVERLOAD__ name(float3 x); \
    int __OVERLOAD__ name(float4 x); \
    int __OVERLOAD__ name(float8 x); \
    int __OVERLOAD__ name(float16 x); \
    /* double */ \
    int __OVERLOAD__ name(double x); \
    int __OVERLOAD__ name(double2 x); \
    int __OVERLOAD__ name(double3 x); \
    int __OVERLOAD__ name(double4 x); \
    int __OVERLOAD__ name(double8 x); \
    int __OVERLOAD__ name(double16 x);

// Returns an event, takes 2 inputs of all types with either a 
// local or global qualifier, and then a size_t and event_t input.
#define __CLFN_EVENT_ALL(name) event_t __OVERLOAD__ name(__global char *dest, const __local char *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global uchar *dest, const __local uchar *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local char *dest, const __global char *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local uchar *dest, const __global uchar *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global short *dest, const __local short *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ushort *dest, const __local ushort *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short *dest, const __global short *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort *dest, const __global ushort *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global short *dest, const __local short *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ushort *dest, const __local ushort *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short *dest, const __global short *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort *dest, const __global ushort *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short *dest, const __global short *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort *dest, const __global ushort *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global int *dest, const __local int *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global uint *dest, const __local uint *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local int *dest, const __global int *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local uint *dest, const __global uint *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global long *dest, const __local long *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ulong *dest, const __local ulong *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local long *dest, const __global long *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ulong *dest, const __global ulong *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global float *dest, const __local float *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global double *dest, const __local double *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local float *dest, const __global float *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local double *dest, const __global double *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global char2 *dest, const __local char2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global uchar2 *dest, const __local uchar2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local char2 *dest, const __global char2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local uchar2 *dest, const __global uchar2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global short2 *dest, const __local short2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ushort2 *dest, const __local ushort2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short2 *dest, const __global short2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort2 *dest, const __global ushort2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global short2 *dest, const __local short2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ushort2 *dest, const __local ushort2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short2 *dest, const __global short2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort2 *dest, const __global ushort2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short2 *dest, const __global short2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort2 *dest, const __global ushort2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global int2 *dest, const __local int2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global uint2 *dest, const __local uint2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local int2 *dest, const __global int2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local uint2 *dest, const __global uint2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global long2 *dest, const __local long2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ulong2 *dest, const __local ulong2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local long2 *dest, const __global long2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ulong2 *dest, const __global ulong2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global float2 *dest, const __local float2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global double2 *dest, const __local double2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local float2 *dest, const __global float2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local double2 *dest, const __global double2 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global char4 *dest, const __local char4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global uchar4 *dest, const __local uchar4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local char4 *dest, const __global char4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local uchar4 *dest, const __global uchar4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global short4 *dest, const __local short4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ushort4 *dest, const __local ushort4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short4 *dest, const __global short4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort4 *dest, const __global ushort4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global short4 *dest, const __local short4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ushort4 *dest, const __local ushort4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short4 *dest, const __global short4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort4 *dest, const __global ushort4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short4 *dest, const __global short4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort4 *dest, const __global ushort4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global int4 *dest, const __local int4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global uint4 *dest, const __local uint4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local int4 *dest, const __global int4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local uint4 *dest, const __global uint4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global long4 *dest, const __local long4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ulong4 *dest, const __local ulong4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local long4 *dest, const __global long4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ulong4 *dest, const __global ulong4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global float4 *dest, const __local float4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global double4 *dest, const __local double4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local float4 *dest, const __global float4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local double4 *dest, const __global double4 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global char8 *dest, const __local char8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global uchar8 *dest, const __local uchar8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local char8 *dest, const __global char8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local uchar8 *dest, const __global uchar8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global short8 *dest, const __local short8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ushort8 *dest, const __local ushort8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short8 *dest, const __global short8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort8 *dest, const __global ushort8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global short8 *dest, const __local short8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ushort8 *dest, const __local ushort8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short8 *dest, const __global short8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort8 *dest, const __global ushort8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short8 *dest, const __global short8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort8 *dest, const __global ushort8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global int8 *dest, const __local int8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global uint8 *dest, const __local uint8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local int8 *dest, const __global int8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local uint8 *dest, const __global uint8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global long8 *dest, const __local long8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ulong8 *dest, const __local ulong8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local long8 *dest, const __global long8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ulong8 *dest, const __global ulong8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global float8 *dest, const __local float8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global double8 *dest, const __local double8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local float8 *dest, const __global float8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local double8 *dest, const __global double8 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global char16 *dest, const __local char16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global uchar16 *dest, const __local uchar16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local char16 *dest, const __global char16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local uchar16 *dest, const __global uchar16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global short16 *dest, const __local short16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ushort16 *dest, const __local ushort16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short16 *dest, const __global short16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort16 *dest, const __global ushort16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global short16 *dest, const __local short16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ushort16 *dest, const __local ushort16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short16 *dest, const __global short16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort16 *dest, const __global ushort16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local short16 *dest, const __global short16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ushort16 *dest, const __global ushort16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global int16 *dest, const __local int16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global uint16 *dest, const __local uint16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local int16 *dest, const __global int16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local uint16 *dest, const __global uint16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global long16 *dest, const __local long16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global ulong16 *dest, const __local ulong16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local long16 *dest, const __global long16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local ulong16 *dest, const __global ulong16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global float16 *dest, const __local float16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__global double16 *dest, const __local double16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local float16 *dest, const __global float16 *src, size_t count, event_t evt); \
event_t __OVERLOAD__ name(__local double16 *dest, const __global double16 *src, size_t count, event_t evt); 

/*
 * __CLFN_EVENT_ALL4(name)
 *
 * Declares the overloads of the event_t-returning function `name` that take
 * (dest, src, count, stride, evt).  For every element type two prototypes are
 * emitted: one with a __global destination and a __local source, and one with
 * the address spaces swapped.  Element types are char, uchar, short, ushort,
 * int, uint, long, ulong, float and double, each as a scalar and as a 2-, 4-,
 * 8- and 16-component vector.  Each distinct overload is declared exactly once.
 *
 * Every generated prototype carries its own ';', so a use site does not need
 * a trailing semicolon.
 *
 * NOTE(review): no 3-component vector overloads are declared here, and the
 * double overloads are not guarded by cl_khr_fp64 -- confirm both are intended.
 */

/* One prototype: __global destination, __local source. */
#define __CLFN_EVENT4_TO_GLOBAL(name, T) \
    event_t __OVERLOAD__ name(__global T *dest, const __local T *src, size_t count, size_t stride, event_t evt);

/* One prototype: __local destination, __global source. */
#define __CLFN_EVENT4_TO_LOCAL(name, T) \
    event_t __OVERLOAD__ name(__local T *dest, const __global T *src, size_t count, size_t stride, event_t evt);

/* Both directions for the two element types A and B (order: A, B to __global; A, B to __local). */
#define __CLFN_EVENT4_PAIR(name, A, B) \
    __CLFN_EVENT4_TO_GLOBAL(name, A) \
    __CLFN_EVENT4_TO_GLOBAL(name, B) \
    __CLFN_EVENT4_TO_LOCAL(name, A) \
    __CLFN_EVENT4_TO_LOCAL(name, B)

/* All ten element types of one vector width; the type names are passed in explicitly. */
#define __CLFN_EVENT4_WIDTH(name, c, uc, s, us, i, ui, l, ul, f, d) \
    __CLFN_EVENT4_PAIR(name, c, uc) \
    __CLFN_EVENT4_PAIR(name, s, us) \
    __CLFN_EVENT4_PAIR(name, i, ui) \
    __CLFN_EVENT4_PAIR(name, l, ul) \
    __CLFN_EVENT4_PAIR(name, f, d)

#define __CLFN_EVENT_ALL4(name) \
    __CLFN_EVENT4_WIDTH(name, char, uchar, short, ushort, int, uint, long, ulong, float, double) \
    __CLFN_EVENT4_WIDTH(name, char2, uchar2, short2, ushort2, int2, uint2, long2, ulong2, float2, double2) \
    __CLFN_EVENT4_WIDTH(name, char4, uchar4, short4, ushort4, int4, uint4, long4, ulong4, float4, double4) \
    __CLFN_EVENT4_WIDTH(name, char8, uchar8, short8, ushort8, int8, uint8, long8, ulong8, float8, double8) \
    __CLFN_EVENT4_WIDTH(name, char16, uchar16, short16, ushort16, int16, uint16, long16, ulong16, float16, double16)


// vec_step( _a ): forwards its argument unchanged to the compiler builtin
// __builtin_vec_step.
#define vec_step( _a )          __builtin_vec_step( _a )

// Integer limits (limits.h-style names).
// CHAR_MAX / CHAR_MIN alias the signed-char limits, and the long limits are
// 64-bit values. INT_MIN and LONG_MIN are spelled as "-MAX - 1" so that no
// literal larger than the corresponding MAX has to be written.
#define CHAR_BIT    8		/* number of bits in a char */
#define	SCHAR_MAX	127		/* max value for a signed char */
#define	SCHAR_MIN	(-128)		/* min value for a signed char */
#define	UCHAR_MAX	255		/* max value for an unsigned char */
#define	CHAR_MAX	SCHAR_MAX		/* max value for a char */
#define	CHAR_MIN	SCHAR_MIN		/* min value for a char */
#define	USHRT_MAX	65535		/* max value for an unsigned short */
#define	SHRT_MAX	32767		/* max value for a short */
#define	SHRT_MIN	(-32768)	/* min value for a short */
#define	UINT_MAX	0xffffffff	/* max value for an unsigned int */
#define	INT_MAX		2147483647	/* max value for an int */
#define	INT_MIN		(-2147483647-1)	/* min value for an int */
#define	ULONG_MAX	0xffffffffffffffffUL	/* max value for an unsigned long */
#define	LONG_MAX	((long)0x7fffffffffffffffL)	/* max value for a signed long */
#define	LONG_MIN	((long)(-0x7fffffffffffffffL-1)) /* min value for a signed long */

// Single-precision float characteristics (float.h-style names).
// FLT_MAX, FLT_MIN and FLT_EPSILON are written as hexadecimal floating
// constants with an 'f' suffix, so they have type float.
#define FLT_DIG         6 
#define FLT_MANT_DIG    24 
#define FLT_MAX_10_EXP  +38 
#define FLT_MAX_EXP     +128 
#define FLT_MIN_10_EXP  -37 
#define FLT_MIN_EXP     -125 
#define FLT_RADIX       2 
#define FLT_MAX         0x1.fffffep127f 
#define FLT_MIN         0x1.0p-126f 
#define FLT_EPSILON     0x1.0p-23f 

// Both ilogb sentinels are defined as INT_MIN.
// NOTE(review): presumably the values ilogb() returns for a zero and a NaN
// argument respectively -- confirm against the ilogb implementation.
#define FP_ILOGB0       INT_MIN
#define FP_ILOGBNAN     INT_MIN

// Section 6.9
// __kernel_exec( _X, _type ) expands to the __kernel qualifier followed by two
// attributes: work_group_size_hint(_X, 1, 1) and vec_type_hint(_type).
// kernel_exec is the same macro without the leading underscores.
#define __kernel_exec( _X, _type)   __kernel __attribute__ ((work_group_size_hint(_X, 1, 1)))  __attribute__((vec_type_hint(_type)))
#define kernel_exec( _X, _type)     __kernel_exec( _X, _type)
// Numeric identifiers for the language versions: 100 for 1.0, 110 for 1.1.
#define CL_VERSION_1_0  100
#define CL_VERSION_1_1  110

// Single-precision math constants: every literal carries an 'f' suffix, so
// each macro has type float.
#define M_E_F         2.71828182845904523536028747135266250f   /* e */
#define M_LOG2E_F     1.44269504088896340735992468100189214f   /* log2(e) */
#define M_LOG10E_F    0.434294481903251827651128918916605082f  /* log10(e) */
#define M_LN2_F       0.693147180559945309417232121458176568f  /* ln(2) */
#define M_LN10_F      2.3025850929940456840179914546843642f    /* ln(10) */
#define M_PI_F        3.14159265358979323846264338327950288f   /* pi */
#define M_PI_2_F      1.57079632679489661923132169163975144f   /* pi/2 */
#define M_PI_4_F      0.785398163397448309615660845819875721f  /* pi/4 */
#define M_1_PI_F      0.318309886183790671537767526745028724f  /* 1/pi */
#define M_2_PI_F      0.636619772367581343075535053490057448f  /* 2/pi */
#define M_2_SQRTPI_F  1.12837916709551257389615890312154517f   /* 2/sqrt(pi) */
#define M_SQRT2_F     1.41421356237309504880168872420969808f   /* sqrt(2) */
#define M_SQRT1_2_F   0.707106781186547524400844362104849039f  /* 1/sqrt(2) */


// Double-precision limits and math constants; only defined when cl_khr_fp64
// is defined.
#ifdef cl_khr_fp64	

    // Double characteristics (float.h-style names).
    // NOTE(review): DBL_RADIX is not one of the standard <float.h> names
    // (only FLT_RADIX is) -- confirm it is intentional.
    #define DBL_DIG         15 
    #define DBL_MANT_DIG    53 
    #define DBL_MAX_10_EXP  +308 
    #define DBL_MAX_EXP     +1024 
    #define DBL_MIN_10_EXP  -307 
    #define DBL_MIN_EXP     -1021 
    #define DBL_RADIX       2 
    #define DBL_MAX         0x1.fffffffffffffp1023 
    #define DBL_MIN         0x1.0p-1022 
    #define DBL_EPSILON     0x1.0p-52

    // Expands to a call of the compiler builtin __builtin_huge_val.
    #define HUGE_VAL        __builtin_huge_val()

    // Unsuffixed literals, so each constant has type double.
    #define M_E         2.71828182845904523536028747135266250   /* e */
    #define M_LOG2E     1.44269504088896340735992468100189214   /* log2(e) */
    #define M_LOG10E    0.434294481903251827651128918916605082  /* log10(e) */
    #define M_LN2       0.693147180559945309417232121458176568  /* ln(2) */
    #define M_LN10      2.30258509299404568401799145468436421   /* ln(10) */
    #define M_PI        3.14159265358979323846264338327950288   /* pi */
    #define M_PI_2      1.57079632679489661923132169163975144   /* pi/2 */
    #define M_PI_4      0.785398163397448309615660845819875721  /* pi/4 */
    #define M_1_PI      0.318309886183790671537767526745028724  /* 1/pi */
    #define M_2_PI      0.636619772367581343075535053490057448  /* 2/pi */
    #define M_2_SQRTPI  1.12837916709551257389615890312154517   /* 2/sqrt(pi) */
    #define M_SQRT2     1.41421356237309504880168872420969808   /* sqrt(2) */
    #define M_SQRT1_2   0.707106781186547524400844362104849039  /* 1/sqrt(2) */

#endif // cl_khr_fp64

/*
 * Marker macro set to 1 at this point in the header.
 * NOTE(review): presumably tested by other headers or the compiler to detect
 * that the OpenCL scalar/vector types have already been declared -- confirm
 * against its users; none are visible in this part of the file.
 */
#define __OPENCL_TYPES_DEFINED__ 1

/* vload2
 *
 * 2-component vector load prototypes.  One overload is declared per element
 * type (char, uchar, short, ushort, int, uint, long, ulong, float, double)
 * and per pointer address space, in this order: unqualified (private; this
 * header defines __private as empty), __global, __local, __constant.
 * Each takes a size_t index plus a pointer to const scalar elements and
 * returns the matching 2-component vector type.
 *
 * NOTE(review): the addressing is not visible in these prototypes; per the
 * OpenCL C vector load rules the read presumably starts at p + index * 2 --
 * confirm against the specification.
 * NOTE(review): the double2 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm double2 is always declared.
 */
char2     __OVERLOAD__ vload2(size_t index, const char *p);
uchar2    __OVERLOAD__ vload2(size_t index, const uchar *p);
short2    __OVERLOAD__ vload2(size_t index, const short *p);
ushort2   __OVERLOAD__ vload2(size_t index, const ushort *p);
int2      __OVERLOAD__ vload2(size_t index, const int *p);
uint2     __OVERLOAD__ vload2(size_t index, const uint *p);
long2     __OVERLOAD__ vload2(size_t index, const long *p);
ulong2    __OVERLOAD__ vload2(size_t index, const ulong *p);
float2    __OVERLOAD__ vload2(size_t index, const float *p);
double2   __OVERLOAD__ vload2(size_t index, const double *p);
char2     __OVERLOAD__ vload2(size_t index, const __global char *p);
uchar2    __OVERLOAD__ vload2(size_t index, const __global uchar *p);
short2    __OVERLOAD__ vload2(size_t index, const __global short *p);
ushort2   __OVERLOAD__ vload2(size_t index, const __global ushort *p);
int2      __OVERLOAD__ vload2(size_t index, const __global int *p);
uint2     __OVERLOAD__ vload2(size_t index, const __global uint *p);
long2     __OVERLOAD__ vload2(size_t index, const __global long *p);
ulong2    __OVERLOAD__ vload2(size_t index, const __global ulong *p);
float2    __OVERLOAD__ vload2(size_t index, const __global float *p);
double2   __OVERLOAD__ vload2(size_t index, const __global double *p);
char2     __OVERLOAD__ vload2(size_t index, const __local char *p);
uchar2    __OVERLOAD__ vload2(size_t index, const __local uchar *p);
short2    __OVERLOAD__ vload2(size_t index, const __local short *p);
ushort2   __OVERLOAD__ vload2(size_t index, const __local ushort *p);
int2      __OVERLOAD__ vload2(size_t index, const __local int *p);
uint2     __OVERLOAD__ vload2(size_t index, const __local uint *p);
long2     __OVERLOAD__ vload2(size_t index, const __local long *p);
ulong2    __OVERLOAD__ vload2(size_t index, const __local ulong *p);
float2    __OVERLOAD__ vload2(size_t index, const __local float *p);
double2   __OVERLOAD__ vload2(size_t index, const __local double *p);
char2     __OVERLOAD__ vload2(size_t index, const __constant char *p);
uchar2    __OVERLOAD__ vload2(size_t index, const __constant uchar *p);
short2    __OVERLOAD__ vload2(size_t index, const __constant short *p);
ushort2   __OVERLOAD__ vload2(size_t index, const __constant ushort *p);
int2      __OVERLOAD__ vload2(size_t index, const __constant int *p);
uint2     __OVERLOAD__ vload2(size_t index, const __constant uint *p);
long2     __OVERLOAD__ vload2(size_t index, const __constant long *p);
ulong2    __OVERLOAD__ vload2(size_t index, const __constant ulong *p);
float2    __OVERLOAD__ vload2(size_t index, const __constant float *p);
double2   __OVERLOAD__ vload2(size_t index, const __constant double *p);

/* vload3
 *
 * 3-component vector load prototypes.  One overload is declared per element
 * type (char, uchar, short, ushort, int, uint, long, ulong, float, double)
 * and per pointer address space, in this order: unqualified (private; this
 * header defines __private as empty), __global, __local, __constant.
 * Each takes a size_t index plus a pointer to const scalar elements and
 * returns the matching 3-component vector type.
 *
 * NOTE(review): the addressing is not visible in these prototypes; per the
 * OpenCL C vector load rules the read presumably starts at p + index * 3
 * (packed, not padded to 4) -- confirm against the specification.
 * NOTE(review): the double3 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm double3 is always declared.
 */
char3     __OVERLOAD__ vload3(size_t index, const char *p);
uchar3    __OVERLOAD__ vload3(size_t index, const uchar *p);
short3    __OVERLOAD__ vload3(size_t index, const short *p);
ushort3   __OVERLOAD__ vload3(size_t index, const ushort *p);
int3      __OVERLOAD__ vload3(size_t index, const int *p);
uint3     __OVERLOAD__ vload3(size_t index, const uint *p);
long3     __OVERLOAD__ vload3(size_t index, const long *p);
ulong3    __OVERLOAD__ vload3(size_t index, const ulong *p);
float3    __OVERLOAD__ vload3(size_t index, const float *p);
double3   __OVERLOAD__ vload3(size_t index, const double *p);
char3     __OVERLOAD__ vload3(size_t index, const __global char *p);
uchar3    __OVERLOAD__ vload3(size_t index, const __global uchar *p);
short3    __OVERLOAD__ vload3(size_t index, const __global short *p);
ushort3   __OVERLOAD__ vload3(size_t index, const __global ushort *p);
int3      __OVERLOAD__ vload3(size_t index, const __global int *p);
uint3     __OVERLOAD__ vload3(size_t index, const __global uint *p);
long3     __OVERLOAD__ vload3(size_t index, const __global long *p);
ulong3    __OVERLOAD__ vload3(size_t index, const __global ulong *p);
float3    __OVERLOAD__ vload3(size_t index, const __global float *p);
double3   __OVERLOAD__ vload3(size_t index, const __global double *p);
char3     __OVERLOAD__ vload3(size_t index, const __local char *p);
uchar3    __OVERLOAD__ vload3(size_t index, const __local uchar *p);
short3    __OVERLOAD__ vload3(size_t index, const __local short *p);
ushort3   __OVERLOAD__ vload3(size_t index, const __local ushort *p);
int3      __OVERLOAD__ vload3(size_t index, const __local int *p);
uint3     __OVERLOAD__ vload3(size_t index, const __local uint *p);
long3     __OVERLOAD__ vload3(size_t index, const __local long *p);
ulong3    __OVERLOAD__ vload3(size_t index, const __local ulong *p);
float3    __OVERLOAD__ vload3(size_t index, const __local float *p);
double3   __OVERLOAD__ vload3(size_t index, const __local double *p);
char3     __OVERLOAD__ vload3(size_t index, const __constant char *p);
uchar3    __OVERLOAD__ vload3(size_t index, const __constant uchar *p);
short3    __OVERLOAD__ vload3(size_t index, const __constant short *p);
ushort3   __OVERLOAD__ vload3(size_t index, const __constant ushort *p);
int3      __OVERLOAD__ vload3(size_t index, const __constant int *p);
uint3     __OVERLOAD__ vload3(size_t index, const __constant uint *p);
long3     __OVERLOAD__ vload3(size_t index, const __constant long *p);
ulong3    __OVERLOAD__ vload3(size_t index, const __constant ulong *p);
float3    __OVERLOAD__ vload3(size_t index, const __constant float *p);
double3   __OVERLOAD__ vload3(size_t index, const __constant double *p);                                        
/* vload4
 *
 * 4-component vector load prototypes.  One overload is declared per element
 * type (char, uchar, short, ushort, int, uint, long, ulong, float, double)
 * and per pointer address space, in this order: unqualified (private; this
 * header defines __private as empty), __global, __local, __constant.
 * Each takes a size_t index plus a pointer to const scalar elements and
 * returns the matching 4-component vector type.
 *
 * NOTE(review): the addressing is not visible in these prototypes; per the
 * OpenCL C vector load rules the read presumably starts at p + index * 4 --
 * confirm against the specification.
 * NOTE(review): the double4 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm double4 is always declared.
 */
char4     __OVERLOAD__ vload4(size_t index, const char *p);
uchar4    __OVERLOAD__ vload4(size_t index, const uchar *p);
short4    __OVERLOAD__ vload4(size_t index, const short *p);
ushort4   __OVERLOAD__ vload4(size_t index, const ushort *p);
int4      __OVERLOAD__ vload4(size_t index, const int *p);
uint4     __OVERLOAD__ vload4(size_t index, const uint *p);
long4     __OVERLOAD__ vload4(size_t index, const long *p);
ulong4    __OVERLOAD__ vload4(size_t index, const ulong *p);
float4    __OVERLOAD__ vload4(size_t index, const float *p);
double4   __OVERLOAD__ vload4(size_t index, const double *p);
char4     __OVERLOAD__ vload4(size_t index, const __global char *p);
uchar4    __OVERLOAD__ vload4(size_t index, const __global uchar *p);
short4    __OVERLOAD__ vload4(size_t index, const __global short *p);
ushort4   __OVERLOAD__ vload4(size_t index, const __global ushort *p);
int4      __OVERLOAD__ vload4(size_t index, const __global int *p);
uint4     __OVERLOAD__ vload4(size_t index, const __global uint *p);
long4     __OVERLOAD__ vload4(size_t index, const __global long *p);
ulong4    __OVERLOAD__ vload4(size_t index, const __global ulong *p);
float4    __OVERLOAD__ vload4(size_t index, const __global float *p);
double4   __OVERLOAD__ vload4(size_t index, const __global double *p);
char4     __OVERLOAD__ vload4(size_t index, const __local char *p);
uchar4    __OVERLOAD__ vload4(size_t index, const __local uchar *p);
short4    __OVERLOAD__ vload4(size_t index, const __local short *p);
ushort4   __OVERLOAD__ vload4(size_t index, const __local ushort *p);
int4      __OVERLOAD__ vload4(size_t index, const __local int *p);
uint4     __OVERLOAD__ vload4(size_t index, const __local uint *p);
long4     __OVERLOAD__ vload4(size_t index, const __local long *p);
ulong4    __OVERLOAD__ vload4(size_t index, const __local ulong *p);
float4    __OVERLOAD__ vload4(size_t index, const __local float *p);
double4   __OVERLOAD__ vload4(size_t index, const __local double *p);
char4     __OVERLOAD__ vload4(size_t index, const __constant char *p);
uchar4    __OVERLOAD__ vload4(size_t index, const __constant uchar *p);
short4    __OVERLOAD__ vload4(size_t index, const __constant short *p);
ushort4   __OVERLOAD__ vload4(size_t index, const __constant ushort *p);
int4      __OVERLOAD__ vload4(size_t index, const __constant int *p);
uint4     __OVERLOAD__ vload4(size_t index, const __constant uint *p);
long4     __OVERLOAD__ vload4(size_t index, const __constant long *p);
ulong4    __OVERLOAD__ vload4(size_t index, const __constant ulong *p);
float4    __OVERLOAD__ vload4(size_t index, const __constant float *p);
double4   __OVERLOAD__ vload4(size_t index, const __constant double *p);

/* vload8
 *
 * 8-component vector load prototypes.  One overload is declared per element
 * type (char, uchar, short, ushort, int, uint, long, ulong, float, double)
 * and per pointer address space, in this order: unqualified (private; this
 * header defines __private as empty), __global, __local, __constant.
 * Each takes a size_t index plus a pointer to const scalar elements and
 * returns the matching 8-component vector type.
 *
 * NOTE(review): the addressing is not visible in these prototypes; per the
 * OpenCL C vector load rules the read presumably starts at p + index * 8 --
 * confirm against the specification.
 * NOTE(review): the double8 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm double8 is always declared.
 */
char8     __OVERLOAD__ vload8(size_t index, const char *p);
uchar8    __OVERLOAD__ vload8(size_t index, const uchar *p);
short8    __OVERLOAD__ vload8(size_t index, const short *p);
ushort8   __OVERLOAD__ vload8(size_t index, const ushort *p);
int8      __OVERLOAD__ vload8(size_t index, const int *p);
uint8     __OVERLOAD__ vload8(size_t index, const uint *p);
long8     __OVERLOAD__ vload8(size_t index, const long *p);
ulong8    __OVERLOAD__ vload8(size_t index, const ulong *p);
float8    __OVERLOAD__ vload8(size_t index, const float *p);
double8   __OVERLOAD__ vload8(size_t index, const double *p);
char8     __OVERLOAD__ vload8(size_t index, const __global char *p);
uchar8    __OVERLOAD__ vload8(size_t index, const __global uchar *p);
short8    __OVERLOAD__ vload8(size_t index, const __global short *p);
ushort8   __OVERLOAD__ vload8(size_t index, const __global ushort *p);
int8      __OVERLOAD__ vload8(size_t index, const __global int *p);
uint8     __OVERLOAD__ vload8(size_t index, const __global uint *p);
long8     __OVERLOAD__ vload8(size_t index, const __global long *p);
ulong8    __OVERLOAD__ vload8(size_t index, const __global ulong *p);
float8    __OVERLOAD__ vload8(size_t index, const __global float *p);
double8   __OVERLOAD__ vload8(size_t index, const __global double *p);
char8     __OVERLOAD__ vload8(size_t index, const __local char *p);
uchar8    __OVERLOAD__ vload8(size_t index, const __local uchar *p);
short8    __OVERLOAD__ vload8(size_t index, const __local short *p);
ushort8   __OVERLOAD__ vload8(size_t index, const __local ushort *p);
int8      __OVERLOAD__ vload8(size_t index, const __local int *p);
uint8     __OVERLOAD__ vload8(size_t index, const __local uint *p);
long8     __OVERLOAD__ vload8(size_t index, const __local long *p);
ulong8    __OVERLOAD__ vload8(size_t index, const __local ulong *p);
float8    __OVERLOAD__ vload8(size_t index, const __local float *p);
double8   __OVERLOAD__ vload8(size_t index, const __local double *p);
char8     __OVERLOAD__ vload8(size_t index, const __constant char *p);
uchar8    __OVERLOAD__ vload8(size_t index, const __constant uchar *p);
short8    __OVERLOAD__ vload8(size_t index, const __constant short *p);
ushort8   __OVERLOAD__ vload8(size_t index, const __constant ushort *p);
int8      __OVERLOAD__ vload8(size_t index, const __constant int *p);
uint8     __OVERLOAD__ vload8(size_t index, const __constant uint *p);
long8     __OVERLOAD__ vload8(size_t index, const __constant long *p);
ulong8    __OVERLOAD__ vload8(size_t index, const __constant ulong *p);
float8    __OVERLOAD__ vload8(size_t index, const __constant float *p);
double8   __OVERLOAD__ vload8(size_t index, const __constant double *p);

/* vload16
 *
 * 16-component vector load prototypes.  One overload is declared per element
 * type (char, uchar, short, ushort, int, uint, long, ulong, float, double)
 * and per pointer address space, in this order: unqualified (private; this
 * header defines __private as empty), __global, __local, __constant.
 * Each takes a size_t index plus a pointer to const scalar elements and
 * returns the matching 16-component vector type.
 *
 * NOTE(review): the addressing is not visible in these prototypes; per the
 * OpenCL C vector load rules the read presumably starts at p + index * 16 --
 * confirm against the specification.
 * NOTE(review): the double16 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm double16 is always declared.
 */
char16    __OVERLOAD__ vload16(size_t index, const char *p);
uchar16   __OVERLOAD__ vload16(size_t index, const uchar *p);
short16   __OVERLOAD__ vload16(size_t index, const short *p);
ushort16  __OVERLOAD__ vload16(size_t index, const ushort *p);
int16     __OVERLOAD__ vload16(size_t index, const int *p);
uint16    __OVERLOAD__ vload16(size_t index, const uint *p);
long16    __OVERLOAD__ vload16(size_t index, const long *p);
ulong16   __OVERLOAD__ vload16(size_t index, const ulong *p);
float16   __OVERLOAD__ vload16(size_t index, const float *p);
double16  __OVERLOAD__ vload16(size_t index, const double *p);
char16    __OVERLOAD__ vload16(size_t index, const __global char *p);
uchar16   __OVERLOAD__ vload16(size_t index, const __global uchar *p);
short16   __OVERLOAD__ vload16(size_t index, const __global short *p);
ushort16  __OVERLOAD__ vload16(size_t index, const __global ushort *p);
int16     __OVERLOAD__ vload16(size_t index, const __global int *p);
uint16    __OVERLOAD__ vload16(size_t index, const __global uint *p);
long16    __OVERLOAD__ vload16(size_t index, const __global long *p);
ulong16   __OVERLOAD__ vload16(size_t index, const __global ulong *p);
float16   __OVERLOAD__ vload16(size_t index, const __global float *p);
double16  __OVERLOAD__ vload16(size_t index, const __global double *p);
char16    __OVERLOAD__ vload16(size_t index, const __local char *p);
uchar16   __OVERLOAD__ vload16(size_t index, const __local uchar *p);
short16   __OVERLOAD__ vload16(size_t index, const __local short *p);
ushort16  __OVERLOAD__ vload16(size_t index, const __local ushort *p);
int16     __OVERLOAD__ vload16(size_t index, const __local int *p);
uint16    __OVERLOAD__ vload16(size_t index, const __local uint *p);
long16    __OVERLOAD__ vload16(size_t index, const __local long *p);
ulong16   __OVERLOAD__ vload16(size_t index, const __local ulong *p);
float16   __OVERLOAD__ vload16(size_t index, const __local float *p);
double16  __OVERLOAD__ vload16(size_t index, const __local double *p);
char16    __OVERLOAD__ vload16(size_t index, const __constant char *p);
uchar16   __OVERLOAD__ vload16(size_t index, const __constant uchar *p);
short16   __OVERLOAD__ vload16(size_t index, const __constant short *p);
ushort16  __OVERLOAD__ vload16(size_t index, const __constant ushort *p);
int16     __OVERLOAD__ vload16(size_t index, const __constant int *p);
uint16    __OVERLOAD__ vload16(size_t index, const __constant uint *p);
long16    __OVERLOAD__ vload16(size_t index, const __constant long *p);
ulong16   __OVERLOAD__ vload16(size_t index, const __constant ulong *p);
float16   __OVERLOAD__ vload16(size_t index, const __constant float *p);
double16  __OVERLOAD__ vload16(size_t index, const __constant double *p);

/* vload_half
 *
 * Scalar half load prototypes: each takes a size_t index and a pointer to
 * const half and returns float.  One overload per address space:
 * unqualified (private), __global, __local, __constant.
 * NOTE(review): presumably reads the half at p + index and widens it to
 * float, per the OpenCL C spec -- confirm.
 */
float   __OVERLOAD__ vload_half(size_t index, const half *p);
float   __OVERLOAD__ vload_half(size_t index, const __global half *p);
float   __OVERLOAD__ vload_half(size_t index, const __local half *p);
float   __OVERLOAD__ vload_half(size_t index, const __constant half *p);

/* vloada_half
 *
 * Same parameter list, return type and address-space coverage as
 * vload_half, declared under the vloada_ name.
 * NOTE(review): the "a" forms presumably carry the aligned-access semantics
 * described in the OpenCL C spec -- confirm.
 */
float __OVERLOAD__ vloada_half(size_t index, const half *p);
float __OVERLOAD__ vloada_half(size_t index, const __global half *p);
float __OVERLOAD__ vloada_half(size_t index, const __local half *p);
float __OVERLOAD__ vloada_half(size_t index, const __constant half *p);

/* vload_half2
 *
 * Half-to-float vector load prototypes: each takes a size_t index and a
 * pointer to const half and returns float2.  One overload per address
 * space: unqualified (private), __global, __local, __constant.
 * NOTE(review): presumably reads 2 halves starting at p + index * 2 and
 * widens them to float, per the OpenCL C spec -- confirm.
 */
float2  __OVERLOAD__ vload_half2(size_t index, const half *p);
float2  __OVERLOAD__ vload_half2(size_t index, const __global half *p);
float2  __OVERLOAD__ vload_half2(size_t index, const __local half *p);
float2  __OVERLOAD__ vload_half2(size_t index, const __constant half *p);

/* vloada_half2
 *
 * Same parameter list, return type and address-space coverage as
 * vload_half2, declared under the vloada_ name.
 * NOTE(review): the "a" forms presumably require p to be aligned to the
 * size of the half2 vector, per the OpenCL C spec -- confirm.
 */
float2 __OVERLOAD__ vloada_half2(size_t index, const half *p);
float2 __OVERLOAD__ vloada_half2(size_t index, const __global half *p);
float2 __OVERLOAD__ vloada_half2(size_t index, const __local half *p);
float2 __OVERLOAD__ vloada_half2(size_t index, const __constant half *p);

/* vload_half3
 *
 * Half-to-float vector load prototypes: each takes a size_t index and a
 * pointer to const half and returns float3.  One overload per address
 * space: unqualified (private), __global, __local, __constant.
 * NOTE(review): presumably reads 3 halves starting at p + index * 3 and
 * widens them to float, per the OpenCL C spec -- confirm.
 */
float3 __OVERLOAD__ vload_half3( size_t index, const half *p);
float3 __OVERLOAD__ vload_half3(size_t index, const __global half *p);
float3 __OVERLOAD__ vload_half3(size_t index, const __local half *p);
float3 __OVERLOAD__ vload_half3(size_t index, const __constant half *p);

/* vloada_half3
 *
 * Same parameter list, return type and address-space coverage as
 * vload_half3, declared under the vloada_ name.
 * NOTE(review): per the OpenCL C spec the aligned 3-component form
 * presumably addresses memory with a stride of 4 halves (p + index * 4)
 * rather than 3 -- confirm.
 */
float3 __OVERLOAD__ vloada_half3(size_t index, const half *p);
float3 __OVERLOAD__ vloada_half3(size_t index, const __global half *p);
float3 __OVERLOAD__ vloada_half3(size_t index, const __local half *p);
float3 __OVERLOAD__ vloada_half3(size_t index, const __constant half *p);

/* vload_half4
 *
 * Half-to-float vector load prototypes: each takes a size_t index and a
 * pointer to const half and returns float4.  One overload per address
 * space: unqualified (private), __global, __local, __constant.
 * NOTE(review): presumably reads 4 halves starting at p + index * 4 and
 * widens them to float, per the OpenCL C spec -- confirm.
 */
float4 __OVERLOAD__ vload_half4(size_t index, const half *p);
float4 __OVERLOAD__ vload_half4(size_t index, const __global half *p);
float4 __OVERLOAD__ vload_half4(size_t index, const __local half *p);
float4 __OVERLOAD__ vload_half4(size_t index, const __constant half *p);

/* vloada_half4
 *
 * Same parameter list, return type and address-space coverage as
 * vload_half4, declared under the vloada_ name.
 * NOTE(review): the "a" forms presumably require p to be aligned to the
 * size of the half4 vector, per the OpenCL C spec -- confirm.
 */
float4 __OVERLOAD__ vloada_half4(size_t index, const half *p);
float4 __OVERLOAD__ vloada_half4(size_t index, const __global half *p);
float4 __OVERLOAD__ vloada_half4(size_t index, const __local half *p);
float4 __OVERLOAD__ vloada_half4(size_t index, const __constant half *p);

/* vload_half8
 *
 * Half-to-float vector load prototypes: each takes a size_t index and a
 * pointer to const half and returns float8.  One overload per address
 * space: unqualified (private), __global, __local, __constant.
 * NOTE(review): presumably reads 8 halves starting at p + index * 8 and
 * widens them to float, per the OpenCL C spec -- confirm.
 */
float8 __OVERLOAD__ vload_half8(size_t index, const half *p);
float8 __OVERLOAD__ vload_half8(size_t index, const __global half *p);
float8 __OVERLOAD__ vload_half8(size_t index, const __local half *p);
float8 __OVERLOAD__ vload_half8(size_t index, const __constant half *p);

/* vloada_half8
 *
 * Same parameter list, return type and address-space coverage as
 * vload_half8, declared under the vloada_ name.
 * NOTE(review): the "a" forms presumably require p to be aligned to the
 * size of the half8 vector, per the OpenCL C spec -- confirm.
 */
float8 __OVERLOAD__ vloada_half8(size_t index, const half *p);
float8 __OVERLOAD__ vloada_half8(size_t index, const __global half *p);
float8 __OVERLOAD__ vloada_half8(size_t index, const __local half *p);
float8 __OVERLOAD__ vloada_half8(size_t index, const __constant half *p);

/* vload_half16
 *
 * Half-to-float vector load prototypes: each takes a size_t index and a
 * pointer to const half and returns float16.  One overload per address
 * space: unqualified (private), __global, __local, __constant.
 * NOTE(review): presumably reads 16 halves starting at p + index * 16 and
 * widens them to float, per the OpenCL C spec -- confirm.
 */
float16 __OVERLOAD__ vload_half16(size_t index, const half *p);
float16 __OVERLOAD__ vload_half16(size_t index, const __global half *p);
float16 __OVERLOAD__ vload_half16(size_t index, const __local half *p);
float16 __OVERLOAD__ vload_half16(size_t index, const __constant half *p);

/* vloada_half16
 *
 * Same parameter list, return type and address-space coverage as
 * vload_half16, declared under the vloada_ name.
 * NOTE(review): the "a" forms presumably require p to be aligned to the
 * size of the half16 vector, per the OpenCL C spec -- confirm.
 */
float16 __OVERLOAD__ vloada_half16(size_t index, const half *p);
float16 __OVERLOAD__ vloada_half16(size_t index, const __global half *p);
float16 __OVERLOAD__ vloada_half16(size_t index, const __local half *p);
float16 __OVERLOAD__ vloada_half16(size_t index, const __constant half *p);

/* vstore2
 *
 * 2-component vector store prototypes.  One overload is declared per element
 * type (char, uchar, short, ushort, int, uint, long, ulong, float, double)
 * and per writable address space, in this order: unqualified (private),
 * __global, __local.  No __constant overloads are declared; this header
 * defines __constant as a const-qualified address space.
 * Each takes the vector value v, a size_t index and a pointer to non-const
 * scalar elements, and returns void.
 *
 * NOTE(review): the addressing is not visible in these prototypes; per the
 * OpenCL C vector store rules the write presumably starts at p + index * 2
 * -- confirm against the specification.
 * NOTE(review): the double2 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm double2 is always declared.
 */
void __OVERLOAD__ vstore2( char2 v, size_t index, char *p );
void __OVERLOAD__ vstore2( uchar2 v, size_t index, uchar *p );
void __OVERLOAD__ vstore2( short2 v, size_t index, short *p );
void __OVERLOAD__ vstore2( ushort2 v, size_t index, ushort *p );
void __OVERLOAD__ vstore2( int2 v, size_t index, int *p );
void __OVERLOAD__ vstore2( uint2 v, size_t index, uint *p );
void __OVERLOAD__ vstore2( long2 v, size_t index, long *p );
void __OVERLOAD__ vstore2( ulong2 v, size_t index, ulong *p );
void __OVERLOAD__ vstore2( float2 v, size_t index, float *p );
void __OVERLOAD__ vstore2( double2 v, size_t index, double *p );
void __OVERLOAD__ vstore2( char2 v, size_t index, __global char *p );
void __OVERLOAD__ vstore2( uchar2 v, size_t index, __global uchar *p );
void __OVERLOAD__ vstore2( short2 v, size_t index, __global short *p );
void __OVERLOAD__ vstore2( ushort2 v, size_t index, __global ushort *p );
void __OVERLOAD__ vstore2( int2 v, size_t index, __global int *p );
void __OVERLOAD__ vstore2( uint2 v, size_t index, __global uint *p );
void __OVERLOAD__ vstore2( long2 v, size_t index, __global long *p );
void __OVERLOAD__ vstore2( ulong2 v, size_t index, __global ulong *p );
void __OVERLOAD__ vstore2( float2 v, size_t index, __global float *p );
void __OVERLOAD__ vstore2( double2 v, size_t index, __global double *p );
void __OVERLOAD__ vstore2( char2 v, size_t index, __local char *p );
void __OVERLOAD__ vstore2( uchar2 v, size_t index, __local uchar *p );
void __OVERLOAD__ vstore2( short2 v, size_t index, __local short *p );
void __OVERLOAD__ vstore2( ushort2 v, size_t index, __local ushort *p );
void __OVERLOAD__ vstore2( int2 v, size_t index, __local int *p );
void __OVERLOAD__ vstore2( uint2 v, size_t index, __local uint *p );
void __OVERLOAD__ vstore2( long2 v, size_t index, __local long *p );
void __OVERLOAD__ vstore2( ulong2 v, size_t index, __local ulong *p );
void __OVERLOAD__ vstore2( float2 v, size_t index, __local float *p );
void __OVERLOAD__ vstore2( double2 v, size_t index, __local double *p );

/* vstore3
 *
 * 3-component vector store prototypes.  One overload is declared per element
 * type (char, uchar, short, ushort, int, uint, long, ulong, float, double)
 * and per writable address space, in this order: unqualified (private),
 * __global, __local.  No __constant overloads are declared; this header
 * defines __constant as a const-qualified address space.
 * Each takes the vector value v, a size_t index and a pointer to non-const
 * scalar elements, and returns void.
 *
 * NOTE(review): the addressing is not visible in these prototypes; per the
 * OpenCL C vector store rules the write presumably starts at p + index * 3
 * (packed, not padded to 4) -- confirm against the specification.
 * NOTE(review): the double3 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm double3 is always declared.
 */
void __OVERLOAD__ vstore3( char3 v, size_t index, char *p );
void __OVERLOAD__ vstore3( uchar3 v, size_t index, uchar *p );
void __OVERLOAD__ vstore3( short3 v, size_t index, short *p );
void __OVERLOAD__ vstore3( ushort3 v, size_t index, ushort *p );
void __OVERLOAD__ vstore3( int3 v, size_t index, int *p );
void __OVERLOAD__ vstore3( uint3 v, size_t index, uint *p );
void __OVERLOAD__ vstore3( long3 v, size_t index, long *p );
void __OVERLOAD__ vstore3( ulong3 v, size_t index, ulong *p );
void __OVERLOAD__ vstore3( float3 v, size_t index, float *p );
void __OVERLOAD__ vstore3( double3 v, size_t index, double *p );
void __OVERLOAD__ vstore3( char3 v, size_t index, __global char *p );
void __OVERLOAD__ vstore3( uchar3 v, size_t index, __global uchar *p );
void __OVERLOAD__ vstore3( short3 v, size_t index, __global short *p );
void __OVERLOAD__ vstore3( ushort3 v, size_t index, __global ushort *p );
void __OVERLOAD__ vstore3( int3 v, size_t index, __global int *p );
void __OVERLOAD__ vstore3( uint3 v, size_t index, __global uint *p );
void __OVERLOAD__ vstore3( long3 v, size_t index, __global long *p );
void __OVERLOAD__ vstore3( ulong3 v, size_t index, __global ulong *p );
void __OVERLOAD__ vstore3( float3 v, size_t index, __global float *p );
void __OVERLOAD__ vstore3( double3 v, size_t index, __global double *p );
void __OVERLOAD__ vstore3( char3 v, size_t index, __local char *p );
void __OVERLOAD__ vstore3( uchar3 v, size_t index, __local uchar *p );
void __OVERLOAD__ vstore3( short3 v, size_t index, __local short *p );
void __OVERLOAD__ vstore3( ushort3 v, size_t index, __local ushort *p );
void __OVERLOAD__ vstore3( int3 v, size_t index, __local int *p );
void __OVERLOAD__ vstore3( uint3 v, size_t index, __local uint *p );
void __OVERLOAD__ vstore3( long3 v, size_t index, __local long *p );
void __OVERLOAD__ vstore3( ulong3 v, size_t index, __local ulong *p );
void __OVERLOAD__ vstore3( float3 v, size_t index, __local float *p );
void __OVERLOAD__ vstore3( double3 v, size_t index, __local double *p );


/* vstore4
 *
 * 4-component vector store prototypes.  One overload is declared per element
 * type (char, uchar, short, ushort, int, uint, long, ulong, float, double)
 * and per writable address space, in this order: unqualified (private),
 * __global, __local.  No __constant overloads are declared; this header
 * defines __constant as a const-qualified address space.
 * Each takes the vector value v, a size_t index and a pointer to non-const
 * scalar elements, and returns void.
 *
 * NOTE(review): the addressing is not visible in these prototypes; per the
 * OpenCL C vector store rules the write presumably starts at p + index * 4
 * -- confirm against the specification.
 * NOTE(review): the double4 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm double4 is always declared.
 */
void __OVERLOAD__ vstore4( char4 v, size_t index, char *p );
void __OVERLOAD__ vstore4( uchar4 v, size_t index, uchar *p );
void __OVERLOAD__ vstore4( short4 v, size_t index, short *p );
void __OVERLOAD__ vstore4( ushort4 v, size_t index, ushort *p );
void __OVERLOAD__ vstore4( int4 v, size_t index, int *p );
void __OVERLOAD__ vstore4( uint4 v, size_t index, uint *p );
void __OVERLOAD__ vstore4( long4 v, size_t index, long *p );
void __OVERLOAD__ vstore4( ulong4 v, size_t index, ulong *p );
void __OVERLOAD__ vstore4( float4 v, size_t index, float *p );
void __OVERLOAD__ vstore4( double4 v, size_t index, double *p );
void __OVERLOAD__ vstore4( char4 v, size_t index, __global char *p );
void __OVERLOAD__ vstore4( uchar4 v, size_t index, __global uchar *p );
void __OVERLOAD__ vstore4( short4 v, size_t index, __global short *p );
void __OVERLOAD__ vstore4( ushort4 v, size_t index, __global ushort *p );
void __OVERLOAD__ vstore4( int4 v, size_t index, __global int *p );
void __OVERLOAD__ vstore4( uint4 v, size_t index, __global uint *p );
void __OVERLOAD__ vstore4( long4 v, size_t index, __global long *p );
void __OVERLOAD__ vstore4( ulong4 v, size_t index, __global ulong *p );
void __OVERLOAD__ vstore4( float4 v, size_t index, __global float *p );
void __OVERLOAD__ vstore4( double4 v, size_t index, __global double *p );
void __OVERLOAD__ vstore4( char4 v, size_t index, __local char *p );
void __OVERLOAD__ vstore4( uchar4 v, size_t index, __local uchar *p );
void __OVERLOAD__ vstore4( short4 v, size_t index, __local short *p );
void __OVERLOAD__ vstore4( ushort4 v, size_t index, __local ushort *p );
void __OVERLOAD__ vstore4( int4 v, size_t index, __local int *p );
void __OVERLOAD__ vstore4( uint4 v, size_t index, __local uint *p );
void __OVERLOAD__ vstore4( long4 v, size_t index, __local long *p );
void __OVERLOAD__ vstore4( ulong4 v, size_t index, __local ulong *p );
void __OVERLOAD__ vstore4( float4 v, size_t index, __local float *p );
void __OVERLOAD__ vstore4( double4 v, size_t index, __local double *p );


/* vstore8
 *
 * 8-component vector store prototypes.  One overload is declared per element
 * type (char, uchar, short, ushort, int, uint, long, ulong, float, double)
 * and per writable address space, in this order: unqualified (private),
 * __global, __local.  No __constant overloads are declared; this header
 * defines __constant as a const-qualified address space.
 * Each takes the vector value v, a size_t index and a pointer to non-const
 * scalar elements, and returns void.
 *
 * NOTE(review): the addressing is not visible in these prototypes; per the
 * OpenCL C vector store rules the write presumably starts at p + index * 8
 * -- confirm against the specification.
 * NOTE(review): the double8 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm double8 is always declared.
 */
void __OVERLOAD__ vstore8( char8 v, size_t index, char *p );
void __OVERLOAD__ vstore8( uchar8 v, size_t index, uchar *p );
void __OVERLOAD__ vstore8( short8 v, size_t index, short *p );
void __OVERLOAD__ vstore8( ushort8 v, size_t index, ushort *p );
void __OVERLOAD__ vstore8( int8 v, size_t index, int *p );
void __OVERLOAD__ vstore8( uint8 v, size_t index, uint *p );
void __OVERLOAD__ vstore8( long8 v, size_t index, long *p );
void __OVERLOAD__ vstore8( ulong8 v, size_t index, ulong *p );
void __OVERLOAD__ vstore8( float8 v, size_t index, float *p );
void __OVERLOAD__ vstore8( double8 v, size_t index, double *p );
void __OVERLOAD__ vstore8( char8 v, size_t index, __global char *p );
void __OVERLOAD__ vstore8( uchar8 v, size_t index, __global uchar *p );
void __OVERLOAD__ vstore8( short8 v, size_t index, __global short *p );
void __OVERLOAD__ vstore8( ushort8 v, size_t index, __global ushort *p );
void __OVERLOAD__ vstore8( int8 v, size_t index, __global int *p );
void __OVERLOAD__ vstore8( uint8 v, size_t index, __global uint *p );
void __OVERLOAD__ vstore8( long8 v, size_t index, __global long *p );
void __OVERLOAD__ vstore8( ulong8 v, size_t index, __global ulong *p );
void __OVERLOAD__ vstore8( float8 v, size_t index, __global float *p );
void __OVERLOAD__ vstore8( double8 v, size_t index, __global double *p );
void __OVERLOAD__ vstore8( char8 v, size_t index, __local char *p );
void __OVERLOAD__ vstore8( uchar8 v, size_t index, __local uchar *p );
void __OVERLOAD__ vstore8( short8 v, size_t index, __local short *p );
void __OVERLOAD__ vstore8( ushort8 v, size_t index, __local ushort *p );
void __OVERLOAD__ vstore8( int8 v, size_t index, __local int *p );
void __OVERLOAD__ vstore8( uint8 v, size_t index, __local uint *p );
void __OVERLOAD__ vstore8( long8 v, size_t index, __local long *p );
void __OVERLOAD__ vstore8( ulong8 v, size_t index, __local ulong *p );
void __OVERLOAD__ vstore8( float8 v, size_t index, __local float *p );
void __OVERLOAD__ vstore8( double8 v, size_t index, __local double *p );


/* vstore16
 *
 * 16-component vector store prototypes.  One overload is declared per element
 * type (char, uchar, short, ushort, int, uint, long, ulong, float, double)
 * and per writable address space, in this order: unqualified (private),
 * __global, __local.  No __constant overloads are declared; this header
 * defines __constant as a const-qualified address space.
 * Each takes the vector value v, a size_t index and a pointer to non-const
 * scalar elements, and returns void.
 *
 * NOTE(review): the addressing is not visible in these prototypes; per the
 * OpenCL C vector store rules the write presumably starts at p + index * 16
 * -- confirm against the specification.
 * NOTE(review): the double16 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm double16 is always declared.
 */
void __OVERLOAD__ vstore16( char16 v, size_t index, char *p );
void __OVERLOAD__ vstore16( uchar16 v, size_t index, uchar *p );
void __OVERLOAD__ vstore16( short16 v, size_t index, short *p );
void __OVERLOAD__ vstore16( ushort16 v, size_t index, ushort *p );
void __OVERLOAD__ vstore16( int16 v, size_t index, int *p );
void __OVERLOAD__ vstore16( uint16 v, size_t index, uint *p );
void __OVERLOAD__ vstore16( long16 v, size_t index, long *p );
void __OVERLOAD__ vstore16( ulong16 v, size_t index, ulong *p );
void __OVERLOAD__ vstore16( float16 v, size_t index, float *p );
void __OVERLOAD__ vstore16( double16 v, size_t index, double *p );
void __OVERLOAD__ vstore16( char16 v, size_t index, __global char *p );
void __OVERLOAD__ vstore16( uchar16 v, size_t index, __global uchar *p );
void __OVERLOAD__ vstore16( short16 v, size_t index, __global short *p );
void __OVERLOAD__ vstore16( ushort16 v, size_t index, __global ushort *p );
void __OVERLOAD__ vstore16( int16 v, size_t index, __global int *p );
void __OVERLOAD__ vstore16( uint16 v, size_t index, __global uint *p );
void __OVERLOAD__ vstore16( long16 v, size_t index, __global long *p );
void __OVERLOAD__ vstore16( ulong16 v, size_t index, __global ulong *p );
void __OVERLOAD__ vstore16( float16 v, size_t index, __global float *p );
void __OVERLOAD__ vstore16( double16 v, size_t index, __global double *p );
void __OVERLOAD__ vstore16( char16 v, size_t index, __local char *p );
void __OVERLOAD__ vstore16( uchar16 v, size_t index, __local uchar *p );
void __OVERLOAD__ vstore16( short16 v, size_t index, __local short *p );
void __OVERLOAD__ vstore16( ushort16 v, size_t index, __local ushort *p );
void __OVERLOAD__ vstore16( int16 v, size_t index, __local int *p );
void __OVERLOAD__ vstore16( uint16 v, size_t index, __local uint *p );
void __OVERLOAD__ vstore16( long16 v, size_t index, __local long *p );
void __OVERLOAD__ vstore16( ulong16 v, size_t index, __local ulong *p );
void __OVERLOAD__ vstore16( float16 v, size_t index, __local float *p );
void __OVERLOAD__ vstore16( double16 v, size_t index, __local double *p );

/* vstore_half
 *
 * Scalar store-as-half prototypes: the source value is a float or a double,
 * the destination is a pointer to half in the unqualified (private),
 * __global or __local address space.  All overloads return void.
 * NOTE(review): the rounding mode of this unsuffixed form is not visible
 * here; presumably the default mode defined by the OpenCL C spec -- confirm.
 * NOTE(review): the double overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm that is intended.
 */
void __OVERLOAD__ vstore_half (float  f, size_t index, half *p);
void __OVERLOAD__ vstore_half (float  f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half (float  f, size_t index, __local half *p);
void __OVERLOAD__ vstore_half(double f, size_t index, half *p);
void __OVERLOAD__ vstore_half(double f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half(double f, size_t index, __local half *p);

/* vstore_half2
 *
 * 2-component store-as-half prototypes: the source is a float2 or double2,
 * the destination is a pointer to half in the unqualified (private),
 * __global or __local address space.  All overloads return void.
 * NOTE(review): presumably converts each component to half and writes 2
 * halves starting at p + index * 2, per the OpenCL C spec -- confirm.
 * NOTE(review): the double2 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm that is intended.
 */
void __OVERLOAD__ vstore_half2(float2 f, size_t index, half *p);
void __OVERLOAD__ vstore_half2(float2 f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half2(float2 f, size_t index, __local half *p);
void __OVERLOAD__ vstore_half2(double2 f, size_t index, half *p);
void __OVERLOAD__ vstore_half2(double2 f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half2(double2 f, size_t index, __local half *p);

/* vstore_half3
 *
 * 3-component store-as-half prototypes: the source is a float3 or double3,
 * the destination is a pointer to half in the unqualified (private),
 * __global or __local address space.  All overloads return void.
 * NOTE(review): presumably converts each component to half and writes 3
 * halves starting at p + index * 3, per the OpenCL C spec -- confirm.
 * NOTE(review): the double3 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm that is intended.
 */
void __OVERLOAD__ vstore_half3(float3 f, size_t index, half *p);
void __OVERLOAD__ vstore_half3(float3 f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half3(float3 f, size_t index, __local half *p);
void __OVERLOAD__ vstore_half3(double3 f, size_t index, half *p);
void __OVERLOAD__ vstore_half3(double3 f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half3(double3 f, size_t index, __local half *p);

/* vstore_half4
 *
 * 4-component store-as-half prototypes: the source is a float4 or double4,
 * the destination is a pointer to half in the unqualified (private),
 * __global or __local address space.  All overloads return void.
 * NOTE(review): presumably converts each component to half and writes 4
 * halves starting at p + index * 4, per the OpenCL C spec -- confirm.
 * NOTE(review): the double4 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm that is intended.
 */
void __OVERLOAD__ vstore_half4(float4 x, size_t index, half *p);
void __OVERLOAD__ vstore_half4(float4 x, size_t index, __global half *p);
void __OVERLOAD__ vstore_half4(float4 x, size_t index, __local half *p);
void __OVERLOAD__ vstore_half4(double4 f, size_t index, half *p);
void __OVERLOAD__ vstore_half4(double4 f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half4(double4 f, size_t index, __local half *p);

/* vstore_half8
 *
 * 8-component store-as-half prototypes: the source is a float8 or double8,
 * the destination is a pointer to half in the unqualified (private),
 * __global or __local address space.  All overloads return void.
 * NOTE(review): presumably converts each component to half and writes 8
 * halves starting at p + index * 8, per the OpenCL C spec -- confirm.
 * NOTE(review): the double8 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm that is intended.
 */
void __OVERLOAD__ vstore_half8(float8 x, size_t index, half *p);
void __OVERLOAD__ vstore_half8(float8 x, size_t index, __global half *p);
void __OVERLOAD__ vstore_half8(float8 x, size_t index, __local half *p);
void __OVERLOAD__ vstore_half8(double8 f, size_t index, half *p);
void __OVERLOAD__ vstore_half8(double8 f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half8(double8 f, size_t index, __local half *p);

/* vstore_half16
 *
 * 16-component store-as-half prototypes: the source is a float16 or
 * double16, the destination is a pointer to half in the unqualified
 * (private), __global or __local address space.  All overloads return void.
 * NOTE(review): presumably converts each component to half and writes 16
 * halves starting at p + index * 16, per the OpenCL C spec -- confirm.
 * NOTE(review): the double16 overloads are not wrapped in a cl_khr_fp64
 * guard in the visible part of this header -- confirm that is intended.
 */
void __OVERLOAD__ vstore_half16(float16 v, size_t index, half *p);
void __OVERLOAD__ vstore_half16(float16 v, size_t index, __global half *p);
void __OVERLOAD__ vstore_half16(float16 v, size_t index, __local half *p);
void __OVERLOAD__ vstore_half16(double16 f, size_t index, half *p);
void __OVERLOAD__ vstore_half16(double16 f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half16(double16 f, size_t index, __local half *p);

/* vstorea_half
 *
 * Scalar store-as-half prototypes under the vstorea_ name: the source value
 * is a float or a double, the destination is a pointer to half in the
 * unqualified (private), __global or __local address space.  All overloads
 * return void.
 * NOTE(review): the "a" forms presumably carry the aligned-access semantics
 * described in the OpenCL C spec -- confirm.
 * NOTE(review): the double overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm that is intended.
 */
void __OVERLOAD__ vstorea_half (float  f, size_t index, half *p);
void __OVERLOAD__ vstorea_half (float  f, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half (float  f, size_t index, __local half *p);
void __OVERLOAD__ vstorea_half(double f, size_t index, half *p);
void __OVERLOAD__ vstorea_half(double f, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half(double f, size_t index, __local half *p);

/* vstorea_half2
 *
 * 2-component store-as-half prototypes under the vstorea_ name: the source
 * is a float2 or double2, the destination is a pointer to half in the
 * unqualified (private), __global or __local address space.  All overloads
 * return void.  The float overloads name the index parameter "i" and the
 * double overloads name it "index"; the types are the same (size_t).
 * NOTE(review): the "a" forms presumably require p to be aligned to the
 * size of the half2 vector, per the OpenCL C spec -- confirm.
 * NOTE(review): the double2 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm that is intended.
 */
void __OVERLOAD__ vstorea_half2(float2 v, size_t i, half *p);
void __OVERLOAD__ vstorea_half2(float2 v, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half2(float2 v, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half2(double2 f, size_t index, half *p);
void __OVERLOAD__ vstorea_half2(double2 f, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half2(double2 f, size_t index, __local half *p);

/* vstorea_half3
 *
 * 3-component store-as-half prototypes under the vstorea_ name: the source
 * is a float3 or double3, the destination is a pointer to half in the
 * unqualified (private), __global or __local address space.  All overloads
 * return void.  The float overloads name the index parameter "i" and the
 * double overloads name it "index"; the types are the same (size_t).
 * NOTE(review): per the OpenCL C spec the aligned 3-component form
 * presumably addresses memory with a stride of 4 halves (p + index * 4)
 * rather than 3 -- confirm.
 * NOTE(review): the double3 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm that is intended.
 */
void __OVERLOAD__ vstorea_half3(float3 v, size_t i, half *p);
void __OVERLOAD__ vstorea_half3(float3 v, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half3(float3 v, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half3(double3 f, size_t index, half *p);
void __OVERLOAD__ vstorea_half3(double3 f, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half3(double3 f, size_t index, __local half *p);

/* vstorea_half4
 *
 * 4-component store-as-half prototypes under the vstorea_ name: the source
 * is a float4 or double4, the destination is a pointer to half in the
 * unqualified (private), __global or __local address space.  All overloads
 * return void.  The float overloads name the index parameter "i" and the
 * double overloads name it "index"; the types are the same (size_t).
 * NOTE(review): the "a" forms presumably require p to be aligned to the
 * size of the half4 vector, per the OpenCL C spec -- confirm.
 * NOTE(review): the double4 overloads are not wrapped in a cl_khr_fp64 guard
 * in the visible part of this header -- confirm that is intended.
 */
void __OVERLOAD__ vstorea_half4(float4 v, size_t i, half *p);
void __OVERLOAD__ vstorea_half4(float4 v, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half4(float4 v, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half4(double4 f, size_t index, half *p);
void __OVERLOAD__ vstorea_half4(double4 f, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half4(double4 f, size_t index, __local half *p);

/* vstorea_half8 */
void __OVERLOAD__ vstorea_half8(float8 v, size_t i, half *p);
void __OVERLOAD__ vstorea_half8(float8 v, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half8(float8 v, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half8(double8 f, size_t index, half *p);
void __OVERLOAD__ vstorea_half8(double8 f, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half8(double8 f, size_t index, __local half *p);

/* vstorea_half16 */
void __OVERLOAD__ vstorea_half16(float16 v, size_t i, half *p);
void __OVERLOAD__ vstorea_half16(float16 v, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half16(float16 v, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half16(double16 f, size_t index, half *p);
void __OVERLOAD__ vstorea_half16(double16 f, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half16(double16 f, size_t index, __local half *p);

/*
 * vstore_half<N>_rte overload set (scalar form plus N = 2, 3, 4, 8, 16).
 *
 * Prototypes only; no definitions appear in this part of the header. Every
 * name is declared six times through __OVERLOAD__ (defined elsewhere in this
 * header): the value is either float<N> or double<N>, and the half pointer is
 * either unqualified (private, the default address space), __global or
 * __local. No __constant overloads are declared.
 *
 * The value parameter is named f, x or x3 depending on the overload; these
 * are prototype-only names and do not affect callers.
 *
 * NOTE(review): behavior is that of the OpenCL built-ins of the same names;
 * the _rte suffix presumably selects round-to-nearest-even (compare
 * __kRoundToNearestEven further down in this header) -- confirm against the
 * OpenCL spec.
 */
/* vstore_half_rte: scalar float / double value */
void __OVERLOAD__ vstore_half_rte(float f, size_t index, half *p);
void __OVERLOAD__ vstore_half_rte(float f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half_rte(float f, size_t index, __local half *p);
void __OVERLOAD__ vstore_half_rte(double f, size_t index, half *p);
void __OVERLOAD__ vstore_half_rte(double f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half_rte(double f, size_t index, __local half *p);

/* vstore_half2_rte: float2 / double2 value */
void __OVERLOAD__ vstore_half2_rte(float2 f, size_t index, half *p);
void __OVERLOAD__ vstore_half2_rte(float2 f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half2_rte(float2 f, size_t index, __local half *p);
void __OVERLOAD__ vstore_half2_rte(double2 x, size_t index, half *p);
void __OVERLOAD__ vstore_half2_rte(double2 x, size_t index, __global half *p);
void __OVERLOAD__ vstore_half2_rte(double2 x, size_t index, __local half *p);

/* vstore_half3_rte: float3 / double3 value */
void __OVERLOAD__ vstore_half3_rte(float3 x3, size_t index, half *p);
void __OVERLOAD__ vstore_half3_rte(float3 x3, size_t index, __global half *p);
void __OVERLOAD__ vstore_half3_rte(float3 x3, size_t index, __local half *p);
void __OVERLOAD__ vstore_half3_rte(double3 x, size_t index, half *p);
void __OVERLOAD__ vstore_half3_rte(double3 x, size_t index, __global half *p);
void __OVERLOAD__ vstore_half3_rte(double3 x, size_t index, __local half *p);

/* vstore_half4_rte: float4 / double4 value */
void __OVERLOAD__ vstore_half4_rte(float4 x, size_t index, half *p);
void __OVERLOAD__ vstore_half4_rte(float4 x, size_t index, __global half *p);
void __OVERLOAD__ vstore_half4_rte(float4 x, size_t index, __local half *p);
void __OVERLOAD__ vstore_half4_rte(double4 x, size_t index, half *p);
void __OVERLOAD__ vstore_half4_rte(double4 x, size_t index, __global half *p);
void __OVERLOAD__ vstore_half4_rte(double4 x, size_t index, __local half *p);

/* vstore_half8_rte: float8 / double8 value */
void __OVERLOAD__ vstore_half8_rte(float8 x, size_t index, half *p);
void __OVERLOAD__ vstore_half8_rte(float8 x, size_t index, __global half *p);
void __OVERLOAD__ vstore_half8_rte(float8 x, size_t index, __local half *p);
void __OVERLOAD__ vstore_half8_rte(double8 x, size_t index, half *p);
void __OVERLOAD__ vstore_half8_rte(double8 x, size_t index, __global half *p);
void __OVERLOAD__ vstore_half8_rte(double8 x, size_t index, __local half *p);

/* vstore_half16_rte: float16 / double16 value */
void __OVERLOAD__ vstore_half16_rte(float16 x, size_t index, half *p);
void __OVERLOAD__ vstore_half16_rte(float16 x, size_t index, __global half *p);
void __OVERLOAD__ vstore_half16_rte(float16 x, size_t index, __local half *p);
void __OVERLOAD__ vstore_half16_rte(double16 x, size_t index, half *p);
void __OVERLOAD__ vstore_half16_rte(double16 x, size_t index, __global half *p);
void __OVERLOAD__ vstore_half16_rte(double16 x, size_t index, __local half *p);

/*
 * vstorea_half<N>_rte overload set (scalar form plus N = 2, 3, 4, 8, 16).
 *
 * Prototypes only; no definitions appear in this part of the header. Every
 * name is declared six times through __OVERLOAD__ (defined elsewhere in this
 * header): the value is either float<N> or double<N>, and the half pointer is
 * either unqualified (private, the default address space), __global or
 * __local. No __constant overloads are declared.
 *
 * Parameter names vary between overloads (f / v / x for the value, i / index
 * for the position); they are prototype-only names and do not affect callers.
 *
 * NOTE(review): behavior is that of the OpenCL built-ins of the same names;
 * the _rte suffix presumably selects round-to-nearest-even (compare
 * __kRoundToNearestEven further down in this header) -- confirm against the
 * OpenCL spec.
 */
/* vstorea_half_rte: scalar float / double value */
void __OVERLOAD__ vstorea_half_rte(float f, size_t index, half *p);
void __OVERLOAD__ vstorea_half_rte(float f, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half_rte(float f, size_t index, __local half *p);
void __OVERLOAD__ vstorea_half_rte(double f, size_t index, half *p);
void __OVERLOAD__ vstorea_half_rte(double f, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half_rte(double f, size_t index, __local half *p);

/* vstorea_half2_rte: float2 / double2 value */
void __OVERLOAD__ vstorea_half2_rte(float2 v, size_t i, half *p);
void __OVERLOAD__ vstorea_half2_rte(float2 v, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half2_rte(float2 v, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half2_rte( double2 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half2_rte( double2 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half2_rte( double2 x, size_t index, __local half *p );

/* vstorea_half3_rte: float3 / double3 value */
void __OVERLOAD__ vstorea_half3_rte(float3 v, size_t i, half *p);
void __OVERLOAD__ vstorea_half3_rte(float3 v, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half3_rte(float3 v, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half3_rte( double3 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half3_rte( double3 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half3_rte( double3 x, size_t index, __local half *p );

/* vstorea_half4_rte: float4 / double4 value */
void __OVERLOAD__ vstorea_half4_rte(float4 v, size_t i, half *p);
void __OVERLOAD__ vstorea_half4_rte(float4 v, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half4_rte(float4 v, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half4_rte( double4 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half4_rte( double4 x, size_t index, __global  half *p );
void __OVERLOAD__ vstorea_half4_rte( double4 x, size_t index, __local half *p );

/* vstorea_half8_rte: float8 / double8 value */
void __OVERLOAD__ vstorea_half8_rte(float8 x, size_t index, half *p);
void __OVERLOAD__ vstorea_half8_rte(float8 x, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half8_rte(float8 x, size_t index, __local half *p);
void __OVERLOAD__ vstorea_half8_rte( double8 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half8_rte( double8 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half8_rte( double8 x, size_t index, __local half *p );

/* vstorea_half16_rte: float16 / double16 value */
void __OVERLOAD__ vstorea_half16_rte(float16 x, size_t index, half *p);
void __OVERLOAD__ vstorea_half16_rte(float16 x, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half16_rte(float16 x, size_t index, __local half *p);
void __OVERLOAD__ vstorea_half16_rte( double16 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half16_rte( double16 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half16_rte( double16 x, size_t index, __local half *p );

/*
 * vstore_half<N>_rtz overload set (scalar form plus N = 2, 3, 4, 8, 16).
 *
 * Prototypes only; no definitions appear in this part of the header. Every
 * name is declared six times through __OVERLOAD__ (defined elsewhere in this
 * header): the value is either float<N> or double<N>, and the half pointer is
 * either unqualified (private, the default address space), __global or
 * __local. No __constant overloads are declared.
 *
 * Parameter names vary between overloads (f / x for the value, i / index for
 * the position); they are prototype-only names and do not affect callers.
 *
 * NOTE(review): behavior is that of the OpenCL built-ins of the same names;
 * the _rtz suffix presumably selects round-toward-zero (compare
 * __kRoundTowardZero further down in this header) -- confirm against the
 * OpenCL spec.
 */
/* vstore_half_rtz: scalar float / double value */
void __OVERLOAD__ vstore_half_rtz(float f, size_t i, half *p);
void __OVERLOAD__ vstore_half_rtz(float f, size_t i, __global half *p);
void __OVERLOAD__ vstore_half_rtz(float f, size_t i, __local half *p);
void __OVERLOAD__ vstore_half_rtz(double f, size_t index, half *p);
void __OVERLOAD__ vstore_half_rtz(double f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half_rtz(double f, size_t index, __local half *p);

/* vstore_half2_rtz: float2 / double2 value */
void __OVERLOAD__ vstore_half2_rtz(float2 f, size_t i, half *p);
void __OVERLOAD__ vstore_half2_rtz(float2 f, size_t i, __global half *p);
void __OVERLOAD__ vstore_half2_rtz(float2 f, size_t i, __local half *p);
void __OVERLOAD__ vstore_half2_rtz(double2 x, size_t index, half *p);
void __OVERLOAD__ vstore_half2_rtz(double2 x, size_t index, __global half *p);
void __OVERLOAD__ vstore_half2_rtz(double2 x, size_t index, __local half *p);

/* vstore_half3_rtz: float3 / double3 value */
void __OVERLOAD__ vstore_half3_rtz(float3 f, size_t i, half *p);
void __OVERLOAD__ vstore_half3_rtz(float3 f, size_t i, __global half *p);
void __OVERLOAD__ vstore_half3_rtz(float3 f, size_t i, __local half *p);
void __OVERLOAD__ vstore_half3_rtz( double3 x, size_t index, half *p );
void __OVERLOAD__ vstore_half3_rtz( double3 x, size_t index, __global half *p );
void __OVERLOAD__ vstore_half3_rtz( double3 x, size_t index, __local half *p );

/* vstore_half4_rtz: float4 / double4 value */
void __OVERLOAD__ vstore_half4_rtz(float4 f, size_t i, half *p);
void __OVERLOAD__ vstore_half4_rtz(float4 f, size_t i, __global half *p);
void __OVERLOAD__ vstore_half4_rtz(float4 f, size_t i, __local half *p);
void __OVERLOAD__ vstore_half4_rtz( double4 x, size_t index, half *p );
void __OVERLOAD__ vstore_half4_rtz( double4 x, size_t index, __global half *p );
void __OVERLOAD__ vstore_half4_rtz( double4 x, size_t index, __local half *p );

/* vstore_half8_rtz: float8 / double8 value */
void __OVERLOAD__ vstore_half8_rtz(float8 f, size_t i, half *p);
void __OVERLOAD__ vstore_half8_rtz(float8 f, size_t i, __global half *p);
void __OVERLOAD__ vstore_half8_rtz(float8 f, size_t i, __local half *p);
void __OVERLOAD__ vstore_half8_rtz( double8 x, size_t index, half *p );
void __OVERLOAD__ vstore_half8_rtz( double8 x, size_t index, __global half *p );
void __OVERLOAD__ vstore_half8_rtz( double8 x, size_t index, __local half *p );

/* vstore_half16_rtz: float16 / double16 value */
void __OVERLOAD__ vstore_half16_rtz(float16 f, size_t i, half *p);
void __OVERLOAD__ vstore_half16_rtz(float16 f, size_t i, __global half *p);
void __OVERLOAD__ vstore_half16_rtz(float16 f, size_t i, __local half *p);
void __OVERLOAD__ vstore_half16_rtz( double16 x, size_t index, half *p );
void __OVERLOAD__ vstore_half16_rtz( double16 x, size_t index, __global half *p );
void __OVERLOAD__ vstore_half16_rtz( double16 x, size_t index, __local half *p );

/*
 * vstorea_half<N>_rtz overload set (scalar form plus N = 2, 3, 4, 8, 16).
 *
 * Prototypes only; no definitions appear in this part of the header. Every
 * name is declared six times through __OVERLOAD__ (defined elsewhere in this
 * header): the value is either float<N> or double<N>, and the half pointer is
 * either unqualified (private, the default address space), __global or
 * __local. No __constant overloads are declared.
 *
 * Parameter names vary between overloads (f / x for the value, i / index for
 * the position); they are prototype-only names and do not affect callers.
 *
 * NOTE(review): behavior is that of the OpenCL built-ins of the same names;
 * the _rtz suffix presumably selects round-toward-zero (compare
 * __kRoundTowardZero further down in this header) -- confirm against the
 * OpenCL spec.
 */
/* vstorea_half_rtz: scalar float / double value */
void __OVERLOAD__ vstorea_half_rtz(float f, size_t i, half *p);
void __OVERLOAD__ vstorea_half_rtz(float f, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half_rtz(float f, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half_rtz(double f, size_t index, half *p);
void __OVERLOAD__ vstorea_half_rtz(double f, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half_rtz(double f, size_t index, __local half *p);

/* vstorea_half2_rtz: float2 / double2 value */
void __OVERLOAD__ vstorea_half2_rtz(float2 f, size_t i, half *p);
void __OVERLOAD__ vstorea_half2_rtz(float2 f, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half2_rtz(float2 f, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half2_rtz( double2 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half2_rtz( double2 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half2_rtz( double2 x, size_t index, __local half *p );

/* vstorea_half3_rtz: float3 / double3 value */
void __OVERLOAD__ vstorea_half3_rtz(float3 f, size_t i, half *p);
void __OVERLOAD__ vstorea_half3_rtz(float3 f, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half3_rtz(float3 f, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half3_rtz( double3 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half3_rtz( double3 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half3_rtz( double3 x, size_t index, __local half *p );

/* vstorea_half4_rtz: float4 / double4 value */
void __OVERLOAD__ vstorea_half4_rtz(float4 f, size_t i, half *p);
void __OVERLOAD__ vstorea_half4_rtz(float4 f, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half4_rtz(float4 f, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half4_rtz( double4 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half4_rtz( double4 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half4_rtz( double4 x, size_t index, __local half *p );

/* vstorea_half8_rtz: float8 / double8 value */
void __OVERLOAD__ vstorea_half8_rtz(float8 f, size_t i, half *p);
void __OVERLOAD__ vstorea_half8_rtz(float8 f, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half8_rtz(float8 f, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half8_rtz( double8 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half8_rtz( double8 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half8_rtz( double8 x, size_t index, __local half *p );

/* vstorea_half16_rtz: float16 / double16 value */
void __OVERLOAD__ vstorea_half16_rtz(float16 f, size_t i, half *p);
void __OVERLOAD__ vstorea_half16_rtz(float16 f, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half16_rtz(float16 f, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half16_rtz( double16 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half16_rtz( double16 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half16_rtz( double16 x, size_t index, __local half *p );

/*
 * vstore_half<N>_rtp overload set (scalar form plus N = 2, 3, 4, 8, 16).
 *
 * Prototypes only; no definitions appear in this part of the header. Every
 * name is declared six times through __OVERLOAD__ (defined elsewhere in this
 * header): the value is either float<N> or double<N>, and the half pointer is
 * either unqualified (private, the default address space), __global or
 * __local. No __constant overloads are declared.
 *
 * Parameter names vary between overloads (f / x for the value, i / index for
 * the position); they are prototype-only names and do not affect callers.
 *
 * NOTE(review): behavior is that of the OpenCL built-ins of the same names;
 * the _rtp suffix presumably selects round-toward-positive-infinity (compare
 * __kRoundTowardInf further down in this header) -- confirm against the
 * OpenCL spec.
 */
/* vstore_half_rtp: scalar float / double value */
void __OVERLOAD__ vstore_half_rtp(float f, size_t index, half *p);
void __OVERLOAD__ vstore_half_rtp(float f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half_rtp(float f, size_t index, __local half *p);
void __OVERLOAD__ vstore_half_rtp(double f, size_t index, half *p);
void __OVERLOAD__ vstore_half_rtp(double f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half_rtp(double f, size_t index, __local half *p);

/* vstore_half2_rtp: float2 / double2 value */
void __OVERLOAD__ vstore_half2_rtp(float2 f, size_t i , half *p);
void __OVERLOAD__ vstore_half2_rtp(float2 f, size_t i , __global half *p);
void __OVERLOAD__ vstore_half2_rtp(float2 f, size_t i , __local half *p);
void __OVERLOAD__ vstore_half2_rtp(double2 x, size_t index, half *p);
void __OVERLOAD__ vstore_half2_rtp(double2 x, size_t index, __global half *p);
void __OVERLOAD__ vstore_half2_rtp(double2 x, size_t index, __local half *p);

/* vstore_half3_rtp: float3 / double3 value */
void __OVERLOAD__ vstore_half3_rtp(float3 f, size_t i , half *p);
void __OVERLOAD__ vstore_half3_rtp(float3 f, size_t i , __global half *p);
void __OVERLOAD__ vstore_half3_rtp(float3 f, size_t i , __local half *p);
void __OVERLOAD__ vstore_half3_rtp( double3 x, size_t index, half *p );
void __OVERLOAD__ vstore_half3_rtp( double3 x, size_t index, __global half *p );
void __OVERLOAD__ vstore_half3_rtp( double3 x, size_t index, __local half *p );

/* vstore_half4_rtp: float4 / double4 value */
void __OVERLOAD__ vstore_half4_rtp(float4 f, size_t i , half *p);
void __OVERLOAD__ vstore_half4_rtp(float4 f, size_t i , __global half *p);
void __OVERLOAD__ vstore_half4_rtp(float4 f, size_t i , __local half *p);
void __OVERLOAD__ vstore_half4_rtp( double4 x, size_t index, half *p );
void __OVERLOAD__ vstore_half4_rtp( double4 x, size_t index, __global half *p );
void __OVERLOAD__ vstore_half4_rtp( double4 x, size_t index, __local half *p );

/* vstore_half8_rtp: float8 / double8 value */
void __OVERLOAD__ vstore_half8_rtp(float8 f, size_t i , half *p);
void __OVERLOAD__ vstore_half8_rtp(float8 f, size_t i , __global half *p);
void __OVERLOAD__ vstore_half8_rtp(float8 f, size_t i , __local half *p);
void __OVERLOAD__ vstore_half8_rtp( double8 x, size_t index, half *p );
void __OVERLOAD__ vstore_half8_rtp( double8 x, size_t index, __global half *p );
void __OVERLOAD__ vstore_half8_rtp( double8 x, size_t index, __local half *p );

/* vstore_half16_rtp: float16 / double16 value */
void __OVERLOAD__ vstore_half16_rtp(float16 f, size_t i , half *p);
void __OVERLOAD__ vstore_half16_rtp(float16 f, size_t i , __global half *p);
void __OVERLOAD__ vstore_half16_rtp(float16 f, size_t i , __local half *p);
void __OVERLOAD__ vstore_half16_rtp( double16 x, size_t index, half *p );
void __OVERLOAD__ vstore_half16_rtp( double16 x, size_t index, __global half *p );
void __OVERLOAD__ vstore_half16_rtp( double16 x, size_t index, __local half *p );

/*
 * vstorea_half<N>_rtp overload set (scalar form plus N = 2, 3, 4, 8, 16).
 *
 * Prototypes only; no definitions appear in this part of the header. Every
 * name is declared six times through __OVERLOAD__ (defined elsewhere in this
 * header): the value is either float<N> or double<N>, and the half pointer is
 * either unqualified (private, the default address space), __global or
 * __local. No __constant overloads are declared.
 *
 * Parameter names vary between overloads (f / x for the value, i / index for
 * the position); they are prototype-only names and do not affect callers.
 *
 * NOTE(review): behavior is that of the OpenCL built-ins of the same names;
 * the _rtp suffix presumably selects round-toward-positive-infinity (compare
 * __kRoundTowardInf further down in this header) -- confirm against the
 * OpenCL spec.
 */
/* vstorea_half_rtp: scalar float / double value */
void __OVERLOAD__ vstorea_half_rtp(float f, size_t index, half *p);
void __OVERLOAD__ vstorea_half_rtp(float f, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half_rtp(float f, size_t index, __local half *p);
void __OVERLOAD__ vstorea_half_rtp(double f, size_t index, half *p);
void __OVERLOAD__ vstorea_half_rtp(double f, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half_rtp(double f, size_t index, __local half *p);

/* vstorea_half2_rtp: float2 / double2 value */
void __OVERLOAD__ vstorea_half2_rtp(float2 f, size_t i, half *p);
void __OVERLOAD__ vstorea_half2_rtp(float2 f, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half2_rtp(float2 f, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half2_rtp( double2 x, size_t index, half *p);
void __OVERLOAD__ vstorea_half2_rtp( double2 x, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half2_rtp( double2 x, size_t index, __local half *p);

/* vstorea_half3_rtp: float3 / double3 value */
void __OVERLOAD__ vstorea_half3_rtp(float3 f, size_t i, half *p);
void __OVERLOAD__ vstorea_half3_rtp(float3 f, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half3_rtp(float3 f, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half3_rtp( double3 x, size_t index, half *p);
void __OVERLOAD__ vstorea_half3_rtp( double3 x, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half3_rtp( double3 x, size_t index, __local half *p);

/* vstorea_half4_rtp: float4 / double4 value */
void __OVERLOAD__ vstorea_half4_rtp(float4 f, size_t i, half *p);
void __OVERLOAD__ vstorea_half4_rtp(float4 f, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half4_rtp(float4 f, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half4_rtp( double4 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half4_rtp( double4 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half4_rtp( double4 x, size_t index, __local half *p );

/* vstorea_half8_rtp: float8 / double8 value */
void __OVERLOAD__ vstorea_half8_rtp(float8 f, size_t i, half *p);
void __OVERLOAD__ vstorea_half8_rtp(float8 f, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half8_rtp(float8 f, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half8_rtp( double8 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half8_rtp( double8 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half8_rtp( double8 x, size_t index, __local half *p );

/* vstorea_half16_rtp: float16 / double16 value */
void __OVERLOAD__ vstorea_half16_rtp(float16 f, size_t i, half *p);
void __OVERLOAD__ vstorea_half16_rtp(float16 f, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half16_rtp(float16 f, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half16_rtp( double16 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half16_rtp( double16 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half16_rtp( double16 x, size_t index, __local half *p );

/*
 * vstore_half<N>_rtn overload set (scalar form plus N = 2, 3, 4, 8, 16).
 *
 * Prototypes only; no definitions appear in this part of the header. Every
 * name is declared six times through __OVERLOAD__ (defined elsewhere in this
 * header): the value is either float<N> or double<N>, and the half pointer is
 * either unqualified (private, the default address space), __global or
 * __local. No __constant overloads are declared.
 *
 * Parameter names vary between overloads (f / x / x3 / v for the value,
 * i / index for the position); they are prototype-only names and do not
 * affect callers.
 *
 * NOTE(review): behavior is that of the OpenCL built-ins of the same names;
 * the _rtn suffix presumably selects round-toward-negative-infinity (compare
 * __kRoundTowardNegativeInf further down in this header) -- confirm against
 * the OpenCL spec.
 */
/* vstore_half_rtn: scalar float / double value */
void __OVERLOAD__ vstore_half_rtn(float f, size_t index, half *p);
void __OVERLOAD__ vstore_half_rtn(float f, size_t index, __global half *p);
void __OVERLOAD__ vstore_half_rtn(float f, size_t index, __local half *p);
void __OVERLOAD__ vstore_half_rtn( double x, size_t index, half *p );
void __OVERLOAD__ vstore_half_rtn( double x, size_t index, __global half *p );
void __OVERLOAD__ vstore_half_rtn( double x, size_t index, __local half *p );

/* vstore_half2_rtn: float2 / double2 value */
void __OVERLOAD__ vstore_half2_rtn(float2 f, size_t i, half *p);
void __OVERLOAD__ vstore_half2_rtn(float2 f, size_t i, __global half *p);
void __OVERLOAD__ vstore_half2_rtn(float2 f, size_t i, __local half *p);
void __OVERLOAD__ vstore_half2_rtn(double2 x, size_t index, half *p);
void __OVERLOAD__ vstore_half2_rtn(double2 x, size_t index, __global half *p);
void __OVERLOAD__ vstore_half2_rtn(double2 x, size_t index, __local half *p);

/* vstore_half3_rtn: float3 / double3 value */
void __OVERLOAD__ vstore_half3_rtn(float3 x3, size_t i, half *p);
void __OVERLOAD__ vstore_half3_rtn(float3 x3, size_t i, __global half *p);
void __OVERLOAD__ vstore_half3_rtn(float3 x3, size_t i, __local half *p);
void __OVERLOAD__ vstore_half3_rtn( double3 x, size_t index, half *p );
void __OVERLOAD__ vstore_half3_rtn( double3 x, size_t index, __global half *p );
void __OVERLOAD__ vstore_half3_rtn( double3 x, size_t index, __local half *p );

/* vstore_half4_rtn: float4 / double4 value */
void __OVERLOAD__ vstore_half4_rtn(float4 x, size_t i, half *p);
void __OVERLOAD__ vstore_half4_rtn(float4 x, size_t i, __global half *p);
void __OVERLOAD__ vstore_half4_rtn(float4 x, size_t i, __local half *p);
void __OVERLOAD__ vstore_half4_rtn( double4 x, size_t index, half *p );
void __OVERLOAD__ vstore_half4_rtn( double4 x, size_t index, __global half *p );
void __OVERLOAD__ vstore_half4_rtn( double4 x, size_t index, __local half *p );

/* vstore_half8_rtn: float8 / double8 value */
void __OVERLOAD__ vstore_half8_rtn(float8 x, size_t i, half *p);
void __OVERLOAD__ vstore_half8_rtn(float8 x, size_t i, __global half *p);
void __OVERLOAD__ vstore_half8_rtn(float8 x, size_t i, __local half *p);
void __OVERLOAD__ vstore_half8_rtn( double8 x, size_t index, half *p );
void __OVERLOAD__ vstore_half8_rtn( double8 x, size_t index, __global half *p );
void __OVERLOAD__ vstore_half8_rtn( double8 x, size_t index, __local half *p );

/* vstore_half16_rtn: float16 / double16 value */
void __OVERLOAD__ vstore_half16_rtn(float16 v, size_t i, half *p);
void __OVERLOAD__ vstore_half16_rtn(float16 v, size_t i, __global half *p);
void __OVERLOAD__ vstore_half16_rtn(float16 v, size_t i, __local half *p);
void __OVERLOAD__ vstore_half16_rtn( double16 x, size_t index, half *p );
void __OVERLOAD__ vstore_half16_rtn( double16 x, size_t index, __global half *p );
void __OVERLOAD__ vstore_half16_rtn( double16 x, size_t index, __local half *p );

/*
 * vstorea_half<N>_rtn overload set (scalar form plus N = 2, 3, 4, 8, 16).
 *
 * Prototypes only; no definitions appear in this part of the header. Every
 * name is declared six times through __OVERLOAD__ (defined elsewhere in this
 * header): the value is either float<N> or double<N>, and the half pointer is
 * either unqualified (private, the default address space), __global or
 * __local. No __constant overloads are declared.
 *
 * Parameter names vary between overloads (f / x / x3 / v for the value,
 * i / index for the position); they are prototype-only names and do not
 * affect callers.
 *
 * NOTE(review): behavior is that of the OpenCL built-ins of the same names;
 * the _rtn suffix presumably selects round-toward-negative-infinity (compare
 * __kRoundTowardNegativeInf further down in this header) -- confirm against
 * the OpenCL spec.
 */
/* vstorea_half_rtn: scalar float / double value */
void __OVERLOAD__ vstorea_half_rtn(float f, size_t index, half *p);
void __OVERLOAD__ vstorea_half_rtn(float f, size_t index, __global half *p);
void __OVERLOAD__ vstorea_half_rtn(float f, size_t index, __local half *p);
void __OVERLOAD__ vstorea_half_rtn( double x, size_t index, half *p );
void __OVERLOAD__ vstorea_half_rtn( double x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half_rtn( double x, size_t index, __local half *p );

/* vstorea_half2_rtn: float2 / double2 value */
void __OVERLOAD__ vstorea_half2_rtn(float2 f, size_t i, half *p);
void __OVERLOAD__ vstorea_half2_rtn(float2 f, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half2_rtn(float2 f, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half2_rtn( double2 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half2_rtn( double2 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half2_rtn( double2 x, size_t index, __local half *p );

/* vstorea_half3_rtn: float3 / double3 value */
void __OVERLOAD__ vstorea_half3_rtn(float3 x3, size_t i, half *p);
void __OVERLOAD__ vstorea_half3_rtn(float3 x3, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half3_rtn(float3 x3, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half3_rtn( double3 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half3_rtn( double3 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half3_rtn( double3 x, size_t index, __local half *p );

/* vstorea_half4_rtn: float4 / double4 value */
void __OVERLOAD__ vstorea_half4_rtn(float4 x, size_t i, half *p);
void __OVERLOAD__ vstorea_half4_rtn(float4 x, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half4_rtn(float4 x, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half4_rtn( double4 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half4_rtn( double4 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half4_rtn( double4 x, size_t index, __local half *p );

/* vstorea_half8_rtn: float8 / double8 value */
void __OVERLOAD__ vstorea_half8_rtn(float8 x, size_t i, half *p);
void __OVERLOAD__ vstorea_half8_rtn(float8 x, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half8_rtn(float8 x, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half8_rtn( double8 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half8_rtn( double8 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half8_rtn( double8 x, size_t index, __local half *p );

/* vstorea_half16_rtn: float16 / double16 value */
void __OVERLOAD__ vstorea_half16_rtn(float16 v, size_t i, half *p);
void __OVERLOAD__ vstorea_half16_rtn(float16 v, size_t i, __global half *p);
void __OVERLOAD__ vstorea_half16_rtn(float16 v, size_t i, __local half *p);
void __OVERLOAD__ vstorea_half16_rtn( double16 x, size_t index, half *p );
void __OVERLOAD__ vstorea_half16_rtn( double16 x, size_t index, __global half *p );
void __OVERLOAD__ vstorea_half16_rtn( double16 x, size_t index, __local half *p );


// make_<type><N>() vector constructors: kept for backward compatibility with the initial release, although they are not part of the OpenCL 1.0 spec.
// make_uchar<N>(...): legacy constructor macros that expand to a uchar<N>
// vector literal built from N components. Each argument is parenthesized, and
// the whole expansion is wrapped in parentheses so that it remains a single
// operand when used after sizeof or next to postfix/unary operators.
#define make_uchar2(A,B) ((uchar2)((A),(B)))
#define make_uchar3(A,B,C) ((uchar3)((A),(B),(C)))
#define make_uchar4(A,B,C,D) ((uchar4)((A),(B),(C),(D)))
#define make_uchar8(A,B,C,D,E,F,G,H) ((uchar8)((A),(B),(C),(D),(E),(F),(G),(H)))
#define make_uchar16(A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P) ((uchar16)((A),(B),(C),(D),(E),(F),(G),(H),(I),(J),(K),(L),(M),(N),(O),(P)))

// make_char<N>(...): legacy constructor macros that expand to a char<N>
// vector literal built from N components. Each argument is parenthesized, and
// the whole expansion is wrapped in parentheses so that it remains a single
// operand when used after sizeof or next to postfix/unary operators.
#define make_char2(A,B) ((char2)((A),(B)))
#define make_char3(A,B,C) ((char3)((A),(B),(C)))
#define make_char4(A,B,C,D) ((char4)((A),(B),(C),(D)))
#define make_char8(A,B,C,D,E,F,G,H) ((char8)((A),(B),(C),(D),(E),(F),(G),(H)))
#define make_char16(A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P) ((char16)((A),(B),(C),(D),(E),(F),(G),(H),(I),(J),(K),(L),(M),(N),(O),(P)))

// make_short<N>(...): legacy constructor macros that expand to a short<N>
// vector literal built from N components. Each argument is parenthesized, and
// the whole expansion is wrapped in parentheses so that it remains a single
// operand when used after sizeof or next to postfix/unary operators.
#define make_short2(A,B) ((short2)((A),(B)))
#define make_short3(A,B,C) ((short3)((A),(B),(C)))
#define make_short4(A,B,C,D) ((short4)((A),(B),(C),(D)))
#define make_short8(A,B,C,D,E,F,G,H) ((short8)((A),(B),(C),(D),(E),(F),(G),(H)))
#define make_short16(A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P) ((short16)((A),(B),(C),(D),(E),(F),(G),(H),(I),(J),(K),(L),(M),(N),(O),(P)))

// make_ushort<N>(...): legacy constructor macros that expand to a ushort<N>
// vector literal built from N components. Each argument is parenthesized, and
// the whole expansion is wrapped in parentheses so that it remains a single
// operand when used after sizeof or next to postfix/unary operators.
#define make_ushort2(A,B) ((ushort2)((A),(B)))
#define make_ushort3(A,B,C) ((ushort3)((A),(B),(C)))
#define make_ushort4(A,B,C,D) ((ushort4)((A),(B),(C),(D)))
#define make_ushort8(A,B,C,D,E,F,G,H) ((ushort8)((A),(B),(C),(D),(E),(F),(G),(H)))
#define make_ushort16(A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P) ((ushort16)((A),(B),(C),(D),(E),(F),(G),(H),(I),(J),(K),(L),(M),(N),(O),(P)))

// make_int<N>(...): legacy constructor macros that expand to an int<N>
// vector literal built from N components. Each argument is parenthesized, and
// the whole expansion is wrapped in parentheses so that it remains a single
// operand when used after sizeof or next to postfix/unary operators.
#define make_int2(A,B) ((int2)((A),(B)))
#define make_int3(A,B,C) ((int3)((A),(B),(C)))
#define make_int4(A,B,C,D) ((int4)((A),(B),(C),(D)))
#define make_int8(A,B,C,D,E,F,G,H) ((int8)((A),(B),(C),(D),(E),(F),(G),(H)))
#define make_int16(A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P) ((int16)((A),(B),(C),(D),(E),(F),(G),(H),(I),(J),(K),(L),(M),(N),(O),(P)))

// make_uint<N>(...): legacy constructor macros that expand to a uint<N>
// vector literal built from N components. Each argument is parenthesized, and
// the whole expansion is wrapped in parentheses so that it remains a single
// operand when used after sizeof or next to postfix/unary operators.
#define make_uint2(A,B) ((uint2)((A),(B)))
#define make_uint3(A,B,C) ((uint3)((A),(B),(C)))
#define make_uint4(A,B,C,D) ((uint4)((A),(B),(C),(D)))
#define make_uint8(A,B,C,D,E,F,G,H) ((uint8)((A),(B),(C),(D),(E),(F),(G),(H)))
#define make_uint16(A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P) ((uint16)((A),(B),(C),(D),(E),(F),(G),(H),(I),(J),(K),(L),(M),(N),(O),(P)))

// make_long<N>(...): legacy constructor macros that expand to a long<N>
// vector literal built from N components. Each argument is parenthesized, and
// the whole expansion is wrapped in parentheses so that it remains a single
// operand when used after sizeof or next to postfix/unary operators.
#define make_long2(A,B) ((long2)((A),(B)))
#define make_long3(A,B,C) ((long3)((A),(B),(C)))
#define make_long4(A,B,C,D) ((long4)((A),(B),(C),(D)))
#define make_long8(A,B,C,D,E,F,G,H) ((long8)((A),(B),(C),(D),(E),(F),(G),(H)))
#define make_long16(A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P) ((long16)((A),(B),(C),(D),(E),(F),(G),(H),(I),(J),(K),(L),(M),(N),(O),(P)))

// make_ulong<N>(...): legacy constructor macros that expand to a ulong<N>
// vector literal built from N components. Each argument is parenthesized, and
// the whole expansion is wrapped in parentheses so that it remains a single
// operand when used after sizeof or next to postfix/unary operators.
#define make_ulong2(A,B) ((ulong2)((A),(B)))
#define make_ulong3(A,B,C) ((ulong3)((A),(B),(C)))
#define make_ulong4(A,B,C,D) ((ulong4)((A),(B),(C),(D)))
#define make_ulong8(A,B,C,D,E,F,G,H) ((ulong8)((A),(B),(C),(D),(E),(F),(G),(H)))
#define make_ulong16(A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P) ((ulong16)((A),(B),(C),(D),(E),(F),(G),(H),(I),(J),(K),(L),(M),(N),(O),(P)))

// make_float<N>(...): legacy constructor macros that expand to a float<N>
// vector literal built from N components; make_float3_SPI has the same
// expansion as make_float3. Each argument is parenthesized, and the whole
// expansion is wrapped in parentheses so that it remains a single operand
// when used after sizeof or next to postfix/unary operators.
#define make_float2(A,B) ((float2)((A),(B)))
#define make_float3(A,B,C) ((float3)((A),(B),(C)))
#define make_float3_SPI(A,B,C) ((float3)((A),(B),(C)))
#define make_float4(A,B,C,D) ((float4)((A),(B),(C),(D)))
#define make_float8(A,B,C,D,E,F,G,H) ((float8)((A),(B),(C),(D),(E),(F),(G),(H)))
#define make_float16(A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P) ((float16)((A),(B),(C),(D),(E),(F),(G),(H),(I),(J),(K),(L),(M),(N),(O),(P)))

// make_double<N>(...): legacy constructor macros that expand to a double<N>
// vector literal built from N components. Each argument is parenthesized, and
// the whole expansion is wrapped in parentheses so that it remains a single
// operand when used after sizeof or next to postfix/unary operators.
#define make_double2(A,B) ((double2)((A),(B)))
#define make_double3(A,B,C) ((double3)((A),(B),(C)))
#define make_double4(A,B,C,D) ((double4)((A),(B),(C),(D)))
#define make_double8(A,B,C,D,E,F,G,H) ((double8)((A),(B),(C),(D),(E),(F),(G),(H)))
#define make_double16(A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P) ((double16)((A),(B),(C),(D),(E),(F),(G),(H),(I),(J),(K),(L),(M),(N),(O),(P)))


// 5.2.3 convert_ operators
/*
 * Rounding-mode selector. In this header it is passed as the third argument
 * of __builtin_convert by the convert_* macros, which use
 * __kDefaultRoundingMode. The numeric values are spelled out explicitly.
 */
typedef enum {
    __kDefaultRoundingMode    = 0,  /* no explicit rounding mode requested */
    __kRoundToNearestEven     = 1,
    __kRoundTowardNegativeInf = 2,
    __kRoundTowardInf         = 3,
    __kRoundTowardZero        = 4
} __clRoundingMode;

/*
 * Saturation selector. In this header it is passed as the fourth argument of
 * __builtin_convert: the plain convert_<type> macros pass __kUnsaturated and
 * the convert_<type>_sat macros pass __kSaturated.
 */
typedef enum {
    __kUnsaturated = 0,
    __kSaturated   = 1
} __clSaturationMode;

#if defined( __PTX__ )

    //type
    // Conversions to scalar types: __kDefaultRoundingMode, __kUnsaturated.
    #define convert_char(__x)    __builtin_convert(__x, char, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_uchar(__x)   __builtin_convert(__x, uchar, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_short(__x)   __builtin_convert(__x, short, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_ushort(__x)  __builtin_convert(__x, ushort, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_int(__x)     __builtin_convert(__x, int, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_uint(__x)    __builtin_convert(__x, uint, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_long(__x)    __builtin_convert(__x, long, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_ulong(__x)   __builtin_convert(__x, ulong, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_float(__x)   __builtin_convert(__x, float, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_double(__x)  __builtin_convert(__x, double, __kDefaultRoundingMode, __kUnsaturated )

    // Conversions to scalar types: __kDefaultRoundingMode, __kSaturated.
    #define convert_char_sat(__x)    __builtin_convert(__x, char, __kDefaultRoundingMode, __kSaturated )
    #define convert_uchar_sat(__x)   __builtin_convert(__x, uchar, __kDefaultRoundingMode, __kSaturated )
    #define convert_short_sat(__x)   __builtin_convert(__x, short, __kDefaultRoundingMode, __kSaturated )
    #define convert_ushort_sat(__x)  __builtin_convert(__x, ushort, __kDefaultRoundingMode, __kSaturated )
    #define convert_int_sat(__x)     __builtin_convert(__x, int, __kDefaultRoundingMode, __kSaturated )
    #define convert_uint_sat(__x)    __builtin_convert(__x, uint, __kDefaultRoundingMode, __kSaturated )
    #define convert_long_sat(__x)    __builtin_convert(__x, long, __kDefaultRoundingMode, __kSaturated )
    #define convert_ulong_sat(__x)   __builtin_convert(__x, ulong, __kDefaultRoundingMode, __kSaturated )
    #define convert_float_sat(__x)   __builtin_convert(__x, float, __kDefaultRoundingMode, __kSaturated )
    #define convert_double_sat(__x)  __builtin_convert(__x, double, __kDefaultRoundingMode, __kSaturated )

    // Conversions to scalar types: __kRoundToNearestEven, __kUnsaturated.
    #define convert_char_rte(__x)    __builtin_convert(__x, char, __kRoundToNearestEven, __kUnsaturated )
    #define convert_uchar_rte(__x)   __builtin_convert(__x, uchar, __kRoundToNearestEven, __kUnsaturated )
    #define convert_short_rte(__x)   __builtin_convert(__x, short, __kRoundToNearestEven, __kUnsaturated )
    #define convert_ushort_rte(__x)  __builtin_convert(__x, ushort, __kRoundToNearestEven, __kUnsaturated )
    #define convert_int_rte(__x)     __builtin_convert(__x, int, __kRoundToNearestEven, __kUnsaturated )
    #define convert_uint_rte(__x)    __builtin_convert(__x, uint, __kRoundToNearestEven, __kUnsaturated )
    #define convert_long_rte(__x)    __builtin_convert(__x, long, __kRoundToNearestEven, __kUnsaturated )
    #define convert_ulong_rte(__x)   __builtin_convert(__x, ulong, __kRoundToNearestEven, __kUnsaturated )
    #define convert_float_rte(__x)   __builtin_convert(__x, float, __kRoundToNearestEven, __kUnsaturated )
    #define convert_double_rte(__x)  __builtin_convert(__x, double, __kRoundToNearestEven, __kUnsaturated )

    // Conversions to scalar types: __kRoundToNearestEven, __kSaturated.
    #define convert_char_sat_rte(__x)    __builtin_convert(__x, char, __kRoundToNearestEven, __kSaturated )
    #define convert_uchar_sat_rte(__x)   __builtin_convert(__x, uchar, __kRoundToNearestEven, __kSaturated )
    #define convert_short_sat_rte(__x)   __builtin_convert(__x, short, __kRoundToNearestEven, __kSaturated )
    #define convert_ushort_sat_rte(__x)  __builtin_convert(__x, ushort, __kRoundToNearestEven, __kSaturated )
    #define convert_int_sat_rte(__x)     __builtin_convert(__x, int, __kRoundToNearestEven, __kSaturated )
    #define convert_uint_sat_rte(__x)    __builtin_convert(__x, uint, __kRoundToNearestEven, __kSaturated )
    #define convert_long_sat_rte(__x)    __builtin_convert(__x, long, __kRoundToNearestEven, __kSaturated )
    #define convert_ulong_sat_rte(__x)   __builtin_convert(__x, ulong, __kRoundToNearestEven, __kSaturated )
    #define convert_float_sat_rte(__x)   __builtin_convert(__x, float, __kRoundToNearestEven, __kSaturated )
    #define convert_double_sat_rte(__x)  __builtin_convert(__x, double, __kRoundToNearestEven, __kSaturated )

    // Conversions to scalar types: __kRoundTowardNegativeInf, __kUnsaturated.
    #define convert_char_rtn(__x)    __builtin_convert(__x, char, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_uchar_rtn(__x)   __builtin_convert(__x, uchar, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_short_rtn(__x)   __builtin_convert(__x, short, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_ushort_rtn(__x)  __builtin_convert(__x, ushort, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_int_rtn(__x)     __builtin_convert(__x, int, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_uint_rtn(__x)    __builtin_convert(__x, uint, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_long_rtn(__x)    __builtin_convert(__x, long, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_ulong_rtn(__x)   __builtin_convert(__x, ulong, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_float_rtn(__x)   __builtin_convert(__x, float, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_double_rtn(__x)  __builtin_convert(__x, double, __kRoundTowardNegativeInf, __kUnsaturated )

    // Conversions to scalar types: __kRoundTowardNegativeInf, __kSaturated.
    #define convert_char_sat_rtn(__x)    __builtin_convert(__x, char, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_uchar_sat_rtn(__x)   __builtin_convert(__x, uchar, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_short_sat_rtn(__x)   __builtin_convert(__x, short, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_ushort_sat_rtn(__x)  __builtin_convert(__x, ushort, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_int_sat_rtn(__x)     __builtin_convert(__x, int, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_uint_sat_rtn(__x)    __builtin_convert(__x, uint, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_long_sat_rtn(__x)    __builtin_convert(__x, long, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_ulong_sat_rtn(__x)   __builtin_convert(__x, ulong, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_float_sat_rtn(__x)   __builtin_convert(__x, float, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_double_sat_rtn(__x)  __builtin_convert(__x, double, __kRoundTowardNegativeInf, __kSaturated )

    // Conversions to scalar types: __kRoundTowardInf, __kUnsaturated.
    #define convert_char_rtp(__x)    __builtin_convert(__x, char, __kRoundTowardInf, __kUnsaturated )
    #define convert_uchar_rtp(__x)   __builtin_convert(__x, uchar, __kRoundTowardInf, __kUnsaturated )
    #define convert_short_rtp(__x)   __builtin_convert(__x, short, __kRoundTowardInf, __kUnsaturated )
    #define convert_ushort_rtp(__x)  __builtin_convert(__x, ushort, __kRoundTowardInf, __kUnsaturated )
    #define convert_int_rtp(__x)     __builtin_convert(__x, int, __kRoundTowardInf, __kUnsaturated )
    #define convert_uint_rtp(__x)    __builtin_convert(__x, uint, __kRoundTowardInf, __kUnsaturated )
    #define convert_long_rtp(__x)    __builtin_convert(__x, long, __kRoundTowardInf, __kUnsaturated )
    #define convert_ulong_rtp(__x)   __builtin_convert(__x, ulong, __kRoundTowardInf, __kUnsaturated )
    #define convert_float_rtp(__x)   __builtin_convert(__x, float, __kRoundTowardInf, __kUnsaturated )
    #define convert_double_rtp(__x)  __builtin_convert(__x, double, __kRoundTowardInf, __kUnsaturated )

    // Conversions to scalar types: __kRoundTowardInf, __kSaturated.
    #define convert_char_sat_rtp(__x)    __builtin_convert(__x, char, __kRoundTowardInf, __kSaturated )
    #define convert_uchar_sat_rtp(__x)   __builtin_convert(__x, uchar, __kRoundTowardInf, __kSaturated )
    #define convert_short_sat_rtp(__x)   __builtin_convert(__x, short, __kRoundTowardInf, __kSaturated )
    #define convert_ushort_sat_rtp(__x)  __builtin_convert(__x, ushort, __kRoundTowardInf, __kSaturated )
    #define convert_int_sat_rtp(__x)     __builtin_convert(__x, int, __kRoundTowardInf, __kSaturated )
    #define convert_uint_sat_rtp(__x)    __builtin_convert(__x, uint, __kRoundTowardInf, __kSaturated )
    #define convert_long_sat_rtp(__x)    __builtin_convert(__x, long, __kRoundTowardInf, __kSaturated )
    #define convert_ulong_sat_rtp(__x)   __builtin_convert(__x, ulong, __kRoundTowardInf, __kSaturated )
    #define convert_float_sat_rtp(__x)   __builtin_convert(__x, float, __kRoundTowardInf, __kSaturated )
    #define convert_double_sat_rtp(__x)  __builtin_convert(__x, double, __kRoundTowardInf, __kSaturated )

    // Conversions to scalar types: __kRoundTowardZero, __kUnsaturated.
    #define convert_char_rtz(__x)    __builtin_convert(__x, char, __kRoundTowardZero, __kUnsaturated )
    #define convert_uchar_rtz(__x)   __builtin_convert(__x, uchar, __kRoundTowardZero, __kUnsaturated )
    #define convert_short_rtz(__x)   __builtin_convert(__x, short, __kRoundTowardZero, __kUnsaturated )
    #define convert_ushort_rtz(__x)  __builtin_convert(__x, ushort, __kRoundTowardZero, __kUnsaturated )
    #define convert_int_rtz(__x)     __builtin_convert(__x, int, __kRoundTowardZero, __kUnsaturated )
    #define convert_uint_rtz(__x)    __builtin_convert(__x, uint, __kRoundTowardZero, __kUnsaturated )
    #define convert_long_rtz(__x)    __builtin_convert(__x, long, __kRoundTowardZero, __kUnsaturated )
    #define convert_ulong_rtz(__x)   __builtin_convert(__x, ulong, __kRoundTowardZero, __kUnsaturated )
    #define convert_float_rtz(__x)   __builtin_convert(__x, float, __kRoundTowardZero, __kUnsaturated )
    #define convert_double_rtz(__x)  __builtin_convert(__x, double, __kRoundTowardZero, __kUnsaturated )

    // Conversions to scalar types: __kRoundTowardZero, __kSaturated.
    #define convert_char_sat_rtz(__x)    __builtin_convert(__x, char, __kRoundTowardZero, __kSaturated )
    #define convert_uchar_sat_rtz(__x)   __builtin_convert(__x, uchar, __kRoundTowardZero, __kSaturated )
    #define convert_short_sat_rtz(__x)   __builtin_convert(__x, short, __kRoundTowardZero, __kSaturated )
    #define convert_ushort_sat_rtz(__x)  __builtin_convert(__x, ushort, __kRoundTowardZero, __kSaturated )
    #define convert_int_sat_rtz(__x)     __builtin_convert(__x, int, __kRoundTowardZero, __kSaturated )
    #define convert_uint_sat_rtz(__x)    __builtin_convert(__x, uint, __kRoundTowardZero, __kSaturated )
    #define convert_long_sat_rtz(__x)    __builtin_convert(__x, long, __kRoundTowardZero, __kSaturated )
    #define convert_ulong_sat_rtz(__x)   __builtin_convert(__x, ulong, __kRoundTowardZero, __kSaturated )
    #define convert_float_sat_rtz(__x)   __builtin_convert(__x, float, __kRoundTowardZero, __kSaturated )
    #define convert_double_sat_rtz(__x)  __builtin_convert(__x, double, __kRoundTowardZero, __kSaturated )

    //type2
    // Conversions to *2 vector types: __kDefaultRoundingMode, __kUnsaturated.
    #define convert_char2(__x)    __builtin_convert(__x, char2, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_uchar2(__x)   __builtin_convert(__x, uchar2, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_short2(__x)   __builtin_convert(__x, short2, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_ushort2(__x)  __builtin_convert(__x, ushort2, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_int2(__x)     __builtin_convert(__x, int2, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_uint2(__x)    __builtin_convert(__x, uint2, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_long2(__x)    __builtin_convert(__x, long2, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_ulong2(__x)   __builtin_convert(__x, ulong2, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_float2(__x)   __builtin_convert(__x, float2, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_double2(__x)  __builtin_convert(__x, double2, __kDefaultRoundingMode, __kUnsaturated )

    // Conversions to *2 vector types: __kDefaultRoundingMode, __kSaturated.
    #define convert_char2_sat(__x)    __builtin_convert(__x, char2, __kDefaultRoundingMode, __kSaturated )
    #define convert_uchar2_sat(__x)   __builtin_convert(__x, uchar2, __kDefaultRoundingMode, __kSaturated )
    #define convert_short2_sat(__x)   __builtin_convert(__x, short2, __kDefaultRoundingMode, __kSaturated )
    #define convert_ushort2_sat(__x)  __builtin_convert(__x, ushort2, __kDefaultRoundingMode, __kSaturated )
    #define convert_int2_sat(__x)     __builtin_convert(__x, int2, __kDefaultRoundingMode, __kSaturated )
    #define convert_uint2_sat(__x)    __builtin_convert(__x, uint2, __kDefaultRoundingMode, __kSaturated )
    #define convert_long2_sat(__x)    __builtin_convert(__x, long2, __kDefaultRoundingMode, __kSaturated )
    #define convert_ulong2_sat(__x)   __builtin_convert(__x, ulong2, __kDefaultRoundingMode, __kSaturated )
    #define convert_float2_sat(__x)   __builtin_convert(__x, float2, __kDefaultRoundingMode, __kSaturated )
    #define convert_double2_sat(__x)  __builtin_convert(__x, double2, __kDefaultRoundingMode, __kSaturated )

    // Conversions to *2 vector types: __kRoundToNearestEven, __kUnsaturated.
    #define convert_char2_rte(__x)    __builtin_convert(__x, char2, __kRoundToNearestEven, __kUnsaturated )
    #define convert_uchar2_rte(__x)   __builtin_convert(__x, uchar2, __kRoundToNearestEven, __kUnsaturated )
    #define convert_short2_rte(__x)   __builtin_convert(__x, short2, __kRoundToNearestEven, __kUnsaturated )
    #define convert_ushort2_rte(__x)  __builtin_convert(__x, ushort2, __kRoundToNearestEven, __kUnsaturated )
    #define convert_int2_rte(__x)     __builtin_convert(__x, int2, __kRoundToNearestEven, __kUnsaturated )
    #define convert_uint2_rte(__x)    __builtin_convert(__x, uint2, __kRoundToNearestEven, __kUnsaturated )
    #define convert_long2_rte(__x)    __builtin_convert(__x, long2, __kRoundToNearestEven, __kUnsaturated )
    #define convert_ulong2_rte(__x)   __builtin_convert(__x, ulong2, __kRoundToNearestEven, __kUnsaturated )
    #define convert_float2_rte(__x)   __builtin_convert(__x, float2, __kRoundToNearestEven, __kUnsaturated )
    #define convert_double2_rte(__x)  __builtin_convert(__x, double2, __kRoundToNearestEven, __kUnsaturated )

    // Conversions to *2 vector types: __kRoundToNearestEven, __kSaturated.
    #define convert_char2_sat_rte(__x)    __builtin_convert(__x, char2, __kRoundToNearestEven, __kSaturated )
    #define convert_uchar2_sat_rte(__x)   __builtin_convert(__x, uchar2, __kRoundToNearestEven, __kSaturated )
    #define convert_short2_sat_rte(__x)   __builtin_convert(__x, short2, __kRoundToNearestEven, __kSaturated )
    #define convert_ushort2_sat_rte(__x)  __builtin_convert(__x, ushort2, __kRoundToNearestEven, __kSaturated )
    #define convert_int2_sat_rte(__x)     __builtin_convert(__x, int2, __kRoundToNearestEven, __kSaturated )
    #define convert_uint2_sat_rte(__x)    __builtin_convert(__x, uint2, __kRoundToNearestEven, __kSaturated )
    #define convert_long2_sat_rte(__x)    __builtin_convert(__x, long2, __kRoundToNearestEven, __kSaturated )
    #define convert_ulong2_sat_rte(__x)   __builtin_convert(__x, ulong2, __kRoundToNearestEven, __kSaturated )
    #define convert_float2_sat_rte(__x)   __builtin_convert(__x, float2, __kRoundToNearestEven, __kSaturated )
    #define convert_double2_sat_rte(__x)  __builtin_convert(__x, double2, __kRoundToNearestEven, __kSaturated )

    // Conversions to *2 vector types: __kRoundTowardNegativeInf, __kUnsaturated.
    #define convert_char2_rtn(__x)    __builtin_convert(__x, char2, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_uchar2_rtn(__x)   __builtin_convert(__x, uchar2, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_short2_rtn(__x)   __builtin_convert(__x, short2, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_ushort2_rtn(__x)  __builtin_convert(__x, ushort2, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_int2_rtn(__x)     __builtin_convert(__x, int2, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_uint2_rtn(__x)    __builtin_convert(__x, uint2, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_long2_rtn(__x)    __builtin_convert(__x, long2, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_ulong2_rtn(__x)   __builtin_convert(__x, ulong2, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_float2_rtn(__x)   __builtin_convert(__x, float2, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_double2_rtn(__x)  __builtin_convert(__x, double2, __kRoundTowardNegativeInf, __kUnsaturated )

    // Conversions to *2 vector types: __kRoundTowardNegativeInf, __kSaturated.
    #define convert_char2_sat_rtn(__x)    __builtin_convert(__x, char2, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_uchar2_sat_rtn(__x)   __builtin_convert(__x, uchar2, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_short2_sat_rtn(__x)   __builtin_convert(__x, short2, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_ushort2_sat_rtn(__x)  __builtin_convert(__x, ushort2, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_int2_sat_rtn(__x)     __builtin_convert(__x, int2, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_uint2_sat_rtn(__x)    __builtin_convert(__x, uint2, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_long2_sat_rtn(__x)    __builtin_convert(__x, long2, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_ulong2_sat_rtn(__x)   __builtin_convert(__x, ulong2, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_float2_sat_rtn(__x)   __builtin_convert(__x, float2, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_double2_sat_rtn(__x)  __builtin_convert(__x, double2, __kRoundTowardNegativeInf, __kSaturated )

    // Conversions to *2 vector types: __kRoundTowardInf, __kUnsaturated.
    #define convert_char2_rtp(__x)    __builtin_convert(__x, char2, __kRoundTowardInf, __kUnsaturated )
    #define convert_uchar2_rtp(__x)   __builtin_convert(__x, uchar2, __kRoundTowardInf, __kUnsaturated )
    #define convert_short2_rtp(__x)   __builtin_convert(__x, short2, __kRoundTowardInf, __kUnsaturated )
    #define convert_ushort2_rtp(__x)  __builtin_convert(__x, ushort2, __kRoundTowardInf, __kUnsaturated )
    #define convert_int2_rtp(__x)     __builtin_convert(__x, int2, __kRoundTowardInf, __kUnsaturated )
    #define convert_uint2_rtp(__x)    __builtin_convert(__x, uint2, __kRoundTowardInf, __kUnsaturated )
    #define convert_long2_rtp(__x)    __builtin_convert(__x, long2, __kRoundTowardInf, __kUnsaturated )
    #define convert_ulong2_rtp(__x)   __builtin_convert(__x, ulong2, __kRoundTowardInf, __kUnsaturated )
    #define convert_float2_rtp(__x)   __builtin_convert(__x, float2, __kRoundTowardInf, __kUnsaturated )
    #define convert_double2_rtp(__x)  __builtin_convert(__x, double2, __kRoundTowardInf, __kUnsaturated )

    // Conversions to *2 vector types: __kRoundTowardInf, __kSaturated.
    #define convert_char2_sat_rtp(__x)    __builtin_convert(__x, char2, __kRoundTowardInf, __kSaturated )
    #define convert_uchar2_sat_rtp(__x)   __builtin_convert(__x, uchar2, __kRoundTowardInf, __kSaturated )
    #define convert_short2_sat_rtp(__x)   __builtin_convert(__x, short2, __kRoundTowardInf, __kSaturated )
    #define convert_ushort2_sat_rtp(__x)  __builtin_convert(__x, ushort2, __kRoundTowardInf, __kSaturated )
    #define convert_int2_sat_rtp(__x)     __builtin_convert(__x, int2, __kRoundTowardInf, __kSaturated )
    #define convert_uint2_sat_rtp(__x)    __builtin_convert(__x, uint2, __kRoundTowardInf, __kSaturated )
    #define convert_long2_sat_rtp(__x)    __builtin_convert(__x, long2, __kRoundTowardInf, __kSaturated )
    #define convert_ulong2_sat_rtp(__x)   __builtin_convert(__x, ulong2, __kRoundTowardInf, __kSaturated )
    #define convert_float2_sat_rtp(__x)   __builtin_convert(__x, float2, __kRoundTowardInf, __kSaturated )
    #define convert_double2_sat_rtp(__x)  __builtin_convert(__x, double2, __kRoundTowardInf, __kSaturated )

    // Conversions to *2 vector types: __kRoundTowardZero, __kUnsaturated.
    #define convert_char2_rtz(__x)    __builtin_convert(__x, char2, __kRoundTowardZero, __kUnsaturated )
    #define convert_uchar2_rtz(__x)   __builtin_convert(__x, uchar2, __kRoundTowardZero, __kUnsaturated )
    #define convert_short2_rtz(__x)   __builtin_convert(__x, short2, __kRoundTowardZero, __kUnsaturated )
    #define convert_ushort2_rtz(__x)  __builtin_convert(__x, ushort2, __kRoundTowardZero, __kUnsaturated )
    #define convert_int2_rtz(__x)     __builtin_convert(__x, int2, __kRoundTowardZero, __kUnsaturated )
    #define convert_uint2_rtz(__x)    __builtin_convert(__x, uint2, __kRoundTowardZero, __kUnsaturated )
    #define convert_long2_rtz(__x)    __builtin_convert(__x, long2, __kRoundTowardZero, __kUnsaturated )
    #define convert_ulong2_rtz(__x)   __builtin_convert(__x, ulong2, __kRoundTowardZero, __kUnsaturated )
    #define convert_float2_rtz(__x)   __builtin_convert(__x, float2, __kRoundTowardZero, __kUnsaturated )
    #define convert_double2_rtz(__x)  __builtin_convert(__x, double2, __kRoundTowardZero, __kUnsaturated )

    // Conversions to *2 vector types: __kRoundTowardZero, __kSaturated.
    #define convert_char2_sat_rtz(__x)    __builtin_convert(__x, char2, __kRoundTowardZero, __kSaturated )
    #define convert_uchar2_sat_rtz(__x)   __builtin_convert(__x, uchar2, __kRoundTowardZero, __kSaturated )
    #define convert_short2_sat_rtz(__x)   __builtin_convert(__x, short2, __kRoundTowardZero, __kSaturated )
    #define convert_ushort2_sat_rtz(__x)  __builtin_convert(__x, ushort2, __kRoundTowardZero, __kSaturated )
    #define convert_int2_sat_rtz(__x)     __builtin_convert(__x, int2, __kRoundTowardZero, __kSaturated )
    #define convert_uint2_sat_rtz(__x)    __builtin_convert(__x, uint2, __kRoundTowardZero, __kSaturated )
    #define convert_long2_sat_rtz(__x)    __builtin_convert(__x, long2, __kRoundTowardZero, __kSaturated )
    #define convert_ulong2_sat_rtz(__x)   __builtin_convert(__x, ulong2, __kRoundTowardZero, __kSaturated )
    #define convert_float2_sat_rtz(__x)   __builtin_convert(__x, float2, __kRoundTowardZero, __kSaturated )
    #define convert_double2_sat_rtz(__x)  __builtin_convert(__x, double2, __kRoundTowardZero, __kSaturated )

    //type3
    // Conversions to *3 vector types: __kDefaultRoundingMode, __kUnsaturated.
    #define convert_char3(__x)    __builtin_convert(__x, char3, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_uchar3(__x)   __builtin_convert(__x, uchar3, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_short3(__x)   __builtin_convert(__x, short3, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_ushort3(__x)  __builtin_convert(__x, ushort3, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_int3(__x)     __builtin_convert(__x, int3, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_uint3(__x)    __builtin_convert(__x, uint3, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_long3(__x)    __builtin_convert(__x, long3, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_ulong3(__x)   __builtin_convert(__x, ulong3, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_float3(__x)   __builtin_convert(__x, float3, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_double3(__x)  __builtin_convert(__x, double3, __kDefaultRoundingMode, __kUnsaturated )

    // Conversions to *3 vector types: __kDefaultRoundingMode, __kSaturated.
    #define convert_char3_sat(__x)    __builtin_convert(__x, char3, __kDefaultRoundingMode, __kSaturated )
    #define convert_uchar3_sat(__x)   __builtin_convert(__x, uchar3, __kDefaultRoundingMode, __kSaturated )
    #define convert_short3_sat(__x)   __builtin_convert(__x, short3, __kDefaultRoundingMode, __kSaturated )
    #define convert_ushort3_sat(__x)  __builtin_convert(__x, ushort3, __kDefaultRoundingMode, __kSaturated )
    #define convert_int3_sat(__x)     __builtin_convert(__x, int3, __kDefaultRoundingMode, __kSaturated )
    #define convert_uint3_sat(__x)    __builtin_convert(__x, uint3, __kDefaultRoundingMode, __kSaturated )
    #define convert_long3_sat(__x)    __builtin_convert(__x, long3, __kDefaultRoundingMode, __kSaturated )
    #define convert_ulong3_sat(__x)   __builtin_convert(__x, ulong3, __kDefaultRoundingMode, __kSaturated )
    #define convert_float3_sat(__x)   __builtin_convert(__x, float3, __kDefaultRoundingMode, __kSaturated )
    #define convert_double3_sat(__x)  __builtin_convert(__x, double3, __kDefaultRoundingMode, __kSaturated )

    // Conversions to *3 vector types: __kRoundToNearestEven, __kUnsaturated.
    #define convert_char3_rte(__x)    __builtin_convert(__x, char3, __kRoundToNearestEven, __kUnsaturated )
    #define convert_uchar3_rte(__x)   __builtin_convert(__x, uchar3, __kRoundToNearestEven, __kUnsaturated )
    #define convert_short3_rte(__x)   __builtin_convert(__x, short3, __kRoundToNearestEven, __kUnsaturated )
    #define convert_ushort3_rte(__x)  __builtin_convert(__x, ushort3, __kRoundToNearestEven, __kUnsaturated )
    #define convert_int3_rte(__x)     __builtin_convert(__x, int3, __kRoundToNearestEven, __kUnsaturated )
    #define convert_uint3_rte(__x)    __builtin_convert(__x, uint3, __kRoundToNearestEven, __kUnsaturated )
    #define convert_long3_rte(__x)    __builtin_convert(__x, long3, __kRoundToNearestEven, __kUnsaturated )
    #define convert_ulong3_rte(__x)   __builtin_convert(__x, ulong3, __kRoundToNearestEven, __kUnsaturated )
    #define convert_float3_rte(__x)   __builtin_convert(__x, float3, __kRoundToNearestEven, __kUnsaturated )
    #define convert_double3_rte(__x)  __builtin_convert(__x, double3, __kRoundToNearestEven, __kUnsaturated )

    // Conversions to *3 vector types: __kRoundToNearestEven, __kSaturated.
    #define convert_char3_sat_rte(__x)    __builtin_convert(__x, char3, __kRoundToNearestEven, __kSaturated )
    #define convert_uchar3_sat_rte(__x)   __builtin_convert(__x, uchar3, __kRoundToNearestEven, __kSaturated )
    #define convert_short3_sat_rte(__x)   __builtin_convert(__x, short3, __kRoundToNearestEven, __kSaturated )
    #define convert_ushort3_sat_rte(__x)  __builtin_convert(__x, ushort3, __kRoundToNearestEven, __kSaturated )
    #define convert_int3_sat_rte(__x)     __builtin_convert(__x, int3, __kRoundToNearestEven, __kSaturated )
    #define convert_uint3_sat_rte(__x)    __builtin_convert(__x, uint3, __kRoundToNearestEven, __kSaturated )
    #define convert_long3_sat_rte(__x)    __builtin_convert(__x, long3, __kRoundToNearestEven, __kSaturated )
    #define convert_ulong3_sat_rte(__x)   __builtin_convert(__x, ulong3, __kRoundToNearestEven, __kSaturated )
    #define convert_float3_sat_rte(__x)   __builtin_convert(__x, float3, __kRoundToNearestEven, __kSaturated )
    #define convert_double3_sat_rte(__x)  __builtin_convert(__x, double3, __kRoundToNearestEven, __kSaturated )

    // Conversions to *3 vector types: __kRoundTowardNegativeInf, __kUnsaturated.
    #define convert_char3_rtn(__x)    __builtin_convert(__x, char3, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_uchar3_rtn(__x)   __builtin_convert(__x, uchar3, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_short3_rtn(__x)   __builtin_convert(__x, short3, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_ushort3_rtn(__x)  __builtin_convert(__x, ushort3, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_int3_rtn(__x)     __builtin_convert(__x, int3, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_uint3_rtn(__x)    __builtin_convert(__x, uint3, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_long3_rtn(__x)    __builtin_convert(__x, long3, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_ulong3_rtn(__x)   __builtin_convert(__x, ulong3, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_float3_rtn(__x)   __builtin_convert(__x, float3, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_double3_rtn(__x)  __builtin_convert(__x, double3, __kRoundTowardNegativeInf, __kUnsaturated )

    // Conversions to *3 vector types: __kRoundTowardNegativeInf, __kSaturated.
    #define convert_char3_sat_rtn(__x)    __builtin_convert(__x, char3, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_uchar3_sat_rtn(__x)   __builtin_convert(__x, uchar3, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_short3_sat_rtn(__x)   __builtin_convert(__x, short3, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_ushort3_sat_rtn(__x)  __builtin_convert(__x, ushort3, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_int3_sat_rtn(__x)     __builtin_convert(__x, int3, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_uint3_sat_rtn(__x)    __builtin_convert(__x, uint3, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_long3_sat_rtn(__x)    __builtin_convert(__x, long3, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_ulong3_sat_rtn(__x)   __builtin_convert(__x, ulong3, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_float3_sat_rtn(__x)   __builtin_convert(__x, float3, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_double3_sat_rtn(__x)  __builtin_convert(__x, double3, __kRoundTowardNegativeInf, __kSaturated )

    // Conversions to *3 vector types: __kRoundTowardInf, __kUnsaturated.
    #define convert_char3_rtp(__x)    __builtin_convert(__x, char3, __kRoundTowardInf, __kUnsaturated )
    #define convert_uchar3_rtp(__x)   __builtin_convert(__x, uchar3, __kRoundTowardInf, __kUnsaturated )
    #define convert_short3_rtp(__x)   __builtin_convert(__x, short3, __kRoundTowardInf, __kUnsaturated )
    #define convert_ushort3_rtp(__x)  __builtin_convert(__x, ushort3, __kRoundTowardInf, __kUnsaturated )
    #define convert_int3_rtp(__x)     __builtin_convert(__x, int3, __kRoundTowardInf, __kUnsaturated )
    #define convert_uint3_rtp(__x)    __builtin_convert(__x, uint3, __kRoundTowardInf, __kUnsaturated )
    #define convert_long3_rtp(__x)    __builtin_convert(__x, long3, __kRoundTowardInf, __kUnsaturated )
    #define convert_ulong3_rtp(__x)   __builtin_convert(__x, ulong3, __kRoundTowardInf, __kUnsaturated )
    #define convert_float3_rtp(__x)   __builtin_convert(__x, float3, __kRoundTowardInf, __kUnsaturated )
    #define convert_double3_rtp(__x)  __builtin_convert(__x, double3, __kRoundTowardInf, __kUnsaturated )

    // Conversions to *3 vector types: __kRoundTowardInf, __kSaturated.
    #define convert_char3_sat_rtp(__x)    __builtin_convert(__x, char3, __kRoundTowardInf, __kSaturated )
    #define convert_uchar3_sat_rtp(__x)   __builtin_convert(__x, uchar3, __kRoundTowardInf, __kSaturated )
    #define convert_short3_sat_rtp(__x)   __builtin_convert(__x, short3, __kRoundTowardInf, __kSaturated )
    #define convert_ushort3_sat_rtp(__x)  __builtin_convert(__x, ushort3, __kRoundTowardInf, __kSaturated )
    #define convert_int3_sat_rtp(__x)     __builtin_convert(__x, int3, __kRoundTowardInf, __kSaturated )
    #define convert_uint3_sat_rtp(__x)    __builtin_convert(__x, uint3, __kRoundTowardInf, __kSaturated )
    #define convert_long3_sat_rtp(__x)    __builtin_convert(__x, long3, __kRoundTowardInf, __kSaturated )
    #define convert_ulong3_sat_rtp(__x)   __builtin_convert(__x, ulong3, __kRoundTowardInf, __kSaturated )
    #define convert_float3_sat_rtp(__x)   __builtin_convert(__x, float3, __kRoundTowardInf, __kSaturated )
    #define convert_double3_sat_rtp(__x)  __builtin_convert(__x, double3, __kRoundTowardInf, __kSaturated )

    // Conversions to *3 vector types: __kRoundTowardZero, __kUnsaturated.
    #define convert_char3_rtz(__x)    __builtin_convert(__x, char3, __kRoundTowardZero, __kUnsaturated )
    #define convert_uchar3_rtz(__x)   __builtin_convert(__x, uchar3, __kRoundTowardZero, __kUnsaturated )
    #define convert_short3_rtz(__x)   __builtin_convert(__x, short3, __kRoundTowardZero, __kUnsaturated )
    #define convert_ushort3_rtz(__x)  __builtin_convert(__x, ushort3, __kRoundTowardZero, __kUnsaturated )
    #define convert_int3_rtz(__x)     __builtin_convert(__x, int3, __kRoundTowardZero, __kUnsaturated )
    #define convert_uint3_rtz(__x)    __builtin_convert(__x, uint3, __kRoundTowardZero, __kUnsaturated )
    #define convert_long3_rtz(__x)    __builtin_convert(__x, long3, __kRoundTowardZero, __kUnsaturated )
    #define convert_ulong3_rtz(__x)   __builtin_convert(__x, ulong3, __kRoundTowardZero, __kUnsaturated )
    #define convert_float3_rtz(__x)   __builtin_convert(__x, float3, __kRoundTowardZero, __kUnsaturated )
    #define convert_double3_rtz(__x)  __builtin_convert(__x, double3, __kRoundTowardZero, __kUnsaturated )

    // Conversions to *3 vector types: __kRoundTowardZero, __kSaturated.
    #define convert_char3_sat_rtz(__x)    __builtin_convert(__x, char3, __kRoundTowardZero, __kSaturated )
    #define convert_uchar3_sat_rtz(__x)   __builtin_convert(__x, uchar3, __kRoundTowardZero, __kSaturated )
    #define convert_short3_sat_rtz(__x)   __builtin_convert(__x, short3, __kRoundTowardZero, __kSaturated )
    #define convert_ushort3_sat_rtz(__x)  __builtin_convert(__x, ushort3, __kRoundTowardZero, __kSaturated )
    #define convert_int3_sat_rtz(__x)     __builtin_convert(__x, int3, __kRoundTowardZero, __kSaturated )
    #define convert_uint3_sat_rtz(__x)    __builtin_convert(__x, uint3, __kRoundTowardZero, __kSaturated )
    #define convert_long3_sat_rtz(__x)    __builtin_convert(__x, long3, __kRoundTowardZero, __kSaturated )
    #define convert_ulong3_sat_rtz(__x)   __builtin_convert(__x, ulong3, __kRoundTowardZero, __kSaturated )
    #define convert_float3_sat_rtz(__x)   __builtin_convert(__x, float3, __kRoundTowardZero, __kSaturated )
    #define convert_double3_sat_rtz(__x)  __builtin_convert(__x, double3, __kRoundTowardZero, __kSaturated )

    //type4
    // Conversions to *4 vector types: __kDefaultRoundingMode, __kUnsaturated.
    #define convert_char4(__x)    __builtin_convert(__x, char4, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_uchar4(__x)   __builtin_convert(__x, uchar4, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_short4(__x)   __builtin_convert(__x, short4, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_ushort4(__x)  __builtin_convert(__x, ushort4, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_int4(__x)     __builtin_convert(__x, int4, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_uint4(__x)    __builtin_convert(__x, uint4, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_long4(__x)    __builtin_convert(__x, long4, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_ulong4(__x)   __builtin_convert(__x, ulong4, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_float4(__x)   __builtin_convert(__x, float4, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_double4(__x)  __builtin_convert(__x, double4, __kDefaultRoundingMode, __kUnsaturated )

    // _sat variants (4-component): rounding argument is __kDefaultRoundingMode,
    // saturation argument is __kSaturated.
    // NOTE(review): the OpenCL C spec does not permit _sat on floating-point
    // destinations, yet float4/double4 entries are defined here -- confirm intent.
    #define convert_char4_sat(_X)    __builtin_convert(_X, char4, __kDefaultRoundingMode, __kSaturated )
    #define convert_uchar4_sat(_X)   __builtin_convert(_X, uchar4, __kDefaultRoundingMode, __kSaturated )
    #define convert_short4_sat(_X)   __builtin_convert(_X, short4, __kDefaultRoundingMode, __kSaturated )
    #define convert_ushort4_sat(_X)  __builtin_convert(_X, ushort4, __kDefaultRoundingMode, __kSaturated )
    #define convert_int4_sat(_X)     __builtin_convert(_X, int4, __kDefaultRoundingMode, __kSaturated )
    #define convert_uint4_sat(_X)    __builtin_convert(_X, uint4, __kDefaultRoundingMode, __kSaturated )
    #define convert_long4_sat(_X)    __builtin_convert(_X, long4, __kDefaultRoundingMode, __kSaturated )
    #define convert_ulong4_sat(_X)   __builtin_convert(_X, ulong4, __kDefaultRoundingMode, __kSaturated )
    #define convert_float4_sat(_X)   __builtin_convert(_X, float4, __kDefaultRoundingMode, __kSaturated )
    #define convert_double4_sat(_X)  __builtin_convert(_X, double4, __kDefaultRoundingMode, __kSaturated )

    // _rte variants (4-component): rounding argument is __kRoundToNearestEven,
    // saturation argument is __kUnsaturated.
    #define convert_char4_rte(_X)    __builtin_convert(_X, char4, __kRoundToNearestEven, __kUnsaturated )
    #define convert_uchar4_rte(_X)   __builtin_convert(_X, uchar4, __kRoundToNearestEven, __kUnsaturated )
    #define convert_short4_rte(_X)   __builtin_convert(_X, short4, __kRoundToNearestEven, __kUnsaturated )
    #define convert_ushort4_rte(_X)  __builtin_convert(_X, ushort4, __kRoundToNearestEven, __kUnsaturated )
    #define convert_int4_rte(_X)     __builtin_convert(_X, int4, __kRoundToNearestEven, __kUnsaturated )
    #define convert_uint4_rte(_X)    __builtin_convert(_X, uint4, __kRoundToNearestEven, __kUnsaturated )
    #define convert_long4_rte(_X)    __builtin_convert(_X, long4, __kRoundToNearestEven, __kUnsaturated )
    #define convert_ulong4_rte(_X)   __builtin_convert(_X, ulong4, __kRoundToNearestEven, __kUnsaturated )
    #define convert_float4_rte(_X)   __builtin_convert(_X, float4, __kRoundToNearestEven, __kUnsaturated )
    #define convert_double4_rte(_X)  __builtin_convert(_X, double4, __kRoundToNearestEven, __kUnsaturated )

    // _sat_rte variants (4-component): rounding argument is __kRoundToNearestEven,
    // saturation argument is __kSaturated.
    #define convert_char4_sat_rte(_X)    __builtin_convert(_X, char4, __kRoundToNearestEven, __kSaturated )
    #define convert_uchar4_sat_rte(_X)   __builtin_convert(_X, uchar4, __kRoundToNearestEven, __kSaturated )
    #define convert_short4_sat_rte(_X)   __builtin_convert(_X, short4, __kRoundToNearestEven, __kSaturated )
    #define convert_ushort4_sat_rte(_X)  __builtin_convert(_X, ushort4, __kRoundToNearestEven, __kSaturated )
    #define convert_int4_sat_rte(_X)     __builtin_convert(_X, int4, __kRoundToNearestEven, __kSaturated )
    #define convert_uint4_sat_rte(_X)    __builtin_convert(_X, uint4, __kRoundToNearestEven, __kSaturated )
    #define convert_long4_sat_rte(_X)    __builtin_convert(_X, long4, __kRoundToNearestEven, __kSaturated )
    #define convert_ulong4_sat_rte(_X)   __builtin_convert(_X, ulong4, __kRoundToNearestEven, __kSaturated )
    #define convert_float4_sat_rte(_X)   __builtin_convert(_X, float4, __kRoundToNearestEven, __kSaturated )
    #define convert_double4_sat_rte(_X)  __builtin_convert(_X, double4, __kRoundToNearestEven, __kSaturated )

    // _rtn variants (4-component): rounding argument is __kRoundTowardNegativeInf,
    // saturation argument is __kUnsaturated.
    #define convert_char4_rtn(_X)    __builtin_convert(_X, char4, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_uchar4_rtn(_X)   __builtin_convert(_X, uchar4, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_short4_rtn(_X)   __builtin_convert(_X, short4, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_ushort4_rtn(_X)  __builtin_convert(_X, ushort4, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_int4_rtn(_X)     __builtin_convert(_X, int4, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_uint4_rtn(_X)    __builtin_convert(_X, uint4, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_long4_rtn(_X)    __builtin_convert(_X, long4, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_ulong4_rtn(_X)   __builtin_convert(_X, ulong4, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_float4_rtn(_X)   __builtin_convert(_X, float4, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_double4_rtn(_X)  __builtin_convert(_X, double4, __kRoundTowardNegativeInf, __kUnsaturated )

    // _sat_rtn variants (4-component): rounding argument is __kRoundTowardNegativeInf,
    // saturation argument is __kSaturated.
    #define convert_char4_sat_rtn(_X)    __builtin_convert(_X, char4, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_uchar4_sat_rtn(_X)   __builtin_convert(_X, uchar4, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_short4_sat_rtn(_X)   __builtin_convert(_X, short4, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_ushort4_sat_rtn(_X)  __builtin_convert(_X, ushort4, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_int4_sat_rtn(_X)     __builtin_convert(_X, int4, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_uint4_sat_rtn(_X)    __builtin_convert(_X, uint4, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_long4_sat_rtn(_X)    __builtin_convert(_X, long4, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_ulong4_sat_rtn(_X)   __builtin_convert(_X, ulong4, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_float4_sat_rtn(_X)   __builtin_convert(_X, float4, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_double4_sat_rtn(_X)  __builtin_convert(_X, double4, __kRoundTowardNegativeInf, __kSaturated )

    // _rtp variants (4-component): rounding argument is __kRoundTowardInf,
    // saturation argument is __kUnsaturated.
    #define convert_char4_rtp(_X)    __builtin_convert(_X, char4, __kRoundTowardInf, __kUnsaturated )
    #define convert_uchar4_rtp(_X)   __builtin_convert(_X, uchar4, __kRoundTowardInf, __kUnsaturated )
    #define convert_short4_rtp(_X)   __builtin_convert(_X, short4, __kRoundTowardInf, __kUnsaturated )
    #define convert_ushort4_rtp(_X)  __builtin_convert(_X, ushort4, __kRoundTowardInf, __kUnsaturated )
    #define convert_int4_rtp(_X)     __builtin_convert(_X, int4, __kRoundTowardInf, __kUnsaturated )
    #define convert_uint4_rtp(_X)    __builtin_convert(_X, uint4, __kRoundTowardInf, __kUnsaturated )
    #define convert_long4_rtp(_X)    __builtin_convert(_X, long4, __kRoundTowardInf, __kUnsaturated )
    #define convert_ulong4_rtp(_X)   __builtin_convert(_X, ulong4, __kRoundTowardInf, __kUnsaturated )
    #define convert_float4_rtp(_X)   __builtin_convert(_X, float4, __kRoundTowardInf, __kUnsaturated )
    #define convert_double4_rtp(_X)  __builtin_convert(_X, double4, __kRoundTowardInf, __kUnsaturated )

    // _sat_rtp variants (4-component): rounding argument is __kRoundTowardInf,
    // saturation argument is __kSaturated.
    #define convert_char4_sat_rtp(_X)    __builtin_convert(_X, char4, __kRoundTowardInf, __kSaturated )
    #define convert_uchar4_sat_rtp(_X)   __builtin_convert(_X, uchar4, __kRoundTowardInf, __kSaturated )
    #define convert_short4_sat_rtp(_X)   __builtin_convert(_X, short4, __kRoundTowardInf, __kSaturated )
    #define convert_ushort4_sat_rtp(_X)  __builtin_convert(_X, ushort4, __kRoundTowardInf, __kSaturated )
    #define convert_int4_sat_rtp(_X)     __builtin_convert(_X, int4, __kRoundTowardInf, __kSaturated )
    #define convert_uint4_sat_rtp(_X)    __builtin_convert(_X, uint4, __kRoundTowardInf, __kSaturated )
    #define convert_long4_sat_rtp(_X)    __builtin_convert(_X, long4, __kRoundTowardInf, __kSaturated )
    #define convert_ulong4_sat_rtp(_X)   __builtin_convert(_X, ulong4, __kRoundTowardInf, __kSaturated )
    #define convert_float4_sat_rtp(_X)   __builtin_convert(_X, float4, __kRoundTowardInf, __kSaturated )
    #define convert_double4_sat_rtp(_X)  __builtin_convert(_X, double4, __kRoundTowardInf, __kSaturated )

    // _rtz variants (4-component): rounding argument is __kRoundTowardZero,
    // saturation argument is __kUnsaturated.
    #define convert_char4_rtz(_X)    __builtin_convert(_X, char4, __kRoundTowardZero, __kUnsaturated )
    #define convert_uchar4_rtz(_X)   __builtin_convert(_X, uchar4, __kRoundTowardZero, __kUnsaturated )
    #define convert_short4_rtz(_X)   __builtin_convert(_X, short4, __kRoundTowardZero, __kUnsaturated )
    #define convert_ushort4_rtz(_X)  __builtin_convert(_X, ushort4, __kRoundTowardZero, __kUnsaturated )
    #define convert_int4_rtz(_X)     __builtin_convert(_X, int4, __kRoundTowardZero, __kUnsaturated )
    #define convert_uint4_rtz(_X)    __builtin_convert(_X, uint4, __kRoundTowardZero, __kUnsaturated )
    #define convert_long4_rtz(_X)    __builtin_convert(_X, long4, __kRoundTowardZero, __kUnsaturated )
    #define convert_ulong4_rtz(_X)   __builtin_convert(_X, ulong4, __kRoundTowardZero, __kUnsaturated )
    #define convert_float4_rtz(_X)   __builtin_convert(_X, float4, __kRoundTowardZero, __kUnsaturated )
    #define convert_double4_rtz(_X)  __builtin_convert(_X, double4, __kRoundTowardZero, __kUnsaturated )

    // _sat_rtz variants (4-component): rounding argument is __kRoundTowardZero,
    // saturation argument is __kSaturated.
    #define convert_char4_sat_rtz(_X)    __builtin_convert(_X, char4, __kRoundTowardZero, __kSaturated )
    #define convert_uchar4_sat_rtz(_X)   __builtin_convert(_X, uchar4, __kRoundTowardZero, __kSaturated )
    #define convert_short4_sat_rtz(_X)   __builtin_convert(_X, short4, __kRoundTowardZero, __kSaturated )
    #define convert_ushort4_sat_rtz(_X)  __builtin_convert(_X, ushort4, __kRoundTowardZero, __kSaturated )
    #define convert_int4_sat_rtz(_X)     __builtin_convert(_X, int4, __kRoundTowardZero, __kSaturated )
    #define convert_uint4_sat_rtz(_X)    __builtin_convert(_X, uint4, __kRoundTowardZero, __kSaturated )
    #define convert_long4_sat_rtz(_X)    __builtin_convert(_X, long4, __kRoundTowardZero, __kSaturated )
    #define convert_ulong4_sat_rtz(_X)   __builtin_convert(_X, ulong4, __kRoundTowardZero, __kSaturated )
    #define convert_float4_sat_rtz(_X)   __builtin_convert(_X, float4, __kRoundTowardZero, __kSaturated )
    #define convert_double4_sat_rtz(_X)  __builtin_convert(_X, double4, __kRoundTowardZero, __kSaturated )

    // type8: conversions whose destination is an 8-component vector type.
    // Unsuffixed form: each macro forwards _X and the destination type to
    // __builtin_convert with __kDefaultRoundingMode and __kUnsaturated.
    #define convert_char8(_X)    __builtin_convert(_X, char8, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_uchar8(_X)   __builtin_convert(_X, uchar8, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_short8(_X)   __builtin_convert(_X, short8, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_ushort8(_X)  __builtin_convert(_X, ushort8, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_int8(_X)     __builtin_convert(_X, int8, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_uint8(_X)    __builtin_convert(_X, uint8, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_long8(_X)    __builtin_convert(_X, long8, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_ulong8(_X)   __builtin_convert(_X, ulong8, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_float8(_X)   __builtin_convert(_X, float8, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_double8(_X)  __builtin_convert(_X, double8, __kDefaultRoundingMode, __kUnsaturated )

    // _sat variants (8-component): rounding argument is __kDefaultRoundingMode,
    // saturation argument is __kSaturated.
    // NOTE(review): the OpenCL C spec does not permit _sat on floating-point
    // destinations, yet float8/double8 entries are defined here -- confirm intent.
    #define convert_char8_sat(_X)    __builtin_convert(_X, char8, __kDefaultRoundingMode, __kSaturated )
    #define convert_uchar8_sat(_X)   __builtin_convert(_X, uchar8, __kDefaultRoundingMode, __kSaturated )
    #define convert_short8_sat(_X)   __builtin_convert(_X, short8, __kDefaultRoundingMode, __kSaturated )
    #define convert_ushort8_sat(_X)  __builtin_convert(_X, ushort8, __kDefaultRoundingMode, __kSaturated )
    #define convert_int8_sat(_X)     __builtin_convert(_X, int8, __kDefaultRoundingMode, __kSaturated )
    #define convert_uint8_sat(_X)    __builtin_convert(_X, uint8, __kDefaultRoundingMode, __kSaturated )
    #define convert_long8_sat(_X)    __builtin_convert(_X, long8, __kDefaultRoundingMode, __kSaturated )
    #define convert_ulong8_sat(_X)   __builtin_convert(_X, ulong8, __kDefaultRoundingMode, __kSaturated )
    #define convert_float8_sat(_X)   __builtin_convert(_X, float8, __kDefaultRoundingMode, __kSaturated )
    #define convert_double8_sat(_X)  __builtin_convert(_X, double8, __kDefaultRoundingMode, __kSaturated )

    // _rte variants (8-component): rounding argument is __kRoundToNearestEven,
    // saturation argument is __kUnsaturated.
    #define convert_char8_rte(_X)    __builtin_convert(_X, char8, __kRoundToNearestEven, __kUnsaturated )
    #define convert_uchar8_rte(_X)   __builtin_convert(_X, uchar8, __kRoundToNearestEven, __kUnsaturated )
    #define convert_short8_rte(_X)   __builtin_convert(_X, short8, __kRoundToNearestEven, __kUnsaturated )
    #define convert_ushort8_rte(_X)  __builtin_convert(_X, ushort8, __kRoundToNearestEven, __kUnsaturated )
    #define convert_int8_rte(_X)     __builtin_convert(_X, int8, __kRoundToNearestEven, __kUnsaturated )
    #define convert_uint8_rte(_X)    __builtin_convert(_X, uint8, __kRoundToNearestEven, __kUnsaturated )
    #define convert_long8_rte(_X)    __builtin_convert(_X, long8, __kRoundToNearestEven, __kUnsaturated )
    #define convert_ulong8_rte(_X)   __builtin_convert(_X, ulong8, __kRoundToNearestEven, __kUnsaturated )
    #define convert_float8_rte(_X)   __builtin_convert(_X, float8, __kRoundToNearestEven, __kUnsaturated )
    #define convert_double8_rte(_X)  __builtin_convert(_X, double8, __kRoundToNearestEven, __kUnsaturated )

    // _sat_rte variants (8-component): rounding argument is __kRoundToNearestEven,
    // saturation argument is __kSaturated.
    #define convert_char8_sat_rte(_X)    __builtin_convert(_X, char8, __kRoundToNearestEven, __kSaturated )
    #define convert_uchar8_sat_rte(_X)   __builtin_convert(_X, uchar8, __kRoundToNearestEven, __kSaturated )
    #define convert_short8_sat_rte(_X)   __builtin_convert(_X, short8, __kRoundToNearestEven, __kSaturated )
    #define convert_ushort8_sat_rte(_X)  __builtin_convert(_X, ushort8, __kRoundToNearestEven, __kSaturated )
    #define convert_int8_sat_rte(_X)     __builtin_convert(_X, int8, __kRoundToNearestEven, __kSaturated )
    #define convert_uint8_sat_rte(_X)    __builtin_convert(_X, uint8, __kRoundToNearestEven, __kSaturated )
    #define convert_long8_sat_rte(_X)    __builtin_convert(_X, long8, __kRoundToNearestEven, __kSaturated )
    #define convert_ulong8_sat_rte(_X)   __builtin_convert(_X, ulong8, __kRoundToNearestEven, __kSaturated )
    #define convert_float8_sat_rte(_X)   __builtin_convert(_X, float8, __kRoundToNearestEven, __kSaturated )
    #define convert_double8_sat_rte(_X)  __builtin_convert(_X, double8, __kRoundToNearestEven, __kSaturated )

    // _rtn variants (8-component): rounding argument is __kRoundTowardNegativeInf,
    // saturation argument is __kUnsaturated.
    #define convert_char8_rtn(_X)    __builtin_convert(_X, char8, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_uchar8_rtn(_X)   __builtin_convert(_X, uchar8, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_short8_rtn(_X)   __builtin_convert(_X, short8, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_ushort8_rtn(_X)  __builtin_convert(_X, ushort8, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_int8_rtn(_X)     __builtin_convert(_X, int8, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_uint8_rtn(_X)    __builtin_convert(_X, uint8, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_long8_rtn(_X)    __builtin_convert(_X, long8, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_ulong8_rtn(_X)   __builtin_convert(_X, ulong8, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_float8_rtn(_X)   __builtin_convert(_X, float8, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_double8_rtn(_X)  __builtin_convert(_X, double8, __kRoundTowardNegativeInf, __kUnsaturated )

    // _sat_rtn variants (8-component): rounding argument is __kRoundTowardNegativeInf,
    // saturation argument is __kSaturated.
    #define convert_char8_sat_rtn(_X)    __builtin_convert(_X, char8, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_uchar8_sat_rtn(_X)   __builtin_convert(_X, uchar8, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_short8_sat_rtn(_X)   __builtin_convert(_X, short8, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_ushort8_sat_rtn(_X)  __builtin_convert(_X, ushort8, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_int8_sat_rtn(_X)     __builtin_convert(_X, int8, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_uint8_sat_rtn(_X)    __builtin_convert(_X, uint8, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_long8_sat_rtn(_X)    __builtin_convert(_X, long8, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_ulong8_sat_rtn(_X)   __builtin_convert(_X, ulong8, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_float8_sat_rtn(_X)   __builtin_convert(_X, float8, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_double8_sat_rtn(_X)  __builtin_convert(_X, double8, __kRoundTowardNegativeInf, __kSaturated )

    // _rtp variants (8-component): rounding argument is __kRoundTowardInf,
    // saturation argument is __kUnsaturated.
    #define convert_char8_rtp(_X)    __builtin_convert(_X, char8, __kRoundTowardInf, __kUnsaturated )
    #define convert_uchar8_rtp(_X)   __builtin_convert(_X, uchar8, __kRoundTowardInf, __kUnsaturated )
    #define convert_short8_rtp(_X)   __builtin_convert(_X, short8, __kRoundTowardInf, __kUnsaturated )
    #define convert_ushort8_rtp(_X)  __builtin_convert(_X, ushort8, __kRoundTowardInf, __kUnsaturated )
    #define convert_int8_rtp(_X)     __builtin_convert(_X, int8, __kRoundTowardInf, __kUnsaturated )
    #define convert_uint8_rtp(_X)    __builtin_convert(_X, uint8, __kRoundTowardInf, __kUnsaturated )
    #define convert_long8_rtp(_X)    __builtin_convert(_X, long8, __kRoundTowardInf, __kUnsaturated )
    #define convert_ulong8_rtp(_X)   __builtin_convert(_X, ulong8, __kRoundTowardInf, __kUnsaturated )
    #define convert_float8_rtp(_X)   __builtin_convert(_X, float8, __kRoundTowardInf, __kUnsaturated )
    #define convert_double8_rtp(_X)  __builtin_convert(_X, double8, __kRoundTowardInf, __kUnsaturated )

    // _sat_rtp variants (8-component): rounding argument is __kRoundTowardInf,
    // saturation argument is __kSaturated.
    #define convert_char8_sat_rtp(_X)    __builtin_convert(_X, char8, __kRoundTowardInf, __kSaturated )
    #define convert_uchar8_sat_rtp(_X)   __builtin_convert(_X, uchar8, __kRoundTowardInf, __kSaturated )
    #define convert_short8_sat_rtp(_X)   __builtin_convert(_X, short8, __kRoundTowardInf, __kSaturated )
    #define convert_ushort8_sat_rtp(_X)  __builtin_convert(_X, ushort8, __kRoundTowardInf, __kSaturated )
    #define convert_int8_sat_rtp(_X)     __builtin_convert(_X, int8, __kRoundTowardInf, __kSaturated )
    #define convert_uint8_sat_rtp(_X)    __builtin_convert(_X, uint8, __kRoundTowardInf, __kSaturated )
    #define convert_long8_sat_rtp(_X)    __builtin_convert(_X, long8, __kRoundTowardInf, __kSaturated )
    #define convert_ulong8_sat_rtp(_X)   __builtin_convert(_X, ulong8, __kRoundTowardInf, __kSaturated )
    #define convert_float8_sat_rtp(_X)   __builtin_convert(_X, float8, __kRoundTowardInf, __kSaturated )
    #define convert_double8_sat_rtp(_X)  __builtin_convert(_X, double8, __kRoundTowardInf, __kSaturated )

    // _rtz variants (8-component): rounding argument is __kRoundTowardZero,
    // saturation argument is __kUnsaturated.
    #define convert_char8_rtz(_X)    __builtin_convert(_X, char8, __kRoundTowardZero, __kUnsaturated )
    #define convert_uchar8_rtz(_X)   __builtin_convert(_X, uchar8, __kRoundTowardZero, __kUnsaturated )
    #define convert_short8_rtz(_X)   __builtin_convert(_X, short8, __kRoundTowardZero, __kUnsaturated )
    #define convert_ushort8_rtz(_X)  __builtin_convert(_X, ushort8, __kRoundTowardZero, __kUnsaturated )
    #define convert_int8_rtz(_X)     __builtin_convert(_X, int8, __kRoundTowardZero, __kUnsaturated )
    #define convert_uint8_rtz(_X)    __builtin_convert(_X, uint8, __kRoundTowardZero, __kUnsaturated )
    #define convert_long8_rtz(_X)    __builtin_convert(_X, long8, __kRoundTowardZero, __kUnsaturated )
    #define convert_ulong8_rtz(_X)   __builtin_convert(_X, ulong8, __kRoundTowardZero, __kUnsaturated )
    #define convert_float8_rtz(_X)   __builtin_convert(_X, float8, __kRoundTowardZero, __kUnsaturated )
    #define convert_double8_rtz(_X)  __builtin_convert(_X, double8, __kRoundTowardZero, __kUnsaturated )

    // _sat_rtz variants (8-component): rounding argument is __kRoundTowardZero,
    // saturation argument is __kSaturated.
    #define convert_char8_sat_rtz(_X)    __builtin_convert(_X, char8, __kRoundTowardZero, __kSaturated )
    #define convert_uchar8_sat_rtz(_X)   __builtin_convert(_X, uchar8, __kRoundTowardZero, __kSaturated )
    #define convert_short8_sat_rtz(_X)   __builtin_convert(_X, short8, __kRoundTowardZero, __kSaturated )
    #define convert_ushort8_sat_rtz(_X)  __builtin_convert(_X, ushort8, __kRoundTowardZero, __kSaturated )
    #define convert_int8_sat_rtz(_X)     __builtin_convert(_X, int8, __kRoundTowardZero, __kSaturated )
    #define convert_uint8_sat_rtz(_X)    __builtin_convert(_X, uint8, __kRoundTowardZero, __kSaturated )
    #define convert_long8_sat_rtz(_X)    __builtin_convert(_X, long8, __kRoundTowardZero, __kSaturated )
    #define convert_ulong8_sat_rtz(_X)   __builtin_convert(_X, ulong8, __kRoundTowardZero, __kSaturated )
    #define convert_float8_sat_rtz(_X)   __builtin_convert(_X, float8, __kRoundTowardZero, __kSaturated )
    #define convert_double8_sat_rtz(_X)  __builtin_convert(_X, double8, __kRoundTowardZero, __kSaturated )

    // type16: conversions whose destination is a 16-component vector type.
    // Unsuffixed form: each macro forwards _X and the destination type to
    // __builtin_convert with __kDefaultRoundingMode and __kUnsaturated.
    #define convert_char16(_X)    __builtin_convert(_X, char16, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_uchar16(_X)   __builtin_convert(_X, uchar16, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_short16(_X)   __builtin_convert(_X, short16, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_ushort16(_X)  __builtin_convert(_X, ushort16, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_int16(_X)     __builtin_convert(_X, int16, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_uint16(_X)    __builtin_convert(_X, uint16, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_long16(_X)    __builtin_convert(_X, long16, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_ulong16(_X)   __builtin_convert(_X, ulong16, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_float16(_X)   __builtin_convert(_X, float16, __kDefaultRoundingMode, __kUnsaturated )
    #define convert_double16(_X)  __builtin_convert(_X, double16, __kDefaultRoundingMode, __kUnsaturated )

    // _sat variants (16-component): rounding argument is __kDefaultRoundingMode,
    // saturation argument is __kSaturated.
    // NOTE(review): the OpenCL C spec does not permit _sat on floating-point
    // destinations, yet float16/double16 entries are defined here -- confirm intent.
    #define convert_char16_sat(_X)    __builtin_convert(_X, char16, __kDefaultRoundingMode, __kSaturated )
    #define convert_uchar16_sat(_X)   __builtin_convert(_X, uchar16, __kDefaultRoundingMode, __kSaturated )
    #define convert_short16_sat(_X)   __builtin_convert(_X, short16, __kDefaultRoundingMode, __kSaturated )
    #define convert_ushort16_sat(_X)  __builtin_convert(_X, ushort16, __kDefaultRoundingMode, __kSaturated )
    #define convert_int16_sat(_X)     __builtin_convert(_X, int16, __kDefaultRoundingMode, __kSaturated )
    #define convert_uint16_sat(_X)    __builtin_convert(_X, uint16, __kDefaultRoundingMode, __kSaturated )
    #define convert_long16_sat(_X)    __builtin_convert(_X, long16, __kDefaultRoundingMode, __kSaturated )
    #define convert_ulong16_sat(_X)   __builtin_convert(_X, ulong16, __kDefaultRoundingMode, __kSaturated )
    #define convert_float16_sat(_X)   __builtin_convert(_X, float16, __kDefaultRoundingMode, __kSaturated )
    #define convert_double16_sat(_X)  __builtin_convert(_X, double16, __kDefaultRoundingMode, __kSaturated )

    // _rte variants (16-component): rounding argument is __kRoundToNearestEven,
    // saturation argument is __kUnsaturated.
    #define convert_char16_rte(_X)    __builtin_convert(_X, char16, __kRoundToNearestEven, __kUnsaturated )
    #define convert_uchar16_rte(_X)   __builtin_convert(_X, uchar16, __kRoundToNearestEven, __kUnsaturated )
    #define convert_short16_rte(_X)   __builtin_convert(_X, short16, __kRoundToNearestEven, __kUnsaturated )
    #define convert_ushort16_rte(_X)  __builtin_convert(_X, ushort16, __kRoundToNearestEven, __kUnsaturated )
    #define convert_int16_rte(_X)     __builtin_convert(_X, int16, __kRoundToNearestEven, __kUnsaturated )
    #define convert_uint16_rte(_X)    __builtin_convert(_X, uint16, __kRoundToNearestEven, __kUnsaturated )
    #define convert_long16_rte(_X)    __builtin_convert(_X, long16, __kRoundToNearestEven, __kUnsaturated )
    #define convert_ulong16_rte(_X)   __builtin_convert(_X, ulong16, __kRoundToNearestEven, __kUnsaturated )
    #define convert_float16_rte(_X)   __builtin_convert(_X, float16, __kRoundToNearestEven, __kUnsaturated )
    #define convert_double16_rte(_X)  __builtin_convert(_X, double16, __kRoundToNearestEven, __kUnsaturated )

    // _sat_rte variants (16-component): rounding argument is __kRoundToNearestEven,
    // saturation argument is __kSaturated.
    #define convert_char16_sat_rte(_X)    __builtin_convert(_X, char16, __kRoundToNearestEven, __kSaturated )
    #define convert_uchar16_sat_rte(_X)   __builtin_convert(_X, uchar16, __kRoundToNearestEven, __kSaturated )
    #define convert_short16_sat_rte(_X)   __builtin_convert(_X, short16, __kRoundToNearestEven, __kSaturated )
    #define convert_ushort16_sat_rte(_X)  __builtin_convert(_X, ushort16, __kRoundToNearestEven, __kSaturated )
    #define convert_int16_sat_rte(_X)     __builtin_convert(_X, int16, __kRoundToNearestEven, __kSaturated )
    #define convert_uint16_sat_rte(_X)    __builtin_convert(_X, uint16, __kRoundToNearestEven, __kSaturated )
    #define convert_long16_sat_rte(_X)    __builtin_convert(_X, long16, __kRoundToNearestEven, __kSaturated )
    #define convert_ulong16_sat_rte(_X)   __builtin_convert(_X, ulong16, __kRoundToNearestEven, __kSaturated )
    #define convert_float16_sat_rte(_X)   __builtin_convert(_X, float16, __kRoundToNearestEven, __kSaturated )
    #define convert_double16_sat_rte(_X)  __builtin_convert(_X, double16, __kRoundToNearestEven, __kSaturated )

    // _rtn variants (16-component): rounding argument is __kRoundTowardNegativeInf,
    // saturation argument is __kUnsaturated.
    #define convert_char16_rtn(_X)    __builtin_convert(_X, char16, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_uchar16_rtn(_X)   __builtin_convert(_X, uchar16, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_short16_rtn(_X)   __builtin_convert(_X, short16, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_ushort16_rtn(_X)  __builtin_convert(_X, ushort16, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_int16_rtn(_X)     __builtin_convert(_X, int16, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_uint16_rtn(_X)    __builtin_convert(_X, uint16, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_long16_rtn(_X)    __builtin_convert(_X, long16, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_ulong16_rtn(_X)   __builtin_convert(_X, ulong16, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_float16_rtn(_X)   __builtin_convert(_X, float16, __kRoundTowardNegativeInf, __kUnsaturated )
    #define convert_double16_rtn(_X)  __builtin_convert(_X, double16, __kRoundTowardNegativeInf, __kUnsaturated )

    // _sat_rtn variants (16-component): rounding argument is __kRoundTowardNegativeInf,
    // saturation argument is __kSaturated.
    #define convert_char16_sat_rtn(_X)    __builtin_convert(_X, char16, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_uchar16_sat_rtn(_X)   __builtin_convert(_X, uchar16, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_short16_sat_rtn(_X)   __builtin_convert(_X, short16, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_ushort16_sat_rtn(_X)  __builtin_convert(_X, ushort16, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_int16_sat_rtn(_X)     __builtin_convert(_X, int16, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_uint16_sat_rtn(_X)    __builtin_convert(_X, uint16, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_long16_sat_rtn(_X)    __builtin_convert(_X, long16, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_ulong16_sat_rtn(_X)   __builtin_convert(_X, ulong16, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_float16_sat_rtn(_X)   __builtin_convert(_X, float16, __kRoundTowardNegativeInf, __kSaturated )
    #define convert_double16_sat_rtn(_X)  __builtin_convert(_X, double16, __kRoundTowardNegativeInf, __kSaturated )

    // _rtp variants (16-component): rounding argument is __kRoundTowardInf,
    // saturation argument is __kUnsaturated.
    #define convert_char16_rtp(_X)    __builtin_convert(_X, char16, __kRoundTowardInf, __kUnsaturated )
    #define convert_uchar16_rtp(_X)   __builtin_convert(_X, uchar16, __kRoundTowardInf, __kUnsaturated )
    #define convert_short16_rtp(_X)   __builtin_convert(_X, short16, __kRoundTowardInf, __kUnsaturated )
    #define convert_ushort16_rtp(_X)  __builtin_convert(_X, ushort16, __kRoundTowardInf, __kUnsaturated )
    #define convert_int16_rtp(_X)     __builtin_convert(_X, int16, __kRoundTowardInf, __kUnsaturated )
    #define convert_uint16_rtp(_X)    __builtin_convert(_X, uint16, __kRoundTowardInf, __kUnsaturated )
    #define convert_long16_rtp(_X)    __builtin_convert(_X, long16, __kRoundTowardInf, __kUnsaturated )
    #define convert_ulong16_rtp(_X)   __builtin_convert(_X, ulong16, __kRoundTowardInf, __kUnsaturated )
    #define convert_float16_rtp(_X)   __builtin_convert(_X, float16, __kRoundTowardInf, __kUnsaturated )
    #define convert_double16_rtp(_X)  __builtin_convert(_X, double16, __kRoundTowardInf, __kUnsaturated )

    // _sat_rtp variants (16-component): rounding argument is __kRoundTowardInf,
    // saturation argument is __kSaturated.
    #define convert_char16_sat_rtp(_X)    __builtin_convert(_X, char16, __kRoundTowardInf, __kSaturated )
    #define convert_uchar16_sat_rtp(_X)   __builtin_convert(_X, uchar16, __kRoundTowardInf, __kSaturated )
    #define convert_short16_sat_rtp(_X)   __builtin_convert(_X, short16, __kRoundTowardInf, __kSaturated )
    #define convert_ushort16_sat_rtp(_X)  __builtin_convert(_X, ushort16, __kRoundTowardInf, __kSaturated )
    #define convert_int16_sat_rtp(_X)     __builtin_convert(_X, int16, __kRoundTowardInf, __kSaturated )
    #define convert_uint16_sat_rtp(_X)    __builtin_convert(_X, uint16, __kRoundTowardInf, __kSaturated )
    #define convert_long16_sat_rtp(_X)    __builtin_convert(_X, long16, __kRoundTowardInf, __kSaturated )
    #define convert_ulong16_sat_rtp(_X)   __builtin_convert(_X, ulong16, __kRoundTowardInf, __kSaturated )
    #define convert_float16_sat_rtp(_X)   __builtin_convert(_X, float16, __kRoundTowardInf, __kSaturated )
    #define convert_double16_sat_rtp(_X)  __builtin_convert(_X, double16, __kRoundTowardInf, __kSaturated )

    // _rtz variants (16-component): rounding argument is __kRoundTowardZero,
    // saturation argument is __kUnsaturated.
    #define convert_char16_rtz(_X)    __builtin_convert(_X, char16, __kRoundTowardZero, __kUnsaturated )
    #define convert_uchar16_rtz(_X)   __builtin_convert(_X, uchar16, __kRoundTowardZero, __kUnsaturated )
    #define convert_short16_rtz(_X)   __builtin_convert(_X, short16, __kRoundTowardZero, __kUnsaturated )
    #define convert_ushort16_rtz(_X)  __builtin_convert(_X, ushort16, __kRoundTowardZero, __kUnsaturated )
    #define convert_int16_rtz(_X)     __builtin_convert(_X, int16, __kRoundTowardZero, __kUnsaturated )
    #define convert_uint16_rtz(_X)    __builtin_convert(_X, uint16, __kRoundTowardZero, __kUnsaturated )
    #define convert_long16_rtz(_X)    __builtin_convert(_X, long16, __kRoundTowardZero, __kUnsaturated )
    #define convert_ulong16_rtz(_X)   __builtin_convert(_X, ulong16, __kRoundTowardZero, __kUnsaturated )
    #define convert_float16_rtz(_X)   __builtin_convert(_X, float16, __kRoundTowardZero, __kUnsaturated )
    #define convert_double16_rtz(_X)  __builtin_convert(_X, double16, __kRoundTowardZero, __kUnsaturated )

    // _sat_rtz variants (16-component): rounding argument is __kRoundTowardZero,
    // saturation argument is __kSaturated.
    #define convert_char16_sat_rtz(_X)    __builtin_convert(_X, char16, __kRoundTowardZero, __kSaturated )
    #define convert_uchar16_sat_rtz(_X)   __builtin_convert(_X, uchar16, __kRoundTowardZero, __kSaturated )
    #define convert_short16_sat_rtz(_X)   __builtin_convert(_X, short16, __kRoundTowardZero, __kSaturated )
    #define convert_ushort16_sat_rtz(_X)  __builtin_convert(_X, ushort16, __kRoundTowardZero, __kSaturated )
    #define convert_int16_sat_rtz(_X)     __builtin_convert(_X, int16, __kRoundTowardZero, __kSaturated )
    #define convert_uint16_sat_rtz(_X)    __builtin_convert(_X, uint16, __kRoundTowardZero, __kSaturated )
    #define convert_long16_sat_rtz(_X)    __builtin_convert(_X, long16, __kRoundTowardZero, __kSaturated )
    #define convert_ulong16_sat_rtz(_X)   __builtin_convert(_X, ulong16, __kRoundTowardZero, __kSaturated )
    #define convert_float16_sat_rtz(_X)   __builtin_convert(_X, float16, __kRoundTowardZero, __kSaturated )
    #define convert_double16_sat_rtz(_X)  __builtin_convert(_X, double16, __kRoundTowardZero, __kSaturated )


#else

    // convert_uchar / convert_ucharN (N = 2, 3, 4, 8, 16), no rounding-mode or
    // saturation suffix. Each width is overloaded on the ten source element types
    // (uchar, ushort, uint, ulong, char, short, int, long, float, double); vector
    // forms take a source vector of the same width as the result.
    // __x is only a placeholder parameter name for the value being converted.
    uchar __OVERLOAD__ convert_uchar( uchar __x );
    uchar __OVERLOAD__ convert_uchar( ushort __x );
    uchar __OVERLOAD__ convert_uchar( uint __x );
    uchar __OVERLOAD__ convert_uchar( ulong __x );
    uchar __OVERLOAD__ convert_uchar( char __x );
    uchar __OVERLOAD__ convert_uchar( short __x );
    uchar __OVERLOAD__ convert_uchar( int __x );
    uchar __OVERLOAD__ convert_uchar( long __x );
    uchar __OVERLOAD__ convert_uchar( float __x );
    uchar __OVERLOAD__ convert_uchar( double __x );
    uchar2 __OVERLOAD__ convert_uchar2( uchar2 __x );
    uchar2 __OVERLOAD__ convert_uchar2( ushort2 __x );
    uchar2 __OVERLOAD__ convert_uchar2( uint2 __x );
    uchar2 __OVERLOAD__ convert_uchar2( ulong2 __x );
    uchar2 __OVERLOAD__ convert_uchar2( char2 __x );
    uchar2 __OVERLOAD__ convert_uchar2( short2 __x );
    uchar2 __OVERLOAD__ convert_uchar2( int2 __x );
    uchar2 __OVERLOAD__ convert_uchar2( long2 __x );
    uchar2 __OVERLOAD__ convert_uchar2( float2 __x );
    uchar2 __OVERLOAD__ convert_uchar2( double2 __x );
    uchar3 __OVERLOAD__ convert_uchar3( uchar3 __x );
    uchar3 __OVERLOAD__ convert_uchar3( ushort3 __x );
    uchar3 __OVERLOAD__ convert_uchar3( uint3 __x );
    uchar3 __OVERLOAD__ convert_uchar3( ulong3 __x );
    uchar3 __OVERLOAD__ convert_uchar3( char3 __x );
    uchar3 __OVERLOAD__ convert_uchar3( short3 __x );
    uchar3 __OVERLOAD__ convert_uchar3( int3 __x );
    uchar3 __OVERLOAD__ convert_uchar3( long3 __x );
    uchar3 __OVERLOAD__ convert_uchar3( float3 __x );
    uchar3 __OVERLOAD__ convert_uchar3( double3 __x );
    uchar4 __OVERLOAD__ convert_uchar4( uchar4 __x );
    uchar4 __OVERLOAD__ convert_uchar4( ushort4 __x );
    uchar4 __OVERLOAD__ convert_uchar4( uint4 __x );
    uchar4 __OVERLOAD__ convert_uchar4( ulong4 __x );
    uchar4 __OVERLOAD__ convert_uchar4( char4 __x );
    uchar4 __OVERLOAD__ convert_uchar4( short4 __x );
    uchar4 __OVERLOAD__ convert_uchar4( int4 __x );
    uchar4 __OVERLOAD__ convert_uchar4( long4 __x );
    uchar4 __OVERLOAD__ convert_uchar4( float4 __x );
    uchar4 __OVERLOAD__ convert_uchar4( double4 __x );
    uchar8 __OVERLOAD__ convert_uchar8( uchar8 __x );
    uchar8 __OVERLOAD__ convert_uchar8( ushort8 __x );
    uchar8 __OVERLOAD__ convert_uchar8( uint8 __x );
    uchar8 __OVERLOAD__ convert_uchar8( ulong8 __x );
    uchar8 __OVERLOAD__ convert_uchar8( char8 __x );
    uchar8 __OVERLOAD__ convert_uchar8( short8 __x );
    uchar8 __OVERLOAD__ convert_uchar8( int8 __x );
    uchar8 __OVERLOAD__ convert_uchar8( long8 __x );
    uchar8 __OVERLOAD__ convert_uchar8( float8 __x );
    uchar8 __OVERLOAD__ convert_uchar8( double8 __x );
    uchar16 __OVERLOAD__ convert_uchar16( uchar16 __x );
    uchar16 __OVERLOAD__ convert_uchar16( ushort16 __x );
    uchar16 __OVERLOAD__ convert_uchar16( uint16 __x );
    uchar16 __OVERLOAD__ convert_uchar16( ulong16 __x );
    uchar16 __OVERLOAD__ convert_uchar16( char16 __x );
    uchar16 __OVERLOAD__ convert_uchar16( short16 __x );
    uchar16 __OVERLOAD__ convert_uchar16( int16 __x );
    uchar16 __OVERLOAD__ convert_uchar16( long16 __x );
    uchar16 __OVERLOAD__ convert_uchar16( float16 __x );
    uchar16 __OVERLOAD__ convert_uchar16( double16 __x );
    // convert_uchar_rte / convert_ucharN_rte (N = 2, 3, 4, 8, 16): unsaturated
    // conversions to uchar carrying the _rte suffix (round to nearest even in
    // OpenCL C naming). Each width is overloaded on the ten source element types;
    // vector forms take a source vector of the same width as the result.
    // __x is only a placeholder parameter name for the value being converted.
    uchar __OVERLOAD__ convert_uchar_rte( uchar __x );
    uchar __OVERLOAD__ convert_uchar_rte( ushort __x );
    uchar __OVERLOAD__ convert_uchar_rte( uint __x );
    uchar __OVERLOAD__ convert_uchar_rte( ulong __x );
    uchar __OVERLOAD__ convert_uchar_rte( char __x );
    uchar __OVERLOAD__ convert_uchar_rte( short __x );
    uchar __OVERLOAD__ convert_uchar_rte( int __x );
    uchar __OVERLOAD__ convert_uchar_rte( long __x );
    uchar __OVERLOAD__ convert_uchar_rte( float __x );
    uchar __OVERLOAD__ convert_uchar_rte( double __x );
    uchar2 __OVERLOAD__ convert_uchar2_rte( uchar2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rte( ushort2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rte( uint2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rte( ulong2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rte( char2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rte( short2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rte( int2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rte( long2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rte( float2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rte( double2 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rte( uchar3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rte( ushort3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rte( uint3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rte( ulong3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rte( char3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rte( short3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rte( int3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rte( long3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rte( float3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rte( double3 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rte( uchar4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rte( ushort4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rte( uint4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rte( ulong4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rte( char4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rte( short4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rte( int4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rte( long4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rte( float4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rte( double4 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rte( uchar8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rte( ushort8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rte( uint8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rte( ulong8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rte( char8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rte( short8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rte( int8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rte( long8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rte( float8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rte( double8 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rte( uchar16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rte( ushort16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rte( uint16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rte( ulong16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rte( char16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rte( short16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rte( int16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rte( long16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rte( float16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rte( double16 __x );
    // convert_uchar_rtp / convert_ucharN_rtp (N = 2, 3, 4, 8, 16): unsaturated
    // conversions to uchar carrying the _rtp suffix (round toward positive
    // infinity in OpenCL C naming). Each width is overloaded on the ten source
    // element types; vector forms take a source vector of the same width.
    // __x is only a placeholder parameter name for the value being converted.
    uchar __OVERLOAD__ convert_uchar_rtp( uchar __x );
    uchar __OVERLOAD__ convert_uchar_rtp( ushort __x );
    uchar __OVERLOAD__ convert_uchar_rtp( uint __x );
    uchar __OVERLOAD__ convert_uchar_rtp( ulong __x );
    uchar __OVERLOAD__ convert_uchar_rtp( char __x );
    uchar __OVERLOAD__ convert_uchar_rtp( short __x );
    uchar __OVERLOAD__ convert_uchar_rtp( int __x );
    uchar __OVERLOAD__ convert_uchar_rtp( long __x );
    uchar __OVERLOAD__ convert_uchar_rtp( float __x );
    uchar __OVERLOAD__ convert_uchar_rtp( double __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtp( uchar2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtp( ushort2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtp( uint2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtp( ulong2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtp( char2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtp( short2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtp( int2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtp( long2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtp( float2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtp( double2 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtp( uchar3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtp( ushort3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtp( uint3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtp( ulong3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtp( char3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtp( short3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtp( int3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtp( long3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtp( float3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtp( double3 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtp( uchar4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtp( ushort4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtp( uint4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtp( ulong4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtp( char4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtp( short4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtp( int4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtp( long4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtp( float4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtp( double4 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtp( uchar8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtp( ushort8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtp( uint8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtp( ulong8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtp( char8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtp( short8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtp( int8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtp( long8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtp( float8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtp( double8 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtp( uchar16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtp( ushort16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtp( uint16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtp( ulong16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtp( char16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtp( short16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtp( int16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtp( long16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtp( float16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtp( double16 __x );
    // convert_uchar_rtn / convert_ucharN_rtn (N = 2, 3, 4, 8, 16): unsaturated
    // conversions to uchar carrying the _rtn suffix (round toward negative
    // infinity in OpenCL C naming). Each width is overloaded on the ten source
    // element types; vector forms take a source vector of the same width.
    // __x is only a placeholder parameter name for the value being converted.
    uchar __OVERLOAD__ convert_uchar_rtn( uchar __x );
    uchar __OVERLOAD__ convert_uchar_rtn( ushort __x );
    uchar __OVERLOAD__ convert_uchar_rtn( uint __x );
    uchar __OVERLOAD__ convert_uchar_rtn( ulong __x );
    uchar __OVERLOAD__ convert_uchar_rtn( char __x );
    uchar __OVERLOAD__ convert_uchar_rtn( short __x );
    uchar __OVERLOAD__ convert_uchar_rtn( int __x );
    uchar __OVERLOAD__ convert_uchar_rtn( long __x );
    uchar __OVERLOAD__ convert_uchar_rtn( float __x );
    uchar __OVERLOAD__ convert_uchar_rtn( double __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtn( uchar2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtn( ushort2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtn( uint2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtn( ulong2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtn( char2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtn( short2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtn( int2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtn( long2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtn( float2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtn( double2 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtn( uchar3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtn( ushort3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtn( uint3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtn( ulong3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtn( char3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtn( short3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtn( int3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtn( long3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtn( float3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtn( double3 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtn( uchar4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtn( ushort4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtn( uint4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtn( ulong4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtn( char4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtn( short4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtn( int4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtn( long4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtn( float4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtn( double4 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtn( uchar8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtn( ushort8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtn( uint8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtn( ulong8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtn( char8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtn( short8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtn( int8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtn( long8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtn( float8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtn( double8 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtn( uchar16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtn( ushort16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtn( uint16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtn( ulong16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtn( char16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtn( short16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtn( int16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtn( long16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtn( float16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtn( double16 __x );
    // convert_uchar_rtz / convert_ucharN_rtz (N = 2, 3, 4, 8, 16): unsaturated
    // conversions to uchar carrying the _rtz suffix (round toward zero). Each
    // width is overloaded on the ten source element types; vector forms take a
    // source vector of the same width as the result.
    // __x is only a placeholder parameter name for the value being converted.
    uchar __OVERLOAD__ convert_uchar_rtz( uchar __x );
    uchar __OVERLOAD__ convert_uchar_rtz( ushort __x );
    uchar __OVERLOAD__ convert_uchar_rtz( uint __x );
    uchar __OVERLOAD__ convert_uchar_rtz( ulong __x );
    uchar __OVERLOAD__ convert_uchar_rtz( char __x );
    uchar __OVERLOAD__ convert_uchar_rtz( short __x );
    uchar __OVERLOAD__ convert_uchar_rtz( int __x );
    uchar __OVERLOAD__ convert_uchar_rtz( long __x );
    uchar __OVERLOAD__ convert_uchar_rtz( float __x );
    uchar __OVERLOAD__ convert_uchar_rtz( double __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtz( uchar2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtz( ushort2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtz( uint2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtz( ulong2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtz( char2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtz( short2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtz( int2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtz( long2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtz( float2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_rtz( double2 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtz( uchar3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtz( ushort3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtz( uint3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtz( ulong3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtz( char3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtz( short3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtz( int3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtz( long3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtz( float3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_rtz( double3 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtz( uchar4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtz( ushort4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtz( uint4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtz( ulong4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtz( char4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtz( short4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtz( int4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtz( long4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtz( float4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_rtz( double4 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtz( uchar8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtz( ushort8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtz( uint8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtz( ulong8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtz( char8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtz( short8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtz( int8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtz( long8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtz( float8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_rtz( double8 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtz( uchar16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtz( ushort16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtz( uint16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtz( ulong16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtz( char16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtz( short16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtz( int16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtz( long16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtz( float16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_rtz( double16 __x );
    // convert_uchar_sat / convert_ucharN_sat (N = 2, 3, 4, 8, 16): saturating
    // conversions to uchar with no rounding-mode suffix. Each width is overloaded
    // on the ten source element types (uchar, ushort, uint, ulong, char, short,
    // int, long, float, double); vector forms take a source vector of the same
    // width as the result.
    // __x is only a placeholder parameter name for the value being converted.
    uchar __OVERLOAD__ convert_uchar_sat( uchar __x );
    uchar __OVERLOAD__ convert_uchar_sat( ushort __x );
    uchar __OVERLOAD__ convert_uchar_sat( uint __x );
    uchar __OVERLOAD__ convert_uchar_sat( ulong __x );
    uchar __OVERLOAD__ convert_uchar_sat( char __x );
    uchar __OVERLOAD__ convert_uchar_sat( short __x );
    uchar __OVERLOAD__ convert_uchar_sat( int __x );
    uchar __OVERLOAD__ convert_uchar_sat( long __x );
    uchar __OVERLOAD__ convert_uchar_sat( float __x );
    uchar __OVERLOAD__ convert_uchar_sat( double __x );
    uchar2 __OVERLOAD__ convert_uchar2_sat( uchar2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_sat( ushort2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_sat( uint2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_sat( ulong2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_sat( char2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_sat( short2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_sat( int2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_sat( long2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_sat( float2 __x );
    uchar2 __OVERLOAD__ convert_uchar2_sat( double2 __x );
    uchar3 __OVERLOAD__ convert_uchar3_sat( uchar3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_sat( ushort3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_sat( uint3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_sat( ulong3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_sat( char3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_sat( short3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_sat( int3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_sat( long3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_sat( float3 __x );
    uchar3 __OVERLOAD__ convert_uchar3_sat( double3 __x );
    uchar4 __OVERLOAD__ convert_uchar4_sat( uchar4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_sat( ushort4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_sat( uint4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_sat( ulong4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_sat( char4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_sat( short4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_sat( int4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_sat( long4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_sat( float4 __x );
    uchar4 __OVERLOAD__ convert_uchar4_sat( double4 __x );
    uchar8 __OVERLOAD__ convert_uchar8_sat( uchar8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_sat( ushort8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_sat( uint8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_sat( ulong8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_sat( char8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_sat( short8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_sat( int8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_sat( long8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_sat( float8 __x );
    uchar8 __OVERLOAD__ convert_uchar8_sat( double8 __x );
    uchar16 __OVERLOAD__ convert_uchar16_sat( uchar16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_sat( ushort16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_sat( uint16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_sat( ulong16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_sat( char16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_sat( short16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_sat( int16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_sat( long16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_sat( float16 __x );
    uchar16 __OVERLOAD__ convert_uchar16_sat( double16 __x );
    uchar __OVERLOAD__ convert_uchar_sat_rte( uchar );
    uchar __OVERLOAD__ convert_uchar_sat_rte( ushort );
    uchar __OVERLOAD__ convert_uchar_sat_rte( uint );
    uchar __OVERLOAD__ convert_uchar_sat_rte( ulong );
    uchar __OVERLOAD__ convert_uchar_sat_rte( char );
    uchar __OVERLOAD__ convert_uchar_sat_rte( short );
    uchar __OVERLOAD__ convert_uchar_sat_rte( int );
    uchar __OVERLOAD__ convert_uchar_sat_rte( long );
    uchar __OVERLOAD__ convert_uchar_sat_rte( float );
    uchar __OVERLOAD__ convert_uchar_sat_rte( double );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rte( uchar2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rte( ushort2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rte( uint2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rte( ulong2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rte( char2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rte( short2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rte( int2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rte( long2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rte( float2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rte( double2 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rte( uchar3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rte( ushort3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rte( uint3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rte( ulong3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rte( char3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rte( short3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rte( int3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rte( long3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rte( float3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rte( double3 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rte( uchar4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rte( ushort4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rte( uint4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rte( ulong4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rte( char4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rte( short4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rte( int4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rte( long4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rte( float4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rte( double4 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rte( uchar8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rte( ushort8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rte( uint8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rte( ulong8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rte( char8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rte( short8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rte( int8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rte( long8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rte( float8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rte( double8 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rte( uchar16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rte( ushort16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rte( uint16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rte( ulong16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rte( char16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rte( short16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rte( int16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rte( long16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rte( float16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rte( double16 );
    uchar __OVERLOAD__ convert_uchar_sat_rtp( uchar );
    uchar __OVERLOAD__ convert_uchar_sat_rtp( ushort );
    uchar __OVERLOAD__ convert_uchar_sat_rtp( uint );
    uchar __OVERLOAD__ convert_uchar_sat_rtp( ulong );
    uchar __OVERLOAD__ convert_uchar_sat_rtp( char );
    uchar __OVERLOAD__ convert_uchar_sat_rtp( short );
    uchar __OVERLOAD__ convert_uchar_sat_rtp( int );
    uchar __OVERLOAD__ convert_uchar_sat_rtp( long );
    uchar __OVERLOAD__ convert_uchar_sat_rtp( float );
    uchar __OVERLOAD__ convert_uchar_sat_rtp( double );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtp( uchar2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtp( ushort2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtp( uint2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtp( ulong2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtp( char2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtp( short2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtp( int2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtp( long2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtp( float2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtp( double2 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtp( uchar3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtp( ushort3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtp( uint3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtp( ulong3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtp( char3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtp( short3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtp( int3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtp( long3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtp( float3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtp( double3 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtp( uchar4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtp( ushort4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtp( uint4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtp( ulong4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtp( char4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtp( short4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtp( int4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtp( long4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtp( float4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtp( double4 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtp( uchar8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtp( ushort8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtp( uint8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtp( ulong8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtp( char8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtp( short8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtp( int8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtp( long8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtp( float8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtp( double8 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtp( uchar16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtp( ushort16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtp( uint16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtp( ulong16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtp( char16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtp( short16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtp( int16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtp( long16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtp( float16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtp( double16 );
    uchar __OVERLOAD__ convert_uchar_sat_rtn( uchar );
    uchar __OVERLOAD__ convert_uchar_sat_rtn( ushort );
    uchar __OVERLOAD__ convert_uchar_sat_rtn( uint );
    uchar __OVERLOAD__ convert_uchar_sat_rtn( ulong );
    uchar __OVERLOAD__ convert_uchar_sat_rtn( char );
    uchar __OVERLOAD__ convert_uchar_sat_rtn( short );
    uchar __OVERLOAD__ convert_uchar_sat_rtn( int );
    uchar __OVERLOAD__ convert_uchar_sat_rtn( long );
    uchar __OVERLOAD__ convert_uchar_sat_rtn( float );
    uchar __OVERLOAD__ convert_uchar_sat_rtn( double );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtn( uchar2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtn( ushort2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtn( uint2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtn( ulong2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtn( char2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtn( short2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtn( int2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtn( long2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtn( float2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtn( double2 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtn( uchar3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtn( ushort3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtn( uint3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtn( ulong3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtn( char3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtn( short3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtn( int3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtn( long3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtn( float3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtn( double3 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtn( uchar4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtn( ushort4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtn( uint4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtn( ulong4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtn( char4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtn( short4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtn( int4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtn( long4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtn( float4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtn( double4 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtn( uchar8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtn( ushort8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtn( uint8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtn( ulong8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtn( char8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtn( short8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtn( int8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtn( long8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtn( float8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtn( double8 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtn( uchar16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtn( ushort16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtn( uint16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtn( ulong16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtn( char16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtn( short16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtn( int16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtn( long16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtn( float16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtn( double16 );
    uchar __OVERLOAD__ convert_uchar_sat_rtz( uchar );
    uchar __OVERLOAD__ convert_uchar_sat_rtz( ushort );
    uchar __OVERLOAD__ convert_uchar_sat_rtz( uint );
    uchar __OVERLOAD__ convert_uchar_sat_rtz( ulong );
    uchar __OVERLOAD__ convert_uchar_sat_rtz( char );
    uchar __OVERLOAD__ convert_uchar_sat_rtz( short );
    uchar __OVERLOAD__ convert_uchar_sat_rtz( int );
    uchar __OVERLOAD__ convert_uchar_sat_rtz( long );
    uchar __OVERLOAD__ convert_uchar_sat_rtz( float );
    uchar __OVERLOAD__ convert_uchar_sat_rtz( double );
    // convert_uchar<N>_sat_rtz, vector forms for N = 2, 3, 4, 8, 16.
    // Each width declares ten overloads, one per same-width source type
    // (uchar, ushort, uint, ulong, char, short, int, long, float, double), all
    // returning ucharN. The overloads share one name and are distinguished by
    // argument type via __OVERLOAD__ (macro defined outside this section).
    // Per the OpenCL C explicit-conversion naming scheme, "_sat" requests
    // saturation to the destination range and "_rtz" rounds toward zero.
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtz( uchar2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtz( ushort2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtz( uint2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtz( ulong2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtz( char2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtz( short2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtz( int2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtz( long2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtz( float2 );
    uchar2 __OVERLOAD__ convert_uchar2_sat_rtz( double2 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtz( uchar3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtz( ushort3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtz( uint3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtz( ulong3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtz( char3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtz( short3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtz( int3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtz( long3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtz( float3 );
    uchar3 __OVERLOAD__ convert_uchar3_sat_rtz( double3 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtz( uchar4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtz( ushort4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtz( uint4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtz( ulong4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtz( char4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtz( short4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtz( int4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtz( long4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtz( float4 );
    uchar4 __OVERLOAD__ convert_uchar4_sat_rtz( double4 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtz( uchar8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtz( ushort8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtz( uint8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtz( ulong8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtz( char8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtz( short8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtz( int8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtz( long8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtz( float8 );
    uchar8 __OVERLOAD__ convert_uchar8_sat_rtz( double8 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtz( uchar16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtz( ushort16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtz( uint16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtz( ulong16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtz( char16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtz( short16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtz( int16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtz( long16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtz( float16 );
    uchar16 __OVERLOAD__ convert_uchar16_sat_rtz( double16 );
    // convert_ushort / convert_ushort<N> (N = 2, 3, 4, 8, 16), no suffix.
    // Each width declares ten overloads, one per same-width source type
    // (uchar, ushort, uint, ulong, char, short, int, long, float, double),
    // returning ushort or ushortN. The overloads share one name and are
    // distinguished by argument type via __OVERLOAD__ (macro defined outside
    // this section).
    // Per the OpenCL C explicit-conversion rules, the unsuffixed form does not
    // saturate and uses the default rounding mode, which for conversions to an
    // integer type is round toward zero.
    ushort __OVERLOAD__ convert_ushort( uchar );
    ushort __OVERLOAD__ convert_ushort( ushort );
    ushort __OVERLOAD__ convert_ushort( uint );
    ushort __OVERLOAD__ convert_ushort( ulong );
    ushort __OVERLOAD__ convert_ushort( char );
    ushort __OVERLOAD__ convert_ushort( short );
    ushort __OVERLOAD__ convert_ushort( int );
    ushort __OVERLOAD__ convert_ushort( long );
    ushort __OVERLOAD__ convert_ushort( float );
    ushort __OVERLOAD__ convert_ushort( double );
    ushort2 __OVERLOAD__ convert_ushort2( uchar2 );
    ushort2 __OVERLOAD__ convert_ushort2( ushort2 );
    ushort2 __OVERLOAD__ convert_ushort2( uint2 );
    ushort2 __OVERLOAD__ convert_ushort2( ulong2 );
    ushort2 __OVERLOAD__ convert_ushort2( char2 );
    ushort2 __OVERLOAD__ convert_ushort2( short2 );
    ushort2 __OVERLOAD__ convert_ushort2( int2 );
    ushort2 __OVERLOAD__ convert_ushort2( long2 );
    ushort2 __OVERLOAD__ convert_ushort2( float2 );
    ushort2 __OVERLOAD__ convert_ushort2( double2 );
    ushort3 __OVERLOAD__ convert_ushort3( uchar3 );
    ushort3 __OVERLOAD__ convert_ushort3( ushort3 );
    ushort3 __OVERLOAD__ convert_ushort3( uint3 );
    ushort3 __OVERLOAD__ convert_ushort3( ulong3 );
    ushort3 __OVERLOAD__ convert_ushort3( char3 );
    ushort3 __OVERLOAD__ convert_ushort3( short3 );
    ushort3 __OVERLOAD__ convert_ushort3( int3 );
    ushort3 __OVERLOAD__ convert_ushort3( long3 );
    ushort3 __OVERLOAD__ convert_ushort3( float3 );
    ushort3 __OVERLOAD__ convert_ushort3( double3 );
    ushort4 __OVERLOAD__ convert_ushort4( uchar4 );
    ushort4 __OVERLOAD__ convert_ushort4( ushort4 );
    ushort4 __OVERLOAD__ convert_ushort4( uint4 );
    ushort4 __OVERLOAD__ convert_ushort4( ulong4 );
    ushort4 __OVERLOAD__ convert_ushort4( char4 );
    ushort4 __OVERLOAD__ convert_ushort4( short4 );
    ushort4 __OVERLOAD__ convert_ushort4( int4 );
    ushort4 __OVERLOAD__ convert_ushort4( long4 );
    ushort4 __OVERLOAD__ convert_ushort4( float4 );
    ushort4 __OVERLOAD__ convert_ushort4( double4 );
    ushort8 __OVERLOAD__ convert_ushort8( uchar8 );
    ushort8 __OVERLOAD__ convert_ushort8( ushort8 );
    ushort8 __OVERLOAD__ convert_ushort8( uint8 );
    ushort8 __OVERLOAD__ convert_ushort8( ulong8 );
    ushort8 __OVERLOAD__ convert_ushort8( char8 );
    ushort8 __OVERLOAD__ convert_ushort8( short8 );
    ushort8 __OVERLOAD__ convert_ushort8( int8 );
    ushort8 __OVERLOAD__ convert_ushort8( long8 );
    ushort8 __OVERLOAD__ convert_ushort8( float8 );
    ushort8 __OVERLOAD__ convert_ushort8( double8 );
    ushort16 __OVERLOAD__ convert_ushort16( uchar16 );
    ushort16 __OVERLOAD__ convert_ushort16( ushort16 );
    ushort16 __OVERLOAD__ convert_ushort16( uint16 );
    ushort16 __OVERLOAD__ convert_ushort16( ulong16 );
    ushort16 __OVERLOAD__ convert_ushort16( char16 );
    ushort16 __OVERLOAD__ convert_ushort16( short16 );
    ushort16 __OVERLOAD__ convert_ushort16( int16 );
    ushort16 __OVERLOAD__ convert_ushort16( long16 );
    ushort16 __OVERLOAD__ convert_ushort16( float16 );
    ushort16 __OVERLOAD__ convert_ushort16( double16 );
    // convert_ushort_rte / convert_ushort<N>_rte (N = 2, 3, 4, 8, 16).
    // Each width declares ten overloads, one per same-width source type
    // (uchar, ushort, uint, ulong, char, short, int, long, float, double),
    // returning ushort or ushortN. The overloads share one name and are
    // distinguished by argument type via __OVERLOAD__ (macro defined outside
    // this section).
    // Per the OpenCL C explicit-conversion naming scheme, "_rte" selects
    // round-to-nearest-even; there is no "_sat", so no saturation is requested.
    ushort __OVERLOAD__ convert_ushort_rte( uchar );
    ushort __OVERLOAD__ convert_ushort_rte( ushort );
    ushort __OVERLOAD__ convert_ushort_rte( uint );
    ushort __OVERLOAD__ convert_ushort_rte( ulong );
    ushort __OVERLOAD__ convert_ushort_rte( char );
    ushort __OVERLOAD__ convert_ushort_rte( short );
    ushort __OVERLOAD__ convert_ushort_rte( int );
    ushort __OVERLOAD__ convert_ushort_rte( long );
    ushort __OVERLOAD__ convert_ushort_rte( float );
    ushort __OVERLOAD__ convert_ushort_rte( double );
    ushort2 __OVERLOAD__ convert_ushort2_rte( uchar2 );
    ushort2 __OVERLOAD__ convert_ushort2_rte( ushort2 );
    ushort2 __OVERLOAD__ convert_ushort2_rte( uint2 );
    ushort2 __OVERLOAD__ convert_ushort2_rte( ulong2 );
    ushort2 __OVERLOAD__ convert_ushort2_rte( char2 );
    ushort2 __OVERLOAD__ convert_ushort2_rte( short2 );
    ushort2 __OVERLOAD__ convert_ushort2_rte( int2 );
    ushort2 __OVERLOAD__ convert_ushort2_rte( long2 );
    ushort2 __OVERLOAD__ convert_ushort2_rte( float2 );
    ushort2 __OVERLOAD__ convert_ushort2_rte( double2 );
    ushort3 __OVERLOAD__ convert_ushort3_rte( uchar3 );
    ushort3 __OVERLOAD__ convert_ushort3_rte( ushort3 );
    ushort3 __OVERLOAD__ convert_ushort3_rte( uint3 );
    ushort3 __OVERLOAD__ convert_ushort3_rte( ulong3 );
    ushort3 __OVERLOAD__ convert_ushort3_rte( char3 );
    ushort3 __OVERLOAD__ convert_ushort3_rte( short3 );
    ushort3 __OVERLOAD__ convert_ushort3_rte( int3 );
    ushort3 __OVERLOAD__ convert_ushort3_rte( long3 );
    ushort3 __OVERLOAD__ convert_ushort3_rte( float3 );
    ushort3 __OVERLOAD__ convert_ushort3_rte( double3 );
    ushort4 __OVERLOAD__ convert_ushort4_rte( uchar4 );
    ushort4 __OVERLOAD__ convert_ushort4_rte( ushort4 );
    ushort4 __OVERLOAD__ convert_ushort4_rte( uint4 );
    ushort4 __OVERLOAD__ convert_ushort4_rte( ulong4 );
    ushort4 __OVERLOAD__ convert_ushort4_rte( char4 );
    ushort4 __OVERLOAD__ convert_ushort4_rte( short4 );
    ushort4 __OVERLOAD__ convert_ushort4_rte( int4 );
    ushort4 __OVERLOAD__ convert_ushort4_rte( long4 );
    ushort4 __OVERLOAD__ convert_ushort4_rte( float4 );
    ushort4 __OVERLOAD__ convert_ushort4_rte( double4 );
    ushort8 __OVERLOAD__ convert_ushort8_rte( uchar8 );
    ushort8 __OVERLOAD__ convert_ushort8_rte( ushort8 );
    ushort8 __OVERLOAD__ convert_ushort8_rte( uint8 );
    ushort8 __OVERLOAD__ convert_ushort8_rte( ulong8 );
    ushort8 __OVERLOAD__ convert_ushort8_rte( char8 );
    ushort8 __OVERLOAD__ convert_ushort8_rte( short8 );
    ushort8 __OVERLOAD__ convert_ushort8_rte( int8 );
    ushort8 __OVERLOAD__ convert_ushort8_rte( long8 );
    ushort8 __OVERLOAD__ convert_ushort8_rte( float8 );
    ushort8 __OVERLOAD__ convert_ushort8_rte( double8 );
    ushort16 __OVERLOAD__ convert_ushort16_rte( uchar16 );
    ushort16 __OVERLOAD__ convert_ushort16_rte( ushort16 );
    ushort16 __OVERLOAD__ convert_ushort16_rte( uint16 );
    ushort16 __OVERLOAD__ convert_ushort16_rte( ulong16 );
    ushort16 __OVERLOAD__ convert_ushort16_rte( char16 );
    ushort16 __OVERLOAD__ convert_ushort16_rte( short16 );
    ushort16 __OVERLOAD__ convert_ushort16_rte( int16 );
    ushort16 __OVERLOAD__ convert_ushort16_rte( long16 );
    ushort16 __OVERLOAD__ convert_ushort16_rte( float16 );
    ushort16 __OVERLOAD__ convert_ushort16_rte( double16 );
    // convert_ushort_rtp / convert_ushort<N>_rtp (N = 2, 3, 4, 8, 16).
    // Each width declares ten overloads, one per same-width source type
    // (uchar, ushort, uint, ulong, char, short, int, long, float, double),
    // returning ushort or ushortN. The overloads share one name and are
    // distinguished by argument type via __OVERLOAD__ (macro defined outside
    // this section).
    // Per the OpenCL C explicit-conversion naming scheme, "_rtp" rounds toward
    // positive infinity; there is no "_sat", so no saturation is requested.
    ushort __OVERLOAD__ convert_ushort_rtp( uchar );
    ushort __OVERLOAD__ convert_ushort_rtp( ushort );
    ushort __OVERLOAD__ convert_ushort_rtp( uint );
    ushort __OVERLOAD__ convert_ushort_rtp( ulong );
    ushort __OVERLOAD__ convert_ushort_rtp( char );
    ushort __OVERLOAD__ convert_ushort_rtp( short );
    ushort __OVERLOAD__ convert_ushort_rtp( int );
    ushort __OVERLOAD__ convert_ushort_rtp( long );
    ushort __OVERLOAD__ convert_ushort_rtp( float );
    ushort __OVERLOAD__ convert_ushort_rtp( double );
    ushort2 __OVERLOAD__ convert_ushort2_rtp( uchar2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtp( ushort2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtp( uint2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtp( ulong2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtp( char2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtp( short2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtp( int2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtp( long2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtp( float2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtp( double2 );
    ushort3 __OVERLOAD__ convert_ushort3_rtp( uchar3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtp( ushort3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtp( uint3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtp( ulong3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtp( char3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtp( short3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtp( int3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtp( long3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtp( float3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtp( double3 );
    ushort4 __OVERLOAD__ convert_ushort4_rtp( uchar4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtp( ushort4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtp( uint4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtp( ulong4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtp( char4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtp( short4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtp( int4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtp( long4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtp( float4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtp( double4 );
    ushort8 __OVERLOAD__ convert_ushort8_rtp( uchar8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtp( ushort8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtp( uint8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtp( ulong8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtp( char8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtp( short8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtp( int8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtp( long8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtp( float8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtp( double8 );
    ushort16 __OVERLOAD__ convert_ushort16_rtp( uchar16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtp( ushort16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtp( uint16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtp( ulong16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtp( char16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtp( short16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtp( int16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtp( long16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtp( float16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtp( double16 );
    // convert_ushort_rtn / convert_ushort<N>_rtn (N = 2, 3, 4, 8, 16).
    // Each width declares ten overloads, one per same-width source type
    // (uchar, ushort, uint, ulong, char, short, int, long, float, double),
    // returning ushort or ushortN. The overloads share one name and are
    // distinguished by argument type via __OVERLOAD__ (macro defined outside
    // this section).
    // Per the OpenCL C explicit-conversion naming scheme, "_rtn" rounds toward
    // negative infinity; there is no "_sat", so no saturation is requested.
    ushort __OVERLOAD__ convert_ushort_rtn( uchar );
    ushort __OVERLOAD__ convert_ushort_rtn( ushort );
    ushort __OVERLOAD__ convert_ushort_rtn( uint );
    ushort __OVERLOAD__ convert_ushort_rtn( ulong );
    ushort __OVERLOAD__ convert_ushort_rtn( char );
    ushort __OVERLOAD__ convert_ushort_rtn( short );
    ushort __OVERLOAD__ convert_ushort_rtn( int );
    ushort __OVERLOAD__ convert_ushort_rtn( long );
    ushort __OVERLOAD__ convert_ushort_rtn( float );
    ushort __OVERLOAD__ convert_ushort_rtn( double );
    ushort2 __OVERLOAD__ convert_ushort2_rtn( uchar2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtn( ushort2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtn( uint2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtn( ulong2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtn( char2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtn( short2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtn( int2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtn( long2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtn( float2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtn( double2 );
    ushort3 __OVERLOAD__ convert_ushort3_rtn( uchar3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtn( ushort3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtn( uint3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtn( ulong3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtn( char3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtn( short3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtn( int3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtn( long3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtn( float3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtn( double3 );
    ushort4 __OVERLOAD__ convert_ushort4_rtn( uchar4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtn( ushort4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtn( uint4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtn( ulong4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtn( char4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtn( short4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtn( int4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtn( long4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtn( float4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtn( double4 );
    ushort8 __OVERLOAD__ convert_ushort8_rtn( uchar8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtn( ushort8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtn( uint8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtn( ulong8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtn( char8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtn( short8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtn( int8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtn( long8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtn( float8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtn( double8 );
    ushort16 __OVERLOAD__ convert_ushort16_rtn( uchar16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtn( ushort16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtn( uint16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtn( ulong16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtn( char16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtn( short16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtn( int16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtn( long16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtn( float16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtn( double16 );
    // convert_ushort_rtz / convert_ushort<N>_rtz (N = 2, 3, 4, 8, 16).
    // Each width declares ten overloads, one per same-width source type
    // (uchar, ushort, uint, ulong, char, short, int, long, float, double),
    // returning ushort or ushortN. The overloads share one name and are
    // distinguished by argument type via __OVERLOAD__ (macro defined outside
    // this section).
    // Per the OpenCL C explicit-conversion naming scheme, "_rtz" rounds toward
    // zero; there is no "_sat", so no saturation is requested.
    ushort __OVERLOAD__ convert_ushort_rtz( uchar );
    ushort __OVERLOAD__ convert_ushort_rtz( ushort );
    ushort __OVERLOAD__ convert_ushort_rtz( uint );
    ushort __OVERLOAD__ convert_ushort_rtz( ulong );
    ushort __OVERLOAD__ convert_ushort_rtz( char );
    ushort __OVERLOAD__ convert_ushort_rtz( short );
    ushort __OVERLOAD__ convert_ushort_rtz( int );
    ushort __OVERLOAD__ convert_ushort_rtz( long );
    ushort __OVERLOAD__ convert_ushort_rtz( float );
    ushort __OVERLOAD__ convert_ushort_rtz( double );
    ushort2 __OVERLOAD__ convert_ushort2_rtz( uchar2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtz( ushort2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtz( uint2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtz( ulong2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtz( char2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtz( short2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtz( int2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtz( long2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtz( float2 );
    ushort2 __OVERLOAD__ convert_ushort2_rtz( double2 );
    ushort3 __OVERLOAD__ convert_ushort3_rtz( uchar3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtz( ushort3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtz( uint3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtz( ulong3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtz( char3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtz( short3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtz( int3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtz( long3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtz( float3 );
    ushort3 __OVERLOAD__ convert_ushort3_rtz( double3 );
    ushort4 __OVERLOAD__ convert_ushort4_rtz( uchar4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtz( ushort4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtz( uint4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtz( ulong4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtz( char4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtz( short4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtz( int4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtz( long4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtz( float4 );
    ushort4 __OVERLOAD__ convert_ushort4_rtz( double4 );
    ushort8 __OVERLOAD__ convert_ushort8_rtz( uchar8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtz( ushort8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtz( uint8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtz( ulong8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtz( char8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtz( short8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtz( int8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtz( long8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtz( float8 );
    ushort8 __OVERLOAD__ convert_ushort8_rtz( double8 );
    ushort16 __OVERLOAD__ convert_ushort16_rtz( uchar16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtz( ushort16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtz( uint16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtz( ulong16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtz( char16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtz( short16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtz( int16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtz( long16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtz( float16 );
    ushort16 __OVERLOAD__ convert_ushort16_rtz( double16 );
    // convert_ushort_sat / convert_ushort<N>_sat (N = 2, 3, 4, 8, 16).
    // Each width declares ten overloads, one per same-width source type
    // (uchar, ushort, uint, ulong, char, short, int, long, float, double),
    // returning ushort or ushortN. The overloads share one name and are
    // distinguished by argument type via __OVERLOAD__ (macro defined outside
    // this section).
    // Per the OpenCL C explicit-conversion naming scheme, "_sat" requests
    // saturation to the destination range. No rounding suffix is given, so
    // the default mode applies, which for conversions to an integer type is
    // round toward zero.
    ushort __OVERLOAD__ convert_ushort_sat( uchar );
    ushort __OVERLOAD__ convert_ushort_sat( ushort );
    ushort __OVERLOAD__ convert_ushort_sat( uint );
    ushort __OVERLOAD__ convert_ushort_sat( ulong );
    ushort __OVERLOAD__ convert_ushort_sat( char );
    ushort __OVERLOAD__ convert_ushort_sat( short );
    ushort __OVERLOAD__ convert_ushort_sat( int );
    ushort __OVERLOAD__ convert_ushort_sat( long );
    ushort __OVERLOAD__ convert_ushort_sat( float );
    ushort __OVERLOAD__ convert_ushort_sat( double );
    ushort2 __OVERLOAD__ convert_ushort2_sat( uchar2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat( ushort2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat( uint2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat( ulong2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat( char2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat( short2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat( int2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat( long2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat( float2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat( double2 );
    ushort3 __OVERLOAD__ convert_ushort3_sat( uchar3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat( ushort3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat( uint3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat( ulong3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat( char3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat( short3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat( int3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat( long3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat( float3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat( double3 );
    ushort4 __OVERLOAD__ convert_ushort4_sat( uchar4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat( ushort4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat( uint4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat( ulong4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat( char4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat( short4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat( int4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat( long4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat( float4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat( double4 );
    ushort8 __OVERLOAD__ convert_ushort8_sat( uchar8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat( ushort8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat( uint8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat( ulong8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat( char8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat( short8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat( int8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat( long8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat( float8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat( double8 );
    ushort16 __OVERLOAD__ convert_ushort16_sat( uchar16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat( ushort16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat( uint16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat( ulong16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat( char16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat( short16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat( int16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat( long16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat( float16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat( double16 );
    // convert_ushort_sat_rte / convert_ushort<N>_sat_rte (N = 2, 3, 4, 8, 16).
    // Each width declares ten overloads, one per same-width source type
    // (uchar, ushort, uint, ulong, char, short, int, long, float, double),
    // returning ushort or ushortN. The overloads share one name and are
    // distinguished by argument type via __OVERLOAD__ (macro defined outside
    // this section).
    // Per the OpenCL C explicit-conversion naming scheme, "_sat" requests
    // saturation to the destination range and "_rte" selects
    // round-to-nearest-even.
    ushort __OVERLOAD__ convert_ushort_sat_rte( uchar );
    ushort __OVERLOAD__ convert_ushort_sat_rte( ushort );
    ushort __OVERLOAD__ convert_ushort_sat_rte( uint );
    ushort __OVERLOAD__ convert_ushort_sat_rte( ulong );
    ushort __OVERLOAD__ convert_ushort_sat_rte( char );
    ushort __OVERLOAD__ convert_ushort_sat_rte( short );
    ushort __OVERLOAD__ convert_ushort_sat_rte( int );
    ushort __OVERLOAD__ convert_ushort_sat_rte( long );
    ushort __OVERLOAD__ convert_ushort_sat_rte( float );
    ushort __OVERLOAD__ convert_ushort_sat_rte( double );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rte( uchar2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rte( ushort2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rte( uint2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rte( ulong2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rte( char2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rte( short2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rte( int2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rte( long2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rte( float2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rte( double2 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rte( uchar3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rte( ushort3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rte( uint3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rte( ulong3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rte( char3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rte( short3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rte( int3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rte( long3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rte( float3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rte( double3 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rte( uchar4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rte( ushort4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rte( uint4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rte( ulong4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rte( char4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rte( short4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rte( int4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rte( long4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rte( float4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rte( double4 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rte( uchar8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rte( ushort8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rte( uint8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rte( ulong8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rte( char8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rte( short8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rte( int8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rte( long8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rte( float8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rte( double8 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rte( uchar16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rte( ushort16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rte( uint16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rte( ulong16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rte( char16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rte( short16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rte( int16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rte( long16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rte( float16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rte( double16 );
    // convert_ushort_sat_rtp / convert_ushort<N>_sat_rtp (N = 2, 3, 4, 8, 16).
    // Each width declares ten overloads, one per same-width source type
    // (uchar, ushort, uint, ulong, char, short, int, long, float, double),
    // returning ushort or ushortN. The overloads share one name and are
    // distinguished by argument type via __OVERLOAD__ (macro defined outside
    // this section).
    // Per the OpenCL C explicit-conversion naming scheme, "_sat" requests
    // saturation to the destination range and "_rtp" rounds toward positive
    // infinity.
    ushort __OVERLOAD__ convert_ushort_sat_rtp( uchar );
    ushort __OVERLOAD__ convert_ushort_sat_rtp( ushort );
    ushort __OVERLOAD__ convert_ushort_sat_rtp( uint );
    ushort __OVERLOAD__ convert_ushort_sat_rtp( ulong );
    ushort __OVERLOAD__ convert_ushort_sat_rtp( char );
    ushort __OVERLOAD__ convert_ushort_sat_rtp( short );
    ushort __OVERLOAD__ convert_ushort_sat_rtp( int );
    ushort __OVERLOAD__ convert_ushort_sat_rtp( long );
    ushort __OVERLOAD__ convert_ushort_sat_rtp( float );
    ushort __OVERLOAD__ convert_ushort_sat_rtp( double );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtp( uchar2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtp( ushort2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtp( uint2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtp( ulong2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtp( char2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtp( short2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtp( int2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtp( long2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtp( float2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtp( double2 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtp( uchar3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtp( ushort3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtp( uint3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtp( ulong3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtp( char3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtp( short3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtp( int3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtp( long3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtp( float3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtp( double3 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtp( uchar4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtp( ushort4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtp( uint4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtp( ulong4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtp( char4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtp( short4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtp( int4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtp( long4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtp( float4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtp( double4 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtp( uchar8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtp( ushort8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtp( uint8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtp( ulong8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtp( char8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtp( short8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtp( int8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtp( long8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtp( float8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtp( double8 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtp( uchar16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtp( ushort16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtp( uint16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtp( ulong16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtp( char16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtp( short16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtp( int16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtp( long16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtp( float16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtp( double16 );
    // convert_ushort_sat_rtn / convert_ushort<N>_sat_rtn (N = 2, 3, 4, 8, 16).
    // Each width declares ten overloads, one per same-width source type
    // (uchar, ushort, uint, ulong, char, short, int, long, float, double),
    // returning ushort or ushortN. The overloads share one name and are
    // distinguished by argument type via __OVERLOAD__ (macro defined outside
    // this section).
    // Per the OpenCL C explicit-conversion naming scheme, "_sat" requests
    // saturation to the destination range and "_rtn" rounds toward negative
    // infinity.
    ushort __OVERLOAD__ convert_ushort_sat_rtn( uchar );
    ushort __OVERLOAD__ convert_ushort_sat_rtn( ushort );
    ushort __OVERLOAD__ convert_ushort_sat_rtn( uint );
    ushort __OVERLOAD__ convert_ushort_sat_rtn( ulong );
    ushort __OVERLOAD__ convert_ushort_sat_rtn( char );
    ushort __OVERLOAD__ convert_ushort_sat_rtn( short );
    ushort __OVERLOAD__ convert_ushort_sat_rtn( int );
    ushort __OVERLOAD__ convert_ushort_sat_rtn( long );
    ushort __OVERLOAD__ convert_ushort_sat_rtn( float );
    ushort __OVERLOAD__ convert_ushort_sat_rtn( double );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtn( uchar2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtn( ushort2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtn( uint2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtn( ulong2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtn( char2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtn( short2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtn( int2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtn( long2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtn( float2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtn( double2 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtn( uchar3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtn( ushort3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtn( uint3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtn( ulong3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtn( char3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtn( short3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtn( int3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtn( long3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtn( float3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtn( double3 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtn( uchar4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtn( ushort4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtn( uint4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtn( ulong4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtn( char4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtn( short4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtn( int4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtn( long4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtn( float4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtn( double4 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtn( uchar8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtn( ushort8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtn( uint8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtn( ulong8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtn( char8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtn( short8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtn( int8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtn( long8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtn( float8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtn( double8 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtn( uchar16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtn( ushort16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtn( uint16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtn( ulong16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtn( char16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtn( short16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtn( int16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtn( long16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtn( float16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtn( double16 );
    // convert_ushort_sat_rtz / convert_ushort<N>_sat_rtz, scalar and
    // N = 2, 3, 4 forms (wider vector forms follow below).
    // Each width declares ten overloads, one per same-width source type
    // (uchar, ushort, uint, ulong, char, short, int, long, float, double),
    // returning ushort or ushortN. The overloads share one name and are
    // distinguished by argument type via __OVERLOAD__ (macro defined outside
    // this section).
    // Per the OpenCL C explicit-conversion naming scheme, "_sat" requests
    // saturation to the destination range and "_rtz" rounds toward zero.
    ushort __OVERLOAD__ convert_ushort_sat_rtz( uchar );
    ushort __OVERLOAD__ convert_ushort_sat_rtz( ushort );
    ushort __OVERLOAD__ convert_ushort_sat_rtz( uint );
    ushort __OVERLOAD__ convert_ushort_sat_rtz( ulong );
    ushort __OVERLOAD__ convert_ushort_sat_rtz( char );
    ushort __OVERLOAD__ convert_ushort_sat_rtz( short );
    ushort __OVERLOAD__ convert_ushort_sat_rtz( int );
    ushort __OVERLOAD__ convert_ushort_sat_rtz( long );
    ushort __OVERLOAD__ convert_ushort_sat_rtz( float );
    ushort __OVERLOAD__ convert_ushort_sat_rtz( double );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtz( uchar2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtz( ushort2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtz( uint2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtz( ulong2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtz( char2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtz( short2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtz( int2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtz( long2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtz( float2 );
    ushort2 __OVERLOAD__ convert_ushort2_sat_rtz( double2 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtz( uchar3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtz( ushort3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtz( uint3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtz( ulong3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtz( char3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtz( short3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtz( int3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtz( long3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtz( float3 );
    ushort3 __OVERLOAD__ convert_ushort3_sat_rtz( double3 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtz( uchar4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtz( ushort4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtz( uint4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtz( ulong4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtz( char4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtz( short4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtz( int4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtz( long4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtz( float4 );
    ushort4 __OVERLOAD__ convert_ushort4_sat_rtz( double4 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtz( uchar8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtz( ushort8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtz( uint8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtz( ulong8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtz( char8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtz( short8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtz( int8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtz( long8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtz( float8 );
    ushort8 __OVERLOAD__ convert_ushort8_sat_rtz( double8 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtz( uchar16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtz( ushort16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtz( uint16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtz( ulong16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtz( char16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtz( short16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtz( int16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtz( long16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtz( float16 );
    ushort16 __OVERLOAD__ convert_ushort16_sat_rtz( double16 );
    // convert_uint / convert_uintN (N = 2, 3, 4, 8, 16):
    // explicit-conversion prototypes returning uint or a uint vector.
    // Each width is declared once per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector overloads
    // take a source vector with the same number of elements as the result.
    // These names carry no "_sat" or rounding suffix; per the OpenCL C spec
    // the default rounding for conversions to integer types is toward zero.
    // Only declarations appear here.
    // Scalar.
    uint __OVERLOAD__ convert_uint( uchar );
    uint __OVERLOAD__ convert_uint( ushort );
    uint __OVERLOAD__ convert_uint( uint );
    uint __OVERLOAD__ convert_uint( ulong );
    uint __OVERLOAD__ convert_uint( char );
    uint __OVERLOAD__ convert_uint( short );
    uint __OVERLOAD__ convert_uint( int );
    uint __OVERLOAD__ convert_uint( long );
    uint __OVERLOAD__ convert_uint( float );
    uint __OVERLOAD__ convert_uint( double );
    // 2-element vectors.
    uint2 __OVERLOAD__ convert_uint2( uchar2 );
    uint2 __OVERLOAD__ convert_uint2( ushort2 );
    uint2 __OVERLOAD__ convert_uint2( uint2 );
    uint2 __OVERLOAD__ convert_uint2( ulong2 );
    uint2 __OVERLOAD__ convert_uint2( char2 );
    uint2 __OVERLOAD__ convert_uint2( short2 );
    uint2 __OVERLOAD__ convert_uint2( int2 );
    uint2 __OVERLOAD__ convert_uint2( long2 );
    uint2 __OVERLOAD__ convert_uint2( float2 );
    uint2 __OVERLOAD__ convert_uint2( double2 );
    // 3-element vectors.
    uint3 __OVERLOAD__ convert_uint3( uchar3 );
    uint3 __OVERLOAD__ convert_uint3( ushort3 );
    uint3 __OVERLOAD__ convert_uint3( uint3 );
    uint3 __OVERLOAD__ convert_uint3( ulong3 );
    uint3 __OVERLOAD__ convert_uint3( char3 );
    uint3 __OVERLOAD__ convert_uint3( short3 );
    uint3 __OVERLOAD__ convert_uint3( int3 );
    uint3 __OVERLOAD__ convert_uint3( long3 );
    uint3 __OVERLOAD__ convert_uint3( float3 );
    uint3 __OVERLOAD__ convert_uint3( double3 );
    // 4-element vectors.
    uint4 __OVERLOAD__ convert_uint4( uchar4 );
    uint4 __OVERLOAD__ convert_uint4( ushort4 );
    uint4 __OVERLOAD__ convert_uint4( uint4 );
    uint4 __OVERLOAD__ convert_uint4( ulong4 );
    uint4 __OVERLOAD__ convert_uint4( char4 );
    uint4 __OVERLOAD__ convert_uint4( short4 );
    uint4 __OVERLOAD__ convert_uint4( int4 );
    uint4 __OVERLOAD__ convert_uint4( long4 );
    uint4 __OVERLOAD__ convert_uint4( float4 );
    uint4 __OVERLOAD__ convert_uint4( double4 );
    // 8-element vectors.
    uint8 __OVERLOAD__ convert_uint8( uchar8 );
    uint8 __OVERLOAD__ convert_uint8( ushort8 );
    uint8 __OVERLOAD__ convert_uint8( uint8 );
    uint8 __OVERLOAD__ convert_uint8( ulong8 );
    uint8 __OVERLOAD__ convert_uint8( char8 );
    uint8 __OVERLOAD__ convert_uint8( short8 );
    uint8 __OVERLOAD__ convert_uint8( int8 );
    uint8 __OVERLOAD__ convert_uint8( long8 );
    uint8 __OVERLOAD__ convert_uint8( float8 );
    uint8 __OVERLOAD__ convert_uint8( double8 );
    // 16-element vectors.
    uint16 __OVERLOAD__ convert_uint16( uchar16 );
    uint16 __OVERLOAD__ convert_uint16( ushort16 );
    uint16 __OVERLOAD__ convert_uint16( uint16 );
    uint16 __OVERLOAD__ convert_uint16( ulong16 );
    uint16 __OVERLOAD__ convert_uint16( char16 );
    uint16 __OVERLOAD__ convert_uint16( short16 );
    uint16 __OVERLOAD__ convert_uint16( int16 );
    uint16 __OVERLOAD__ convert_uint16( long16 );
    uint16 __OVERLOAD__ convert_uint16( float16 );
    uint16 __OVERLOAD__ convert_uint16( double16 );
    // convert_uint_rte / convert_uintN_rte (N = 2, 3, 4, 8, 16):
    // explicit-conversion prototypes returning uint or a uint vector.
    // Each width is declared once per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector overloads
    // take a source vector with the same number of elements as the result.
    // Per the OpenCL C spec's naming for explicit conversions, "_rte"
    // selects rounding to nearest even. Only declarations appear here.
    // Scalar.
    uint __OVERLOAD__ convert_uint_rte( uchar );
    uint __OVERLOAD__ convert_uint_rte( ushort );
    uint __OVERLOAD__ convert_uint_rte( uint );
    uint __OVERLOAD__ convert_uint_rte( ulong );
    uint __OVERLOAD__ convert_uint_rte( char );
    uint __OVERLOAD__ convert_uint_rte( short );
    uint __OVERLOAD__ convert_uint_rte( int );
    uint __OVERLOAD__ convert_uint_rte( long );
    uint __OVERLOAD__ convert_uint_rte( float );
    uint __OVERLOAD__ convert_uint_rte( double );
    // 2-element vectors.
    uint2 __OVERLOAD__ convert_uint2_rte( uchar2 );
    uint2 __OVERLOAD__ convert_uint2_rte( ushort2 );
    uint2 __OVERLOAD__ convert_uint2_rte( uint2 );
    uint2 __OVERLOAD__ convert_uint2_rte( ulong2 );
    uint2 __OVERLOAD__ convert_uint2_rte( char2 );
    uint2 __OVERLOAD__ convert_uint2_rte( short2 );
    uint2 __OVERLOAD__ convert_uint2_rte( int2 );
    uint2 __OVERLOAD__ convert_uint2_rte( long2 );
    uint2 __OVERLOAD__ convert_uint2_rte( float2 );
    uint2 __OVERLOAD__ convert_uint2_rte( double2 );
    // 3-element vectors.
    uint3 __OVERLOAD__ convert_uint3_rte( uchar3 );
    uint3 __OVERLOAD__ convert_uint3_rte( ushort3 );
    uint3 __OVERLOAD__ convert_uint3_rte( uint3 );
    uint3 __OVERLOAD__ convert_uint3_rte( ulong3 );
    uint3 __OVERLOAD__ convert_uint3_rte( char3 );
    uint3 __OVERLOAD__ convert_uint3_rte( short3 );
    uint3 __OVERLOAD__ convert_uint3_rte( int3 );
    uint3 __OVERLOAD__ convert_uint3_rte( long3 );
    uint3 __OVERLOAD__ convert_uint3_rte( float3 );
    uint3 __OVERLOAD__ convert_uint3_rte( double3 );
    // 4-element vectors.
    uint4 __OVERLOAD__ convert_uint4_rte( uchar4 );
    uint4 __OVERLOAD__ convert_uint4_rte( ushort4 );
    uint4 __OVERLOAD__ convert_uint4_rte( uint4 );
    uint4 __OVERLOAD__ convert_uint4_rte( ulong4 );
    uint4 __OVERLOAD__ convert_uint4_rte( char4 );
    uint4 __OVERLOAD__ convert_uint4_rte( short4 );
    uint4 __OVERLOAD__ convert_uint4_rte( int4 );
    uint4 __OVERLOAD__ convert_uint4_rte( long4 );
    uint4 __OVERLOAD__ convert_uint4_rte( float4 );
    uint4 __OVERLOAD__ convert_uint4_rte( double4 );
    // 8-element vectors.
    uint8 __OVERLOAD__ convert_uint8_rte( uchar8 );
    uint8 __OVERLOAD__ convert_uint8_rte( ushort8 );
    uint8 __OVERLOAD__ convert_uint8_rte( uint8 );
    uint8 __OVERLOAD__ convert_uint8_rte( ulong8 );
    uint8 __OVERLOAD__ convert_uint8_rte( char8 );
    uint8 __OVERLOAD__ convert_uint8_rte( short8 );
    uint8 __OVERLOAD__ convert_uint8_rte( int8 );
    uint8 __OVERLOAD__ convert_uint8_rte( long8 );
    uint8 __OVERLOAD__ convert_uint8_rte( float8 );
    uint8 __OVERLOAD__ convert_uint8_rte( double8 );
    // 16-element vectors.
    uint16 __OVERLOAD__ convert_uint16_rte( uchar16 );
    uint16 __OVERLOAD__ convert_uint16_rte( ushort16 );
    uint16 __OVERLOAD__ convert_uint16_rte( uint16 );
    uint16 __OVERLOAD__ convert_uint16_rte( ulong16 );
    uint16 __OVERLOAD__ convert_uint16_rte( char16 );
    uint16 __OVERLOAD__ convert_uint16_rte( short16 );
    uint16 __OVERLOAD__ convert_uint16_rte( int16 );
    uint16 __OVERLOAD__ convert_uint16_rte( long16 );
    uint16 __OVERLOAD__ convert_uint16_rte( float16 );
    uint16 __OVERLOAD__ convert_uint16_rte( double16 );
    // convert_uint_rtp / convert_uintN_rtp (N = 2, 3, 4, 8, 16):
    // explicit-conversion prototypes returning uint or a uint vector.
    // Each width is declared once per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector overloads
    // take a source vector with the same number of elements as the result.
    // Per the OpenCL C spec's naming for explicit conversions, "_rtp"
    // selects rounding toward positive infinity. Only declarations appear here.
    // Scalar.
    uint __OVERLOAD__ convert_uint_rtp( uchar );
    uint __OVERLOAD__ convert_uint_rtp( ushort );
    uint __OVERLOAD__ convert_uint_rtp( uint );
    uint __OVERLOAD__ convert_uint_rtp( ulong );
    uint __OVERLOAD__ convert_uint_rtp( char );
    uint __OVERLOAD__ convert_uint_rtp( short );
    uint __OVERLOAD__ convert_uint_rtp( int );
    uint __OVERLOAD__ convert_uint_rtp( long );
    uint __OVERLOAD__ convert_uint_rtp( float );
    uint __OVERLOAD__ convert_uint_rtp( double );
    // 2-element vectors.
    uint2 __OVERLOAD__ convert_uint2_rtp( uchar2 );
    uint2 __OVERLOAD__ convert_uint2_rtp( ushort2 );
    uint2 __OVERLOAD__ convert_uint2_rtp( uint2 );
    uint2 __OVERLOAD__ convert_uint2_rtp( ulong2 );
    uint2 __OVERLOAD__ convert_uint2_rtp( char2 );
    uint2 __OVERLOAD__ convert_uint2_rtp( short2 );
    uint2 __OVERLOAD__ convert_uint2_rtp( int2 );
    uint2 __OVERLOAD__ convert_uint2_rtp( long2 );
    uint2 __OVERLOAD__ convert_uint2_rtp( float2 );
    uint2 __OVERLOAD__ convert_uint2_rtp( double2 );
    // 3-element vectors.
    uint3 __OVERLOAD__ convert_uint3_rtp( uchar3 );
    uint3 __OVERLOAD__ convert_uint3_rtp( ushort3 );
    uint3 __OVERLOAD__ convert_uint3_rtp( uint3 );
    uint3 __OVERLOAD__ convert_uint3_rtp( ulong3 );
    uint3 __OVERLOAD__ convert_uint3_rtp( char3 );
    uint3 __OVERLOAD__ convert_uint3_rtp( short3 );
    uint3 __OVERLOAD__ convert_uint3_rtp( int3 );
    uint3 __OVERLOAD__ convert_uint3_rtp( long3 );
    uint3 __OVERLOAD__ convert_uint3_rtp( float3 );
    uint3 __OVERLOAD__ convert_uint3_rtp( double3 );
    // 4-element vectors.
    uint4 __OVERLOAD__ convert_uint4_rtp( uchar4 );
    uint4 __OVERLOAD__ convert_uint4_rtp( ushort4 );
    uint4 __OVERLOAD__ convert_uint4_rtp( uint4 );
    uint4 __OVERLOAD__ convert_uint4_rtp( ulong4 );
    uint4 __OVERLOAD__ convert_uint4_rtp( char4 );
    uint4 __OVERLOAD__ convert_uint4_rtp( short4 );
    uint4 __OVERLOAD__ convert_uint4_rtp( int4 );
    uint4 __OVERLOAD__ convert_uint4_rtp( long4 );
    uint4 __OVERLOAD__ convert_uint4_rtp( float4 );
    uint4 __OVERLOAD__ convert_uint4_rtp( double4 );
    // 8-element vectors.
    uint8 __OVERLOAD__ convert_uint8_rtp( uchar8 );
    uint8 __OVERLOAD__ convert_uint8_rtp( ushort8 );
    uint8 __OVERLOAD__ convert_uint8_rtp( uint8 );
    uint8 __OVERLOAD__ convert_uint8_rtp( ulong8 );
    uint8 __OVERLOAD__ convert_uint8_rtp( char8 );
    uint8 __OVERLOAD__ convert_uint8_rtp( short8 );
    uint8 __OVERLOAD__ convert_uint8_rtp( int8 );
    uint8 __OVERLOAD__ convert_uint8_rtp( long8 );
    uint8 __OVERLOAD__ convert_uint8_rtp( float8 );
    uint8 __OVERLOAD__ convert_uint8_rtp( double8 );
    // 16-element vectors.
    uint16 __OVERLOAD__ convert_uint16_rtp( uchar16 );
    uint16 __OVERLOAD__ convert_uint16_rtp( ushort16 );
    uint16 __OVERLOAD__ convert_uint16_rtp( uint16 );
    uint16 __OVERLOAD__ convert_uint16_rtp( ulong16 );
    uint16 __OVERLOAD__ convert_uint16_rtp( char16 );
    uint16 __OVERLOAD__ convert_uint16_rtp( short16 );
    uint16 __OVERLOAD__ convert_uint16_rtp( int16 );
    uint16 __OVERLOAD__ convert_uint16_rtp( long16 );
    uint16 __OVERLOAD__ convert_uint16_rtp( float16 );
    uint16 __OVERLOAD__ convert_uint16_rtp( double16 );
    // convert_uint_rtn / convert_uintN_rtn (N = 2, 3, 4, 8, 16):
    // explicit-conversion prototypes returning uint or a uint vector.
    // Each width is declared once per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector overloads
    // take a source vector with the same number of elements as the result.
    // Per the OpenCL C spec's naming for explicit conversions, "_rtn"
    // selects rounding toward negative infinity. Only declarations appear here.
    // Scalar.
    uint __OVERLOAD__ convert_uint_rtn( uchar );
    uint __OVERLOAD__ convert_uint_rtn( ushort );
    uint __OVERLOAD__ convert_uint_rtn( uint );
    uint __OVERLOAD__ convert_uint_rtn( ulong );
    uint __OVERLOAD__ convert_uint_rtn( char );
    uint __OVERLOAD__ convert_uint_rtn( short );
    uint __OVERLOAD__ convert_uint_rtn( int );
    uint __OVERLOAD__ convert_uint_rtn( long );
    uint __OVERLOAD__ convert_uint_rtn( float );
    uint __OVERLOAD__ convert_uint_rtn( double );
    // 2-element vectors.
    uint2 __OVERLOAD__ convert_uint2_rtn( uchar2 );
    uint2 __OVERLOAD__ convert_uint2_rtn( ushort2 );
    uint2 __OVERLOAD__ convert_uint2_rtn( uint2 );
    uint2 __OVERLOAD__ convert_uint2_rtn( ulong2 );
    uint2 __OVERLOAD__ convert_uint2_rtn( char2 );
    uint2 __OVERLOAD__ convert_uint2_rtn( short2 );
    uint2 __OVERLOAD__ convert_uint2_rtn( int2 );
    uint2 __OVERLOAD__ convert_uint2_rtn( long2 );
    uint2 __OVERLOAD__ convert_uint2_rtn( float2 );
    uint2 __OVERLOAD__ convert_uint2_rtn( double2 );
    // 3-element vectors.
    uint3 __OVERLOAD__ convert_uint3_rtn( uchar3 );
    uint3 __OVERLOAD__ convert_uint3_rtn( ushort3 );
    uint3 __OVERLOAD__ convert_uint3_rtn( uint3 );
    uint3 __OVERLOAD__ convert_uint3_rtn( ulong3 );
    uint3 __OVERLOAD__ convert_uint3_rtn( char3 );
    uint3 __OVERLOAD__ convert_uint3_rtn( short3 );
    uint3 __OVERLOAD__ convert_uint3_rtn( int3 );
    uint3 __OVERLOAD__ convert_uint3_rtn( long3 );
    uint3 __OVERLOAD__ convert_uint3_rtn( float3 );
    uint3 __OVERLOAD__ convert_uint3_rtn( double3 );
    // 4-element vectors.
    uint4 __OVERLOAD__ convert_uint4_rtn( uchar4 );
    uint4 __OVERLOAD__ convert_uint4_rtn( ushort4 );
    uint4 __OVERLOAD__ convert_uint4_rtn( uint4 );
    uint4 __OVERLOAD__ convert_uint4_rtn( ulong4 );
    uint4 __OVERLOAD__ convert_uint4_rtn( char4 );
    uint4 __OVERLOAD__ convert_uint4_rtn( short4 );
    uint4 __OVERLOAD__ convert_uint4_rtn( int4 );
    uint4 __OVERLOAD__ convert_uint4_rtn( long4 );
    uint4 __OVERLOAD__ convert_uint4_rtn( float4 );
    uint4 __OVERLOAD__ convert_uint4_rtn( double4 );
    // 8-element vectors.
    uint8 __OVERLOAD__ convert_uint8_rtn( uchar8 );
    uint8 __OVERLOAD__ convert_uint8_rtn( ushort8 );
    uint8 __OVERLOAD__ convert_uint8_rtn( uint8 );
    uint8 __OVERLOAD__ convert_uint8_rtn( ulong8 );
    uint8 __OVERLOAD__ convert_uint8_rtn( char8 );
    uint8 __OVERLOAD__ convert_uint8_rtn( short8 );
    uint8 __OVERLOAD__ convert_uint8_rtn( int8 );
    uint8 __OVERLOAD__ convert_uint8_rtn( long8 );
    uint8 __OVERLOAD__ convert_uint8_rtn( float8 );
    uint8 __OVERLOAD__ convert_uint8_rtn( double8 );
    // 16-element vectors.
    uint16 __OVERLOAD__ convert_uint16_rtn( uchar16 );
    uint16 __OVERLOAD__ convert_uint16_rtn( ushort16 );
    uint16 __OVERLOAD__ convert_uint16_rtn( uint16 );
    uint16 __OVERLOAD__ convert_uint16_rtn( ulong16 );
    uint16 __OVERLOAD__ convert_uint16_rtn( char16 );
    uint16 __OVERLOAD__ convert_uint16_rtn( short16 );
    uint16 __OVERLOAD__ convert_uint16_rtn( int16 );
    uint16 __OVERLOAD__ convert_uint16_rtn( long16 );
    uint16 __OVERLOAD__ convert_uint16_rtn( float16 );
    uint16 __OVERLOAD__ convert_uint16_rtn( double16 );
    // convert_uint_rtz / convert_uintN_rtz (N = 2, 3, 4, 8, 16):
    // explicit-conversion prototypes returning uint or a uint vector.
    // Each width is declared once per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector overloads
    // take a source vector with the same number of elements as the result.
    // Per the OpenCL C spec's naming for explicit conversions, "_rtz"
    // selects rounding toward zero. Only declarations appear here.
    // Scalar.
    uint __OVERLOAD__ convert_uint_rtz( uchar );
    uint __OVERLOAD__ convert_uint_rtz( ushort );
    uint __OVERLOAD__ convert_uint_rtz( uint );
    uint __OVERLOAD__ convert_uint_rtz( ulong );
    uint __OVERLOAD__ convert_uint_rtz( char );
    uint __OVERLOAD__ convert_uint_rtz( short );
    uint __OVERLOAD__ convert_uint_rtz( int );
    uint __OVERLOAD__ convert_uint_rtz( long );
    uint __OVERLOAD__ convert_uint_rtz( float );
    uint __OVERLOAD__ convert_uint_rtz( double );
    // 2-element vectors.
    uint2 __OVERLOAD__ convert_uint2_rtz( uchar2 );
    uint2 __OVERLOAD__ convert_uint2_rtz( ushort2 );
    uint2 __OVERLOAD__ convert_uint2_rtz( uint2 );
    uint2 __OVERLOAD__ convert_uint2_rtz( ulong2 );
    uint2 __OVERLOAD__ convert_uint2_rtz( char2 );
    uint2 __OVERLOAD__ convert_uint2_rtz( short2 );
    uint2 __OVERLOAD__ convert_uint2_rtz( int2 );
    uint2 __OVERLOAD__ convert_uint2_rtz( long2 );
    uint2 __OVERLOAD__ convert_uint2_rtz( float2 );
    uint2 __OVERLOAD__ convert_uint2_rtz( double2 );
    // 3-element vectors.
    uint3 __OVERLOAD__ convert_uint3_rtz( uchar3 );
    uint3 __OVERLOAD__ convert_uint3_rtz( ushort3 );
    uint3 __OVERLOAD__ convert_uint3_rtz( uint3 );
    uint3 __OVERLOAD__ convert_uint3_rtz( ulong3 );
    uint3 __OVERLOAD__ convert_uint3_rtz( char3 );
    uint3 __OVERLOAD__ convert_uint3_rtz( short3 );
    uint3 __OVERLOAD__ convert_uint3_rtz( int3 );
    uint3 __OVERLOAD__ convert_uint3_rtz( long3 );
    uint3 __OVERLOAD__ convert_uint3_rtz( float3 );
    uint3 __OVERLOAD__ convert_uint3_rtz( double3 );
    // 4-element vectors.
    uint4 __OVERLOAD__ convert_uint4_rtz( uchar4 );
    uint4 __OVERLOAD__ convert_uint4_rtz( ushort4 );
    uint4 __OVERLOAD__ convert_uint4_rtz( uint4 );
    uint4 __OVERLOAD__ convert_uint4_rtz( ulong4 );
    uint4 __OVERLOAD__ convert_uint4_rtz( char4 );
    uint4 __OVERLOAD__ convert_uint4_rtz( short4 );
    uint4 __OVERLOAD__ convert_uint4_rtz( int4 );
    uint4 __OVERLOAD__ convert_uint4_rtz( long4 );
    uint4 __OVERLOAD__ convert_uint4_rtz( float4 );
    uint4 __OVERLOAD__ convert_uint4_rtz( double4 );
    // 8-element vectors.
    uint8 __OVERLOAD__ convert_uint8_rtz( uchar8 );
    uint8 __OVERLOAD__ convert_uint8_rtz( ushort8 );
    uint8 __OVERLOAD__ convert_uint8_rtz( uint8 );
    uint8 __OVERLOAD__ convert_uint8_rtz( ulong8 );
    uint8 __OVERLOAD__ convert_uint8_rtz( char8 );
    uint8 __OVERLOAD__ convert_uint8_rtz( short8 );
    uint8 __OVERLOAD__ convert_uint8_rtz( int8 );
    uint8 __OVERLOAD__ convert_uint8_rtz( long8 );
    uint8 __OVERLOAD__ convert_uint8_rtz( float8 );
    uint8 __OVERLOAD__ convert_uint8_rtz( double8 );
    // 16-element vectors.
    uint16 __OVERLOAD__ convert_uint16_rtz( uchar16 );
    uint16 __OVERLOAD__ convert_uint16_rtz( ushort16 );
    uint16 __OVERLOAD__ convert_uint16_rtz( uint16 );
    uint16 __OVERLOAD__ convert_uint16_rtz( ulong16 );
    uint16 __OVERLOAD__ convert_uint16_rtz( char16 );
    uint16 __OVERLOAD__ convert_uint16_rtz( short16 );
    uint16 __OVERLOAD__ convert_uint16_rtz( int16 );
    uint16 __OVERLOAD__ convert_uint16_rtz( long16 );
    uint16 __OVERLOAD__ convert_uint16_rtz( float16 );
    uint16 __OVERLOAD__ convert_uint16_rtz( double16 );
    // convert_uint_sat / convert_uintN_sat (N = 2, 3, 4, 8, 16):
    // explicit-conversion prototypes returning uint or a uint vector.
    // Each width is declared once per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector overloads
    // take a source vector with the same number of elements as the result.
    // Per the OpenCL C spec's naming for explicit conversions, "_sat" requests
    // saturation to the destination range; no rounding suffix is given, so
    // the spec's default rounding applies. Only declarations appear here.
    // Scalar.
    uint __OVERLOAD__ convert_uint_sat( uchar );
    uint __OVERLOAD__ convert_uint_sat( ushort );
    uint __OVERLOAD__ convert_uint_sat( uint );
    uint __OVERLOAD__ convert_uint_sat( ulong );
    uint __OVERLOAD__ convert_uint_sat( char );
    uint __OVERLOAD__ convert_uint_sat( short );
    uint __OVERLOAD__ convert_uint_sat( int );
    uint __OVERLOAD__ convert_uint_sat( long );
    uint __OVERLOAD__ convert_uint_sat( float );
    uint __OVERLOAD__ convert_uint_sat( double );
    // 2-element vectors.
    uint2 __OVERLOAD__ convert_uint2_sat( uchar2 );
    uint2 __OVERLOAD__ convert_uint2_sat( ushort2 );
    uint2 __OVERLOAD__ convert_uint2_sat( uint2 );
    uint2 __OVERLOAD__ convert_uint2_sat( ulong2 );
    uint2 __OVERLOAD__ convert_uint2_sat( char2 );
    uint2 __OVERLOAD__ convert_uint2_sat( short2 );
    uint2 __OVERLOAD__ convert_uint2_sat( int2 );
    uint2 __OVERLOAD__ convert_uint2_sat( long2 );
    uint2 __OVERLOAD__ convert_uint2_sat( float2 );
    uint2 __OVERLOAD__ convert_uint2_sat( double2 );
    // 3-element vectors.
    uint3 __OVERLOAD__ convert_uint3_sat( uchar3 );
    uint3 __OVERLOAD__ convert_uint3_sat( ushort3 );
    uint3 __OVERLOAD__ convert_uint3_sat( uint3 );
    uint3 __OVERLOAD__ convert_uint3_sat( ulong3 );
    uint3 __OVERLOAD__ convert_uint3_sat( char3 );
    uint3 __OVERLOAD__ convert_uint3_sat( short3 );
    uint3 __OVERLOAD__ convert_uint3_sat( int3 );
    uint3 __OVERLOAD__ convert_uint3_sat( long3 );
    uint3 __OVERLOAD__ convert_uint3_sat( float3 );
    uint3 __OVERLOAD__ convert_uint3_sat( double3 );
    // 4-element vectors.
    uint4 __OVERLOAD__ convert_uint4_sat( uchar4 );
    uint4 __OVERLOAD__ convert_uint4_sat( ushort4 );
    uint4 __OVERLOAD__ convert_uint4_sat( uint4 );
    uint4 __OVERLOAD__ convert_uint4_sat( ulong4 );
    uint4 __OVERLOAD__ convert_uint4_sat( char4 );
    uint4 __OVERLOAD__ convert_uint4_sat( short4 );
    uint4 __OVERLOAD__ convert_uint4_sat( int4 );
    uint4 __OVERLOAD__ convert_uint4_sat( long4 );
    uint4 __OVERLOAD__ convert_uint4_sat( float4 );
    uint4 __OVERLOAD__ convert_uint4_sat( double4 );
    // 8-element vectors.
    uint8 __OVERLOAD__ convert_uint8_sat( uchar8 );
    uint8 __OVERLOAD__ convert_uint8_sat( ushort8 );
    uint8 __OVERLOAD__ convert_uint8_sat( uint8 );
    uint8 __OVERLOAD__ convert_uint8_sat( ulong8 );
    uint8 __OVERLOAD__ convert_uint8_sat( char8 );
    uint8 __OVERLOAD__ convert_uint8_sat( short8 );
    uint8 __OVERLOAD__ convert_uint8_sat( int8 );
    uint8 __OVERLOAD__ convert_uint8_sat( long8 );
    uint8 __OVERLOAD__ convert_uint8_sat( float8 );
    uint8 __OVERLOAD__ convert_uint8_sat( double8 );
    // 16-element vectors.
    uint16 __OVERLOAD__ convert_uint16_sat( uchar16 );
    uint16 __OVERLOAD__ convert_uint16_sat( ushort16 );
    uint16 __OVERLOAD__ convert_uint16_sat( uint16 );
    uint16 __OVERLOAD__ convert_uint16_sat( ulong16 );
    uint16 __OVERLOAD__ convert_uint16_sat( char16 );
    uint16 __OVERLOAD__ convert_uint16_sat( short16 );
    uint16 __OVERLOAD__ convert_uint16_sat( int16 );
    uint16 __OVERLOAD__ convert_uint16_sat( long16 );
    uint16 __OVERLOAD__ convert_uint16_sat( float16 );
    uint16 __OVERLOAD__ convert_uint16_sat( double16 );
    // convert_uint_sat_rte / convert_uintN_sat_rte (N = 2, 3, 4, 8, 16):
    // explicit-conversion prototypes returning uint or a uint vector.
    // Each width is declared once per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector overloads
    // take a source vector with the same number of elements as the result.
    // Per the OpenCL C spec's naming for explicit conversions, "_sat" requests
    // saturation to the destination range and "_rte" selects rounding to
    // nearest even. Only declarations appear here.
    // Scalar.
    uint __OVERLOAD__ convert_uint_sat_rte( uchar );
    uint __OVERLOAD__ convert_uint_sat_rte( ushort );
    uint __OVERLOAD__ convert_uint_sat_rte( uint );
    uint __OVERLOAD__ convert_uint_sat_rte( ulong );
    uint __OVERLOAD__ convert_uint_sat_rte( char );
    uint __OVERLOAD__ convert_uint_sat_rte( short );
    uint __OVERLOAD__ convert_uint_sat_rte( int );
    uint __OVERLOAD__ convert_uint_sat_rte( long );
    uint __OVERLOAD__ convert_uint_sat_rte( float );
    uint __OVERLOAD__ convert_uint_sat_rte( double );
    // 2-element vectors.
    uint2 __OVERLOAD__ convert_uint2_sat_rte( uchar2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rte( ushort2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rte( uint2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rte( ulong2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rte( char2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rte( short2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rte( int2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rte( long2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rte( float2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rte( double2 );
    // 3-element vectors.
    uint3 __OVERLOAD__ convert_uint3_sat_rte( uchar3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rte( ushort3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rte( uint3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rte( ulong3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rte( char3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rte( short3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rte( int3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rte( long3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rte( float3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rte( double3 );
    // 4-element vectors.
    uint4 __OVERLOAD__ convert_uint4_sat_rte( uchar4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rte( ushort4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rte( uint4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rte( ulong4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rte( char4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rte( short4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rte( int4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rte( long4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rte( float4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rte( double4 );
    // 8-element vectors.
    uint8 __OVERLOAD__ convert_uint8_sat_rte( uchar8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rte( ushort8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rte( uint8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rte( ulong8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rte( char8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rte( short8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rte( int8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rte( long8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rte( float8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rte( double8 );
    // 16-element vectors.
    uint16 __OVERLOAD__ convert_uint16_sat_rte( uchar16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rte( ushort16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rte( uint16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rte( ulong16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rte( char16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rte( short16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rte( int16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rte( long16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rte( float16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rte( double16 );
    // convert_uint_sat_rtp / convert_uintN_sat_rtp (N = 2, 3, 4, 8, 16):
    // explicit-conversion prototypes returning uint or a uint vector.
    // Each width is declared once per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector overloads
    // take a source vector with the same number of elements as the result.
    // Per the OpenCL C spec's naming for explicit conversions, "_sat" requests
    // saturation to the destination range and "_rtp" selects rounding toward
    // positive infinity. Only declarations appear here.
    // Scalar.
    uint __OVERLOAD__ convert_uint_sat_rtp( uchar );
    uint __OVERLOAD__ convert_uint_sat_rtp( ushort );
    uint __OVERLOAD__ convert_uint_sat_rtp( uint );
    uint __OVERLOAD__ convert_uint_sat_rtp( ulong );
    uint __OVERLOAD__ convert_uint_sat_rtp( char );
    uint __OVERLOAD__ convert_uint_sat_rtp( short );
    uint __OVERLOAD__ convert_uint_sat_rtp( int );
    uint __OVERLOAD__ convert_uint_sat_rtp( long );
    uint __OVERLOAD__ convert_uint_sat_rtp( float );
    uint __OVERLOAD__ convert_uint_sat_rtp( double );
    // 2-element vectors.
    uint2 __OVERLOAD__ convert_uint2_sat_rtp( uchar2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtp( ushort2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtp( uint2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtp( ulong2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtp( char2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtp( short2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtp( int2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtp( long2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtp( float2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtp( double2 );
    // 3-element vectors.
    uint3 __OVERLOAD__ convert_uint3_sat_rtp( uchar3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtp( ushort3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtp( uint3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtp( ulong3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtp( char3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtp( short3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtp( int3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtp( long3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtp( float3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtp( double3 );
    // 4-element vectors.
    uint4 __OVERLOAD__ convert_uint4_sat_rtp( uchar4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtp( ushort4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtp( uint4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtp( ulong4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtp( char4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtp( short4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtp( int4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtp( long4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtp( float4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtp( double4 );
    // 8-element vectors.
    uint8 __OVERLOAD__ convert_uint8_sat_rtp( uchar8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtp( ushort8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtp( uint8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtp( ulong8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtp( char8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtp( short8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtp( int8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtp( long8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtp( float8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtp( double8 );
    // 16-element vectors.
    uint16 __OVERLOAD__ convert_uint16_sat_rtp( uchar16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtp( ushort16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtp( uint16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtp( ulong16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtp( char16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtp( short16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtp( int16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtp( long16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtp( float16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtp( double16 );
    // convert_uint_sat_rtn / convert_uintN_sat_rtn (N = 2, 3, 4, 8, 16):
    // explicit-conversion prototypes returning uint or a uint vector.
    // Each width is declared once per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector overloads
    // take a source vector with the same number of elements as the result.
    // Per the OpenCL C spec's naming for explicit conversions, "_sat" requests
    // saturation to the destination range and "_rtn" selects rounding toward
    // negative infinity. Only declarations appear here.
    // Scalar.
    uint __OVERLOAD__ convert_uint_sat_rtn( uchar );
    uint __OVERLOAD__ convert_uint_sat_rtn( ushort );
    uint __OVERLOAD__ convert_uint_sat_rtn( uint );
    uint __OVERLOAD__ convert_uint_sat_rtn( ulong );
    uint __OVERLOAD__ convert_uint_sat_rtn( char );
    uint __OVERLOAD__ convert_uint_sat_rtn( short );
    uint __OVERLOAD__ convert_uint_sat_rtn( int );
    uint __OVERLOAD__ convert_uint_sat_rtn( long );
    uint __OVERLOAD__ convert_uint_sat_rtn( float );
    uint __OVERLOAD__ convert_uint_sat_rtn( double );
    // 2-element vectors.
    uint2 __OVERLOAD__ convert_uint2_sat_rtn( uchar2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtn( ushort2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtn( uint2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtn( ulong2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtn( char2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtn( short2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtn( int2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtn( long2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtn( float2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtn( double2 );
    // 3-element vectors.
    uint3 __OVERLOAD__ convert_uint3_sat_rtn( uchar3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtn( ushort3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtn( uint3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtn( ulong3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtn( char3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtn( short3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtn( int3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtn( long3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtn( float3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtn( double3 );
    // 4-element vectors.
    uint4 __OVERLOAD__ convert_uint4_sat_rtn( uchar4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtn( ushort4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtn( uint4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtn( ulong4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtn( char4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtn( short4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtn( int4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtn( long4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtn( float4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtn( double4 );
    // 8-element vectors.
    uint8 __OVERLOAD__ convert_uint8_sat_rtn( uchar8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtn( ushort8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtn( uint8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtn( ulong8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtn( char8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtn( short8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtn( int8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtn( long8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtn( float8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtn( double8 );
    // 16-element vectors.
    uint16 __OVERLOAD__ convert_uint16_sat_rtn( uchar16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtn( ushort16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtn( uint16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtn( ulong16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtn( char16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtn( short16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtn( int16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtn( long16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtn( float16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtn( double16 );
    // convert_uint{,2,3,4,8,16}_sat_rtz: prototypes returning uint or a uint
    // vector of width 2, 3, 4, 8 or 16. Each name is overloaded once per source
    // element type (uchar, ushort, uint, ulong, char, short, int, long, float,
    // double); vector overloads take a source vector of the same width as the
    // result. Declarations only.
    // NOTE(review): the suffixes presumably follow the OpenCL C explicit-conversion
    // naming (_sat = saturating, _rtz = round toward zero); the actual semantics
    // are defined by the implementation, which is not visible here.
    uint __OVERLOAD__ convert_uint_sat_rtz( uchar );
    uint __OVERLOAD__ convert_uint_sat_rtz( ushort );
    uint __OVERLOAD__ convert_uint_sat_rtz( uint );
    uint __OVERLOAD__ convert_uint_sat_rtz( ulong );
    uint __OVERLOAD__ convert_uint_sat_rtz( char );
    uint __OVERLOAD__ convert_uint_sat_rtz( short );
    uint __OVERLOAD__ convert_uint_sat_rtz( int );
    uint __OVERLOAD__ convert_uint_sat_rtz( long );
    uint __OVERLOAD__ convert_uint_sat_rtz( float );
    uint __OVERLOAD__ convert_uint_sat_rtz( double );
    uint2 __OVERLOAD__ convert_uint2_sat_rtz( uchar2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtz( ushort2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtz( uint2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtz( ulong2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtz( char2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtz( short2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtz( int2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtz( long2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtz( float2 );
    uint2 __OVERLOAD__ convert_uint2_sat_rtz( double2 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtz( uchar3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtz( ushort3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtz( uint3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtz( ulong3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtz( char3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtz( short3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtz( int3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtz( long3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtz( float3 );
    uint3 __OVERLOAD__ convert_uint3_sat_rtz( double3 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtz( uchar4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtz( ushort4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtz( uint4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtz( ulong4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtz( char4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtz( short4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtz( int4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtz( long4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtz( float4 );
    uint4 __OVERLOAD__ convert_uint4_sat_rtz( double4 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtz( uchar8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtz( ushort8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtz( uint8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtz( ulong8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtz( char8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtz( short8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtz( int8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtz( long8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtz( float8 );
    uint8 __OVERLOAD__ convert_uint8_sat_rtz( double8 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtz( uchar16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtz( ushort16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtz( uint16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtz( ulong16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtz( char16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtz( short16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtz( int16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtz( long16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtz( float16 );
    uint16 __OVERLOAD__ convert_uint16_sat_rtz( double16 );
    // convert_ulong{,2,3,4,8,16}: prototypes returning ulong or a ulong vector
    // of width 2, 3, 4, 8 or 16. Each name is overloaded once per source element
    // type (uchar, ushort, uint, ulong, char, short, int, long, float, double);
    // vector overloads take a source vector of the same width as the result.
    // Declarations only.
    // NOTE(review): these names carry no _sat or rounding-mode suffix, so the
    // OpenCL C default explicit-conversion behavior presumably applies -- confirm
    // against the implementation, which is not visible here.
    ulong __OVERLOAD__ convert_ulong( uchar );
    ulong __OVERLOAD__ convert_ulong( ushort );
    ulong __OVERLOAD__ convert_ulong( uint );
    ulong __OVERLOAD__ convert_ulong( ulong );
    ulong __OVERLOAD__ convert_ulong( char );
    ulong __OVERLOAD__ convert_ulong( short );
    ulong __OVERLOAD__ convert_ulong( int );
    ulong __OVERLOAD__ convert_ulong( long );
    ulong __OVERLOAD__ convert_ulong( float );
    ulong __OVERLOAD__ convert_ulong( double );
    ulong2 __OVERLOAD__ convert_ulong2( uchar2 );
    ulong2 __OVERLOAD__ convert_ulong2( ushort2 );
    ulong2 __OVERLOAD__ convert_ulong2( uint2 );
    ulong2 __OVERLOAD__ convert_ulong2( ulong2 );
    ulong2 __OVERLOAD__ convert_ulong2( char2 );
    ulong2 __OVERLOAD__ convert_ulong2( short2 );
    ulong2 __OVERLOAD__ convert_ulong2( int2 );
    ulong2 __OVERLOAD__ convert_ulong2( long2 );
    ulong2 __OVERLOAD__ convert_ulong2( float2 );
    ulong2 __OVERLOAD__ convert_ulong2( double2 );
    ulong3 __OVERLOAD__ convert_ulong3( uchar3 );
    ulong3 __OVERLOAD__ convert_ulong3( ushort3 );
    ulong3 __OVERLOAD__ convert_ulong3( uint3 );
    ulong3 __OVERLOAD__ convert_ulong3( ulong3 );
    ulong3 __OVERLOAD__ convert_ulong3( char3 );
    ulong3 __OVERLOAD__ convert_ulong3( short3 );
    ulong3 __OVERLOAD__ convert_ulong3( int3 );
    ulong3 __OVERLOAD__ convert_ulong3( long3 );
    ulong3 __OVERLOAD__ convert_ulong3( float3 );
    ulong3 __OVERLOAD__ convert_ulong3( double3 );
    ulong4 __OVERLOAD__ convert_ulong4( uchar4 );
    ulong4 __OVERLOAD__ convert_ulong4( ushort4 );
    ulong4 __OVERLOAD__ convert_ulong4( uint4 );
    ulong4 __OVERLOAD__ convert_ulong4( ulong4 );
    ulong4 __OVERLOAD__ convert_ulong4( char4 );
    ulong4 __OVERLOAD__ convert_ulong4( short4 );
    ulong4 __OVERLOAD__ convert_ulong4( int4 );
    ulong4 __OVERLOAD__ convert_ulong4( long4 );
    ulong4 __OVERLOAD__ convert_ulong4( float4 );
    ulong4 __OVERLOAD__ convert_ulong4( double4 );
    ulong8 __OVERLOAD__ convert_ulong8( uchar8 );
    ulong8 __OVERLOAD__ convert_ulong8( ushort8 );
    ulong8 __OVERLOAD__ convert_ulong8( uint8 );
    ulong8 __OVERLOAD__ convert_ulong8( ulong8 );
    ulong8 __OVERLOAD__ convert_ulong8( char8 );
    ulong8 __OVERLOAD__ convert_ulong8( short8 );
    ulong8 __OVERLOAD__ convert_ulong8( int8 );
    ulong8 __OVERLOAD__ convert_ulong8( long8 );
    ulong8 __OVERLOAD__ convert_ulong8( float8 );
    ulong8 __OVERLOAD__ convert_ulong8( double8 );
    ulong16 __OVERLOAD__ convert_ulong16( uchar16 );
    ulong16 __OVERLOAD__ convert_ulong16( ushort16 );
    ulong16 __OVERLOAD__ convert_ulong16( uint16 );
    ulong16 __OVERLOAD__ convert_ulong16( ulong16 );
    ulong16 __OVERLOAD__ convert_ulong16( char16 );
    ulong16 __OVERLOAD__ convert_ulong16( short16 );
    ulong16 __OVERLOAD__ convert_ulong16( int16 );
    ulong16 __OVERLOAD__ convert_ulong16( long16 );
    ulong16 __OVERLOAD__ convert_ulong16( float16 );
    ulong16 __OVERLOAD__ convert_ulong16( double16 );
    // convert_ulong{,2,3,4,8,16}_rte: prototypes returning ulong or a ulong
    // vector of width 2, 3, 4, 8 or 16. Each name is overloaded once per source
    // element type (uchar, ushort, uint, ulong, char, short, int, long, float,
    // double); vector overloads take a source vector of the same width as the
    // result. Declarations only.
    // NOTE(review): the _rte suffix presumably follows the OpenCL C
    // explicit-conversion naming (round to nearest even); the actual semantics
    // are defined by the implementation, which is not visible here.
    ulong __OVERLOAD__ convert_ulong_rte( uchar );
    ulong __OVERLOAD__ convert_ulong_rte( ushort );
    ulong __OVERLOAD__ convert_ulong_rte( uint );
    ulong __OVERLOAD__ convert_ulong_rte( ulong );
    ulong __OVERLOAD__ convert_ulong_rte( char );
    ulong __OVERLOAD__ convert_ulong_rte( short );
    ulong __OVERLOAD__ convert_ulong_rte( int );
    ulong __OVERLOAD__ convert_ulong_rte( long );
    ulong __OVERLOAD__ convert_ulong_rte( float );
    ulong __OVERLOAD__ convert_ulong_rte( double );
    ulong2 __OVERLOAD__ convert_ulong2_rte( uchar2 );
    ulong2 __OVERLOAD__ convert_ulong2_rte( ushort2 );
    ulong2 __OVERLOAD__ convert_ulong2_rte( uint2 );
    ulong2 __OVERLOAD__ convert_ulong2_rte( ulong2 );
    ulong2 __OVERLOAD__ convert_ulong2_rte( char2 );
    ulong2 __OVERLOAD__ convert_ulong2_rte( short2 );
    ulong2 __OVERLOAD__ convert_ulong2_rte( int2 );
    ulong2 __OVERLOAD__ convert_ulong2_rte( long2 );
    ulong2 __OVERLOAD__ convert_ulong2_rte( float2 );
    ulong2 __OVERLOAD__ convert_ulong2_rte( double2 );
    ulong3 __OVERLOAD__ convert_ulong3_rte( uchar3 );
    ulong3 __OVERLOAD__ convert_ulong3_rte( ushort3 );
    ulong3 __OVERLOAD__ convert_ulong3_rte( uint3 );
    ulong3 __OVERLOAD__ convert_ulong3_rte( ulong3 );
    ulong3 __OVERLOAD__ convert_ulong3_rte( char3 );
    ulong3 __OVERLOAD__ convert_ulong3_rte( short3 );
    ulong3 __OVERLOAD__ convert_ulong3_rte( int3 );
    ulong3 __OVERLOAD__ convert_ulong3_rte( long3 );
    ulong3 __OVERLOAD__ convert_ulong3_rte( float3 );
    ulong3 __OVERLOAD__ convert_ulong3_rte( double3 );
    ulong4 __OVERLOAD__ convert_ulong4_rte( uchar4 );
    ulong4 __OVERLOAD__ convert_ulong4_rte( ushort4 );
    ulong4 __OVERLOAD__ convert_ulong4_rte( uint4 );
    ulong4 __OVERLOAD__ convert_ulong4_rte( ulong4 );
    ulong4 __OVERLOAD__ convert_ulong4_rte( char4 );
    ulong4 __OVERLOAD__ convert_ulong4_rte( short4 );
    ulong4 __OVERLOAD__ convert_ulong4_rte( int4 );
    ulong4 __OVERLOAD__ convert_ulong4_rte( long4 );
    ulong4 __OVERLOAD__ convert_ulong4_rte( float4 );
    ulong4 __OVERLOAD__ convert_ulong4_rte( double4 );
    ulong8 __OVERLOAD__ convert_ulong8_rte( uchar8 );
    ulong8 __OVERLOAD__ convert_ulong8_rte( ushort8 );
    ulong8 __OVERLOAD__ convert_ulong8_rte( uint8 );
    ulong8 __OVERLOAD__ convert_ulong8_rte( ulong8 );
    ulong8 __OVERLOAD__ convert_ulong8_rte( char8 );
    ulong8 __OVERLOAD__ convert_ulong8_rte( short8 );
    ulong8 __OVERLOAD__ convert_ulong8_rte( int8 );
    ulong8 __OVERLOAD__ convert_ulong8_rte( long8 );
    ulong8 __OVERLOAD__ convert_ulong8_rte( float8 );
    ulong8 __OVERLOAD__ convert_ulong8_rte( double8 );
    ulong16 __OVERLOAD__ convert_ulong16_rte( uchar16 );
    ulong16 __OVERLOAD__ convert_ulong16_rte( ushort16 );
    ulong16 __OVERLOAD__ convert_ulong16_rte( uint16 );
    ulong16 __OVERLOAD__ convert_ulong16_rte( ulong16 );
    ulong16 __OVERLOAD__ convert_ulong16_rte( char16 );
    ulong16 __OVERLOAD__ convert_ulong16_rte( short16 );
    ulong16 __OVERLOAD__ convert_ulong16_rte( int16 );
    ulong16 __OVERLOAD__ convert_ulong16_rte( long16 );
    ulong16 __OVERLOAD__ convert_ulong16_rte( float16 );
    ulong16 __OVERLOAD__ convert_ulong16_rte( double16 );
    // convert_ulong{,2,3,4,8,16}_rtp: prototypes returning ulong or a ulong
    // vector of width 2, 3, 4, 8 or 16. Each name is overloaded once per source
    // element type (uchar, ushort, uint, ulong, char, short, int, long, float,
    // double); vector overloads take a source vector of the same width as the
    // result. Declarations only.
    // NOTE(review): the _rtp suffix presumably follows the OpenCL C
    // explicit-conversion naming (round toward positive infinity); the actual
    // semantics are defined by the implementation, which is not visible here.
    ulong __OVERLOAD__ convert_ulong_rtp( uchar );
    ulong __OVERLOAD__ convert_ulong_rtp( ushort );
    ulong __OVERLOAD__ convert_ulong_rtp( uint );
    ulong __OVERLOAD__ convert_ulong_rtp( ulong );
    ulong __OVERLOAD__ convert_ulong_rtp( char );
    ulong __OVERLOAD__ convert_ulong_rtp( short );
    ulong __OVERLOAD__ convert_ulong_rtp( int );
    ulong __OVERLOAD__ convert_ulong_rtp( long );
    ulong __OVERLOAD__ convert_ulong_rtp( float );
    ulong __OVERLOAD__ convert_ulong_rtp( double );
    ulong2 __OVERLOAD__ convert_ulong2_rtp( uchar2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtp( ushort2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtp( uint2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtp( ulong2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtp( char2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtp( short2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtp( int2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtp( long2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtp( float2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtp( double2 );
    ulong3 __OVERLOAD__ convert_ulong3_rtp( uchar3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtp( ushort3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtp( uint3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtp( ulong3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtp( char3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtp( short3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtp( int3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtp( long3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtp( float3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtp( double3 );
    ulong4 __OVERLOAD__ convert_ulong4_rtp( uchar4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtp( ushort4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtp( uint4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtp( ulong4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtp( char4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtp( short4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtp( int4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtp( long4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtp( float4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtp( double4 );
    ulong8 __OVERLOAD__ convert_ulong8_rtp( uchar8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtp( ushort8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtp( uint8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtp( ulong8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtp( char8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtp( short8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtp( int8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtp( long8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtp( float8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtp( double8 );
    ulong16 __OVERLOAD__ convert_ulong16_rtp( uchar16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtp( ushort16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtp( uint16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtp( ulong16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtp( char16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtp( short16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtp( int16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtp( long16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtp( float16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtp( double16 );
    // convert_ulong{,2,3,4,8,16}_rtn: prototypes returning ulong or a ulong
    // vector of width 2, 3, 4, 8 or 16. Each name is overloaded once per source
    // element type (uchar, ushort, uint, ulong, char, short, int, long, float,
    // double); vector overloads take a source vector of the same width as the
    // result. Declarations only.
    // NOTE(review): the _rtn suffix presumably follows the OpenCL C
    // explicit-conversion naming (round toward negative infinity); the actual
    // semantics are defined by the implementation, which is not visible here.
    ulong __OVERLOAD__ convert_ulong_rtn( uchar );
    ulong __OVERLOAD__ convert_ulong_rtn( ushort );
    ulong __OVERLOAD__ convert_ulong_rtn( uint );
    ulong __OVERLOAD__ convert_ulong_rtn( ulong );
    ulong __OVERLOAD__ convert_ulong_rtn( char );
    ulong __OVERLOAD__ convert_ulong_rtn( short );
    ulong __OVERLOAD__ convert_ulong_rtn( int );
    ulong __OVERLOAD__ convert_ulong_rtn( long );
    ulong __OVERLOAD__ convert_ulong_rtn( float );
    ulong __OVERLOAD__ convert_ulong_rtn( double );
    ulong2 __OVERLOAD__ convert_ulong2_rtn( uchar2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtn( ushort2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtn( uint2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtn( ulong2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtn( char2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtn( short2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtn( int2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtn( long2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtn( float2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtn( double2 );
    ulong3 __OVERLOAD__ convert_ulong3_rtn( uchar3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtn( ushort3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtn( uint3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtn( ulong3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtn( char3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtn( short3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtn( int3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtn( long3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtn( float3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtn( double3 );
    ulong4 __OVERLOAD__ convert_ulong4_rtn( uchar4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtn( ushort4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtn( uint4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtn( ulong4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtn( char4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtn( short4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtn( int4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtn( long4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtn( float4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtn( double4 );
    ulong8 __OVERLOAD__ convert_ulong8_rtn( uchar8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtn( ushort8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtn( uint8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtn( ulong8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtn( char8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtn( short8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtn( int8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtn( long8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtn( float8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtn( double8 );
    ulong16 __OVERLOAD__ convert_ulong16_rtn( uchar16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtn( ushort16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtn( uint16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtn( ulong16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtn( char16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtn( short16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtn( int16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtn( long16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtn( float16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtn( double16 );
    // convert_ulong{,2,3,4,8,16}_rtz: prototypes returning ulong or a ulong
    // vector of width 2, 3, 4, 8 or 16. Each name is overloaded once per source
    // element type (uchar, ushort, uint, ulong, char, short, int, long, float,
    // double); vector overloads take a source vector of the same width as the
    // result. Declarations only.
    // NOTE(review): the _rtz suffix presumably follows the OpenCL C
    // explicit-conversion naming (round toward zero); the actual semantics are
    // defined by the implementation, which is not visible here.
    ulong __OVERLOAD__ convert_ulong_rtz( uchar );
    ulong __OVERLOAD__ convert_ulong_rtz( ushort );
    ulong __OVERLOAD__ convert_ulong_rtz( uint );
    ulong __OVERLOAD__ convert_ulong_rtz( ulong );
    ulong __OVERLOAD__ convert_ulong_rtz( char );
    ulong __OVERLOAD__ convert_ulong_rtz( short );
    ulong __OVERLOAD__ convert_ulong_rtz( int );
    ulong __OVERLOAD__ convert_ulong_rtz( long );
    ulong __OVERLOAD__ convert_ulong_rtz( float );
    ulong __OVERLOAD__ convert_ulong_rtz( double );
    ulong2 __OVERLOAD__ convert_ulong2_rtz( uchar2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtz( ushort2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtz( uint2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtz( ulong2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtz( char2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtz( short2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtz( int2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtz( long2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtz( float2 );
    ulong2 __OVERLOAD__ convert_ulong2_rtz( double2 );
    ulong3 __OVERLOAD__ convert_ulong3_rtz( uchar3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtz( ushort3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtz( uint3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtz( ulong3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtz( char3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtz( short3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtz( int3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtz( long3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtz( float3 );
    ulong3 __OVERLOAD__ convert_ulong3_rtz( double3 );
    ulong4 __OVERLOAD__ convert_ulong4_rtz( uchar4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtz( ushort4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtz( uint4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtz( ulong4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtz( char4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtz( short4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtz( int4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtz( long4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtz( float4 );
    ulong4 __OVERLOAD__ convert_ulong4_rtz( double4 );
    ulong8 __OVERLOAD__ convert_ulong8_rtz( uchar8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtz( ushort8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtz( uint8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtz( ulong8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtz( char8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtz( short8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtz( int8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtz( long8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtz( float8 );
    ulong8 __OVERLOAD__ convert_ulong8_rtz( double8 );
    ulong16 __OVERLOAD__ convert_ulong16_rtz( uchar16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtz( ushort16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtz( uint16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtz( ulong16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtz( char16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtz( short16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtz( int16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtz( long16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtz( float16 );
    ulong16 __OVERLOAD__ convert_ulong16_rtz( double16 );
    // convert_ulong{,2,3,4,8,16}_sat: prototypes returning ulong or a ulong
    // vector of width 2, 3, 4, 8 or 16. Each name is overloaded once per source
    // element type (uchar, ushort, uint, ulong, char, short, int, long, float,
    // double); vector overloads take a source vector of the same width as the
    // result. Declarations only.
    // NOTE(review): the _sat suffix presumably follows the OpenCL C
    // explicit-conversion naming (saturating conversion, no rounding-mode
    // suffix); the actual semantics are defined by the implementation, which is
    // not visible here.
    ulong __OVERLOAD__ convert_ulong_sat( uchar );
    ulong __OVERLOAD__ convert_ulong_sat( ushort );
    ulong __OVERLOAD__ convert_ulong_sat( uint );
    ulong __OVERLOAD__ convert_ulong_sat( ulong );
    ulong __OVERLOAD__ convert_ulong_sat( char );
    ulong __OVERLOAD__ convert_ulong_sat( short );
    ulong __OVERLOAD__ convert_ulong_sat( int );
    ulong __OVERLOAD__ convert_ulong_sat( long );
    ulong __OVERLOAD__ convert_ulong_sat( float );
    ulong __OVERLOAD__ convert_ulong_sat( double );
    ulong2 __OVERLOAD__ convert_ulong2_sat( uchar2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat( ushort2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat( uint2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat( ulong2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat( char2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat( short2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat( int2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat( long2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat( float2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat( double2 );
    ulong3 __OVERLOAD__ convert_ulong3_sat( uchar3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat( ushort3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat( uint3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat( ulong3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat( char3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat( short3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat( int3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat( long3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat( float3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat( double3 );
    ulong4 __OVERLOAD__ convert_ulong4_sat( uchar4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat( ushort4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat( uint4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat( ulong4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat( char4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat( short4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat( int4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat( long4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat( float4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat( double4 );
    ulong8 __OVERLOAD__ convert_ulong8_sat( uchar8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat( ushort8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat( uint8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat( ulong8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat( char8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat( short8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat( int8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat( long8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat( float8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat( double8 );
    ulong16 __OVERLOAD__ convert_ulong16_sat( uchar16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat( ushort16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat( uint16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat( ulong16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat( char16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat( short16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat( int16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat( long16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat( float16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat( double16 );
    // convert_ulong{,2,3,4,8,16}_sat_rte: prototypes returning ulong or a ulong
    // vector of width 2, 3, 4, 8 or 16. Each name is overloaded once per source
    // element type (uchar, ushort, uint, ulong, char, short, int, long, float,
    // double); vector overloads take a source vector of the same width as the
    // result. Declarations only.
    // NOTE(review): the suffixes presumably follow the OpenCL C explicit-conversion
    // naming (_sat = saturating, _rte = round to nearest even); the actual
    // semantics are defined by the implementation, which is not visible here.
    ulong __OVERLOAD__ convert_ulong_sat_rte( uchar );
    ulong __OVERLOAD__ convert_ulong_sat_rte( ushort );
    ulong __OVERLOAD__ convert_ulong_sat_rte( uint );
    ulong __OVERLOAD__ convert_ulong_sat_rte( ulong );
    ulong __OVERLOAD__ convert_ulong_sat_rte( char );
    ulong __OVERLOAD__ convert_ulong_sat_rte( short );
    ulong __OVERLOAD__ convert_ulong_sat_rte( int );
    ulong __OVERLOAD__ convert_ulong_sat_rte( long );
    ulong __OVERLOAD__ convert_ulong_sat_rte( float );
    ulong __OVERLOAD__ convert_ulong_sat_rte( double );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rte( uchar2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rte( ushort2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rte( uint2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rte( ulong2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rte( char2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rte( short2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rte( int2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rte( long2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rte( float2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rte( double2 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rte( uchar3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rte( ushort3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rte( uint3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rte( ulong3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rte( char3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rte( short3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rte( int3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rte( long3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rte( float3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rte( double3 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rte( uchar4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rte( ushort4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rte( uint4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rte( ulong4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rte( char4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rte( short4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rte( int4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rte( long4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rte( float4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rte( double4 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rte( uchar8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rte( ushort8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rte( uint8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rte( ulong8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rte( char8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rte( short8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rte( int8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rte( long8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rte( float8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rte( double8 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rte( uchar16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rte( ushort16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rte( uint16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rte( ulong16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rte( char16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rte( short16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rte( int16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rte( long16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rte( float16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rte( double16 );
    // convert_ulong{,2,3,4,8,16}_sat_rtp: prototypes returning ulong or a ulong
    // vector of width 2, 3, 4, 8 or 16. Each name is overloaded once per source
    // element type (uchar, ushort, uint, ulong, char, short, int, long, float,
    // double); vector overloads take a source vector of the same width as the
    // result. Declarations only.
    // NOTE(review): the suffixes presumably follow the OpenCL C explicit-conversion
    // naming (_sat = saturating, _rtp = round toward positive infinity); the
    // actual semantics are defined by the implementation, which is not visible here.
    ulong __OVERLOAD__ convert_ulong_sat_rtp( uchar );
    ulong __OVERLOAD__ convert_ulong_sat_rtp( ushort );
    ulong __OVERLOAD__ convert_ulong_sat_rtp( uint );
    ulong __OVERLOAD__ convert_ulong_sat_rtp( ulong );
    ulong __OVERLOAD__ convert_ulong_sat_rtp( char );
    ulong __OVERLOAD__ convert_ulong_sat_rtp( short );
    ulong __OVERLOAD__ convert_ulong_sat_rtp( int );
    ulong __OVERLOAD__ convert_ulong_sat_rtp( long );
    ulong __OVERLOAD__ convert_ulong_sat_rtp( float );
    ulong __OVERLOAD__ convert_ulong_sat_rtp( double );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtp( uchar2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtp( ushort2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtp( uint2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtp( ulong2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtp( char2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtp( short2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtp( int2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtp( long2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtp( float2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtp( double2 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtp( uchar3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtp( ushort3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtp( uint3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtp( ulong3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtp( char3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtp( short3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtp( int3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtp( long3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtp( float3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtp( double3 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtp( uchar4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtp( ushort4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtp( uint4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtp( ulong4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtp( char4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtp( short4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtp( int4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtp( long4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtp( float4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtp( double4 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtp( uchar8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtp( ushort8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtp( uint8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtp( ulong8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtp( char8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtp( short8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtp( int8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtp( long8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtp( float8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtp( double8 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtp( uchar16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtp( ushort16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtp( uint16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtp( ulong16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtp( char16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtp( short16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtp( int16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtp( long16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtp( float16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtp( double16 );
    // convert_ulong{,2,3,4,8,16}_sat_rtn: prototypes returning ulong or a ulong
    // vector of width 2, 3, 4, 8 or 16. Each name is overloaded once per source
    // element type (uchar, ushort, uint, ulong, char, short, int, long, float,
    // double); vector overloads take a source vector of the same width as the
    // result. Declarations only.
    // NOTE(review): the suffixes presumably follow the OpenCL C explicit-conversion
    // naming (_sat = saturating, _rtn = round toward negative infinity); the
    // actual semantics are defined by the implementation, which is not visible here.
    ulong __OVERLOAD__ convert_ulong_sat_rtn( uchar );
    ulong __OVERLOAD__ convert_ulong_sat_rtn( ushort );
    ulong __OVERLOAD__ convert_ulong_sat_rtn( uint );
    ulong __OVERLOAD__ convert_ulong_sat_rtn( ulong );
    ulong __OVERLOAD__ convert_ulong_sat_rtn( char );
    ulong __OVERLOAD__ convert_ulong_sat_rtn( short );
    ulong __OVERLOAD__ convert_ulong_sat_rtn( int );
    ulong __OVERLOAD__ convert_ulong_sat_rtn( long );
    ulong __OVERLOAD__ convert_ulong_sat_rtn( float );
    ulong __OVERLOAD__ convert_ulong_sat_rtn( double );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtn( uchar2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtn( ushort2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtn( uint2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtn( ulong2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtn( char2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtn( short2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtn( int2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtn( long2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtn( float2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtn( double2 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtn( uchar3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtn( ushort3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtn( uint3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtn( ulong3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtn( char3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtn( short3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtn( int3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtn( long3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtn( float3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtn( double3 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtn( uchar4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtn( ushort4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtn( uint4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtn( ulong4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtn( char4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtn( short4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtn( int4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtn( long4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtn( float4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtn( double4 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtn( uchar8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtn( ushort8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtn( uint8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtn( ulong8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtn( char8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtn( short8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtn( int8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtn( long8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtn( float8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtn( double8 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtn( uchar16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtn( ushort16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtn( uint16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtn( ulong16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtn( char16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtn( short16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtn( int16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtn( long16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtn( float16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtn( double16 );
    // convert_ulong_sat_rtz and convert_ulongN_sat_rtz (N = 2, 3, 4, 8, 16):
    // prototypes for conversions to ulong / ulongN.
    // Each width declares one overload per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector forms take a
    // source vector with the same element count as the result.
    // NOTE(review): the modifier meanings (_sat = saturate out-of-range input,
    // _rtz = round toward zero) come from the OpenCL C explicit-conversion
    // rules, not from anything in this header -- confirm against the spec.
    ulong __OVERLOAD__ convert_ulong_sat_rtz( uchar );
    ulong __OVERLOAD__ convert_ulong_sat_rtz( ushort );
    ulong __OVERLOAD__ convert_ulong_sat_rtz( uint );
    ulong __OVERLOAD__ convert_ulong_sat_rtz( ulong );
    ulong __OVERLOAD__ convert_ulong_sat_rtz( char );
    ulong __OVERLOAD__ convert_ulong_sat_rtz( short );
    ulong __OVERLOAD__ convert_ulong_sat_rtz( int );
    ulong __OVERLOAD__ convert_ulong_sat_rtz( long );
    ulong __OVERLOAD__ convert_ulong_sat_rtz( float );
    ulong __OVERLOAD__ convert_ulong_sat_rtz( double );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtz( uchar2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtz( ushort2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtz( uint2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtz( ulong2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtz( char2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtz( short2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtz( int2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtz( long2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtz( float2 );
    ulong2 __OVERLOAD__ convert_ulong2_sat_rtz( double2 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtz( uchar3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtz( ushort3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtz( uint3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtz( ulong3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtz( char3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtz( short3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtz( int3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtz( long3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtz( float3 );
    ulong3 __OVERLOAD__ convert_ulong3_sat_rtz( double3 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtz( uchar4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtz( ushort4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtz( uint4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtz( ulong4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtz( char4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtz( short4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtz( int4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtz( long4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtz( float4 );
    ulong4 __OVERLOAD__ convert_ulong4_sat_rtz( double4 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtz( uchar8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtz( ushort8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtz( uint8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtz( ulong8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtz( char8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtz( short8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtz( int8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtz( long8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtz( float8 );
    ulong8 __OVERLOAD__ convert_ulong8_sat_rtz( double8 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtz( uchar16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtz( ushort16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtz( uint16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtz( ulong16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtz( char16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtz( short16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtz( int16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtz( long16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtz( float16 );
    ulong16 __OVERLOAD__ convert_ulong16_sat_rtz( double16 );
    // convert_char and convert_charN (N = 2, 3, 4, 8, 16): prototypes for
    // conversions to char / charN with no saturation or rounding-mode suffix.
    // Each width declares one overload per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector forms take a
    // source vector with the same element count as the result.
    // NOTE(review): with no suffix, out-of-range handling and rounding are
    // whatever the OpenCL C spec defines as the default for conversions to an
    // integer type (presumably round toward zero, no saturation) -- confirm
    // against the spec.
    char __OVERLOAD__ convert_char( uchar );
    char __OVERLOAD__ convert_char( ushort );
    char __OVERLOAD__ convert_char( uint );
    char __OVERLOAD__ convert_char( ulong );
    char __OVERLOAD__ convert_char( char );
    char __OVERLOAD__ convert_char( short );
    char __OVERLOAD__ convert_char( int );
    char __OVERLOAD__ convert_char( long );
    char __OVERLOAD__ convert_char( float );
    char __OVERLOAD__ convert_char( double );
    char2 __OVERLOAD__ convert_char2( uchar2 );
    char2 __OVERLOAD__ convert_char2( ushort2 );
    char2 __OVERLOAD__ convert_char2( uint2 );
    char2 __OVERLOAD__ convert_char2( ulong2 );
    char2 __OVERLOAD__ convert_char2( char2 );
    char2 __OVERLOAD__ convert_char2( short2 );
    char2 __OVERLOAD__ convert_char2( int2 );
    char2 __OVERLOAD__ convert_char2( long2 );
    char2 __OVERLOAD__ convert_char2( float2 );
    char2 __OVERLOAD__ convert_char2( double2 );
    char3 __OVERLOAD__ convert_char3( uchar3 );
    char3 __OVERLOAD__ convert_char3( ushort3 );
    char3 __OVERLOAD__ convert_char3( uint3 );
    char3 __OVERLOAD__ convert_char3( ulong3 );
    char3 __OVERLOAD__ convert_char3( char3 );
    char3 __OVERLOAD__ convert_char3( short3 );
    char3 __OVERLOAD__ convert_char3( int3 );
    char3 __OVERLOAD__ convert_char3( long3 );
    char3 __OVERLOAD__ convert_char3( float3 );
    char3 __OVERLOAD__ convert_char3( double3 );
    char4 __OVERLOAD__ convert_char4( uchar4 );
    char4 __OVERLOAD__ convert_char4( ushort4 );
    char4 __OVERLOAD__ convert_char4( uint4 );
    char4 __OVERLOAD__ convert_char4( ulong4 );
    char4 __OVERLOAD__ convert_char4( char4 );
    char4 __OVERLOAD__ convert_char4( short4 );
    char4 __OVERLOAD__ convert_char4( int4 );
    char4 __OVERLOAD__ convert_char4( long4 );
    char4 __OVERLOAD__ convert_char4( float4 );
    char4 __OVERLOAD__ convert_char4( double4 );
    char8 __OVERLOAD__ convert_char8( uchar8 );
    char8 __OVERLOAD__ convert_char8( ushort8 );
    char8 __OVERLOAD__ convert_char8( uint8 );
    char8 __OVERLOAD__ convert_char8( ulong8 );
    char8 __OVERLOAD__ convert_char8( char8 );
    char8 __OVERLOAD__ convert_char8( short8 );
    char8 __OVERLOAD__ convert_char8( int8 );
    char8 __OVERLOAD__ convert_char8( long8 );
    char8 __OVERLOAD__ convert_char8( float8 );
    char8 __OVERLOAD__ convert_char8( double8 );
    char16 __OVERLOAD__ convert_char16( uchar16 );
    char16 __OVERLOAD__ convert_char16( ushort16 );
    char16 __OVERLOAD__ convert_char16( uint16 );
    char16 __OVERLOAD__ convert_char16( ulong16 );
    char16 __OVERLOAD__ convert_char16( char16 );
    char16 __OVERLOAD__ convert_char16( short16 );
    char16 __OVERLOAD__ convert_char16( int16 );
    char16 __OVERLOAD__ convert_char16( long16 );
    char16 __OVERLOAD__ convert_char16( float16 );
    char16 __OVERLOAD__ convert_char16( double16 );
    // convert_char_rte and convert_charN_rte (N = 2, 3, 4, 8, 16): prototypes
    // for conversions to char / charN carrying the "_rte" suffix.
    // Each width declares one overload per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector forms take a
    // source vector with the same element count as the result.
    // NOTE(review): _rte = round to nearest even per the OpenCL C
    // explicit-conversion rules; that meaning is not visible in this header --
    // confirm against the spec.
    char __OVERLOAD__ convert_char_rte( uchar );
    char __OVERLOAD__ convert_char_rte( ushort );
    char __OVERLOAD__ convert_char_rte( uint );
    char __OVERLOAD__ convert_char_rte( ulong );
    char __OVERLOAD__ convert_char_rte( char );
    char __OVERLOAD__ convert_char_rte( short );
    char __OVERLOAD__ convert_char_rte( int );
    char __OVERLOAD__ convert_char_rte( long );
    char __OVERLOAD__ convert_char_rte( float );
    char __OVERLOAD__ convert_char_rte( double );
    char2 __OVERLOAD__ convert_char2_rte( uchar2 );
    char2 __OVERLOAD__ convert_char2_rte( ushort2 );
    char2 __OVERLOAD__ convert_char2_rte( uint2 );
    char2 __OVERLOAD__ convert_char2_rte( ulong2 );
    char2 __OVERLOAD__ convert_char2_rte( char2 );
    char2 __OVERLOAD__ convert_char2_rte( short2 );
    char2 __OVERLOAD__ convert_char2_rte( int2 );
    char2 __OVERLOAD__ convert_char2_rte( long2 );
    char2 __OVERLOAD__ convert_char2_rte( float2 );
    char2 __OVERLOAD__ convert_char2_rte( double2 );
    char3 __OVERLOAD__ convert_char3_rte( uchar3 );
    char3 __OVERLOAD__ convert_char3_rte( ushort3 );
    char3 __OVERLOAD__ convert_char3_rte( uint3 );
    char3 __OVERLOAD__ convert_char3_rte( ulong3 );
    char3 __OVERLOAD__ convert_char3_rte( char3 );
    char3 __OVERLOAD__ convert_char3_rte( short3 );
    char3 __OVERLOAD__ convert_char3_rte( int3 );
    char3 __OVERLOAD__ convert_char3_rte( long3 );
    char3 __OVERLOAD__ convert_char3_rte( float3 );
    char3 __OVERLOAD__ convert_char3_rte( double3 );
    char4 __OVERLOAD__ convert_char4_rte( uchar4 );
    char4 __OVERLOAD__ convert_char4_rte( ushort4 );
    char4 __OVERLOAD__ convert_char4_rte( uint4 );
    char4 __OVERLOAD__ convert_char4_rte( ulong4 );
    char4 __OVERLOAD__ convert_char4_rte( char4 );
    char4 __OVERLOAD__ convert_char4_rte( short4 );
    char4 __OVERLOAD__ convert_char4_rte( int4 );
    char4 __OVERLOAD__ convert_char4_rte( long4 );
    char4 __OVERLOAD__ convert_char4_rte( float4 );
    char4 __OVERLOAD__ convert_char4_rte( double4 );
    char8 __OVERLOAD__ convert_char8_rte( uchar8 );
    char8 __OVERLOAD__ convert_char8_rte( ushort8 );
    char8 __OVERLOAD__ convert_char8_rte( uint8 );
    char8 __OVERLOAD__ convert_char8_rte( ulong8 );
    char8 __OVERLOAD__ convert_char8_rte( char8 );
    char8 __OVERLOAD__ convert_char8_rte( short8 );
    char8 __OVERLOAD__ convert_char8_rte( int8 );
    char8 __OVERLOAD__ convert_char8_rte( long8 );
    char8 __OVERLOAD__ convert_char8_rte( float8 );
    char8 __OVERLOAD__ convert_char8_rte( double8 );
    char16 __OVERLOAD__ convert_char16_rte( uchar16 );
    char16 __OVERLOAD__ convert_char16_rte( ushort16 );
    char16 __OVERLOAD__ convert_char16_rte( uint16 );
    char16 __OVERLOAD__ convert_char16_rte( ulong16 );
    char16 __OVERLOAD__ convert_char16_rte( char16 );
    char16 __OVERLOAD__ convert_char16_rte( short16 );
    char16 __OVERLOAD__ convert_char16_rte( int16 );
    char16 __OVERLOAD__ convert_char16_rte( long16 );
    char16 __OVERLOAD__ convert_char16_rte( float16 );
    char16 __OVERLOAD__ convert_char16_rte( double16 );
    // convert_char_rtp and convert_charN_rtp (N = 2, 3, 4, 8, 16): prototypes
    // for conversions to char / charN carrying the "_rtp" suffix.
    // Each width declares one overload per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector forms take a
    // source vector with the same element count as the result.
    // NOTE(review): _rtp = round toward positive infinity per the OpenCL C
    // explicit-conversion rules; that meaning is not visible in this header --
    // confirm against the spec.
    char __OVERLOAD__ convert_char_rtp( uchar );
    char __OVERLOAD__ convert_char_rtp( ushort );
    char __OVERLOAD__ convert_char_rtp( uint );
    char __OVERLOAD__ convert_char_rtp( ulong );
    char __OVERLOAD__ convert_char_rtp( char );
    char __OVERLOAD__ convert_char_rtp( short );
    char __OVERLOAD__ convert_char_rtp( int );
    char __OVERLOAD__ convert_char_rtp( long );
    char __OVERLOAD__ convert_char_rtp( float );
    char __OVERLOAD__ convert_char_rtp( double );
    char2 __OVERLOAD__ convert_char2_rtp( uchar2 );
    char2 __OVERLOAD__ convert_char2_rtp( ushort2 );
    char2 __OVERLOAD__ convert_char2_rtp( uint2 );
    char2 __OVERLOAD__ convert_char2_rtp( ulong2 );
    char2 __OVERLOAD__ convert_char2_rtp( char2 );
    char2 __OVERLOAD__ convert_char2_rtp( short2 );
    char2 __OVERLOAD__ convert_char2_rtp( int2 );
    char2 __OVERLOAD__ convert_char2_rtp( long2 );
    char2 __OVERLOAD__ convert_char2_rtp( float2 );
    char2 __OVERLOAD__ convert_char2_rtp( double2 );
    char3 __OVERLOAD__ convert_char3_rtp( uchar3 );
    char3 __OVERLOAD__ convert_char3_rtp( ushort3 );
    char3 __OVERLOAD__ convert_char3_rtp( uint3 );
    char3 __OVERLOAD__ convert_char3_rtp( ulong3 );
    char3 __OVERLOAD__ convert_char3_rtp( char3 );
    char3 __OVERLOAD__ convert_char3_rtp( short3 );
    char3 __OVERLOAD__ convert_char3_rtp( int3 );
    char3 __OVERLOAD__ convert_char3_rtp( long3 );
    char3 __OVERLOAD__ convert_char3_rtp( float3 );
    char3 __OVERLOAD__ convert_char3_rtp( double3 );
    char4 __OVERLOAD__ convert_char4_rtp( uchar4 );
    char4 __OVERLOAD__ convert_char4_rtp( ushort4 );
    char4 __OVERLOAD__ convert_char4_rtp( uint4 );
    char4 __OVERLOAD__ convert_char4_rtp( ulong4 );
    char4 __OVERLOAD__ convert_char4_rtp( char4 );
    char4 __OVERLOAD__ convert_char4_rtp( short4 );
    char4 __OVERLOAD__ convert_char4_rtp( int4 );
    char4 __OVERLOAD__ convert_char4_rtp( long4 );
    char4 __OVERLOAD__ convert_char4_rtp( float4 );
    char4 __OVERLOAD__ convert_char4_rtp( double4 );
    char8 __OVERLOAD__ convert_char8_rtp( uchar8 );
    char8 __OVERLOAD__ convert_char8_rtp( ushort8 );
    char8 __OVERLOAD__ convert_char8_rtp( uint8 );
    char8 __OVERLOAD__ convert_char8_rtp( ulong8 );
    char8 __OVERLOAD__ convert_char8_rtp( char8 );
    char8 __OVERLOAD__ convert_char8_rtp( short8 );
    char8 __OVERLOAD__ convert_char8_rtp( int8 );
    char8 __OVERLOAD__ convert_char8_rtp( long8 );
    char8 __OVERLOAD__ convert_char8_rtp( float8 );
    char8 __OVERLOAD__ convert_char8_rtp( double8 );
    char16 __OVERLOAD__ convert_char16_rtp( uchar16 );
    char16 __OVERLOAD__ convert_char16_rtp( ushort16 );
    char16 __OVERLOAD__ convert_char16_rtp( uint16 );
    char16 __OVERLOAD__ convert_char16_rtp( ulong16 );
    char16 __OVERLOAD__ convert_char16_rtp( char16 );
    char16 __OVERLOAD__ convert_char16_rtp( short16 );
    char16 __OVERLOAD__ convert_char16_rtp( int16 );
    char16 __OVERLOAD__ convert_char16_rtp( long16 );
    char16 __OVERLOAD__ convert_char16_rtp( float16 );
    char16 __OVERLOAD__ convert_char16_rtp( double16 );
    // convert_char_rtn and convert_charN_rtn (N = 2, 3, 4, 8, 16): prototypes
    // for conversions to char / charN carrying the "_rtn" suffix.
    // Each width declares one overload per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector forms take a
    // source vector with the same element count as the result.
    // NOTE(review): _rtn = round toward negative infinity per the OpenCL C
    // explicit-conversion rules; that meaning is not visible in this header --
    // confirm against the spec.
    char __OVERLOAD__ convert_char_rtn( uchar );
    char __OVERLOAD__ convert_char_rtn( ushort );
    char __OVERLOAD__ convert_char_rtn( uint );
    char __OVERLOAD__ convert_char_rtn( ulong );
    char __OVERLOAD__ convert_char_rtn( char );
    char __OVERLOAD__ convert_char_rtn( short );
    char __OVERLOAD__ convert_char_rtn( int );
    char __OVERLOAD__ convert_char_rtn( long );
    char __OVERLOAD__ convert_char_rtn( float );
    char __OVERLOAD__ convert_char_rtn( double );
    char2 __OVERLOAD__ convert_char2_rtn( uchar2 );
    char2 __OVERLOAD__ convert_char2_rtn( ushort2 );
    char2 __OVERLOAD__ convert_char2_rtn( uint2 );
    char2 __OVERLOAD__ convert_char2_rtn( ulong2 );
    char2 __OVERLOAD__ convert_char2_rtn( char2 );
    char2 __OVERLOAD__ convert_char2_rtn( short2 );
    char2 __OVERLOAD__ convert_char2_rtn( int2 );
    char2 __OVERLOAD__ convert_char2_rtn( long2 );
    char2 __OVERLOAD__ convert_char2_rtn( float2 );
    char2 __OVERLOAD__ convert_char2_rtn( double2 );
    char3 __OVERLOAD__ convert_char3_rtn( uchar3 );
    char3 __OVERLOAD__ convert_char3_rtn( ushort3 );
    char3 __OVERLOAD__ convert_char3_rtn( uint3 );
    char3 __OVERLOAD__ convert_char3_rtn( ulong3 );
    char3 __OVERLOAD__ convert_char3_rtn( char3 );
    char3 __OVERLOAD__ convert_char3_rtn( short3 );
    char3 __OVERLOAD__ convert_char3_rtn( int3 );
    char3 __OVERLOAD__ convert_char3_rtn( long3 );
    char3 __OVERLOAD__ convert_char3_rtn( float3 );
    char3 __OVERLOAD__ convert_char3_rtn( double3 );
    char4 __OVERLOAD__ convert_char4_rtn( uchar4 );
    char4 __OVERLOAD__ convert_char4_rtn( ushort4 );
    char4 __OVERLOAD__ convert_char4_rtn( uint4 );
    char4 __OVERLOAD__ convert_char4_rtn( ulong4 );
    char4 __OVERLOAD__ convert_char4_rtn( char4 );
    char4 __OVERLOAD__ convert_char4_rtn( short4 );
    char4 __OVERLOAD__ convert_char4_rtn( int4 );
    char4 __OVERLOAD__ convert_char4_rtn( long4 );
    char4 __OVERLOAD__ convert_char4_rtn( float4 );
    char4 __OVERLOAD__ convert_char4_rtn( double4 );
    char8 __OVERLOAD__ convert_char8_rtn( uchar8 );
    char8 __OVERLOAD__ convert_char8_rtn( ushort8 );
    char8 __OVERLOAD__ convert_char8_rtn( uint8 );
    char8 __OVERLOAD__ convert_char8_rtn( ulong8 );
    char8 __OVERLOAD__ convert_char8_rtn( char8 );
    char8 __OVERLOAD__ convert_char8_rtn( short8 );
    char8 __OVERLOAD__ convert_char8_rtn( int8 );
    char8 __OVERLOAD__ convert_char8_rtn( long8 );
    char8 __OVERLOAD__ convert_char8_rtn( float8 );
    char8 __OVERLOAD__ convert_char8_rtn( double8 );
    char16 __OVERLOAD__ convert_char16_rtn( uchar16 );
    char16 __OVERLOAD__ convert_char16_rtn( ushort16 );
    char16 __OVERLOAD__ convert_char16_rtn( uint16 );
    char16 __OVERLOAD__ convert_char16_rtn( ulong16 );
    char16 __OVERLOAD__ convert_char16_rtn( char16 );
    char16 __OVERLOAD__ convert_char16_rtn( short16 );
    char16 __OVERLOAD__ convert_char16_rtn( int16 );
    char16 __OVERLOAD__ convert_char16_rtn( long16 );
    char16 __OVERLOAD__ convert_char16_rtn( float16 );
    char16 __OVERLOAD__ convert_char16_rtn( double16 );
    // convert_char_rtz and convert_charN_rtz (N = 2, 3, 4, 8, 16): prototypes
    // for conversions to char / charN carrying the "_rtz" suffix.
    // Each width declares one overload per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector forms take a
    // source vector with the same element count as the result.
    // NOTE(review): _rtz = round toward zero per the OpenCL C
    // explicit-conversion rules; that meaning is not visible in this header --
    // confirm against the spec.
    char __OVERLOAD__ convert_char_rtz( uchar );
    char __OVERLOAD__ convert_char_rtz( ushort );
    char __OVERLOAD__ convert_char_rtz( uint );
    char __OVERLOAD__ convert_char_rtz( ulong );
    char __OVERLOAD__ convert_char_rtz( char );
    char __OVERLOAD__ convert_char_rtz( short );
    char __OVERLOAD__ convert_char_rtz( int );
    char __OVERLOAD__ convert_char_rtz( long );
    char __OVERLOAD__ convert_char_rtz( float );
    char __OVERLOAD__ convert_char_rtz( double );
    char2 __OVERLOAD__ convert_char2_rtz( uchar2 );
    char2 __OVERLOAD__ convert_char2_rtz( ushort2 );
    char2 __OVERLOAD__ convert_char2_rtz( uint2 );
    char2 __OVERLOAD__ convert_char2_rtz( ulong2 );
    char2 __OVERLOAD__ convert_char2_rtz( char2 );
    char2 __OVERLOAD__ convert_char2_rtz( short2 );
    char2 __OVERLOAD__ convert_char2_rtz( int2 );
    char2 __OVERLOAD__ convert_char2_rtz( long2 );
    char2 __OVERLOAD__ convert_char2_rtz( float2 );
    char2 __OVERLOAD__ convert_char2_rtz( double2 );
    char3 __OVERLOAD__ convert_char3_rtz( uchar3 );
    char3 __OVERLOAD__ convert_char3_rtz( ushort3 );
    char3 __OVERLOAD__ convert_char3_rtz( uint3 );
    char3 __OVERLOAD__ convert_char3_rtz( ulong3 );
    char3 __OVERLOAD__ convert_char3_rtz( char3 );
    char3 __OVERLOAD__ convert_char3_rtz( short3 );
    char3 __OVERLOAD__ convert_char3_rtz( int3 );
    char3 __OVERLOAD__ convert_char3_rtz( long3 );
    char3 __OVERLOAD__ convert_char3_rtz( float3 );
    char3 __OVERLOAD__ convert_char3_rtz( double3 );
    char4 __OVERLOAD__ convert_char4_rtz( uchar4 );
    char4 __OVERLOAD__ convert_char4_rtz( ushort4 );
    char4 __OVERLOAD__ convert_char4_rtz( uint4 );
    char4 __OVERLOAD__ convert_char4_rtz( ulong4 );
    char4 __OVERLOAD__ convert_char4_rtz( char4 );
    char4 __OVERLOAD__ convert_char4_rtz( short4 );
    char4 __OVERLOAD__ convert_char4_rtz( int4 );
    char4 __OVERLOAD__ convert_char4_rtz( long4 );
    char4 __OVERLOAD__ convert_char4_rtz( float4 );
    char4 __OVERLOAD__ convert_char4_rtz( double4 );
    char8 __OVERLOAD__ convert_char8_rtz( uchar8 );
    char8 __OVERLOAD__ convert_char8_rtz( ushort8 );
    char8 __OVERLOAD__ convert_char8_rtz( uint8 );
    char8 __OVERLOAD__ convert_char8_rtz( ulong8 );
    char8 __OVERLOAD__ convert_char8_rtz( char8 );
    char8 __OVERLOAD__ convert_char8_rtz( short8 );
    char8 __OVERLOAD__ convert_char8_rtz( int8 );
    char8 __OVERLOAD__ convert_char8_rtz( long8 );
    char8 __OVERLOAD__ convert_char8_rtz( float8 );
    char8 __OVERLOAD__ convert_char8_rtz( double8 );
    char16 __OVERLOAD__ convert_char16_rtz( uchar16 );
    char16 __OVERLOAD__ convert_char16_rtz( ushort16 );
    char16 __OVERLOAD__ convert_char16_rtz( uint16 );
    char16 __OVERLOAD__ convert_char16_rtz( ulong16 );
    char16 __OVERLOAD__ convert_char16_rtz( char16 );
    char16 __OVERLOAD__ convert_char16_rtz( short16 );
    char16 __OVERLOAD__ convert_char16_rtz( int16 );
    char16 __OVERLOAD__ convert_char16_rtz( long16 );
    char16 __OVERLOAD__ convert_char16_rtz( float16 );
    char16 __OVERLOAD__ convert_char16_rtz( double16 );
    // convert_char_sat and convert_charN_sat (N = 2, 3, 4, 8, 16): prototypes
    // for conversions to char / charN carrying the "_sat" suffix and no
    // rounding-mode suffix.
    // Each width declares one overload per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector forms take a
    // source vector with the same element count as the result.
    // NOTE(review): _sat = clamp out-of-range input to the destination range
    // per the OpenCL C explicit-conversion rules; that meaning is not visible
    // in this header -- confirm against the spec.
    char __OVERLOAD__ convert_char_sat( uchar );
    char __OVERLOAD__ convert_char_sat( ushort );
    char __OVERLOAD__ convert_char_sat( uint );
    char __OVERLOAD__ convert_char_sat( ulong );
    char __OVERLOAD__ convert_char_sat( char );
    char __OVERLOAD__ convert_char_sat( short );
    char __OVERLOAD__ convert_char_sat( int );
    char __OVERLOAD__ convert_char_sat( long );
    char __OVERLOAD__ convert_char_sat( float );
    char __OVERLOAD__ convert_char_sat( double );
    char2 __OVERLOAD__ convert_char2_sat( uchar2 );
    char2 __OVERLOAD__ convert_char2_sat( ushort2 );
    char2 __OVERLOAD__ convert_char2_sat( uint2 );
    char2 __OVERLOAD__ convert_char2_sat( ulong2 );
    char2 __OVERLOAD__ convert_char2_sat( char2 );
    char2 __OVERLOAD__ convert_char2_sat( short2 );
    char2 __OVERLOAD__ convert_char2_sat( int2 );
    char2 __OVERLOAD__ convert_char2_sat( long2 );
    char2 __OVERLOAD__ convert_char2_sat( float2 );
    char2 __OVERLOAD__ convert_char2_sat( double2 );
    char3 __OVERLOAD__ convert_char3_sat( uchar3 );
    char3 __OVERLOAD__ convert_char3_sat( ushort3 );
    char3 __OVERLOAD__ convert_char3_sat( uint3 );
    char3 __OVERLOAD__ convert_char3_sat( ulong3 );
    char3 __OVERLOAD__ convert_char3_sat( char3 );
    char3 __OVERLOAD__ convert_char3_sat( short3 );
    char3 __OVERLOAD__ convert_char3_sat( int3 );
    char3 __OVERLOAD__ convert_char3_sat( long3 );
    char3 __OVERLOAD__ convert_char3_sat( float3 );
    char3 __OVERLOAD__ convert_char3_sat( double3 );
    char4 __OVERLOAD__ convert_char4_sat( uchar4 );
    char4 __OVERLOAD__ convert_char4_sat( ushort4 );
    char4 __OVERLOAD__ convert_char4_sat( uint4 );
    char4 __OVERLOAD__ convert_char4_sat( ulong4 );
    char4 __OVERLOAD__ convert_char4_sat( char4 );
    char4 __OVERLOAD__ convert_char4_sat( short4 );
    char4 __OVERLOAD__ convert_char4_sat( int4 );
    char4 __OVERLOAD__ convert_char4_sat( long4 );
    char4 __OVERLOAD__ convert_char4_sat( float4 );
    char4 __OVERLOAD__ convert_char4_sat( double4 );
    char8 __OVERLOAD__ convert_char8_sat( uchar8 );
    char8 __OVERLOAD__ convert_char8_sat( ushort8 );
    char8 __OVERLOAD__ convert_char8_sat( uint8 );
    char8 __OVERLOAD__ convert_char8_sat( ulong8 );
    char8 __OVERLOAD__ convert_char8_sat( char8 );
    char8 __OVERLOAD__ convert_char8_sat( short8 );
    char8 __OVERLOAD__ convert_char8_sat( int8 );
    char8 __OVERLOAD__ convert_char8_sat( long8 );
    char8 __OVERLOAD__ convert_char8_sat( float8 );
    char8 __OVERLOAD__ convert_char8_sat( double8 );
    char16 __OVERLOAD__ convert_char16_sat( uchar16 );
    char16 __OVERLOAD__ convert_char16_sat( ushort16 );
    char16 __OVERLOAD__ convert_char16_sat( uint16 );
    char16 __OVERLOAD__ convert_char16_sat( ulong16 );
    char16 __OVERLOAD__ convert_char16_sat( char16 );
    char16 __OVERLOAD__ convert_char16_sat( short16 );
    char16 __OVERLOAD__ convert_char16_sat( int16 );
    char16 __OVERLOAD__ convert_char16_sat( long16 );
    char16 __OVERLOAD__ convert_char16_sat( float16 );
    char16 __OVERLOAD__ convert_char16_sat( double16 );
    // convert_char_sat_rte and convert_charN_sat_rte (N = 2, 3, 4, 8, 16):
    // prototypes for conversions to char / charN carrying both the "_sat" and
    // "_rte" suffixes.
    // Each width declares one overload per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector forms take a
    // source vector with the same element count as the result.
    // NOTE(review): _sat = saturate out-of-range input, _rte = round to
    // nearest even, per the OpenCL C explicit-conversion rules; neither meaning
    // is visible in this header -- confirm against the spec.
    char __OVERLOAD__ convert_char_sat_rte( uchar );
    char __OVERLOAD__ convert_char_sat_rte( ushort );
    char __OVERLOAD__ convert_char_sat_rte( uint );
    char __OVERLOAD__ convert_char_sat_rte( ulong );
    char __OVERLOAD__ convert_char_sat_rte( char );
    char __OVERLOAD__ convert_char_sat_rte( short );
    char __OVERLOAD__ convert_char_sat_rte( int );
    char __OVERLOAD__ convert_char_sat_rte( long );
    char __OVERLOAD__ convert_char_sat_rte( float );
    char __OVERLOAD__ convert_char_sat_rte( double );
    char2 __OVERLOAD__ convert_char2_sat_rte( uchar2 );
    char2 __OVERLOAD__ convert_char2_sat_rte( ushort2 );
    char2 __OVERLOAD__ convert_char2_sat_rte( uint2 );
    char2 __OVERLOAD__ convert_char2_sat_rte( ulong2 );
    char2 __OVERLOAD__ convert_char2_sat_rte( char2 );
    char2 __OVERLOAD__ convert_char2_sat_rte( short2 );
    char2 __OVERLOAD__ convert_char2_sat_rte( int2 );
    char2 __OVERLOAD__ convert_char2_sat_rte( long2 );
    char2 __OVERLOAD__ convert_char2_sat_rte( float2 );
    char2 __OVERLOAD__ convert_char2_sat_rte( double2 );
    char3 __OVERLOAD__ convert_char3_sat_rte( uchar3 );
    char3 __OVERLOAD__ convert_char3_sat_rte( ushort3 );
    char3 __OVERLOAD__ convert_char3_sat_rte( uint3 );
    char3 __OVERLOAD__ convert_char3_sat_rte( ulong3 );
    char3 __OVERLOAD__ convert_char3_sat_rte( char3 );
    char3 __OVERLOAD__ convert_char3_sat_rte( short3 );
    char3 __OVERLOAD__ convert_char3_sat_rte( int3 );
    char3 __OVERLOAD__ convert_char3_sat_rte( long3 );
    char3 __OVERLOAD__ convert_char3_sat_rte( float3 );
    char3 __OVERLOAD__ convert_char3_sat_rte( double3 );
    char4 __OVERLOAD__ convert_char4_sat_rte( uchar4 );
    char4 __OVERLOAD__ convert_char4_sat_rte( ushort4 );
    char4 __OVERLOAD__ convert_char4_sat_rte( uint4 );
    char4 __OVERLOAD__ convert_char4_sat_rte( ulong4 );
    char4 __OVERLOAD__ convert_char4_sat_rte( char4 );
    char4 __OVERLOAD__ convert_char4_sat_rte( short4 );
    char4 __OVERLOAD__ convert_char4_sat_rte( int4 );
    char4 __OVERLOAD__ convert_char4_sat_rte( long4 );
    char4 __OVERLOAD__ convert_char4_sat_rte( float4 );
    char4 __OVERLOAD__ convert_char4_sat_rte( double4 );
    char8 __OVERLOAD__ convert_char8_sat_rte( uchar8 );
    char8 __OVERLOAD__ convert_char8_sat_rte( ushort8 );
    char8 __OVERLOAD__ convert_char8_sat_rte( uint8 );
    char8 __OVERLOAD__ convert_char8_sat_rte( ulong8 );
    char8 __OVERLOAD__ convert_char8_sat_rte( char8 );
    char8 __OVERLOAD__ convert_char8_sat_rte( short8 );
    char8 __OVERLOAD__ convert_char8_sat_rte( int8 );
    char8 __OVERLOAD__ convert_char8_sat_rte( long8 );
    char8 __OVERLOAD__ convert_char8_sat_rte( float8 );
    char8 __OVERLOAD__ convert_char8_sat_rte( double8 );
    char16 __OVERLOAD__ convert_char16_sat_rte( uchar16 );
    char16 __OVERLOAD__ convert_char16_sat_rte( ushort16 );
    char16 __OVERLOAD__ convert_char16_sat_rte( uint16 );
    char16 __OVERLOAD__ convert_char16_sat_rte( ulong16 );
    char16 __OVERLOAD__ convert_char16_sat_rte( char16 );
    char16 __OVERLOAD__ convert_char16_sat_rte( short16 );
    char16 __OVERLOAD__ convert_char16_sat_rte( int16 );
    char16 __OVERLOAD__ convert_char16_sat_rte( long16 );
    char16 __OVERLOAD__ convert_char16_sat_rte( float16 );
    char16 __OVERLOAD__ convert_char16_sat_rte( double16 );
    // convert_char_sat_rtp and convert_charN_sat_rtp (N = 2, 3, 4, 8, 16):
    // prototypes for conversions to char / charN carrying both the "_sat" and
    // "_rtp" suffixes.
    // Each width declares one overload per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector forms take a
    // source vector with the same element count as the result.
    // NOTE(review): _sat = saturate out-of-range input, _rtp = round toward
    // positive infinity, per the OpenCL C explicit-conversion rules; neither
    // meaning is visible in this header -- confirm against the spec.
    char __OVERLOAD__ convert_char_sat_rtp( uchar );
    char __OVERLOAD__ convert_char_sat_rtp( ushort );
    char __OVERLOAD__ convert_char_sat_rtp( uint );
    char __OVERLOAD__ convert_char_sat_rtp( ulong );
    char __OVERLOAD__ convert_char_sat_rtp( char );
    char __OVERLOAD__ convert_char_sat_rtp( short );
    char __OVERLOAD__ convert_char_sat_rtp( int );
    char __OVERLOAD__ convert_char_sat_rtp( long );
    char __OVERLOAD__ convert_char_sat_rtp( float );
    char __OVERLOAD__ convert_char_sat_rtp( double );
    char2 __OVERLOAD__ convert_char2_sat_rtp( uchar2 );
    char2 __OVERLOAD__ convert_char2_sat_rtp( ushort2 );
    char2 __OVERLOAD__ convert_char2_sat_rtp( uint2 );
    char2 __OVERLOAD__ convert_char2_sat_rtp( ulong2 );
    char2 __OVERLOAD__ convert_char2_sat_rtp( char2 );
    char2 __OVERLOAD__ convert_char2_sat_rtp( short2 );
    char2 __OVERLOAD__ convert_char2_sat_rtp( int2 );
    char2 __OVERLOAD__ convert_char2_sat_rtp( long2 );
    char2 __OVERLOAD__ convert_char2_sat_rtp( float2 );
    char2 __OVERLOAD__ convert_char2_sat_rtp( double2 );
    char3 __OVERLOAD__ convert_char3_sat_rtp( uchar3 );
    char3 __OVERLOAD__ convert_char3_sat_rtp( ushort3 );
    char3 __OVERLOAD__ convert_char3_sat_rtp( uint3 );
    char3 __OVERLOAD__ convert_char3_sat_rtp( ulong3 );
    char3 __OVERLOAD__ convert_char3_sat_rtp( char3 );
    char3 __OVERLOAD__ convert_char3_sat_rtp( short3 );
    char3 __OVERLOAD__ convert_char3_sat_rtp( int3 );
    char3 __OVERLOAD__ convert_char3_sat_rtp( long3 );
    char3 __OVERLOAD__ convert_char3_sat_rtp( float3 );
    char3 __OVERLOAD__ convert_char3_sat_rtp( double3 );
    char4 __OVERLOAD__ convert_char4_sat_rtp( uchar4 );
    char4 __OVERLOAD__ convert_char4_sat_rtp( ushort4 );
    char4 __OVERLOAD__ convert_char4_sat_rtp( uint4 );
    char4 __OVERLOAD__ convert_char4_sat_rtp( ulong4 );
    char4 __OVERLOAD__ convert_char4_sat_rtp( char4 );
    char4 __OVERLOAD__ convert_char4_sat_rtp( short4 );
    char4 __OVERLOAD__ convert_char4_sat_rtp( int4 );
    char4 __OVERLOAD__ convert_char4_sat_rtp( long4 );
    char4 __OVERLOAD__ convert_char4_sat_rtp( float4 );
    char4 __OVERLOAD__ convert_char4_sat_rtp( double4 );
    char8 __OVERLOAD__ convert_char8_sat_rtp( uchar8 );
    char8 __OVERLOAD__ convert_char8_sat_rtp( ushort8 );
    char8 __OVERLOAD__ convert_char8_sat_rtp( uint8 );
    char8 __OVERLOAD__ convert_char8_sat_rtp( ulong8 );
    char8 __OVERLOAD__ convert_char8_sat_rtp( char8 );
    char8 __OVERLOAD__ convert_char8_sat_rtp( short8 );
    char8 __OVERLOAD__ convert_char8_sat_rtp( int8 );
    char8 __OVERLOAD__ convert_char8_sat_rtp( long8 );
    char8 __OVERLOAD__ convert_char8_sat_rtp( float8 );
    char8 __OVERLOAD__ convert_char8_sat_rtp( double8 );
    char16 __OVERLOAD__ convert_char16_sat_rtp( uchar16 );
    char16 __OVERLOAD__ convert_char16_sat_rtp( ushort16 );
    char16 __OVERLOAD__ convert_char16_sat_rtp( uint16 );
    char16 __OVERLOAD__ convert_char16_sat_rtp( ulong16 );
    char16 __OVERLOAD__ convert_char16_sat_rtp( char16 );
    char16 __OVERLOAD__ convert_char16_sat_rtp( short16 );
    char16 __OVERLOAD__ convert_char16_sat_rtp( int16 );
    char16 __OVERLOAD__ convert_char16_sat_rtp( long16 );
    char16 __OVERLOAD__ convert_char16_sat_rtp( float16 );
    char16 __OVERLOAD__ convert_char16_sat_rtp( double16 );
    // convert_char_sat_rtn and convert_charN_sat_rtn (N = 2, 3, 4, 8, 16):
    // prototypes for conversions to char / charN carrying both the "_sat" and
    // "_rtn" suffixes.
    // Each width declares one overload per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector forms take a
    // source vector with the same element count as the result.
    // NOTE(review): _sat = saturate out-of-range input, _rtn = round toward
    // negative infinity, per the OpenCL C explicit-conversion rules; neither
    // meaning is visible in this header -- confirm against the spec.
    char __OVERLOAD__ convert_char_sat_rtn( uchar );
    char __OVERLOAD__ convert_char_sat_rtn( ushort );
    char __OVERLOAD__ convert_char_sat_rtn( uint );
    char __OVERLOAD__ convert_char_sat_rtn( ulong );
    char __OVERLOAD__ convert_char_sat_rtn( char );
    char __OVERLOAD__ convert_char_sat_rtn( short );
    char __OVERLOAD__ convert_char_sat_rtn( int );
    char __OVERLOAD__ convert_char_sat_rtn( long );
    char __OVERLOAD__ convert_char_sat_rtn( float );
    char __OVERLOAD__ convert_char_sat_rtn( double );
    char2 __OVERLOAD__ convert_char2_sat_rtn( uchar2 );
    char2 __OVERLOAD__ convert_char2_sat_rtn( ushort2 );
    char2 __OVERLOAD__ convert_char2_sat_rtn( uint2 );
    char2 __OVERLOAD__ convert_char2_sat_rtn( ulong2 );
    char2 __OVERLOAD__ convert_char2_sat_rtn( char2 );
    char2 __OVERLOAD__ convert_char2_sat_rtn( short2 );
    char2 __OVERLOAD__ convert_char2_sat_rtn( int2 );
    char2 __OVERLOAD__ convert_char2_sat_rtn( long2 );
    char2 __OVERLOAD__ convert_char2_sat_rtn( float2 );
    char2 __OVERLOAD__ convert_char2_sat_rtn( double2 );
    char3 __OVERLOAD__ convert_char3_sat_rtn( uchar3 );
    char3 __OVERLOAD__ convert_char3_sat_rtn( ushort3 );
    char3 __OVERLOAD__ convert_char3_sat_rtn( uint3 );
    char3 __OVERLOAD__ convert_char3_sat_rtn( ulong3 );
    char3 __OVERLOAD__ convert_char3_sat_rtn( char3 );
    char3 __OVERLOAD__ convert_char3_sat_rtn( short3 );
    char3 __OVERLOAD__ convert_char3_sat_rtn( int3 );
    char3 __OVERLOAD__ convert_char3_sat_rtn( long3 );
    char3 __OVERLOAD__ convert_char3_sat_rtn( float3 );
    char3 __OVERLOAD__ convert_char3_sat_rtn( double3 );
    char4 __OVERLOAD__ convert_char4_sat_rtn( uchar4 );
    char4 __OVERLOAD__ convert_char4_sat_rtn( ushort4 );
    char4 __OVERLOAD__ convert_char4_sat_rtn( uint4 );
    char4 __OVERLOAD__ convert_char4_sat_rtn( ulong4 );
    char4 __OVERLOAD__ convert_char4_sat_rtn( char4 );
    char4 __OVERLOAD__ convert_char4_sat_rtn( short4 );
    char4 __OVERLOAD__ convert_char4_sat_rtn( int4 );
    char4 __OVERLOAD__ convert_char4_sat_rtn( long4 );
    char4 __OVERLOAD__ convert_char4_sat_rtn( float4 );
    char4 __OVERLOAD__ convert_char4_sat_rtn( double4 );
    char8 __OVERLOAD__ convert_char8_sat_rtn( uchar8 );
    char8 __OVERLOAD__ convert_char8_sat_rtn( ushort8 );
    char8 __OVERLOAD__ convert_char8_sat_rtn( uint8 );
    char8 __OVERLOAD__ convert_char8_sat_rtn( ulong8 );
    char8 __OVERLOAD__ convert_char8_sat_rtn( char8 );
    char8 __OVERLOAD__ convert_char8_sat_rtn( short8 );
    char8 __OVERLOAD__ convert_char8_sat_rtn( int8 );
    char8 __OVERLOAD__ convert_char8_sat_rtn( long8 );
    char8 __OVERLOAD__ convert_char8_sat_rtn( float8 );
    char8 __OVERLOAD__ convert_char8_sat_rtn( double8 );
    char16 __OVERLOAD__ convert_char16_sat_rtn( uchar16 );
    char16 __OVERLOAD__ convert_char16_sat_rtn( ushort16 );
    char16 __OVERLOAD__ convert_char16_sat_rtn( uint16 );
    char16 __OVERLOAD__ convert_char16_sat_rtn( ulong16 );
    char16 __OVERLOAD__ convert_char16_sat_rtn( char16 );
    char16 __OVERLOAD__ convert_char16_sat_rtn( short16 );
    char16 __OVERLOAD__ convert_char16_sat_rtn( int16 );
    char16 __OVERLOAD__ convert_char16_sat_rtn( long16 );
    char16 __OVERLOAD__ convert_char16_sat_rtn( float16 );
    char16 __OVERLOAD__ convert_char16_sat_rtn( double16 );
    // convert_char_sat_rtz and its vector forms (convert_charN_sat_rtz):
    // prototypes for conversions to char / charN carrying both the "_sat" and
    // "_rtz" suffixes.
    // Each width declares one overload per source element type (uchar, ushort,
    // uint, ulong, char, short, int, long, float, double); vector forms take a
    // source vector with the same element count as the result.
    // NOTE(review): _sat = saturate out-of-range input, _rtz = round toward
    // zero, per the OpenCL C explicit-conversion rules; neither meaning is
    // visible in this header -- confirm against the spec.
    char __OVERLOAD__ convert_char_sat_rtz( uchar );
    char __OVERLOAD__ convert_char_sat_rtz( ushort );
    char __OVERLOAD__ convert_char_sat_rtz( uint );
    char __OVERLOAD__ convert_char_sat_rtz( ulong );
    char __OVERLOAD__ convert_char_sat_rtz( char );
    char __OVERLOAD__ convert_char_sat_rtz( short );
    char __OVERLOAD__ convert_char_sat_rtz( int );
    char __OVERLOAD__ convert_char_sat_rtz( long );
    char __OVERLOAD__ convert_char_sat_rtz( float );
    char __OVERLOAD__ convert_char_sat_rtz( double );
    char2 __OVERLOAD__ convert_char2_sat_rtz( uchar2 );
    char2 __OVERLOAD__ convert_char2_sat_rtz( ushort2 );
    char2 __OVERLOAD__ convert_char2_sat_rtz( uint2 );
    char2 __OVERLOAD__ convert_char2_sat_rtz( ulong2 );
    char2 __OVERLOAD__ convert_char2_sat_rtz( char2 );
    char2 __OVERLOAD__ convert_char2_sat_rtz( short2 );
    char2 __OVERLOAD__ convert_char2_sat_rtz( int2 );
    char2 __OVERLOAD__ convert_char2_sat_rtz( long2 );
    char2 __OVERLOAD__ convert_char2_sat_rtz( float2 );
    char2 __OVERLOAD__ convert_char2_sat_rtz( double2 );
    char3 __OVERLOAD__ convert_char3_sat_rtz( uchar3 );
    char3 __OVERLOAD__ convert_char3_sat_rtz( ushort3 );
    char3 __OVERLOAD__ convert_char3_sat_rtz( uint3 );
    char3 __OVERLOAD__ convert_char3_sat_rtz( ulong3 );
    char3 __OVERLOAD__ convert_char3_sat_rtz( char3 );
    char3 __OVERLOAD__ convert_char3_sat_rtz( short3 );
    char3 __OVERLOAD__ convert_char3_sat_rtz( int3 );
    char3 __OVERLOAD__ convert_char3_sat_rtz( long3 );
    char3 __OVERLOAD__ convert_char3_sat_rtz( float3 );
    char3 __OVERLOAD__ convert_char3_sat_rtz( double3 );
    char4 __OVERLOAD__ convert_char4_sat_rtz( uchar4 );
    char4 __OVERLOAD__ convert_char4_sat_rtz( ushort4 );
    char4 __OVERLOAD__ convert_char4_sat_rtz( uint4 );
    char4 __OVERLOAD__ convert_char4_sat_rtz( ulong4 );
    char4 __OVERLOAD__ convert_char4_sat_rtz( char4 );
    char4 __OVERLOAD__ convert_char4_sat_rtz( short4 );
    char4 __OVERLOAD__ convert_char4_sat_rtz( int4 );
    char4 __OVERLOAD__ convert_char4_sat_rtz( long4 );
    char4 __OVERLOAD__ convert_char4_sat_rtz( float4 );
    char4 __OVERLOAD__ convert_char4_sat_rtz( double4 );
    char8 __OVERLOAD__ convert_char8_sat_rtz( uchar8 );
    char8 __OVERLOAD__ convert_char8_sat_rtz( ushort8 );
    char8 __OVERLOAD__ convert_char8_sat_rtz( uint8 );
    char8 __OVERLOAD__ convert_char8_sat_rtz( ulong8 );
    char8 __OVERLOAD__ convert_char8_sat_rtz( char8 );
    char8 __OVERLOAD__ convert_char8_sat_rtz( short8 );
    char8 __OVERLOAD__ convert_char8_sat_rtz( int8 );
    char8 __OVERLOAD__ convert_char8_sat_rtz( long8 );
    char8 __OVERLOAD__ convert_char8_sat_rtz( float8 );
    char8 __OVERLOAD__ convert_char8_sat_rtz( double8 );
    char16 __OVERLOAD__ convert_char16_sat_rtz( uchar16 );
    char16 __OVERLOAD__ convert_char16_sat_rtz( ushort16 );
    char16 __OVERLOAD__ convert_char16_sat_rtz( uint16 );
    char16 __OVERLOAD__ convert_char16_sat_rtz( ulong16 );
    char16 __OVERLOAD__ convert_char16_sat_rtz( char16 );
    char16 __OVERLOAD__ convert_char16_sat_rtz( short16 );
    char16 __OVERLOAD__ convert_char16_sat_rtz( int16 );
    char16 __OVERLOAD__ convert_char16_sat_rtz( long16 );
    char16 __OVERLOAD__ convert_char16_sat_rtz( float16 );
    char16 __OVERLOAD__ convert_char16_sat_rtz( double16 );
    // convert_short{,2,3,4,8,16}: overloaded prototypes returning short
    // scalars/vectors, with no rounding-mode or saturation suffix in the name.
    // Each result width (scalar, 2, 3, 4, 8, 16) has ten overloads, one per
    // source element type, in the order uchar, ushort, uint, ulong, char, short,
    // int, long, float, double. Source and result always have the same width.
    // NOTE(review): the rounding/overflow behaviour of the unsuffixed form is
    // not visible here -- see the OpenCL explicit-conversion rules and confirm.
    short __OVERLOAD__ convert_short( uchar );
    short __OVERLOAD__ convert_short( ushort );
    short __OVERLOAD__ convert_short( uint );
    short __OVERLOAD__ convert_short( ulong );
    short __OVERLOAD__ convert_short( char );
    short __OVERLOAD__ convert_short( short );
    short __OVERLOAD__ convert_short( int );
    short __OVERLOAD__ convert_short( long );
    short __OVERLOAD__ convert_short( float );
    short __OVERLOAD__ convert_short( double );
    short2 __OVERLOAD__ convert_short2( uchar2 );
    short2 __OVERLOAD__ convert_short2( ushort2 );
    short2 __OVERLOAD__ convert_short2( uint2 );
    short2 __OVERLOAD__ convert_short2( ulong2 );
    short2 __OVERLOAD__ convert_short2( char2 );
    short2 __OVERLOAD__ convert_short2( short2 );
    short2 __OVERLOAD__ convert_short2( int2 );
    short2 __OVERLOAD__ convert_short2( long2 );
    short2 __OVERLOAD__ convert_short2( float2 );
    short2 __OVERLOAD__ convert_short2( double2 );
    short3 __OVERLOAD__ convert_short3( uchar3 );
    short3 __OVERLOAD__ convert_short3( ushort3 );
    short3 __OVERLOAD__ convert_short3( uint3 );
    short3 __OVERLOAD__ convert_short3( ulong3 );
    short3 __OVERLOAD__ convert_short3( char3 );
    short3 __OVERLOAD__ convert_short3( short3 );
    short3 __OVERLOAD__ convert_short3( int3 );
    short3 __OVERLOAD__ convert_short3( long3 );
    short3 __OVERLOAD__ convert_short3( float3 );
    short3 __OVERLOAD__ convert_short3( double3 );
    short4 __OVERLOAD__ convert_short4( uchar4 );
    short4 __OVERLOAD__ convert_short4( ushort4 );
    short4 __OVERLOAD__ convert_short4( uint4 );
    short4 __OVERLOAD__ convert_short4( ulong4 );
    short4 __OVERLOAD__ convert_short4( char4 );
    short4 __OVERLOAD__ convert_short4( short4 );
    short4 __OVERLOAD__ convert_short4( int4 );
    short4 __OVERLOAD__ convert_short4( long4 );
    short4 __OVERLOAD__ convert_short4( float4 );
    short4 __OVERLOAD__ convert_short4( double4 );
    short8 __OVERLOAD__ convert_short8( uchar8 );
    short8 __OVERLOAD__ convert_short8( ushort8 );
    short8 __OVERLOAD__ convert_short8( uint8 );
    short8 __OVERLOAD__ convert_short8( ulong8 );
    short8 __OVERLOAD__ convert_short8( char8 );
    short8 __OVERLOAD__ convert_short8( short8 );
    short8 __OVERLOAD__ convert_short8( int8 );
    short8 __OVERLOAD__ convert_short8( long8 );
    short8 __OVERLOAD__ convert_short8( float8 );
    short8 __OVERLOAD__ convert_short8( double8 );
    short16 __OVERLOAD__ convert_short16( uchar16 );
    short16 __OVERLOAD__ convert_short16( ushort16 );
    short16 __OVERLOAD__ convert_short16( uint16 );
    short16 __OVERLOAD__ convert_short16( ulong16 );
    short16 __OVERLOAD__ convert_short16( char16 );
    short16 __OVERLOAD__ convert_short16( short16 );
    short16 __OVERLOAD__ convert_short16( int16 );
    short16 __OVERLOAD__ convert_short16( long16 );
    short16 __OVERLOAD__ convert_short16( float16 );
    short16 __OVERLOAD__ convert_short16( double16 );
    // convert_short{,2,3,4,8,16}_rte: overloaded prototypes returning short
    // scalars/vectors. Each result width (scalar, 2, 3, 4, 8, 16) has ten
    // overloads, one per source element type, in the order uchar, ushort, uint,
    // ulong, char, short, int, long, float, double. Source and result always
    // have the same width.
    // NOTE(review): "_rte" presumably selects round-to-nearest-even, per the
    // OpenCL explicit-conversion naming scheme; the definitions are not visible
    // here -- confirm against the implementation.
    short __OVERLOAD__ convert_short_rte( uchar );
    short __OVERLOAD__ convert_short_rte( ushort );
    short __OVERLOAD__ convert_short_rte( uint );
    short __OVERLOAD__ convert_short_rte( ulong );
    short __OVERLOAD__ convert_short_rte( char );
    short __OVERLOAD__ convert_short_rte( short );
    short __OVERLOAD__ convert_short_rte( int );
    short __OVERLOAD__ convert_short_rte( long );
    short __OVERLOAD__ convert_short_rte( float );
    short __OVERLOAD__ convert_short_rte( double );
    short2 __OVERLOAD__ convert_short2_rte( uchar2 );
    short2 __OVERLOAD__ convert_short2_rte( ushort2 );
    short2 __OVERLOAD__ convert_short2_rte( uint2 );
    short2 __OVERLOAD__ convert_short2_rte( ulong2 );
    short2 __OVERLOAD__ convert_short2_rte( char2 );
    short2 __OVERLOAD__ convert_short2_rte( short2 );
    short2 __OVERLOAD__ convert_short2_rte( int2 );
    short2 __OVERLOAD__ convert_short2_rte( long2 );
    short2 __OVERLOAD__ convert_short2_rte( float2 );
    short2 __OVERLOAD__ convert_short2_rte( double2 );
    short3 __OVERLOAD__ convert_short3_rte( uchar3 );
    short3 __OVERLOAD__ convert_short3_rte( ushort3 );
    short3 __OVERLOAD__ convert_short3_rte( uint3 );
    short3 __OVERLOAD__ convert_short3_rte( ulong3 );
    short3 __OVERLOAD__ convert_short3_rte( char3 );
    short3 __OVERLOAD__ convert_short3_rte( short3 );
    short3 __OVERLOAD__ convert_short3_rte( int3 );
    short3 __OVERLOAD__ convert_short3_rte( long3 );
    short3 __OVERLOAD__ convert_short3_rte( float3 );
    short3 __OVERLOAD__ convert_short3_rte( double3 );
    short4 __OVERLOAD__ convert_short4_rte( uchar4 );
    short4 __OVERLOAD__ convert_short4_rte( ushort4 );
    short4 __OVERLOAD__ convert_short4_rte( uint4 );
    short4 __OVERLOAD__ convert_short4_rte( ulong4 );
    short4 __OVERLOAD__ convert_short4_rte( char4 );
    short4 __OVERLOAD__ convert_short4_rte( short4 );
    short4 __OVERLOAD__ convert_short4_rte( int4 );
    short4 __OVERLOAD__ convert_short4_rte( long4 );
    short4 __OVERLOAD__ convert_short4_rte( float4 );
    short4 __OVERLOAD__ convert_short4_rte( double4 );
    short8 __OVERLOAD__ convert_short8_rte( uchar8 );
    short8 __OVERLOAD__ convert_short8_rte( ushort8 );
    short8 __OVERLOAD__ convert_short8_rte( uint8 );
    short8 __OVERLOAD__ convert_short8_rte( ulong8 );
    short8 __OVERLOAD__ convert_short8_rte( char8 );
    short8 __OVERLOAD__ convert_short8_rte( short8 );
    short8 __OVERLOAD__ convert_short8_rte( int8 );
    short8 __OVERLOAD__ convert_short8_rte( long8 );
    short8 __OVERLOAD__ convert_short8_rte( float8 );
    short8 __OVERLOAD__ convert_short8_rte( double8 );
    short16 __OVERLOAD__ convert_short16_rte( uchar16 );
    short16 __OVERLOAD__ convert_short16_rte( ushort16 );
    short16 __OVERLOAD__ convert_short16_rte( uint16 );
    short16 __OVERLOAD__ convert_short16_rte( ulong16 );
    short16 __OVERLOAD__ convert_short16_rte( char16 );
    short16 __OVERLOAD__ convert_short16_rte( short16 );
    short16 __OVERLOAD__ convert_short16_rte( int16 );
    short16 __OVERLOAD__ convert_short16_rte( long16 );
    short16 __OVERLOAD__ convert_short16_rte( float16 );
    short16 __OVERLOAD__ convert_short16_rte( double16 );
    // convert_short{,2,3,4,8,16}_rtp: overloaded prototypes returning short
    // scalars/vectors. Each result width (scalar, 2, 3, 4, 8, 16) has ten
    // overloads, one per source element type, in the order uchar, ushort, uint,
    // ulong, char, short, int, long, float, double. Source and result always
    // have the same width.
    // NOTE(review): "_rtp" presumably selects rounding toward positive
    // infinity, per the OpenCL explicit-conversion naming scheme; the
    // definitions are not visible here -- confirm against the implementation.
    short __OVERLOAD__ convert_short_rtp( uchar );
    short __OVERLOAD__ convert_short_rtp( ushort );
    short __OVERLOAD__ convert_short_rtp( uint );
    short __OVERLOAD__ convert_short_rtp( ulong );
    short __OVERLOAD__ convert_short_rtp( char );
    short __OVERLOAD__ convert_short_rtp( short );
    short __OVERLOAD__ convert_short_rtp( int );
    short __OVERLOAD__ convert_short_rtp( long );
    short __OVERLOAD__ convert_short_rtp( float );
    short __OVERLOAD__ convert_short_rtp( double );
    short2 __OVERLOAD__ convert_short2_rtp( uchar2 );
    short2 __OVERLOAD__ convert_short2_rtp( ushort2 );
    short2 __OVERLOAD__ convert_short2_rtp( uint2 );
    short2 __OVERLOAD__ convert_short2_rtp( ulong2 );
    short2 __OVERLOAD__ convert_short2_rtp( char2 );
    short2 __OVERLOAD__ convert_short2_rtp( short2 );
    short2 __OVERLOAD__ convert_short2_rtp( int2 );
    short2 __OVERLOAD__ convert_short2_rtp( long2 );
    short2 __OVERLOAD__ convert_short2_rtp( float2 );
    short2 __OVERLOAD__ convert_short2_rtp( double2 );
    short3 __OVERLOAD__ convert_short3_rtp( uchar3 );
    short3 __OVERLOAD__ convert_short3_rtp( ushort3 );
    short3 __OVERLOAD__ convert_short3_rtp( uint3 );
    short3 __OVERLOAD__ convert_short3_rtp( ulong3 );
    short3 __OVERLOAD__ convert_short3_rtp( char3 );
    short3 __OVERLOAD__ convert_short3_rtp( short3 );
    short3 __OVERLOAD__ convert_short3_rtp( int3 );
    short3 __OVERLOAD__ convert_short3_rtp( long3 );
    short3 __OVERLOAD__ convert_short3_rtp( float3 );
    short3 __OVERLOAD__ convert_short3_rtp( double3 );
    short4 __OVERLOAD__ convert_short4_rtp( uchar4 );
    short4 __OVERLOAD__ convert_short4_rtp( ushort4 );
    short4 __OVERLOAD__ convert_short4_rtp( uint4 );
    short4 __OVERLOAD__ convert_short4_rtp( ulong4 );
    short4 __OVERLOAD__ convert_short4_rtp( char4 );
    short4 __OVERLOAD__ convert_short4_rtp( short4 );
    short4 __OVERLOAD__ convert_short4_rtp( int4 );
    short4 __OVERLOAD__ convert_short4_rtp( long4 );
    short4 __OVERLOAD__ convert_short4_rtp( float4 );
    short4 __OVERLOAD__ convert_short4_rtp( double4 );
    short8 __OVERLOAD__ convert_short8_rtp( uchar8 );
    short8 __OVERLOAD__ convert_short8_rtp( ushort8 );
    short8 __OVERLOAD__ convert_short8_rtp( uint8 );
    short8 __OVERLOAD__ convert_short8_rtp( ulong8 );
    short8 __OVERLOAD__ convert_short8_rtp( char8 );
    short8 __OVERLOAD__ convert_short8_rtp( short8 );
    short8 __OVERLOAD__ convert_short8_rtp( int8 );
    short8 __OVERLOAD__ convert_short8_rtp( long8 );
    short8 __OVERLOAD__ convert_short8_rtp( float8 );
    short8 __OVERLOAD__ convert_short8_rtp( double8 );
    short16 __OVERLOAD__ convert_short16_rtp( uchar16 );
    short16 __OVERLOAD__ convert_short16_rtp( ushort16 );
    short16 __OVERLOAD__ convert_short16_rtp( uint16 );
    short16 __OVERLOAD__ convert_short16_rtp( ulong16 );
    short16 __OVERLOAD__ convert_short16_rtp( char16 );
    short16 __OVERLOAD__ convert_short16_rtp( short16 );
    short16 __OVERLOAD__ convert_short16_rtp( int16 );
    short16 __OVERLOAD__ convert_short16_rtp( long16 );
    short16 __OVERLOAD__ convert_short16_rtp( float16 );
    short16 __OVERLOAD__ convert_short16_rtp( double16 );
    // convert_short{,2,3,4,8,16}_rtn: overloaded prototypes returning short
    // scalars/vectors. Each result width (scalar, 2, 3, 4, 8, 16) has ten
    // overloads, one per source element type, in the order uchar, ushort, uint,
    // ulong, char, short, int, long, float, double. Source and result always
    // have the same width.
    // NOTE(review): "_rtn" presumably selects rounding toward negative
    // infinity, per the OpenCL explicit-conversion naming scheme; the
    // definitions are not visible here -- confirm against the implementation.
    short __OVERLOAD__ convert_short_rtn( uchar );
    short __OVERLOAD__ convert_short_rtn( ushort );
    short __OVERLOAD__ convert_short_rtn( uint );
    short __OVERLOAD__ convert_short_rtn( ulong );
    short __OVERLOAD__ convert_short_rtn( char );
    short __OVERLOAD__ convert_short_rtn( short );
    short __OVERLOAD__ convert_short_rtn( int );
    short __OVERLOAD__ convert_short_rtn( long );
    short __OVERLOAD__ convert_short_rtn( float );
    short __OVERLOAD__ convert_short_rtn( double );
    short2 __OVERLOAD__ convert_short2_rtn( uchar2 );
    short2 __OVERLOAD__ convert_short2_rtn( ushort2 );
    short2 __OVERLOAD__ convert_short2_rtn( uint2 );
    short2 __OVERLOAD__ convert_short2_rtn( ulong2 );
    short2 __OVERLOAD__ convert_short2_rtn( char2 );
    short2 __OVERLOAD__ convert_short2_rtn( short2 );
    short2 __OVERLOAD__ convert_short2_rtn( int2 );
    short2 __OVERLOAD__ convert_short2_rtn( long2 );
    short2 __OVERLOAD__ convert_short2_rtn( float2 );
    short2 __OVERLOAD__ convert_short2_rtn( double2 );
    short3 __OVERLOAD__ convert_short3_rtn( uchar3 );
    short3 __OVERLOAD__ convert_short3_rtn( ushort3 );
    short3 __OVERLOAD__ convert_short3_rtn( uint3 );
    short3 __OVERLOAD__ convert_short3_rtn( ulong3 );
    short3 __OVERLOAD__ convert_short3_rtn( char3 );
    short3 __OVERLOAD__ convert_short3_rtn( short3 );
    short3 __OVERLOAD__ convert_short3_rtn( int3 );
    short3 __OVERLOAD__ convert_short3_rtn( long3 );
    short3 __OVERLOAD__ convert_short3_rtn( float3 );
    short3 __OVERLOAD__ convert_short3_rtn( double3 );
    short4 __OVERLOAD__ convert_short4_rtn( uchar4 );
    short4 __OVERLOAD__ convert_short4_rtn( ushort4 );
    short4 __OVERLOAD__ convert_short4_rtn( uint4 );
    short4 __OVERLOAD__ convert_short4_rtn( ulong4 );
    short4 __OVERLOAD__ convert_short4_rtn( char4 );
    short4 __OVERLOAD__ convert_short4_rtn( short4 );
    short4 __OVERLOAD__ convert_short4_rtn( int4 );
    short4 __OVERLOAD__ convert_short4_rtn( long4 );
    short4 __OVERLOAD__ convert_short4_rtn( float4 );
    short4 __OVERLOAD__ convert_short4_rtn( double4 );
    short8 __OVERLOAD__ convert_short8_rtn( uchar8 );
    short8 __OVERLOAD__ convert_short8_rtn( ushort8 );
    short8 __OVERLOAD__ convert_short8_rtn( uint8 );
    short8 __OVERLOAD__ convert_short8_rtn( ulong8 );
    short8 __OVERLOAD__ convert_short8_rtn( char8 );
    short8 __OVERLOAD__ convert_short8_rtn( short8 );
    short8 __OVERLOAD__ convert_short8_rtn( int8 );
    short8 __OVERLOAD__ convert_short8_rtn( long8 );
    short8 __OVERLOAD__ convert_short8_rtn( float8 );
    short8 __OVERLOAD__ convert_short8_rtn( double8 );
    short16 __OVERLOAD__ convert_short16_rtn( uchar16 );
    short16 __OVERLOAD__ convert_short16_rtn( ushort16 );
    short16 __OVERLOAD__ convert_short16_rtn( uint16 );
    short16 __OVERLOAD__ convert_short16_rtn( ulong16 );
    short16 __OVERLOAD__ convert_short16_rtn( char16 );
    short16 __OVERLOAD__ convert_short16_rtn( short16 );
    short16 __OVERLOAD__ convert_short16_rtn( int16 );
    short16 __OVERLOAD__ convert_short16_rtn( long16 );
    short16 __OVERLOAD__ convert_short16_rtn( float16 );
    short16 __OVERLOAD__ convert_short16_rtn( double16 );
    // convert_short{,2,3,4,8,16}_rtz: overloaded prototypes returning short
    // scalars/vectors. Each result width (scalar, 2, 3, 4, 8, 16) has ten
    // overloads, one per source element type, in the order uchar, ushort, uint,
    // ulong, char, short, int, long, float, double. Source and result always
    // have the same width.
    // NOTE(review): "_rtz" presumably selects rounding toward zero, per the
    // OpenCL explicit-conversion naming scheme; the definitions are not visible
    // here -- confirm against the implementation.
    short __OVERLOAD__ convert_short_rtz( uchar );
    short __OVERLOAD__ convert_short_rtz( ushort );
    short __OVERLOAD__ convert_short_rtz( uint );
    short __OVERLOAD__ convert_short_rtz( ulong );
    short __OVERLOAD__ convert_short_rtz( char );
    short __OVERLOAD__ convert_short_rtz( short );
    short __OVERLOAD__ convert_short_rtz( int );
    short __OVERLOAD__ convert_short_rtz( long );
    short __OVERLOAD__ convert_short_rtz( float );
    short __OVERLOAD__ convert_short_rtz( double );
    short2 __OVERLOAD__ convert_short2_rtz( uchar2 );
    short2 __OVERLOAD__ convert_short2_rtz( ushort2 );
    short2 __OVERLOAD__ convert_short2_rtz( uint2 );
    short2 __OVERLOAD__ convert_short2_rtz( ulong2 );
    short2 __OVERLOAD__ convert_short2_rtz( char2 );
    short2 __OVERLOAD__ convert_short2_rtz( short2 );
    short2 __OVERLOAD__ convert_short2_rtz( int2 );
    short2 __OVERLOAD__ convert_short2_rtz( long2 );
    short2 __OVERLOAD__ convert_short2_rtz( float2 );
    short2 __OVERLOAD__ convert_short2_rtz( double2 );
    short3 __OVERLOAD__ convert_short3_rtz( uchar3 );
    short3 __OVERLOAD__ convert_short3_rtz( ushort3 );
    short3 __OVERLOAD__ convert_short3_rtz( uint3 );
    short3 __OVERLOAD__ convert_short3_rtz( ulong3 );
    short3 __OVERLOAD__ convert_short3_rtz( char3 );
    short3 __OVERLOAD__ convert_short3_rtz( short3 );
    short3 __OVERLOAD__ convert_short3_rtz( int3 );
    short3 __OVERLOAD__ convert_short3_rtz( long3 );
    short3 __OVERLOAD__ convert_short3_rtz( float3 );
    short3 __OVERLOAD__ convert_short3_rtz( double3 );
    short4 __OVERLOAD__ convert_short4_rtz( uchar4 );
    short4 __OVERLOAD__ convert_short4_rtz( ushort4 );
    short4 __OVERLOAD__ convert_short4_rtz( uint4 );
    short4 __OVERLOAD__ convert_short4_rtz( ulong4 );
    short4 __OVERLOAD__ convert_short4_rtz( char4 );
    short4 __OVERLOAD__ convert_short4_rtz( short4 );
    short4 __OVERLOAD__ convert_short4_rtz( int4 );
    short4 __OVERLOAD__ convert_short4_rtz( long4 );
    short4 __OVERLOAD__ convert_short4_rtz( float4 );
    short4 __OVERLOAD__ convert_short4_rtz( double4 );
    short8 __OVERLOAD__ convert_short8_rtz( uchar8 );
    short8 __OVERLOAD__ convert_short8_rtz( ushort8 );
    short8 __OVERLOAD__ convert_short8_rtz( uint8 );
    short8 __OVERLOAD__ convert_short8_rtz( ulong8 );
    short8 __OVERLOAD__ convert_short8_rtz( char8 );
    short8 __OVERLOAD__ convert_short8_rtz( short8 );
    short8 __OVERLOAD__ convert_short8_rtz( int8 );
    short8 __OVERLOAD__ convert_short8_rtz( long8 );
    short8 __OVERLOAD__ convert_short8_rtz( float8 );
    short8 __OVERLOAD__ convert_short8_rtz( double8 );
    short16 __OVERLOAD__ convert_short16_rtz( uchar16 );
    short16 __OVERLOAD__ convert_short16_rtz( ushort16 );
    short16 __OVERLOAD__ convert_short16_rtz( uint16 );
    short16 __OVERLOAD__ convert_short16_rtz( ulong16 );
    short16 __OVERLOAD__ convert_short16_rtz( char16 );
    short16 __OVERLOAD__ convert_short16_rtz( short16 );
    short16 __OVERLOAD__ convert_short16_rtz( int16 );
    short16 __OVERLOAD__ convert_short16_rtz( long16 );
    short16 __OVERLOAD__ convert_short16_rtz( float16 );
    short16 __OVERLOAD__ convert_short16_rtz( double16 );
    // convert_short{,2,3,4,8,16}_sat: overloaded prototypes returning short
    // scalars/vectors, with no rounding-mode suffix in the name. Each result
    // width (scalar, 2, 3, 4, 8, 16) has ten overloads, one per source element
    // type, in the order uchar, ushort, uint, ulong, char, short, int, long,
    // float, double. Source and result always have the same width.
    // NOTE(review): "_sat" presumably requests saturation to the short range,
    // per the OpenCL explicit-conversion naming scheme; the definitions are not
    // visible here -- confirm against the implementation.
    short __OVERLOAD__ convert_short_sat( uchar );
    short __OVERLOAD__ convert_short_sat( ushort );
    short __OVERLOAD__ convert_short_sat( uint );
    short __OVERLOAD__ convert_short_sat( ulong );
    short __OVERLOAD__ convert_short_sat( char );
    short __OVERLOAD__ convert_short_sat( short );
    short __OVERLOAD__ convert_short_sat( int );
    short __OVERLOAD__ convert_short_sat( long );
    short __OVERLOAD__ convert_short_sat( float );
    short __OVERLOAD__ convert_short_sat( double );
    short2 __OVERLOAD__ convert_short2_sat( uchar2 );
    short2 __OVERLOAD__ convert_short2_sat( ushort2 );
    short2 __OVERLOAD__ convert_short2_sat( uint2 );
    short2 __OVERLOAD__ convert_short2_sat( ulong2 );
    short2 __OVERLOAD__ convert_short2_sat( char2 );
    short2 __OVERLOAD__ convert_short2_sat( short2 );
    short2 __OVERLOAD__ convert_short2_sat( int2 );
    short2 __OVERLOAD__ convert_short2_sat( long2 );
    short2 __OVERLOAD__ convert_short2_sat( float2 );
    short2 __OVERLOAD__ convert_short2_sat( double2 );
    short3 __OVERLOAD__ convert_short3_sat( uchar3 );
    short3 __OVERLOAD__ convert_short3_sat( ushort3 );
    short3 __OVERLOAD__ convert_short3_sat( uint3 );
    short3 __OVERLOAD__ convert_short3_sat( ulong3 );
    short3 __OVERLOAD__ convert_short3_sat( char3 );
    short3 __OVERLOAD__ convert_short3_sat( short3 );
    short3 __OVERLOAD__ convert_short3_sat( int3 );
    short3 __OVERLOAD__ convert_short3_sat( long3 );
    short3 __OVERLOAD__ convert_short3_sat( float3 );
    short3 __OVERLOAD__ convert_short3_sat( double3 );
    short4 __OVERLOAD__ convert_short4_sat( uchar4 );
    short4 __OVERLOAD__ convert_short4_sat( ushort4 );
    short4 __OVERLOAD__ convert_short4_sat( uint4 );
    short4 __OVERLOAD__ convert_short4_sat( ulong4 );
    short4 __OVERLOAD__ convert_short4_sat( char4 );
    short4 __OVERLOAD__ convert_short4_sat( short4 );
    short4 __OVERLOAD__ convert_short4_sat( int4 );
    short4 __OVERLOAD__ convert_short4_sat( long4 );
    short4 __OVERLOAD__ convert_short4_sat( float4 );
    short4 __OVERLOAD__ convert_short4_sat( double4 );
    short8 __OVERLOAD__ convert_short8_sat( uchar8 );
    short8 __OVERLOAD__ convert_short8_sat( ushort8 );
    short8 __OVERLOAD__ convert_short8_sat( uint8 );
    short8 __OVERLOAD__ convert_short8_sat( ulong8 );
    short8 __OVERLOAD__ convert_short8_sat( char8 );
    short8 __OVERLOAD__ convert_short8_sat( short8 );
    short8 __OVERLOAD__ convert_short8_sat( int8 );
    short8 __OVERLOAD__ convert_short8_sat( long8 );
    short8 __OVERLOAD__ convert_short8_sat( float8 );
    short8 __OVERLOAD__ convert_short8_sat( double8 );
    short16 __OVERLOAD__ convert_short16_sat( uchar16 );
    short16 __OVERLOAD__ convert_short16_sat( ushort16 );
    short16 __OVERLOAD__ convert_short16_sat( uint16 );
    short16 __OVERLOAD__ convert_short16_sat( ulong16 );
    short16 __OVERLOAD__ convert_short16_sat( char16 );
    short16 __OVERLOAD__ convert_short16_sat( short16 );
    short16 __OVERLOAD__ convert_short16_sat( int16 );
    short16 __OVERLOAD__ convert_short16_sat( long16 );
    short16 __OVERLOAD__ convert_short16_sat( float16 );
    short16 __OVERLOAD__ convert_short16_sat( double16 );
    // convert_short{,2,3,4,8,16}_sat_rte: overloaded prototypes returning short
    // scalars/vectors. Each result width (scalar, 2, 3, 4, 8, 16) has ten
    // overloads, one per source element type, in the order uchar, ushort, uint,
    // ulong, char, short, int, long, float, double. Source and result always
    // have the same width.
    // NOTE(review): "_sat" presumably requests saturation and "_rte"
    // round-to-nearest-even, per the OpenCL explicit-conversion naming scheme;
    // the definitions are not visible here -- confirm against the implementation.
    short __OVERLOAD__ convert_short_sat_rte( uchar );
    short __OVERLOAD__ convert_short_sat_rte( ushort );
    short __OVERLOAD__ convert_short_sat_rte( uint );
    short __OVERLOAD__ convert_short_sat_rte( ulong );
    short __OVERLOAD__ convert_short_sat_rte( char );
    short __OVERLOAD__ convert_short_sat_rte( short );
    short __OVERLOAD__ convert_short_sat_rte( int );
    short __OVERLOAD__ convert_short_sat_rte( long );
    short __OVERLOAD__ convert_short_sat_rte( float );
    short __OVERLOAD__ convert_short_sat_rte( double );
    short2 __OVERLOAD__ convert_short2_sat_rte( uchar2 );
    short2 __OVERLOAD__ convert_short2_sat_rte( ushort2 );
    short2 __OVERLOAD__ convert_short2_sat_rte( uint2 );
    short2 __OVERLOAD__ convert_short2_sat_rte( ulong2 );
    short2 __OVERLOAD__ convert_short2_sat_rte( char2 );
    short2 __OVERLOAD__ convert_short2_sat_rte( short2 );
    short2 __OVERLOAD__ convert_short2_sat_rte( int2 );
    short2 __OVERLOAD__ convert_short2_sat_rte( long2 );
    short2 __OVERLOAD__ convert_short2_sat_rte( float2 );
    short2 __OVERLOAD__ convert_short2_sat_rte( double2 );
    short3 __OVERLOAD__ convert_short3_sat_rte( uchar3 );
    short3 __OVERLOAD__ convert_short3_sat_rte( ushort3 );
    short3 __OVERLOAD__ convert_short3_sat_rte( uint3 );
    short3 __OVERLOAD__ convert_short3_sat_rte( ulong3 );
    short3 __OVERLOAD__ convert_short3_sat_rte( char3 );
    short3 __OVERLOAD__ convert_short3_sat_rte( short3 );
    short3 __OVERLOAD__ convert_short3_sat_rte( int3 );
    short3 __OVERLOAD__ convert_short3_sat_rte( long3 );
    short3 __OVERLOAD__ convert_short3_sat_rte( float3 );
    short3 __OVERLOAD__ convert_short3_sat_rte( double3 );
    short4 __OVERLOAD__ convert_short4_sat_rte( uchar4 );
    short4 __OVERLOAD__ convert_short4_sat_rte( ushort4 );
    short4 __OVERLOAD__ convert_short4_sat_rte( uint4 );
    short4 __OVERLOAD__ convert_short4_sat_rte( ulong4 );
    short4 __OVERLOAD__ convert_short4_sat_rte( char4 );
    short4 __OVERLOAD__ convert_short4_sat_rte( short4 );
    short4 __OVERLOAD__ convert_short4_sat_rte( int4 );
    short4 __OVERLOAD__ convert_short4_sat_rte( long4 );
    short4 __OVERLOAD__ convert_short4_sat_rte( float4 );
    short4 __OVERLOAD__ convert_short4_sat_rte( double4 );
    short8 __OVERLOAD__ convert_short8_sat_rte( uchar8 );
    short8 __OVERLOAD__ convert_short8_sat_rte( ushort8 );
    short8 __OVERLOAD__ convert_short8_sat_rte( uint8 );
    short8 __OVERLOAD__ convert_short8_sat_rte( ulong8 );
    short8 __OVERLOAD__ convert_short8_sat_rte( char8 );
    short8 __OVERLOAD__ convert_short8_sat_rte( short8 );
    short8 __OVERLOAD__ convert_short8_sat_rte( int8 );
    short8 __OVERLOAD__ convert_short8_sat_rte( long8 );
    short8 __OVERLOAD__ convert_short8_sat_rte( float8 );
    short8 __OVERLOAD__ convert_short8_sat_rte( double8 );
    short16 __OVERLOAD__ convert_short16_sat_rte( uchar16 );
    short16 __OVERLOAD__ convert_short16_sat_rte( ushort16 );
    short16 __OVERLOAD__ convert_short16_sat_rte( uint16 );
    short16 __OVERLOAD__ convert_short16_sat_rte( ulong16 );
    short16 __OVERLOAD__ convert_short16_sat_rte( char16 );
    short16 __OVERLOAD__ convert_short16_sat_rte( short16 );
    short16 __OVERLOAD__ convert_short16_sat_rte( int16 );
    short16 __OVERLOAD__ convert_short16_sat_rte( long16 );
    short16 __OVERLOAD__ convert_short16_sat_rte( float16 );
    short16 __OVERLOAD__ convert_short16_sat_rte( double16 );
    // convert_short{,2,3,4,8,16}_sat_rtp: overloaded prototypes returning short
    // scalars/vectors. Each result width (scalar, 2, 3, 4, 8, 16) has ten
    // overloads, one per source element type, in the order uchar, ushort, uint,
    // ulong, char, short, int, long, float, double. Source and result always
    // have the same width.
    // NOTE(review): "_sat" presumably requests saturation and "_rtp" rounding
    // toward positive infinity, per the OpenCL explicit-conversion naming scheme;
    // the definitions are not visible here -- confirm against the implementation.
    short __OVERLOAD__ convert_short_sat_rtp( uchar );
    short __OVERLOAD__ convert_short_sat_rtp( ushort );
    short __OVERLOAD__ convert_short_sat_rtp( uint );
    short __OVERLOAD__ convert_short_sat_rtp( ulong );
    short __OVERLOAD__ convert_short_sat_rtp( char );
    short __OVERLOAD__ convert_short_sat_rtp( short );
    short __OVERLOAD__ convert_short_sat_rtp( int );
    short __OVERLOAD__ convert_short_sat_rtp( long );
    short __OVERLOAD__ convert_short_sat_rtp( float );
    short __OVERLOAD__ convert_short_sat_rtp( double );
    short2 __OVERLOAD__ convert_short2_sat_rtp( uchar2 );
    short2 __OVERLOAD__ convert_short2_sat_rtp( ushort2 );
    short2 __OVERLOAD__ convert_short2_sat_rtp( uint2 );
    short2 __OVERLOAD__ convert_short2_sat_rtp( ulong2 );
    short2 __OVERLOAD__ convert_short2_sat_rtp( char2 );
    short2 __OVERLOAD__ convert_short2_sat_rtp( short2 );
    short2 __OVERLOAD__ convert_short2_sat_rtp( int2 );
    short2 __OVERLOAD__ convert_short2_sat_rtp( long2 );
    short2 __OVERLOAD__ convert_short2_sat_rtp( float2 );
    short2 __OVERLOAD__ convert_short2_sat_rtp( double2 );
    short3 __OVERLOAD__ convert_short3_sat_rtp( uchar3 );
    short3 __OVERLOAD__ convert_short3_sat_rtp( ushort3 );
    short3 __OVERLOAD__ convert_short3_sat_rtp( uint3 );
    short3 __OVERLOAD__ convert_short3_sat_rtp( ulong3 );
    short3 __OVERLOAD__ convert_short3_sat_rtp( char3 );
    short3 __OVERLOAD__ convert_short3_sat_rtp( short3 );
    short3 __OVERLOAD__ convert_short3_sat_rtp( int3 );
    short3 __OVERLOAD__ convert_short3_sat_rtp( long3 );
    short3 __OVERLOAD__ convert_short3_sat_rtp( float3 );
    short3 __OVERLOAD__ convert_short3_sat_rtp( double3 );
    short4 __OVERLOAD__ convert_short4_sat_rtp( uchar4 );
    short4 __OVERLOAD__ convert_short4_sat_rtp( ushort4 );
    short4 __OVERLOAD__ convert_short4_sat_rtp( uint4 );
    short4 __OVERLOAD__ convert_short4_sat_rtp( ulong4 );
    short4 __OVERLOAD__ convert_short4_sat_rtp( char4 );
    short4 __OVERLOAD__ convert_short4_sat_rtp( short4 );
    short4 __OVERLOAD__ convert_short4_sat_rtp( int4 );
    short4 __OVERLOAD__ convert_short4_sat_rtp( long4 );
    short4 __OVERLOAD__ convert_short4_sat_rtp( float4 );
    short4 __OVERLOAD__ convert_short4_sat_rtp( double4 );
    short8 __OVERLOAD__ convert_short8_sat_rtp( uchar8 );
    short8 __OVERLOAD__ convert_short8_sat_rtp( ushort8 );
    short8 __OVERLOAD__ convert_short8_sat_rtp( uint8 );
    short8 __OVERLOAD__ convert_short8_sat_rtp( ulong8 );
    short8 __OVERLOAD__ convert_short8_sat_rtp( char8 );
    short8 __OVERLOAD__ convert_short8_sat_rtp( short8 );
    short8 __OVERLOAD__ convert_short8_sat_rtp( int8 );
    short8 __OVERLOAD__ convert_short8_sat_rtp( long8 );
    short8 __OVERLOAD__ convert_short8_sat_rtp( float8 );
    short8 __OVERLOAD__ convert_short8_sat_rtp( double8 );
    short16 __OVERLOAD__ convert_short16_sat_rtp( uchar16 );
    short16 __OVERLOAD__ convert_short16_sat_rtp( ushort16 );
    short16 __OVERLOAD__ convert_short16_sat_rtp( uint16 );
    short16 __OVERLOAD__ convert_short16_sat_rtp( ulong16 );
    short16 __OVERLOAD__ convert_short16_sat_rtp( char16 );
    short16 __OVERLOAD__ convert_short16_sat_rtp( short16 );
    short16 __OVERLOAD__ convert_short16_sat_rtp( int16 );
    short16 __OVERLOAD__ convert_short16_sat_rtp( long16 );
    short16 __OVERLOAD__ convert_short16_sat_rtp( float16 );
    short16 __OVERLOAD__ convert_short16_sat_rtp( double16 );
    // convert_short{,2,3,4,8,16}_sat_rtn: overloaded prototypes returning short
    // scalars/vectors. Each result width (scalar, 2, 3, 4, 8, 16) has ten
    // overloads, one per source element type, in the order uchar, ushort, uint,
    // ulong, char, short, int, long, float, double. Source and result always
    // have the same width.
    // NOTE(review): "_sat" presumably requests saturation and "_rtn" rounding
    // toward negative infinity, per the OpenCL explicit-conversion naming scheme;
    // the definitions are not visible here -- confirm against the implementation.
    short __OVERLOAD__ convert_short_sat_rtn( uchar );
    short __OVERLOAD__ convert_short_sat_rtn( ushort );
    short __OVERLOAD__ convert_short_sat_rtn( uint );
    short __OVERLOAD__ convert_short_sat_rtn( ulong );
    short __OVERLOAD__ convert_short_sat_rtn( char );
    short __OVERLOAD__ convert_short_sat_rtn( short );
    short __OVERLOAD__ convert_short_sat_rtn( int );
    short __OVERLOAD__ convert_short_sat_rtn( long );
    short __OVERLOAD__ convert_short_sat_rtn( float );
    short __OVERLOAD__ convert_short_sat_rtn( double );
    short2 __OVERLOAD__ convert_short2_sat_rtn( uchar2 );
    short2 __OVERLOAD__ convert_short2_sat_rtn( ushort2 );
    short2 __OVERLOAD__ convert_short2_sat_rtn( uint2 );
    short2 __OVERLOAD__ convert_short2_sat_rtn( ulong2 );
    short2 __OVERLOAD__ convert_short2_sat_rtn( char2 );
    short2 __OVERLOAD__ convert_short2_sat_rtn( short2 );
    short2 __OVERLOAD__ convert_short2_sat_rtn( int2 );
    short2 __OVERLOAD__ convert_short2_sat_rtn( long2 );
    short2 __OVERLOAD__ convert_short2_sat_rtn( float2 );
    short2 __OVERLOAD__ convert_short2_sat_rtn( double2 );
    short3 __OVERLOAD__ convert_short3_sat_rtn( uchar3 );
    short3 __OVERLOAD__ convert_short3_sat_rtn( ushort3 );
    short3 __OVERLOAD__ convert_short3_sat_rtn( uint3 );
    short3 __OVERLOAD__ convert_short3_sat_rtn( ulong3 );
    short3 __OVERLOAD__ convert_short3_sat_rtn( char3 );
    short3 __OVERLOAD__ convert_short3_sat_rtn( short3 );
    short3 __OVERLOAD__ convert_short3_sat_rtn( int3 );
    short3 __OVERLOAD__ convert_short3_sat_rtn( long3 );
    short3 __OVERLOAD__ convert_short3_sat_rtn( float3 );
    short3 __OVERLOAD__ convert_short3_sat_rtn( double3 );
    short4 __OVERLOAD__ convert_short4_sat_rtn( uchar4 );
    short4 __OVERLOAD__ convert_short4_sat_rtn( ushort4 );
    short4 __OVERLOAD__ convert_short4_sat_rtn( uint4 );
    short4 __OVERLOAD__ convert_short4_sat_rtn( ulong4 );
    short4 __OVERLOAD__ convert_short4_sat_rtn( char4 );
    short4 __OVERLOAD__ convert_short4_sat_rtn( short4 );
    short4 __OVERLOAD__ convert_short4_sat_rtn( int4 );
    short4 __OVERLOAD__ convert_short4_sat_rtn( long4 );
    short4 __OVERLOAD__ convert_short4_sat_rtn( float4 );
    short4 __OVERLOAD__ convert_short4_sat_rtn( double4 );
    short8 __OVERLOAD__ convert_short8_sat_rtn( uchar8 );
    short8 __OVERLOAD__ convert_short8_sat_rtn( ushort8 );
    short8 __OVERLOAD__ convert_short8_sat_rtn( uint8 );
    short8 __OVERLOAD__ convert_short8_sat_rtn( ulong8 );
    short8 __OVERLOAD__ convert_short8_sat_rtn( char8 );
    short8 __OVERLOAD__ convert_short8_sat_rtn( short8 );
    short8 __OVERLOAD__ convert_short8_sat_rtn( int8 );
    short8 __OVERLOAD__ convert_short8_sat_rtn( long8 );
    short8 __OVERLOAD__ convert_short8_sat_rtn( float8 );
    short8 __OVERLOAD__ convert_short8_sat_rtn( double8 );
    short16 __OVERLOAD__ convert_short16_sat_rtn( uchar16 );
    short16 __OVERLOAD__ convert_short16_sat_rtn( ushort16 );
    short16 __OVERLOAD__ convert_short16_sat_rtn( uint16 );
    short16 __OVERLOAD__ convert_short16_sat_rtn( ulong16 );
    short16 __OVERLOAD__ convert_short16_sat_rtn( char16 );
    short16 __OVERLOAD__ convert_short16_sat_rtn( short16 );
    short16 __OVERLOAD__ convert_short16_sat_rtn( int16 );
    short16 __OVERLOAD__ convert_short16_sat_rtn( long16 );
    short16 __OVERLOAD__ convert_short16_sat_rtn( float16 );
    short16 __OVERLOAD__ convert_short16_sat_rtn( double16 );
    // convert_short[n]_sat_rtz: conversions to short / shortN, declared for the
    // scalar and for 2-, 3-, 4-, 8- and 16-element vectors. Each width has one
    // overload per source element type, in the order uchar, ushort, uint, ulong,
    // char, short, int, long, float, double; the result width matches the argument.
    // NOTE(review): _sat / _rtz presumably mean saturating / round toward zero
    // per the OpenCL C conversion naming -- confirm in the spec.
    short __OVERLOAD__ convert_short_sat_rtz( uchar );
    short __OVERLOAD__ convert_short_sat_rtz( ushort );
    short __OVERLOAD__ convert_short_sat_rtz( uint );
    short __OVERLOAD__ convert_short_sat_rtz( ulong );
    short __OVERLOAD__ convert_short_sat_rtz( char );
    short __OVERLOAD__ convert_short_sat_rtz( short );
    short __OVERLOAD__ convert_short_sat_rtz( int );
    short __OVERLOAD__ convert_short_sat_rtz( long );
    short __OVERLOAD__ convert_short_sat_rtz( float );
    short __OVERLOAD__ convert_short_sat_rtz( double );
    short2 __OVERLOAD__ convert_short2_sat_rtz( uchar2 );
    short2 __OVERLOAD__ convert_short2_sat_rtz( ushort2 );
    short2 __OVERLOAD__ convert_short2_sat_rtz( uint2 );
    short2 __OVERLOAD__ convert_short2_sat_rtz( ulong2 );
    short2 __OVERLOAD__ convert_short2_sat_rtz( char2 );
    short2 __OVERLOAD__ convert_short2_sat_rtz( short2 );
    short2 __OVERLOAD__ convert_short2_sat_rtz( int2 );
    short2 __OVERLOAD__ convert_short2_sat_rtz( long2 );
    short2 __OVERLOAD__ convert_short2_sat_rtz( float2 );
    short2 __OVERLOAD__ convert_short2_sat_rtz( double2 );
    short3 __OVERLOAD__ convert_short3_sat_rtz( uchar3 );
    short3 __OVERLOAD__ convert_short3_sat_rtz( ushort3 );
    short3 __OVERLOAD__ convert_short3_sat_rtz( uint3 );
    short3 __OVERLOAD__ convert_short3_sat_rtz( ulong3 );
    short3 __OVERLOAD__ convert_short3_sat_rtz( char3 );
    short3 __OVERLOAD__ convert_short3_sat_rtz( short3 );
    short3 __OVERLOAD__ convert_short3_sat_rtz( int3 );
    short3 __OVERLOAD__ convert_short3_sat_rtz( long3 );
    short3 __OVERLOAD__ convert_short3_sat_rtz( float3 );
    short3 __OVERLOAD__ convert_short3_sat_rtz( double3 );
    short4 __OVERLOAD__ convert_short4_sat_rtz( uchar4 );
    short4 __OVERLOAD__ convert_short4_sat_rtz( ushort4 );
    short4 __OVERLOAD__ convert_short4_sat_rtz( uint4 );
    short4 __OVERLOAD__ convert_short4_sat_rtz( ulong4 );
    short4 __OVERLOAD__ convert_short4_sat_rtz( char4 );
    short4 __OVERLOAD__ convert_short4_sat_rtz( short4 );
    short4 __OVERLOAD__ convert_short4_sat_rtz( int4 );
    short4 __OVERLOAD__ convert_short4_sat_rtz( long4 );
    short4 __OVERLOAD__ convert_short4_sat_rtz( float4 );
    short4 __OVERLOAD__ convert_short4_sat_rtz( double4 );
    short8 __OVERLOAD__ convert_short8_sat_rtz( uchar8 );
    short8 __OVERLOAD__ convert_short8_sat_rtz( ushort8 );
    short8 __OVERLOAD__ convert_short8_sat_rtz( uint8 );
    short8 __OVERLOAD__ convert_short8_sat_rtz( ulong8 );
    short8 __OVERLOAD__ convert_short8_sat_rtz( char8 );
    short8 __OVERLOAD__ convert_short8_sat_rtz( short8 );
    short8 __OVERLOAD__ convert_short8_sat_rtz( int8 );
    short8 __OVERLOAD__ convert_short8_sat_rtz( long8 );
    short8 __OVERLOAD__ convert_short8_sat_rtz( float8 );
    short8 __OVERLOAD__ convert_short8_sat_rtz( double8 );
    short16 __OVERLOAD__ convert_short16_sat_rtz( uchar16 );
    short16 __OVERLOAD__ convert_short16_sat_rtz( ushort16 );
    short16 __OVERLOAD__ convert_short16_sat_rtz( uint16 );
    short16 __OVERLOAD__ convert_short16_sat_rtz( ulong16 );
    short16 __OVERLOAD__ convert_short16_sat_rtz( char16 );
    short16 __OVERLOAD__ convert_short16_sat_rtz( short16 );
    short16 __OVERLOAD__ convert_short16_sat_rtz( int16 );
    short16 __OVERLOAD__ convert_short16_sat_rtz( long16 );
    short16 __OVERLOAD__ convert_short16_sat_rtz( float16 );
    short16 __OVERLOAD__ convert_short16_sat_rtz( double16 );
    // convert_int[n]: conversions to int / intN with no _sat or rounding-mode
    // suffix, declared for the scalar and for 2-, 3-, 4-, 8- and 16-element
    // vectors. Each width has one overload per source element type, in the order
    // uchar, ushort, uint, ulong, char, short, int, long, float, double; the
    // result width matches the argument.
    // NOTE(review): the default rounding/overflow behaviour is defined by the
    // OpenCL C specification, not by this header -- confirm there.
    int __OVERLOAD__ convert_int( uchar );
    int __OVERLOAD__ convert_int( ushort );
    int __OVERLOAD__ convert_int( uint );
    int __OVERLOAD__ convert_int( ulong );
    int __OVERLOAD__ convert_int( char );
    int __OVERLOAD__ convert_int( short );
    int __OVERLOAD__ convert_int( int );
    int __OVERLOAD__ convert_int( long );
    int __OVERLOAD__ convert_int( float );
    int __OVERLOAD__ convert_int( double );
    int2 __OVERLOAD__ convert_int2( uchar2 );
    int2 __OVERLOAD__ convert_int2( ushort2 );
    int2 __OVERLOAD__ convert_int2( uint2 );
    int2 __OVERLOAD__ convert_int2( ulong2 );
    int2 __OVERLOAD__ convert_int2( char2 );
    int2 __OVERLOAD__ convert_int2( short2 );
    int2 __OVERLOAD__ convert_int2( int2 );
    int2 __OVERLOAD__ convert_int2( long2 );
    int2 __OVERLOAD__ convert_int2( float2 );
    int2 __OVERLOAD__ convert_int2( double2 );
    int3 __OVERLOAD__ convert_int3( uchar3 );
    int3 __OVERLOAD__ convert_int3( ushort3 );
    int3 __OVERLOAD__ convert_int3( uint3 );
    int3 __OVERLOAD__ convert_int3( ulong3 );
    int3 __OVERLOAD__ convert_int3( char3 );
    int3 __OVERLOAD__ convert_int3( short3 );
    int3 __OVERLOAD__ convert_int3( int3 );
    int3 __OVERLOAD__ convert_int3( long3 );
    int3 __OVERLOAD__ convert_int3( float3 );
    int3 __OVERLOAD__ convert_int3( double3 );
    int4 __OVERLOAD__ convert_int4( uchar4 );
    int4 __OVERLOAD__ convert_int4( ushort4 );
    int4 __OVERLOAD__ convert_int4( uint4 );
    int4 __OVERLOAD__ convert_int4( ulong4 );
    int4 __OVERLOAD__ convert_int4( char4 );
    int4 __OVERLOAD__ convert_int4( short4 );
    int4 __OVERLOAD__ convert_int4( int4 );
    int4 __OVERLOAD__ convert_int4( long4 );
    int4 __OVERLOAD__ convert_int4( float4 );
    int4 __OVERLOAD__ convert_int4( double4 );
    int8 __OVERLOAD__ convert_int8( uchar8 );
    int8 __OVERLOAD__ convert_int8( ushort8 );
    int8 __OVERLOAD__ convert_int8( uint8 );
    int8 __OVERLOAD__ convert_int8( ulong8 );
    int8 __OVERLOAD__ convert_int8( char8 );
    int8 __OVERLOAD__ convert_int8( short8 );
    int8 __OVERLOAD__ convert_int8( int8 );
    int8 __OVERLOAD__ convert_int8( long8 );
    int8 __OVERLOAD__ convert_int8( float8 );
    int8 __OVERLOAD__ convert_int8( double8 );
    int16 __OVERLOAD__ convert_int16( uchar16 );
    int16 __OVERLOAD__ convert_int16( ushort16 );
    int16 __OVERLOAD__ convert_int16( uint16 );
    int16 __OVERLOAD__ convert_int16( ulong16 );
    int16 __OVERLOAD__ convert_int16( char16 );
    int16 __OVERLOAD__ convert_int16( short16 );
    int16 __OVERLOAD__ convert_int16( int16 );
    int16 __OVERLOAD__ convert_int16( long16 );
    int16 __OVERLOAD__ convert_int16( float16 );
    int16 __OVERLOAD__ convert_int16( double16 );
    // convert_int[n]_rte: conversions to int / intN, declared for the scalar and
    // for 2-, 3-, 4-, 8- and 16-element vectors. Each width has one overload per
    // source element type (uchar, ushort, uint, ulong, char, short, int, long,
    // float, double); the result width matches the argument.
    // NOTE(review): _rte presumably selects round-to-nearest-even per the
    // OpenCL C conversion naming -- confirm in the spec.
    int __OVERLOAD__ convert_int_rte( uchar );
    int __OVERLOAD__ convert_int_rte( ushort );
    int __OVERLOAD__ convert_int_rte( uint );
    int __OVERLOAD__ convert_int_rte( ulong );
    int __OVERLOAD__ convert_int_rte( char );
    int __OVERLOAD__ convert_int_rte( short );
    int __OVERLOAD__ convert_int_rte( int );
    int __OVERLOAD__ convert_int_rte( long );
    int __OVERLOAD__ convert_int_rte( float );
    int __OVERLOAD__ convert_int_rte( double );
    int2 __OVERLOAD__ convert_int2_rte( uchar2 );
    int2 __OVERLOAD__ convert_int2_rte( ushort2 );
    int2 __OVERLOAD__ convert_int2_rte( uint2 );
    int2 __OVERLOAD__ convert_int2_rte( ulong2 );
    int2 __OVERLOAD__ convert_int2_rte( char2 );
    int2 __OVERLOAD__ convert_int2_rte( short2 );
    int2 __OVERLOAD__ convert_int2_rte( int2 );
    int2 __OVERLOAD__ convert_int2_rte( long2 );
    int2 __OVERLOAD__ convert_int2_rte( float2 );
    int2 __OVERLOAD__ convert_int2_rte( double2 );
    int3 __OVERLOAD__ convert_int3_rte( uchar3 );
    int3 __OVERLOAD__ convert_int3_rte( ushort3 );
    int3 __OVERLOAD__ convert_int3_rte( uint3 );
    int3 __OVERLOAD__ convert_int3_rte( ulong3 );
    int3 __OVERLOAD__ convert_int3_rte( char3 );
    int3 __OVERLOAD__ convert_int3_rte( short3 );
    int3 __OVERLOAD__ convert_int3_rte( int3 );
    int3 __OVERLOAD__ convert_int3_rte( long3 );
    int3 __OVERLOAD__ convert_int3_rte( float3 );
    int3 __OVERLOAD__ convert_int3_rte( double3 );
    int4 __OVERLOAD__ convert_int4_rte( uchar4 );
    int4 __OVERLOAD__ convert_int4_rte( ushort4 );
    int4 __OVERLOAD__ convert_int4_rte( uint4 );
    int4 __OVERLOAD__ convert_int4_rte( ulong4 );
    int4 __OVERLOAD__ convert_int4_rte( char4 );
    int4 __OVERLOAD__ convert_int4_rte( short4 );
    int4 __OVERLOAD__ convert_int4_rte( int4 );
    int4 __OVERLOAD__ convert_int4_rte( long4 );
    int4 __OVERLOAD__ convert_int4_rte( float4 );
    int4 __OVERLOAD__ convert_int4_rte( double4 );
    int8 __OVERLOAD__ convert_int8_rte( uchar8 );
    int8 __OVERLOAD__ convert_int8_rte( ushort8 );
    int8 __OVERLOAD__ convert_int8_rte( uint8 );
    int8 __OVERLOAD__ convert_int8_rte( ulong8 );
    int8 __OVERLOAD__ convert_int8_rte( char8 );
    int8 __OVERLOAD__ convert_int8_rte( short8 );
    int8 __OVERLOAD__ convert_int8_rte( int8 );
    int8 __OVERLOAD__ convert_int8_rte( long8 );
    int8 __OVERLOAD__ convert_int8_rte( float8 );
    int8 __OVERLOAD__ convert_int8_rte( double8 );
    int16 __OVERLOAD__ convert_int16_rte( uchar16 );
    int16 __OVERLOAD__ convert_int16_rte( ushort16 );
    int16 __OVERLOAD__ convert_int16_rte( uint16 );
    int16 __OVERLOAD__ convert_int16_rte( ulong16 );
    int16 __OVERLOAD__ convert_int16_rte( char16 );
    int16 __OVERLOAD__ convert_int16_rte( short16 );
    int16 __OVERLOAD__ convert_int16_rte( int16 );
    int16 __OVERLOAD__ convert_int16_rte( long16 );
    int16 __OVERLOAD__ convert_int16_rte( float16 );
    int16 __OVERLOAD__ convert_int16_rte( double16 );
    // convert_int[n]_rtp: conversions to int / intN, declared for the scalar and
    // for 2-, 3-, 4-, 8- and 16-element vectors. Each width has one overload per
    // source element type (uchar, ushort, uint, ulong, char, short, int, long,
    // float, double); the result width matches the argument.
    // NOTE(review): _rtp presumably selects round toward positive infinity per
    // the OpenCL C conversion naming -- confirm in the spec.
    int __OVERLOAD__ convert_int_rtp( uchar );
    int __OVERLOAD__ convert_int_rtp( ushort );
    int __OVERLOAD__ convert_int_rtp( uint );
    int __OVERLOAD__ convert_int_rtp( ulong );
    int __OVERLOAD__ convert_int_rtp( char );
    int __OVERLOAD__ convert_int_rtp( short );
    int __OVERLOAD__ convert_int_rtp( int );
    int __OVERLOAD__ convert_int_rtp( long );
    int __OVERLOAD__ convert_int_rtp( float );
    int __OVERLOAD__ convert_int_rtp( double );
    int2 __OVERLOAD__ convert_int2_rtp( uchar2 );
    int2 __OVERLOAD__ convert_int2_rtp( ushort2 );
    int2 __OVERLOAD__ convert_int2_rtp( uint2 );
    int2 __OVERLOAD__ convert_int2_rtp( ulong2 );
    int2 __OVERLOAD__ convert_int2_rtp( char2 );
    int2 __OVERLOAD__ convert_int2_rtp( short2 );
    int2 __OVERLOAD__ convert_int2_rtp( int2 );
    int2 __OVERLOAD__ convert_int2_rtp( long2 );
    int2 __OVERLOAD__ convert_int2_rtp( float2 );
    int2 __OVERLOAD__ convert_int2_rtp( double2 );
    int3 __OVERLOAD__ convert_int3_rtp( uchar3 );
    int3 __OVERLOAD__ convert_int3_rtp( ushort3 );
    int3 __OVERLOAD__ convert_int3_rtp( uint3 );
    int3 __OVERLOAD__ convert_int3_rtp( ulong3 );
    int3 __OVERLOAD__ convert_int3_rtp( char3 );
    int3 __OVERLOAD__ convert_int3_rtp( short3 );
    int3 __OVERLOAD__ convert_int3_rtp( int3 );
    int3 __OVERLOAD__ convert_int3_rtp( long3 );
    int3 __OVERLOAD__ convert_int3_rtp( float3 );
    int3 __OVERLOAD__ convert_int3_rtp( double3 );
    int4 __OVERLOAD__ convert_int4_rtp( uchar4 );
    int4 __OVERLOAD__ convert_int4_rtp( ushort4 );
    int4 __OVERLOAD__ convert_int4_rtp( uint4 );
    int4 __OVERLOAD__ convert_int4_rtp( ulong4 );
    int4 __OVERLOAD__ convert_int4_rtp( char4 );
    int4 __OVERLOAD__ convert_int4_rtp( short4 );
    int4 __OVERLOAD__ convert_int4_rtp( int4 );
    int4 __OVERLOAD__ convert_int4_rtp( long4 );
    int4 __OVERLOAD__ convert_int4_rtp( float4 );
    int4 __OVERLOAD__ convert_int4_rtp( double4 );
    int8 __OVERLOAD__ convert_int8_rtp( uchar8 );
    int8 __OVERLOAD__ convert_int8_rtp( ushort8 );
    int8 __OVERLOAD__ convert_int8_rtp( uint8 );
    int8 __OVERLOAD__ convert_int8_rtp( ulong8 );
    int8 __OVERLOAD__ convert_int8_rtp( char8 );
    int8 __OVERLOAD__ convert_int8_rtp( short8 );
    int8 __OVERLOAD__ convert_int8_rtp( int8 );
    int8 __OVERLOAD__ convert_int8_rtp( long8 );
    int8 __OVERLOAD__ convert_int8_rtp( float8 );
    int8 __OVERLOAD__ convert_int8_rtp( double8 );
    int16 __OVERLOAD__ convert_int16_rtp( uchar16 );
    int16 __OVERLOAD__ convert_int16_rtp( ushort16 );
    int16 __OVERLOAD__ convert_int16_rtp( uint16 );
    int16 __OVERLOAD__ convert_int16_rtp( ulong16 );
    int16 __OVERLOAD__ convert_int16_rtp( char16 );
    int16 __OVERLOAD__ convert_int16_rtp( short16 );
    int16 __OVERLOAD__ convert_int16_rtp( int16 );
    int16 __OVERLOAD__ convert_int16_rtp( long16 );
    int16 __OVERLOAD__ convert_int16_rtp( float16 );
    int16 __OVERLOAD__ convert_int16_rtp( double16 );
    // convert_int[n]_rtn: conversions to int / intN, declared for the scalar and
    // for 2-, 3-, 4-, 8- and 16-element vectors. Each width has one overload per
    // source element type (uchar, ushort, uint, ulong, char, short, int, long,
    // float, double); the result width matches the argument.
    // NOTE(review): _rtn presumably selects round toward negative infinity per
    // the OpenCL C conversion naming -- confirm in the spec.
    int __OVERLOAD__ convert_int_rtn( uchar );
    int __OVERLOAD__ convert_int_rtn( ushort );
    int __OVERLOAD__ convert_int_rtn( uint );
    int __OVERLOAD__ convert_int_rtn( ulong );
    int __OVERLOAD__ convert_int_rtn( char );
    int __OVERLOAD__ convert_int_rtn( short );
    int __OVERLOAD__ convert_int_rtn( int );
    int __OVERLOAD__ convert_int_rtn( long );
    int __OVERLOAD__ convert_int_rtn( float );
    int __OVERLOAD__ convert_int_rtn( double );
    int2 __OVERLOAD__ convert_int2_rtn( uchar2 );
    int2 __OVERLOAD__ convert_int2_rtn( ushort2 );
    int2 __OVERLOAD__ convert_int2_rtn( uint2 );
    int2 __OVERLOAD__ convert_int2_rtn( ulong2 );
    int2 __OVERLOAD__ convert_int2_rtn( char2 );
    int2 __OVERLOAD__ convert_int2_rtn( short2 );
    int2 __OVERLOAD__ convert_int2_rtn( int2 );
    int2 __OVERLOAD__ convert_int2_rtn( long2 );
    int2 __OVERLOAD__ convert_int2_rtn( float2 );
    int2 __OVERLOAD__ convert_int2_rtn( double2 );
    int3 __OVERLOAD__ convert_int3_rtn( uchar3 );
    int3 __OVERLOAD__ convert_int3_rtn( ushort3 );
    int3 __OVERLOAD__ convert_int3_rtn( uint3 );
    int3 __OVERLOAD__ convert_int3_rtn( ulong3 );
    int3 __OVERLOAD__ convert_int3_rtn( char3 );
    int3 __OVERLOAD__ convert_int3_rtn( short3 );
    int3 __OVERLOAD__ convert_int3_rtn( int3 );
    int3 __OVERLOAD__ convert_int3_rtn( long3 );
    int3 __OVERLOAD__ convert_int3_rtn( float3 );
    int3 __OVERLOAD__ convert_int3_rtn( double3 );
    int4 __OVERLOAD__ convert_int4_rtn( uchar4 );
    int4 __OVERLOAD__ convert_int4_rtn( ushort4 );
    int4 __OVERLOAD__ convert_int4_rtn( uint4 );
    int4 __OVERLOAD__ convert_int4_rtn( ulong4 );
    int4 __OVERLOAD__ convert_int4_rtn( char4 );
    int4 __OVERLOAD__ convert_int4_rtn( short4 );
    int4 __OVERLOAD__ convert_int4_rtn( int4 );
    int4 __OVERLOAD__ convert_int4_rtn( long4 );
    int4 __OVERLOAD__ convert_int4_rtn( float4 );
    int4 __OVERLOAD__ convert_int4_rtn( double4 );
    int8 __OVERLOAD__ convert_int8_rtn( uchar8 );
    int8 __OVERLOAD__ convert_int8_rtn( ushort8 );
    int8 __OVERLOAD__ convert_int8_rtn( uint8 );
    int8 __OVERLOAD__ convert_int8_rtn( ulong8 );
    int8 __OVERLOAD__ convert_int8_rtn( char8 );
    int8 __OVERLOAD__ convert_int8_rtn( short8 );
    int8 __OVERLOAD__ convert_int8_rtn( int8 );
    int8 __OVERLOAD__ convert_int8_rtn( long8 );
    int8 __OVERLOAD__ convert_int8_rtn( float8 );
    int8 __OVERLOAD__ convert_int8_rtn( double8 );
    int16 __OVERLOAD__ convert_int16_rtn( uchar16 );
    int16 __OVERLOAD__ convert_int16_rtn( ushort16 );
    int16 __OVERLOAD__ convert_int16_rtn( uint16 );
    int16 __OVERLOAD__ convert_int16_rtn( ulong16 );
    int16 __OVERLOAD__ convert_int16_rtn( char16 );
    int16 __OVERLOAD__ convert_int16_rtn( short16 );
    int16 __OVERLOAD__ convert_int16_rtn( int16 );
    int16 __OVERLOAD__ convert_int16_rtn( long16 );
    int16 __OVERLOAD__ convert_int16_rtn( float16 );
    int16 __OVERLOAD__ convert_int16_rtn( double16 );
    // convert_int[n]_rtz: conversions to int / intN, declared for the scalar and
    // for 2-, 3-, 4-, 8- and 16-element vectors. Each width has one overload per
    // source element type (uchar, ushort, uint, ulong, char, short, int, long,
    // float, double); the result width matches the argument.
    // NOTE(review): _rtz presumably selects round toward zero per the OpenCL C
    // conversion naming -- confirm in the spec.
    int __OVERLOAD__ convert_int_rtz( uchar );
    int __OVERLOAD__ convert_int_rtz( ushort );
    int __OVERLOAD__ convert_int_rtz( uint );
    int __OVERLOAD__ convert_int_rtz( ulong );
    int __OVERLOAD__ convert_int_rtz( char );
    int __OVERLOAD__ convert_int_rtz( short );
    int __OVERLOAD__ convert_int_rtz( int );
    int __OVERLOAD__ convert_int_rtz( long );
    int __OVERLOAD__ convert_int_rtz( float );
    int __OVERLOAD__ convert_int_rtz( double );
    int2 __OVERLOAD__ convert_int2_rtz( uchar2 );
    int2 __OVERLOAD__ convert_int2_rtz( ushort2 );
    int2 __OVERLOAD__ convert_int2_rtz( uint2 );
    int2 __OVERLOAD__ convert_int2_rtz( ulong2 );
    int2 __OVERLOAD__ convert_int2_rtz( char2 );
    int2 __OVERLOAD__ convert_int2_rtz( short2 );
    int2 __OVERLOAD__ convert_int2_rtz( int2 );
    int2 __OVERLOAD__ convert_int2_rtz( long2 );
    int2 __OVERLOAD__ convert_int2_rtz( float2 );
    int2 __OVERLOAD__ convert_int2_rtz( double2 );
    int3 __OVERLOAD__ convert_int3_rtz( uchar3 );
    int3 __OVERLOAD__ convert_int3_rtz( ushort3 );
    int3 __OVERLOAD__ convert_int3_rtz( uint3 );
    int3 __OVERLOAD__ convert_int3_rtz( ulong3 );
    int3 __OVERLOAD__ convert_int3_rtz( char3 );
    int3 __OVERLOAD__ convert_int3_rtz( short3 );
    int3 __OVERLOAD__ convert_int3_rtz( int3 );
    int3 __OVERLOAD__ convert_int3_rtz( long3 );
    int3 __OVERLOAD__ convert_int3_rtz( float3 );
    int3 __OVERLOAD__ convert_int3_rtz( double3 );
    int4 __OVERLOAD__ convert_int4_rtz( uchar4 );
    int4 __OVERLOAD__ convert_int4_rtz( ushort4 );
    int4 __OVERLOAD__ convert_int4_rtz( uint4 );
    int4 __OVERLOAD__ convert_int4_rtz( ulong4 );
    int4 __OVERLOAD__ convert_int4_rtz( char4 );
    int4 __OVERLOAD__ convert_int4_rtz( short4 );
    int4 __OVERLOAD__ convert_int4_rtz( int4 );
    int4 __OVERLOAD__ convert_int4_rtz( long4 );
    int4 __OVERLOAD__ convert_int4_rtz( float4 );
    int4 __OVERLOAD__ convert_int4_rtz( double4 );
    int8 __OVERLOAD__ convert_int8_rtz( uchar8 );
    int8 __OVERLOAD__ convert_int8_rtz( ushort8 );
    int8 __OVERLOAD__ convert_int8_rtz( uint8 );
    int8 __OVERLOAD__ convert_int8_rtz( ulong8 );
    int8 __OVERLOAD__ convert_int8_rtz( char8 );
    int8 __OVERLOAD__ convert_int8_rtz( short8 );
    int8 __OVERLOAD__ convert_int8_rtz( int8 );
    int8 __OVERLOAD__ convert_int8_rtz( long8 );
    int8 __OVERLOAD__ convert_int8_rtz( float8 );
    int8 __OVERLOAD__ convert_int8_rtz( double8 );
    int16 __OVERLOAD__ convert_int16_rtz( uchar16 );
    int16 __OVERLOAD__ convert_int16_rtz( ushort16 );
    int16 __OVERLOAD__ convert_int16_rtz( uint16 );
    int16 __OVERLOAD__ convert_int16_rtz( ulong16 );
    int16 __OVERLOAD__ convert_int16_rtz( char16 );
    int16 __OVERLOAD__ convert_int16_rtz( short16 );
    int16 __OVERLOAD__ convert_int16_rtz( int16 );
    int16 __OVERLOAD__ convert_int16_rtz( long16 );
    int16 __OVERLOAD__ convert_int16_rtz( float16 );
    int16 __OVERLOAD__ convert_int16_rtz( double16 );
    // convert_int[n]_sat: conversions to int / intN with no rounding-mode suffix,
    // declared for the scalar and for 2-, 3-, 4-, 8- and 16-element vectors.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); the result width matches
    // the argument.
    // NOTE(review): _sat presumably means out-of-range values saturate to the
    // int limits per the OpenCL C conversion naming -- confirm in the spec.
    int __OVERLOAD__ convert_int_sat( uchar );
    int __OVERLOAD__ convert_int_sat( ushort );
    int __OVERLOAD__ convert_int_sat( uint );
    int __OVERLOAD__ convert_int_sat( ulong );
    int __OVERLOAD__ convert_int_sat( char );
    int __OVERLOAD__ convert_int_sat( short );
    int __OVERLOAD__ convert_int_sat( int );
    int __OVERLOAD__ convert_int_sat( long );
    int __OVERLOAD__ convert_int_sat( float );
    int __OVERLOAD__ convert_int_sat( double );
    int2 __OVERLOAD__ convert_int2_sat( uchar2 );
    int2 __OVERLOAD__ convert_int2_sat( ushort2 );
    int2 __OVERLOAD__ convert_int2_sat( uint2 );
    int2 __OVERLOAD__ convert_int2_sat( ulong2 );
    int2 __OVERLOAD__ convert_int2_sat( char2 );
    int2 __OVERLOAD__ convert_int2_sat( short2 );
    int2 __OVERLOAD__ convert_int2_sat( int2 );
    int2 __OVERLOAD__ convert_int2_sat( long2 );
    int2 __OVERLOAD__ convert_int2_sat( float2 );
    int2 __OVERLOAD__ convert_int2_sat( double2 );
    int3 __OVERLOAD__ convert_int3_sat( uchar3 );
    int3 __OVERLOAD__ convert_int3_sat( ushort3 );
    int3 __OVERLOAD__ convert_int3_sat( uint3 );
    int3 __OVERLOAD__ convert_int3_sat( ulong3 );
    int3 __OVERLOAD__ convert_int3_sat( char3 );
    int3 __OVERLOAD__ convert_int3_sat( short3 );
    int3 __OVERLOAD__ convert_int3_sat( int3 );
    int3 __OVERLOAD__ convert_int3_sat( long3 );
    int3 __OVERLOAD__ convert_int3_sat( float3 );
    int3 __OVERLOAD__ convert_int3_sat( double3 );
    int4 __OVERLOAD__ convert_int4_sat( uchar4 );
    int4 __OVERLOAD__ convert_int4_sat( ushort4 );
    int4 __OVERLOAD__ convert_int4_sat( uint4 );
    int4 __OVERLOAD__ convert_int4_sat( ulong4 );
    int4 __OVERLOAD__ convert_int4_sat( char4 );
    int4 __OVERLOAD__ convert_int4_sat( short4 );
    int4 __OVERLOAD__ convert_int4_sat( int4 );
    int4 __OVERLOAD__ convert_int4_sat( long4 );
    int4 __OVERLOAD__ convert_int4_sat( float4 );
    int4 __OVERLOAD__ convert_int4_sat( double4 );
    int8 __OVERLOAD__ convert_int8_sat( uchar8 );
    int8 __OVERLOAD__ convert_int8_sat( ushort8 );
    int8 __OVERLOAD__ convert_int8_sat( uint8 );
    int8 __OVERLOAD__ convert_int8_sat( ulong8 );
    int8 __OVERLOAD__ convert_int8_sat( char8 );
    int8 __OVERLOAD__ convert_int8_sat( short8 );
    int8 __OVERLOAD__ convert_int8_sat( int8 );
    int8 __OVERLOAD__ convert_int8_sat( long8 );
    int8 __OVERLOAD__ convert_int8_sat( float8 );
    int8 __OVERLOAD__ convert_int8_sat( double8 );
    int16 __OVERLOAD__ convert_int16_sat( uchar16 );
    int16 __OVERLOAD__ convert_int16_sat( ushort16 );
    int16 __OVERLOAD__ convert_int16_sat( uint16 );
    int16 __OVERLOAD__ convert_int16_sat( ulong16 );
    int16 __OVERLOAD__ convert_int16_sat( char16 );
    int16 __OVERLOAD__ convert_int16_sat( short16 );
    int16 __OVERLOAD__ convert_int16_sat( int16 );
    int16 __OVERLOAD__ convert_int16_sat( long16 );
    int16 __OVERLOAD__ convert_int16_sat( float16 );
    int16 __OVERLOAD__ convert_int16_sat( double16 );
    // convert_int[n]_sat_rte: conversions to int / intN, declared for the scalar
    // and for 2-, 3-, 4-, 8- and 16-element vectors. Each width has one overload
    // per source element type (uchar, ushort, uint, ulong, char, short, int,
    // long, float, double); the result width matches the argument.
    // NOTE(review): _sat / _rte presumably mean saturating / round-to-nearest-even
    // per the OpenCL C conversion naming -- confirm in the spec.
    int __OVERLOAD__ convert_int_sat_rte( uchar );
    int __OVERLOAD__ convert_int_sat_rte( ushort );
    int __OVERLOAD__ convert_int_sat_rte( uint );
    int __OVERLOAD__ convert_int_sat_rte( ulong );
    int __OVERLOAD__ convert_int_sat_rte( char );
    int __OVERLOAD__ convert_int_sat_rte( short );
    int __OVERLOAD__ convert_int_sat_rte( int );
    int __OVERLOAD__ convert_int_sat_rte( long );
    int __OVERLOAD__ convert_int_sat_rte( float );
    int __OVERLOAD__ convert_int_sat_rte( double );
    int2 __OVERLOAD__ convert_int2_sat_rte( uchar2 );
    int2 __OVERLOAD__ convert_int2_sat_rte( ushort2 );
    int2 __OVERLOAD__ convert_int2_sat_rte( uint2 );
    int2 __OVERLOAD__ convert_int2_sat_rte( ulong2 );
    int2 __OVERLOAD__ convert_int2_sat_rte( char2 );
    int2 __OVERLOAD__ convert_int2_sat_rte( short2 );
    int2 __OVERLOAD__ convert_int2_sat_rte( int2 );
    int2 __OVERLOAD__ convert_int2_sat_rte( long2 );
    int2 __OVERLOAD__ convert_int2_sat_rte( float2 );
    int2 __OVERLOAD__ convert_int2_sat_rte( double2 );
    int3 __OVERLOAD__ convert_int3_sat_rte( uchar3 );
    int3 __OVERLOAD__ convert_int3_sat_rte( ushort3 );
    int3 __OVERLOAD__ convert_int3_sat_rte( uint3 );
    int3 __OVERLOAD__ convert_int3_sat_rte( ulong3 );
    int3 __OVERLOAD__ convert_int3_sat_rte( char3 );
    int3 __OVERLOAD__ convert_int3_sat_rte( short3 );
    int3 __OVERLOAD__ convert_int3_sat_rte( int3 );
    int3 __OVERLOAD__ convert_int3_sat_rte( long3 );
    int3 __OVERLOAD__ convert_int3_sat_rte( float3 );
    int3 __OVERLOAD__ convert_int3_sat_rte( double3 );
    int4 __OVERLOAD__ convert_int4_sat_rte( uchar4 );
    int4 __OVERLOAD__ convert_int4_sat_rte( ushort4 );
    int4 __OVERLOAD__ convert_int4_sat_rte( uint4 );
    int4 __OVERLOAD__ convert_int4_sat_rte( ulong4 );
    int4 __OVERLOAD__ convert_int4_sat_rte( char4 );
    int4 __OVERLOAD__ convert_int4_sat_rte( short4 );
    int4 __OVERLOAD__ convert_int4_sat_rte( int4 );
    int4 __OVERLOAD__ convert_int4_sat_rte( long4 );
    int4 __OVERLOAD__ convert_int4_sat_rte( float4 );
    int4 __OVERLOAD__ convert_int4_sat_rte( double4 );
    int8 __OVERLOAD__ convert_int8_sat_rte( uchar8 );
    int8 __OVERLOAD__ convert_int8_sat_rte( ushort8 );
    int8 __OVERLOAD__ convert_int8_sat_rte( uint8 );
    int8 __OVERLOAD__ convert_int8_sat_rte( ulong8 );
    int8 __OVERLOAD__ convert_int8_sat_rte( char8 );
    int8 __OVERLOAD__ convert_int8_sat_rte( short8 );
    int8 __OVERLOAD__ convert_int8_sat_rte( int8 );
    int8 __OVERLOAD__ convert_int8_sat_rte( long8 );
    int8 __OVERLOAD__ convert_int8_sat_rte( float8 );
    int8 __OVERLOAD__ convert_int8_sat_rte( double8 );
    int16 __OVERLOAD__ convert_int16_sat_rte( uchar16 );
    int16 __OVERLOAD__ convert_int16_sat_rte( ushort16 );
    int16 __OVERLOAD__ convert_int16_sat_rte( uint16 );
    int16 __OVERLOAD__ convert_int16_sat_rte( ulong16 );
    int16 __OVERLOAD__ convert_int16_sat_rte( char16 );
    int16 __OVERLOAD__ convert_int16_sat_rte( short16 );
    int16 __OVERLOAD__ convert_int16_sat_rte( int16 );
    int16 __OVERLOAD__ convert_int16_sat_rte( long16 );
    int16 __OVERLOAD__ convert_int16_sat_rte( float16 );
    int16 __OVERLOAD__ convert_int16_sat_rte( double16 );
    // convert_int[n]_sat_rtp: conversions to int / intN, declared for the scalar
    // and for 2-, 3-, 4-, 8- and 16-element vectors. Each width has one overload
    // per source element type (uchar, ushort, uint, ulong, char, short, int,
    // long, float, double); the result width matches the argument.
    // NOTE(review): _sat / _rtp presumably mean saturating / round toward
    // positive infinity per the OpenCL C conversion naming -- confirm in the spec.
    int __OVERLOAD__ convert_int_sat_rtp( uchar );
    int __OVERLOAD__ convert_int_sat_rtp( ushort );
    int __OVERLOAD__ convert_int_sat_rtp( uint );
    int __OVERLOAD__ convert_int_sat_rtp( ulong );
    int __OVERLOAD__ convert_int_sat_rtp( char );
    int __OVERLOAD__ convert_int_sat_rtp( short );
    int __OVERLOAD__ convert_int_sat_rtp( int );
    int __OVERLOAD__ convert_int_sat_rtp( long );
    int __OVERLOAD__ convert_int_sat_rtp( float );
    int __OVERLOAD__ convert_int_sat_rtp( double );
    int2 __OVERLOAD__ convert_int2_sat_rtp( uchar2 );
    int2 __OVERLOAD__ convert_int2_sat_rtp( ushort2 );
    int2 __OVERLOAD__ convert_int2_sat_rtp( uint2 );
    int2 __OVERLOAD__ convert_int2_sat_rtp( ulong2 );
    int2 __OVERLOAD__ convert_int2_sat_rtp( char2 );
    int2 __OVERLOAD__ convert_int2_sat_rtp( short2 );
    int2 __OVERLOAD__ convert_int2_sat_rtp( int2 );
    int2 __OVERLOAD__ convert_int2_sat_rtp( long2 );
    int2 __OVERLOAD__ convert_int2_sat_rtp( float2 );
    int2 __OVERLOAD__ convert_int2_sat_rtp( double2 );
    int3 __OVERLOAD__ convert_int3_sat_rtp( uchar3 );
    int3 __OVERLOAD__ convert_int3_sat_rtp( ushort3 );
    int3 __OVERLOAD__ convert_int3_sat_rtp( uint3 );
    int3 __OVERLOAD__ convert_int3_sat_rtp( ulong3 );
    int3 __OVERLOAD__ convert_int3_sat_rtp( char3 );
    int3 __OVERLOAD__ convert_int3_sat_rtp( short3 );
    int3 __OVERLOAD__ convert_int3_sat_rtp( int3 );
    int3 __OVERLOAD__ convert_int3_sat_rtp( long3 );
    int3 __OVERLOAD__ convert_int3_sat_rtp( float3 );
    int3 __OVERLOAD__ convert_int3_sat_rtp( double3 );
    int4 __OVERLOAD__ convert_int4_sat_rtp( uchar4 );
    int4 __OVERLOAD__ convert_int4_sat_rtp( ushort4 );
    int4 __OVERLOAD__ convert_int4_sat_rtp( uint4 );
    int4 __OVERLOAD__ convert_int4_sat_rtp( ulong4 );
    int4 __OVERLOAD__ convert_int4_sat_rtp( char4 );
    int4 __OVERLOAD__ convert_int4_sat_rtp( short4 );
    int4 __OVERLOAD__ convert_int4_sat_rtp( int4 );
    int4 __OVERLOAD__ convert_int4_sat_rtp( long4 );
    int4 __OVERLOAD__ convert_int4_sat_rtp( float4 );
    int4 __OVERLOAD__ convert_int4_sat_rtp( double4 );
    int8 __OVERLOAD__ convert_int8_sat_rtp( uchar8 );
    int8 __OVERLOAD__ convert_int8_sat_rtp( ushort8 );
    int8 __OVERLOAD__ convert_int8_sat_rtp( uint8 );
    int8 __OVERLOAD__ convert_int8_sat_rtp( ulong8 );
    int8 __OVERLOAD__ convert_int8_sat_rtp( char8 );
    int8 __OVERLOAD__ convert_int8_sat_rtp( short8 );
    int8 __OVERLOAD__ convert_int8_sat_rtp( int8 );
    int8 __OVERLOAD__ convert_int8_sat_rtp( long8 );
    int8 __OVERLOAD__ convert_int8_sat_rtp( float8 );
    int8 __OVERLOAD__ convert_int8_sat_rtp( double8 );
    int16 __OVERLOAD__ convert_int16_sat_rtp( uchar16 );
    int16 __OVERLOAD__ convert_int16_sat_rtp( ushort16 );
    int16 __OVERLOAD__ convert_int16_sat_rtp( uint16 );
    int16 __OVERLOAD__ convert_int16_sat_rtp( ulong16 );
    int16 __OVERLOAD__ convert_int16_sat_rtp( char16 );
    int16 __OVERLOAD__ convert_int16_sat_rtp( short16 );
    int16 __OVERLOAD__ convert_int16_sat_rtp( int16 );
    int16 __OVERLOAD__ convert_int16_sat_rtp( long16 );
    int16 __OVERLOAD__ convert_int16_sat_rtp( float16 );
    int16 __OVERLOAD__ convert_int16_sat_rtp( double16 );
    // convert_int[n]_sat_rtn: conversions to int / intN, declared for the scalar
    // and for 2-, 3-, 4-, 8- and 16-element vectors. Each width has one overload
    // per source element type (uchar, ushort, uint, ulong, char, short, int,
    // long, float, double); the result width matches the argument.
    // NOTE(review): _sat / _rtn presumably mean saturating / round toward
    // negative infinity per the OpenCL C conversion naming -- confirm in the spec.
    int __OVERLOAD__ convert_int_sat_rtn( uchar );
    int __OVERLOAD__ convert_int_sat_rtn( ushort );
    int __OVERLOAD__ convert_int_sat_rtn( uint );
    int __OVERLOAD__ convert_int_sat_rtn( ulong );
    int __OVERLOAD__ convert_int_sat_rtn( char );
    int __OVERLOAD__ convert_int_sat_rtn( short );
    int __OVERLOAD__ convert_int_sat_rtn( int );
    int __OVERLOAD__ convert_int_sat_rtn( long );
    int __OVERLOAD__ convert_int_sat_rtn( float );
    int __OVERLOAD__ convert_int_sat_rtn( double );
    int2 __OVERLOAD__ convert_int2_sat_rtn( uchar2 );
    int2 __OVERLOAD__ convert_int2_sat_rtn( ushort2 );
    int2 __OVERLOAD__ convert_int2_sat_rtn( uint2 );
    int2 __OVERLOAD__ convert_int2_sat_rtn( ulong2 );
    int2 __OVERLOAD__ convert_int2_sat_rtn( char2 );
    int2 __OVERLOAD__ convert_int2_sat_rtn( short2 );
    int2 __OVERLOAD__ convert_int2_sat_rtn( int2 );
    int2 __OVERLOAD__ convert_int2_sat_rtn( long2 );
    int2 __OVERLOAD__ convert_int2_sat_rtn( float2 );
    int2 __OVERLOAD__ convert_int2_sat_rtn( double2 );
    int3 __OVERLOAD__ convert_int3_sat_rtn( uchar3 );
    int3 __OVERLOAD__ convert_int3_sat_rtn( ushort3 );
    int3 __OVERLOAD__ convert_int3_sat_rtn( uint3 );
    int3 __OVERLOAD__ convert_int3_sat_rtn( ulong3 );
    int3 __OVERLOAD__ convert_int3_sat_rtn( char3 );
    int3 __OVERLOAD__ convert_int3_sat_rtn( short3 );
    int3 __OVERLOAD__ convert_int3_sat_rtn( int3 );
    int3 __OVERLOAD__ convert_int3_sat_rtn( long3 );
    int3 __OVERLOAD__ convert_int3_sat_rtn( float3 );
    int3 __OVERLOAD__ convert_int3_sat_rtn( double3 );
    int4 __OVERLOAD__ convert_int4_sat_rtn( uchar4 );
    int4 __OVERLOAD__ convert_int4_sat_rtn( ushort4 );
    int4 __OVERLOAD__ convert_int4_sat_rtn( uint4 );
    int4 __OVERLOAD__ convert_int4_sat_rtn( ulong4 );
    int4 __OVERLOAD__ convert_int4_sat_rtn( char4 );
    int4 __OVERLOAD__ convert_int4_sat_rtn( short4 );
    int4 __OVERLOAD__ convert_int4_sat_rtn( int4 );
    int4 __OVERLOAD__ convert_int4_sat_rtn( long4 );
    int4 __OVERLOAD__ convert_int4_sat_rtn( float4 );
    int4 __OVERLOAD__ convert_int4_sat_rtn( double4 );
    int8 __OVERLOAD__ convert_int8_sat_rtn( uchar8 );
    int8 __OVERLOAD__ convert_int8_sat_rtn( ushort8 );
    int8 __OVERLOAD__ convert_int8_sat_rtn( uint8 );
    int8 __OVERLOAD__ convert_int8_sat_rtn( ulong8 );
    int8 __OVERLOAD__ convert_int8_sat_rtn( char8 );
    int8 __OVERLOAD__ convert_int8_sat_rtn( short8 );
    int8 __OVERLOAD__ convert_int8_sat_rtn( int8 );
    int8 __OVERLOAD__ convert_int8_sat_rtn( long8 );
    int8 __OVERLOAD__ convert_int8_sat_rtn( float8 );
    int8 __OVERLOAD__ convert_int8_sat_rtn( double8 );
    int16 __OVERLOAD__ convert_int16_sat_rtn( uchar16 );
    int16 __OVERLOAD__ convert_int16_sat_rtn( ushort16 );
    int16 __OVERLOAD__ convert_int16_sat_rtn( uint16 );
    int16 __OVERLOAD__ convert_int16_sat_rtn( ulong16 );
    int16 __OVERLOAD__ convert_int16_sat_rtn( char16 );
    int16 __OVERLOAD__ convert_int16_sat_rtn( short16 );
    int16 __OVERLOAD__ convert_int16_sat_rtn( int16 );
    int16 __OVERLOAD__ convert_int16_sat_rtn( long16 );
    int16 __OVERLOAD__ convert_int16_sat_rtn( float16 );
    int16 __OVERLOAD__ convert_int16_sat_rtn( double16 );
    // convert_int{,2,3,4,8,16}_sat_rtz: explicit conversions to int / intN.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); a vector source has the
    // same width as the result. In the OpenCL C naming scheme, _sat requests
    // saturation to the destination range and _rtz rounds toward zero.
    int __OVERLOAD__ convert_int_sat_rtz( uchar );
    int __OVERLOAD__ convert_int_sat_rtz( ushort );
    int __OVERLOAD__ convert_int_sat_rtz( uint );
    int __OVERLOAD__ convert_int_sat_rtz( ulong );
    int __OVERLOAD__ convert_int_sat_rtz( char );
    int __OVERLOAD__ convert_int_sat_rtz( short );
    int __OVERLOAD__ convert_int_sat_rtz( int );
    int __OVERLOAD__ convert_int_sat_rtz( long );
    int __OVERLOAD__ convert_int_sat_rtz( float );
    int __OVERLOAD__ convert_int_sat_rtz( double );
    int2 __OVERLOAD__ convert_int2_sat_rtz( uchar2 );
    int2 __OVERLOAD__ convert_int2_sat_rtz( ushort2 );
    int2 __OVERLOAD__ convert_int2_sat_rtz( uint2 );
    int2 __OVERLOAD__ convert_int2_sat_rtz( ulong2 );
    int2 __OVERLOAD__ convert_int2_sat_rtz( char2 );
    int2 __OVERLOAD__ convert_int2_sat_rtz( short2 );
    int2 __OVERLOAD__ convert_int2_sat_rtz( int2 );
    int2 __OVERLOAD__ convert_int2_sat_rtz( long2 );
    int2 __OVERLOAD__ convert_int2_sat_rtz( float2 );
    int2 __OVERLOAD__ convert_int2_sat_rtz( double2 );
    int3 __OVERLOAD__ convert_int3_sat_rtz( uchar3 );
    int3 __OVERLOAD__ convert_int3_sat_rtz( ushort3 );
    int3 __OVERLOAD__ convert_int3_sat_rtz( uint3 );
    int3 __OVERLOAD__ convert_int3_sat_rtz( ulong3 );
    int3 __OVERLOAD__ convert_int3_sat_rtz( char3 );
    int3 __OVERLOAD__ convert_int3_sat_rtz( short3 );
    int3 __OVERLOAD__ convert_int3_sat_rtz( int3 );
    int3 __OVERLOAD__ convert_int3_sat_rtz( long3 );
    int3 __OVERLOAD__ convert_int3_sat_rtz( float3 );
    int3 __OVERLOAD__ convert_int3_sat_rtz( double3 );
    int4 __OVERLOAD__ convert_int4_sat_rtz( uchar4 );
    int4 __OVERLOAD__ convert_int4_sat_rtz( ushort4 );
    int4 __OVERLOAD__ convert_int4_sat_rtz( uint4 );
    int4 __OVERLOAD__ convert_int4_sat_rtz( ulong4 );
    int4 __OVERLOAD__ convert_int4_sat_rtz( char4 );
    int4 __OVERLOAD__ convert_int4_sat_rtz( short4 );
    int4 __OVERLOAD__ convert_int4_sat_rtz( int4 );
    int4 __OVERLOAD__ convert_int4_sat_rtz( long4 );
    int4 __OVERLOAD__ convert_int4_sat_rtz( float4 );
    int4 __OVERLOAD__ convert_int4_sat_rtz( double4 );
    int8 __OVERLOAD__ convert_int8_sat_rtz( uchar8 );
    int8 __OVERLOAD__ convert_int8_sat_rtz( ushort8 );
    int8 __OVERLOAD__ convert_int8_sat_rtz( uint8 );
    int8 __OVERLOAD__ convert_int8_sat_rtz( ulong8 );
    int8 __OVERLOAD__ convert_int8_sat_rtz( char8 );
    int8 __OVERLOAD__ convert_int8_sat_rtz( short8 );
    int8 __OVERLOAD__ convert_int8_sat_rtz( int8 );
    int8 __OVERLOAD__ convert_int8_sat_rtz( long8 );
    int8 __OVERLOAD__ convert_int8_sat_rtz( float8 );
    int8 __OVERLOAD__ convert_int8_sat_rtz( double8 );
    int16 __OVERLOAD__ convert_int16_sat_rtz( uchar16 );
    int16 __OVERLOAD__ convert_int16_sat_rtz( ushort16 );
    int16 __OVERLOAD__ convert_int16_sat_rtz( uint16 );
    int16 __OVERLOAD__ convert_int16_sat_rtz( ulong16 );
    int16 __OVERLOAD__ convert_int16_sat_rtz( char16 );
    int16 __OVERLOAD__ convert_int16_sat_rtz( short16 );
    int16 __OVERLOAD__ convert_int16_sat_rtz( int16 );
    int16 __OVERLOAD__ convert_int16_sat_rtz( long16 );
    int16 __OVERLOAD__ convert_int16_sat_rtz( float16 );
    int16 __OVERLOAD__ convert_int16_sat_rtz( double16 );
    // convert_long{,2,3,4,8,16}: explicit conversions to long / longN with no
    // saturation or rounding suffix. Each width has one overload per source
    // element type (uchar, ushort, uint, ulong, char, short, int, long, float,
    // double); a vector source has the same width as the result.
    // NOTE(review): with no suffix the OpenCL C specification's default
    // rounding for conversions to integer types applies (round toward zero) --
    // confirm against the spec version this header targets.
    long __OVERLOAD__ convert_long( uchar );
    long __OVERLOAD__ convert_long( ushort );
    long __OVERLOAD__ convert_long( uint );
    long __OVERLOAD__ convert_long( ulong );
    long __OVERLOAD__ convert_long( char );
    long __OVERLOAD__ convert_long( short );
    long __OVERLOAD__ convert_long( int );
    long __OVERLOAD__ convert_long( long );
    long __OVERLOAD__ convert_long( float );
    long __OVERLOAD__ convert_long( double );
    long2 __OVERLOAD__ convert_long2( uchar2 );
    long2 __OVERLOAD__ convert_long2( ushort2 );
    long2 __OVERLOAD__ convert_long2( uint2 );
    long2 __OVERLOAD__ convert_long2( ulong2 );
    long2 __OVERLOAD__ convert_long2( char2 );
    long2 __OVERLOAD__ convert_long2( short2 );
    long2 __OVERLOAD__ convert_long2( int2 );
    long2 __OVERLOAD__ convert_long2( long2 );
    long2 __OVERLOAD__ convert_long2( float2 );
    long2 __OVERLOAD__ convert_long2( double2 );
    long3 __OVERLOAD__ convert_long3( uchar3 );
    long3 __OVERLOAD__ convert_long3( ushort3 );
    long3 __OVERLOAD__ convert_long3( uint3 );
    long3 __OVERLOAD__ convert_long3( ulong3 );
    long3 __OVERLOAD__ convert_long3( char3 );
    long3 __OVERLOAD__ convert_long3( short3 );
    long3 __OVERLOAD__ convert_long3( int3 );
    long3 __OVERLOAD__ convert_long3( long3 );
    long3 __OVERLOAD__ convert_long3( float3 );
    long3 __OVERLOAD__ convert_long3( double3 );
    long4 __OVERLOAD__ convert_long4( uchar4 );
    long4 __OVERLOAD__ convert_long4( ushort4 );
    long4 __OVERLOAD__ convert_long4( uint4 );
    long4 __OVERLOAD__ convert_long4( ulong4 );
    long4 __OVERLOAD__ convert_long4( char4 );
    long4 __OVERLOAD__ convert_long4( short4 );
    long4 __OVERLOAD__ convert_long4( int4 );
    long4 __OVERLOAD__ convert_long4( long4 );
    long4 __OVERLOAD__ convert_long4( float4 );
    long4 __OVERLOAD__ convert_long4( double4 );
    long8 __OVERLOAD__ convert_long8( uchar8 );
    long8 __OVERLOAD__ convert_long8( ushort8 );
    long8 __OVERLOAD__ convert_long8( uint8 );
    long8 __OVERLOAD__ convert_long8( ulong8 );
    long8 __OVERLOAD__ convert_long8( char8 );
    long8 __OVERLOAD__ convert_long8( short8 );
    long8 __OVERLOAD__ convert_long8( int8 );
    long8 __OVERLOAD__ convert_long8( long8 );
    long8 __OVERLOAD__ convert_long8( float8 );
    long8 __OVERLOAD__ convert_long8( double8 );
    long16 __OVERLOAD__ convert_long16( uchar16 );
    long16 __OVERLOAD__ convert_long16( ushort16 );
    long16 __OVERLOAD__ convert_long16( uint16 );
    long16 __OVERLOAD__ convert_long16( ulong16 );
    long16 __OVERLOAD__ convert_long16( char16 );
    long16 __OVERLOAD__ convert_long16( short16 );
    long16 __OVERLOAD__ convert_long16( int16 );
    long16 __OVERLOAD__ convert_long16( long16 );
    long16 __OVERLOAD__ convert_long16( float16 );
    long16 __OVERLOAD__ convert_long16( double16 );
    // convert_long{,2,3,4,8,16}_rte: explicit conversions to long / longN.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); a vector source has the
    // same width as the result. In the OpenCL C naming scheme, _rte rounds to
    // the nearest even value; no _sat suffix, so no saturation is requested.
    long __OVERLOAD__ convert_long_rte( uchar );
    long __OVERLOAD__ convert_long_rte( ushort );
    long __OVERLOAD__ convert_long_rte( uint );
    long __OVERLOAD__ convert_long_rte( ulong );
    long __OVERLOAD__ convert_long_rte( char );
    long __OVERLOAD__ convert_long_rte( short );
    long __OVERLOAD__ convert_long_rte( int );
    long __OVERLOAD__ convert_long_rte( long );
    long __OVERLOAD__ convert_long_rte( float );
    long __OVERLOAD__ convert_long_rte( double );
    long2 __OVERLOAD__ convert_long2_rte( uchar2 );
    long2 __OVERLOAD__ convert_long2_rte( ushort2 );
    long2 __OVERLOAD__ convert_long2_rte( uint2 );
    long2 __OVERLOAD__ convert_long2_rte( ulong2 );
    long2 __OVERLOAD__ convert_long2_rte( char2 );
    long2 __OVERLOAD__ convert_long2_rte( short2 );
    long2 __OVERLOAD__ convert_long2_rte( int2 );
    long2 __OVERLOAD__ convert_long2_rte( long2 );
    long2 __OVERLOAD__ convert_long2_rte( float2 );
    long2 __OVERLOAD__ convert_long2_rte( double2 );
    long3 __OVERLOAD__ convert_long3_rte( uchar3 );
    long3 __OVERLOAD__ convert_long3_rte( ushort3 );
    long3 __OVERLOAD__ convert_long3_rte( uint3 );
    long3 __OVERLOAD__ convert_long3_rte( ulong3 );
    long3 __OVERLOAD__ convert_long3_rte( char3 );
    long3 __OVERLOAD__ convert_long3_rte( short3 );
    long3 __OVERLOAD__ convert_long3_rte( int3 );
    long3 __OVERLOAD__ convert_long3_rte( long3 );
    long3 __OVERLOAD__ convert_long3_rte( float3 );
    long3 __OVERLOAD__ convert_long3_rte( double3 );
    long4 __OVERLOAD__ convert_long4_rte( uchar4 );
    long4 __OVERLOAD__ convert_long4_rte( ushort4 );
    long4 __OVERLOAD__ convert_long4_rte( uint4 );
    long4 __OVERLOAD__ convert_long4_rte( ulong4 );
    long4 __OVERLOAD__ convert_long4_rte( char4 );
    long4 __OVERLOAD__ convert_long4_rte( short4 );
    long4 __OVERLOAD__ convert_long4_rte( int4 );
    long4 __OVERLOAD__ convert_long4_rte( long4 );
    long4 __OVERLOAD__ convert_long4_rte( float4 );
    long4 __OVERLOAD__ convert_long4_rte( double4 );
    long8 __OVERLOAD__ convert_long8_rte( uchar8 );
    long8 __OVERLOAD__ convert_long8_rte( ushort8 );
    long8 __OVERLOAD__ convert_long8_rte( uint8 );
    long8 __OVERLOAD__ convert_long8_rte( ulong8 );
    long8 __OVERLOAD__ convert_long8_rte( char8 );
    long8 __OVERLOAD__ convert_long8_rte( short8 );
    long8 __OVERLOAD__ convert_long8_rte( int8 );
    long8 __OVERLOAD__ convert_long8_rte( long8 );
    long8 __OVERLOAD__ convert_long8_rte( float8 );
    long8 __OVERLOAD__ convert_long8_rte( double8 );
    long16 __OVERLOAD__ convert_long16_rte( uchar16 );
    long16 __OVERLOAD__ convert_long16_rte( ushort16 );
    long16 __OVERLOAD__ convert_long16_rte( uint16 );
    long16 __OVERLOAD__ convert_long16_rte( ulong16 );
    long16 __OVERLOAD__ convert_long16_rte( char16 );
    long16 __OVERLOAD__ convert_long16_rte( short16 );
    long16 __OVERLOAD__ convert_long16_rte( int16 );
    long16 __OVERLOAD__ convert_long16_rte( long16 );
    long16 __OVERLOAD__ convert_long16_rte( float16 );
    long16 __OVERLOAD__ convert_long16_rte( double16 );
    // convert_long{,2,3,4,8,16}_rtp: explicit conversions to long / longN.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); a vector source has the
    // same width as the result. In the OpenCL C naming scheme, _rtp rounds
    // toward positive infinity; no _sat suffix, so no saturation is requested.
    long __OVERLOAD__ convert_long_rtp( uchar );
    long __OVERLOAD__ convert_long_rtp( ushort );
    long __OVERLOAD__ convert_long_rtp( uint );
    long __OVERLOAD__ convert_long_rtp( ulong );
    long __OVERLOAD__ convert_long_rtp( char );
    long __OVERLOAD__ convert_long_rtp( short );
    long __OVERLOAD__ convert_long_rtp( int );
    long __OVERLOAD__ convert_long_rtp( long );
    long __OVERLOAD__ convert_long_rtp( float );
    long __OVERLOAD__ convert_long_rtp( double );
    long2 __OVERLOAD__ convert_long2_rtp( uchar2 );
    long2 __OVERLOAD__ convert_long2_rtp( ushort2 );
    long2 __OVERLOAD__ convert_long2_rtp( uint2 );
    long2 __OVERLOAD__ convert_long2_rtp( ulong2 );
    long2 __OVERLOAD__ convert_long2_rtp( char2 );
    long2 __OVERLOAD__ convert_long2_rtp( short2 );
    long2 __OVERLOAD__ convert_long2_rtp( int2 );
    long2 __OVERLOAD__ convert_long2_rtp( long2 );
    long2 __OVERLOAD__ convert_long2_rtp( float2 );
    long2 __OVERLOAD__ convert_long2_rtp( double2 );
    long3 __OVERLOAD__ convert_long3_rtp( uchar3 );
    long3 __OVERLOAD__ convert_long3_rtp( ushort3 );
    long3 __OVERLOAD__ convert_long3_rtp( uint3 );
    long3 __OVERLOAD__ convert_long3_rtp( ulong3 );
    long3 __OVERLOAD__ convert_long3_rtp( char3 );
    long3 __OVERLOAD__ convert_long3_rtp( short3 );
    long3 __OVERLOAD__ convert_long3_rtp( int3 );
    long3 __OVERLOAD__ convert_long3_rtp( long3 );
    long3 __OVERLOAD__ convert_long3_rtp( float3 );
    long3 __OVERLOAD__ convert_long3_rtp( double3 );
    long4 __OVERLOAD__ convert_long4_rtp( uchar4 );
    long4 __OVERLOAD__ convert_long4_rtp( ushort4 );
    long4 __OVERLOAD__ convert_long4_rtp( uint4 );
    long4 __OVERLOAD__ convert_long4_rtp( ulong4 );
    long4 __OVERLOAD__ convert_long4_rtp( char4 );
    long4 __OVERLOAD__ convert_long4_rtp( short4 );
    long4 __OVERLOAD__ convert_long4_rtp( int4 );
    long4 __OVERLOAD__ convert_long4_rtp( long4 );
    long4 __OVERLOAD__ convert_long4_rtp( float4 );
    long4 __OVERLOAD__ convert_long4_rtp( double4 );
    long8 __OVERLOAD__ convert_long8_rtp( uchar8 );
    long8 __OVERLOAD__ convert_long8_rtp( ushort8 );
    long8 __OVERLOAD__ convert_long8_rtp( uint8 );
    long8 __OVERLOAD__ convert_long8_rtp( ulong8 );
    long8 __OVERLOAD__ convert_long8_rtp( char8 );
    long8 __OVERLOAD__ convert_long8_rtp( short8 );
    long8 __OVERLOAD__ convert_long8_rtp( int8 );
    long8 __OVERLOAD__ convert_long8_rtp( long8 );
    long8 __OVERLOAD__ convert_long8_rtp( float8 );
    long8 __OVERLOAD__ convert_long8_rtp( double8 );
    long16 __OVERLOAD__ convert_long16_rtp( uchar16 );
    long16 __OVERLOAD__ convert_long16_rtp( ushort16 );
    long16 __OVERLOAD__ convert_long16_rtp( uint16 );
    long16 __OVERLOAD__ convert_long16_rtp( ulong16 );
    long16 __OVERLOAD__ convert_long16_rtp( char16 );
    long16 __OVERLOAD__ convert_long16_rtp( short16 );
    long16 __OVERLOAD__ convert_long16_rtp( int16 );
    long16 __OVERLOAD__ convert_long16_rtp( long16 );
    long16 __OVERLOAD__ convert_long16_rtp( float16 );
    long16 __OVERLOAD__ convert_long16_rtp( double16 );
    // convert_long{,2,3,4,8,16}_rtn: explicit conversions to long / longN.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); a vector source has the
    // same width as the result. In the OpenCL C naming scheme, _rtn rounds
    // toward negative infinity; no _sat suffix, so no saturation is requested.
    long __OVERLOAD__ convert_long_rtn( uchar );
    long __OVERLOAD__ convert_long_rtn( ushort );
    long __OVERLOAD__ convert_long_rtn( uint );
    long __OVERLOAD__ convert_long_rtn( ulong );
    long __OVERLOAD__ convert_long_rtn( char );
    long __OVERLOAD__ convert_long_rtn( short );
    long __OVERLOAD__ convert_long_rtn( int );
    long __OVERLOAD__ convert_long_rtn( long );
    long __OVERLOAD__ convert_long_rtn( float );
    long __OVERLOAD__ convert_long_rtn( double );
    long2 __OVERLOAD__ convert_long2_rtn( uchar2 );
    long2 __OVERLOAD__ convert_long2_rtn( ushort2 );
    long2 __OVERLOAD__ convert_long2_rtn( uint2 );
    long2 __OVERLOAD__ convert_long2_rtn( ulong2 );
    long2 __OVERLOAD__ convert_long2_rtn( char2 );
    long2 __OVERLOAD__ convert_long2_rtn( short2 );
    long2 __OVERLOAD__ convert_long2_rtn( int2 );
    long2 __OVERLOAD__ convert_long2_rtn( long2 );
    long2 __OVERLOAD__ convert_long2_rtn( float2 );
    long2 __OVERLOAD__ convert_long2_rtn( double2 );
    long3 __OVERLOAD__ convert_long3_rtn( uchar3 );
    long3 __OVERLOAD__ convert_long3_rtn( ushort3 );
    long3 __OVERLOAD__ convert_long3_rtn( uint3 );
    long3 __OVERLOAD__ convert_long3_rtn( ulong3 );
    long3 __OVERLOAD__ convert_long3_rtn( char3 );
    long3 __OVERLOAD__ convert_long3_rtn( short3 );
    long3 __OVERLOAD__ convert_long3_rtn( int3 );
    long3 __OVERLOAD__ convert_long3_rtn( long3 );
    long3 __OVERLOAD__ convert_long3_rtn( float3 );
    long3 __OVERLOAD__ convert_long3_rtn( double3 );
    long4 __OVERLOAD__ convert_long4_rtn( uchar4 );
    long4 __OVERLOAD__ convert_long4_rtn( ushort4 );
    long4 __OVERLOAD__ convert_long4_rtn( uint4 );
    long4 __OVERLOAD__ convert_long4_rtn( ulong4 );
    long4 __OVERLOAD__ convert_long4_rtn( char4 );
    long4 __OVERLOAD__ convert_long4_rtn( short4 );
    long4 __OVERLOAD__ convert_long4_rtn( int4 );
    long4 __OVERLOAD__ convert_long4_rtn( long4 );
    long4 __OVERLOAD__ convert_long4_rtn( float4 );
    long4 __OVERLOAD__ convert_long4_rtn( double4 );
    long8 __OVERLOAD__ convert_long8_rtn( uchar8 );
    long8 __OVERLOAD__ convert_long8_rtn( ushort8 );
    long8 __OVERLOAD__ convert_long8_rtn( uint8 );
    long8 __OVERLOAD__ convert_long8_rtn( ulong8 );
    long8 __OVERLOAD__ convert_long8_rtn( char8 );
    long8 __OVERLOAD__ convert_long8_rtn( short8 );
    long8 __OVERLOAD__ convert_long8_rtn( int8 );
    long8 __OVERLOAD__ convert_long8_rtn( long8 );
    long8 __OVERLOAD__ convert_long8_rtn( float8 );
    long8 __OVERLOAD__ convert_long8_rtn( double8 );
    long16 __OVERLOAD__ convert_long16_rtn( uchar16 );
    long16 __OVERLOAD__ convert_long16_rtn( ushort16 );
    long16 __OVERLOAD__ convert_long16_rtn( uint16 );
    long16 __OVERLOAD__ convert_long16_rtn( ulong16 );
    long16 __OVERLOAD__ convert_long16_rtn( char16 );
    long16 __OVERLOAD__ convert_long16_rtn( short16 );
    long16 __OVERLOAD__ convert_long16_rtn( int16 );
    long16 __OVERLOAD__ convert_long16_rtn( long16 );
    long16 __OVERLOAD__ convert_long16_rtn( float16 );
    long16 __OVERLOAD__ convert_long16_rtn( double16 );
    // convert_long{,2,3,4,8,16}_rtz: explicit conversions to long / longN.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); a vector source has the
    // same width as the result. In the OpenCL C naming scheme, _rtz rounds
    // toward zero; no _sat suffix, so no saturation is requested.
    long __OVERLOAD__ convert_long_rtz( uchar );
    long __OVERLOAD__ convert_long_rtz( ushort );
    long __OVERLOAD__ convert_long_rtz( uint );
    long __OVERLOAD__ convert_long_rtz( ulong );
    long __OVERLOAD__ convert_long_rtz( char );
    long __OVERLOAD__ convert_long_rtz( short );
    long __OVERLOAD__ convert_long_rtz( int );
    long __OVERLOAD__ convert_long_rtz( long );
    long __OVERLOAD__ convert_long_rtz( float );
    long __OVERLOAD__ convert_long_rtz( double );
    long2 __OVERLOAD__ convert_long2_rtz( uchar2 );
    long2 __OVERLOAD__ convert_long2_rtz( ushort2 );
    long2 __OVERLOAD__ convert_long2_rtz( uint2 );
    long2 __OVERLOAD__ convert_long2_rtz( ulong2 );
    long2 __OVERLOAD__ convert_long2_rtz( char2 );
    long2 __OVERLOAD__ convert_long2_rtz( short2 );
    long2 __OVERLOAD__ convert_long2_rtz( int2 );
    long2 __OVERLOAD__ convert_long2_rtz( long2 );
    long2 __OVERLOAD__ convert_long2_rtz( float2 );
    long2 __OVERLOAD__ convert_long2_rtz( double2 );
    long3 __OVERLOAD__ convert_long3_rtz( uchar3 );
    long3 __OVERLOAD__ convert_long3_rtz( ushort3 );
    long3 __OVERLOAD__ convert_long3_rtz( uint3 );
    long3 __OVERLOAD__ convert_long3_rtz( ulong3 );
    long3 __OVERLOAD__ convert_long3_rtz( char3 );
    long3 __OVERLOAD__ convert_long3_rtz( short3 );
    long3 __OVERLOAD__ convert_long3_rtz( int3 );
    long3 __OVERLOAD__ convert_long3_rtz( long3 );
    long3 __OVERLOAD__ convert_long3_rtz( float3 );
    long3 __OVERLOAD__ convert_long3_rtz( double3 );
    long4 __OVERLOAD__ convert_long4_rtz( uchar4 );
    long4 __OVERLOAD__ convert_long4_rtz( ushort4 );
    long4 __OVERLOAD__ convert_long4_rtz( uint4 );
    long4 __OVERLOAD__ convert_long4_rtz( ulong4 );
    long4 __OVERLOAD__ convert_long4_rtz( char4 );
    long4 __OVERLOAD__ convert_long4_rtz( short4 );
    long4 __OVERLOAD__ convert_long4_rtz( int4 );
    long4 __OVERLOAD__ convert_long4_rtz( long4 );
    long4 __OVERLOAD__ convert_long4_rtz( float4 );
    long4 __OVERLOAD__ convert_long4_rtz( double4 );
    long8 __OVERLOAD__ convert_long8_rtz( uchar8 );
    long8 __OVERLOAD__ convert_long8_rtz( ushort8 );
    long8 __OVERLOAD__ convert_long8_rtz( uint8 );
    long8 __OVERLOAD__ convert_long8_rtz( ulong8 );
    long8 __OVERLOAD__ convert_long8_rtz( char8 );
    long8 __OVERLOAD__ convert_long8_rtz( short8 );
    long8 __OVERLOAD__ convert_long8_rtz( int8 );
    long8 __OVERLOAD__ convert_long8_rtz( long8 );
    long8 __OVERLOAD__ convert_long8_rtz( float8 );
    long8 __OVERLOAD__ convert_long8_rtz( double8 );
    long16 __OVERLOAD__ convert_long16_rtz( uchar16 );
    long16 __OVERLOAD__ convert_long16_rtz( ushort16 );
    long16 __OVERLOAD__ convert_long16_rtz( uint16 );
    long16 __OVERLOAD__ convert_long16_rtz( ulong16 );
    long16 __OVERLOAD__ convert_long16_rtz( char16 );
    long16 __OVERLOAD__ convert_long16_rtz( short16 );
    long16 __OVERLOAD__ convert_long16_rtz( int16 );
    long16 __OVERLOAD__ convert_long16_rtz( long16 );
    long16 __OVERLOAD__ convert_long16_rtz( float16 );
    long16 __OVERLOAD__ convert_long16_rtz( double16 );
    // convert_long{,2,3,4,8,16}_sat: explicit conversions to long / longN.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); a vector source has the
    // same width as the result. In the OpenCL C naming scheme, _sat requests
    // saturation to the destination range; no rounding-mode suffix is given.
    long __OVERLOAD__ convert_long_sat( uchar );
    long __OVERLOAD__ convert_long_sat( ushort );
    long __OVERLOAD__ convert_long_sat( uint );
    long __OVERLOAD__ convert_long_sat( ulong );
    long __OVERLOAD__ convert_long_sat( char );
    long __OVERLOAD__ convert_long_sat( short );
    long __OVERLOAD__ convert_long_sat( int );
    long __OVERLOAD__ convert_long_sat( long );
    long __OVERLOAD__ convert_long_sat( float );
    long __OVERLOAD__ convert_long_sat( double );
    long2 __OVERLOAD__ convert_long2_sat( uchar2 );
    long2 __OVERLOAD__ convert_long2_sat( ushort2 );
    long2 __OVERLOAD__ convert_long2_sat( uint2 );
    long2 __OVERLOAD__ convert_long2_sat( ulong2 );
    long2 __OVERLOAD__ convert_long2_sat( char2 );
    long2 __OVERLOAD__ convert_long2_sat( short2 );
    long2 __OVERLOAD__ convert_long2_sat( int2 );
    long2 __OVERLOAD__ convert_long2_sat( long2 );
    long2 __OVERLOAD__ convert_long2_sat( float2 );
    long2 __OVERLOAD__ convert_long2_sat( double2 );
    long3 __OVERLOAD__ convert_long3_sat( uchar3 );
    long3 __OVERLOAD__ convert_long3_sat( ushort3 );
    long3 __OVERLOAD__ convert_long3_sat( uint3 );
    long3 __OVERLOAD__ convert_long3_sat( ulong3 );
    long3 __OVERLOAD__ convert_long3_sat( char3 );
    long3 __OVERLOAD__ convert_long3_sat( short3 );
    long3 __OVERLOAD__ convert_long3_sat( int3 );
    long3 __OVERLOAD__ convert_long3_sat( long3 );
    long3 __OVERLOAD__ convert_long3_sat( float3 );
    long3 __OVERLOAD__ convert_long3_sat( double3 );
    long4 __OVERLOAD__ convert_long4_sat( uchar4 );
    long4 __OVERLOAD__ convert_long4_sat( ushort4 );
    long4 __OVERLOAD__ convert_long4_sat( uint4 );
    long4 __OVERLOAD__ convert_long4_sat( ulong4 );
    long4 __OVERLOAD__ convert_long4_sat( char4 );
    long4 __OVERLOAD__ convert_long4_sat( short4 );
    long4 __OVERLOAD__ convert_long4_sat( int4 );
    long4 __OVERLOAD__ convert_long4_sat( long4 );
    long4 __OVERLOAD__ convert_long4_sat( float4 );
    long4 __OVERLOAD__ convert_long4_sat( double4 );
    long8 __OVERLOAD__ convert_long8_sat( uchar8 );
    long8 __OVERLOAD__ convert_long8_sat( ushort8 );
    long8 __OVERLOAD__ convert_long8_sat( uint8 );
    long8 __OVERLOAD__ convert_long8_sat( ulong8 );
    long8 __OVERLOAD__ convert_long8_sat( char8 );
    long8 __OVERLOAD__ convert_long8_sat( short8 );
    long8 __OVERLOAD__ convert_long8_sat( int8 );
    long8 __OVERLOAD__ convert_long8_sat( long8 );
    long8 __OVERLOAD__ convert_long8_sat( float8 );
    long8 __OVERLOAD__ convert_long8_sat( double8 );
    long16 __OVERLOAD__ convert_long16_sat( uchar16 );
    long16 __OVERLOAD__ convert_long16_sat( ushort16 );
    long16 __OVERLOAD__ convert_long16_sat( uint16 );
    long16 __OVERLOAD__ convert_long16_sat( ulong16 );
    long16 __OVERLOAD__ convert_long16_sat( char16 );
    long16 __OVERLOAD__ convert_long16_sat( short16 );
    long16 __OVERLOAD__ convert_long16_sat( int16 );
    long16 __OVERLOAD__ convert_long16_sat( long16 );
    long16 __OVERLOAD__ convert_long16_sat( float16 );
    long16 __OVERLOAD__ convert_long16_sat( double16 );
    // convert_long{,2,3,4,8,16}_sat_rte: explicit conversions to long / longN.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); a vector source has the
    // same width as the result. In the OpenCL C naming scheme, _sat requests
    // saturation to the destination range and _rte rounds to the nearest even
    // value.
    long __OVERLOAD__ convert_long_sat_rte( uchar );
    long __OVERLOAD__ convert_long_sat_rte( ushort );
    long __OVERLOAD__ convert_long_sat_rte( uint );
    long __OVERLOAD__ convert_long_sat_rte( ulong );
    long __OVERLOAD__ convert_long_sat_rte( char );
    long __OVERLOAD__ convert_long_sat_rte( short );
    long __OVERLOAD__ convert_long_sat_rte( int );
    long __OVERLOAD__ convert_long_sat_rte( long );
    long __OVERLOAD__ convert_long_sat_rte( float );
    long __OVERLOAD__ convert_long_sat_rte( double );
    long2 __OVERLOAD__ convert_long2_sat_rte( uchar2 );
    long2 __OVERLOAD__ convert_long2_sat_rte( ushort2 );
    long2 __OVERLOAD__ convert_long2_sat_rte( uint2 );
    long2 __OVERLOAD__ convert_long2_sat_rte( ulong2 );
    long2 __OVERLOAD__ convert_long2_sat_rte( char2 );
    long2 __OVERLOAD__ convert_long2_sat_rte( short2 );
    long2 __OVERLOAD__ convert_long2_sat_rte( int2 );
    long2 __OVERLOAD__ convert_long2_sat_rte( long2 );
    long2 __OVERLOAD__ convert_long2_sat_rte( float2 );
    long2 __OVERLOAD__ convert_long2_sat_rte( double2 );
    long3 __OVERLOAD__ convert_long3_sat_rte( uchar3 );
    long3 __OVERLOAD__ convert_long3_sat_rte( ushort3 );
    long3 __OVERLOAD__ convert_long3_sat_rte( uint3 );
    long3 __OVERLOAD__ convert_long3_sat_rte( ulong3 );
    long3 __OVERLOAD__ convert_long3_sat_rte( char3 );
    long3 __OVERLOAD__ convert_long3_sat_rte( short3 );
    long3 __OVERLOAD__ convert_long3_sat_rte( int3 );
    long3 __OVERLOAD__ convert_long3_sat_rte( long3 );
    long3 __OVERLOAD__ convert_long3_sat_rte( float3 );
    long3 __OVERLOAD__ convert_long3_sat_rte( double3 );
    long4 __OVERLOAD__ convert_long4_sat_rte( uchar4 );
    long4 __OVERLOAD__ convert_long4_sat_rte( ushort4 );
    long4 __OVERLOAD__ convert_long4_sat_rte( uint4 );
    long4 __OVERLOAD__ convert_long4_sat_rte( ulong4 );
    long4 __OVERLOAD__ convert_long4_sat_rte( char4 );
    long4 __OVERLOAD__ convert_long4_sat_rte( short4 );
    long4 __OVERLOAD__ convert_long4_sat_rte( int4 );
    long4 __OVERLOAD__ convert_long4_sat_rte( long4 );
    long4 __OVERLOAD__ convert_long4_sat_rte( float4 );
    long4 __OVERLOAD__ convert_long4_sat_rte( double4 );
    long8 __OVERLOAD__ convert_long8_sat_rte( uchar8 );
    long8 __OVERLOAD__ convert_long8_sat_rte( ushort8 );
    long8 __OVERLOAD__ convert_long8_sat_rte( uint8 );
    long8 __OVERLOAD__ convert_long8_sat_rte( ulong8 );
    long8 __OVERLOAD__ convert_long8_sat_rte( char8 );
    long8 __OVERLOAD__ convert_long8_sat_rte( short8 );
    long8 __OVERLOAD__ convert_long8_sat_rte( int8 );
    long8 __OVERLOAD__ convert_long8_sat_rte( long8 );
    long8 __OVERLOAD__ convert_long8_sat_rte( float8 );
    long8 __OVERLOAD__ convert_long8_sat_rte( double8 );
    long16 __OVERLOAD__ convert_long16_sat_rte( uchar16 );
    long16 __OVERLOAD__ convert_long16_sat_rte( ushort16 );
    long16 __OVERLOAD__ convert_long16_sat_rte( uint16 );
    long16 __OVERLOAD__ convert_long16_sat_rte( ulong16 );
    long16 __OVERLOAD__ convert_long16_sat_rte( char16 );
    long16 __OVERLOAD__ convert_long16_sat_rte( short16 );
    long16 __OVERLOAD__ convert_long16_sat_rte( int16 );
    long16 __OVERLOAD__ convert_long16_sat_rte( long16 );
    long16 __OVERLOAD__ convert_long16_sat_rte( float16 );
    long16 __OVERLOAD__ convert_long16_sat_rte( double16 );
    // convert_long{,2,3,4,8,16}_sat_rtp: explicit conversions to long / longN.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); a vector source has the
    // same width as the result. In the OpenCL C naming scheme, _sat requests
    // saturation to the destination range and _rtp rounds toward positive
    // infinity.
    long __OVERLOAD__ convert_long_sat_rtp( uchar );
    long __OVERLOAD__ convert_long_sat_rtp( ushort );
    long __OVERLOAD__ convert_long_sat_rtp( uint );
    long __OVERLOAD__ convert_long_sat_rtp( ulong );
    long __OVERLOAD__ convert_long_sat_rtp( char );
    long __OVERLOAD__ convert_long_sat_rtp( short );
    long __OVERLOAD__ convert_long_sat_rtp( int );
    long __OVERLOAD__ convert_long_sat_rtp( long );
    long __OVERLOAD__ convert_long_sat_rtp( float );
    long __OVERLOAD__ convert_long_sat_rtp( double );
    long2 __OVERLOAD__ convert_long2_sat_rtp( uchar2 );
    long2 __OVERLOAD__ convert_long2_sat_rtp( ushort2 );
    long2 __OVERLOAD__ convert_long2_sat_rtp( uint2 );
    long2 __OVERLOAD__ convert_long2_sat_rtp( ulong2 );
    long2 __OVERLOAD__ convert_long2_sat_rtp( char2 );
    long2 __OVERLOAD__ convert_long2_sat_rtp( short2 );
    long2 __OVERLOAD__ convert_long2_sat_rtp( int2 );
    long2 __OVERLOAD__ convert_long2_sat_rtp( long2 );
    long2 __OVERLOAD__ convert_long2_sat_rtp( float2 );
    long2 __OVERLOAD__ convert_long2_sat_rtp( double2 );
    long3 __OVERLOAD__ convert_long3_sat_rtp( uchar3 );
    long3 __OVERLOAD__ convert_long3_sat_rtp( ushort3 );
    long3 __OVERLOAD__ convert_long3_sat_rtp( uint3 );
    long3 __OVERLOAD__ convert_long3_sat_rtp( ulong3 );
    long3 __OVERLOAD__ convert_long3_sat_rtp( char3 );
    long3 __OVERLOAD__ convert_long3_sat_rtp( short3 );
    long3 __OVERLOAD__ convert_long3_sat_rtp( int3 );
    long3 __OVERLOAD__ convert_long3_sat_rtp( long3 );
    long3 __OVERLOAD__ convert_long3_sat_rtp( float3 );
    long3 __OVERLOAD__ convert_long3_sat_rtp( double3 );
    long4 __OVERLOAD__ convert_long4_sat_rtp( uchar4 );
    long4 __OVERLOAD__ convert_long4_sat_rtp( ushort4 );
    long4 __OVERLOAD__ convert_long4_sat_rtp( uint4 );
    long4 __OVERLOAD__ convert_long4_sat_rtp( ulong4 );
    long4 __OVERLOAD__ convert_long4_sat_rtp( char4 );
    long4 __OVERLOAD__ convert_long4_sat_rtp( short4 );
    long4 __OVERLOAD__ convert_long4_sat_rtp( int4 );
    long4 __OVERLOAD__ convert_long4_sat_rtp( long4 );
    long4 __OVERLOAD__ convert_long4_sat_rtp( float4 );
    long4 __OVERLOAD__ convert_long4_sat_rtp( double4 );
    long8 __OVERLOAD__ convert_long8_sat_rtp( uchar8 );
    long8 __OVERLOAD__ convert_long8_sat_rtp( ushort8 );
    long8 __OVERLOAD__ convert_long8_sat_rtp( uint8 );
    long8 __OVERLOAD__ convert_long8_sat_rtp( ulong8 );
    long8 __OVERLOAD__ convert_long8_sat_rtp( char8 );
    long8 __OVERLOAD__ convert_long8_sat_rtp( short8 );
    long8 __OVERLOAD__ convert_long8_sat_rtp( int8 );
    long8 __OVERLOAD__ convert_long8_sat_rtp( long8 );
    long8 __OVERLOAD__ convert_long8_sat_rtp( float8 );
    long8 __OVERLOAD__ convert_long8_sat_rtp( double8 );
    long16 __OVERLOAD__ convert_long16_sat_rtp( uchar16 );
    long16 __OVERLOAD__ convert_long16_sat_rtp( ushort16 );
    long16 __OVERLOAD__ convert_long16_sat_rtp( uint16 );
    long16 __OVERLOAD__ convert_long16_sat_rtp( ulong16 );
    long16 __OVERLOAD__ convert_long16_sat_rtp( char16 );
    long16 __OVERLOAD__ convert_long16_sat_rtp( short16 );
    long16 __OVERLOAD__ convert_long16_sat_rtp( int16 );
    long16 __OVERLOAD__ convert_long16_sat_rtp( long16 );
    long16 __OVERLOAD__ convert_long16_sat_rtp( float16 );
    long16 __OVERLOAD__ convert_long16_sat_rtp( double16 );
    // convert_long{,2,3,4,8,16}_sat_rtn: explicit conversions to long / longN.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); a vector source has the
    // same width as the result. In the OpenCL C naming scheme, _sat requests
    // saturation to the destination range and _rtn rounds toward negative
    // infinity.
    long __OVERLOAD__ convert_long_sat_rtn( uchar );
    long __OVERLOAD__ convert_long_sat_rtn( ushort );
    long __OVERLOAD__ convert_long_sat_rtn( uint );
    long __OVERLOAD__ convert_long_sat_rtn( ulong );
    long __OVERLOAD__ convert_long_sat_rtn( char );
    long __OVERLOAD__ convert_long_sat_rtn( short );
    long __OVERLOAD__ convert_long_sat_rtn( int );
    long __OVERLOAD__ convert_long_sat_rtn( long );
    long __OVERLOAD__ convert_long_sat_rtn( float );
    long __OVERLOAD__ convert_long_sat_rtn( double );
    long2 __OVERLOAD__ convert_long2_sat_rtn( uchar2 );
    long2 __OVERLOAD__ convert_long2_sat_rtn( ushort2 );
    long2 __OVERLOAD__ convert_long2_sat_rtn( uint2 );
    long2 __OVERLOAD__ convert_long2_sat_rtn( ulong2 );
    long2 __OVERLOAD__ convert_long2_sat_rtn( char2 );
    long2 __OVERLOAD__ convert_long2_sat_rtn( short2 );
    long2 __OVERLOAD__ convert_long2_sat_rtn( int2 );
    long2 __OVERLOAD__ convert_long2_sat_rtn( long2 );
    long2 __OVERLOAD__ convert_long2_sat_rtn( float2 );
    long2 __OVERLOAD__ convert_long2_sat_rtn( double2 );
    long3 __OVERLOAD__ convert_long3_sat_rtn( uchar3 );
    long3 __OVERLOAD__ convert_long3_sat_rtn( ushort3 );
    long3 __OVERLOAD__ convert_long3_sat_rtn( uint3 );
    long3 __OVERLOAD__ convert_long3_sat_rtn( ulong3 );
    long3 __OVERLOAD__ convert_long3_sat_rtn( char3 );
    long3 __OVERLOAD__ convert_long3_sat_rtn( short3 );
    long3 __OVERLOAD__ convert_long3_sat_rtn( int3 );
    long3 __OVERLOAD__ convert_long3_sat_rtn( long3 );
    long3 __OVERLOAD__ convert_long3_sat_rtn( float3 );
    long3 __OVERLOAD__ convert_long3_sat_rtn( double3 );
    long4 __OVERLOAD__ convert_long4_sat_rtn( uchar4 );
    long4 __OVERLOAD__ convert_long4_sat_rtn( ushort4 );
    long4 __OVERLOAD__ convert_long4_sat_rtn( uint4 );
    long4 __OVERLOAD__ convert_long4_sat_rtn( ulong4 );
    long4 __OVERLOAD__ convert_long4_sat_rtn( char4 );
    long4 __OVERLOAD__ convert_long4_sat_rtn( short4 );
    long4 __OVERLOAD__ convert_long4_sat_rtn( int4 );
    long4 __OVERLOAD__ convert_long4_sat_rtn( long4 );
    long4 __OVERLOAD__ convert_long4_sat_rtn( float4 );
    long4 __OVERLOAD__ convert_long4_sat_rtn( double4 );
    long8 __OVERLOAD__ convert_long8_sat_rtn( uchar8 );
    long8 __OVERLOAD__ convert_long8_sat_rtn( ushort8 );
    long8 __OVERLOAD__ convert_long8_sat_rtn( uint8 );
    long8 __OVERLOAD__ convert_long8_sat_rtn( ulong8 );
    long8 __OVERLOAD__ convert_long8_sat_rtn( char8 );
    long8 __OVERLOAD__ convert_long8_sat_rtn( short8 );
    long8 __OVERLOAD__ convert_long8_sat_rtn( int8 );
    long8 __OVERLOAD__ convert_long8_sat_rtn( long8 );
    long8 __OVERLOAD__ convert_long8_sat_rtn( float8 );
    long8 __OVERLOAD__ convert_long8_sat_rtn( double8 );
    long16 __OVERLOAD__ convert_long16_sat_rtn( uchar16 );
    long16 __OVERLOAD__ convert_long16_sat_rtn( ushort16 );
    long16 __OVERLOAD__ convert_long16_sat_rtn( uint16 );
    long16 __OVERLOAD__ convert_long16_sat_rtn( ulong16 );
    long16 __OVERLOAD__ convert_long16_sat_rtn( char16 );
    long16 __OVERLOAD__ convert_long16_sat_rtn( short16 );
    long16 __OVERLOAD__ convert_long16_sat_rtn( int16 );
    long16 __OVERLOAD__ convert_long16_sat_rtn( long16 );
    long16 __OVERLOAD__ convert_long16_sat_rtn( float16 );
    long16 __OVERLOAD__ convert_long16_sat_rtn( double16 );
    // convert_long{,2,3,4,8,16}_sat_rtz: explicit conversions to long, declared
    // for the scalar and for 2/3/4/8/16-wide vectors. Each width has one
    // overload per source element type (uchar, ushort, uint, ulong, char, short,
    // int, long, float, double); the argument has the same width as the result.
    // NOTE(review): by OpenCL C naming "_sat" should request saturation and
    // "_rtz" rounding toward zero; this header only declares the overloads --
    // confirm the semantics against the spec / implementation.
    long __OVERLOAD__ convert_long_sat_rtz( uchar );
    long __OVERLOAD__ convert_long_sat_rtz( ushort );
    long __OVERLOAD__ convert_long_sat_rtz( uint );
    long __OVERLOAD__ convert_long_sat_rtz( ulong );
    long __OVERLOAD__ convert_long_sat_rtz( char );
    long __OVERLOAD__ convert_long_sat_rtz( short );
    long __OVERLOAD__ convert_long_sat_rtz( int );
    long __OVERLOAD__ convert_long_sat_rtz( long );
    long __OVERLOAD__ convert_long_sat_rtz( float );
    long __OVERLOAD__ convert_long_sat_rtz( double );
    long2 __OVERLOAD__ convert_long2_sat_rtz( uchar2 );
    long2 __OVERLOAD__ convert_long2_sat_rtz( ushort2 );
    long2 __OVERLOAD__ convert_long2_sat_rtz( uint2 );
    long2 __OVERLOAD__ convert_long2_sat_rtz( ulong2 );
    long2 __OVERLOAD__ convert_long2_sat_rtz( char2 );
    long2 __OVERLOAD__ convert_long2_sat_rtz( short2 );
    long2 __OVERLOAD__ convert_long2_sat_rtz( int2 );
    long2 __OVERLOAD__ convert_long2_sat_rtz( long2 );
    long2 __OVERLOAD__ convert_long2_sat_rtz( float2 );
    long2 __OVERLOAD__ convert_long2_sat_rtz( double2 );
    long3 __OVERLOAD__ convert_long3_sat_rtz( uchar3 );
    long3 __OVERLOAD__ convert_long3_sat_rtz( ushort3 );
    long3 __OVERLOAD__ convert_long3_sat_rtz( uint3 );
    long3 __OVERLOAD__ convert_long3_sat_rtz( ulong3 );
    long3 __OVERLOAD__ convert_long3_sat_rtz( char3 );
    long3 __OVERLOAD__ convert_long3_sat_rtz( short3 );
    long3 __OVERLOAD__ convert_long3_sat_rtz( int3 );
    long3 __OVERLOAD__ convert_long3_sat_rtz( long3 );
    long3 __OVERLOAD__ convert_long3_sat_rtz( float3 );
    long3 __OVERLOAD__ convert_long3_sat_rtz( double3 );
    long4 __OVERLOAD__ convert_long4_sat_rtz( uchar4 );
    long4 __OVERLOAD__ convert_long4_sat_rtz( ushort4 );
    long4 __OVERLOAD__ convert_long4_sat_rtz( uint4 );
    long4 __OVERLOAD__ convert_long4_sat_rtz( ulong4 );
    long4 __OVERLOAD__ convert_long4_sat_rtz( char4 );
    long4 __OVERLOAD__ convert_long4_sat_rtz( short4 );
    long4 __OVERLOAD__ convert_long4_sat_rtz( int4 );
    long4 __OVERLOAD__ convert_long4_sat_rtz( long4 );
    long4 __OVERLOAD__ convert_long4_sat_rtz( float4 );
    long4 __OVERLOAD__ convert_long4_sat_rtz( double4 );
    long8 __OVERLOAD__ convert_long8_sat_rtz( uchar8 );
    long8 __OVERLOAD__ convert_long8_sat_rtz( ushort8 );
    long8 __OVERLOAD__ convert_long8_sat_rtz( uint8 );
    long8 __OVERLOAD__ convert_long8_sat_rtz( ulong8 );
    long8 __OVERLOAD__ convert_long8_sat_rtz( char8 );
    long8 __OVERLOAD__ convert_long8_sat_rtz( short8 );
    long8 __OVERLOAD__ convert_long8_sat_rtz( int8 );
    long8 __OVERLOAD__ convert_long8_sat_rtz( long8 );
    long8 __OVERLOAD__ convert_long8_sat_rtz( float8 );
    long8 __OVERLOAD__ convert_long8_sat_rtz( double8 );
    long16 __OVERLOAD__ convert_long16_sat_rtz( uchar16 );
    long16 __OVERLOAD__ convert_long16_sat_rtz( ushort16 );
    long16 __OVERLOAD__ convert_long16_sat_rtz( uint16 );
    long16 __OVERLOAD__ convert_long16_sat_rtz( ulong16 );
    long16 __OVERLOAD__ convert_long16_sat_rtz( char16 );
    long16 __OVERLOAD__ convert_long16_sat_rtz( short16 );
    long16 __OVERLOAD__ convert_long16_sat_rtz( int16 );
    long16 __OVERLOAD__ convert_long16_sat_rtz( long16 );
    long16 __OVERLOAD__ convert_long16_sat_rtz( float16 );
    long16 __OVERLOAD__ convert_long16_sat_rtz( double16 );
    // convert_float{,2,3,4,8,16}: explicit conversions to float with no
    // rounding-mode suffix, declared for the scalar and for 2/3/4/8/16-wide
    // vectors. Each width has one overload per source element type (uchar,
    // ushort, uint, ulong, char, short, int, long, float, double); the argument
    // has the same width as the result.
    // NOTE(review): the rounding mode used by the unsuffixed form is defined by
    // the OpenCL C spec / implementation, not by this header -- confirm there.
    float __OVERLOAD__ convert_float( uchar );
    float __OVERLOAD__ convert_float( ushort );
    float __OVERLOAD__ convert_float( uint );
    float __OVERLOAD__ convert_float( ulong );
    float __OVERLOAD__ convert_float( char );
    float __OVERLOAD__ convert_float( short );
    float __OVERLOAD__ convert_float( int );
    float __OVERLOAD__ convert_float( long );
    float __OVERLOAD__ convert_float( float );
    float __OVERLOAD__ convert_float( double );
    float2 __OVERLOAD__ convert_float2( uchar2 );
    float2 __OVERLOAD__ convert_float2( ushort2 );
    float2 __OVERLOAD__ convert_float2( uint2 );
    float2 __OVERLOAD__ convert_float2( ulong2 );
    float2 __OVERLOAD__ convert_float2( char2 );
    float2 __OVERLOAD__ convert_float2( short2 );
    float2 __OVERLOAD__ convert_float2( int2 );
    float2 __OVERLOAD__ convert_float2( long2 );
    float2 __OVERLOAD__ convert_float2( float2 );
    float2 __OVERLOAD__ convert_float2( double2 );
    float3 __OVERLOAD__ convert_float3( uchar3 );
    float3 __OVERLOAD__ convert_float3( ushort3 );
    float3 __OVERLOAD__ convert_float3( uint3 );
    float3 __OVERLOAD__ convert_float3( ulong3 );
    float3 __OVERLOAD__ convert_float3( char3 );
    float3 __OVERLOAD__ convert_float3( short3 );
    float3 __OVERLOAD__ convert_float3( int3 );
    float3 __OVERLOAD__ convert_float3( long3 );
    float3 __OVERLOAD__ convert_float3( float3 );
    float3 __OVERLOAD__ convert_float3( double3 );
    float4 __OVERLOAD__ convert_float4( uchar4 );
    float4 __OVERLOAD__ convert_float4( ushort4 );
    float4 __OVERLOAD__ convert_float4( uint4 );
    float4 __OVERLOAD__ convert_float4( ulong4 );
    float4 __OVERLOAD__ convert_float4( char4 );
    float4 __OVERLOAD__ convert_float4( short4 );
    float4 __OVERLOAD__ convert_float4( int4 );
    float4 __OVERLOAD__ convert_float4( long4 );
    float4 __OVERLOAD__ convert_float4( float4 );
    float4 __OVERLOAD__ convert_float4( double4 );
    float8 __OVERLOAD__ convert_float8( uchar8 );
    float8 __OVERLOAD__ convert_float8( ushort8 );
    float8 __OVERLOAD__ convert_float8( uint8 );
    float8 __OVERLOAD__ convert_float8( ulong8 );
    float8 __OVERLOAD__ convert_float8( char8 );
    float8 __OVERLOAD__ convert_float8( short8 );
    float8 __OVERLOAD__ convert_float8( int8 );
    float8 __OVERLOAD__ convert_float8( long8 );
    float8 __OVERLOAD__ convert_float8( float8 );
    float8 __OVERLOAD__ convert_float8( double8 );
    float16 __OVERLOAD__ convert_float16( uchar16 );
    float16 __OVERLOAD__ convert_float16( ushort16 );
    float16 __OVERLOAD__ convert_float16( uint16 );
    float16 __OVERLOAD__ convert_float16( ulong16 );
    float16 __OVERLOAD__ convert_float16( char16 );
    float16 __OVERLOAD__ convert_float16( short16 );
    float16 __OVERLOAD__ convert_float16( int16 );
    float16 __OVERLOAD__ convert_float16( long16 );
    float16 __OVERLOAD__ convert_float16( float16 );
    float16 __OVERLOAD__ convert_float16( double16 );
    // convert_float{,2,3,4,8,16}_rte: explicit conversions to float with the
    // "_rte" suffix, declared for the scalar and for 2/3/4/8/16-wide vectors.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); the argument has the same
    // width as the result.
    // NOTE(review): by OpenCL C naming "_rte" should mean round to nearest
    // even; this header only declares the overloads -- confirm against the spec.
    float __OVERLOAD__ convert_float_rte( uchar );
    float __OVERLOAD__ convert_float_rte( ushort );
    float __OVERLOAD__ convert_float_rte( uint );
    float __OVERLOAD__ convert_float_rte( ulong );
    float __OVERLOAD__ convert_float_rte( char );
    float __OVERLOAD__ convert_float_rte( short );
    float __OVERLOAD__ convert_float_rte( int );
    float __OVERLOAD__ convert_float_rte( long );
    float __OVERLOAD__ convert_float_rte( float );
    float __OVERLOAD__ convert_float_rte( double );
    float2 __OVERLOAD__ convert_float2_rte( uchar2 );
    float2 __OVERLOAD__ convert_float2_rte( ushort2 );
    float2 __OVERLOAD__ convert_float2_rte( uint2 );
    float2 __OVERLOAD__ convert_float2_rte( ulong2 );
    float2 __OVERLOAD__ convert_float2_rte( char2 );
    float2 __OVERLOAD__ convert_float2_rte( short2 );
    float2 __OVERLOAD__ convert_float2_rte( int2 );
    float2 __OVERLOAD__ convert_float2_rte( long2 );
    float2 __OVERLOAD__ convert_float2_rte( float2 );
    float2 __OVERLOAD__ convert_float2_rte( double2 );
    float3 __OVERLOAD__ convert_float3_rte( uchar3 );
    float3 __OVERLOAD__ convert_float3_rte( ushort3 );
    float3 __OVERLOAD__ convert_float3_rte( uint3 );
    float3 __OVERLOAD__ convert_float3_rte( ulong3 );
    float3 __OVERLOAD__ convert_float3_rte( char3 );
    float3 __OVERLOAD__ convert_float3_rte( short3 );
    float3 __OVERLOAD__ convert_float3_rte( int3 );
    float3 __OVERLOAD__ convert_float3_rte( long3 );
    float3 __OVERLOAD__ convert_float3_rte( float3 );
    float3 __OVERLOAD__ convert_float3_rte( double3 );
    float4 __OVERLOAD__ convert_float4_rte( uchar4 );
    float4 __OVERLOAD__ convert_float4_rte( ushort4 );
    float4 __OVERLOAD__ convert_float4_rte( uint4 );
    float4 __OVERLOAD__ convert_float4_rte( ulong4 );
    float4 __OVERLOAD__ convert_float4_rte( char4 );
    float4 __OVERLOAD__ convert_float4_rte( short4 );
    float4 __OVERLOAD__ convert_float4_rte( int4 );
    float4 __OVERLOAD__ convert_float4_rte( long4 );
    float4 __OVERLOAD__ convert_float4_rte( float4 );
    float4 __OVERLOAD__ convert_float4_rte( double4 );
    float8 __OVERLOAD__ convert_float8_rte( uchar8 );
    float8 __OVERLOAD__ convert_float8_rte( ushort8 );
    float8 __OVERLOAD__ convert_float8_rte( uint8 );
    float8 __OVERLOAD__ convert_float8_rte( ulong8 );
    float8 __OVERLOAD__ convert_float8_rte( char8 );
    float8 __OVERLOAD__ convert_float8_rte( short8 );
    float8 __OVERLOAD__ convert_float8_rte( int8 );
    float8 __OVERLOAD__ convert_float8_rte( long8 );
    float8 __OVERLOAD__ convert_float8_rte( float8 );
    float8 __OVERLOAD__ convert_float8_rte( double8 );
    float16 __OVERLOAD__ convert_float16_rte( uchar16 );
    float16 __OVERLOAD__ convert_float16_rte( ushort16 );
    float16 __OVERLOAD__ convert_float16_rte( uint16 );
    float16 __OVERLOAD__ convert_float16_rte( ulong16 );
    float16 __OVERLOAD__ convert_float16_rte( char16 );
    float16 __OVERLOAD__ convert_float16_rte( short16 );
    float16 __OVERLOAD__ convert_float16_rte( int16 );
    float16 __OVERLOAD__ convert_float16_rte( long16 );
    float16 __OVERLOAD__ convert_float16_rte( float16 );
    float16 __OVERLOAD__ convert_float16_rte( double16 );
    // convert_float{,2,3,4,8,16}_rtp: explicit conversions to float with the
    // "_rtp" suffix, declared for the scalar and for 2/3/4/8/16-wide vectors.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); the argument has the same
    // width as the result.
    // NOTE(review): by OpenCL C naming "_rtp" should mean round toward positive
    // infinity; this header only declares the overloads -- confirm against the
    // spec.
    float __OVERLOAD__ convert_float_rtp( uchar );
    float __OVERLOAD__ convert_float_rtp( ushort );
    float __OVERLOAD__ convert_float_rtp( uint );
    float __OVERLOAD__ convert_float_rtp( ulong );
    float __OVERLOAD__ convert_float_rtp( char );
    float __OVERLOAD__ convert_float_rtp( short );
    float __OVERLOAD__ convert_float_rtp( int );
    float __OVERLOAD__ convert_float_rtp( long );
    float __OVERLOAD__ convert_float_rtp( float );
    float __OVERLOAD__ convert_float_rtp( double );
    float2 __OVERLOAD__ convert_float2_rtp( uchar2 );
    float2 __OVERLOAD__ convert_float2_rtp( ushort2 );
    float2 __OVERLOAD__ convert_float2_rtp( uint2 );
    float2 __OVERLOAD__ convert_float2_rtp( ulong2 );
    float2 __OVERLOAD__ convert_float2_rtp( char2 );
    float2 __OVERLOAD__ convert_float2_rtp( short2 );
    float2 __OVERLOAD__ convert_float2_rtp( int2 );
    float2 __OVERLOAD__ convert_float2_rtp( long2 );
    float2 __OVERLOAD__ convert_float2_rtp( float2 );
    float2 __OVERLOAD__ convert_float2_rtp( double2 );
    float3 __OVERLOAD__ convert_float3_rtp( uchar3 );
    float3 __OVERLOAD__ convert_float3_rtp( ushort3 );
    float3 __OVERLOAD__ convert_float3_rtp( uint3 );
    float3 __OVERLOAD__ convert_float3_rtp( ulong3 );
    float3 __OVERLOAD__ convert_float3_rtp( char3 );
    float3 __OVERLOAD__ convert_float3_rtp( short3 );
    float3 __OVERLOAD__ convert_float3_rtp( int3 );
    float3 __OVERLOAD__ convert_float3_rtp( long3 );
    float3 __OVERLOAD__ convert_float3_rtp( float3 );
    float3 __OVERLOAD__ convert_float3_rtp( double3 );
    float4 __OVERLOAD__ convert_float4_rtp( uchar4 );
    float4 __OVERLOAD__ convert_float4_rtp( ushort4 );
    float4 __OVERLOAD__ convert_float4_rtp( uint4 );
    float4 __OVERLOAD__ convert_float4_rtp( ulong4 );
    float4 __OVERLOAD__ convert_float4_rtp( char4 );
    float4 __OVERLOAD__ convert_float4_rtp( short4 );
    float4 __OVERLOAD__ convert_float4_rtp( int4 );
    float4 __OVERLOAD__ convert_float4_rtp( long4 );
    float4 __OVERLOAD__ convert_float4_rtp( float4 );
    float4 __OVERLOAD__ convert_float4_rtp( double4 );
    float8 __OVERLOAD__ convert_float8_rtp( uchar8 );
    float8 __OVERLOAD__ convert_float8_rtp( ushort8 );
    float8 __OVERLOAD__ convert_float8_rtp( uint8 );
    float8 __OVERLOAD__ convert_float8_rtp( ulong8 );
    float8 __OVERLOAD__ convert_float8_rtp( char8 );
    float8 __OVERLOAD__ convert_float8_rtp( short8 );
    float8 __OVERLOAD__ convert_float8_rtp( int8 );
    float8 __OVERLOAD__ convert_float8_rtp( long8 );
    float8 __OVERLOAD__ convert_float8_rtp( float8 );
    float8 __OVERLOAD__ convert_float8_rtp( double8 );
    float16 __OVERLOAD__ convert_float16_rtp( uchar16 );
    float16 __OVERLOAD__ convert_float16_rtp( ushort16 );
    float16 __OVERLOAD__ convert_float16_rtp( uint16 );
    float16 __OVERLOAD__ convert_float16_rtp( ulong16 );
    float16 __OVERLOAD__ convert_float16_rtp( char16 );
    float16 __OVERLOAD__ convert_float16_rtp( short16 );
    float16 __OVERLOAD__ convert_float16_rtp( int16 );
    float16 __OVERLOAD__ convert_float16_rtp( long16 );
    float16 __OVERLOAD__ convert_float16_rtp( float16 );
    float16 __OVERLOAD__ convert_float16_rtp( double16 );
    // convert_float{,2,3,4,8,16}_rtn: explicit conversions to float with the
    // "_rtn" suffix, declared for the scalar and for 2/3/4/8/16-wide vectors.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); the argument has the same
    // width as the result.
    // NOTE(review): by OpenCL C naming "_rtn" should mean round toward negative
    // infinity; this header only declares the overloads -- confirm against the
    // spec.
    float __OVERLOAD__ convert_float_rtn( uchar );
    float __OVERLOAD__ convert_float_rtn( ushort );
    float __OVERLOAD__ convert_float_rtn( uint );
    float __OVERLOAD__ convert_float_rtn( ulong );
    float __OVERLOAD__ convert_float_rtn( char );
    float __OVERLOAD__ convert_float_rtn( short );
    float __OVERLOAD__ convert_float_rtn( int );
    float __OVERLOAD__ convert_float_rtn( long );
    float __OVERLOAD__ convert_float_rtn( float );
    float __OVERLOAD__ convert_float_rtn( double );
    float2 __OVERLOAD__ convert_float2_rtn( uchar2 );
    float2 __OVERLOAD__ convert_float2_rtn( ushort2 );
    float2 __OVERLOAD__ convert_float2_rtn( uint2 );
    float2 __OVERLOAD__ convert_float2_rtn( ulong2 );
    float2 __OVERLOAD__ convert_float2_rtn( char2 );
    float2 __OVERLOAD__ convert_float2_rtn( short2 );
    float2 __OVERLOAD__ convert_float2_rtn( int2 );
    float2 __OVERLOAD__ convert_float2_rtn( long2 );
    float2 __OVERLOAD__ convert_float2_rtn( float2 );
    float2 __OVERLOAD__ convert_float2_rtn( double2 );
    float3 __OVERLOAD__ convert_float3_rtn( uchar3 );
    float3 __OVERLOAD__ convert_float3_rtn( ushort3 );
    float3 __OVERLOAD__ convert_float3_rtn( uint3 );
    float3 __OVERLOAD__ convert_float3_rtn( ulong3 );
    float3 __OVERLOAD__ convert_float3_rtn( char3 );
    float3 __OVERLOAD__ convert_float3_rtn( short3 );
    float3 __OVERLOAD__ convert_float3_rtn( int3 );
    float3 __OVERLOAD__ convert_float3_rtn( long3 );
    float3 __OVERLOAD__ convert_float3_rtn( float3 );
    float3 __OVERLOAD__ convert_float3_rtn( double3 );
    float4 __OVERLOAD__ convert_float4_rtn( uchar4 );
    float4 __OVERLOAD__ convert_float4_rtn( ushort4 );
    float4 __OVERLOAD__ convert_float4_rtn( uint4 );
    float4 __OVERLOAD__ convert_float4_rtn( ulong4 );
    float4 __OVERLOAD__ convert_float4_rtn( char4 );
    float4 __OVERLOAD__ convert_float4_rtn( short4 );
    float4 __OVERLOAD__ convert_float4_rtn( int4 );
    float4 __OVERLOAD__ convert_float4_rtn( long4 );
    float4 __OVERLOAD__ convert_float4_rtn( float4 );
    float4 __OVERLOAD__ convert_float4_rtn( double4 );
    float8 __OVERLOAD__ convert_float8_rtn( uchar8 );
    float8 __OVERLOAD__ convert_float8_rtn( ushort8 );
    float8 __OVERLOAD__ convert_float8_rtn( uint8 );
    float8 __OVERLOAD__ convert_float8_rtn( ulong8 );
    float8 __OVERLOAD__ convert_float8_rtn( char8 );
    float8 __OVERLOAD__ convert_float8_rtn( short8 );
    float8 __OVERLOAD__ convert_float8_rtn( int8 );
    float8 __OVERLOAD__ convert_float8_rtn( long8 );
    float8 __OVERLOAD__ convert_float8_rtn( float8 );
    float8 __OVERLOAD__ convert_float8_rtn( double8 );
    float16 __OVERLOAD__ convert_float16_rtn( uchar16 );
    float16 __OVERLOAD__ convert_float16_rtn( ushort16 );
    float16 __OVERLOAD__ convert_float16_rtn( uint16 );
    float16 __OVERLOAD__ convert_float16_rtn( ulong16 );
    float16 __OVERLOAD__ convert_float16_rtn( char16 );
    float16 __OVERLOAD__ convert_float16_rtn( short16 );
    float16 __OVERLOAD__ convert_float16_rtn( int16 );
    float16 __OVERLOAD__ convert_float16_rtn( long16 );
    float16 __OVERLOAD__ convert_float16_rtn( float16 );
    float16 __OVERLOAD__ convert_float16_rtn( double16 );
    // convert_float{,2,3,4,8,16}_rtz: explicit conversions to float with the
    // "_rtz" suffix, declared for the scalar and for 2/3/4/8/16-wide vectors.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); the argument has the same
    // width as the result.
    // NOTE(review): by OpenCL C naming "_rtz" should mean round toward zero;
    // this header only declares the overloads -- confirm against the spec.
    float __OVERLOAD__ convert_float_rtz( uchar );
    float __OVERLOAD__ convert_float_rtz( ushort );
    float __OVERLOAD__ convert_float_rtz( uint );
    float __OVERLOAD__ convert_float_rtz( ulong );
    float __OVERLOAD__ convert_float_rtz( char );
    float __OVERLOAD__ convert_float_rtz( short );
    float __OVERLOAD__ convert_float_rtz( int );
    float __OVERLOAD__ convert_float_rtz( long );
    float __OVERLOAD__ convert_float_rtz( float );
    float __OVERLOAD__ convert_float_rtz( double );
    float2 __OVERLOAD__ convert_float2_rtz( uchar2 );
    float2 __OVERLOAD__ convert_float2_rtz( ushort2 );
    float2 __OVERLOAD__ convert_float2_rtz( uint2 );
    float2 __OVERLOAD__ convert_float2_rtz( ulong2 );
    float2 __OVERLOAD__ convert_float2_rtz( char2 );
    float2 __OVERLOAD__ convert_float2_rtz( short2 );
    float2 __OVERLOAD__ convert_float2_rtz( int2 );
    float2 __OVERLOAD__ convert_float2_rtz( long2 );
    float2 __OVERLOAD__ convert_float2_rtz( float2 );
    float2 __OVERLOAD__ convert_float2_rtz( double2 );
    float3 __OVERLOAD__ convert_float3_rtz( uchar3 );
    float3 __OVERLOAD__ convert_float3_rtz( ushort3 );
    float3 __OVERLOAD__ convert_float3_rtz( uint3 );
    float3 __OVERLOAD__ convert_float3_rtz( ulong3 );
    float3 __OVERLOAD__ convert_float3_rtz( char3 );
    float3 __OVERLOAD__ convert_float3_rtz( short3 );
    float3 __OVERLOAD__ convert_float3_rtz( int3 );
    float3 __OVERLOAD__ convert_float3_rtz( long3 );
    float3 __OVERLOAD__ convert_float3_rtz( float3 );
    float3 __OVERLOAD__ convert_float3_rtz( double3 );
    float4 __OVERLOAD__ convert_float4_rtz( uchar4 );
    float4 __OVERLOAD__ convert_float4_rtz( ushort4 );
    float4 __OVERLOAD__ convert_float4_rtz( uint4 );
    float4 __OVERLOAD__ convert_float4_rtz( ulong4 );
    float4 __OVERLOAD__ convert_float4_rtz( char4 );
    float4 __OVERLOAD__ convert_float4_rtz( short4 );
    float4 __OVERLOAD__ convert_float4_rtz( int4 );
    float4 __OVERLOAD__ convert_float4_rtz( long4 );
    float4 __OVERLOAD__ convert_float4_rtz( float4 );
    float4 __OVERLOAD__ convert_float4_rtz( double4 );
    float8 __OVERLOAD__ convert_float8_rtz( uchar8 );
    float8 __OVERLOAD__ convert_float8_rtz( ushort8 );
    float8 __OVERLOAD__ convert_float8_rtz( uint8 );
    float8 __OVERLOAD__ convert_float8_rtz( ulong8 );
    float8 __OVERLOAD__ convert_float8_rtz( char8 );
    float8 __OVERLOAD__ convert_float8_rtz( short8 );
    float8 __OVERLOAD__ convert_float8_rtz( int8 );
    float8 __OVERLOAD__ convert_float8_rtz( long8 );
    float8 __OVERLOAD__ convert_float8_rtz( float8 );
    float8 __OVERLOAD__ convert_float8_rtz( double8 );
    float16 __OVERLOAD__ convert_float16_rtz( uchar16 );
    float16 __OVERLOAD__ convert_float16_rtz( ushort16 );
    float16 __OVERLOAD__ convert_float16_rtz( uint16 );
    float16 __OVERLOAD__ convert_float16_rtz( ulong16 );
    float16 __OVERLOAD__ convert_float16_rtz( char16 );
    float16 __OVERLOAD__ convert_float16_rtz( short16 );
    float16 __OVERLOAD__ convert_float16_rtz( int16 );
    float16 __OVERLOAD__ convert_float16_rtz( long16 );
    float16 __OVERLOAD__ convert_float16_rtz( float16 );
    float16 __OVERLOAD__ convert_float16_rtz( double16 );
    // convert_double{,2,3,4,8,16}: explicit conversions to double with no
    // rounding-mode suffix, declared for the scalar and for 2/3/4/8/16-wide
    // vectors. Each width has one overload per source element type (uchar,
    // ushort, uint, ulong, char, short, int, long, float, double); the argument
    // has the same width as the result.
    // NOTE(review): the rounding mode used by the unsuffixed form is defined by
    // the OpenCL C spec / implementation, not by this header -- confirm there.
    double __OVERLOAD__ convert_double( uchar );
    double __OVERLOAD__ convert_double( ushort );
    double __OVERLOAD__ convert_double( uint );
    double __OVERLOAD__ convert_double( ulong );
    double __OVERLOAD__ convert_double( char );
    double __OVERLOAD__ convert_double( short );
    double __OVERLOAD__ convert_double( int );
    double __OVERLOAD__ convert_double( long );
    double __OVERLOAD__ convert_double( float );
    double __OVERLOAD__ convert_double( double );
    double2 __OVERLOAD__ convert_double2( uchar2 );
    double2 __OVERLOAD__ convert_double2( ushort2 );
    double2 __OVERLOAD__ convert_double2( uint2 );
    double2 __OVERLOAD__ convert_double2( ulong2 );
    double2 __OVERLOAD__ convert_double2( char2 );
    double2 __OVERLOAD__ convert_double2( short2 );
    double2 __OVERLOAD__ convert_double2( int2 );
    double2 __OVERLOAD__ convert_double2( long2 );
    double2 __OVERLOAD__ convert_double2( float2 );
    double2 __OVERLOAD__ convert_double2( double2 );
    double3 __OVERLOAD__ convert_double3( uchar3 );
    double3 __OVERLOAD__ convert_double3( ushort3 );
    double3 __OVERLOAD__ convert_double3( uint3 );
    double3 __OVERLOAD__ convert_double3( ulong3 );
    double3 __OVERLOAD__ convert_double3( char3 );
    double3 __OVERLOAD__ convert_double3( short3 );
    double3 __OVERLOAD__ convert_double3( int3 );
    double3 __OVERLOAD__ convert_double3( long3 );
    double3 __OVERLOAD__ convert_double3( float3 );
    double3 __OVERLOAD__ convert_double3( double3 );
    double4 __OVERLOAD__ convert_double4( uchar4 );
    double4 __OVERLOAD__ convert_double4( ushort4 );
    double4 __OVERLOAD__ convert_double4( uint4 );
    double4 __OVERLOAD__ convert_double4( ulong4 );
    double4 __OVERLOAD__ convert_double4( char4 );
    double4 __OVERLOAD__ convert_double4( short4 );
    double4 __OVERLOAD__ convert_double4( int4 );
    double4 __OVERLOAD__ convert_double4( long4 );
    double4 __OVERLOAD__ convert_double4( float4 );
    double4 __OVERLOAD__ convert_double4( double4 );
    double8 __OVERLOAD__ convert_double8( uchar8 );
    double8 __OVERLOAD__ convert_double8( ushort8 );
    double8 __OVERLOAD__ convert_double8( uint8 );
    double8 __OVERLOAD__ convert_double8( ulong8 );
    double8 __OVERLOAD__ convert_double8( char8 );
    double8 __OVERLOAD__ convert_double8( short8 );
    double8 __OVERLOAD__ convert_double8( int8 );
    double8 __OVERLOAD__ convert_double8( long8 );
    double8 __OVERLOAD__ convert_double8( float8 );
    double8 __OVERLOAD__ convert_double8( double8 );
    double16 __OVERLOAD__ convert_double16( uchar16 );
    double16 __OVERLOAD__ convert_double16( ushort16 );
    double16 __OVERLOAD__ convert_double16( uint16 );
    double16 __OVERLOAD__ convert_double16( ulong16 );
    double16 __OVERLOAD__ convert_double16( char16 );
    double16 __OVERLOAD__ convert_double16( short16 );
    double16 __OVERLOAD__ convert_double16( int16 );
    double16 __OVERLOAD__ convert_double16( long16 );
    double16 __OVERLOAD__ convert_double16( float16 );
    double16 __OVERLOAD__ convert_double16( double16 );
    // convert_double{,2,3,4,8,16}_rte: explicit conversions to double with the
    // "_rte" suffix, declared for the scalar and for 2/3/4/8/16-wide vectors.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); the argument has the same
    // width as the result.
    // NOTE(review): by OpenCL C naming "_rte" should mean round to nearest
    // even; this header only declares the overloads -- confirm against the spec.
    double __OVERLOAD__ convert_double_rte( uchar );
    double __OVERLOAD__ convert_double_rte( ushort );
    double __OVERLOAD__ convert_double_rte( uint );
    double __OVERLOAD__ convert_double_rte( ulong );
    double __OVERLOAD__ convert_double_rte( char );
    double __OVERLOAD__ convert_double_rte( short );
    double __OVERLOAD__ convert_double_rte( int );
    double __OVERLOAD__ convert_double_rte( long );
    double __OVERLOAD__ convert_double_rte( float );
    double __OVERLOAD__ convert_double_rte( double );
    double2 __OVERLOAD__ convert_double2_rte( uchar2 );
    double2 __OVERLOAD__ convert_double2_rte( ushort2 );
    double2 __OVERLOAD__ convert_double2_rte( uint2 );
    double2 __OVERLOAD__ convert_double2_rte( ulong2 );
    double2 __OVERLOAD__ convert_double2_rte( char2 );
    double2 __OVERLOAD__ convert_double2_rte( short2 );
    double2 __OVERLOAD__ convert_double2_rte( int2 );
    double2 __OVERLOAD__ convert_double2_rte( long2 );
    double2 __OVERLOAD__ convert_double2_rte( float2 );
    double2 __OVERLOAD__ convert_double2_rte( double2 );
    double3 __OVERLOAD__ convert_double3_rte( uchar3 );
    double3 __OVERLOAD__ convert_double3_rte( ushort3 );
    double3 __OVERLOAD__ convert_double3_rte( uint3 );
    double3 __OVERLOAD__ convert_double3_rte( ulong3 );
    double3 __OVERLOAD__ convert_double3_rte( char3 );
    double3 __OVERLOAD__ convert_double3_rte( short3 );
    double3 __OVERLOAD__ convert_double3_rte( int3 );
    double3 __OVERLOAD__ convert_double3_rte( long3 );
    double3 __OVERLOAD__ convert_double3_rte( float3 );
    double3 __OVERLOAD__ convert_double3_rte( double3 );
    double4 __OVERLOAD__ convert_double4_rte( uchar4 );
    double4 __OVERLOAD__ convert_double4_rte( ushort4 );
    double4 __OVERLOAD__ convert_double4_rte( uint4 );
    double4 __OVERLOAD__ convert_double4_rte( ulong4 );
    double4 __OVERLOAD__ convert_double4_rte( char4 );
    double4 __OVERLOAD__ convert_double4_rte( short4 );
    double4 __OVERLOAD__ convert_double4_rte( int4 );
    double4 __OVERLOAD__ convert_double4_rte( long4 );
    double4 __OVERLOAD__ convert_double4_rte( float4 );
    double4 __OVERLOAD__ convert_double4_rte( double4 );
    double8 __OVERLOAD__ convert_double8_rte( uchar8 );
    double8 __OVERLOAD__ convert_double8_rte( ushort8 );
    double8 __OVERLOAD__ convert_double8_rte( uint8 );
    double8 __OVERLOAD__ convert_double8_rte( ulong8 );
    double8 __OVERLOAD__ convert_double8_rte( char8 );
    double8 __OVERLOAD__ convert_double8_rte( short8 );
    double8 __OVERLOAD__ convert_double8_rte( int8 );
    double8 __OVERLOAD__ convert_double8_rte( long8 );
    double8 __OVERLOAD__ convert_double8_rte( float8 );
    double8 __OVERLOAD__ convert_double8_rte( double8 );
    double16 __OVERLOAD__ convert_double16_rte( uchar16 );
    double16 __OVERLOAD__ convert_double16_rte( ushort16 );
    double16 __OVERLOAD__ convert_double16_rte( uint16 );
    double16 __OVERLOAD__ convert_double16_rte( ulong16 );
    double16 __OVERLOAD__ convert_double16_rte( char16 );
    double16 __OVERLOAD__ convert_double16_rte( short16 );
    double16 __OVERLOAD__ convert_double16_rte( int16 );
    double16 __OVERLOAD__ convert_double16_rte( long16 );
    double16 __OVERLOAD__ convert_double16_rte( float16 );
    double16 __OVERLOAD__ convert_double16_rte( double16 );
    // convert_double{,2,3,4,8,16}_rtp: explicit conversions to double with the
    // "_rtp" suffix, declared for the scalar and for 2/3/4/8/16-wide vectors.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); the argument has the same
    // width as the result.
    // NOTE(review): by OpenCL C naming "_rtp" should mean round toward positive
    // infinity; this header only declares the overloads -- confirm against the
    // spec.
    double __OVERLOAD__ convert_double_rtp( uchar );
    double __OVERLOAD__ convert_double_rtp( ushort );
    double __OVERLOAD__ convert_double_rtp( uint );
    double __OVERLOAD__ convert_double_rtp( ulong );
    double __OVERLOAD__ convert_double_rtp( char );
    double __OVERLOAD__ convert_double_rtp( short );
    double __OVERLOAD__ convert_double_rtp( int );
    double __OVERLOAD__ convert_double_rtp( long );
    double __OVERLOAD__ convert_double_rtp( float );
    double __OVERLOAD__ convert_double_rtp( double );
    double2 __OVERLOAD__ convert_double2_rtp( uchar2 );
    double2 __OVERLOAD__ convert_double2_rtp( ushort2 );
    double2 __OVERLOAD__ convert_double2_rtp( uint2 );
    double2 __OVERLOAD__ convert_double2_rtp( ulong2 );
    double2 __OVERLOAD__ convert_double2_rtp( char2 );
    double2 __OVERLOAD__ convert_double2_rtp( short2 );
    double2 __OVERLOAD__ convert_double2_rtp( int2 );
    double2 __OVERLOAD__ convert_double2_rtp( long2 );
    double2 __OVERLOAD__ convert_double2_rtp( float2 );
    double2 __OVERLOAD__ convert_double2_rtp( double2 );
    double3 __OVERLOAD__ convert_double3_rtp( uchar3 );
    double3 __OVERLOAD__ convert_double3_rtp( ushort3 );
    double3 __OVERLOAD__ convert_double3_rtp( uint3 );
    double3 __OVERLOAD__ convert_double3_rtp( ulong3 );
    double3 __OVERLOAD__ convert_double3_rtp( char3 );
    double3 __OVERLOAD__ convert_double3_rtp( short3 );
    double3 __OVERLOAD__ convert_double3_rtp( int3 );
    double3 __OVERLOAD__ convert_double3_rtp( long3 );
    double3 __OVERLOAD__ convert_double3_rtp( float3 );
    double3 __OVERLOAD__ convert_double3_rtp( double3 );
    double4 __OVERLOAD__ convert_double4_rtp( uchar4 );
    double4 __OVERLOAD__ convert_double4_rtp( ushort4 );
    double4 __OVERLOAD__ convert_double4_rtp( uint4 );
    double4 __OVERLOAD__ convert_double4_rtp( ulong4 );
    double4 __OVERLOAD__ convert_double4_rtp( char4 );
    double4 __OVERLOAD__ convert_double4_rtp( short4 );
    double4 __OVERLOAD__ convert_double4_rtp( int4 );
    double4 __OVERLOAD__ convert_double4_rtp( long4 );
    double4 __OVERLOAD__ convert_double4_rtp( float4 );
    double4 __OVERLOAD__ convert_double4_rtp( double4 );
    double8 __OVERLOAD__ convert_double8_rtp( uchar8 );
    double8 __OVERLOAD__ convert_double8_rtp( ushort8 );
    double8 __OVERLOAD__ convert_double8_rtp( uint8 );
    double8 __OVERLOAD__ convert_double8_rtp( ulong8 );
    double8 __OVERLOAD__ convert_double8_rtp( char8 );
    double8 __OVERLOAD__ convert_double8_rtp( short8 );
    double8 __OVERLOAD__ convert_double8_rtp( int8 );
    double8 __OVERLOAD__ convert_double8_rtp( long8 );
    double8 __OVERLOAD__ convert_double8_rtp( float8 );
    double8 __OVERLOAD__ convert_double8_rtp( double8 );
    double16 __OVERLOAD__ convert_double16_rtp( uchar16 );
    double16 __OVERLOAD__ convert_double16_rtp( ushort16 );
    double16 __OVERLOAD__ convert_double16_rtp( uint16 );
    double16 __OVERLOAD__ convert_double16_rtp( ulong16 );
    double16 __OVERLOAD__ convert_double16_rtp( char16 );
    double16 __OVERLOAD__ convert_double16_rtp( short16 );
    double16 __OVERLOAD__ convert_double16_rtp( int16 );
    double16 __OVERLOAD__ convert_double16_rtp( long16 );
    double16 __OVERLOAD__ convert_double16_rtp( float16 );
    double16 __OVERLOAD__ convert_double16_rtp( double16 );
    // convert_double{,2,3,4,8,16}_rtn: explicit conversions to double with the
    // "_rtn" suffix, declared for the scalar and for 2/3/4/8/16-wide vectors.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); the argument has the same
    // width as the result.
    // NOTE(review): by OpenCL C naming "_rtn" should mean round toward negative
    // infinity; this header only declares the overloads -- confirm against the
    // spec.
    double __OVERLOAD__ convert_double_rtn( uchar );
    double __OVERLOAD__ convert_double_rtn( ushort );
    double __OVERLOAD__ convert_double_rtn( uint );
    double __OVERLOAD__ convert_double_rtn( ulong );
    double __OVERLOAD__ convert_double_rtn( char );
    double __OVERLOAD__ convert_double_rtn( short );
    double __OVERLOAD__ convert_double_rtn( int );
    double __OVERLOAD__ convert_double_rtn( long );
    double __OVERLOAD__ convert_double_rtn( float );
    double __OVERLOAD__ convert_double_rtn( double );
    double2 __OVERLOAD__ convert_double2_rtn( uchar2 );
    double2 __OVERLOAD__ convert_double2_rtn( ushort2 );
    double2 __OVERLOAD__ convert_double2_rtn( uint2 );
    double2 __OVERLOAD__ convert_double2_rtn( ulong2 );
    double2 __OVERLOAD__ convert_double2_rtn( char2 );
    double2 __OVERLOAD__ convert_double2_rtn( short2 );
    double2 __OVERLOAD__ convert_double2_rtn( int2 );
    double2 __OVERLOAD__ convert_double2_rtn( long2 );
    double2 __OVERLOAD__ convert_double2_rtn( float2 );
    double2 __OVERLOAD__ convert_double2_rtn( double2 );
    double3 __OVERLOAD__ convert_double3_rtn( uchar3 );
    double3 __OVERLOAD__ convert_double3_rtn( ushort3 );
    double3 __OVERLOAD__ convert_double3_rtn( uint3 );
    double3 __OVERLOAD__ convert_double3_rtn( ulong3 );
    double3 __OVERLOAD__ convert_double3_rtn( char3 );
    double3 __OVERLOAD__ convert_double3_rtn( short3 );
    double3 __OVERLOAD__ convert_double3_rtn( int3 );
    double3 __OVERLOAD__ convert_double3_rtn( long3 );
    double3 __OVERLOAD__ convert_double3_rtn( float3 );
    double3 __OVERLOAD__ convert_double3_rtn( double3 );
    double4 __OVERLOAD__ convert_double4_rtn( uchar4 );
    double4 __OVERLOAD__ convert_double4_rtn( ushort4 );
    double4 __OVERLOAD__ convert_double4_rtn( uint4 );
    double4 __OVERLOAD__ convert_double4_rtn( ulong4 );
    double4 __OVERLOAD__ convert_double4_rtn( char4 );
    double4 __OVERLOAD__ convert_double4_rtn( short4 );
    double4 __OVERLOAD__ convert_double4_rtn( int4 );
    double4 __OVERLOAD__ convert_double4_rtn( long4 );
    double4 __OVERLOAD__ convert_double4_rtn( float4 );
    double4 __OVERLOAD__ convert_double4_rtn( double4 );
    double8 __OVERLOAD__ convert_double8_rtn( uchar8 );
    double8 __OVERLOAD__ convert_double8_rtn( ushort8 );
    double8 __OVERLOAD__ convert_double8_rtn( uint8 );
    double8 __OVERLOAD__ convert_double8_rtn( ulong8 );
    double8 __OVERLOAD__ convert_double8_rtn( char8 );
    double8 __OVERLOAD__ convert_double8_rtn( short8 );
    double8 __OVERLOAD__ convert_double8_rtn( int8 );
    double8 __OVERLOAD__ convert_double8_rtn( long8 );
    double8 __OVERLOAD__ convert_double8_rtn( float8 );
    double8 __OVERLOAD__ convert_double8_rtn( double8 );
    double16 __OVERLOAD__ convert_double16_rtn( uchar16 );
    double16 __OVERLOAD__ convert_double16_rtn( ushort16 );
    double16 __OVERLOAD__ convert_double16_rtn( uint16 );
    double16 __OVERLOAD__ convert_double16_rtn( ulong16 );
    double16 __OVERLOAD__ convert_double16_rtn( char16 );
    double16 __OVERLOAD__ convert_double16_rtn( short16 );
    double16 __OVERLOAD__ convert_double16_rtn( int16 );
    double16 __OVERLOAD__ convert_double16_rtn( long16 );
    double16 __OVERLOAD__ convert_double16_rtn( float16 );
    double16 __OVERLOAD__ convert_double16_rtn( double16 );
    // convert_double{,2,3,4,8,16}_rtz: explicit conversions to double with the
    // "_rtz" suffix, declared for the scalar and for 2/3/4/8/16-wide vectors.
    // Each width has one overload per source element type (uchar, ushort, uint,
    // ulong, char, short, int, long, float, double); the argument has the same
    // width as the result.
    // NOTE(review): by OpenCL C naming "_rtz" should mean round toward zero;
    // this header only declares the overloads -- confirm against the spec.
    double __OVERLOAD__ convert_double_rtz( uchar );
    double __OVERLOAD__ convert_double_rtz( ushort );
    double __OVERLOAD__ convert_double_rtz( uint );
    double __OVERLOAD__ convert_double_rtz( ulong );
    double __OVERLOAD__ convert_double_rtz( char );
    double __OVERLOAD__ convert_double_rtz( short );
    double __OVERLOAD__ convert_double_rtz( int );
    double __OVERLOAD__ convert_double_rtz( long );
    double __OVERLOAD__ convert_double_rtz( float );
    double __OVERLOAD__ convert_double_rtz( double );
    double2 __OVERLOAD__ convert_double2_rtz( uchar2 );
    double2 __OVERLOAD__ convert_double2_rtz( ushort2 );
    double2 __OVERLOAD__ convert_double2_rtz( uint2 );
    double2 __OVERLOAD__ convert_double2_rtz( ulong2 );
    double2 __OVERLOAD__ convert_double2_rtz( char2 );
    double2 __OVERLOAD__ convert_double2_rtz( short2 );
    double2 __OVERLOAD__ convert_double2_rtz( int2 );
    double2 __OVERLOAD__ convert_double2_rtz( long2 );
    double2 __OVERLOAD__ convert_double2_rtz( float2 );
    double2 __OVERLOAD__ convert_double2_rtz( double2 );
    double3 __OVERLOAD__ convert_double3_rtz( uchar3 );
    double3 __OVERLOAD__ convert_double3_rtz( ushort3 );
    double3 __OVERLOAD__ convert_double3_rtz( uint3 );
    double3 __OVERLOAD__ convert_double3_rtz( ulong3 );
    double3 __OVERLOAD__ convert_double3_rtz( char3 );
    double3 __OVERLOAD__ convert_double3_rtz( short3 );
    double3 __OVERLOAD__ convert_double3_rtz( int3 );
    double3 __OVERLOAD__ convert_double3_rtz( long3 );
    double3 __OVERLOAD__ convert_double3_rtz( float3 );
    double3 __OVERLOAD__ convert_double3_rtz( double3 );
    double4 __OVERLOAD__ convert_double4_rtz( uchar4 );
    double4 __OVERLOAD__ convert_double4_rtz( ushort4 );
    double4 __OVERLOAD__ convert_double4_rtz( uint4 );
    double4 __OVERLOAD__ convert_double4_rtz( ulong4 );
    double4 __OVERLOAD__ convert_double4_rtz( char4 );
    double4 __OVERLOAD__ convert_double4_rtz( short4 );
    double4 __OVERLOAD__ convert_double4_rtz( int4 );
    double4 __OVERLOAD__ convert_double4_rtz( long4 );
    double4 __OVERLOAD__ convert_double4_rtz( float4 );
    double4 __OVERLOAD__ convert_double4_rtz( double4 );
    double8 __OVERLOAD__ convert_double8_rtz( uchar8 );
    double8 __OVERLOAD__ convert_double8_rtz( ushort8 );
    double8 __OVERLOAD__ convert_double8_rtz( uint8 );
    double8 __OVERLOAD__ convert_double8_rtz( ulong8 );
    double8 __OVERLOAD__ convert_double8_rtz( char8 );
    double8 __OVERLOAD__ convert_double8_rtz( short8 );
    double8 __OVERLOAD__ convert_double8_rtz( int8 );
    double8 __OVERLOAD__ convert_double8_rtz( long8 );
    double8 __OVERLOAD__ convert_double8_rtz( float8 );
    double8 __OVERLOAD__ convert_double8_rtz( double8 );
    double16 __OVERLOAD__ convert_double16_rtz( uchar16 );
    double16 __OVERLOAD__ convert_double16_rtz( ushort16 );
    double16 __OVERLOAD__ convert_double16_rtz( uint16 );
    double16 __OVERLOAD__ convert_double16_rtz( ulong16 );
    double16 __OVERLOAD__ convert_double16_rtz( char16 );
    double16 __OVERLOAD__ convert_double16_rtz( short16 );
    double16 __OVERLOAD__ convert_double16_rtz( int16 );
    double16 __OVERLOAD__ convert_double16_rtz( long16 );
    double16 __OVERLOAD__ convert_double16_rtz( float16 );
    double16 __OVERLOAD__ convert_double16_rtz( double16 );

#endif

// 6.2.4.2 as_typen
// Every as_<type>( value ) macro hands its argument, together with the
// destination type spelled in the macro name, to __builtin_astype.

// Scalar destinations.
#define as_char( _v )       __builtin_astype( _v, char )
#define as_uchar( _v )      __builtin_astype( _v, uchar )
#define as_short( _v )      __builtin_astype( _v, short )
#define as_ushort( _v )     __builtin_astype( _v, ushort )
#define as_int( _v )        __builtin_astype( _v, int )
#define as_uint( _v )       __builtin_astype( _v, uint )
#define as_float( _v )      __builtin_astype( _v, float )
#define as_long( _v )       __builtin_astype( _v, long )
#define as_ulong( _v )      __builtin_astype( _v, ulong )
#define as_double( _v )     __builtin_astype( _v, double )

// Size- and pointer-related integer destinations.
#define as_size_t( _v )     __builtin_astype( _v, size_t )
#define as_intptr_t( _v )   __builtin_astype( _v, intptr_t )
#define as_uintptr_t( _v )  __builtin_astype( _v, uintptr_t )
#define as_ptrdiff_t( _v )  __builtin_astype( _v, ptrdiff_t )

// char vectors.
#define as_char2( _v )      __builtin_astype( _v, char2 )
#define as_char3( _v )      __builtin_astype( _v, char3 )
#define as_char4( _v )      __builtin_astype( _v, char4 )
#define as_char8( _v )      __builtin_astype( _v, char8 )
#define as_char16( _v )     __builtin_astype( _v, char16 )

// uchar vectors.
#define as_uchar2( _v )     __builtin_astype( _v, uchar2 )
#define as_uchar3( _v )     __builtin_astype( _v, uchar3 )
#define as_uchar4( _v )     __builtin_astype( _v, uchar4 )
#define as_uchar8( _v )     __builtin_astype( _v, uchar8 )
#define as_uchar16( _v )    __builtin_astype( _v, uchar16 )

// short vectors.
#define as_short2( _v )     __builtin_astype( _v, short2 )
#define as_short3( _v )     __builtin_astype( _v, short3 )
#define as_short4( _v )     __builtin_astype( _v, short4 )
#define as_short8( _v )     __builtin_astype( _v, short8 )
#define as_short16( _v )    __builtin_astype( _v, short16 )

// ushort vectors.
#define as_ushort2( _v )    __builtin_astype( _v, ushort2 )
#define as_ushort3( _v )    __builtin_astype( _v, ushort3 )
#define as_ushort4( _v )    __builtin_astype( _v, ushort4 )
#define as_ushort8( _v )    __builtin_astype( _v, ushort8 )
#define as_ushort16( _v )   __builtin_astype( _v, ushort16 )

// int vectors.
#define as_int2( _v )       __builtin_astype( _v, int2 )
#define as_int3( _v )       __builtin_astype( _v, int3 )
#define as_int4( _v )       __builtin_astype( _v, int4 )
#define as_int8( _v )       __builtin_astype( _v, int8 )
#define as_int16( _v )      __builtin_astype( _v, int16 )

// uint vectors.
#define as_uint2( _v )      __builtin_astype( _v, uint2 )
#define as_uint3( _v )      __builtin_astype( _v, uint3 )
#define as_uint4( _v )      __builtin_astype( _v, uint4 )
#define as_uint8( _v )      __builtin_astype( _v, uint8 )
#define as_uint16( _v )     __builtin_astype( _v, uint16 )

// float vectors.
#define as_float2( _v )     __builtin_astype( _v, float2 )
#define as_float3( _v )     __builtin_astype( _v, float3 )
#define as_float4( _v )     __builtin_astype( _v, float4 )
#define as_float8( _v )     __builtin_astype( _v, float8 )
#define as_float16( _v )    __builtin_astype( _v, float16 )

// long vectors.
#define as_long2( _v )      __builtin_astype( _v, long2 )
#define as_long3( _v )      __builtin_astype( _v, long3 )
#define as_long4( _v )      __builtin_astype( _v, long4 )
#define as_long8( _v )      __builtin_astype( _v, long8 )
#define as_long16( _v )     __builtin_astype( _v, long16 )

// ulong vectors.
#define as_ulong2( _v )     __builtin_astype( _v, ulong2 )
#define as_ulong3( _v )     __builtin_astype( _v, ulong3 )
#define as_ulong4( _v )     __builtin_astype( _v, ulong4 )
#define as_ulong8( _v )     __builtin_astype( _v, ulong8 )
#define as_ulong16( _v )    __builtin_astype( _v, ulong16 )

// double vectors.
#define as_double2( _v )    __builtin_astype( _v, double2 )
#define as_double3( _v )    __builtin_astype( _v, double3 )
#define as_double4( _v )    __builtin_astype( _v, double4 )
#define as_double8( _v )    __builtin_astype( _v, double8 )
#define as_double16( _v )   __builtin_astype( _v, double16 )

// 5.5 Function Qualifiers
// Both spellings expand to the same annotate("kernel") attribute; the
// unprefixed spelling is written as an alias of the prefixed one.
#define __kernel    __attribute__((annotate("kernel")))
#define kernel      __kernel

// 5.6 Image Access Qualifiers
// Three distinct annotations exist ("__rd", "__wr", "__rw"). The long
// prefixed spellings carry the attribute; every other spelling is an alias.
#define __read_only     __attribute__((annotate("__rd")))
#define __write_only    __attribute__((annotate("__wr")))
#define __read_write    __attribute__((annotate("__rw")))

// Unprefixed spellings.
#define read_only       __read_only
#define write_only      __write_only
#define read_write      __read_write

// Short spellings (only read and write have one).
#define __rd            __read_only
#define __wr            __write_only

// 5.9.1 - Work-item Functions
// These are defined for GPUs by their respective builtin implementations.
// get_work_dim takes no argument and returns uint; every other query takes
// a dimension index (dimindx) and returns size_t.
uint   __OVERLOAD__ get_work_dim     (void);
size_t __OVERLOAD__ get_global_size  (uint dimindx);
size_t __OVERLOAD__ get_global_id    (uint dimindx);
size_t __OVERLOAD__ get_global_offset(uint dimindx);
size_t __OVERLOAD__ get_local_size   (uint dimindx);
size_t __OVERLOAD__ get_local_id     (uint dimindx);
size_t __OVERLOAD__ get_num_groups   (uint dimindx);
size_t __OVERLOAD__ get_group_id     (uint dimindx);

// 5.9.2 - Math Defines
// MAXFLOAT is a decimal literal cast to float; the remaining three constants
// are produced by the compiler's float builtins.
#define MAXFLOAT    ((float)3.40282346638528860e+38)
#define HUGE_VALF   __builtin_huge_valf()
#define INFINITY    __builtin_inff()
#define NAN         __builtin_nanf("")

/* Section 5.9.2, Table 5.6 */
/*
 * acos .. atan2pi.
 * Names declared with a __cl_ prefix are re-exposed under their public
 * spelling by a function-like macro unless __CL_INTERNAL_SKIP_MATH_DEFINES__
 * is defined; when it is defined, the first guard below emits a #warning.
 * The *pi variants are declared directly under their public names.
 */
__CLFN_FD_1FD(__cl_acos);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  acos
#   define acos(__x) __cl_acos(__x)
#else
#   warning  __CL_INTERNAL_SKIP_MATH_DEFINES__  defined.   Many builtin definitions will be missing.
#endif

__CLFN_FD_1FD(__cl_acosh);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  acosh
#   define acosh(__x) __cl_acosh(__x)
#endif

__CLFN_FD_1FD(acospi);

__CLFN_FD_1FD(__cl_asin);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  asin
#   define asin(__x) __cl_asin(__x)
#endif

__CLFN_FD_1FD(__cl_asinh);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  asinh
#   define asinh(__x) __cl_asinh(__x)
#endif

__CLFN_FD_1FD(asinpi);

__CLFN_FD_1FD(__cl_atan);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  atan
#   define atan(__x) __cl_atan(__x)
#endif

__CLFN_FD_1FD(__cl_atanh);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  atanh
#   define atanh(__x) __cl_atanh(__x)
#endif

__CLFN_FD_1FD(atanpi);

__CLFN_FD_2FD(__cl_atan2);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  atan2
#   define atan2(__x, __y) __cl_atan2(__x, __y)
#endif

__CLFN_FD_2FD(atan2pi);

/*
 * cbrt .. exp2.
 * Each __cl_-prefixed declaration is followed by a guarded function-like
 * macro that maps the public name onto it; cospi is declared directly.
 */
__CLFN_FD_1FD(__cl_cbrt);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  cbrt
#   define cbrt(__x) __cl_cbrt(__x)
#endif

__CLFN_FD_1FD(__cl_ceil);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  ceil
#   define ceil(__x) __cl_ceil(__x)
#endif

__CLFN_FD_2FD(__cl_copysign);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  copysign
#   define copysign(__x, __y) __cl_copysign(__x, __y)
#endif

__CLFN_FD_1FD(__cl_cosh);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  cosh
#   define cosh(__x) __cl_cosh(__x)
#endif

__CLFN_FD_1FD(cospi);

__CLFN_FD_1FD(__cl_erf);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  erf
#   define erf(__x) __cl_erf(__x)
#endif

__CLFN_FD_1FD(__cl_erfc);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  erfc
#   define erfc(__x) __cl_erfc(__x)
#endif

__CLFN_FD_1FD(__cl_exp);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  exp
#   define exp(__x) __cl_exp(__x)
#endif

__CLFN_FD_1FD(__cl_exp2);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  exp2
#   define exp2(__x) __cl_exp2(__x)
#endif

// exp10: declared as __cl_exp10 and, unless __CL_INTERNAL_SKIP_MATH_DEFINES__
// is defined, exposed under its public name through a function-like macro.
__CLFN_FD_1FD(__cl_exp10);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
    #undef exp10
    #define exp10(__x) __cl_exp10(__x)
#endif

/*
 * expm1 .. fmod.
 * Each __cl_-prefixed declaration is followed by a guarded function-like
 * macro that maps the public name onto it. fmax and fmin are additionally
 * passed through __CLFN_FD_2FD_EXT.
 */
__CLFN_FD_1FD(__cl_expm1);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  expm1
#   define expm1(__x) __cl_expm1(__x)
#endif

__CLFN_FD_1FD(__cl_fabs);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  fabs
#   define fabs(__x) __cl_fabs(__x)
#endif

__CLFN_FD_2FD(__cl_fdim);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  fdim
#   define fdim(__x, __y) __cl_fdim(__x, __y)
#endif

__CLFN_FD_1FD(__cl_floor);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  floor
#   define floor(__x) __cl_floor(__x)
#endif

__CLFN_FD_3FD(__cl_fma);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  fma
#   define fma(__x, __y, __z) __cl_fma(__x, __y, __z)
#endif

__CLFN_FD_2FD(__cl_fmax);
__CLFN_FD_2FD_EXT(__cl_fmax);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  fmax
#   define fmax(__x, __y) __cl_fmax(__x, __y)
#endif

__CLFN_FD_2FD(__cl_fmin);
__CLFN_FD_2FD_EXT(__cl_fmin);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  fmin
#   define fmin(__x, __y) __cl_fmin(__x, __y)
#endif

__CLFN_FD_2FD(__cl_fmod);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  fmod
#   define fmod(__x, __y) __cl_fmod(__x, __y)
#endif

                                            
/*
 * fract .. logb.
 * fract is declared directly under its public name; the other entries are
 * declared with a __cl_ prefix and remapped by a guarded function-like macro.
 */
__CLFN_FD_1FD_1PFD(fract);

__CLFN_FD_1FD_1PI(__cl_frexp);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  frexp
#   define frexp(__x, __y) __cl_frexp(__x, __y)
#endif

__CLFN_FD_2FD(__cl_hypot);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  hypot
#   define hypot(__x, __y) __cl_hypot(__x, __y)
#endif

__CLFN_I_1FD(__cl_ilogb);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  ilogb
#   define ilogb(__x) __cl_ilogb(__x)
#endif

__CLFN_FD_1FD_1I(__cl_ldexp);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  ldexp
#   define ldexp(__x, __y) __cl_ldexp(__x, __y)
#endif

__CLFN_FD_1FD(__cl_lgamma);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  lgamma
#   define lgamma(__x) __cl_lgamma(__x)
#endif

__CLFN_FD_1FD_1PI(__cl_lgamma_r);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  lgamma_r
#   define lgamma_r(__x, __y) __cl_lgamma_r(__x, __y)
#endif

__CLFN_FD_1FD(__cl_log);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  log
#   define log(__x) __cl_log(__x)
#endif

__CLFN_FD_1FD(__cl_log10);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  log10
#   define log10(__x) __cl_log10(__x)
#endif

__CLFN_FD_1FD(__cl_log1p);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  log1p
#   define log1p(__x) __cl_log1p(__x)
#endif

__CLFN_FD_1FD(__cl_log2);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  log2
#   define log2(__x) __cl_log2(__x)
#endif

__CLFN_FD_1FD(__cl_logb);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  logb
#   define logb(__x) __cl_logb(__x)
#endif

/*
 * mad .. rint.
 * mad, maxmag and minmag are declared directly under their public names;
 * the other entries are declared with a __cl_ prefix and remapped by a
 * guarded function-like macro.
 */
__CLFN_FD_3FD(mad);
__CLFN_FD_2FD(maxmag);
__CLFN_FD_2FD(minmag);

__CLFN_FD_1FD_1PFD(__cl_modf);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  modf
#   define modf(__x, __y) __cl_modf(__x, __y)
#endif

__CLFN_FD_1UIL(__cl_nan);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  nan
#   define nan(__x) __cl_nan(__x)
#endif

__CLFN_FD_2FD(__cl_nextafter);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  nextafter
#   define nextafter(__x, __y) __cl_nextafter(__x, __y)
#endif

__CLFN_FD_2FD(__cl_pow);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  pow
#   define pow(__x, __y) __cl_pow(__x, __y)
#endif

__CLFN_FD_1FD_1I(__cl_pown);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  pown
#   define pown(__x, __y) __cl_pown(__x, __y)
#endif

__CLFN_FD_2FD(__cl_powr);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  powr
#   define powr(__x, __y) __cl_powr(__x, __y)
#endif

__CLFN_FD_2FD(__cl_remainder);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  remainder
#   define remainder(__x, __y) __cl_remainder(__x, __y)
#endif

__CLFN_FD_2FD_1PI(__cl_remquo);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  remquo
#   define remquo(__x, __y, __z) __cl_remquo(__x, __y, __z)
#endif

__CLFN_FD_1FD(__cl_rint);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  rint
#   define rint(__x) __cl_rint(__x)
#endif

/*
 * rootn .. trunc (cos, sin and tan are grouped here as well).
 * rootn, rsqrt, sincos, sinpi and tanpi are declared directly under their
 * public names; the other entries are declared with a __cl_ prefix and
 * remapped by a guarded function-like macro.
 */
__CLFN_FD_1FD_1I(rootn);

__CLFN_FD_1FD(__cl_round);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  round
#   define round(__x) __cl_round(__x)
#endif

__CLFN_FD_1FD(rsqrt);

__CLFN_FD_1FD(__cl_cos);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  cos
#   define cos(__x) __cl_cos(__x)
#endif

__CLFN_FD_1FD(__cl_sin);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  sin
#   define sin(__x) __cl_sin(__x)
#endif

__CLFN_FD_1FD(__cl_tan);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  tan
#   define tan(__x) __cl_tan(__x)
#endif

__CLFN_FD_1FD_1PFD(sincos);

__CLFN_FD_1FD(__cl_sinh);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  sinh
#   define sinh(__x) __cl_sinh(__x)
#endif

__CLFN_FD_1FD(sinpi);

__CLFN_FD_1FD(__cl_sqrt);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  sqrt
#   define sqrt(__x) __cl_sqrt(__x)
#endif

__CLFN_FD_1FD(__cl_tanh);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  tanh
#   define tanh(__x) __cl_tanh(__x)
#endif

__CLFN_FD_1FD(tanpi);

__CLFN_FD_1FD(__cl_tgamma);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  tgamma
#   define tgamma(__x) __cl_tgamma(__x)
#endif

__CLFN_FD_1FD(__cl_trunc);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  trunc
#   define trunc(__x) __cl_trunc(__x)
#endif

/* Section 5.9.2, Table 5.7 */
// half_* entry points. All of them go through the __CLFN_F_1F / __CLFN_F_2F
// declaration macros (presumably float-only, one- and two-argument forms;
// the macro definitions are not visible here -- TODO confirm).
__CLFN_F_1F(half_cos);
__CLFN_F_2F(half_divide);
__CLFN_F_1F(half_exp);
__CLFN_F_1F(half_exp2);
__CLFN_F_1F(half_exp10);
__CLFN_F_1F(half_log);
__CLFN_F_1F(half_log2);
__CLFN_F_1F(half_log10);
__CLFN_F_2F(half_powr);
__CLFN_F_1F(half_recip);
__CLFN_F_1F(half_rsqrt);
__CLFN_F_1F(half_sin);
__CLFN_F_1F(half_sqrt);
__CLFN_F_1F(half_tan);

// native_* entry points: the same fourteen names as the half_* list above,
// declared through the same two macros.
__CLFN_F_1F(native_cos);
__CLFN_F_2F(native_divide);
__CLFN_F_1F(native_exp);
__CLFN_F_1F(native_exp2);
__CLFN_F_1F(native_exp10);
__CLFN_F_1F(native_log);
__CLFN_F_1F(native_log2);
__CLFN_F_1F(native_log10);
__CLFN_F_2F(native_powr);
__CLFN_F_1F(native_recip);
__CLFN_F_1F(native_rsqrt);
__CLFN_F_1F(native_sin);
__CLFN_F_1F(native_sqrt);
__CLFN_F_1F(native_tan);

/* Section 5.9.3 */
                                             
// abs is declared as __cl_abs and, unless __CL_INTERNAL_SKIP_MATH_DEFINES__
// is defined, exposed under its public name through a function-like macro.
__CLFN_UCSIL_1CSIL(__cl_abs);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  abs
#   define abs(__x) __cl_abs(__x)
#endif

// abs_diff and add_sat are declared directly under their public names.
__CLFN_UCSIL_2CSIL(abs_diff);
__CLFN_ALL(add_sat);

/* upsample */
// Every overload takes a high part (ahi) and an unsigned low part (alo) with
// the same number of components and returns the next larger integer element
// type (char -> short, short -> int, int -> long, and the unsigned
// equivalents). Overloads are listed per element type in 1/2/3/4/8/16 order.

// char, uchar -> short
short    __OVERLOAD_INLINE__ upsample(char   ahi, uchar   alo);
short2   __OVERLOAD_INLINE__ upsample(char2  ahi, uchar2  alo);
short3   __OVERLOAD_INLINE__ upsample(char3  ahi, uchar3  alo);
short4   __OVERLOAD_INLINE__ upsample(char4  ahi, uchar4  alo);
short8   __OVERLOAD_INLINE__ upsample(char8  ahi, uchar8  alo);
short16  __OVERLOAD_INLINE__ upsample(char16 ahi, uchar16 alo);

// uchar, uchar -> ushort
ushort   __OVERLOAD_INLINE__ upsample(uchar   ahi, uchar   alo);
ushort2  __OVERLOAD_INLINE__ upsample(uchar2  ahi, uchar2  alo);
ushort3  __OVERLOAD_INLINE__ upsample(uchar3  ahi, uchar3  alo);
ushort4  __OVERLOAD_INLINE__ upsample(uchar4  ahi, uchar4  alo);
ushort8  __OVERLOAD_INLINE__ upsample(uchar8  ahi, uchar8  alo);
ushort16 __OVERLOAD_INLINE__ upsample(uchar16 ahi, uchar16 alo);

// short, ushort -> int
int      __OVERLOAD_INLINE__ upsample(short   ahi, ushort   alo);
int2     __OVERLOAD_INLINE__ upsample(short2  ahi, ushort2  alo);
int3     __OVERLOAD_INLINE__ upsample(short3  ahi, ushort3  alo);
int4     __OVERLOAD_INLINE__ upsample(short4  ahi, ushort4  alo);
int8     __OVERLOAD_INLINE__ upsample(short8  ahi, ushort8  alo);
int16    __OVERLOAD_INLINE__ upsample(short16 ahi, ushort16 alo);

// ushort, ushort -> uint
uint     __OVERLOAD_INLINE__ upsample(ushort   ahi, ushort   alo);
uint2    __OVERLOAD_INLINE__ upsample(ushort2  ahi, ushort2  alo);
uint3    __OVERLOAD_INLINE__ upsample(ushort3  ahi, ushort3  alo);
uint4    __OVERLOAD_INLINE__ upsample(ushort4  ahi, ushort4  alo);
uint8    __OVERLOAD_INLINE__ upsample(ushort8  ahi, ushort8  alo);
uint16   __OVERLOAD_INLINE__ upsample(ushort16 ahi, ushort16 alo);

// int, uint -> long
long     __OVERLOAD_INLINE__ upsample(int   ahi, uint   alo);
long2    __OVERLOAD_INLINE__ upsample(int2  ahi, uint2  alo);
long3    __OVERLOAD_INLINE__ upsample(int3  ahi, uint3  alo);
long4    __OVERLOAD_INLINE__ upsample(int4  ahi, uint4  alo);
long8    __OVERLOAD_INLINE__ upsample(int8  ahi, uint8  alo);
long16   __OVERLOAD_INLINE__ upsample(int16 ahi, uint16 alo);

// uint, uint -> ulong
ulong    __OVERLOAD_INLINE__ upsample(uint   ahi,  uint   alo);
ulong2   __OVERLOAD_INLINE__ upsample(uint2  ahi,  uint2  alo);
ulong3   __OVERLOAD_INLINE__ upsample(uint3  ahi3, uint3  alo3);
ulong4   __OVERLOAD_INLINE__ upsample(uint4  ahi,  uint4  alo);
ulong8   __OVERLOAD_INLINE__ upsample(uint8  ahi,  uint8  alo);
ulong16  __OVERLOAD_INLINE__ upsample(uint16 ahi,  uint16 alo);

// Integer built-ins declared through the __CLFN_* declaration macros.
// NOTE(review): the macro definitions are outside this part of the file, so
// the exact overload sets are not visible here; the suffixes presumably
// encode argument counts and element types -- confirm against the macros.
__CLFN_ALL(hadd);
__CLFN_ALL(rhadd);
__CLFN_CSIL_1CSIL(clz);
__CLFN_I3(mad_hi);
__CLFN_I3(mad_hi_sat);
__CLFN_I3(mad_sat);

// max is declared three times, once per macro family
// (__CLFN_ALL, __CLFN_ALL_MODD and the floating-point __CLFN_FD_2FD).
__CLFN_ALL(max);
__CLFN_ALL_MODD(max);
__CLFN_FD_2FD(max);
// pairwise_add: same-type form plus widening forms; each __CLFN_R2_A line
// names a result element type followed by an argument element type.
__CLFN_ALL_MODD(pairwise_add);
__CLFN_R2_A(short, char, pairwise_add);
__CLFN_R2_A(int, short, pairwise_add);
__CLFN_R2_A(long, int, pairwise_add);
__CLFN_R2_A(ushort, uchar, pairwise_add);
__CLFN_R2_A(uint, ushort, pairwise_add);
__CLFN_R2_A(ulong, uint, pairwise_add);

__CLFN_ALL(pairwise_max);
__CLFN_ALL(pairwise_min);
__CLFN_CSIL_1CSIL(popcount);

// min mirrors the three max declarations above.
__CLFN_ALL(min);
__CLFN_ALL_MODD(min);
__CLFN_FD_2FD(min);

__CLFN_I_3CSIL(msum);
__CLFN_ALL(rotate);
__CLFN_ALL(mul_hi);
__CLFN_ALL(sub_sat);

__CLFN_ALL(mul_hi2_rte_sat);
__CLFN_ALL(mul_hi2_sat);

// Widening forms: the first macro argument is the result element type, the
// second the argument element type. mul2_sat is declared for signed
// short -> int and int -> long only.
__CLFN_R2_A2(int, short, mul2_sat);
__CLFN_R2_A2(long, int, mul2_sat);

// mad / mad2_sat use __CLFN_R2_A3R while msub / msub2_sat use __CLFN_R2_A3.
// NOTE(review): what the trailing R changes is not visible here -- confirm.
__CLFN_R2_A3R(short, char, mad);
__CLFN_R2_A3R(ushort, uchar, mad);
__CLFN_R2_A3R(int, short, mad);
__CLFN_R2_A3R(uint, ushort, mad);
__CLFN_R2_A3R(long, int, mad);
__CLFN_R2_A3R(ulong, uint, mad);

__CLFN_R2_A3(short, char, msub);
__CLFN_R2_A3(ushort, uchar, msub);
__CLFN_R2_A3(int, short, msub);
__CLFN_R2_A3(uint, ushort, msub);
__CLFN_R2_A3(long, int, msub);
__CLFN_R2_A3(ulong, uint, msub);

__CLFN_R2_A3R(short, char, mad2_sat);
__CLFN_R2_A3R(ushort, uchar, mad2_sat);
__CLFN_R2_A3R(int, short, mad2_sat);
__CLFN_R2_A3R(uint, ushort, mad2_sat);
__CLFN_R2_A3R(long, int, mad2_sat);
__CLFN_R2_A3R(ulong, uint, mad2_sat);

__CLFN_R2_A3(short, char, msub2_sat);
__CLFN_R2_A3(ushort, uchar, msub2_sat);
__CLFN_R2_A3(int, short, msub2_sat);
__CLFN_R2_A3(uint, ushort, msub2_sat);
__CLFN_R2_A3(long, int, msub2_sat);
__CLFN_R2_A3(ulong, uint, msub2_sat);


// 24-bit multiply helpers: three-argument mad24 and two-argument mul24.
__CLFN_I_3I(mad24);
__CLFN_I_2I(mul24);

/* Section 5.9.4 */
// clamp, floating-point overloads in which x and both bounds have the same
// type. The overloads that combine a vector x with scalar bounds are
// declared further down, after the integer clamp overloads.
float    __OVERLOAD__ clamp(float x, float minval, float maxval);
float2   __OVERLOAD__ clamp(float2 x, float2 minval, float2 maxval);
float3   __OVERLOAD__ clamp(float3 x, float3 minval, float3 maxval);
float4   __OVERLOAD__ clamp(float4 x, float4 minval, float4 maxval);
float8   __OVERLOAD__ clamp(float8 x, float8 minval, float8 maxval);
float16  __OVERLOAD__ clamp(float16 x, float16 minval, float16 maxval);
double   __OVERLOAD__ clamp(double x, double minval, double maxval);
double2  __OVERLOAD__ clamp(double2 x, double2 minval, double2 maxval);
double3  __OVERLOAD__ clamp(double3 x, double3 minval, double3 maxval);
double4  __OVERLOAD__ clamp(double4 x, double4 minval, double4 maxval);
double8  __OVERLOAD__ clamp(double8 x, double8 minval, double8 maxval);
double16 __OVERLOAD__ clamp(double16 x, double16 minval, double16 maxval);
char __OVERLOAD__ clamp( char x, char min, char max );
short __OVERLOAD__ clamp( short x, short min, short max );
int __OVERLOAD__ clamp( int x, int min, int max );
long __OVERLOAD__ clamp( long x, long min, long max );
uchar __OVERLOAD__ clamp( uchar x, uchar min, uchar max );
ushort __OVERLOAD__ clamp( ushort x, ushort min, ushort max );
uint __OVERLOAD__ clamp( uint x, uint min, uint max );
ulong __OVERLOAD__ clamp( ulong x, ulong min, ulong max );
char2 __OVERLOAD__ clamp( char2 x, char2 min, char2 max );
short2 __OVERLOAD__ clamp( short2 x, short2 min, short2 max );
int2 __OVERLOAD__ clamp( int2 x, int2 min, int2 max );
long2 __OVERLOAD__ clamp( long2 x, long2 min, long2 max );
uchar2 __OVERLOAD__ clamp( uchar2 x, uchar2 min, uchar2 max );
ushort2 __OVERLOAD__ clamp( ushort2 x, ushort2 min, ushort2 max );
uint2 __OVERLOAD__ clamp( uint2 x, uint2 min, uint2 max );
ulong2 __OVERLOAD__ clamp( ulong2 x, ulong2 min, ulong2 max );
char3 __OVERLOAD__ clamp( char3 x, char3 min, char3 max );
short3 __OVERLOAD__ clamp( short3 x, short3 min, short3 max );
int3 __OVERLOAD__ clamp( int3 x3, int3 min3, int3 max3 );
long3 __OVERLOAD__ clamp( long3 x3, long3 min3, long3 max3 );
uchar3 __OVERLOAD__ clamp( uchar3 x, uchar3 min, uchar3 max );
ushort3 __OVERLOAD__ clamp( ushort3 x, ushort3 min, ushort3 max );
uint3 __OVERLOAD__ clamp( uint3 x3, uint3 min3, uint3 max3 );
ulong3 __OVERLOAD__ clamp( ulong3 x3, ulong3 min3, ulong3 max3 );
char4 __OVERLOAD__ clamp( char4 x, char4 min, char4 max );
short4 __OVERLOAD__ clamp( short4 x, short4 min, short4 max );
int4 __OVERLOAD__ clamp( int4 x, int4 min, int4 max );
long4 __OVERLOAD__ clamp( long4 x, long4 min, long4 max );
uchar4 __OVERLOAD__ clamp( uchar4 x, uchar4 min, uchar4 max );
ushort4 __OVERLOAD__ clamp( ushort4 x, ushort4 min, ushort4 max );
uint4 __OVERLOAD__ clamp( uint4 x, uint4 min, uint4 max );
ulong4 __OVERLOAD__ clamp( ulong4 x, ulong4 min, ulong4 max );
char8 __OVERLOAD__ clamp( char8 x, char8 min, char8 max );
short8 __OVERLOAD__ clamp( short8 x, short8 min, short8 max );
int8 __OVERLOAD__ clamp( int8 x, int8 min, int8 max );
long8 __OVERLOAD__ clamp( long8 x, long8 min, long8 max );
uchar8 __OVERLOAD__ clamp( uchar8 x, uchar8 min, uchar8 max );
ushort8 __OVERLOAD__ clamp( ushort8 x, ushort8 min, ushort8 max );
uint8 __OVERLOAD__ clamp( uint8 x, uint8 min, uint8 max );
ulong8 __OVERLOAD__ clamp( ulong8 x, ulong8 min, ulong8 max );
char16 __OVERLOAD__ clamp( char16 x, char16 min, char16 max );
short16 __OVERLOAD__ clamp( short16 x, short16 min, short16 max );
int16 __OVERLOAD__ clamp( int16 x, int16 min, int16 max );
long16 __OVERLOAD__ clamp( long16 x, long16 min, long16 max );
ushort16 __OVERLOAD__ clamp( ushort16 x, ushort16 min, ushort16 max );
uchar16 __OVERLOAD__ clamp( uchar16 x, uchar16 min, uchar16 max );
uint16 __OVERLOAD__ clamp( uint16 x, uint16 min, uint16 max );
ulong16 __OVERLOAD__ clamp( ulong16 x, ulong16 min, ulong16 max );
// clamp, integer overloads that take a vector x together with two scalar
// arguments (y, z) of the vector's element type.
char2    __OVERLOAD__ clamp(char2 x, char y, char z);
char3    __OVERLOAD__ clamp(char3 x, char y, char z);
char4    __OVERLOAD__ clamp(char4 x, char y, char z);
char8    __OVERLOAD__ clamp(char8 x, char y, char z);
char16   __OVERLOAD__ clamp(char16 x, char y, char z);

uchar2   __OVERLOAD__ clamp(uchar2 x, uchar y, uchar z);
uchar3   __OVERLOAD__ clamp(uchar3 x, uchar y, uchar z);
uchar4   __OVERLOAD__ clamp(uchar4 x, uchar y, uchar z);
uchar8   __OVERLOAD__ clamp(uchar8 x, uchar y, uchar z);
uchar16  __OVERLOAD__ clamp(uchar16 x, uchar y, uchar z);

short2   __OVERLOAD__ clamp(short2 x, short y, short z);
short3   __OVERLOAD__ clamp(short3 x, short y, short z);
short4   __OVERLOAD__ clamp(short4 x, short y, short z);
short8   __OVERLOAD__ clamp(short8 x, short y, short z);
short16  __OVERLOAD__ clamp(short16 x, short y, short z);

ushort2  __OVERLOAD__ clamp(ushort2 x, ushort y, ushort z);
ushort3  __OVERLOAD__ clamp(ushort3 x, ushort y, ushort z);
ushort4  __OVERLOAD__ clamp(ushort4 x, ushort y, ushort z);
ushort8  __OVERLOAD__ clamp(ushort8 x, ushort y, ushort z);
ushort16 __OVERLOAD__ clamp(ushort16 x, ushort y, ushort z);

int2     __OVERLOAD__ clamp(int2 x, int y, int z);
int3     __OVERLOAD__ clamp(int3 x, int y, int z);
int4     __OVERLOAD__ clamp(int4 x, int y, int z);
int8     __OVERLOAD__ clamp(int8 x, int y, int z);
int16    __OVERLOAD__ clamp(int16 x, int y, int z);

uint2    __OVERLOAD__ clamp(uint2 x, uint y, uint z);
uint3    __OVERLOAD__ clamp(uint3 x, uint y, uint z);
uint4    __OVERLOAD__ clamp(uint4 x, uint y, uint z);
uint8    __OVERLOAD__ clamp(uint8 x, uint y, uint z);
uint16   __OVERLOAD__ clamp(uint16 x, uint y, uint z);

long2    __OVERLOAD__ clamp(long2 x, long y, long z);
long3    __OVERLOAD__ clamp(long3 x, long y, long z);
long4    __OVERLOAD__ clamp(long4 x, long y, long z);
long8    __OVERLOAD__ clamp(long8 x, long y, long z);
long16   __OVERLOAD__ clamp(long16 x, long y, long z);

ulong2   __OVERLOAD__ clamp(ulong2 x, ulong y, ulong z);
ulong3   __OVERLOAD__ clamp(ulong3 x, ulong y, ulong z);
ulong4   __OVERLOAD__ clamp(ulong4 x, ulong y, ulong z);
ulong8   __OVERLOAD__ clamp(ulong8 x, ulong y, ulong z);
ulong16  __OVERLOAD__ clamp(ulong16 x, ulong y, ulong z);
// clamp, floating-point overloads that take a vector x together with scalar
// minval / maxval of the vector's element type.
float2   __OVERLOAD__ clamp( float2 x,   float minval,  float maxval );
float3   __OVERLOAD__ clamp( float3 x,   float minval,  float maxval );
float4   __OVERLOAD__ clamp( float4 x,   float minval,  float maxval );
float8   __OVERLOAD__ clamp( float8 x,   float minval,  float maxval );
float16  __OVERLOAD__ clamp( float16 x,  float minval,  float maxval );
double2  __OVERLOAD__ clamp( double2 x,  double minval, double maxval );
double3  __OVERLOAD__ clamp( double3 x,  double minval, double maxval );
double4  __OVERLOAD__ clamp( double4 x,  double minval, double maxval );
double8  __OVERLOAD__ clamp( double8 x,  double minval, double maxval );
double16 __OVERLOAD__ clamp( double16 x, double minval, double maxval );

__CLFN_FD_1FD(degrees);

__CLFN_1FD_2FD_MODD(distance);
__CLFN_1FD_2FD_MODD(dot);

// cross is declared only for 3- and 4-component float and double vectors.
// These are ordinary prototypes, so no line continuation is needed after them.
float3 __OVERLOAD__ cross(float3 x, float3 y);
float4 __OVERLOAD__ cross(float4 x, float4 y);
double3 __OVERLOAD__ cross(double3 x, double3 y);
double4 __OVERLOAD__ cross(double4 x, double4 y);

__CLFN_FD_1FD_MODD(length);
// mix is declared through two macro families (__CLFN_FD_3FD and
// __CLFN_FD_3FD_MOD).
__CLFN_FD_3FD(mix);
__CLFN_FD_3FD_MOD(mix);
__CLFN_FD_1FD_MODD4(normalize);
__CLFN_FD_1FD(radians);

// step: the macro-generated overloads come first, followed by explicit
// overloads that pair a scalar edge with a vector x.
__CLFN_FD_2FD(step);
float2   __OVERLOAD__ step( float edge,  float2 x );
float3   __OVERLOAD__ step( float edge,  float3 x );
float4   __OVERLOAD__ step( float edge,  float4 x );
float8   __OVERLOAD__ step( float edge,  float8 x );
float16  __OVERLOAD__ step( float edge,  float16 x );
double2  __OVERLOAD__ step( double edge, double2 x );
double3  __OVERLOAD__ step( double edge, double3 x );
double4  __OVERLOAD__ step( double edge, double4 x );
double8  __OVERLOAD__ step( double edge, double8 x );
double16 __OVERLOAD__ step( double edge, double16 x );

// smoothstep: the macro-generated overloads come first, followed by explicit
// overloads that pair two scalar edges with a vector x.
__CLFN_FD_3FD(smoothstep);
float2   __OVERLOAD__ smoothstep(float edge0, float edge1, float2 x);
float3   __OVERLOAD__ smoothstep(float edge0, float edge1, float3 x);
float4   __OVERLOAD__ smoothstep(float edge0, float edge1, float4 x);
float8   __OVERLOAD__ smoothstep(float edge0, float edge1, float8 x);
float16  __OVERLOAD__ smoothstep(float edge0, float edge1, float16 x);
double2  __OVERLOAD__ smoothstep(double e0, double e1, double2 x);
double3  __OVERLOAD__ smoothstep(double e0, double e1, double3 x);
double4  __OVERLOAD__ smoothstep(double e0, double e1, double4 x);
double8  __OVERLOAD__ smoothstep(double e0, double e1, double8 x);
double16 __OVERLOAD__ smoothstep(double e0, double e1, double16 x);

// sign, followed by the fast_* geometric variants, all declared through
// __CLFN_* declaration macros.
__CLFN_FD_1FD(sign);

__CLFN_1F_2F_MODD(fast_distance);
__CLFN_F_1F_MODD4(fast_length);
__CLFN_F_1F(fast_normalize);

/* Section 5.9.5 */
// The six comparison predicates map straight onto the built-in comparison
// operators. Each argument is parenthesized and evaluated exactly once.
#define isless(__a, __b)            ((__a) <  (__b))
#define islessequal(__a, __b)       ((__a) <= (__b))
#define isgreater(__a, __b)         ((__a) >  (__b))
#define isgreaterequal(__a, __b)    ((__a) >= (__b))
#define isequal(__a, __b)           ((__a) == (__b))
#define isnotequal(__a, __b)        ((__a) != (__b))

/*
 * islessgreater .. isunordered.
 * isordered is declared directly under its public name; the other entries
 * are declared with a __cl_ prefix and, unless
 * __CL_INTERNAL_SKIP_MATH_DEFINES__ is defined, remapped by a function-like
 * macro of the public name.
 */
__CLFN_IL_2FD(__cl_islessgreater);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  islessgreater
#   define islessgreater(__x, __y) __cl_islessgreater(__x, __y)
#endif

__CLFN_IL_1FD_MODD(__cl_isfinite);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  isfinite
#   define isfinite(__x) __cl_isfinite(__x)
#endif

__CLFN_IL_1FD_MODD(__cl_isinf);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  isinf
#   define isinf(__x) __cl_isinf(__x)
#endif

__CLFN_IL_1FD_MODD(__cl_isnan);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  isnan
#   define isnan(__x) __cl_isnan(__x)
#endif

__CLFN_IL_1FD_MODD(__cl_isnormal);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  isnormal
#   define isnormal(__x) __cl_isnormal(__x)
#endif

__CLFN_IL_2FD(isordered);

__CLFN_IL_2FD(__cl_isunordered);
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  isunordered
#   define isunordered(__x, __y) __cl_isunordered(__x, __y)
#endif

// signbit is declared as __cl_signbit. The float overloads return int / intN;
// for double, the scalar overload returns int while the vector overloads
// return longN.
int    __OVERLOAD__ __cl_signbit(float x);
int2   __OVERLOAD__ __cl_signbit(float2 x);
int3   __OVERLOAD__ __cl_signbit(float3 x);
int4   __OVERLOAD__ __cl_signbit(float4 x);
int8   __OVERLOAD__ __cl_signbit(float8 x);
int16  __OVERLOAD__ __cl_signbit(float16 x);
int    __OVERLOAD__ __cl_signbit(double x);
long2  __OVERLOAD__ __cl_signbit(double2 x);
long3  __OVERLOAD__ __cl_signbit(double3 x);
long4  __OVERLOAD__ __cl_signbit(double4 x);
long8  __OVERLOAD__ __cl_signbit(double8 x);
long16 __OVERLOAD__ __cl_signbit(double16 x);
// Public name, unless the remapping macros are suppressed.
#ifndef __CL_INTERNAL_SKIP_MATH_DEFINES__
#   undef  signbit
#   define signbit(__x) __cl_signbit(__x)
#endif

// any / all: both are declared through __CLFN_I_ALL.
// NOTE(review): the macro definition is not visible in this part of the
// file; presumably it yields int-returning overloads -- confirm.
__CLFN_I_ALL(any);
__CLFN_I_ALL(all);

// select, scalar overloads. x and y share the result type; the mask m is an
// integer of the same width, in a signed and an unsigned flavour.

// Signed mask, integer operands.
char   __OVERLOAD__ select(char x, char y, char m);
uchar  __OVERLOAD__ select(uchar x, uchar y, char m);
short  __OVERLOAD__ select(short x, short y, short m);
ushort __OVERLOAD__ select(ushort x, ushort y, short m);
int    __OVERLOAD__ select(int x, int y, int m);
uint   __OVERLOAD__ select(uint x, uint y, int m);
long   __OVERLOAD__ select(long x, long y, long m);
ulong  __OVERLOAD__ select(ulong x, ulong y, long m);

// Unsigned mask, integer operands.
char   __OVERLOAD__ select(char x, char y, uchar m);
uchar  __OVERLOAD__ select(uchar x, uchar y, uchar m);
short  __OVERLOAD__ select(short x, short y, ushort m);
ushort __OVERLOAD__ select(ushort x, ushort y, ushort m);
int    __OVERLOAD__ select(int x, int y, uint m);
uint   __OVERLOAD__ select(uint x, uint y, uint m);
long   __OVERLOAD__ select(long x, long y, ulong m);
ulong  __OVERLOAD__ select(ulong x, ulong y, ulong m);

// Floating-point operands: float uses int / uint masks, double uses
// long / ulong masks.
float  __OVERLOAD__ select(float x, float y, int m);
float  __OVERLOAD__ select(float x, float y, uint m);
double __OVERLOAD__ select(double x, double y, long m);
double __OVERLOAD__ select(double x, double y, ulong m);
char2 __OVERLOAD__ select( char2 x, char2 y, char2 m );
char3 __OVERLOAD__ select( char3 x, char3 y, char3 m );
char4 __OVERLOAD__ select( char4 x, char4 y, char4 m );
char8 __OVERLOAD__ select( char8 x, char8 y, char8 m );
char16 __OVERLOAD__ select( char16 vx, char16 vy, char16 vm );
uchar2 __OVERLOAD__ select( uchar2 x, uchar2 y, char2 m );
uchar3 __OVERLOAD__ select( uchar3 x, uchar3 y, char3 m );
uchar4 __OVERLOAD__ select( uchar4 x, uchar4 y, char4 m );
uchar8 __OVERLOAD__ select( uchar8 x, uchar8 y, char8 m );
uchar16 __OVERLOAD__ select( uchar16 vx, uchar16 vy, char16 vm );
char2 __OVERLOAD__ select( char2 x, char2 y, uchar2 m );
char3 __OVERLOAD__ select( char3 x, char3 y, uchar3 m );
char4 __OVERLOAD__ select( char4 x, char4 y, uchar4 m );
char8 __OVERLOAD__ select( char8 x, char8 y, uchar8 m );
char16 __OVERLOAD__ select( char16 x, char16 y, uchar16 m );
uchar2 __OVERLOAD__ select( uchar2 x, uchar2 y, uchar2 m );
uchar3 __OVERLOAD__ select( uchar3 x, uchar3 y, uchar3 m );
uchar4 __OVERLOAD__ select( uchar4 x, uchar4 y, uchar4 m );
uchar8 __OVERLOAD__ select( uchar8 x, uchar8 y, uchar8 m );
uchar16 __OVERLOAD__ select( uchar16 x, uchar16 y, uchar16 m );
short2 __OVERLOAD__ select( short2 x, short2 y, short2 m );
short3 __OVERLOAD__ select( short3 x, short3 y, short3 m );
short4 __OVERLOAD__ select( short4 x, short4 y, short4 m );
short8 __OVERLOAD__ select( short8 vx, short8 vy, short8 vm );
short16 __OVERLOAD__ select( short16 vx, short16 vy, short16 vm );
ushort2 __OVERLOAD__ select( ushort2 x, ushort2 y, short2 m );
ushort3 __OVERLOAD__ select( ushort3 x, ushort3 y, short3 m );
ushort4 __OVERLOAD__ select( ushort4 x, ushort4 y, short4 m );
ushort8 __OVERLOAD__ select( ushort8 vx, ushort8 vy, short8 vm );
ushort16 __OVERLOAD__ select( ushort16 vx, ushort16 vy, short16 vm );
short2 __OVERLOAD__ select( short2 x, short2 y, ushort2 m );
short3 __OVERLOAD__ select( short3 x, short3 y, ushort3 m );
short4 __OVERLOAD__ select( short4 x, short4 y, ushort4 m );
short8 __OVERLOAD__ select( short8 x, short8 y, ushort8 m );
short16 __OVERLOAD__ select( short16 x, short16 y, ushort16 m );
ushort2 __OVERLOAD__ select( ushort2 x, ushort2 y, ushort2 m );
ushort3 __OVERLOAD__ select( ushort3 x, ushort3 y, ushort3 m );
ushort4 __OVERLOAD__ select( ushort4 x, ushort4 y, ushort4 m );
ushort8 __OVERLOAD__ select( ushort8 x, ushort8 y, ushort8 m );
ushort16 __OVERLOAD__ select( ushort16 x, ushort16 y, ushort16 m );
int2 __OVERLOAD__ select( int2 x, int2 y, int2 m );
int3 __OVERLOAD__ select( int3 x, int3 y, int3 m );
int4 __OVERLOAD__ select( int4 vx, int4 vy, int4 vm );
int8 __OVERLOAD__ select( int8 vx, int8 vy, int8 vm );
int16 __OVERLOAD__ select( int16 vx, int16 vy, int16 vm );
uint2 __OVERLOAD__ select( uint2 x, uint2 y, int2 m );
uint3 __OVERLOAD__ select( uint3 x, uint3 y, int3 m );
uint4 __OVERLOAD__ select( uint4 x, uint4 y, int4 m );
uint8 __OVERLOAD__ select( uint8 x, uint8 y, int8 m );
uint16 __OVERLOAD__ select( uint16 x, uint16 y, int16 m );
int2 __OVERLOAD__ select( int2 x, int2 y, uint2 m );
int3 __OVERLOAD__ select( int3 x, int3 y, uint3 m );
int4 __OVERLOAD__ select( int4 x, int4 y, uint4 m );
int8 __OVERLOAD__ select( int8 x, int8 y, uint8 m );
int16 __OVERLOAD__ select( int16 x, int16 y, uint16 m );
uint2 __OVERLOAD__ select( uint2 x, uint2 y, uint2 m );
uint3 __OVERLOAD__ select( uint3 x, uint3 y, uint3 m );
uint4 __OVERLOAD__ select( uint4 x, uint4 y, uint4 m );
uint8 __OVERLOAD__ select( uint8 x, uint8 y, uint8 m );
uint16 __OVERLOAD__ select( uint16 x, uint16 y, uint16 m );
long2 __OVERLOAD__ select( long2 x, long2 y, long2 m );
long3 __OVERLOAD__ select( long3 x, long3 y, long3 m );
long4 __OVERLOAD__ select( long4 x, long4 y, long4 m );
long8 __OVERLOAD__ select( long8 x, long8 y, long8 m );
long16 __OVERLOAD__ select( long16 x, long16 y, long16 m );
ulong2 __OVERLOAD__ select( ulong2 x, ulong2 y, long2 m );
ulong3 __OVERLOAD__ select( ulong3 x, ulong3 y, long3 m );
ulong4 __OVERLOAD__ select( ulong4 x, ulong4 y, long4 m );
ulong8 __OVERLOAD__ select( ulong8 x, ulong8 y, long8 m );
ulong16 __OVERLOAD__ select( ulong16 x, ulong16 y, long16 m );
long2 __OVERLOAD__ select( long2 x, long2 y, ulong2 m );
long3 __OVERLOAD__ select( long3 x, long3 y, ulong3 m );
long4 __OVERLOAD__ select( long4 x, long4 y, ulong4 m );
long8 __OVERLOAD__ select( long8 x, long8 y, ulong8 m );
long16 __OVERLOAD__ select( long16 x, long16 y, ulong16 m );
ulong2 __OVERLOAD__ select( ulong2 x, ulong2 y, ulong2 m );
ulong3 __OVERLOAD__ select( ulong3 x, ulong3 y, ulong3 m );
ulong4 __OVERLOAD__ select( ulong4 x, ulong4 y, ulong4 m );
ulong8 __OVERLOAD__ select( ulong8 x, ulong8 y, ulong8 m );
ulong16 __OVERLOAD__ select( ulong16 x, ulong16 y, ulong16 m );
float2 __OVERLOAD__ select( float2 x, float2 y, int2 m );
float3 __OVERLOAD__ select(float3 x, float3 y, __int3_SPI z);
float4 __OVERLOAD__ select( float4 x, float4 y, int4 m );
float8 __OVERLOAD__ select( float8 x, float8 y, int8 m );
float16 __OVERLOAD__ select( float16 x, float16 y, int16 m );
float2 __OVERLOAD__ select( float2 x, float2 y, uint2 m );
float3 __OVERLOAD__ select( float3 x, float3 y, uint3 m );
float4 __OVERLOAD__ select( float4 x, float4 y, uint4 m );
float8 __OVERLOAD__ select( float8 x, float8 y, uint8 m );
float16 __OVERLOAD__ select( float16 x, float16 y, uint16 m );
double2 __OVERLOAD__ select( double2 x, double2 y, long2 m ) ;
double3 __OVERLOAD__ select( double3 x, double3 y, long3 m ) ;
double4 __OVERLOAD__ select( double4 x, double4 y, long4 m ) ;
double8 __OVERLOAD__ select( double8 x, double8 y, long8 m ) ;
double16 __OVERLOAD__ select( double16 x, double16 y, long16 m ) ;
double2 __OVERLOAD__ select( double2 x, double2 y, ulong2 m ) ;
double3 __OVERLOAD__ select( double3 x, double3 y, ulong3 m ) ;
double4 __OVERLOAD__ select( double4 x, double4 y, ulong4 m ) ;
double8 __OVERLOAD__ select( double8 x, double8 y, ulong8 m ) ;
double16 __OVERLOAD__ select( double16 x, double16 y, ulong16 m ) ;

// bitselect overloads. Integer value types come in two variants (mask `m` of
// the signed or of the unsigned integer type with the same width and element
// count); float and double variants take a mask of their own type.

// Integer types with a signed mask.
char     __OVERLOAD__ bitselect(char x, char y, char m);
char2    __OVERLOAD__ bitselect(char2 x, char2 y, char2 m);
char3    __OVERLOAD__ bitselect(char3 x, char3 y, char3 m);
char4    __OVERLOAD__ bitselect(char4 x, char4 y, char4 m);
char8    __OVERLOAD__ bitselect(char8 x, char8 y, char8 m);
char16   __OVERLOAD__ bitselect(char16 x, char16 y, char16 m);
uchar    __OVERLOAD__ bitselect(uchar x, uchar y, char m);
uchar2   __OVERLOAD__ bitselect(uchar2 x, uchar2 y, char2 m);
uchar3   __OVERLOAD__ bitselect(uchar3 x, uchar3 y, char3 m);
uchar4   __OVERLOAD__ bitselect(uchar4 x, uchar4 y, char4 m);
uchar8   __OVERLOAD__ bitselect(uchar8 x, uchar8 y, char8 m);
uchar16  __OVERLOAD__ bitselect(uchar16 x, uchar16 y, char16 m);
short    __OVERLOAD__ bitselect(short x, short y, short m);
short2   __OVERLOAD__ bitselect(short2 x, short2 y, short2 m);
short3   __OVERLOAD__ bitselect(short3 x, short3 y, short3 m);
short4   __OVERLOAD__ bitselect(short4 x, short4 y, short4 m);
short8   __OVERLOAD__ bitselect(short8 x, short8 y, short8 m);
short16  __OVERLOAD__ bitselect(short16 x, short16 y, short16 m);
ushort   __OVERLOAD__ bitselect(ushort x, ushort y, short m);
ushort2  __OVERLOAD__ bitselect(ushort2 x, ushort2 y, short2 m);
ushort3  __OVERLOAD__ bitselect(ushort3 x, ushort3 y, short3 m);
ushort4  __OVERLOAD__ bitselect(ushort4 x, ushort4 y, short4 m);
ushort8  __OVERLOAD__ bitselect(ushort8 x, ushort8 y, short8 m);
ushort16 __OVERLOAD__ bitselect(ushort16 x, ushort16 y, short16 m);
int      __OVERLOAD__ bitselect(int x, int y, int m);
int2     __OVERLOAD__ bitselect(int2 x, int2 y, int2 m);
int3     __OVERLOAD__ bitselect(int3 x, int3 y, int3 m);
int4     __OVERLOAD__ bitselect(int4 x, int4 y, int4 m);
int8     __OVERLOAD__ bitselect(int8 x, int8 y, int8 m);
int16    __OVERLOAD__ bitselect(int16 x, int16 y, int16 m);
uint     __OVERLOAD__ bitselect(uint x, uint y, int m);
uint2    __OVERLOAD__ bitselect(uint2 x, uint2 y, int2 m);
uint3    __OVERLOAD__ bitselect(uint3 x, uint3 y, int3 m);
uint4    __OVERLOAD__ bitselect(uint4 x, uint4 y, int4 m);
uint8    __OVERLOAD__ bitselect(uint8 x, uint8 y, int8 m);
uint16   __OVERLOAD__ bitselect(uint16 x, uint16 y, int16 m);
long     __OVERLOAD__ bitselect(long x, long y, long m);
long2    __OVERLOAD__ bitselect(long2 x, long2 y, long2 m);
long3    __OVERLOAD__ bitselect(long3 x, long3 y, long3 m);
long4    __OVERLOAD__ bitselect(long4 x, long4 y, long4 m);
long8    __OVERLOAD__ bitselect(long8 x, long8 y, long8 m);
long16   __OVERLOAD__ bitselect(long16 x, long16 y, long16 m);
ulong    __OVERLOAD__ bitselect(ulong x, ulong y, long m);
ulong2   __OVERLOAD__ bitselect(ulong2 x, ulong2 y, long2 m);
ulong3   __OVERLOAD__ bitselect(ulong3 x, ulong3 y, long3 m);
ulong4   __OVERLOAD__ bitselect(ulong4 x, ulong4 y, long4 m);
ulong8   __OVERLOAD__ bitselect(ulong8 x, ulong8 y, long8 m);
ulong16  __OVERLOAD__ bitselect(ulong16 x, ulong16 y, long16 m);

// Integer types with an unsigned mask.
char     __OVERLOAD__ bitselect(char x, char y, uchar m);
char2    __OVERLOAD__ bitselect(char2 x, char2 y, uchar2 m);
char3    __OVERLOAD__ bitselect(char3 x, char3 y, uchar3 m);
char4    __OVERLOAD__ bitselect(char4 x, char4 y, uchar4 m);
char8    __OVERLOAD__ bitselect(char8 x, char8 y, uchar8 m);
char16   __OVERLOAD__ bitselect(char16 x, char16 y, uchar16 m);
uchar    __OVERLOAD__ bitselect(uchar x, uchar y, uchar m);
uchar2   __OVERLOAD__ bitselect(uchar2 x, uchar2 y, uchar2 m);
uchar3   __OVERLOAD__ bitselect(uchar3 x, uchar3 y, uchar3 m);
uchar4   __OVERLOAD__ bitselect(uchar4 x, uchar4 y, uchar4 m);
uchar8   __OVERLOAD__ bitselect(uchar8 x, uchar8 y, uchar8 m);
uchar16  __OVERLOAD__ bitselect(uchar16 x, uchar16 y, uchar16 m);
short    __OVERLOAD__ bitselect(short x, short y, ushort m);
short2   __OVERLOAD__ bitselect(short2 x, short2 y, ushort2 m);
short3   __OVERLOAD__ bitselect(short3 x, short3 y, ushort3 m);
short4   __OVERLOAD__ bitselect(short4 x, short4 y, ushort4 m);
short8   __OVERLOAD__ bitselect(short8 x, short8 y, ushort8 m);
short16  __OVERLOAD__ bitselect(short16 x, short16 y, ushort16 m);
ushort   __OVERLOAD__ bitselect(ushort x, ushort y, ushort m);
ushort2  __OVERLOAD__ bitselect(ushort2 x, ushort2 y, ushort2 m);
ushort3  __OVERLOAD__ bitselect(ushort3 x, ushort3 y, ushort3 m);
ushort4  __OVERLOAD__ bitselect(ushort4 x, ushort4 y, ushort4 m);
ushort8  __OVERLOAD__ bitselect(ushort8 x, ushort8 y, ushort8 m);
ushort16 __OVERLOAD__ bitselect(ushort16 x, ushort16 y, ushort16 m);
int      __OVERLOAD__ bitselect(int x, int y, uint m);
int2     __OVERLOAD__ bitselect(int2 x, int2 y, uint2 m);
int3     __OVERLOAD__ bitselect(int3 x, int3 y, uint3 m);
int4     __OVERLOAD__ bitselect(int4 x, int4 y, uint4 m);
int8     __OVERLOAD__ bitselect(int8 x, int8 y, uint8 m);
int16    __OVERLOAD__ bitselect(int16 x, int16 y, uint16 m);
uint     __OVERLOAD__ bitselect(uint x, uint y, uint m);
uint2    __OVERLOAD__ bitselect(uint2 x, uint2 y, uint2 m);
uint3    __OVERLOAD__ bitselect(uint3 x, uint3 y, uint3 m);
uint4    __OVERLOAD__ bitselect(uint4 x, uint4 y, uint4 m);
uint8    __OVERLOAD__ bitselect(uint8 x, uint8 y, uint8 m);
uint16   __OVERLOAD__ bitselect(uint16 x, uint16 y, uint16 m);
long     __OVERLOAD__ bitselect(long x, long y, ulong m);
long2    __OVERLOAD__ bitselect(long2 x, long2 y, ulong2 m);
long3    __OVERLOAD__ bitselect(long3 x, long3 y, ulong3 m);
long4    __OVERLOAD__ bitselect(long4 x, long4 y, ulong4 m);
long8    __OVERLOAD__ bitselect(long8 x, long8 y, ulong8 m);
long16   __OVERLOAD__ bitselect(long16 x, long16 y, ulong16 m);
ulong    __OVERLOAD__ bitselect(ulong x, ulong y, ulong m);
ulong2   __OVERLOAD__ bitselect(ulong2 x, ulong2 y, ulong2 m);
ulong3   __OVERLOAD__ bitselect(ulong3 x, ulong3 y, ulong3 m);
ulong4   __OVERLOAD__ bitselect(ulong4 x, ulong4 y, ulong4 m);
ulong8   __OVERLOAD__ bitselect(ulong8 x, ulong8 y, ulong8 m);
ulong16  __OVERLOAD__ bitselect(ulong16 x, ulong16 y, ulong16 m);

// Floating-point types: mask has the same type as the operands.
float    __OVERLOAD__ bitselect(float x, float y, float m);
float2   __OVERLOAD__ bitselect(float2 x, float2 y, float2 m);
float3   __OVERLOAD__ bitselect(float3 x, float3 y, float3 m);
float4   __OVERLOAD__ bitselect(float4 x, float4 y, float4 m);
float8   __OVERLOAD__ bitselect(float8 x, float8 y, float8 m);
float16  __OVERLOAD__ bitselect(float16 x, float16 y, float16 m);
double   __OVERLOAD__ bitselect(double x, double y, double m);
double2  __OVERLOAD__ bitselect(double2 x, double2 y, double2 m);
double3  __OVERLOAD__ bitselect(double3 x, double3 y, double3 m);
double4  __OVERLOAD__ bitselect(double4 x, double4 y, double4 m);
double8  __OVERLOAD__ bitselect(double8 x, double8 y, double8 m);
double16 __OVERLOAD__ bitselect(double16 x, double16 y, double16 m);


// Include shared types that have to be visible both here and in the framework
#include "cl_kernel_shared.h"

// Image format description: a plain record of three unsigned int fields.
// NOTE(review): the values stored in channel_order / channel_data_type are
// presumably the constants returned by get_image_channel_order() and
// get_image_channel_data_type() -- confirm against the framework.
typedef struct _cl_image_format_t
{
    unsigned int num_channels;
    unsigned int channel_order;
    unsigned int channel_data_type;
} cl_image_format_t;

// Image handle types: each is a pointer to a tagged struct, qualified with
// the __global address space.
typedef __global struct _image2d_t *image2d_t;
typedef __global struct _image3d_t *image3d_t;
typedef __global struct _cubemap_t *cubemap_t;

// 5.8.4 - Image Stream Read and Write
//
// Each reader comes in three result flavours: read_*f -> float4,
// read_*i -> int4, read_*ui -> uint4.

// 2D images: plain reads accept int2 or float2 coordinates; the mipmap and
// gradient variants are declared for float2 coordinates only.
float4 __OVERLOAD__ read_imagef(image2d_t image, sampler_t sampler, int2 coord);
float4 __OVERLOAD__ read_imagef(image2d_t image, sampler_t sampler, float2 coord);
int4   __OVERLOAD__ read_imagei(image2d_t image, sampler_t sampler, int2 coord);
int4   __OVERLOAD__ read_imagei(image2d_t image, sampler_t sampler, float2 coord);
uint4  __OVERLOAD__ read_imageui(image2d_t image, sampler_t sampler, int2 coord);
uint4  __OVERLOAD__ read_imageui(image2d_t image, sampler_t sampler, float2 coord);
float4 __OVERLOAD__ read_mipmap_imagef(image2d_t image, sampler_t sampler, float miplevel, float2 coord);
int4   __OVERLOAD__ read_mipmap_imagei(image2d_t image, sampler_t sampler, float miplevel, float2 coord);
uint4  __OVERLOAD__ read_mipmap_imageui(image2d_t image, sampler_t sampler, float miplevel, float2 coord);
float4 __OVERLOAD__ read_gradient_imagef(image2d_t image, sampler_t sampler, float2 ddx, float2 ddy, float2 coord);
int4   __OVERLOAD__ read_gradient_imagei(image2d_t image, sampler_t sampler, float2 ddx, float2 ddy, float2 coord);
uint4  __OVERLOAD__ read_gradient_imageui(image2d_t image, sampler_t sampler, float2 ddx, float2 ddy, float2 coord);

// 3D images: same layout as above with int4 / float4 coordinates.
float4 __OVERLOAD__ read_imagef(image3d_t image, sampler_t sampler, int4 coord);
float4 __OVERLOAD__ read_imagef(image3d_t image, sampler_t sampler, float4 coord);
int4   __OVERLOAD__ read_imagei(image3d_t image, sampler_t sampler, int4 coord);
int4   __OVERLOAD__ read_imagei(image3d_t image, sampler_t sampler, float4 coord);
uint4  __OVERLOAD__ read_imageui(image3d_t image, sampler_t sampler, int4 coord);
uint4  __OVERLOAD__ read_imageui(image3d_t image, sampler_t sampler, float4 coord);
float4 __OVERLOAD__ read_mipmap_imagef(image3d_t image, sampler_t sampler, float miplevel, float4 coord);
int4   __OVERLOAD__ read_mipmap_imagei(image3d_t image, sampler_t sampler, float miplevel, float4 coord);
uint4  __OVERLOAD__ read_mipmap_imageui(image3d_t image, sampler_t sampler, float miplevel, float4 coord);
float4 __OVERLOAD__ read_gradient_imagef(image3d_t image, sampler_t sampler, float4 ddx, float4 ddy, float4 coord);
int4   __OVERLOAD__ read_gradient_imagei(image3d_t image, sampler_t sampler, float4 ddx, float4 ddy, float4 coord);
uint4  __OVERLOAD__ read_gradient_imageui(image3d_t image, sampler_t sampler, float4 ddx, float4 ddy, float4 coord);

// Pixel writes. 2D images take int2 coordinates, 3D images take int4; the
// colour argument is float4, int4 or uint4 depending on the function.
void __OVERLOAD__ write_imagef(image2d_t image, int2 coord, float4 color);
void __OVERLOAD__ write_imagef(image3d_t image, int4 coord, float4 color);
void __OVERLOAD__ write_imagei(image2d_t image, int2 coord, int4 color);
void __OVERLOAD__ write_imagei(image3d_t image, int4 coord, int4 color);
void __OVERLOAD__ write_imageui(image2d_t image, int2 coord, uint4 val);
void __OVERLOAD__ write_imageui(image3d_t image, int4 coord, uint4 val);

// Non-overloaded 2D SPI writers for the rgba / bgra unorm-int8 cases, plus the
// macro names that forward to them unchanged.
void __write_imagef_2d_unorm_int8_rgba_apple_SPI(image2d_t image, int2 coord, float4 val);
void __write_imagef_2d_unorm_int8_bgra_apple_SPI(image2d_t image, int2 coord, float4 val);
#define write_imagef_unorm_int8_rgba_apple_SPI(_image, _coord, _val) \
    __write_imagef_2d_unorm_int8_rgba_apple_SPI(_image, _coord, _val)
#define write_imagef_unorm_int8_bgra_apple_SPI(_image, _coord, _val) \
    __write_imagef_2d_unorm_int8_bgra_apple_SPI(_image, _coord, _val)

// Image queries. Every query is overloaded for image2d_t, image3d_t and
// cubemap_t, except get_image_depth, which is declared for image3d_t only.
int  __OVERLOAD__ get_image_width(image2d_t img);
int  __OVERLOAD__ get_image_width(image3d_t img);
int  __OVERLOAD__ get_image_width(cubemap_t img);

int  __OVERLOAD__ get_image_height(image2d_t img);
int  __OVERLOAD__ get_image_height(image3d_t img);
int  __OVERLOAD__ get_image_height(cubemap_t img);

int  __OVERLOAD__ get_image_depth(image3d_t img);

int  __OVERLOAD__ get_image_channel_data_type(image2d_t image);
int  __OVERLOAD__ get_image_channel_data_type(image3d_t image);
int  __OVERLOAD__ get_image_channel_data_type(cubemap_t image);

int  __OVERLOAD__ get_image_channel_order(image2d_t image);
int  __OVERLOAD__ get_image_channel_order(image3d_t image);
int  __OVERLOAD__ get_image_channel_order(cubemap_t image);

// The dimension query returns int2 for 2D images and cubemaps, int4 for 3D.
int2 __OVERLOAD__ get_image_dim(image2d_t image);
int4 __OVERLOAD__ get_image_dim(image3d_t image);
int2 __OVERLOAD__ get_image_dim(cubemap_t image);

int  __OVERLOAD__ get_image_num_miplevels(image2d_t image);
int  __OVERLOAD__ get_image_num_miplevels(image3d_t image);
int  __OVERLOAD__ get_image_num_miplevels(cubemap_t image);

#if defined( __i386__ ) || defined( __x86_64__ ) || defined( __arm__ )
// SPI for CoreImage
// 2D resampling reads. The transposed form writes through four separate
// output pointers (r, g, b, a) and exists in float4- and float8-wide variants;
// the direct form writes through a single float16 pointer.
void __OVERLOAD__ __read_transposed_imagef_resample(__rd image2d_t src, sampler_t smp, float4 x, float4 y,
                                                    float4 *r, float4 *g, float4 *b, float4 *a);
void __OVERLOAD__ __read_transposed_imagef_resample(__rd image2d_t src, sampler_t smp, float8 x, float8 y,
                                                    float8 *r, float8 *g, float8 *b, float8 *a);
// Public spelling of the transposed read; arguments are forwarded unchanged.
#define read_transposed_imagef(_src, _smp, _x, _y, _r, _g, _b, _a) \
    __read_transposed_imagef_resample(_src, _smp, _x, _y, _r, _g, _b, _a)
void __OVERLOAD__ __read_direct_imagef_resample(__rd image2d_t src, sampler_t smp, float4 x, float4 y, float16 *dest);


// 3D resampling reads. The transposed form takes x, y and z coordinates and
// writes through four separate output pointers (r, g, b, a), in float4- and
// float8-wide variants; the direct form writes through a single float16 pointer.
void __OVERLOAD__ __read_transposed_3d_imagef_resample( __rd image3d_t src, sampler_t smp, float4 x,float4 y, float4 z, float4 *r,float4 *g,float4 *b,float4 *a);
void __OVERLOAD__ __read_transposed_3d_imagef_resample( __rd image3d_t src, sampler_t smp, float8 x,float8 y, float8 z, float8 *r,float8 *g,float8 *b,float8 *a);
// Public spelling of the 3D transposed read. It must forward to the
// nine-argument __read_transposed_3d_imagef_resample declared just above: the
// similarly named __read_transposed_imagef_resample is only declared with
// eight arguments and an image2d_t source, so forwarding there cannot resolve.
#define read_transposed_3d_imagef( _src, _smp, _x, _y, _z, _r, _g, _b, _a )     __read_transposed_3d_imagef_resample( _src, _smp, _x, _y, _z, _r, _g, _b, _a )
void __OVERLOAD__ __read_direct_3d_imagef_resample( __rd image3d_t src, sampler_t smp, float4 x,float4 y, float4 z, float16 *dest );

// 2D writes addressed by scalar int x / y. The transposed form takes the four
// channels as separate float4 or float8 values; the direct form takes one float16.
void __OVERLOAD__ write_transposed_imagef(__wr image2d_t dst, int x, int y, float4 r, float4 g, float4 b, float4 a);
void __OVERLOAD__ write_transposed_imagef(__wr image2d_t dst, int x, int y, float8 r, float8 g, float8 b, float8 a);
void __OVERLOAD__ write_direct_imagef(__wr image2d_t dst, int x, int y, float16 v);

// Streaming writes into a 2D image. One overload per source address space
// (__global, __constant, __private, __local); each returns an event_t.

// Channel-separated sources: four const float4 pointers (r, g, b, a).
event_t __OVERLOAD__ __async_work_group_stream_to_image(__wr image2d_t image, size_t x, size_t y, size_t count,
                                                        const __global float4 *r, const __global float4 *g,
                                                        const __global float4 *b, const __global float4 *a);
event_t __OVERLOAD__ __async_work_group_stream_to_image(__wr image2d_t image, size_t x, size_t y, size_t count,
                                                        const __constant float4 *r, const __constant float4 *g,
                                                        const __constant float4 *b, const __constant float4 *a);
event_t __OVERLOAD__ __async_work_group_stream_to_image(__wr image2d_t image, size_t x, size_t y, size_t count,
                                                        const __private float4 *r, const __private float4 *g,
                                                        const __private float4 *b, const __private float4 *a);
event_t __OVERLOAD__ __async_work_group_stream_to_image(__wr image2d_t image, size_t x, size_t y, size_t count,
                                                        const __local float4 *r, const __local float4 *g,
                                                        const __local float4 *b, const __local float4 *a);

// Single-source ("direct") form: one const float4 pointer.
event_t __OVERLOAD__ __async_work_group_stream_to_image_direct(__wr image2d_t image, size_t x, size_t y, size_t count,
                                                               const __global float4 *src);
event_t __OVERLOAD__ __async_work_group_stream_to_image_direct(__wr image2d_t image, size_t x, size_t y, size_t count,
                                                               const __constant float4 *src);
event_t __OVERLOAD__ __async_work_group_stream_to_image_direct(__wr image2d_t image, size_t x, size_t y, size_t count,
                                                               const __private float4 *src);
event_t __OVERLOAD__ __async_work_group_stream_to_image_direct(__wr image2d_t image, size_t x, size_t y, size_t count,
                                                               const __local float4 *src);

                   
// Streaming reads from a 2D image through a sampler, parameterised by float2
// start / stride and a count. One overload per destination address space
// (__private, __global, __local); each returns an event_t.

// Channel-separated destinations: four float4 pointers (r, g, b, a).
event_t __OVERLOAD__ __async_work_group_stream_from_image(__rd image2d_t image, sampler_t sampler,
                                                          float2 start, float2 stride, size_t count,
                                                          __private float4 *r, __private float4 *g,
                                                          __private float4 *b, __private float4 *a);
event_t __OVERLOAD__ __async_work_group_stream_from_image(__rd image2d_t image, sampler_t sampler,
                                                          float2 start, float2 stride, size_t count,
                                                          __global float4 *r, __global float4 *g,
                                                          __global float4 *b, __global float4 *a);
event_t __OVERLOAD__ __async_work_group_stream_from_image(__rd image2d_t image, sampler_t sampler,
                                                          float2 start, float2 stride, size_t count,
                                                          __local float4 *r, __local float4 *g,
                                                          __local float4 *b, __local float4 *a);

// Single-destination ("direct") form: one float4 pointer.
event_t __OVERLOAD__ __async_work_group_stream_from_image_direct(__rd image2d_t image, sampler_t sampler,
                                                                 float2 start, float2 stride, size_t count,
                                                                 __private float4 *dest);
event_t __OVERLOAD__ __async_work_group_stream_from_image_direct(__rd image2d_t image, sampler_t sampler,
                                                                 float2 start, float2 stride, size_t count,
                                                                 __global float4 *dest);
event_t __OVERLOAD__ __async_work_group_stream_from_image_direct(__rd image2d_t image, sampler_t sampler,
                                                                 float2 start, float2 stride, size_t count,
                                                                 __local float4 *dest);


// Gamma SPI entry points (not overloaded). The float4-wide form returns a
// float16, the float8-wide form writes through three output pointers, and the
// scalar form maps a float3 plus a float y to a float3.
// NOTE(review): exact numerical contract is not visible here -- confirm with
// the implementation before relying on it.
float16 __ci_gamma_SPI(float4 r, float4 g, float4 b, float4 y);
void    __ci_gamma_SPI_8(float8 r, float8 g, float8 b, float8 y, float8 *outR, float8 *outG, float8 *outB);
float3  __ci_gamma_scalar_SPI(float3 rgb, float y);
#endif

// NOTE(review): __CLFN_EVENT_ALL and __CLFN_EVENT_ALL4 are defined earlier in
// this header; presumably they declare the per-type overload sets for the
// named built-in -- confirm.
__CLFN_EVENT_ALL(async_work_group_copy);
__CLFN_EVENT_ALL4(async_work_group_strided_copy);

// Not overloaded. Parameter names follow the OpenCL specification's
// wait_group_events(num_events, event_list).
void wait_group_events(int num_events, event_t *event_list);

// prefetch overloads: a const __global pointer plus a size_t n, declared for
// the scalar types and for vector widths 2, 4, 8 and 16.

// Scalars.
void __OVERLOAD__ prefetch(const __global char *p, size_t n);
void __OVERLOAD__ prefetch(const __global uchar *p, size_t n);
void __OVERLOAD__ prefetch(const __global short *p, size_t n);
void __OVERLOAD__ prefetch(const __global ushort *p, size_t n);
void __OVERLOAD__ prefetch(const __global int *p, size_t n);
void __OVERLOAD__ prefetch(const __global uint *p, size_t n);
void __OVERLOAD__ prefetch(const __global long *p, size_t n);
void __OVERLOAD__ prefetch(const __global ulong *p, size_t n);
void __OVERLOAD__ prefetch(const __global float *p, size_t n);
void __OVERLOAD__ prefetch(const __global double *p, size_t n);
// 2-element vectors.
void __OVERLOAD__ prefetch(const __global char2 *p, size_t n);
void __OVERLOAD__ prefetch(const __global uchar2 *p, size_t n);
void __OVERLOAD__ prefetch(const __global short2 *p, size_t n);
void __OVERLOAD__ prefetch(const __global ushort2 *p, size_t n);
void __OVERLOAD__ prefetch(const __global int2 *p, size_t n);
void __OVERLOAD__ prefetch(const __global uint2 *p, size_t n);
void __OVERLOAD__ prefetch(const __global long2 *p, size_t n);
void __OVERLOAD__ prefetch(const __global ulong2 *p, size_t n);
void __OVERLOAD__ prefetch(const __global float2 *p, size_t n);
void __OVERLOAD__ prefetch(const __global double2 *p, size_t n);
// 4-element vectors.
void __OVERLOAD__ prefetch(const __global char4 *p, size_t n);
void __OVERLOAD__ prefetch(const __global uchar4 *p, size_t n);
void __OVERLOAD__ prefetch(const __global short4 *p, size_t n);
void __OVERLOAD__ prefetch(const __global ushort4 *p, size_t n);
void __OVERLOAD__ prefetch(const __global int4 *p, size_t n);
void __OVERLOAD__ prefetch(const __global uint4 *p, size_t n);
void __OVERLOAD__ prefetch(const __global long4 *p, size_t n);
void __OVERLOAD__ prefetch(const __global ulong4 *p, size_t n);
void __OVERLOAD__ prefetch(const __global float4 *p, size_t n);
void __OVERLOAD__ prefetch(const __global double4 *p, size_t n);
// 8-element vectors.
void __OVERLOAD__ prefetch(const __global char8 *p, size_t n);
void __OVERLOAD__ prefetch(const __global uchar8 *p, size_t n);
void __OVERLOAD__ prefetch(const __global short8 *p, size_t n);
void __OVERLOAD__ prefetch(const __global ushort8 *p, size_t n);
void __OVERLOAD__ prefetch(const __global int8 *p, size_t n);
void __OVERLOAD__ prefetch(const __global uint8 *p, size_t n);
void __OVERLOAD__ prefetch(const __global long8 *p, size_t n);
void __OVERLOAD__ prefetch(const __global ulong8 *p, size_t n);
void __OVERLOAD__ prefetch(const __global float8 *p, size_t n);
void __OVERLOAD__ prefetch(const __global double8 *p, size_t n);
// 16-element vectors.
void __OVERLOAD__ prefetch(const __global char16 *p, size_t n);
void __OVERLOAD__ prefetch(const __global uchar16 *p, size_t n);
void __OVERLOAD__ prefetch(const __global short16 *p, size_t n);
void __OVERLOAD__ prefetch(const __global ushort16 *p, size_t n);
void __OVERLOAD__ prefetch(const __global int16 *p, size_t n);
void __OVERLOAD__ prefetch(const __global uint16 *p, size_t n);
void __OVERLOAD__ prefetch(const __global long16 *p, size_t n);
void __OVERLOAD__ prefetch(const __global ulong16 *p, size_t n);
void __OVERLOAD__ prefetch(const __global float16 *p, size_t n);
void __OVERLOAD__ prefetch(const __global double16 *p, size_t n);

// 5.9.7 - Synchronization
//
// Fence flags are single-bit values (bit 0 = local, bit 1 = global) so they
// can be OR-ed together.
typedef enum
{
    CLK_LOCAL_MEM_FENCE  = 1U << 0,
    CLK_GLOBAL_MEM_FENCE = 1U << 1,
    // As its name says, this bit-31 enumerator is not a usable flag; it is
    // present only to force the size of the enum.
    __unused_except_to_make_sure_the_enum_has_the_right_size = 1U << 31
} cl_mem_fence_flags;

// Barrier and fence entry points; each takes a cl_mem_fence_flags value.
void barrier(cl_mem_fence_flags flags);
void mem_fence(cl_mem_fence_flags flags);
void read_mem_fence(cl_mem_fence_flags flags);
void write_mem_fence(cl_mem_fence_flags flags);

// 5.9.9 - Atomic Functions
//
// 32-bit atomics on volatile __global memory: signed int first, then unsigned,
// then the single float overload (exchange only).
int __OVERLOAD__ atom_add(volatile __global int *p, int val);
int __OVERLOAD__ atom_sub(volatile __global int *p, int val);
int __OVERLOAD__ atom_xchg(volatile __global int *p, int val);
int __OVERLOAD__ atom_min(volatile __global int *p, int val);
int __OVERLOAD__ atom_max(volatile __global int *p, int val);
int __OVERLOAD__ atom_inc(volatile __global int *p);
int __OVERLOAD__ atom_dec(volatile __global int *p);
int __OVERLOAD__ atom_cmpxchg(volatile __global int *p, int cmp, int val);
int __OVERLOAD__ atom_and(volatile __global int *p, int val);
int __OVERLOAD__ atom_or(volatile __global int *p, int val);
int __OVERLOAD__ atom_xor(volatile __global int *p, int val);

unsigned __OVERLOAD__ atom_add(volatile __global unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_sub(volatile __global unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_xchg(volatile __global unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_min(volatile __global unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_max(volatile __global unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_inc(volatile __global unsigned *p);
unsigned __OVERLOAD__ atom_dec(volatile __global unsigned *p);
unsigned __OVERLOAD__ atom_cmpxchg(volatile __global unsigned *p, unsigned cmp, unsigned val);
unsigned __OVERLOAD__ atom_and(volatile __global unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_or(volatile __global unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_xor(volatile __global unsigned *p, unsigned val);

float __OVERLOAD__ atom_xchg(volatile __global float *p, float val);


// 32-bit atomics on volatile local memory: signed int first, then unsigned,
// then the single float overload (exchange only). The address space is
// spelled __local here; this header defines `local` and `__local` as the same
// address_space(3) attribute.
int __OVERLOAD__ atom_add(volatile __local int *p, int val);
int __OVERLOAD__ atom_sub(volatile __local int *p, int val);
int __OVERLOAD__ atom_xchg(volatile __local int *p, int val);
int __OVERLOAD__ atom_min(volatile __local int *p, int val);
int __OVERLOAD__ atom_max(volatile __local int *p, int val);
int __OVERLOAD__ atom_inc(volatile __local int *p);
int __OVERLOAD__ atom_dec(volatile __local int *p);
int __OVERLOAD__ atom_cmpxchg(volatile __local int *p, int cmp, int val);
int __OVERLOAD__ atom_and(volatile __local int *p, int val);
int __OVERLOAD__ atom_or(volatile __local int *p, int val);
int __OVERLOAD__ atom_xor(volatile __local int *p, int val);

unsigned __OVERLOAD__ atom_add(volatile __local unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_sub(volatile __local unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_xchg(volatile __local unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_min(volatile __local unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_max(volatile __local unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_inc(volatile __local unsigned *p);
unsigned __OVERLOAD__ atom_dec(volatile __local unsigned *p);
unsigned __OVERLOAD__ atom_cmpxchg(volatile __local unsigned *p, unsigned cmp, unsigned val);
unsigned __OVERLOAD__ atom_and(volatile __local unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_or(volatile __local unsigned *p, unsigned val);
unsigned __OVERLOAD__ atom_xor(volatile __local unsigned *p, unsigned val);

float __OVERLOAD__ atom_xchg(volatile __local float *p, float val);


// 64-bit atomics on volatile __global memory: long first, then ulong.
long __OVERLOAD__ atom_add(volatile __global long *p, long val);
long __OVERLOAD__ atom_sub(volatile __global long *p, long val);
long __OVERLOAD__ atom_xchg(volatile __global long *p, long val);
long __OVERLOAD__ atom_min(volatile __global long *p, long val);
long __OVERLOAD__ atom_max(volatile __global long *p, long val);
long __OVERLOAD__ atom_inc(volatile __global long *p);
long __OVERLOAD__ atom_dec(volatile __global long *p);
long __OVERLOAD__ atom_cmpxchg(volatile __global long *p, long cmp, long val);
long __OVERLOAD__ atom_and(volatile __global long *p, long val);
long __OVERLOAD__ atom_or(volatile __global long *p, long val);
long __OVERLOAD__ atom_xor(volatile __global long *p, long val);

ulong __OVERLOAD__ atom_add(volatile __global ulong *p, ulong val);
ulong __OVERLOAD__ atom_sub(volatile __global ulong *p, ulong val);
ulong __OVERLOAD__ atom_xchg(volatile __global ulong *p, ulong val);
ulong __OVERLOAD__ atom_min(volatile __global ulong *p, ulong val);
ulong __OVERLOAD__ atom_max(volatile __global ulong *p, ulong val);
ulong __OVERLOAD__ atom_inc(volatile __global ulong *p);
ulong __OVERLOAD__ atom_dec(volatile __global ulong *p);
ulong __OVERLOAD__ atom_cmpxchg(volatile __global ulong *p, ulong cmp, ulong val);
ulong __OVERLOAD__ atom_and(volatile __global ulong *p, ulong val);
ulong __OVERLOAD__ atom_or(volatile __global ulong *p, ulong val);
ulong __OVERLOAD__ atom_xor(volatile __global ulong *p, ulong val);


// 64-bit atomics on volatile local memory: long first, then ulong. The address
// space is spelled __local here; this header defines `local` and `__local` as
// the same address_space(3) attribute.
long __OVERLOAD__ atom_add(volatile __local long *p, long val);
long __OVERLOAD__ atom_sub(volatile __local long *p, long val);
long __OVERLOAD__ atom_xchg(volatile __local long *p, long val);
long __OVERLOAD__ atom_min(volatile __local long *p, long val);
long __OVERLOAD__ atom_max(volatile __local long *p, long val);
long __OVERLOAD__ atom_inc(volatile __local long *p);
long __OVERLOAD__ atom_dec(volatile __local long *p);
long __OVERLOAD__ atom_cmpxchg(volatile __local long *p, long cmp, long val);
long __OVERLOAD__ atom_and(volatile __local long *p, long val);
long __OVERLOAD__ atom_or(volatile __local long *p, long val);
long __OVERLOAD__ atom_xor(volatile __local long *p, long val);

ulong __OVERLOAD__ atom_add(volatile __local ulong *p, ulong val);
ulong __OVERLOAD__ atom_sub(volatile __local ulong *p, ulong val);
ulong __OVERLOAD__ atom_xchg(volatile __local ulong *p, ulong val);
ulong __OVERLOAD__ atom_min(volatile __local ulong *p, ulong val);
ulong __OVERLOAD__ atom_max(volatile __local ulong *p, ulong val);
ulong __OVERLOAD__ atom_inc(volatile __local ulong *p);
ulong __OVERLOAD__ atom_dec(volatile __local ulong *p);
ulong __OVERLOAD__ atom_cmpxchg(volatile __local ulong *p, ulong cmp, ulong val);
ulong __OVERLOAD__ atom_and(volatile __local ulong *p, ulong val);
ulong __OVERLOAD__ atom_or(volatile __local ulong *p, ulong val);
ulong __OVERLOAD__ atom_xor(volatile __local ulong *p, ulong val);

// atomic_* spellings: each macro expands to the atom_* call of the same operation and
// passes its arguments through unchanged (every argument is parenthesized once).
// Pointer plus one value:
#define atomic_add(ptr, val)            atom_add((ptr), (val))
#define atomic_sub(ptr, val)            atom_sub((ptr), (val))
#define atomic_xchg(ptr, val)           atom_xchg((ptr), (val))
#define atomic_min(ptr, val)            atom_min((ptr), (val))
#define atomic_max(ptr, val)            atom_max((ptr), (val))
#define atomic_and(ptr, val)            atom_and((ptr), (val))
#define atomic_or(ptr, val)             atom_or((ptr), (val))
#define atomic_xor(ptr, val)            atom_xor((ptr), (val))
// Pointer only:
#define atomic_inc(ptr)                 atom_inc((ptr))
#define atomic_dec(ptr)                 atom_dec((ptr))
// Pointer plus two values:
#define atomic_cmpxchg(ptr, cmp, val)   atom_cmpxchg((ptr), (cmp), (val))

// shuffle() on char/uchar vectors. In every overload the result has the element type
// of x and as many elements as map, which is always a uchar vector here.
// -- x has 2 elements --
char2   __OVERLOAD__ shuffle(char2 x, uchar2 map);
char4   __OVERLOAD__ shuffle(char2 x, uchar4 map);
char8   __OVERLOAD__ shuffle(char2 x, uchar8 map);
char16  __OVERLOAD__ shuffle(char2 x, uchar16 map);
uchar2  __OVERLOAD__ shuffle(uchar2 x, uchar2 map);
uchar4  __OVERLOAD__ shuffle(uchar2 x, uchar4 map);
uchar8  __OVERLOAD__ shuffle(uchar2 x, uchar8 map);
uchar16 __OVERLOAD__ shuffle(uchar2 x, uchar16 map);
// -- x has 4 elements --
char2   __OVERLOAD__ shuffle(char4 x, uchar2 map);
char4   __OVERLOAD__ shuffle(char4 x, uchar4 map);
char8   __OVERLOAD__ shuffle(char4 x, uchar8 map);
char16  __OVERLOAD__ shuffle(char4 x, uchar16 map);
uchar2  __OVERLOAD__ shuffle(uchar4 x, uchar2 map);
uchar4  __OVERLOAD__ shuffle(uchar4 x, uchar4 map);
uchar8  __OVERLOAD__ shuffle(uchar4 x, uchar8 map);
uchar16 __OVERLOAD__ shuffle(uchar4 x, uchar16 map);
// -- x has 8 elements --
char2   __OVERLOAD__ shuffle(char8 x, uchar2 map);
char4   __OVERLOAD__ shuffle(char8 x, uchar4 map);
char8   __OVERLOAD__ shuffle(char8 x, uchar8 map);
char16  __OVERLOAD__ shuffle(char8 x, uchar16 map);
uchar2  __OVERLOAD__ shuffle(uchar8 x, uchar2 map);
uchar4  __OVERLOAD__ shuffle(uchar8 x, uchar4 map);
uchar8  __OVERLOAD__ shuffle(uchar8 x, uchar8 map);
uchar16 __OVERLOAD__ shuffle(uchar8 x, uchar16 map);
// -- x has 16 elements --
char2   __OVERLOAD__ shuffle(char16 x, uchar2 map);
char4   __OVERLOAD__ shuffle(char16 x, uchar4 map);
char8   __OVERLOAD__ shuffle(char16 x, uchar8 map);
char16  __OVERLOAD__ shuffle(char16 x, uchar16 map);
uchar2  __OVERLOAD__ shuffle(uchar16 x, uchar2 map);
uchar4  __OVERLOAD__ shuffle(uchar16 x, uchar4 map);
uchar8  __OVERLOAD__ shuffle(uchar16 x, uchar8 map);
uchar16 __OVERLOAD__ shuffle(uchar16 x, uchar16 map);
short2 __OVERLOAD__ shuffle( short2 x, ushort2 map );
short4 __OVERLOAD__ shuffle( short2 x, ushort4 map );
short8 __OVERLOAD__ shuffle( short2 x, ushort8 map );
short16 __OVERLOAD__ shuffle( short2 x, ushort16 map );
ushort2 __OVERLOAD__ shuffle( ushort2 x, ushort2 map );
ushort4 __OVERLOAD__ shuffle( ushort2 x, ushort4 map );
ushort8 __OVERLOAD__ shuffle( ushort2 x, ushort8 map );
ushort16 __OVERLOAD__ shuffle( ushort2 x, ushort16 map );
short2 __OVERLOAD__ shuffle( short4 x, ushort2 map );
short4 __OVERLOAD__ shuffle( short4 x, ushort4 map );
short8 __OVERLOAD__ shuffle( short4 x, ushort8 map );
short16 __OVERLOAD__ shuffle( short4 x, ushort16 map );
ushort2 __OVERLOAD__ shuffle( ushort4 x, ushort2 map );
ushort4 __OVERLOAD__ shuffle( ushort4 x, ushort4 map );
ushort8 __OVERLOAD__ shuffle( ushort4 x, ushort8 map );
ushort16 __OVERLOAD__ shuffle( ushort4 x, ushort16 map );
short2 __OVERLOAD__ shuffle( short8 x, ushort2 map );
short4 __OVERLOAD__ shuffle( short8 x, ushort4 map );
short8 __OVERLOAD__ shuffle( short8 x, ushort8 map );
short16 __OVERLOAD__ shuffle( short8 x, ushort16 map );
ushort2 __OVERLOAD__ shuffle( ushort8 x, ushort2 map );
ushort4 __OVERLOAD__ shuffle( ushort8 x, ushort4 map );
ushort8 __OVERLOAD__ shuffle( ushort8 x, ushort8 map );
ushort16 __OVERLOAD__ shuffle( ushort8 x, ushort16 map );
short2 __OVERLOAD__ shuffle( short16 x, ushort2 map );
short4 __OVERLOAD__ shuffle( short16 x, ushort4 map );
short8 __OVERLOAD__ shuffle( short16 x, ushort8 map );
short16 __OVERLOAD__ shuffle( short16 x, ushort16 map );
ushort2 __OVERLOAD__ shuffle( ushort16 x, ushort2 map );
ushort4 __OVERLOAD__ shuffle( ushort16 x, ushort4 map );
ushort8 __OVERLOAD__ shuffle( ushort16 x, ushort8 map );
ushort16 __OVERLOAD__ shuffle( ushort16 x, ushort16 map );
// shuffle() on int/uint vectors. In every overload the result has the element type
// of x and as many elements as map, which is always a uint vector here.
// -- x has 2 elements --
int2   __OVERLOAD__ shuffle(int2 x, uint2 map);
int4   __OVERLOAD__ shuffle(int2 x, uint4 map);
int8   __OVERLOAD__ shuffle(int2 x, uint8 map);
int16  __OVERLOAD__ shuffle(int2 x, uint16 map);
uint2  __OVERLOAD__ shuffle(uint2 x, uint2 map);
uint4  __OVERLOAD__ shuffle(uint2 x, uint4 map);
uint8  __OVERLOAD__ shuffle(uint2 x, uint8 map);
uint16 __OVERLOAD__ shuffle(uint2 x, uint16 map);
// -- x has 4 elements --
int2   __OVERLOAD__ shuffle(int4 x, uint2 map);
int4   __OVERLOAD__ shuffle(int4 x, uint4 map);
int8   __OVERLOAD__ shuffle(int4 x, uint8 map);
int16  __OVERLOAD__ shuffle(int4 x, uint16 map);
uint2  __OVERLOAD__ shuffle(uint4 x, uint2 map);
uint4  __OVERLOAD__ shuffle(uint4 x, uint4 map);
uint8  __OVERLOAD__ shuffle(uint4 x, uint8 map);
uint16 __OVERLOAD__ shuffle(uint4 x, uint16 map);
// -- x has 8 elements --
int2   __OVERLOAD__ shuffle(int8 x, uint2 map);
int4   __OVERLOAD__ shuffle(int8 x, uint4 map);
int8   __OVERLOAD__ shuffle(int8 x, uint8 map);
int16  __OVERLOAD__ shuffle(int8 x, uint16 map);
uint2  __OVERLOAD__ shuffle(uint8 x, uint2 map);
uint4  __OVERLOAD__ shuffle(uint8 x, uint4 map);
uint8  __OVERLOAD__ shuffle(uint8 x, uint8 map);
uint16 __OVERLOAD__ shuffle(uint8 x, uint16 map);
// -- x has 16 elements --
int2   __OVERLOAD__ shuffle(int16 x, uint2 map);
int4   __OVERLOAD__ shuffle(int16 x, uint4 map);
int8   __OVERLOAD__ shuffle(int16 x, uint8 map);
int16  __OVERLOAD__ shuffle(int16 x, uint16 map);
uint2  __OVERLOAD__ shuffle(uint16 x, uint2 map);
uint4  __OVERLOAD__ shuffle(uint16 x, uint4 map);
uint8  __OVERLOAD__ shuffle(uint16 x, uint8 map);
uint16 __OVERLOAD__ shuffle(uint16 x, uint16 map);
// shuffle() on long/ulong vectors. In every overload the result has the element type
// of x and as many elements as map, which is always a ulong vector here.
// -- x has 2 elements --
long2   __OVERLOAD__ shuffle(long2 x, ulong2 map);
long4   __OVERLOAD__ shuffle(long2 x, ulong4 map);
long8   __OVERLOAD__ shuffle(long2 x, ulong8 map);
long16  __OVERLOAD__ shuffle(long2 x, ulong16 map);
ulong2  __OVERLOAD__ shuffle(ulong2 x, ulong2 map);
ulong4  __OVERLOAD__ shuffle(ulong2 x, ulong4 map);
ulong8  __OVERLOAD__ shuffle(ulong2 x, ulong8 map);
ulong16 __OVERLOAD__ shuffle(ulong2 x, ulong16 map);
// -- x has 4 elements --
long2   __OVERLOAD__ shuffle(long4 x, ulong2 map);
long4   __OVERLOAD__ shuffle(long4 x, ulong4 map);
long8   __OVERLOAD__ shuffle(long4 x, ulong8 map);
long16  __OVERLOAD__ shuffle(long4 x, ulong16 map);
ulong2  __OVERLOAD__ shuffle(ulong4 x, ulong2 map);
ulong4  __OVERLOAD__ shuffle(ulong4 x, ulong4 map);
ulong8  __OVERLOAD__ shuffle(ulong4 x, ulong8 map);
ulong16 __OVERLOAD__ shuffle(ulong4 x, ulong16 map);
// -- x has 8 elements --
long2   __OVERLOAD__ shuffle(long8 x, ulong2 map);
long4   __OVERLOAD__ shuffle(long8 x, ulong4 map);
long8   __OVERLOAD__ shuffle(long8 x, ulong8 map);
long16  __OVERLOAD__ shuffle(long8 x, ulong16 map);
ulong2  __OVERLOAD__ shuffle(ulong8 x, ulong2 map);
ulong4  __OVERLOAD__ shuffle(ulong8 x, ulong4 map);
ulong8  __OVERLOAD__ shuffle(ulong8 x, ulong8 map);
ulong16 __OVERLOAD__ shuffle(ulong8 x, ulong16 map);
// -- x has 16 elements --
long2   __OVERLOAD__ shuffle(long16 x, ulong2 map);
long4   __OVERLOAD__ shuffle(long16 x, ulong4 map);
long8   __OVERLOAD__ shuffle(long16 x, ulong8 map);
long16  __OVERLOAD__ shuffle(long16 x, ulong16 map);
ulong2  __OVERLOAD__ shuffle(ulong16 x, ulong2 map);
ulong4  __OVERLOAD__ shuffle(ulong16 x, ulong4 map);
ulong8  __OVERLOAD__ shuffle(ulong16 x, ulong8 map);
ulong16 __OVERLOAD__ shuffle(ulong16 x, ulong16 map);
// shuffle() on float vectors. The result is a float vector with as many elements as
// map, which is always a uint vector here.
// -- x has 2 elements --
float2  __OVERLOAD__ shuffle(float2 x, uint2 map);
float4  __OVERLOAD__ shuffle(float2 x, uint4 map);
float8  __OVERLOAD__ shuffle(float2 x, uint8 map);
float16 __OVERLOAD__ shuffle(float2 x, uint16 map);
// -- x has 4 elements --
float2  __OVERLOAD__ shuffle(float4 x, uint2 map);
float4  __OVERLOAD__ shuffle(float4 x, uint4 map);
float8  __OVERLOAD__ shuffle(float4 x, uint8 map);
float16 __OVERLOAD__ shuffle(float4 x, uint16 map);
// -- x has 8 elements --
float2  __OVERLOAD__ shuffle(float8 x, uint2 map);
float4  __OVERLOAD__ shuffle(float8 x, uint4 map);
float8  __OVERLOAD__ shuffle(float8 x, uint8 map);
float16 __OVERLOAD__ shuffle(float8 x, uint16 map);
// -- x has 16 elements --
float2  __OVERLOAD__ shuffle(float16 x, uint2 map);
float4  __OVERLOAD__ shuffle(float16 x, uint4 map);
float8  __OVERLOAD__ shuffle(float16 x, uint8 map);
float16 __OVERLOAD__ shuffle(float16 x, uint16 map);
// shuffle() on double vectors. The result is a double vector with as many elements as
// map, which is always a ulong vector here.
// -- x has 2 elements --
double2  __OVERLOAD__ shuffle(double2 x, ulong2 map);
double4  __OVERLOAD__ shuffle(double2 x, ulong4 map);
double8  __OVERLOAD__ shuffle(double2 x, ulong8 map);
double16 __OVERLOAD__ shuffle(double2 x, ulong16 map);
// -- x has 4 elements --
double2  __OVERLOAD__ shuffle(double4 x, ulong2 map);
double4  __OVERLOAD__ shuffle(double4 x, ulong4 map);
double8  __OVERLOAD__ shuffle(double4 x, ulong8 map);
double16 __OVERLOAD__ shuffle(double4 x, ulong16 map);
// -- x has 8 elements --
double2  __OVERLOAD__ shuffle(double8 x, ulong2 map);
double4  __OVERLOAD__ shuffle(double8 x, ulong4 map);
double8  __OVERLOAD__ shuffle(double8 x, ulong8 map);
double16 __OVERLOAD__ shuffle(double8 x, ulong16 map);
// -- x has 16 elements --
double2  __OVERLOAD__ shuffle(double16 x, ulong2 map);
double4  __OVERLOAD__ shuffle(double16 x, ulong4 map);
double8  __OVERLOAD__ shuffle(double16 x, ulong8 map);
double16 __OVERLOAD__ shuffle(double16 x, ulong16 map);

// shuffle2() on char/uchar vectors. x and y always share one type; the result has their
// element type and as many elements as map, which is always a uchar vector here.
// -- 2-element result --
char2   __OVERLOAD__ shuffle2(char2 x, char2 y, uchar2 map);
char2   __OVERLOAD__ shuffle2(char4 x, char4 y, uchar2 map);
char2   __OVERLOAD__ shuffle2(char8 x, char8 y, uchar2 map);
char2   __OVERLOAD__ shuffle2(char16 x, char16 y, uchar2 map);
uchar2  __OVERLOAD__ shuffle2(uchar2 x, uchar2 y, uchar2 map);
uchar2  __OVERLOAD__ shuffle2(uchar4 x, uchar4 y, uchar2 map);
uchar2  __OVERLOAD__ shuffle2(uchar8 x, uchar8 y, uchar2 map);
uchar2  __OVERLOAD__ shuffle2(uchar16 x, uchar16 y, uchar2 map);
// -- 4-element result --
char4   __OVERLOAD__ shuffle2(char2 x, char2 y, uchar4 map);
char4   __OVERLOAD__ shuffle2(char4 x, char4 y, uchar4 map);
char4   __OVERLOAD__ shuffle2(char8 x, char8 y, uchar4 map);
char4   __OVERLOAD__ shuffle2(char16 x, char16 y, uchar4 map);
uchar4  __OVERLOAD__ shuffle2(uchar2 x, uchar2 y, uchar4 map);
uchar4  __OVERLOAD__ shuffle2(uchar4 x, uchar4 y, uchar4 map);
uchar4  __OVERLOAD__ shuffle2(uchar8 x, uchar8 y, uchar4 map);
uchar4  __OVERLOAD__ shuffle2(uchar16 x, uchar16 y, uchar4 map);
// -- 8-element result --
char8   __OVERLOAD__ shuffle2(char2 x, char2 y, uchar8 map);
char8   __OVERLOAD__ shuffle2(char4 x, char4 y, uchar8 map);
char8   __OVERLOAD__ shuffle2(char8 x, char8 y, uchar8 map);
char8   __OVERLOAD__ shuffle2(char16 x, char16 y, uchar8 map);
uchar8  __OVERLOAD__ shuffle2(uchar2 x, uchar2 y, uchar8 map);
uchar8  __OVERLOAD__ shuffle2(uchar4 x, uchar4 y, uchar8 map);
uchar8  __OVERLOAD__ shuffle2(uchar8 x, uchar8 y, uchar8 map);
uchar8  __OVERLOAD__ shuffle2(uchar16 x, uchar16 y, uchar8 map);
// -- 16-element result --
char16  __OVERLOAD__ shuffle2(char2 x, char2 y, uchar16 map);
char16  __OVERLOAD__ shuffle2(char4 x, char4 y, uchar16 map);
char16  __OVERLOAD__ shuffle2(char8 x, char8 y, uchar16 map);
char16  __OVERLOAD__ shuffle2(char16 x, char16 y, uchar16 map);
uchar16 __OVERLOAD__ shuffle2(uchar2 x, uchar2 y, uchar16 map);
uchar16 __OVERLOAD__ shuffle2(uchar4 x, uchar4 y, uchar16 map);
uchar16 __OVERLOAD__ shuffle2(uchar8 x, uchar8 y, uchar16 map);
uchar16 __OVERLOAD__ shuffle2(uchar16 x, uchar16 y, uchar16 map);
short2 __OVERLOAD__ shuffle2( short2 x, short2 y, ushort2 map);
short2 __OVERLOAD__ shuffle2( short4 x, short4 y, ushort2 map);
short2 __OVERLOAD__ shuffle2( short8 x, short8 y, ushort2 map);
short2 __OVERLOAD__ shuffle2( short16 x, short16 y, ushort2 map);
ushort2 __OVERLOAD__ shuffle2( ushort2 x, ushort2 y, ushort2 map);
ushort2 __OVERLOAD__ shuffle2( ushort4 x, ushort4 y, ushort2 map);
ushort2 __OVERLOAD__ shuffle2( ushort8 x, ushort8 y, ushort2 map);
ushort2 __OVERLOAD__ shuffle2( ushort16 x, ushort16 y, ushort2 map);
short4 __OVERLOAD__ shuffle2( short2 x, short2 y, ushort4 map);
short4 __OVERLOAD__ shuffle2( short4 x, short4 y, ushort4 map);
short4 __OVERLOAD__ shuffle2( short8 x, short8 y, ushort4 map);
short4 __OVERLOAD__ shuffle2( short16 x, short16 y, ushort4 map);
ushort4 __OVERLOAD__ shuffle2( ushort2 x, ushort2 y, ushort4 map);
ushort4 __OVERLOAD__ shuffle2( ushort4 x, ushort4 y, ushort4 map);
ushort4 __OVERLOAD__ shuffle2( ushort8 x, ushort8 y, ushort4 map);
ushort4 __OVERLOAD__ shuffle2( ushort16 x, ushort16 y, ushort4 map);
short8 __OVERLOAD__ shuffle2( short2 x, short2 y, ushort8 map);
short8 __OVERLOAD__ shuffle2( short4 x, short4 y, ushort8 map);
short8 __OVERLOAD__ shuffle2( short8 x, short8 y, ushort8 map);
short8 __OVERLOAD__ shuffle2( short16 x, short16 y, ushort8 map);
ushort8 __OVERLOAD__ shuffle2( ushort2 x, ushort2 y, ushort8 map);
ushort8 __OVERLOAD__ shuffle2( ushort4 x, ushort4 y, ushort8 map);
ushort8 __OVERLOAD__ shuffle2( ushort8 x, ushort8 y, ushort8 map);
ushort8 __OVERLOAD__ shuffle2( ushort16 x, ushort16 y, ushort8 map);
short16 __OVERLOAD__ shuffle2( short2 x, short2 y, ushort16 map);
short16 __OVERLOAD__ shuffle2( short4 x, short4 y, ushort16 map);
short16 __OVERLOAD__ shuffle2( short8 x, short8 y, ushort16 map);
short16 __OVERLOAD__ shuffle2( short16 x, short16 y, ushort16 map);
ushort16 __OVERLOAD__ shuffle2( ushort2 x, ushort2 y, ushort16 map);
ushort16 __OVERLOAD__ shuffle2( ushort4 x, ushort4 y, ushort16 map);
ushort16 __OVERLOAD__ shuffle2( ushort8 x, ushort8 y, ushort16 map);
ushort16 __OVERLOAD__ shuffle2( ushort16 x, ushort16 y, ushort16 map);
// shuffle2() on int/uint/float vectors. x and y always share one type; the result has
// their element type and as many elements as map, which is always a uint vector here.
// -- 2-element result --
int2    __OVERLOAD__ shuffle2(int2 x, int2 y, uint2 map);
int2    __OVERLOAD__ shuffle2(int4 x, int4 y, uint2 map);
int2    __OVERLOAD__ shuffle2(int8 x, int8 y, uint2 map);
int2    __OVERLOAD__ shuffle2(int16 x, int16 y, uint2 map);
uint2   __OVERLOAD__ shuffle2(uint2 x, uint2 y, uint2 map);
uint2   __OVERLOAD__ shuffle2(uint4 x, uint4 y, uint2 map);
uint2   __OVERLOAD__ shuffle2(uint8 x, uint8 y, uint2 map);
uint2   __OVERLOAD__ shuffle2(uint16 x, uint16 y, uint2 map);
float2  __OVERLOAD__ shuffle2(float2 x, float2 y, uint2 map);
float2  __OVERLOAD__ shuffle2(float4 x, float4 y, uint2 map);
float2  __OVERLOAD__ shuffle2(float8 x, float8 y, uint2 map);
float2  __OVERLOAD__ shuffle2(float16 x, float16 y, uint2 map);
// -- 4-element result --
int4    __OVERLOAD__ shuffle2(int2 x, int2 y, uint4 map);
int4    __OVERLOAD__ shuffle2(int4 x, int4 y, uint4 map);
int4    __OVERLOAD__ shuffle2(int8 x, int8 y, uint4 map);
int4    __OVERLOAD__ shuffle2(int16 x, int16 y, uint4 map);
uint4   __OVERLOAD__ shuffle2(uint2 x, uint2 y, uint4 map);
uint4   __OVERLOAD__ shuffle2(uint4 x, uint4 y, uint4 map);
uint4   __OVERLOAD__ shuffle2(uint8 x, uint8 y, uint4 map);
uint4   __OVERLOAD__ shuffle2(uint16 x, uint16 y, uint4 map);
float4  __OVERLOAD__ shuffle2(float2 x, float2 y, uint4 map);
float4  __OVERLOAD__ shuffle2(float4 x, float4 y, uint4 map);
float4  __OVERLOAD__ shuffle2(float8 x, float8 y, uint4 map);
float4  __OVERLOAD__ shuffle2(float16 x, float16 y, uint4 map);
// -- 8-element result --
int8    __OVERLOAD__ shuffle2(int2 x, int2 y, uint8 map);
int8    __OVERLOAD__ shuffle2(int4 x, int4 y, uint8 map);
int8    __OVERLOAD__ shuffle2(int8 x, int8 y, uint8 map);
int8    __OVERLOAD__ shuffle2(int16 x, int16 y, uint8 map);
uint8   __OVERLOAD__ shuffle2(uint2 x, uint2 y, uint8 map);
uint8   __OVERLOAD__ shuffle2(uint4 x, uint4 y, uint8 map);
uint8   __OVERLOAD__ shuffle2(uint8 x, uint8 y, uint8 map);
uint8   __OVERLOAD__ shuffle2(uint16 x, uint16 y, uint8 map);
float8  __OVERLOAD__ shuffle2(float2 x, float2 y, uint8 map);
float8  __OVERLOAD__ shuffle2(float4 x, float4 y, uint8 map);
float8  __OVERLOAD__ shuffle2(float8 x, float8 y, uint8 map);
float8  __OVERLOAD__ shuffle2(float16 x, float16 y, uint8 map);
// -- 16-element result --
int16   __OVERLOAD__ shuffle2(int2 x, int2 y, uint16 map);
int16   __OVERLOAD__ shuffle2(int4 x, int4 y, uint16 map);
int16   __OVERLOAD__ shuffle2(int8 x, int8 y, uint16 map);
int16   __OVERLOAD__ shuffle2(int16 x, int16 y, uint16 map);
uint16  __OVERLOAD__ shuffle2(uint2 x, uint2 y, uint16 map);
uint16  __OVERLOAD__ shuffle2(uint4 x, uint4 y, uint16 map);
uint16  __OVERLOAD__ shuffle2(uint8 x, uint8 y, uint16 map);
uint16  __OVERLOAD__ shuffle2(uint16 x, uint16 y, uint16 map);
float16 __OVERLOAD__ shuffle2(float2 x, float2 y, uint16 map);
float16 __OVERLOAD__ shuffle2(float4 x, float4 y, uint16 map);
float16 __OVERLOAD__ shuffle2(float8 x, float8 y, uint16 map);
float16 __OVERLOAD__ shuffle2(float16 x, float16 y, uint16 map);
// shuffle2() on long/ulong/double vectors. x and y always share one type; the result has
// their element type and as many elements as map, which is always a ulong vector here.
// -- 2-element result --
long2    __OVERLOAD__ shuffle2(long2 x, long2 y, ulong2 map);
long2    __OVERLOAD__ shuffle2(long4 x, long4 y, ulong2 map);
long2    __OVERLOAD__ shuffle2(long8 x, long8 y, ulong2 map);
long2    __OVERLOAD__ shuffle2(long16 x, long16 y, ulong2 map);
ulong2   __OVERLOAD__ shuffle2(ulong2 x, ulong2 y, ulong2 map);
ulong2   __OVERLOAD__ shuffle2(ulong4 x, ulong4 y, ulong2 map);
ulong2   __OVERLOAD__ shuffle2(ulong8 x, ulong8 y, ulong2 map);
ulong2   __OVERLOAD__ shuffle2(ulong16 x, ulong16 y, ulong2 map);
double2  __OVERLOAD__ shuffle2(double2 x, double2 y, ulong2 map);
double2  __OVERLOAD__ shuffle2(double4 x, double4 y, ulong2 map);
double2  __OVERLOAD__ shuffle2(double8 x, double8 y, ulong2 map);
double2  __OVERLOAD__ shuffle2(double16 x, double16 y, ulong2 map);
// -- 4-element result --
long4    __OVERLOAD__ shuffle2(long2 x, long2 y, ulong4 map);
long4    __OVERLOAD__ shuffle2(long4 x, long4 y, ulong4 map);
long4    __OVERLOAD__ shuffle2(long8 x, long8 y, ulong4 map);
long4    __OVERLOAD__ shuffle2(long16 x, long16 y, ulong4 map);
ulong4   __OVERLOAD__ shuffle2(ulong2 x, ulong2 y, ulong4 map);
ulong4   __OVERLOAD__ shuffle2(ulong4 x, ulong4 y, ulong4 map);
ulong4   __OVERLOAD__ shuffle2(ulong8 x, ulong8 y, ulong4 map);
ulong4   __OVERLOAD__ shuffle2(ulong16 x, ulong16 y, ulong4 map);
double4  __OVERLOAD__ shuffle2(double2 x, double2 y, ulong4 map);
double4  __OVERLOAD__ shuffle2(double4 x, double4 y, ulong4 map);
double4  __OVERLOAD__ shuffle2(double8 x, double8 y, ulong4 map);
double4  __OVERLOAD__ shuffle2(double16 x, double16 y, ulong4 map);
// -- 8-element result --
long8    __OVERLOAD__ shuffle2(long2 x, long2 y, ulong8 map);
long8    __OVERLOAD__ shuffle2(long4 x, long4 y, ulong8 map);
long8    __OVERLOAD__ shuffle2(long8 x, long8 y, ulong8 map);
long8    __OVERLOAD__ shuffle2(long16 x, long16 y, ulong8 map);
ulong8   __OVERLOAD__ shuffle2(ulong2 x, ulong2 y, ulong8 map);
ulong8   __OVERLOAD__ shuffle2(ulong4 x, ulong4 y, ulong8 map);
ulong8   __OVERLOAD__ shuffle2(ulong8 x, ulong8 y, ulong8 map);
ulong8   __OVERLOAD__ shuffle2(ulong16 x, ulong16 y, ulong8 map);
double8  __OVERLOAD__ shuffle2(double2 x, double2 y, ulong8 map);
double8  __OVERLOAD__ shuffle2(double4 x, double4 y, ulong8 map);
double8  __OVERLOAD__ shuffle2(double8 x, double8 y, ulong8 map);
double8  __OVERLOAD__ shuffle2(double16 x, double16 y, ulong8 map);
// -- 16-element result --
long16   __OVERLOAD__ shuffle2(long2 x, long2 y, ulong16 map);
long16   __OVERLOAD__ shuffle2(long4 x, long4 y, ulong16 map);
long16   __OVERLOAD__ shuffle2(long8 x, long8 y, ulong16 map);
long16   __OVERLOAD__ shuffle2(long16 x, long16 y, ulong16 map);
ulong16  __OVERLOAD__ shuffle2(ulong2 x, ulong2 y, ulong16 map);
ulong16  __OVERLOAD__ shuffle2(ulong4 x, ulong4 y, ulong16 map);
ulong16  __OVERLOAD__ shuffle2(ulong8 x, ulong8 y, ulong16 map);
ulong16  __OVERLOAD__ shuffle2(ulong16 x, ulong16 y, ulong16 map);
double16 __OVERLOAD__ shuffle2(double2 x, double2 y, ulong16 map);
double16 __OVERLOAD__ shuffle2(double4 x, double4 y, ulong16 map);
double16 __OVERLOAD__ shuffle2(double8 x, double8 y, ulong16 map);
double16 __OVERLOAD__ shuffle2(double16 x, double16 y, ulong16 map);

/*
 * Support for the q13 data type: scalar, 4-wide and 8-wide forms.
 *
 * NOTE(review): this section is keyed on cl_APPLE_ext_q13, while the top of this
 * header defines cl_ext_q13_APPLE on ARM. The spellings differ, so that define
 * does not enable this section -- confirm which name is intended.
 */
#ifdef cl_APPLE_ext_q13
    // The q13 scalar type and its vector forms.
    typedef __attribute__((fixpoint_type(13))) short q13_;
    typedef __attribute__((ext_vector_type(4))) q13_ q13_4;
    typedef __attribute__((ext_vector_type(8))) q13_ q13_8;

    // as_type-style macros targeting the q13 types.
    #define as_q13_(_x)     __builtin_astype((_x), q13_)
    #define as_q13_4(_x)    __builtin_astype((_x), q13_4)
    #define as_q13_8(_x)    __builtin_astype((_x), q13_8)

    // float -> q13_ conversions: round-to-nearest, with saturation.
    q13_   __OVERLOAD__ convert_q13__sat_rte(float x);
    q13_4  __OVERLOAD__ convert_q13_4_sat_rte(float4 x);
    q13_8  __OVERLOAD__ convert_q13_8_sat_rte(float8 x);

    // q13_ -> float conversions: no rounding or overflow can occur.
    float  __OVERLOAD__ convert_float_q13_(q13_ x);
    float4 __OVERLOAD__ convert_float4_q13_(q13_4 x);
    float8 __OVERLOAD__ convert_float8_q13_(q13_8 x);

    // Requested 6.11.2 math functions: fract, floor, sqrt, rsqrt, sin, cos.
    q13_   __OVERLOAD__ cos_q13_(q13_ x);
    q13_4  __OVERLOAD__ cos_q13_(q13_4 x);
    q13_8  __OVERLOAD__ cos_q13_(q13_8 x);

    // fract: one overload per vector width for each address space of iptr.
    q13_   __OVERLOAD__ fract_q13_(q13_ x, __global q13_ *iptr);
    q13_4  __OVERLOAD__ fract_q13_(q13_4 x, __global q13_4 *iptr);
    q13_8  __OVERLOAD__ fract_q13_(q13_8 x, __global q13_8 *iptr);
    q13_   __OVERLOAD__ fract_q13_(q13_ x, __local q13_ *iptr);
    q13_4  __OVERLOAD__ fract_q13_(q13_4 x, __local q13_4 *iptr);
    q13_8  __OVERLOAD__ fract_q13_(q13_8 x, __local q13_8 *iptr);
    q13_   __OVERLOAD__ fract_q13_(q13_ x, __private q13_ *iptr);
    q13_4  __OVERLOAD__ fract_q13_(q13_4 x, __private q13_4 *iptr);
    q13_8  __OVERLOAD__ fract_q13_(q13_8 x, __private q13_8 *iptr);

    q13_   __OVERLOAD__ floor_q13_(q13_ x);
    q13_4  __OVERLOAD__ floor_q13_(q13_4 x);
    q13_8  __OVERLOAD__ floor_q13_(q13_8 x);

    q13_   __OVERLOAD__ sin_q13_(q13_ x);
    q13_4  __OVERLOAD__ sin_q13_(q13_4 x);
    q13_8  __OVERLOAD__ sin_q13_(q13_8 x);

    q13_   __OVERLOAD__ sqrt_q13_(q13_ x);
    q13_4  __OVERLOAD__ sqrt_q13_(q13_4 x);
    q13_8  __OVERLOAD__ sqrt_q13_(q13_8 x);

    q13_   __OVERLOAD__ rsqrt_q13_(q13_ x);
    q13_4  __OVERLOAD__ rsqrt_q13_(q13_4 x);
    q13_8  __OVERLOAD__ rsqrt_q13_(q13_8 x);

    // Requested 6.11.3 integer functions: abs, clamp, max, min.
    // Vector overloads of clamp/max/min come in an all-vector and a scalar-bound form.
    q13_   __OVERLOAD__ abs_q13_(q13_ x);
    q13_4  __OVERLOAD__ abs_q13_(q13_4 x);
    q13_8  __OVERLOAD__ abs_q13_(q13_8 x);

    q13_   __OVERLOAD__ clamp_q13_(q13_ x, q13_ minval, q13_ maxval);
    q13_4  __OVERLOAD__ clamp_q13_(q13_4 x, q13_4 minval, q13_4 maxval);
    q13_4  __OVERLOAD__ clamp_q13_(q13_4 x, q13_ minval, q13_ maxval);
    q13_8  __OVERLOAD__ clamp_q13_(q13_8 x, q13_8 minval, q13_8 maxval);
    q13_8  __OVERLOAD__ clamp_q13_(q13_8 x, q13_ minval, q13_ maxval);

    q13_   __OVERLOAD__ max_q13_(q13_ x, q13_ y);
    q13_4  __OVERLOAD__ max_q13_(q13_4 x, q13_4 y);
    q13_4  __OVERLOAD__ max_q13_(q13_4 x, q13_ y);
    q13_8  __OVERLOAD__ max_q13_(q13_8 x, q13_8 y);
    q13_8  __OVERLOAD__ max_q13_(q13_8 x, q13_ y);

    q13_   __OVERLOAD__ min_q13_(q13_ x, q13_ y);
    q13_4  __OVERLOAD__ min_q13_(q13_4 x, q13_4 y);
    q13_4  __OVERLOAD__ min_q13_(q13_4 x, q13_ y);
    q13_8  __OVERLOAD__ min_q13_(q13_8 x, q13_8 y);
    q13_8  __OVERLOAD__ min_q13_(q13_8 x, q13_ y);

    // Requested 6.11.4 common functions: mix, smoothstep.
    q13_   __OVERLOAD__ mix_q13_(q13_ x, q13_ y, q13_ a);
    q13_4  __OVERLOAD__ mix_q13_(q13_4 x, q13_4 y, q13_4 a);
    q13_4  __OVERLOAD__ mix_q13_(q13_4 x, q13_4 y, q13_ a);
    q13_8  __OVERLOAD__ mix_q13_(q13_8 x, q13_8 y, q13_8 a);
    q13_8  __OVERLOAD__ mix_q13_(q13_8 x, q13_8 y, q13_ a);

    q13_   __OVERLOAD__ smoothstep_q13_(q13_ edge0, q13_ edge1, q13_ x);
    q13_4  __OVERLOAD__ smoothstep_q13_(q13_4 edge0, q13_4 edge1, q13_4 x);
    q13_4  __OVERLOAD__ smoothstep_q13_(q13_ edge0, q13_ edge1, q13_4 x);
    q13_8  __OVERLOAD__ smoothstep_q13_(q13_8 edge0, q13_8 edge1, q13_8 x);
    q13_8  __OVERLOAD__ smoothstep_q13_(q13_ edge0, q13_ edge1, q13_8 x);

    // Special bonus function: smoothstep(0, 1, x).
    q13_   __OVERLOAD__ smoothstep01_q13_(q13_ x);
    q13_4  __OVERLOAD__ smoothstep01_q13_(q13_4 x);
    q13_8  __OVERLOAD__ smoothstep01_q13_(q13_8 x);

#endif  /* cl_APPLE_ext_q13 */


#if defined( __arm__ )

// ARM intrinsics for converting fixed to float or vice versa (e.g. vcvtq_n_s32_f32)
// take the number of bits as an argument, which must be a compile-time constant. Hence
// these builtins cannot be declared as builtin functions and have to be defined as macros.
 	
// float -> signed fixed-point (int) conversions, scalar through 16-wide.
//   x    : value to convert; evaluated exactly once by every form.
//   bits : passed straight to the NEON builtin, so it must be a compile-time constant.
// NOTE(review): the trailing literals (2 / 18) look like the builtin's type codes for the
// 64-bit and 128-bit signed forms -- confirm against the compiler's NEON builtin tables.
// The scalar form duplicates x into both lanes of a float2 and returns lane .x.
#define float_to_fixed_int_float(x, bits) ({ float __cl_fx_s = (x); ((int2)__builtin_neon_vcvt_n_s32_v((float2)(__cl_fx_s, __cl_fx_s), (bits), 2)).x; })
#define float_to_fixed_int_float2(x, bits) ({ (int2)__builtin_neon_vcvt_n_s32_v((float2)(x), (bits), 2); })
#define float_to_fixed_int_float4(x, bits) ({ (int4)__builtin_neon_vcvtq_n_s32_v((float4)(x), (bits), 18); })
// The wide forms convert each half separately. A vector literal fills its low elements
// first, so the converted .lo half is listed before the converted .hi half; listing
// .hi first would hand back the result with its halves swapped.
#define float_to_fixed_int_float8(x, bits) ({ float8 __cl_fx_v8 = (x); (int8)(float_to_fixed_int_float4(__cl_fx_v8.lo, (bits)), float_to_fixed_int_float4(__cl_fx_v8.hi, (bits))); })
#define float_to_fixed_int_float16(x, bits) ({ float16 __cl_fx_v16 = (x); (int16)(float_to_fixed_int_float8(__cl_fx_v16.lo, (bits)), float_to_fixed_int_float8(__cl_fx_v16.hi, (bits))); })

// float -> unsigned fixed-point (uint) conversions, scalar through 16-wide.
//   x    : value to convert; evaluated exactly once by every form.
//   bits : passed straight to the NEON builtin, so it must be a compile-time constant.
// NOTE(review): the trailing literals (10 / 26) look like the builtin's type codes for the
// 64-bit and 128-bit unsigned forms -- confirm against the compiler's NEON builtin tables.
// The scalar form duplicates x into both lanes of a float2 and returns lane .x.
#define float_to_fixed_uint_float(x, bits) ({ float __cl_fx_s = (x); ((uint2)__builtin_neon_vcvt_n_u32_v((float2)(__cl_fx_s, __cl_fx_s), (bits), 10)).x; })
#define float_to_fixed_uint_float2(x, bits) ({ (uint2)__builtin_neon_vcvt_n_u32_v((float2)(x), (bits), 10); })
#define float_to_fixed_uint_float4(x, bits) ({ (uint4)__builtin_neon_vcvtq_n_u32_v((float4)(x), (bits), 26); })
// The wide forms convert each half separately. A vector literal fills its low elements
// first, so the converted .lo half is listed before the converted .hi half; listing
// .hi first would hand back the result with its halves swapped.
#define float_to_fixed_uint_float8(x, bits) ({ float8 __cl_fx_v8 = (x); (uint8)(float_to_fixed_uint_float4(__cl_fx_v8.lo, (bits)), float_to_fixed_uint_float4(__cl_fx_v8.hi, (bits))); })
#define float_to_fixed_uint_float16(x, bits) ({ float16 __cl_fx_v16 = (x); (uint16)(float_to_fixed_uint_float8(__cl_fx_v16.lo, (bits)), float_to_fixed_uint_float8(__cl_fx_v16.hi, (bits))); })

// signed fixed-point (int) -> float conversions, scalar through 16-wide.
//   x    : value to convert; evaluated exactly once by every form.
//   bits : passed straight to the NEON builtin, so it must be a compile-time constant.
// NOTE(review): the trailing literals (2 / 18) look like the builtin's type codes for the
// 64-bit and 128-bit signed forms -- confirm against the compiler's NEON builtin tables.
// The scalar form duplicates x into both lanes of an int2 and returns lane .x.
#define fixed_to_float_int_int(x, bits) ({ int __cl_fx_s = (x); ((float2)__builtin_neon_vcvt_n_f32_v((int2)(__cl_fx_s, __cl_fx_s), (bits), 2)).x; })
#define fixed_to_float_int_int2(x, bits) ({ (float2)__builtin_neon_vcvt_n_f32_v((int2)(x), (bits), 2); })
#define fixed_to_float_int_int4(x, bits) ({ (float4)__builtin_neon_vcvtq_n_f32_v((int4)(x), (bits), 18); })
// The wide forms convert each half separately. A vector literal fills its low elements
// first, so the converted .lo half is listed before the converted .hi half; listing
// .hi first would hand back the result with its halves swapped.
#define fixed_to_float_int_int8(x, bits) ({ int8 __cl_fx_v8 = (x); (float8)(fixed_to_float_int_int4(__cl_fx_v8.lo, (bits)), fixed_to_float_int_int4(__cl_fx_v8.hi, (bits))); })
#define fixed_to_float_int_int16(x, bits) ({ int16 __cl_fx_v16 = (x); (float16)(fixed_to_float_int_int8(__cl_fx_v16.lo, (bits)), fixed_to_float_int_int8(__cl_fx_v16.hi, (bits))); })

// unsigned fixed-point (uint) -> float conversions, scalar through 16-wide.
//   x    : value to convert; evaluated exactly once by every form.
//   bits : passed straight to the NEON builtin, so it must be a compile-time constant.
// NOTE(review): the trailing literals (10 / 26) look like the builtin's type codes for the
// 64-bit and 128-bit unsigned forms -- confirm against the compiler's NEON builtin tables.
// The scalar form duplicates x into both lanes of a uint2 and returns lane .x.
#define fixed_to_float_uint_uint(x, bits) ({ uint __cl_fx_s = (x); ((float2)__builtin_neon_vcvt_n_f32_v((uint2)(__cl_fx_s, __cl_fx_s), (bits), 10)).x; })
#define fixed_to_float_uint_uint2(x, bits) ({ (float2)__builtin_neon_vcvt_n_f32_v((uint2)(x), (bits), 10); })
#define fixed_to_float_uint_uint4(x, bits) ({ (float4)__builtin_neon_vcvtq_n_f32_v((uint4)(x), (bits), 26); })
// The wide forms convert each half separately. A vector literal fills its low elements
// first, so the converted .lo half is listed before the converted .hi half; listing
// .hi first would hand back the result with its halves swapped.
#define fixed_to_float_uint_uint8(x, bits) ({ uint8 __cl_fx_v8 = (x); (float8)(fixed_to_float_uint_uint4(__cl_fx_v8.lo, (bits)), fixed_to_float_uint_uint4(__cl_fx_v8.hi, (bits))); })
#define fixed_to_float_uint_uint16(x, bits) ({ uint16 __cl_fx_v16 = (x); (float16)(fixed_to_float_uint_uint8(__cl_fx_v16.lo, (bits)), fixed_to_float_uint_uint8(__cl_fx_v16.hi, (bits))); })

#else

// Function declarations for the fixed-point <-> float conversions, used whenever
// __arm__ is not defined. Each takes the value x and an int bit count.
// bits is passed by value, so it is declared as a plain int throughout; a top-level
// const on a by-value parameter does not change the function's type.

// float -> int
int     __OVERLOAD__ float_to_fixed_int_float(float x, int bits);
int2    __OVERLOAD__ float_to_fixed_int_float2(float2 x, int bits);
int4    __OVERLOAD__ float_to_fixed_int_float4(float4 x, int bits);
int8    __OVERLOAD__ float_to_fixed_int_float8(float8 x, int bits);
int16   __OVERLOAD__ float_to_fixed_int_float16(float16 x, int bits);

// float -> uint
uint    __OVERLOAD__ float_to_fixed_uint_float(float x, int bits);
uint2   __OVERLOAD__ float_to_fixed_uint_float2(float2 x, int bits);
uint4   __OVERLOAD__ float_to_fixed_uint_float4(float4 x, int bits);
uint8   __OVERLOAD__ float_to_fixed_uint_float8(float8 x, int bits);
uint16  __OVERLOAD__ float_to_fixed_uint_float16(float16 x, int bits);

// int -> float
float   __OVERLOAD__ fixed_to_float_int_int(int x, int bits);
float2  __OVERLOAD__ fixed_to_float_int_int2(int2 x, int bits);
float4  __OVERLOAD__ fixed_to_float_int_int4(int4 x, int bits);
float8  __OVERLOAD__ fixed_to_float_int_int8(int8 x, int bits);
float16 __OVERLOAD__ fixed_to_float_int_int16(int16 x, int bits);

// uint -> float
float   __OVERLOAD__ fixed_to_float_uint_uint(uint x, int bits);
float2  __OVERLOAD__ fixed_to_float_uint_uint2(uint2 x, int bits);
float4  __OVERLOAD__ fixed_to_float_uint_uint4(uint4 x, int bits);
float8  __OVERLOAD__ fixed_to_float_uint_uint8(uint8 x, int bits);
float16 __OVERLOAD__ fixed_to_float_uint_uint16(uint16 x, int bits);

#endif

#endif  // __CL_KERNEL_H
