import argparse
# Command-line interface: one opt-in switch that selects the clspv flavour of
# the generated code. The parsed flag is kept in the module-level `clspv`
# name, which the generator functions below read directly.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--clspv",
    help="Generate the clspv variant of the code",
    action="store_true",
)
args = parser.parse_args()
clspv = args.clspv
# Scalar OpenCL C type names the generator iterates over, integers first.
int_types = ["char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"]
float_types = ["half", "float", "double"]
types = int_types + float_types

# Subsets consulted when choosing clamps and preprocessor guards.
unsigned_types = [name for name in int_types if name.startswith("u")]
int64_types = ["long", "ulong"]
float64_types = ["double"]
float16_types = ["half"]

# Vector-width suffixes; "" stands for the scalar form.
vector_sizes = ["", "2", "3", "4", "8", "16"]
# (width, half width) pairs for the vectors converted as .lo/.hi halves.
half_sizes = [("2", ""), ("4", "2"), ("8", "4"), ("16", "8")]

# Optional function-name suffixes.
saturation = ["", "_sat"]
rounding_modes = ["_rtz", "_rte", "_rtp", "_rtn"]

# Signed integer type of the same width as each type. The emitted code
# converts comparison results to this type before handing them to select().
bool_type = {
    name: name[1:] if name in unsigned_types else name for name in int_types
}
bool_type.update({"half": "short", "float": "int", "double": "long"})

# Unsigned counterpart of every integer type (unsigned types map to themselves).
unsigned_type = {
    name: name if name in unsigned_types else "u" + name for name in int_types
}

# Size of each scalar type in bytes.
sizeof_type = {
    "char": 1, "uchar": 1,
    "short": 2, "ushort": 2,
    "int": 4, "uint": 4,
    "long": 8, "ulong": 8,
    "half": 2, "float": 4, "double": 8,
}

# Largest value of each type, spelled the way it is pasted into the generated
# C source. Only the integer types and half have an entry.
limit_max = {
    "char": "CHAR_MAX", "uchar": "UCHAR_MAX",
    "short": "SHRT_MAX", "ushort": "USHRT_MAX",
    "int": "INT_MAX", "uint": "UINT_MAX",
    "long": "LONG_MAX", "ulong": "ULONG_MAX",
    "half": "0x1.ffcp+15",
}

# Smallest value of each type, same coverage as limit_max.
limit_min = {
    "char": "CHAR_MIN", "uchar": "0",
    "short": "SHRT_MIN", "ushort": "0",
    "int": "INT_MIN", "uint": "0",
    "long": "LONG_MIN", "ulong": "0",
    "half": "-0x1.ffcp+15",
}
def conditional_guard(src, dst):
    """Print the opening preprocessor guard a conversion between two types needs.

    Each of ``src`` and ``dst`` is classified as a 64-bit integer, a 64-bit
    float or a 16-bit float (or none of these). At most one ``#if``/``#ifdef``
    line is printed, chosen in this priority order: fp16 and fp64 together,
    fp64 alone, fp16 alone, 64-bit integers.

    Args:
        src: Source scalar type name.
        dst: Destination scalar type name.

    Returns:
        True when a guard line was printed (the caller is then responsible for
        printing the matching ``#endif``), False otherwise.
    """
    required = set()
    for type_name in (src, dst):
        if type_name in int64_types:
            required.add("int64")
        elif type_name in float64_types:
            required.add("fp64")
        elif type_name in float16_types:
            required.add("fp16")

    # NOTE(review): a half <-> long/ulong pair only receives the fp16 guard;
    # the 64-bit integer guard is emitted solely when no float extension is
    # involved.
    if "fp64" in required and "fp16" in required:
        guard_line = "#if defined(cl_khr_fp16) && defined(cl_khr_fp64)"
    elif "fp64" in required:
        guard_line = "#ifdef cl_khr_fp64"
    elif "fp16" in required:
        guard_line = "#if defined cl_khr_fp16"
    elif "int64" in required:
        guard_line = "#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)"
    else:
        return False

    print(guard_line)
    return True
# Text written once at the very top of the generated output: the
# "autogenerated" banner, the license notice, the clc include and the
# extension pragmas the generated functions rely on.
preamble = """/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!
DON'T CHANGE THIS FILE. MAKE YOUR CHANGES TO convert_type.py AND RUN:
$ ./generate-conversion-type-cl.sh
OpenCL type conversion functions
Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <clc/clc.h>
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#endif
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#if defined(__EMBEDDED_PROFILE__) && !defined(cles_khr_int64)
#error Embedded profile that supports cl_khr_fp64 also has to support cles_khr_int64
#endif
#endif
#ifdef cles_khr_int64
#pragma OPENCL EXTENSION cles_khr_int64 : enable
#endif
"""
print(preamble)
def generate_default_conversion(src, dst, mode):
    """Print cast-based ``convert_<dst><n><mode>`` overloads for one type pair.

    The scalar overload is a plain C cast. The 2/4/8/16-wide overloads are
    assembled from the half-width conversion applied to ``x.lo`` and ``x.hi``,
    and the 3-wide overload from a 2-wide conversion of ``x.s01`` plus a
    scalar conversion of ``x.s2``. The half-width and 3-wide helper calls are
    emitted without the ``mode`` suffix. Everything is wrapped in the guard
    chosen by ``conditional_guard``.

    Args:
        src: Source scalar type name.
        dst: Destination scalar type name.
        mode: Rounding-mode suffix appended to the function names ("" for none).
    """
    guarded = conditional_guard(src, dst)
    names = {"SRC": src, "DST": dst, "M": mode}

    scalar_template = """_CLC_DEF _CLC_OVERLOAD
{DST} convert_{DST}{M}({SRC} x)
{{
return ({DST})x;
}}
"""
    print(scalar_template.format(**names))

    split_template = """_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}{M}({SRC}{N} x)
{{
return ({DST}{N})(convert_{DST}{H}(x.lo), convert_{DST}{H}(x.hi));
}}
"""
    for width, half_width in half_sizes:
        print(split_template.format(N=width, H=half_width, **names))

    vec3_template = """_CLC_DEF _CLC_OVERLOAD
{DST}3 convert_{DST}3{M}({SRC}3 x)
{{
return ({DST}3)(convert_{DST}2(x.s01), convert_{DST}(x.s2));
}}"""
    print(vec3_template.format(**names))

    if guarded:
        print("#endif")
# Suffix-less conversions between every ordered pair of types. These are
# left out entirely when generating the clspv variant.
if not clspv:
    for src in types:
        for dst in types:
            generate_default_conversion(src, dst, "")

# Integer-to-integer conversions carrying an explicit rounding suffix. The
# clspv variant does not get the "_rte" flavour.
for src in int_types:
    for dst in int_types:
        for mode in rounding_modes:
            if not (clspv and mode == "_rte"):
                generate_default_conversion(src, dst, mode)
def generate_saturated_conversion(src, dst, size):
    """Print one ``convert_<dst><n>_sat`` overload.

    The emitted body depends on the type pair:

    * identical types: ``x`` is returned unchanged;
    * floating-point source: the value is converted, then replaced through
      ``select`` with the destination minimum/maximum when ``x`` lies beyond
      them (the clspv variant uses the inclusive ``<=``/``>=`` comparisons,
      the default variant ``<``/``>``);
    * integer source: ``x`` is first limited with ``min``/``max``/``clamp``
      as required by the relative size and signedness of the two types, then
      converted with the non-saturating ``convert_<dst><n>``.

    The function is wrapped in the guard chosen by ``conditional_guard``.

    Args:
        src: Source scalar type name.
        dst: Destination scalar type name.
        size: Vector-width suffix ("" for scalar).
    """
    guarded = conditional_guard(src, dst)

    signature = """_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}_sat({SRC}{N} x)
{{"""
    print(signature.format(DST=dst, SRC=src, N=size))

    # Text placed around each comparison so it can be used as a select() mask.
    # For unsigned destinations the converted mask is additionally
    # reinterpreted with as_<dst><n>().
    dst_is_unsigned = dst in unsigned_types
    if dst_is_unsigned:
        mask_open, mask_close = "as_{DST}{N}(convert_{BOOL}{N}", ")"
    else:
        mask_open, mask_close = "convert_{BOOL}{N}", ""
    mask_open = mask_open.format(DST=dst, BOOL=bool_type[dst], N=size)

    if src == dst:
        print(" return x;")
    elif src in float_types:
        if clspv:
            body = """ {DST}{N} y = convert_{DST}{N}(x);
y = select(y, ({DST}{N}){DST_MIN}, {BP}(x <= ({SRC}{N}){DST_MIN}){BS});
y = select(y, ({DST}{N}){DST_MAX}, {BP}(x >= ({SRC}{N}){DST_MAX}){BS});
return y;"""
        else:
            body = """ {DST}{N} y = convert_{DST}{N}(x);
y = select(y, ({DST}{N}){DST_MIN}, {BP}(x < ({SRC}{N}){DST_MIN}){BS});
y = select(y, ({DST}{N}){DST_MAX}, {BP}(x > ({SRC}{N}){DST_MAX}){BS});
return y;"""
        print(
            body.format(
                SRC=src,
                DST=dst,
                N=size,
                DST_MIN=limit_min[dst],
                DST_MAX=limit_max[dst],
                BP=mask_open,
                BS=mask_close,
            )
        )
    else:
        src_is_unsigned = src in unsigned_types
        src_bytes = sizeof_type[src]
        dst_bytes = sizeof_type[dst]
        if src_bytes >= dst_bytes:
            if src_is_unsigned:
                # An unsigned source can only exceed the upper bound.
                print(
                    " x = min(x, ({SRC}){DST_MAX});".format(
                        SRC=src, DST_MAX=limit_max[dst]
                    )
                )
            elif src_bytes == dst_bytes:
                # Signed source, same width: only negative values are cut off.
                print(" x = max(x, ({SRC})0);".format(SRC=src))
            else:
                # Wider signed source: limit on both sides.
                print(
                    " x = clamp(x, ({SRC}){DST_MIN}, ({SRC}){DST_MAX});".format(
                        SRC=src, DST_MIN=limit_min[dst], DST_MAX=limit_max[dst]
                    )
                )
        elif not src_is_unsigned and dst_is_unsigned:
            # Narrower signed source into an unsigned destination.
            print(" x = max(x, ({SRC})0);".format(SRC=src))
        print(" return convert_{DST}{N}(x);".format(DST=dst, N=size))

    print("}")
    if guarded:
        print("#endif")
# Saturated conversions from every type into every integer type, for the
# scalar form and each vector width (source-major, width-minor order).
for src, dst, size in (
    (from_type, to_type, width)
    for from_type in types
    for to_type in int_types
    for width in vector_sizes
):
    generate_saturated_conversion(src, dst, size)
def generate_saturated_conversion_with_rounding(src, dst, size, mode):
    """Print a ``convert_<dst><n>_sat<mode>`` overload that forwards to ``_sat``.

    The emitted function simply returns ``convert_<dst><n>_sat(x)``; the
    rounding suffix only affects the function name. The output is wrapped in
    the guard chosen by ``conditional_guard``.

    Args:
        src: Source scalar type name.
        dst: Destination scalar type name.
        size: Vector-width suffix ("" for scalar).
        mode: Rounding-mode suffix appended after ``_sat``.
    """
    guarded = conditional_guard(src, dst)

    forwarding_template = """_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}_sat{M}({SRC}{N} x)
{{
return convert_{DST}{N}_sat(x);
}}
"""
    print(forwarding_template.format(DST=dst, SRC=src, N=size, M=mode))

    if guarded:
        print("#endif")
# Saturated integer-to-integer conversions for every rounding suffix and
# every vector width (source-major, mode-minor order).
for src, dst, size, mode in (
    (from_type, to_type, width, rounding)
    for from_type in int_types
    for to_type in int_types
    for width in vector_sizes
    for rounding in rounding_modes
):
    generate_saturated_conversion_with_rounding(src, dst, size, mode)
def generate_float_conversion(src, dst, size, mode, sat):
    """Print one ``convert_<dst><n><sat><mode>`` overload involving a float type.

    Integer destination: ``x`` is pre-rounded with ``rint`` (``_rte``),
    ``ceil`` (``_rtp``) or ``floor`` (``_rtn``) - no call is emitted for any
    other mode - and then passed to ``convert_<dst><n><sat>``.

    Any other destination: ``_rte`` forwards straight to ``convert_<dst><n>``.
    For the remaining modes the emitted code converts ``x`` to ``r``, converts
    ``r`` back to the source type as ``y`` (via the ``_sat`` conversion in the
    clspv variant), and uses ``select`` to replace ``r`` with a ``nextafter``
    neighbour when the comparison between ``y`` and ``x`` for that mode holds.
    When the destination is ``half`` and the source is an integer of at least
    two bytes, the result is additionally limited to the half range.

    The function is wrapped in the guard chosen by ``conditional_guard``.

    Args:
        src: Source scalar type name.
        dst: Destination scalar type name.
        size: Vector-width suffix ("" for scalar).
        mode: Rounding-mode suffix.
        sat: Saturation suffix ("" or "_sat").
    """
    guarded = conditional_guard(src, dst)

    def emit(template, **extra):
        # str.format ignores unused keyword fields, so the common ones can be
        # supplied to every template.
        print(template.format(SRC=src, DST=dst, N=size, M=mode, S=sat, **extra))

    def half_upper_bound():
        # NOTE(review): presumably 0x1.ffcp+14 is used for short because the
        # default half maximum exceeds what a short can hold - confirm.
        return "0x1.ffcp+14" if src == "short" else limit_max[dst]

    emit(
        """_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}{S}{M}({SRC}{N} x)
{{"""
    )

    if dst in int_types:
        pre_rounding = {
            "_rte": " x = rint(x);",
            "_rtp": " x = ceil(x);",
            "_rtn": " x = floor(x);",
        }
        if mode in pre_rounding:
            print(pre_rounding[mode])
        emit(" return convert_{DST}{N}{S}(x);")
    elif mode == "_rte":
        emit(" return convert_{DST}{N}(x);")
    else:
        src_is_int = src in int_types
        limit_half = dst == "half" and src_is_int and sizeof_type[src] >= 2

        emit(" {DST}{N} r = convert_{DST}{N}(x);")
        if clspv:
            emit(" {SRC}{N} y = convert_{SRC}{N}_sat(r);")
        else:
            emit(" {SRC}{N} y = convert_{SRC}{N}(r);")

        if mode == "_rtz":
            # Compare magnitudes: step r towards zero when |y| > |x|.
            if src_is_int:
                emit(" {USRC}{N} abs_x = abs(x);", USRC=unsigned_type[src])
                emit(" {USRC}{N} abs_y = abs(y);", USRC=unsigned_type[src])
            else:
                emit(" {SRC}{N} abs_x = fabs(x);")
                emit(" {SRC}{N} abs_y = fabs(y);")
            mask_type = bool_type[dst]
            if clspv:
                emit(
                    " {BOOL}{N} c = convert_{BOOL}{N}(abs_y > abs_x);",
                    BOOL=mask_type,
                )
                if sizeof_type[src] >= 4 and src_is_int:
                    # The clspv variant also steps when x equals the source maximum.
                    emit(
                        " c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);",
                        BOOL=mask_type,
                        SRC_MAX=limit_max[src],
                    )
                emit(
                    " {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), c);",
                    BOOL=mask_type,
                )
            else:
                emit(
                    " {DST}{N} sel = select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));",
                    BOOL=mask_type,
                )
            if limit_half:
                emit(
                    " return clamp(sel, ({DST}{N}){DST_MIN}, ({DST}{N}){DST_MAX});",
                    DST_MIN=limit_min[dst],
                    DST_MAX=half_upper_bound(),
                )
            else:
                print(" return sel;")
        elif mode == "_rtp":
            # Step r towards +INFINITY when the round trip landed below x.
            emit(
                " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));",
                BOOL=bool_type[dst],
            )
            if limit_half:
                emit(" return max(sel, ({DST}{N}){DST_MIN});", DST_MIN=limit_min[dst])
            else:
                print(" return sel;")
        elif mode == "_rtn":
            # Step r towards -INFINITY when the round trip landed above x.
            mask_type = bool_type[dst]
            if clspv:
                emit(" {BOOL}{N} c = convert_{BOOL}{N}(y > x);", BOOL=mask_type)
                if sizeof_type[src] >= 4 and src_is_int:
                    emit(
                        " c = c || convert_{BOOL}{N}(({SRC}{N}){SRC_MAX} == x);",
                        BOOL=mask_type,
                        SRC_MAX=limit_max[src],
                    )
                emit(
                    " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), c);",
                    BOOL=mask_type,
                )
            else:
                emit(
                    " {DST}{N} sel = select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));",
                    BOOL=mask_type,
                )
            if limit_half:
                emit(
                    " return min(sel, ({DST}{N}){DST_MAX});",
                    DST_MAX=half_upper_bound(),
                )
            else:
                print(" return sel;")

    print("}")
    if guarded:
        print("#endif")
# Float-to-integer conversions: every rounding suffix, with and without
# saturation, for the scalar form and each vector width.
for src, dst, size, mode, sat in (
    (from_type, to_type, width, rounding, saturated)
    for from_type in float_types
    for to_type in int_types
    for width in vector_sizes
    for rounding in rounding_modes
    for saturated in saturation
):
    generate_float_conversion(src, dst, size, mode, sat)

# Conversions from any type into a floating-point type, never saturated.
# The clspv variant does not get the "_rte" flavour.
for src, dst, size, mode in (
    (from_type, to_type, width, rounding)
    for from_type in types
    for to_type in float_types
    for width in vector_sizes
    for rounding in rounding_modes
    if not (clspv and rounding == "_rte")
):
    generate_float_conversion(src, dst, size, mode, "")