#if defined(__OPENCL_VERSION__) \
&& __OPENCL_VERSION__ >= 110
// Short aliases for the OpenCL scalar and vector types used in this file.
// The letter names the element type (i = int, u = uint, f = float) and the
// digit gives the number of components.
typedef int i1;
typedef int2 i2;
typedef uint u1;
typedef uint4 u4;
typedef float f1;
typedef float2 f2;
typedef float4 f4;
/* Multiply a 2-component coordinate by a 2x2 matrix, in place.
 *
 * The matrix is packed row-major into a float4:
 *     [ mtx->x  mtx->y ]        [ mtx->s0  mtx->s1 ]
 *     [ mtx->z  mtx->w ]   ==   [ mtx->s2  mtx->s3 ]
 *
 * crd  in/out: column vector (x, y); overwritten with matrix * vector.
 * mtx  in:     the packed matrix; it is only read.
 *
 * Define PX_ROTATE__NO_DOT_PRODUCT to spell the multiplication out
 * component by component instead of using the built-in dot().
 *
 * Note: the return type carries no address-space qualifier, because OpenCL C
 * does not allow one on a function return type.
 */
void
px_2x4_mtx(
    __private f2* crd,
    __private const f4* mtx)
{
    // Both vector access notations are shown here for reference. There is no
    // difference between the two, just pick your preference (.xyzw is
    // limited to the first four components, and the two styles may not be
    // mixed within a single swizzle).
    f2 crn = (f2)(
#ifdef PX_ROTATE__NO_DOT_PRODUCT
        crd->x * mtx->x         // crd->s0 * mtx->s0
      + crd->y * mtx->y,        // crd->s1 * mtx->s1
        crd->x * mtx->z         // crd->s0 * mtx->s2
      + crd->y * mtx->w         // crd->s1 * mtx->s3
#else
        // This may not compile to any faster code than the above.
        dot(*crd, mtx->xy),     // dot(*crd, mtx->s01): first matrix row
        dot(*crd, mtx->zw)      // dot(*crd, mtx->s23): second matrix row
#endif
    );
    // Both components were computed from the old coordinate before it is
    // overwritten here.
    *crd = crn;
}
#if defined(__IMAGE_SUPPORT__) \
&& __IMAGE_SUPPORT__
// Fall back to the shear-matrix method when neither PX_ROTATE__SHEAR nor
// PX_ROTATE__RMTRX has been defined at this point.
#if !defined(PX_ROTATE__SHEAR) \
&& !defined(PX_ROTATE__RMTRX)
# define PX_ROTATE__SHEAR
#endif
/* Rotate (and scale) an image; each work-item produces one output pixel.
 *
 * src  image that is sampled.
 * dst  image that receives one pixel at this work-item's global id.
 * deg  rotation angle in degrees; clamped to [0, 180] before use.
 * smp  sampler used for the read from src.
 *
 * The rotation centre is half of the global work size, so the kernel
 * presumably expects to be enqueued with a global size equal to the image
 * size -- confirm against the host code.
 *
 * Compile-time switches:
 *   PX_ROTATE__SHEAR                   rotate with three shear matrices
 *                                      (takes precedence if both are set).
 *   PX_ROTATE__RMTRX                   rotate with one rotation matrix.
 *   PX_ROTATE__NO_SCALE_MATRIX         scale with a scalar multiplication
 *                                      instead of a scale matrix.
 *   PX_ROTATE__VISUALIZE_LOCAL_GROUPS  shade pixels by their local id.
 */
__kernel void
px_rotate(
    __read_only image2d_t src,
    __write_only image2d_t dst,
    float deg,
    sampler_t smp)
{
    /* It is possible to calculate rotated coordinates by doing three matrix
       multiplications using two different shear matrices:
           X' = A * B * A * X
       where X and X' are the origin and rotated coordinate vectors
       respectively. The product A * B * A is a rotation matrix R:
           R = [ cos(L)  -sin(L)
                 sin(L)   cos(L) ]
       where L is the angle of rotation.
       The matrices A and B are defined (or the rotation matrix is
       decomposed, if you will) as follows:
           A = [ 1.0f    -tan(L/2)
                 0.0f     1.0f     ],
           B = [ 1.0f     0.0f
                 sin(L)   1.0f     ]
       Calculating the inverses A^-1 and B^-1 is extremely simple as both are
       triangular matrices. (It also so happens that in this case R^-1 is a
       trivial calculation, too, as the determinant cos^2(L) + sin^2(L) is 1
       and thus again only the signs of the sines change.)
       The inverse matrices are useful, as we may now calculate the origin
       for each rotated coordinate. This reverse approach provides one
       important fundamental guarantee: every single coordinate (i.e. pixel)
       of the output is filled, leaving no holes.
       If the image were rotated `directly', it would be necessary to check
       carefully that no output coordinate is left unvisited, or to use some
       postprocessing trick(s). Using shear matrices is one way of overcoming
       major aliasing issues; however, as a configurable sampler is available
       only for the input image, it makes no sense whatsoever to use the
       direct approach here.
    */

    // The output coordinate this work-item is responsible for. The source
    // location that lands here after rotation is computed below.
    // get_global_id() returns size_t, hence the explicit narrowing casts.
    i2 TARGET_int = (i2)(
        (int)get_global_id(0),
        (int)get_global_id(1));

#if defined(PX_ROTATE__SHEAR) \
 || defined(PX_ROTATE__RMTRX)
    // M_PI_F is the single-precision constant; plain M_PI is a double and is
    // only usable on devices with double-precision support.
    f1 rad = M_PI_F * (clamp(deg, 0.0f, 180.0f) / 180.0f);

    i2 blksiz_int = (i2)(
        (int)get_global_size(0),
        (int)get_global_size(1));
    // Explicit conversions are pretty much the only way around.
    f2 blksiz_flt = convert_float2_rtz(blksiz_int);
    f2 center_flt = blksiz_flt / 2.0f;
    f2 ORIGIN_flt = convert_float2_rtz(TARGET_int);

    f1 sin_rad = sin(rad);
    f1 cos_rad = cos(rad);

    // As the length of both image sides is equal, the rotated image will be
    // off bounds by length * cos(angle). The scale factor for a side is
    // respectively length * cos(angle) / length, which reduces to a mere
    // cos(angle).
    // NOTE(review): cos(angle) approaches zero near 90 degrees and is
    // negative beyond it, so the reciprocal below becomes extremely large
    // (or non-finite) or changes sign there -- confirm this is intended.
    f1 scale = cos_rad;
    f1 scale_i = 1.0f / scale;
#ifndef PX_ROTATE__NO_SCALE_MATRIX
    // A scale matrix is used for the sake of every other operation being a
    // matrix operation. If you dislike it, define PX_ROTATE__NO_SCALE_MATRIX.
    f4 scale_m = (f4)(          // inverse scale matrix
        scale_i, 0.0f,
        0.0f,    scale_i);
#endif

#if defined(PX_ROTATE__SHEAR)
    f1 tan_rad = tan(rad / 2.0f);
    f4 shear_a = (f4)(          // inverse A
        1.0f, tan_rad,
        0.0f, 1.0f);
    f4 shear_b = (f4)(          // inverse B
        1.0f,            0.0f,
        -1.0f * sin_rad, 1.0f);
#elif defined(PX_ROTATE__RMTRX)
    f4 rotat_r = (f4)(          // inverse R
        cos_rad,         sin_rad,
        -1.0f * sin_rad, cos_rad);
#endif

    // Move the coordinate system so that every operation is centred on the
    // middle of the image instead of (0, 0).
    ORIGIN_flt -= center_flt;

#ifndef PX_ROTATE__NO_SCALE_MATRIX
    px_2x4_mtx(&ORIGIN_flt, &scale_m);
#else
    ORIGIN_flt *= scale_i;      // Nevertheless, this must be the inverse.
#endif

#if defined(PX_ROTATE__SHEAR)
    // X = A^-1 * B^-1 * A^-1 * X'
    px_2x4_mtx(&ORIGIN_flt, &shear_a);
    px_2x4_mtx(&ORIGIN_flt, &shear_b);
    px_2x4_mtx(&ORIGIN_flt, &shear_a);
#elif defined(PX_ROTATE__RMTRX)
    // X = R^-1 * X'
    px_2x4_mtx(&ORIGIN_flt, &rotat_r);
#endif

    // Undo the re-centring.
    ORIGIN_flt += center_flt;

    // Saturate and convert back to integer coordinates.
    i2 ORIGIN_int = convert_int2_sat_rtz(ORIGIN_flt);
#else
    i2 ORIGIN_int = TARGET_int;
#endif
    // PX_ROTATE__{SHEAR|RMTRX}

    // Read the pixel from the rotated and scaled location. The sampler takes
    // care of out-of-bounds reads.
    // NOTE(review): with integer coordinates the OpenCL specification only
    // defines the result for a sampler using unnormalized coordinates and
    // nearest filtering -- confirm the host creates smp accordingly.
    f4 pxl = read_imagef(src, smp, ORIGIN_int);

    // Note: no memory barriers / fences are required in this example, as the
    // order in which work-items execute is not important: each one only
    // produces a single output pixel that does not depend on its neighbours.
    // It is possible, and even likely, that two or more work-items read the
    // very same input coordinate (due to the rounding from floating point to
    // integer coordinates). This is not an issue, as the input is read-only
    // and thus readable simultaneously by multiple work-items.
#ifdef PX_ROTATE__VISUALIZE_LOCAL_GROUPS
    // Shade the colour channels by the work-item's position inside its
    // work-group; the fourth component is left untouched.
    f1 rdx = (f1)(get_local_id(0) + get_local_id(1))
           / (f1)(get_local_size(0) + get_local_size(1)) + 0.5f;
    pxl *= (f4)(rdx, rdx, rdx, 1.0f);
#endif

    // ..and store it to the output location of this work-item.
    write_imagef(dst, TARGET_int, pxl);
}
#endif
// __IMAGE_SUPPORT__..
#endif
// __OPENCL_VERSION__..
/* This file is part of
521445S Digital Techniques III -- An (OpenCL) Programming Guide.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */