This source file includes following definitions.
- OPENCL_DEFINE
- STRINGIFY
- STRINGIFY
- STRINGIFY
- OPENCL_IF
- OPENCL_ELIF
- OPENCL_ELIF
- OPENCL_ENDIF
- STRINGIFY
- STRINGIFY
- STRINGIFY
- setBlue
- getBlueF4
- setBlueF4
- getGreen
- setGreen
- getGreenF4
- setGreenF4
- getRed
- setRed
- getRedF4
- setRedF4
- getOpacity
- setOpacity
- getOpacityF4
- setOpacityF4
- setGray
- GetPixelIntensity
- STRINGIFY
- STRINGIFY
- STRINGIFY
- CompositeColorDodge
- STRINGIFY
- STRINGIFY
- ConvertHSBToRGB
- Contrast
- STRINGIFY
- STRINGIFY
- STRINGIFY
- STRINGIFY
- STRINGIFY
- STRINGIFY
- STRINGIFY
- STRINGIFY
- RandomNumberGenerator
- STRINGIFY
- STRINGIFY
- STRINGIFY
- STRINGIFY
- STRINGIFY
- STRINGIFY
- STRINGIFY
- UnsharpMaskBlurColumnSection
#ifndef _MAGICKCORE_ACCELERATE_PRIVATE_H
#define _MAGICKCORE_ACCELERATE_PRIVATE_H
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if defined(MAGICKCORE_OPENCL_SUPPORT)
#define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
#define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
#define OPENCL_ELSE() "\n #""else " " \n"
#define OPENCL_ENDIF() "\n #""endif " " \n"
#define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
#define STRINGIFY(...) #__VA_ARGS__ "\n"
typedef struct _FloatPixelPacket
{
#ifdef MAGICK_PIXEL_RGBA
MagickRealType
red,
green,
blue,
opacity;
#endif
#ifdef MAGICK_PIXEL_BGRA
MagickRealType
blue,
green,
red,
opacity;
#endif
} FloatPixelPacket;
const char* accelerateKernels =
OPENCL_DEFINE(GetPixelAlpha(pixel),(QuantumRange-(pixel).w))
OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f))
OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
OPENCL_DEFINE(SigmaRandom, (attenuate))
OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
STRINGIFY(
typedef enum
{
UndefinedColorspace,
RGBColorspace,
GRAYColorspace,
TransparentColorspace,
OHTAColorspace,
LabColorspace,
XYZColorspace,
YCbCrColorspace,
YCCColorspace,
YIQColorspace,
YPbPrColorspace,
YUVColorspace,
CMYKColorspace,
sRGBColorspace,
HSBColorspace,
HSLColorspace,
HWBColorspace,
Rec601LumaColorspace,
Rec601YCbCrColorspace,
Rec709LumaColorspace,
Rec709YCbCrColorspace,
LogColorspace,
CMYColorspace,
LuvColorspace,
HCLColorspace,
LCHColorspace,
LMSColorspace,
LCHabColorspace,
LCHuvColorspace,
scRGBColorspace,
HSIColorspace,
HSVColorspace,
HCLpColorspace,
YDbDrColorspace
} ColorspaceType;
)
STRINGIFY(
typedef enum
{
UndefinedCompositeOp,
NoCompositeOp,
ModulusAddCompositeOp,
AtopCompositeOp,
BlendCompositeOp,
BumpmapCompositeOp,
ChangeMaskCompositeOp,
ClearCompositeOp,
ColorBurnCompositeOp,
ColorDodgeCompositeOp,
ColorizeCompositeOp,
CopyBlackCompositeOp,
CopyBlueCompositeOp,
CopyCompositeOp,
CopyCyanCompositeOp,
CopyGreenCompositeOp,
CopyMagentaCompositeOp,
CopyOpacityCompositeOp,
CopyRedCompositeOp,
CopyYellowCompositeOp,
DarkenCompositeOp,
DstAtopCompositeOp,
DstCompositeOp,
DstInCompositeOp,
DstOutCompositeOp,
DstOverCompositeOp,
DifferenceCompositeOp,
DisplaceCompositeOp,
DissolveCompositeOp,
ExclusionCompositeOp,
HardLightCompositeOp,
HueCompositeOp,
InCompositeOp,
LightenCompositeOp,
LinearLightCompositeOp,
LuminizeCompositeOp,
MinusDstCompositeOp,
ModulateCompositeOp,
MultiplyCompositeOp,
OutCompositeOp,
OverCompositeOp,
OverlayCompositeOp,
PlusCompositeOp,
ReplaceCompositeOp,
SaturateCompositeOp,
ScreenCompositeOp,
SoftLightCompositeOp,
SrcAtopCompositeOp,
SrcCompositeOp,
SrcInCompositeOp,
SrcOutCompositeOp,
SrcOverCompositeOp,
ModulusSubtractCompositeOp,
ThresholdCompositeOp,
XorCompositeOp,
DivideDstCompositeOp,
DistortCompositeOp,
BlurCompositeOp,
PegtopLightCompositeOp,
VividLightCompositeOp,
PinLightCompositeOp,
LinearDodgeCompositeOp,
LinearBurnCompositeOp,
MathematicsCompositeOp,
DivideSrcCompositeOp,
MinusSrcCompositeOp,
DarkenIntensityCompositeOp,
LightenIntensityCompositeOp
} CompositeOperator;
)
STRINGIFY(
typedef enum
{
UndefinedFunction,
PolynomialFunction,
SinusoidFunction,
ArcsinFunction,
ArctanFunction
} MagickFunction;
)
STRINGIFY(
typedef enum
{
UndefinedNoise,
UniformNoise,
GaussianNoise,
MultiplicativeGaussianNoise,
ImpulseNoise,
LaplacianNoise,
PoissonNoise,
RandomNoise
} NoiseType;
)
STRINGIFY(
typedef enum
{
UndefinedPixelIntensityMethod = 0,
AveragePixelIntensityMethod,
BrightnessPixelIntensityMethod,
LightnessPixelIntensityMethod,
Rec601LumaPixelIntensityMethod,
Rec601LuminancePixelIntensityMethod,
Rec709LumaPixelIntensityMethod,
Rec709LuminancePixelIntensityMethod,
RMSPixelIntensityMethod,
MSPixelIntensityMethod
} PixelIntensityMethod;
)
STRINGIFY(
typedef enum {
BoxWeightingFunction = 0,
TriangleWeightingFunction,
CubicBCWeightingFunction,
HanningWeightingFunction,
HammingWeightingFunction,
BlackmanWeightingFunction,
GaussianWeightingFunction,
QuadraticWeightingFunction,
JincWeightingFunction,
SincWeightingFunction,
SincFastWeightingFunction,
KaiserWeightingFunction,
WelshWeightingFunction,
BohmanWeightingFunction,
LagrangeWeightingFunction,
CosineWeightingFunction,
} ResizeWeightingFunctionType;
)
STRINGIFY(
typedef enum
{
UndefinedChannel,
RedChannel = 0x0001,
GrayChannel = 0x0001,
CyanChannel = 0x0001,
GreenChannel = 0x0002,
MagentaChannel = 0x0002,
BlueChannel = 0x0004,
YellowChannel = 0x0004,
AlphaChannel = 0x0008,
OpacityChannel = 0x0008,
MatteChannel = 0x0008,
BlackChannel = 0x0020,
IndexChannel = 0x0020,
CompositeChannels = 0x002F,
AllChannels = 0x7ffffff,
TrueAlphaChannel = 0x0040,
RGBChannels = 0x0080,
GrayChannels = 0x0080,
SyncChannels = 0x0100,
DefaultChannels = ((AllChannels | SyncChannels) &~ OpacityChannel)
} ChannelType;
)
OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
STRINGIFY(
inline CLQuantum ScaleCharToQuantum(const unsigned char value)
{
return((CLQuantum) value);
}
)
OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
STRINGIFY(
inline CLQuantum ScaleCharToQuantum(const unsigned char value)
{
return((CLQuantum) (257.0f*value));
}
)
OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
STRINGIFY(
inline CLQuantum ScaleCharToQuantum(const unsigned char value)
{
return((CLQuantum) (16843009.0*value));
}
)
OPENCL_ENDIF()
STRINGIFY(
inline int ClampToCanvas(const int offset, const int range)
{
return clamp(offset, (int)0, range - 1);
}
)
STRINGIFY(
inline int ClampToCanvasWithHalo(const int offset, const int range, const int edge, const int section)
{
return clamp(offset, section ? (int)(0 - edge) : (int)0, section ? (range - 1) : (range - 1 + edge));
}
)
STRINGIFY(
inline CLQuantum ClampToQuantum(const float value)
{
return (CLQuantum)(clamp(value, 0.0f, (float)QuantumRange) + 0.5f);
}
)
STRINGIFY(
inline uint ScaleQuantumToMap(CLQuantum value)
{
if (value >= (CLQuantum)MaxMap)
return ((uint)MaxMap);
else
return ((uint)value);
}
)
STRINGIFY(
inline float PerceptibleReciprocal(const float x)
{
float sign = x < (float) 0.0 ? (float)-1.0 : (float) 1.0;
return((sign*x) >= MagickEpsilon ? (float) 1.0 / x : sign*((float) 1.0 / MagickEpsilon));
}
)
STRINGIFY(
inline float RoundToUnity(const float value)
{
return clamp(value, 0.0f, 1.0f);
}
)
STRINGIFY(
inline CLQuantum getBlue(CLPixelType p) { return p.x; }
inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
inline float getBlueF4(float4 p) { return p.x; }
inline void setBlueF4(float4* p, float value) { (*p).x = value; }
inline CLQuantum getGreen(CLPixelType p) { return p.y; }
inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
inline float getGreenF4(float4 p) { return p.y; }
inline void setGreenF4(float4* p, float value) { (*p).y = value; }
inline CLQuantum getRed(CLPixelType p) { return p.z; }
inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
inline float getRedF4(float4 p) { return p.z; }
inline void setRedF4(float4* p, float value) { (*p).z = value; }
inline CLQuantum getOpacity(CLPixelType p) { return p.w; }
inline void setOpacity(CLPixelType* p, CLQuantum value) { (*p).w = value; }
inline float getOpacityF4(float4 p) { return p.w; }
inline void setOpacityF4(float4* p, float value) { (*p).w = value; }
inline void setGray(CLPixelType* p, CLQuantum value) { (*p).z = value; (*p).y = value; (*p).x = value; }
inline float GetPixelIntensity(const int method, const int colorspace, CLPixelType p)
{
float red = getRed(p);
float green = getGreen(p);
float blue = getBlue(p);
float intensity;
if (colorspace == GRAYColorspace)
return red;
switch (method)
{
case AveragePixelIntensityMethod:
{
intensity = (red + green + blue) / 3.0;
break;
}
case BrightnessPixelIntensityMethod:
{
intensity = max(max(red, green), blue);
break;
}
case LightnessPixelIntensityMethod:
{
intensity = (min(min(red, green), blue) +
max(max(red, green), blue)) / 2.0;
break;
}
case MSPixelIntensityMethod:
{
intensity = (float)(((float)red*red + green*green + blue*blue) /
(3.0*QuantumRange));
break;
}
case Rec601LumaPixelIntensityMethod:
{
intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
break;
}
case Rec601LuminancePixelIntensityMethod:
{
intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
break;
}
case Rec709LumaPixelIntensityMethod:
default:
{
intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
break;
}
case Rec709LuminancePixelIntensityMethod:
{
intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
break;
}
case RMSPixelIntensityMethod:
{
intensity = (float)(sqrt((float)red*red + green*green + blue*blue) /
sqrt(3.0));
break;
}
}
return intensity;
}
)
STRINGIFY(
ulong MWC_AddMod64(ulong a, ulong b, ulong M)
{
ulong v=a+b;
if( (v>=M) || (convert_float(v) < convert_float(a)) )
v=v-M;
return v;
}
ulong MWC_MulMod64(ulong a, ulong b, ulong M)
{
ulong r=0;
while(a!=0){
if(a&1)
r=MWC_AddMod64(r,b,M);
b=MWC_AddMod64(b,b,M);
a=a>>1;
}
return r;
}
ulong MWC_PowMod64(ulong a, ulong e, ulong M)
{
ulong sqr=a, acc=1;
while(e!=0){
if(e&1)
acc=MWC_MulMod64(acc,sqr,M);
sqr=MWC_MulMod64(sqr,sqr,M);
e=e>>1;
}
return acc;
}
uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
{
ulong m=MWC_PowMod64(A, distance, M);
ulong x=curr.x*(ulong)A+curr.y;
x=MWC_MulMod64(x, m, M);
return (uint2)((uint)(x/A), (uint)(x%A));
}
uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
{
enum{ MWC_BASEID = 4077358422479273989UL };
ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
ulong m=MWC_PowMod64(A, dist, M);
ulong x=MWC_MulMod64(MWC_BASEID, m, M);
return (uint2)((uint)(x/A), (uint)(x%A));
}
typedef struct{ uint x; uint c; } mwc64x_state_t;
enum{ MWC64X_A = 4294883355U };
enum{ MWC64X_M = 18446383549859758079UL };
void MWC64X_Step(mwc64x_state_t *s)
{
uint X=s->x, C=s->c;
uint Xn=MWC64X_A*X+C;
uint carry=(uint)(Xn<C);
uint Cn=mad_hi(MWC64X_A,X,carry);
s->x=Xn;
s->c=Cn;
}
void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
{
uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), MWC64X_A, MWC64X_M, distance);
s->x=tmp.x;
s->c=tmp.y;
}
void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
{
uint2 tmp=MWC_SeedImpl_Mod64(MWC64X_A, MWC64X_M, 1, 0, baseOffset, perStreamOffset);
s->x=tmp.x;
s->c=tmp.y;
}
uint MWC64X_NextUint(mwc64x_state_t *s)
{
uint res=s->x ^ s->c;
MWC64X_Step(s);
return res;
}
float mwcReadPseudoRandomValue(mwc64x_state_t* rng) {
return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff);
}
float mwcGenerateDifferentialNoise(mwc64x_state_t* r, CLQuantum pixel, NoiseType noise_type, float attenuate) {
float
alpha,
beta,
noise,
sigma;
noise = 0.0f;
alpha=mwcReadPseudoRandomValue(r);
switch(noise_type) {
case UniformNoise:
default:
{
noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
break;
}
case GaussianNoise:
{
float
gamma,
tau;
if (alpha == 0.0f)
alpha=1.0f;
beta=mwcReadPseudoRandomValue(r);
gamma=sqrt(-2.0f*log(alpha));
sigma=gamma*cospi((2.0f*beta));
tau=gamma*sinpi((2.0f*beta));
noise=(float)(pixel+sqrt((float) pixel)*SigmaGaussian*sigma+
QuantumRange*TauGaussian*tau);
break;
}
case ImpulseNoise:
{
if (alpha < (SigmaImpulse/2.0f))
noise=0.0f;
else
if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
noise=(float)QuantumRange;
else
noise=(float)pixel;
break;
}
case LaplacianNoise:
{
if (alpha <= 0.5f)
{
if (alpha <= MagickEpsilon)
noise=(float) (pixel-QuantumRange);
else
noise=(float) (pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
0.5f);
break;
}
beta=1.0f-alpha;
if (beta <= (0.5f*MagickEpsilon))
noise=(float) (pixel+QuantumRange);
else
noise=(float) (pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
break;
}
case MultiplicativeGaussianNoise:
{
sigma=1.0f;
if (alpha > MagickEpsilon)
sigma=sqrt(-2.0f*log(alpha));
beta=mwcReadPseudoRandomValue(r);
noise=(float) (pixel+pixel*SigmaMultiplicativeGaussian*sigma*
cospi((float) (2.0f*beta))/2.0f);
break;
}
case PoissonNoise:
{
float
poisson;
unsigned int i;
poisson=exp(-SigmaPoisson*QuantumScale*pixel);
for (i=0; alpha > poisson; i++)
{
beta=mwcReadPseudoRandomValue(r);
alpha*=beta;
}
noise=(float) (QuantumRange*i/SigmaPoisson);
break;
}
case RandomNoise:
{
noise=(float) (QuantumRange*SigmaRandom*alpha);
break;
}
};
return noise;
}
__kernel
void AddNoise(const __global CLPixelType* inputImage, __global CLPixelType* filteredImage
,const unsigned int inputPixelCount, const unsigned int pixelsPerWorkItem
,const ChannelType channel
,const NoiseType noise_type, const float attenuate
,const unsigned int seed0, const unsigned int seed1
,const unsigned int numRandomNumbersPerPixel) {
mwc64x_state_t rng;
rng.x = seed0;
rng.c = seed1;
uint span = pixelsPerWorkItem * numRandomNumbersPerPixel;
uint offset = span * get_local_size(0) * get_group_id(0);
MWC64X_SeedStreams(&rng, offset, span);
uint pos = get_local_size(0) * get_group_id(0) * pixelsPerWorkItem + get_local_id(0);
uint count = pixelsPerWorkItem;
while (count > 0) {
if (pos < inputPixelCount) {
CLPixelType p = inputImage[pos];
if ((channel&RedChannel)!=0) {
setRed(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getRed(p),noise_type,attenuate)));
}
if ((channel&GreenChannel)!=0) {
setGreen(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getGreen(p),noise_type,attenuate)));
}
if ((channel&BlueChannel)!=0) {
setBlue(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getBlue(p),noise_type,attenuate)));
}
if ((channel & OpacityChannel) != 0) {
setOpacity(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getOpacity(p),noise_type,attenuate)));
}
filteredImage[pos] = p;
}
pos += get_local_size(0);
--count;
}
}
)
STRINGIFY(
__kernel void BlurRow(__global CLPixelType *im, __global float4 *filtered_im,
const ChannelType channel, __constant float *filter,
const unsigned int width,
const unsigned int imageColumns, const unsigned int imageRows,
__local CLPixelType *temp)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int columns = imageColumns;
const unsigned int radius = (width-1)/2;
const int wsize = get_local_size(0);
const unsigned int loadSize = wsize+width;
const int groupX=get_local_size(0)*get_group_id(0);
const int groupY=get_local_size(1)*get_group_id(1);
for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
{
temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_global_id(0) < columns)
{
float4 result = (float4) 0;
int i = 0;
\n #ifndef UFACTOR \n
\n #define UFACTOR 8 \n
\n #endif \n
for ( ; i+UFACTOR < width; )
{
\n #pragma unroll UFACTOR\n
for (int j=0; j < UFACTOR; j++, i++)
{
result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
}
}
for ( ; i < width; i++)
{
result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
}
result.x = ClampToQuantum(result.x);
result.y = ClampToQuantum(result.y);
result.z = ClampToQuantum(result.z);
result.w = ClampToQuantum(result.w);
filtered_im[y*columns+x] = result;
}
}
)
STRINGIFY(
__kernel void BlurRowSection(__global CLPixelType *im, __global float4 *filtered_im,
const ChannelType channel, __constant float *filter,
const unsigned int width,
const unsigned int imageColumns, const unsigned int imageRows,
__local CLPixelType *temp,
const unsigned int offsetRows, const unsigned int section)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int columns = imageColumns;
const unsigned int radius = (width-1)/2;
const int wsize = get_local_size(0);
const unsigned int loadSize = wsize+width;
const int groupX=get_local_size(0)*get_group_id(0);
const int groupY=get_local_size(1)*get_group_id(1);
im += imageColumns * (offsetRows - radius * section);
for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
{
temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_global_id(0) < columns)
{
float4 result = (float4) 0;
int i = 0;
\n #ifndef UFACTOR \n
\n #define UFACTOR 8 \n
\n #endif \n
for ( ; i+UFACTOR < width; )
{
\n #pragma unroll UFACTOR\n
for (int j=0; j < UFACTOR; j++, i++)
{
result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
}
}
for ( ; i < width; i++)
{
result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
}
result.x = ClampToQuantum(result.x);
result.y = ClampToQuantum(result.y);
result.z = ClampToQuantum(result.z);
result.w = ClampToQuantum(result.w);
filtered_im[y*columns+x] = result;
}
}
)
STRINGIFY(
__kernel void BlurColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
const ChannelType channel, __constant float *filter,
const unsigned int width,
const unsigned int imageColumns, const unsigned int imageRows,
__local float4 *temp)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int columns = imageColumns;
const int rows = imageRows;
unsigned int radius = (width-1)/2;
const int wsize = get_local_size(1);
const unsigned int loadSize = wsize+width;
const int groupX=get_local_size(0)*get_group_id(0);
const int groupY=get_local_size(1)*get_group_id(1);
for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
{
temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_global_id(1) < rows)
{
float4 result = (float4) 0;
int i = 0;
\n #ifndef UFACTOR \n
\n #define UFACTOR 8 \n
\n #endif \n
for ( ; i+UFACTOR < width; )
{
\n #pragma unroll UFACTOR \n
for (int j=0; j < UFACTOR; j++, i++)
{
result+=filter[i]*temp[i+get_local_id(1)];
}
}
for ( ; i < width; i++)
{
result+=filter[i]*temp[i+get_local_id(1)];
}
result.x = ClampToQuantum(result.x);
result.y = ClampToQuantum(result.y);
result.z = ClampToQuantum(result.z);
result.w = ClampToQuantum(result.w);
filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
}
}
)
STRINGIFY(
__kernel void BlurColumnSection(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
const ChannelType channel, __constant float *filter,
const unsigned int width,
const unsigned int imageColumns, const unsigned int imageRows,
__local float4 *temp,
const unsigned int offsetRows, const unsigned int section)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int columns = imageColumns;
const int rows = imageRows;
unsigned int radius = (width-1)/2;
const int wsize = get_local_size(1);
const unsigned int loadSize = wsize+width;
const int groupX=get_local_size(0)*get_group_id(0);
const int groupY=get_local_size(1)*get_group_id(1);
blurRowData += imageColumns * radius * section;
for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
{
int pos = ClampToCanvasWithHalo(i+groupY-radius, rows, radius, section) * columns + groupX;
temp[i] = *(blurRowData+pos);
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_global_id(1) < rows)
{
float4 result = (float4) 0;
int i = 0;
\n #ifndef UFACTOR \n
\n #define UFACTOR 8 \n
\n #endif \n
for ( ; i+UFACTOR < width; )
{
\n #pragma unroll UFACTOR \n
for (int j=0; j < UFACTOR; j++, i++)
{
result+=filter[i]*temp[i+get_local_id(1)];
}
}
for ( ; i < width; i++)
{
result+=filter[i]*temp[i+get_local_id(1)];
}
result.x = ClampToQuantum(result.x);
result.y = ClampToQuantum(result.y);
result.z = ClampToQuantum(result.z);
result.w = ClampToQuantum(result.w);
filtered_im += imageColumns * offsetRows;
filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
}
}
)
STRINGIFY(
inline float ColorDodge(const float Sca,
const float Sa,const float Dca,const float Da)
{
if ((Sca*Da+Dca*Sa) >= Sa*Da)
return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
}
inline void CompositeColorDodge(const float4 *p,
const float4 *q,float4 *composite) {
float
Da,
gamma,
Sa;
Sa=1.0f-QuantumScale*getOpacityF4(*p);
Da=1.0f-QuantumScale*getOpacityF4(*q);
gamma=RoundToUnity(Sa+Da-Sa*Da);
setOpacityF4(composite, QuantumRange*(1.0-gamma));
gamma=QuantumRange/(fabs(gamma) < MagickEpsilon ? MagickEpsilon : gamma);
setRedF4(composite,gamma*ColorDodge(QuantumScale*getRedF4(*p)*Sa,Sa,QuantumScale*
getRedF4(*q)*Da,Da));
setGreenF4(composite,gamma*ColorDodge(QuantumScale*getGreenF4(*p)*Sa,Sa,QuantumScale*
getGreenF4(*q)*Da,Da));
setBlueF4(composite,gamma*ColorDodge(QuantumScale*getBlueF4(*p)*Sa,Sa,QuantumScale*
getBlueF4(*q)*Da,Da));
}
)
STRINGIFY(
inline void MagickPixelCompositePlus(const float4 *p,
const float alpha,const float4 *q,
const float beta,float4 *composite)
{
float
gamma;
float
Da,
Sa;
Sa=1.0-QuantumScale*alpha;
Da=1.0-QuantumScale*beta;
gamma=RoundToUnity(Sa+Da);
setOpacityF4(composite,(float) QuantumRange*(1.0-gamma));
gamma=PerceptibleReciprocal(gamma);
setRedF4(composite,gamma*(Sa*getRedF4(*p)+Da*getRedF4(*q)));
setGreenF4(composite,gamma*(Sa*getGreenF4(*p)+Da*getGreenF4(*q)));
setBlueF4(composite,gamma*(Sa*getBlueF4(*p)+Da*getBlueF4(*q)));
}
)
STRINGIFY(
inline void MagickPixelCompositeBlend(const float4 *p,
const float alpha,const float4 *q,
const float beta,float4 *composite)
{
MagickPixelCompositePlus(p,(float) (QuantumRange-alpha*
(QuantumRange-getOpacityF4(*p))),q,(float) (QuantumRange-beta*
(QuantumRange-getOpacityF4(*q))),composite);
}
)
STRINGIFY(
__kernel
void Composite(__global CLPixelType *image,
const unsigned int imageWidth,
const unsigned int imageHeight,
const unsigned int imageMatte,
const __global CLPixelType *compositeImage,
const unsigned int compositeWidth,
const unsigned int compositeHeight,
const unsigned int compositeMatte,
const unsigned int compose,
const ChannelType channel,
const float destination_dissolve,
const float source_dissolve) {
uint2 index;
index.x = get_global_id(0);
index.y = get_global_id(1);
if (index.x >= imageWidth
|| index.y >= imageHeight) {
return;
}
const CLPixelType inputPixel = image[index.y*imageWidth+index.x];
float4 destination;
setRedF4(&destination,getRed(inputPixel));
setGreenF4(&destination,getGreen(inputPixel));
setBlueF4(&destination,getBlue(inputPixel));
const CLPixelType compositePixel
= compositeImage[index.y*imageWidth+index.x];
float4 source;
setRedF4(&source,getRed(compositePixel));
setGreenF4(&source,getGreen(compositePixel));
setBlueF4(&source,getBlue(compositePixel));
if (imageMatte != 0) {
setOpacityF4(&destination,getOpacity(inputPixel));
}
else {
setOpacityF4(&destination,0.0f);
}
if (compositeMatte != 0) {
setOpacityF4(&source,getOpacity(compositePixel));
}
else {
setOpacityF4(&source,0.0f);
}
float4 composite=destination;
CompositeOperator op = (CompositeOperator)compose;
switch (op) {
case ColorDodgeCompositeOp:
CompositeColorDodge(&source,&destination,&composite);
break;
case BlendCompositeOp:
MagickPixelCompositeBlend(&source,source_dissolve,&destination,
destination_dissolve,&composite);
break;
default:
break;
};
CLPixelType outputPixel;
setRed(&outputPixel, ClampToQuantum(getRedF4(composite)));
setGreen(&outputPixel, ClampToQuantum(getGreenF4(composite)));
setBlue(&outputPixel, ClampToQuantum(getBlueF4(composite)));
setOpacity(&outputPixel, ClampToQuantum(getOpacityF4(composite)));
image[index.y*imageWidth+index.x] = outputPixel;
}
)
STRINGIFY(
inline float3 ConvertRGBToHSB(CLPixelType pixel) {
float3 HueSaturationBrightness;
HueSaturationBrightness.x = 0.0f;
HueSaturationBrightness.y = 0.0f;
HueSaturationBrightness.z = 0.0f;
float r=(float) getRed(pixel);
float g=(float) getGreen(pixel);
float b=(float) getBlue(pixel);
float tmin=min(min(r,g),b);
float tmax=max(max(r,g),b);
if (tmax!=0.0f) {
float delta=tmax-tmin;
HueSaturationBrightness.y=delta/tmax;
HueSaturationBrightness.z=QuantumScale*tmax;
if (delta != 0.0f) {
HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f));
HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta;
HueSaturationBrightness.x/=6.0f;
HueSaturationBrightness.x += (HueSaturationBrightness.x < 0.0f)?0.0f:1.0f;
}
}
return HueSaturationBrightness;
}
inline CLPixelType ConvertHSBToRGB(float3 HueSaturationBrightness) {
float hue = HueSaturationBrightness.x;
float brightness = HueSaturationBrightness.z;
float saturation = HueSaturationBrightness.y;
CLPixelType rgb;
if (saturation == 0.0f) {
setRed(&rgb,ClampToQuantum(QuantumRange*brightness));
setGreen(&rgb,getRed(rgb));
setBlue(&rgb,getRed(rgb));
}
else {
float h=6.0f*(hue-floor(hue));
float f=h-floor(h);
float p=brightness*(1.0f-saturation);
float q=brightness*(1.0f-saturation*f);
float t=brightness*(1.0f-(saturation*(1.0f-f)));
float clampedBrightness = ClampToQuantum(QuantumRange*brightness);
float clamped_t = ClampToQuantum(QuantumRange*t);
float clamped_p = ClampToQuantum(QuantumRange*p);
float clamped_q = ClampToQuantum(QuantumRange*q);
int ih = (int)h;
setRed(&rgb, (ih == 1)?clamped_q:
(ih == 2 || ih == 3)?clamped_p:
(ih == 4)?clamped_t:
clampedBrightness);
setGreen(&rgb, (ih == 1 || ih == 2)?clampedBrightness:
(ih == 3)?clamped_q:
(ih == 4 || ih == 5)?clamped_p:
clamped_t);
setBlue(&rgb, (ih == 2)?clamped_t:
(ih == 3 || ih == 4)?clampedBrightness:
(ih == 5)?clamped_q:
clamped_p);
}
return rgb;
}
__kernel void Contrast(__global CLPixelType *im, const unsigned int sharpen)
{
const int sign = sharpen!=0?1:-1;
const int x = get_global_id(0);
const int y = get_global_id(1);
const int columns = get_global_size(0);
const int c = x + y * columns;
CLPixelType pixel = im[c];
float3 HueSaturationBrightness = ConvertRGBToHSB(pixel);
float brightness = HueSaturationBrightness.z;
brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
brightness = clamp(brightness,0.0f,1.0f);
HueSaturationBrightness.z = brightness;
CLPixelType filteredPixel = ConvertHSBToRGB(HueSaturationBrightness);
filteredPixel.w = pixel.w;
im[c] = filteredPixel;
}
)
STRINGIFY(
__kernel void Histogram(__global CLPixelType * restrict im,
const ChannelType channel,
const int method,
const int colorspace,
__global uint4 * restrict histogram)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int columns = get_global_size(0);
const int c = x + y * columns;
if ((channel & SyncChannels) != 0)
{
float intensity = GetPixelIntensity(method, colorspace,im[c]);
uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
atomic_inc((__global uint *)(&(histogram[pos]))+2);
}
else
{
}
}
)
STRINGIFY(
__kernel void ContrastStretch(__global CLPixelType * restrict im,
const ChannelType channel,
__global CLPixelType * restrict stretch_map,
const float4 white, const float4 black)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int columns = get_global_size(0);
const int c = x + y * columns;
uint ePos;
CLPixelType oValue, eValue;
CLQuantum red, green, blue, opacity;
oValue=im[c];
if ((channel & RedChannel) != 0)
{
if (getRedF4(white) != getRedF4(black))
{
ePos = ScaleQuantumToMap(getRed(oValue));
eValue = stretch_map[ePos];
red = getRed(eValue);
}
}
if ((channel & GreenChannel) != 0)
{
if (getGreenF4(white) != getGreenF4(black))
{
ePos = ScaleQuantumToMap(getGreen(oValue));
eValue = stretch_map[ePos];
green = getGreen(eValue);
}
}
if ((channel & BlueChannel) != 0)
{
if (getBlueF4(white) != getBlueF4(black))
{
ePos = ScaleQuantumToMap(getBlue(oValue));
eValue = stretch_map[ePos];
blue = getBlue(eValue);
}
}
if ((channel & OpacityChannel) != 0)
{
if (getOpacityF4(white) != getOpacityF4(black))
{
ePos = ScaleQuantumToMap(getOpacity(oValue));
eValue = stretch_map[ePos];
opacity = getOpacity(eValue);
}
}
im[c]=(CLPixelType)(blue, green, red, opacity);
}
)
STRINGIFY(
__kernel
void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
const unsigned int imageWidth, const unsigned int imageHeight,
__constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) {
int2 blockID;
blockID.x = get_group_id(0);
blockID.y = get_group_id(1);
int2 imageAreaOrg;
imageAreaOrg.x = blockID.x * get_local_size(0);
imageAreaOrg.y = blockID.y * get_local_size(1);
int2 midFilterDimen;
midFilterDimen.x = (filterWidth-1)/2;
midFilterDimen.y = (filterHeight-1)/2;
int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
int2 cachedAreaDimen;
cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
int groupSize = get_local_size(0) * get_local_size(1);
for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
int2 cachedAreaIndex;
cachedAreaIndex.x = i % cachedAreaDimen.x;
cachedAreaIndex.y = i / cachedAreaDimen.x;
int2 imagePixelIndex;
imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
}
for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
filterCache[i] = filter[i];
}
barrier(CLK_LOCAL_MEM_FENCE);
int2 imageIndex;
imageIndex.x = imageAreaOrg.x + get_local_id(0);
imageIndex.y = imageAreaOrg.y + get_local_id(1);
if (imageIndex.x >= imageWidth
|| imageIndex.y >= imageHeight) {
return;
}
int filterIndex = 0;
float4 sum = (float4)0.0f;
float gamma = 0.0f;
if (((channel & OpacityChannel) == 0) || (matte == 0)) {
int cacheIndexY = get_local_id(1);
for (int j = 0; j < filterHeight; j++) {
int cacheIndexX = get_local_id(0);
for (int i = 0; i < filterWidth; i++) {
CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
float f = filterCache[filterIndex];
sum.x += f * p.x;
sum.y += f * p.y;
sum.z += f * p.z;
sum.w += f * p.w;
gamma += f;
filterIndex++;
cacheIndexX++;
}
cacheIndexY++;
}
}
else {
int cacheIndexY = get_local_id(1);
for (int j = 0; j < filterHeight; j++) {
int cacheIndexX = get_local_id(0);
for (int i = 0; i < filterWidth; i++) {
CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
float alpha = QuantumScale*(QuantumRange-p.w);
float f = filterCache[filterIndex];
float g = alpha * f;
sum.x += g*p.x;
sum.y += g*p.y;
sum.z += g*p.z;
sum.w += f*p.w;
gamma += g;
filterIndex++;
cacheIndexX++;
}
cacheIndexY++;
}
gamma = PerceptibleReciprocal(gamma);
sum.xyz = gamma*sum.xyz;
}
CLPixelType outputPixel;
outputPixel.x = ClampToQuantum(sum.x);
outputPixel.y = ClampToQuantum(sum.y);
outputPixel.z = ClampToQuantum(sum.z);
outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
}
)
STRINGIFY(
__kernel
void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
const uint imageWidth, const uint imageHeight,
__constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
const uint matte, const ChannelType channel) {
int2 imageIndex;
imageIndex.x = get_global_id(0);
imageIndex.y = get_global_id(1);
if (imageIndex.x >= imageWidth
|| imageIndex.y >= imageHeight)
return;
int2 midFilterDimen;
midFilterDimen.x = (filterWidth-1)/2;
midFilterDimen.y = (filterHeight-1)/2;
int filterIndex = 0;
float4 sum = (float4)0.0f;
float gamma = 0.0f;
if (((channel & OpacityChannel) == 0) || (matte == 0)) {
for (int j = 0; j < filterHeight; j++) {
int2 inputPixelIndex;
inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
for (int i = 0; i < filterWidth; i++) {
inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
float f = filter[filterIndex];
sum.x += f * p.x;
sum.y += f * p.y;
sum.z += f * p.z;
sum.w += f * p.w;
gamma += f;
filterIndex++;
}
}
}
else {
for (int j = 0; j < filterHeight; j++) {
int2 inputPixelIndex;
inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
for (int i = 0; i < filterWidth; i++) {
inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
float alpha = QuantumScale*(QuantumRange-p.w);
float f = filter[filterIndex];
float g = alpha * f;
sum.x += g*p.x;
sum.y += g*p.y;
sum.z += g*p.z;
sum.w += f*p.w;
gamma += g;
filterIndex++;
}
}
gamma = PerceptibleReciprocal(gamma);
sum.xyz = gamma*sum.xyz;
}
CLPixelType outputPixel;
outputPixel.x = ClampToQuantum(sum.x);
outputPixel.y = ClampToQuantum(sum.y);
outputPixel.z = ClampToQuantum(sum.z);
outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
}
)
STRINGIFY(
__kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
, const unsigned int imageWidth, const unsigned int imageHeight
, const int2 offset, const int polarity, const int matte) {
int x = get_global_id(0);
int y = get_global_id(1);
CLPixelType v = inputImage[y*imageWidth+x];
int2 neighbor;
neighbor.y = y + offset.y;
neighbor.x = x + offset.x;
int2 clampedNeighbor;
clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
CLPixelType r = (clampedNeighbor.x == neighbor.x
&& clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
:(CLPixelType)0;
int sv[4];
sv[0] = (int)v.x;
sv[1] = (int)v.y;
sv[2] = (int)v.z;
sv[3] = (int)v.w;
int sr[4];
sr[0] = (int)r.x;
sr[1] = (int)r.y;
sr[2] = (int)r.z;
sr[3] = (int)r.w;
if (polarity > 0) {
\n #pragma unroll 4\n
for (unsigned int i = 0; i < 4; i++) {
sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
}
}
else {
\n #pragma unroll 4\n
for (unsigned int i = 0; i < 4; i++) {
sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
}
}
v.x = (CLQuantum)sv[0];
v.y = (CLQuantum)sv[1];
v.z = (CLQuantum)sv[2];
if (matte!=0)
v.w = (CLQuantum)sv[3];
outputImage[y*imageWidth+x] = v;
}
)
STRINGIFY(
__kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
, const unsigned int imageWidth, const unsigned int imageHeight
, const int2 offset, const int polarity, const int matte) {
int x = get_global_id(0);
int y = get_global_id(1);
CLPixelType v = inputImage[y*imageWidth+x];
int2 neighbor, clampedNeighbor;
neighbor.y = y + offset.y;
neighbor.x = x + offset.x;
clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
CLPixelType r = (clampedNeighbor.x == neighbor.x
&& clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
:(CLPixelType)0;
neighbor.y = y - offset.y;
neighbor.x = x - offset.x;
clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
CLPixelType s = (clampedNeighbor.x == neighbor.x
&& clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
:(CLPixelType)0;
int sv[4];
sv[0] = (int)v.x;
sv[1] = (int)v.y;
sv[2] = (int)v.z;
sv[3] = (int)v.w;
int sr[4];
sr[0] = (int)r.x;
sr[1] = (int)r.y;
sr[2] = (int)r.z;
sr[3] = (int)r.w;
int ss[4];
ss[0] = (int)s.x;
ss[1] = (int)s.y;
ss[2] = (int)s.z;
ss[3] = (int)s.w;
if (polarity > 0) {
\n #pragma unroll 4\n
for (unsigned int i = 0; i < 4; i++) {
sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
}
}
else {
\n #pragma unroll 4\n
for (unsigned int i = 0; i < 4; i++) {
sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
}
}
v.x = (CLQuantum)sv[0];
v.y = (CLQuantum)sv[1];
v.z = (CLQuantum)sv[2];
if (matte!=0)
v.w = (CLQuantum)sv[3];
outputImage[y*imageWidth+x] = v;
}
)
STRINGIFY(
__kernel void Equalize(__global CLPixelType * restrict im,
const ChannelType channel,
__global CLPixelType * restrict equalize_map,
const float4 white, const float4 black)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int columns = get_global_size(0);
const int c = x + y * columns;
uint ePos;
CLPixelType oValue, eValue;
CLQuantum red, green, blue, opacity;
oValue=im[c];
if ((channel & SyncChannels) != 0)
{
if (getRedF4(white) != getRedF4(black))
{
ePos = ScaleQuantumToMap(getRed(oValue));
eValue = equalize_map[ePos];
red = getRed(eValue);
ePos = ScaleQuantumToMap(getGreen(oValue));
eValue = equalize_map[ePos];
green = getRed(eValue);
ePos = ScaleQuantumToMap(getBlue(oValue));
eValue = equalize_map[ePos];
blue = getRed(eValue);
ePos = ScaleQuantumToMap(getOpacity(oValue));
eValue = equalize_map[ePos];
opacity = getRed(eValue);
im[c]=(CLPixelType)(blue, green, red, opacity);
}
}
}
)
STRINGIFY(
CLPixelType ApplyFunction(CLPixelType pixel,const MagickFunction function,
const unsigned int number_parameters,
__constant float *parameters)
{
float4 result = (float4) 0.0f;
switch (function)
{
case PolynomialFunction:
{
for (unsigned int i=0; i < number_parameters; i++)
result = result*(float4)QuantumScale*convert_float4(pixel) + parameters[i];
result *= (float4)QuantumRange;
break;
}
case SinusoidFunction:
{
float freq,phase,ampl,bias;
freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
result.x = QuantumRange*(ampl*sin(2.0f*MagickPI*
(freq*QuantumScale*(float)pixel.x + phase/360.0f)) + bias);
result.y = QuantumRange*(ampl*sin(2.0f*MagickPI*
(freq*QuantumScale*(float)pixel.y + phase/360.0f)) + bias);
result.z = QuantumRange*(ampl*sin(2.0f*MagickPI*
(freq*QuantumScale*(float)pixel.z + phase/360.0f)) + bias);
result.w = QuantumRange*(ampl*sin(2.0f*MagickPI*
(freq*QuantumScale*(float)pixel.w + phase/360.0f)) + bias);
break;
}
case ArcsinFunction:
{
float width,range,center,bias;
width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
result.x = 2.0f/width*(QuantumScale*(float)pixel.x - center);
result.x = range/MagickPI*asin(result.x)+bias;
result.x = ( result.x <= -1.0f ) ? bias - range/2.0f : result.x;
result.x = ( result.x >= 1.0f ) ? bias + range/2.0f : result.x;
result.y = 2.0f/width*(QuantumScale*(float)pixel.y - center);
result.y = range/MagickPI*asin(result.y)+bias;
result.y = ( result.y <= -1.0f ) ? bias - range/2.0f : result.y;
result.y = ( result.y >= 1.0f ) ? bias + range/2.0f : result.y;
result.z = 2.0f/width*(QuantumScale*(float)pixel.z - center);
result.z = range/MagickPI*asin(result.z)+bias;
result.z = ( result.z <= -1.0f ) ? bias - range/2.0f : result.x;
result.z = ( result.z >= 1.0f ) ? bias + range/2.0f : result.x;
result.w = 2.0f/width*(QuantumScale*(float)pixel.w - center);
result.w = range/MagickPI*asin(result.w)+bias;
result.w = ( result.w <= -1.0f ) ? bias - range/2.0f : result.w;
result.w = ( result.w >= 1.0f ) ? bias + range/2.0f : result.w;
result *= (float4)QuantumRange;
break;
}
case ArctanFunction:
{
float slope,range,center,bias;
slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
result = (float4)MagickPI*(float4)slope*((float4)QuantumScale*convert_float4(pixel)-(float4)center);
result = (float4)QuantumRange*((float4)range/(float4)MagickPI*atan(result) + (float4)bias);
break;
}
case UndefinedFunction:
break;
}
return (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
ClampToQuantum(result.z), ClampToQuantum(result.w));
}
)
STRINGIFY(
__kernel void ComputeFunction(__global CLPixelType *im,
const ChannelType channel, const MagickFunction function,
const unsigned int number_parameters, __constant float *parameters)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int columns = get_global_size(0);
const int c = x + y * columns;
im[c] = ApplyFunction(im[c], function, number_parameters, parameters);
}
)
STRINGIFY(
__kernel void Grayscale(__global CLPixelType *im,
const int method, const int colorspace)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int columns = get_global_size(0);
const int c = x + y * columns;
CLPixelType pixel = im[c];
float
blue,
green,
intensity,
red;
red=(float)getRed(pixel);
green=(float)getGreen(pixel);
blue=(float)getBlue(pixel);
intensity=0.0;
CLPixelType filteredPixel;
switch (method)
{
case AveragePixelIntensityMethod:
{
intensity=(red+green+blue)/3.0;
break;
}
case BrightnessPixelIntensityMethod:
{
intensity=max(max(red,green),blue);
break;
}
case LightnessPixelIntensityMethod:
{
intensity=(min(min(red,green),blue)+
max(max(red,green),blue))/2.0;
break;
}
case MSPixelIntensityMethod:
{
intensity=(float) (((float) red*red+green*green+
blue*blue)/(3.0*QuantumRange));
break;
}
case Rec601LumaPixelIntensityMethod:
{
intensity=0.298839*red+0.586811*green+0.114350*blue;
break;
}
case Rec601LuminancePixelIntensityMethod:
{
intensity=0.298839*red+0.586811*green+0.114350*blue;
break;
}
case Rec709LumaPixelIntensityMethod:
default:
{
intensity=0.212656*red+0.715158*green+0.072186*blue;
break;
}
case Rec709LuminancePixelIntensityMethod:
{
intensity=0.212656*red+0.715158*green+0.072186*blue;
break;
}
case RMSPixelIntensityMethod:
{
intensity=(float) (sqrt((float) red*red+green*green+
blue*blue)/sqrt(3.0));
break;
}
}
setGray(&filteredPixel, ClampToQuantum(intensity));
filteredPixel.w = pixel.w;
im[c] = filteredPixel;
}
)
STRINGIFY(
inline int mirrorBottom(int value)
{
return (value < 0) ? - (value) : value;
}
inline int mirrorTop(int value, int width)
{
return (value >= width) ? (2 * width - value - 1) : value;
}
__kernel void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *tmpImage,
const int radius,
const int imageWidth,
const int imageHeight)
{
const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
int x = get_local_id(0);
int y = get_global_id(1);
global CLPixelType *src = srcImage + y * imageWidth;
for (int i = x; i < imageWidth; i += get_local_size(0)) {
float sum = 0.0f;
float weight = 1.0f;
int j = i - radius;
while ((j + 7) < i) {
for (int k = 0; k < 8; ++k)
sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
weight += 8.0f;
j+=8;
}
while (j < i) {
sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
weight += 1.0f;
++j;
}
while ((j + 7) < radius + i) {
for (int k = 0; k < 8; ++k)
sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
weight -= 8.0f;
j+=8;
}
while (j < radius + i) {
sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
weight -= 1.0f;
++j;
}
tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
}
}
)
STRINGIFY(
__kernel void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *blurImage,
const int radius,
const float strength,
const int imageWidth,
const int imageHeight)
{
const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);
int x = get_global_id(0);
int y = get_global_id(1);
if ((x >= imageWidth) || (y >= imageHeight))
return;
global float *src = blurImage + x;
float sum = 0.0f;
float weight = 1.0f;
int j = y - radius;
while ((j + 7) < y) {
for (int k = 0; k < 8; ++k)
sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
weight += 8.0f;
j+=8;
}
while (j < y) {
sum += weight * src[mirrorBottom(j) * imageWidth];
weight += 1.0f;
++j;
}
while ((j + 7) < radius + y) {
for (int k = 0; k < 8; ++k)
sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
weight -= 8.0f;
j+=8;
}
while (j < radius + y) {
sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
weight -= 1.0f;
++j;
}
CLPixelType pixel = srcImage[x + y * imageWidth];
float srcVal = dot(RGB, convert_float4(pixel));
float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);
mult = (srcVal + mult) / srcVal;
pixel.x = ClampToQuantum(pixel.x * mult);
pixel.y = ClampToQuantum(pixel.y * mult);
pixel.z = ClampToQuantum(pixel.z * mult);
dstImage[x + y * imageWidth] = pixel;
}
)
STRINGIFY(
inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue,
float *hue, float *saturation, float *lightness)
{
float
c,
tmax,
tmin;
tmax=max(QuantumScale*red,max(QuantumScale*green, QuantumScale*blue));
tmin=min(QuantumScale*red,min(QuantumScale*green, QuantumScale*blue));
c=tmax-tmin;
*lightness=(tmax+tmin)/2.0;
if (c <= 0.0)
{
*hue=0.0;
*saturation=0.0;
return;
}
if (tmax == (QuantumScale*red))
{
*hue=(QuantumScale*green-QuantumScale*blue)/c;
if ((QuantumScale*green) < (QuantumScale*blue))
*hue+=6.0;
}
else
if (tmax == (QuantumScale*green))
*hue=2.0+(QuantumScale*blue-QuantumScale*red)/c;
else
*hue=4.0+(QuantumScale*red-QuantumScale*green)/c;
*hue*=60.0/360.0;
if (*lightness <= 0.5)
*saturation=c/(2.0*(*lightness));
else
*saturation=c/(2.0-2.0*(*lightness));
}
inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness,
CLQuantum *red,CLQuantum *green,CLQuantum *blue)
{
float
b,
c,
g,
h,
tmin,
r,
x;
h=hue*360.0;
if (lightness <= 0.5)
c=2.0*lightness*saturation;
else
c=(2.0-2.0*lightness)*saturation;
tmin=lightness-0.5*c;
h-=360.0*floor(h/360.0);
h/=60.0;
x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
switch ((int) floor(h))
{
case 0:
{
r=tmin+c;
g=tmin+x;
b=tmin;
break;
}
case 1:
{
r=tmin+x;
g=tmin+c;
b=tmin;
break;
}
case 2:
{
r=tmin;
g=tmin+c;
b=tmin+x;
break;
}
case 3:
{
r=tmin;
g=tmin+x;
b=tmin+c;
break;
}
case 4:
{
r=tmin+x;
g=tmin;
b=tmin+c;
break;
}
case 5:
{
r=tmin+c;
g=tmin;
b=tmin+x;
break;
}
default:
{
r=0.0;
g=0.0;
b=0.0;
}
}
*red=ClampToQuantum(QuantumRange*r);
*green=ClampToQuantum(QuantumRange*g);
*blue=ClampToQuantum(QuantumRange*b);
}
inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness,
CLQuantum *red,CLQuantum *green,CLQuantum *blue)
{
float
hue,
lightness,
saturation;
ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);
hue+=0.5*(0.01*percent_hue-1.0);
while (hue < 0.0)
hue+=1.0;
while (hue >= 1.0)
hue-=1.0;
saturation*=0.01*percent_saturation;
lightness*=0.01*percent_lightness;
ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
}
__kernel void Modulate(__global CLPixelType *im,
const float percent_brightness,
const float percent_hue,
const float percent_saturation,
const int colorspace)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int columns = get_global_size(0);
const int c = x + y * columns;
CLPixelType pixel = im[c];
CLQuantum
blue,
green,
red;
red=getRed(pixel);
green=getGreen(pixel);
blue=getBlue(pixel);
switch (colorspace)
{
case HSLColorspace:
default:
{
ModulateHSL(percent_hue, percent_saturation, percent_brightness,
&red, &green, &blue);
}
}
CLPixelType filteredPixel;
setRed(&filteredPixel, red);
setGreen(&filteredPixel, green);
setBlue(&filteredPixel, blue);
filteredPixel.w = pixel.w;
im[c] = filteredPixel;
}
)
STRINGIFY(
__kernel
void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output,
const unsigned int imageWidth, const unsigned int imageHeight,
const __global float *filter, const unsigned int width, const __global int2* offset,
const float4 bias,
const ChannelType channel, const unsigned int matte) {
int2 currentPixel;
currentPixel.x = get_global_id(0);
currentPixel.y = get_global_id(1);
if (currentPixel.x >= imageWidth
|| currentPixel.y >= imageHeight)
return;
float4 pixel;
pixel.x = (float)bias.x;
pixel.y = (float)bias.y;
pixel.z = (float)bias.z;
pixel.w = (float)bias.w;
if (((channel & OpacityChannel) == 0) || (matte == 0)) {
for (int i = 0; i < width; i++) {
int2 samplePixel = currentPixel + offset[i];
samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
pixel.x += (filter[i] * (float)samplePixelValue.x);
pixel.y += (filter[i] * (float)samplePixelValue.y);
pixel.z += (filter[i] * (float)samplePixelValue.z);
pixel.w += (filter[i] * (float)samplePixelValue.w);
}
CLPixelType outputPixel;
outputPixel.x = ClampToQuantum(pixel.x);
outputPixel.y = ClampToQuantum(pixel.y);
outputPixel.z = ClampToQuantum(pixel.z);
outputPixel.w = ClampToQuantum(pixel.w);
output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
}
else {
float gamma = 0.0f;
for (int i = 0; i < width; i++) {
int2 samplePixel = currentPixel + offset[i];
samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
float alpha = QuantumScale*(QuantumRange-samplePixelValue.w);
float k = filter[i];
pixel.x = pixel.x + k * alpha * samplePixelValue.x;
pixel.y = pixel.y + k * alpha * samplePixelValue.y;
pixel.z = pixel.z + k * alpha * samplePixelValue.z;
pixel.w += k * alpha * samplePixelValue.w;
gamma+=k*alpha;
}
gamma = PerceptibleReciprocal(gamma);
pixel.xyz = gamma*pixel.xyz;
CLPixelType outputPixel;
outputPixel.x = ClampToQuantum(pixel.x);
outputPixel.y = ClampToQuantum(pixel.y);
outputPixel.z = ClampToQuantum(pixel.z);
outputPixel.w = ClampToQuantum(pixel.w);
output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
}
}
)
STRINGIFY(
__kernel void RadialBlur(const __global CLPixelType *im, __global CLPixelType *filtered_im,
const float4 bias,
const unsigned int channel, const unsigned int matte,
const float2 blurCenter,
__constant float *cos_theta, __constant float *sin_theta,
const unsigned int cossin_theta_size)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int columns = get_global_size(0);
const int rows = get_global_size(1);
unsigned int step = 1;
float center_x = (float) x - blurCenter.x;
float center_y = (float) y - blurCenter.y;
float radius = hypot(center_x, center_y);
float blur_radius = hypot(blurCenter.x, blurCenter.y);
if (radius > MagickEpsilon)
{
step = (unsigned int) (blur_radius / radius);
if (step == 0)
step = 1;
if (step >= cossin_theta_size)
step = cossin_theta_size-1;
}
float4 result;
result.x = (float)bias.x;
result.y = (float)bias.y;
result.z = (float)bias.z;
result.w = (float)bias.w;
float normalize = 0.0f;
if (((channel & OpacityChannel) == 0) || (matte == 0)) {
for (unsigned int i=0; i<cossin_theta_size; i+=step)
{
result += convert_float4(im[
ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
normalize += 1.0f;
}
normalize = PerceptibleReciprocal(normalize);
result = result * normalize;
}
else {
float gamma = 0.0f;
for (unsigned int i=0; i<cossin_theta_size; i+=step)
{
float4 p = convert_float4(im[
ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
float alpha = (float)(QuantumScale*(QuantumRange-p.w));
result.x += alpha * p.x;
result.y += alpha * p.y;
result.z += alpha * p.z;
result.w += p.w;
gamma+=alpha;
normalize += 1.0f;
}
gamma = PerceptibleReciprocal(gamma);
normalize = PerceptibleReciprocal(normalize);
result.x = gamma*result.x;
result.y = gamma*result.y;
result.z = gamma*result.z;
result.w = normalize*result.w;
}
filtered_im[y * columns + x] = (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
ClampToQuantum(result.z), ClampToQuantum(result.w));
}
)
STRINGIFY(
inline float GetPseudoRandomValue(uint4* seed, const float normalizeRand) {
uint4 s = *seed;
do {
unsigned int alpha = (unsigned int)(s.y ^ (s.y << 11));
s.y = s.z;
s.z = s.w;
s.w = s.x;
s.x = (s.x ^ (s.x >> 19)) ^ (alpha ^ (alpha >> 8));
} while (s.x == ~0UL);
*seed = s;
return (normalizeRand*s.x);
}
__kernel void RandomNumberGenerator(__global uint* seeds, const float normalizeRand
, __global float* randomNumbers, const uint init
, const uint numRandomNumbers) {
unsigned int id = get_global_id(0);
unsigned int seed[4];
if (init != 0) {
seed[0] = seeds[id * 4];
seed[1] = 0x50a7f451;
seed[2] = 0x5365417e;
seed[3] = 0xc3a4171a;
}
else {
seed[0] = seeds[id * 4];
seed[1] = seeds[id * 4 + 1];
seed[2] = seeds[id * 4 + 2];
seed[3] = seeds[id * 4 + 3];
}
unsigned int numRandomNumbersPerItem = (numRandomNumbers + get_global_size(0) - 1) / get_global_size(0);
for (unsigned int i = 0; i < numRandomNumbersPerItem; i++) {
do
{
unsigned int alpha = (unsigned int)(seed[1] ^ (seed[1] << 11));
seed[1] = seed[2];
seed[2] = seed[3];
seed[3] = seed[0];
seed[0] = (seed[0] ^ (seed[0] >> 19)) ^ (alpha ^ (alpha >> 8));
} while (seed[0] == ~0UL);
unsigned int pos = (get_group_id(0)*get_local_size(0)*numRandomNumbersPerItem)
+ get_local_size(0) * i + get_local_id(0);
if (pos >= numRandomNumbers)
break;
randomNumbers[pos] = normalizeRand*seed[0];
}
seeds[id * 4] = seed[0];
seeds[id * 4 + 1] = seed[1];
seeds[id * 4 + 2] = seed[2];
seeds[id * 4 + 3] = seed[3];
}
)
STRINGIFY(
float BoxResizeFilter(const float x)
{
return 1.0f;
}
)
STRINGIFY(
float CubicBC(const float x,const __global float* resizeFilterCoefficients)
{
if (x < 1.0)
return(resizeFilterCoefficients[0]+x*(x*
(resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
if (x < 2.0)
return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
(resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
return(0.0);
}
)
STRINGIFY(
float Sinc(const float x)
{
if (x != 0.0f)
{
const float alpha=(float) (MagickPI*x);
return sinpi(x)/alpha;
}
return(1.0f);
}
)
STRINGIFY(
float Triangle(const float x)
{
return ((x<1.0f)?(1.0f-x):0.0f);
}
)
STRINGIFY(
float Hanning(const float x)
{
const float cosine=cos((MagickPI*x));
return(0.5f+0.5f*cosine);
}
)
STRINGIFY(
float Hamming(const float x)
{
const float cosine=cos((MagickPI*x));
return(0.54f+0.46f*cosine);
}
)
STRINGIFY(
float Blackman(const float x)
{
const float cosine=cos((MagickPI*x));
return(0.34f+cosine*(0.5f+cosine*0.16f));
}
)
STRINGIFY(
inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients)
{
switch (filterType)
{
case SincWeightingFunction:
case SincFastWeightingFunction:
return Sinc(x);
case CubicBCWeightingFunction:
return CubicBC(x,filterCoefficients);
case BoxWeightingFunction:
return BoxResizeFilter(x);
case TriangleWeightingFunction:
return Triangle(x);
case HanningWeightingFunction:
return Hanning(x);
case HammingWeightingFunction:
return Hamming(x);
case BlackmanWeightingFunction:
return Blackman(x);
default:
return 0.0f;
}
}
)
STRINGIFY(
inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType
, const ResizeWeightingFunctionType resizeWindowType
, const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x)
{
float scale;
float xBlur = fabs(x/resizeFilterBlur);
if (resizeWindowSupport < MagickEpsilon
|| resizeWindowType == BoxWeightingFunction)
{
scale = 1.0f;
}
else
{
scale = resizeFilterScale;
scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
}
float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
return weight;
}
)
;
const char* accelerateKernels2 =
STRINGIFY(
inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
return (numWorkItems/pixelPerWorkgroup);
}
inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
int pixelIndex = itemID/numWorkItemsPerPixel;
pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
return pixelIndex;
}
)
STRINGIFY(
__kernel __attribute__((reqd_work_group_size(256, 1, 1)))
void ResizeHorizontalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
, const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
, const int resizeFilterType, const int resizeWindowType
, const __global float* resizeFilterCubicCoefficients
, const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
, __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
, __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
const unsigned int stopX = min(startX + pixelPerWorkgroup,filteredColumns);
const unsigned int actualNumPixelToCompute = stopX - startX;
float scale = max(1.0f/xFactor+MagickEpsilon ,1.0f);
const float support = max(scale*resizeFilterSupport,0.5f);
scale = PerceptibleReciprocal(scale);
const int cacheRangeStartX = max((int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(int)(0));
const int cacheRangeEndX = min((int)(cacheRangeStartX + numCachedPixels), (int)inputColumns);
const unsigned int y = get_global_id(1);
event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0);
wait_group_events(1,&e);
unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
{
const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
const unsigned int chunkStopX = min(chunkStartX + pixelChunkSize, stopX);
const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
const unsigned int itemID = get_local_id(0);
const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
float4 filteredPixel = (float4)0.0f;
float density = 0.0f;
float gamma = 0.0f;
if (pixelIndex != -1) {
const int x = chunkStartX + pixelIndex;
const float bisect = (x+0.5)/xFactor+MagickEpsilon;
const unsigned int start = (unsigned int)max(bisect-support+0.5f,0.0f);
const unsigned int stop = (unsigned int)min(bisect+support+0.5f,(float)inputColumns);
const unsigned int n = stop - start;
unsigned int numStepsPerWorkItem = n / numItems;
numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
if (startStep < n) {
const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);
unsigned int cacheIndex = start+startStep-cacheRangeStartX;
if (matte == 0) {
for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
float4 cp = convert_float4(inputImageCache[cacheIndex]);
float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
, (ResizeWeightingFunctionType)resizeWindowType
, resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
filteredPixel += ((float4)weight)*cp;
density+=weight;
}
}
else {
for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
CLPixelType p = inputImageCache[cacheIndex];
float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
, (ResizeWeightingFunctionType)resizeWindowType
, resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
float alpha = weight * QuantumScale * GetPixelAlpha(p);
float4 cp = convert_float4(p);
filteredPixel.x += alpha * cp.x;
filteredPixel.y += alpha * cp.y;
filteredPixel.z += alpha * cp.z;
filteredPixel.w += weight * cp.w;
density+=weight;
gamma+=alpha;
}
}
}
}
if (itemID < actualNumPixelInThisChunk) {
outputPixelCache[itemID] = (float4)0.0f;
densityCache[itemID] = 0.0f;
if (matte != 0)
gammaCache[itemID] = 0.0f;
}
barrier(CLK_LOCAL_MEM_FENCE);
for (unsigned int i = 0; i < numItems; i++) {
if (pixelIndex != -1) {
if (itemID%numItems == i) {
outputPixelCache[pixelIndex]+=filteredPixel;
densityCache[pixelIndex]+=density;
if (matte!=0) {
gammaCache[pixelIndex]+=gamma;
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if (itemID < actualNumPixelInThisChunk) {
if (matte==0) {
float density = densityCache[itemID];
float4 filteredPixel = outputPixelCache[itemID];
if (density!= 0.0f && density != 1.0)
{
density = PerceptibleReciprocal(density);
filteredPixel *= (float4)density;
}
filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
, ClampToQuantum(filteredPixel.y)
, ClampToQuantum(filteredPixel.z)
, ClampToQuantum(filteredPixel.w));
}
else {
float density = densityCache[itemID];
float gamma = gammaCache[itemID];
float4 filteredPixel = outputPixelCache[itemID];
if (density!= 0.0f && density != 1.0) {
density = PerceptibleReciprocal(density);
filteredPixel *= (float4)density;
gamma *= density;
}
gamma = PerceptibleReciprocal(gamma);
CLPixelType fp;
fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
, ClampToQuantum(gamma*filteredPixel.y)
, ClampToQuantum(gamma*filteredPixel.z)
, ClampToQuantum(filteredPixel.w));
filteredImage[y*filteredColumns+chunkStartX+itemID] = fp;
}
}
}
}
)
STRINGIFY(
__kernel __attribute__((reqd_work_group_size(256, 1, 1)))
void ResizeHorizontalFilterSinc(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
, const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
, const int resizeFilterType, const int resizeWindowType
, const __global float* resizeFilterCubicCoefficients
, const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
, __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
, __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
ResizeHorizontalFilter(inputImage,inputColumns,inputRows,matte
,xFactor, filteredImage, filteredColumns, filteredRows
,SincWeightingFunction, SincWeightingFunction
,resizeFilterCubicCoefficients
,resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur
,inputImageCache, numCachedPixels, pixelPerWorkgroup, pixelChunkSize
,outputPixelCache, densityCache, gammaCache);
}
)
STRINGIFY(
__kernel __attribute__((reqd_work_group_size(1, 256, 1)))
void ResizeVerticalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
, const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
, const int resizeFilterType, const int resizeWindowType
, const __global float* resizeFilterCubicCoefficients
, const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
, __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
, __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
const unsigned int stopY = min(startY + pixelPerWorkgroup,filteredRows);
const unsigned int actualNumPixelToCompute = stopY - startY;
float scale = max(1.0f/yFactor+MagickEpsilon ,1.0f);
const float support = max(scale*resizeFilterSupport,0.5f);
scale = PerceptibleReciprocal(scale);
const int cacheRangeStartY = max((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0));
const int cacheRangeEndY = min((int)(cacheRangeStartY + numCachedPixels), (int)inputRows);
const unsigned int x = get_global_id(0);
event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0);
wait_group_events(1,&e);
unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
{
const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
const unsigned int chunkStopY = min(chunkStartY + pixelChunkSize, stopY);
const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
const unsigned int itemID = get_local_id(1);
const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
float4 filteredPixel = (float4)0.0f;
float density = 0.0f;
float gamma = 0.0f;
if (pixelIndex != -1) {
const int y = chunkStartY + pixelIndex;
const float bisect = (y+0.5)/yFactor+MagickEpsilon;
const unsigned int start = (unsigned int)max(bisect-support+0.5f,0.0f);
const unsigned int stop = (unsigned int)min(bisect+support+0.5f,(float)inputRows);
const unsigned int n = stop - start;
unsigned int numStepsPerWorkItem = n / numItems;
numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
if (startStep < n) {
const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);
unsigned int cacheIndex = start+startStep-cacheRangeStartY;
if (matte == 0) {
for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
float4 cp = convert_float4(inputImageCache[cacheIndex]);
float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
, (ResizeWeightingFunctionType)resizeWindowType
, resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
filteredPixel += ((float4)weight)*cp;
density+=weight;
}
}
else {
for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
CLPixelType p = inputImageCache[cacheIndex];
float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
, (ResizeWeightingFunctionType)resizeWindowType
, resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
float alpha = weight * QuantumScale * GetPixelAlpha(p);
float4 cp = convert_float4(p);
filteredPixel.x += alpha * cp.x;
filteredPixel.y += alpha * cp.y;
filteredPixel.z += alpha * cp.z;
filteredPixel.w += weight * cp.w;
density+=weight;
gamma+=alpha;
}
}
}
}
if (itemID < actualNumPixelInThisChunk) {
outputPixelCache[itemID] = (float4)0.0f;
densityCache[itemID] = 0.0f;
if (matte != 0)
gammaCache[itemID] = 0.0f;
}
barrier(CLK_LOCAL_MEM_FENCE);
for (unsigned int i = 0; i < numItems; i++) {
if (pixelIndex != -1) {
if (itemID%numItems == i) {
outputPixelCache[pixelIndex]+=filteredPixel;
densityCache[pixelIndex]+=density;
if (matte!=0) {
gammaCache[pixelIndex]+=gamma;
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if (itemID < actualNumPixelInThisChunk) {
if (matte==0) {
float density = densityCache[itemID];
float4 filteredPixel = outputPixelCache[itemID];
if (density!= 0.0f && density != 1.0)
{
density = PerceptibleReciprocal(density);
filteredPixel *= (float4)density;
}
filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
, ClampToQuantum(filteredPixel.y)
, ClampToQuantum(filteredPixel.z)
, ClampToQuantum(filteredPixel.w));
}
else {
float density = densityCache[itemID];
float gamma = gammaCache[itemID];
float4 filteredPixel = outputPixelCache[itemID];
if (density!= 0.0f && density != 1.0) {
density = PerceptibleReciprocal(density);
filteredPixel *= (float4)density;
gamma *= density;
}
gamma = PerceptibleReciprocal(gamma);
CLPixelType fp;
fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
, ClampToQuantum(gamma*filteredPixel.y)
, ClampToQuantum(gamma*filteredPixel.z)
, ClampToQuantum(filteredPixel.w));
filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp;
}
}
}
}
)
STRINGIFY(
__kernel __attribute__((reqd_work_group_size(1, 256, 1)))
void ResizeVerticalFilterSinc(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
, const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
, const int resizeFilterType, const int resizeWindowType
, const __global float* resizeFilterCubicCoefficients
, const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
, __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
, __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
ResizeVerticalFilter(inputImage,inputColumns,inputRows,matte
,yFactor,filteredImage,filteredColumns,filteredRows
,SincWeightingFunction, SincWeightingFunction
,resizeFilterCubicCoefficients
,resizeFilterScale,resizeFilterSupport,resizeFilterWindowSupport,resizeFilterBlur
,inputImageCache,numCachedPixels,pixelPerWorkgroup,pixelChunkSize
,outputPixelCache,densityCache,gammaCache);
}
)
STRINGIFY(
__kernel void UnsharpMaskBlurColumn(const __global CLPixelType* inputImage,
const __global float4 *blurRowData, __global CLPixelType *filtered_im,
const unsigned int imageColumns, const unsigned int imageRows,
__local float4* cachedData, __local float* cachedFilter,
const ChannelType channel, const __global float *filter, const unsigned int width,
const float gain, const float threshold)
{
const unsigned int radius = (width-1)/2;
const int groupX = get_group_id(0);
const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
if (groupStartY >= 0
&& groupStopY < imageRows) {
event_t e = async_work_group_strided_copy(cachedData
,blurRowData+groupStartY*imageColumns+groupX
,groupStopY-groupStartY,imageColumns,0);
wait_group_events(1,&e);
}
else {
for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,imageRows)*imageColumns+ groupX];
}
barrier(CLK_LOCAL_MEM_FENCE);
}
event_t e = async_work_group_copy(cachedFilter,filter,width,0);
wait_group_events(1,&e);
const int cy = get_global_id(1);
if (cy < imageRows) {
float4 blurredPixel = (float4) 0.0f;
int i = 0;
\n #ifndef UFACTOR \n
\n #define UFACTOR 8 \n
\n #endif \n
for ( ; i+UFACTOR < width; )
{
\n #pragma unroll UFACTOR \n
for (int j=0; j < UFACTOR; j++, i++)
{
blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
}
}
for ( ; i < width; i++)
{
blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
}
blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
float4 outputPixel = inputImagePixel - blurredPixel;
float quantumThreshold = QuantumRange*threshold;
int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
}
}
__kernel void UnsharpMaskBlurColumnSection(const __global CLPixelType* inputImage,
const __global float4 *blurRowData, __global CLPixelType *filtered_im,
const unsigned int imageColumns, const unsigned int imageRows,
__local float4* cachedData, __local float* cachedFilter,
const ChannelType channel, const __global float *filter, const unsigned int width,
const float gain, const float threshold,
const unsigned int offsetRows, const unsigned int section)
{
const unsigned int radius = (width-1)/2;
const int groupX = get_group_id(0);
const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
blurRowData += imageColumns * radius * section;
if (groupStartY >= 0
&& groupStopY < imageRows) {
event_t e = async_work_group_strided_copy(cachedData
,blurRowData+groupStartY*imageColumns+groupX
,groupStopY-groupStartY,imageColumns,0);
wait_group_events(1,&e);
}
else {
for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
int pos = ClampToCanvasWithHalo(groupStartY+i,imageRows, radius, section)*imageColumns+ groupX;
cachedData[i] = *(blurRowData + pos);
}
barrier(CLK_LOCAL_MEM_FENCE);
}
event_t e = async_work_group_copy(cachedFilter,filter,width,0);
wait_group_events(1,&e);
const int cy = get_global_id(1);
if (cy < imageRows) {
float4 blurredPixel = (float4) 0.0f;
int i = 0;
\n #ifndef UFACTOR \n
\n #define UFACTOR 8 \n
\n #endif \n
for ( ; i+UFACTOR < width; )
{
\n #pragma unroll UFACTOR \n
for (int j=0; j < UFACTOR; j++, i++)
{
blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
}
}
for ( ; i < width; i++)
{
blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
}
blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
inputImage += imageColumns * offsetRows;
filtered_im += imageColumns * offsetRows;
float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
float4 outputPixel = inputImagePixel - blurredPixel;
float quantumThreshold = QuantumRange*threshold;
int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
}
}
)
STRINGIFY(
__kernel void UnsharpMask(__global CLPixelType *im, __global CLPixelType *filtered_im,
__constant float *filter,
const unsigned int width,
const unsigned int imageColumns, const unsigned int imageRows,
__local float4 *pixels,
const float gain, const float threshold, const unsigned int justBlur)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const unsigned int radius = (width - 1) / 2;
int row = y - radius;
int baseRow = get_group_id(1) * get_local_size(1) - radius;
int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
while (row < endRow) {
int srcy = (row < 0) ? -row : row;
srcy = (srcy >= imageRows) ? (2 * imageRows - srcy - 1) : srcy;
float4 value = 0.0f;
int ix = x - radius;
int i = 0;
while (i + 7 < width) {
for (int j = 0; j < 8; ++j) {
int srcx = ix + j;
srcx = (srcx < 0) ? -srcx : srcx;
srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
value += filter[i + j] * convert_float4(im[srcx + srcy * imageColumns]);
}
ix += 8;
i += 8;
}
while (i < width) {
int srcx = (ix < 0) ? -ix : ix;
srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
value += filter[i] * convert_float4(im[srcx + srcy * imageColumns]);
++i;
++ix;
}
pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
row += get_local_size(1);
}
barrier(CLK_LOCAL_MEM_FENCE);
const int px = get_local_id(0);
const int py = get_local_id(1);
const int prp = get_local_size(0);
float4 value = (float4)(0.0f);
int i = 0;
while (i + 7 < width) {
value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
value += (float4)(filter[i]) * pixels[px + (py + i + 1) * prp];
value += (float4)(filter[i]) * pixels[px + (py + i + 2) * prp];
value += (float4)(filter[i]) * pixels[px + (py + i + 3) * prp];
value += (float4)(filter[i]) * pixels[px + (py + i + 4) * prp];
value += (float4)(filter[i]) * pixels[px + (py + i + 5) * prp];
value += (float4)(filter[i]) * pixels[px + (py + i + 6) * prp];
value += (float4)(filter[i]) * pixels[px + (py + i + 7) * prp];
i += 8;
}
while (i < width) {
value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
++i;
}
if (justBlur == 0) {
float4 srcPixel = convert_float4(im[x + y * imageColumns]);
float4 diff = srcPixel - value;
float quantumThreshold = QuantumRange*threshold;
int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
value = select(srcPixel + diff * gain, srcPixel, mask);
}
if ((x < imageColumns) && (y < imageRows))
filtered_im[x + y * imageColumns] = (CLPixelType)(ClampToQuantum(value.s0), ClampToQuantum(value.s1), ClampToQuantum(value.s2), ClampToQuantum(value.s3));
}
)
;
#endif
#if defined(__cplusplus) || defined(c_plusplus)
}
#endif
#endif