00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
00020 #define MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
00021
00022 #if defined(__cplusplus) || defined(c_plusplus)
00023 extern "C" {
00024 #endif
00025
00026 #if defined(MAGICKCORE_OPENCL_SUPPORT)
00027
00028
00029
00030
00031 #define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
00032 #define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
00033 #define OPENCL_ELSE() "\n #""else " " \n"
00034 #define OPENCL_ENDIF() "\n #""endif " " \n"
00035 #define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
00036 #define STRINGIFY(...) #__VA_ARGS__ "\n"
00037
00038 const char *accelerateKernels =
00039
00040
00041
00042
00043 OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
00044 OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f))
00045 OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
00046 OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
00047 OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
00048 OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
00049 OPENCL_DEFINE(SigmaRandom, (attenuate))
00050 OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
00051 OPENCL_DEFINE(MagickMax(x,y), (((x) > (y)) ? (x) : (y)))
00052 OPENCL_DEFINE(MagickMin(x,y), (((x) < (y)) ? (x) : (y)))
00053
00054
00055
00056
00057 STRINGIFY(
00058 typedef enum
00059 {
00060 UndefinedColorspace,
00061 CMYColorspace,
00062 CMYKColorspace,
00063 GRAYColorspace,
00064 HCLColorspace,
00065 HCLpColorspace,
00066 HSBColorspace,
00067 HSIColorspace,
00068 HSLColorspace,
00069 HSVColorspace,
00070 HWBColorspace,
00071 LabColorspace,
00072 LCHColorspace,
00073 LCHabColorspace,
00074 LCHuvColorspace,
00075 LogColorspace,
00076 LMSColorspace,
00077 LuvColorspace,
00078 OHTAColorspace,
00079 Rec601YCbCrColorspace,
00080 Rec709YCbCrColorspace,
00081 RGBColorspace,
00082 scRGBColorspace,
00083 sRGBColorspace,
00084 TransparentColorspace,
00085 xyYColorspace,
00086 XYZColorspace,
00087 YCbCrColorspace,
00088 YCCColorspace,
00089 YDbDrColorspace,
00090 YIQColorspace,
00091 YPbPrColorspace,
00092 YUVColorspace,
00093 LinearGRAYColorspace
00094 } ColorspaceType;
00095 )
00096
00097 STRINGIFY(
00098 typedef enum
00099 {
00100 UndefinedCompositeOp,
00101 AlphaCompositeOp,
00102 AtopCompositeOp,
00103 BlendCompositeOp,
00104 BlurCompositeOp,
00105 BumpmapCompositeOp,
00106 ChangeMaskCompositeOp,
00107 ClearCompositeOp,
00108 ColorBurnCompositeOp,
00109 ColorDodgeCompositeOp,
00110 ColorizeCompositeOp,
00111 CopyBlackCompositeOp,
00112 CopyBlueCompositeOp,
00113 CopyCompositeOp,
00114 CopyCyanCompositeOp,
00115 CopyGreenCompositeOp,
00116 CopyMagentaCompositeOp,
00117 CopyAlphaCompositeOp,
00118 CopyRedCompositeOp,
00119 CopyYellowCompositeOp,
00120 DarkenCompositeOp,
00121 DarkenIntensityCompositeOp,
00122 DifferenceCompositeOp,
00123 DisplaceCompositeOp,
00124 DissolveCompositeOp,
00125 DistortCompositeOp,
00126 DivideDstCompositeOp,
00127 DivideSrcCompositeOp,
00128 DstAtopCompositeOp,
00129 DstCompositeOp,
00130 DstInCompositeOp,
00131 DstOutCompositeOp,
00132 DstOverCompositeOp,
00133 ExclusionCompositeOp,
00134 HardLightCompositeOp,
00135 HardMixCompositeOp,
00136 HueCompositeOp,
00137 InCompositeOp,
00138 IntensityCompositeOp,
00139 LightenCompositeOp,
00140 LightenIntensityCompositeOp,
00141 LinearBurnCompositeOp,
00142 LinearDodgeCompositeOp,
00143 LinearLightCompositeOp,
00144 LuminizeCompositeOp,
00145 MathematicsCompositeOp,
00146 MinusDstCompositeOp,
00147 MinusSrcCompositeOp,
00148 ModulateCompositeOp,
00149 ModulusAddCompositeOp,
00150 ModulusSubtractCompositeOp,
00151 MultiplyCompositeOp,
00152 NoCompositeOp,
00153 OutCompositeOp,
00154 OverCompositeOp,
00155 OverlayCompositeOp,
00156 PegtopLightCompositeOp,
00157 PinLightCompositeOp,
00158 PlusCompositeOp,
00159 ReplaceCompositeOp,
00160 SaturateCompositeOp,
00161 ScreenCompositeOp,
00162 SoftLightCompositeOp,
00163 SrcAtopCompositeOp,
00164 SrcCompositeOp,
00165 SrcInCompositeOp,
00166 SrcOutCompositeOp,
00167 SrcOverCompositeOp,
00168 ThresholdCompositeOp,
00169 VividLightCompositeOp,
00170 XorCompositeOp,
00171 StereoCompositeOp
00172 } CompositeOperator;
00173 )
00174
00175 STRINGIFY(
00176 typedef enum
00177 {
00178 UndefinedFunction,
00179 ArcsinFunction,
00180 ArctanFunction,
00181 PolynomialFunction,
00182 SinusoidFunction
00183 } MagickFunction;
00184 )
00185
00186 STRINGIFY(
00187 typedef enum
00188 {
00189 UndefinedNoise,
00190 UniformNoise,
00191 GaussianNoise,
00192 MultiplicativeGaussianNoise,
00193 ImpulseNoise,
00194 LaplacianNoise,
00195 PoissonNoise,
00196 RandomNoise
00197 } NoiseType;
00198 )
00199
00200 STRINGIFY(
00201 typedef enum
00202 {
00203 UndefinedPixelIntensityMethod = 0,
00204 AveragePixelIntensityMethod,
00205 BrightnessPixelIntensityMethod,
00206 LightnessPixelIntensityMethod,
00207 MSPixelIntensityMethod,
00208 Rec601LumaPixelIntensityMethod,
00209 Rec601LuminancePixelIntensityMethod,
00210 Rec709LumaPixelIntensityMethod,
00211 Rec709LuminancePixelIntensityMethod,
00212 RMSPixelIntensityMethod
00213 } PixelIntensityMethod;
00214 )
00215
00216 STRINGIFY(
00217 typedef enum
00218 {
00219 BoxWeightingFunction = 0,
00220 TriangleWeightingFunction,
00221 CubicBCWeightingFunction,
00222 HannWeightingFunction,
00223 HammingWeightingFunction,
00224 BlackmanWeightingFunction,
00225 GaussianWeightingFunction,
00226 QuadraticWeightingFunction,
00227 JincWeightingFunction,
00228 SincWeightingFunction,
00229 SincFastWeightingFunction,
00230 KaiserWeightingFunction,
00231 WelchWeightingFunction,
00232 BohmanWeightingFunction,
00233 LagrangeWeightingFunction,
00234 CosineWeightingFunction
00235 } ResizeWeightingFunctionType;
00236 )
00237
00238 STRINGIFY(
00239 typedef enum
00240 {
00241 UndefinedChannel = 0x0000,
00242 RedChannel = 0x0001,
00243 GrayChannel = 0x0001,
00244 CyanChannel = 0x0001,
00245 GreenChannel = 0x0002,
00246 MagentaChannel = 0x0002,
00247 BlueChannel = 0x0004,
00248 YellowChannel = 0x0004,
00249 BlackChannel = 0x0008,
00250 AlphaChannel = 0x0010,
00251 OpacityChannel = 0x0010,
00252 IndexChannel = 0x0020,
00253 ReadMaskChannel = 0x0040,
00254 WriteMaskChannel = 0x0080,
00255 MetaChannel = 0x0100,
00256 CompositeChannels = 0x001F,
00257 AllChannels = 0x7ffffff,
00258
00259
00260
00261
00262
00263
00264
00265 TrueAlphaChannel = 0x0100,
00266 RGBChannels = 0x0200,
00267 GrayChannels = 0x0400,
00268 SyncChannels = 0x20000,
00269 DefaultChannels = AllChannels
00270 } ChannelType;
00271 )
00272
00273
00274
00275
00276
00277 OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
00278
00279 STRINGIFY(
00280 static inline CLQuantum ScaleCharToQuantum(const unsigned char value)
00281 {
00282 return((CLQuantum) value);
00283 }
00284 )
00285
00286 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
00287
00288 STRINGIFY(
00289 static inline CLQuantum ScaleCharToQuantum(const unsigned char value)
00290 {
00291 return((CLQuantum) (257.0f*value));
00292 }
00293 )
00294
00295 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
00296
00297 STRINGIFY(
00298 static inline CLQuantum ScaleCharToQuantum(const unsigned char value)
00299 {
00300 return((CLQuantum) (16843009.0*value));
00301 }
00302 )
00303
00304 OPENCL_ENDIF()
00305
00306 OPENCL_IF((MAGICKCORE_HDRI_SUPPORT == 1))
00307
00308 STRINGIFY(
00309 static inline CLQuantum ClampToQuantum(const float value)
00310 {
00311 return (CLQuantum) clamp(value, 0.0f, QuantumRange);
00312 }
00313 )
00314
00315 OPENCL_ELSE()
00316
00317 STRINGIFY(
00318 static inline CLQuantum ClampToQuantum(const float value)
00319 {
00320 return (CLQuantum) (clamp(value, 0.0f, QuantumRange) + 0.5f);
00321 }
00322 )
00323
00324 OPENCL_ENDIF()
00325
00326 STRINGIFY(
00327 static inline int ClampToCanvas(const int offset,const int range)
00328 {
00329 return clamp(offset, (int)0, range-1);
00330 }
00331 )
00332
00333 STRINGIFY(
00334 static inline uint ScaleQuantumToMap(CLQuantum value)
00335 {
00336 if (value >= (CLQuantum) MaxMap)
00337 return ((uint)MaxMap);
00338 else
00339 return ((uint)value);
00340 }
00341 )
00342
00343 STRINGIFY(
00344 static inline float PerceptibleReciprocal(const float x)
00345 {
00346 float sign = x < (float) 0.0 ? (float) -1.0 : (float) 1.0;
00347 return((sign*x) >= MagickEpsilon ? (float) 1.0/x : sign*((float) 1.0/MagickEpsilon));
00348 }
00349 )
00350
00351 STRINGIFY(
00352 static inline float RoundToUnity(const float value)
00353 {
00354 return clamp(value,0.0f,1.0f);
00355 }
00356 )
00357
00358 STRINGIFY(
00359
00360 static inline unsigned int getPixelIndex(const unsigned int number_channels,
00361 const unsigned int columns, const unsigned int x, const unsigned int y)
00362 {
00363 return (x * number_channels) + (y * columns * number_channels);
00364 }
00365
00366 static inline float getPixelRed(const __global CLQuantum *p) { return (float)*p; }
00367 static inline float getPixelGreen(const __global CLQuantum *p) { return (float)*(p+1); }
00368 static inline float getPixelBlue(const __global CLQuantum *p) { return (float)*(p+2); }
00369 static inline float getPixelAlpha(const __global CLQuantum *p,const unsigned int number_channels) { return (float)*(p+number_channels-1); }
00370
00371 static inline void setPixelRed(__global CLQuantum *p,const CLQuantum value) { *p=value; }
00372 static inline void setPixelGreen(__global CLQuantum *p,const CLQuantum value) { *(p+1)=value; }
00373 static inline void setPixelBlue(__global CLQuantum *p,const CLQuantum value) { *(p+2)=value; }
00374 static inline void setPixelAlpha(__global CLQuantum *p,const unsigned int number_channels,const CLQuantum value) { *(p+number_channels-1)=value; }
00375
00376 static inline CLQuantum getBlue(CLPixelType p) { return p.x; }
00377 static inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
00378 static inline float getBlueF4(float4 p) { return p.x; }
00379 static inline void setBlueF4(float4* p, float value) { (*p).x = value; }
00380
00381 static inline CLQuantum getGreen(CLPixelType p) { return p.y; }
00382 static inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
00383 static inline float getGreenF4(float4 p) { return p.y; }
00384 static inline void setGreenF4(float4* p, float value) { (*p).y = value; }
00385
00386 static inline CLQuantum getRed(CLPixelType p) { return p.z; }
00387 static inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
00388 static inline float getRedF4(float4 p) { return p.z; }
00389 static inline void setRedF4(float4* p, float value) { (*p).z = value; }
00390
00391 static inline CLQuantum getAlpha(CLPixelType p) { return p.w; }
00392 static inline void setAlpha(CLPixelType* p, CLQuantum value) { (*p).w = value; }
00393 static inline float getAlphaF4(float4 p) { return p.w; }
00394 static inline void setAlphaF4(float4* p, float value) { (*p).w = value; }
00395
00396 static inline void ReadChannels(const __global CLQuantum *p, const unsigned int number_channels,
00397 const ChannelType channel, float *red, float *green, float *blue, float *alpha)
00398 {
00399 if ((channel & RedChannel) != 0)
00400 *red=getPixelRed(p);
00401
00402 if (number_channels > 2)
00403 {
00404 if ((channel & GreenChannel) != 0)
00405 *green=getPixelGreen(p);
00406
00407 if ((channel & BlueChannel) != 0)
00408 *blue=getPixelBlue(p);
00409 }
00410
00411 if (((number_channels == 4) || (number_channels == 2)) &&
00412 ((channel & AlphaChannel) != 0))
00413 *alpha=getPixelAlpha(p,number_channels);
00414 }
00415
00416 static inline float4 ReadAllChannels(const __global CLQuantum *image, const unsigned int number_channels,
00417 const unsigned int columns, const unsigned int x, const unsigned int y)
00418 {
00419 const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
00420
00421 float4 pixel;
00422
00423 pixel.x=getPixelRed(p);
00424
00425 if (number_channels > 2)
00426 {
00427 pixel.y=getPixelGreen(p);
00428 pixel.z=getPixelBlue(p);
00429 }
00430
00431 if ((number_channels == 4) || (number_channels == 2))
00432 pixel.w=getPixelAlpha(p,number_channels);
00433 return(pixel);
00434 }
00435
00436 static inline float4 ReadFloat4(const __global CLQuantum *image, const unsigned int number_channels,
00437 const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel)
00438 {
00439 const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
00440
00441 float red = 0.0f;
00442 float green = 0.0f;
00443 float blue = 0.0f;
00444 float alpha = 0.0f;
00445
00446 ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
00447 return (float4)(red, green, blue, alpha);
00448 }
00449
00450 static inline void WriteChannels(__global CLQuantum *p, const unsigned int number_channels,
00451 const ChannelType channel, float red, float green, float blue, float alpha)
00452 {
00453 if ((channel & RedChannel) != 0)
00454 setPixelRed(p,ClampToQuantum(red));
00455
00456 if (number_channels > 2)
00457 {
00458 if ((channel & GreenChannel) != 0)
00459 setPixelGreen(p,ClampToQuantum(green));
00460
00461 if ((channel & BlueChannel) != 0)
00462 setPixelBlue(p,ClampToQuantum(blue));
00463 }
00464
00465 if (((number_channels == 4) || (number_channels == 2)) &&
00466 ((channel & AlphaChannel) != 0))
00467 setPixelAlpha(p,number_channels,ClampToQuantum(alpha));
00468 }
00469
00470 static inline void WriteAllChannels(__global CLQuantum *image, const unsigned int number_channels,
00471 const unsigned int columns, const unsigned int x, const unsigned int y, float4 pixel)
00472 {
00473 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
00474
00475 setPixelRed(p,ClampToQuantum(pixel.x));
00476
00477 if (number_channels > 2)
00478 {
00479 setPixelGreen(p,ClampToQuantum(pixel.y));
00480 setPixelBlue(p,ClampToQuantum(pixel.z));
00481 }
00482
00483 if ((number_channels == 4) || (number_channels == 2))
00484 setPixelAlpha(p,number_channels,ClampToQuantum(pixel.w));
00485 }
00486
00487 static inline void WriteFloat4(__global CLQuantum *image, const unsigned int number_channels,
00488 const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel,
00489 float4 pixel)
00490 {
00491 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
00492 WriteChannels(p, number_channels, channel, pixel.x, pixel.y, pixel.z, pixel.w);
00493 }
00494
00495 static inline float GetPixelIntensity(const unsigned int colorspace,
00496 const unsigned int method,float red,float green,float blue)
00497 {
00498 float intensity;
00499
00500 if ((colorspace == GRAYColorspace) || (colorspace == LinearGRAYColorspace))
00501 return red;
00502
00503 switch (method)
00504 {
00505 case AveragePixelIntensityMethod:
00506 {
00507 intensity=(red+green+blue)/3.0;
00508 break;
00509 }
00510 case BrightnessPixelIntensityMethod:
00511 {
00512 intensity=MagickMax(MagickMax(red,green),blue);
00513 break;
00514 }
00515 case LightnessPixelIntensityMethod:
00516 {
00517 intensity=(MagickMin(MagickMin(red,green),blue)+
00518 MagickMax(MagickMax(red,green),blue))/2.0;
00519 break;
00520 }
00521 case MSPixelIntensityMethod:
00522 {
00523 intensity=(float) (((float) red*red+green*green+blue*blue)/
00524 (3.0*QuantumRange));
00525 break;
00526 }
00527 case Rec601LumaPixelIntensityMethod:
00528 {
00529
00530
00531
00532
00533
00534
00535
00536
00537 intensity=0.298839*red+0.586811*green+0.114350*blue;
00538 break;
00539 }
00540 case Rec601LuminancePixelIntensityMethod:
00541 {
00542
00543
00544
00545
00546
00547
00548
00549
00550 intensity=0.298839*red+0.586811*green+0.114350*blue;
00551 break;
00552 }
00553 case Rec709LumaPixelIntensityMethod:
00554 default:
00555 {
00556
00557
00558
00559
00560
00561
00562
00563
00564 intensity=0.212656*red+0.715158*green+0.072186*blue;
00565 break;
00566 }
00567 case Rec709LuminancePixelIntensityMethod:
00568 {
00569
00570
00571
00572
00573
00574
00575
00576
00577 intensity=0.212656*red+0.715158*green+0.072186*blue;
00578 break;
00579 }
00580 case RMSPixelIntensityMethod:
00581 {
00582 intensity=(float) (sqrt((float) red*red+green*green+blue*blue)/
00583 sqrt(3.0));
00584 break;
00585 }
00586 }
00587
00588 return intensity;
00589 }
00590
00591 static inline int mirrorBottom(int value)
00592 {
00593 return (value < 0) ? - (value) : value;
00594 }
00595
00596 static inline int mirrorTop(int value, int width)
00597 {
00598 return (value >= width) ? (2 * width - value - 1) : value;
00599 }
00600 )
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614 STRINGIFY(
00615
00616
00617
00618
00619
00620
00621
00622
00623 ulong MWC_AddMod64(ulong a, ulong b, ulong M)
00624 {
00625 ulong v=a+b;
00626
00627 if( (v>=M) || (convert_float(v) < convert_float(a)) )
00628 v=v-M;
00629 return v;
00630 }
00631
00632
00633
00634
00635
00636
00637
00638 ulong MWC_MulMod64(ulong a, ulong b, ulong M)
00639 {
00640 ulong r=0;
00641 while(a!=0){
00642 if(a&1)
00643 r=MWC_AddMod64(r,b,M);
00644 b=MWC_AddMod64(b,b,M);
00645 a=a>>1;
00646 }
00647 return r;
00648 }
00649
00650
00651
00652
00653
00654 ulong MWC_PowMod64(ulong a, ulong e, ulong M)
00655 {
00656 ulong sqr=a, acc=1;
00657 while(e!=0){
00658 if(e&1)
00659 acc=MWC_MulMod64(acc,sqr,M);
00660 sqr=MWC_MulMod64(sqr,sqr,M);
00661 e=e>>1;
00662 }
00663 return acc;
00664 }
00665
00666 uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
00667 {
00668 ulong m=MWC_PowMod64(A, distance, M);
00669 ulong x=curr.x*(ulong)A+curr.y;
00670 x=MWC_MulMod64(x, m, M);
00671 return (uint2)((uint)(x/A), (uint)(x%A));
00672 }
00673
00674 uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
00675 {
00676
00677
00678
00679
00680
00681 enum{ MWC_BASEID = 4077358422479273989UL };
00682
00683 ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
00684 ulong m=MWC_PowMod64(A, dist, M);
00685
00686 ulong x=MWC_MulMod64(MWC_BASEID, m, M);
00687 return (uint2)((uint)(x/A), (uint)(x%A));
00688 }
00689
00691 typedef struct{ uint x; uint c; uint seed0; ulong seed1; } mwc64x_state_t;
00692
00693 void MWC64X_Step(mwc64x_state_t *s)
00694 {
00695 uint X=s->x, C=s->c;
00696
00697 uint Xn=s->seed0*X+C;
00698 uint carry=(uint)(Xn<C);
00699 uint Cn=mad_hi(s->seed0,X,carry);
00700
00701 s->x=Xn;
00702 s->c=Cn;
00703 }
00704
00705 void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
00706 {
00707 uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), s->seed0, s->seed1, distance);
00708 s->x=tmp.x;
00709 s->c=tmp.y;
00710 }
00711
00712 void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
00713 {
00714 uint2 tmp=MWC_SeedImpl_Mod64(s->seed0, s->seed1, 1, 0, baseOffset, perStreamOffset);
00715 s->x=tmp.x;
00716 s->c=tmp.y;
00717 }
00718
00720 uint MWC64X_NextUint(mwc64x_state_t *s)
00721 {
00722 uint res=s->x ^ s->c;
00723 MWC64X_Step(s);
00724 return res;
00725 }
00726
00727
00728
00729
00730
00731 float mwcReadPseudoRandomValue(mwc64x_state_t* rng)
00732 {
00733 return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff);
00734 }
00735
00736 float mwcGenerateDifferentialNoise(mwc64x_state_t* r, float pixel, NoiseType noise_type, float attenuate)
00737 {
00738 float
00739 alpha,
00740 beta,
00741 noise,
00742 sigma;
00743
00744 noise = 0.0f;
00745 alpha=mwcReadPseudoRandomValue(r);
00746 switch(noise_type)
00747 {
00748 case UniformNoise:
00749 default:
00750 {
00751 noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
00752 break;
00753 }
00754 case GaussianNoise:
00755 {
00756 float
00757 gamma,
00758 tau;
00759
00760 if (alpha == 0.0f)
00761 alpha=1.0f;
00762 beta=mwcReadPseudoRandomValue(r);
00763 gamma=sqrt(-2.0f*log(alpha));
00764 sigma=gamma*cospi((2.0f*beta));
00765 tau=gamma*sinpi((2.0f*beta));
00766 noise=pixel+sqrt(pixel)*SigmaGaussian*sigma+QuantumRange*TauGaussian*tau;
00767 break;
00768 }
00769 case ImpulseNoise:
00770 {
00771 if (alpha < (SigmaImpulse/2.0f))
00772 noise=0.0f;
00773 else
00774 if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
00775 noise=QuantumRange;
00776 else
00777 noise=pixel;
00778 break;
00779 }
00780 case LaplacianNoise:
00781 {
00782 if (alpha <= 0.5f)
00783 {
00784 if (alpha <= MagickEpsilon)
00785 noise=(pixel-QuantumRange);
00786 else
00787 noise=(pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
00788 0.5f);
00789 break;
00790 }
00791 beta=1.0f-alpha;
00792 if (beta <= (0.5f*MagickEpsilon))
00793 noise=(pixel+QuantumRange);
00794 else
00795 noise=(pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
00796 break;
00797 }
00798 case MultiplicativeGaussianNoise:
00799 {
00800 sigma=1.0f;
00801 if (alpha > MagickEpsilon)
00802 sigma=sqrt(-2.0f*log(alpha));
00803 beta=mwcReadPseudoRandomValue(r);
00804 noise=(pixel+pixel*SigmaMultiplicativeGaussian*sigma*
00805 cospi((2.0f*beta))/2.0f);
00806 break;
00807 }
00808 case PoissonNoise:
00809 {
00810 float
00811 poisson;
00812 unsigned int i;
00813 poisson=exp(-SigmaPoisson*QuantumScale*pixel);
00814 for (i=0; alpha > poisson; i++)
00815 {
00816 beta=mwcReadPseudoRandomValue(r);
00817 alpha*=beta;
00818 }
00819 noise=(QuantumRange*i/SigmaPoisson);
00820 break;
00821 }
00822 case RandomNoise:
00823 {
00824 noise=(QuantumRange*SigmaRandom*alpha);
00825 break;
00826 }
00827 }
00828 return noise;
00829 }
00830
00831 __kernel
00832 void AddNoise(const __global CLQuantum *image,
00833 const unsigned int number_channels,const ChannelType channel,
00834 const unsigned int length,const unsigned int pixelsPerWorkItem,
00835 const NoiseType noise_type,const float attenuate,const unsigned int seed0,
00836 const unsigned int seed1,const unsigned int numRandomNumbersPerPixel,
00837 __global CLQuantum *filteredImage)
00838 {
00839 mwc64x_state_t rng;
00840 rng.seed0 = seed0;
00841 rng.seed1 = seed1;
00842
00843 uint span = pixelsPerWorkItem * numRandomNumbersPerPixel;
00844 uint offset = span * get_local_size(0) * get_group_id(0);
00845 MWC64X_SeedStreams(&rng, offset, span);
00846
00847 uint pos = get_group_id(0) * get_local_size(0) * pixelsPerWorkItem * number_channels + (get_local_id(0) * number_channels);
00848 uint count = pixelsPerWorkItem;
00849
00850 while (count > 0 && pos < length)
00851 {
00852 const __global CLQuantum *p = image + pos;
00853 __global CLQuantum *q = filteredImage + pos;
00854
00855 float red;
00856 float green;
00857 float blue;
00858 float alpha;
00859
00860 ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
00861
00862 if ((channel & RedChannel) != 0)
00863 red=mwcGenerateDifferentialNoise(&rng,red,noise_type,attenuate);
00864
00865 if (number_channels > 2)
00866 {
00867 if ((channel & GreenChannel) != 0)
00868 green=mwcGenerateDifferentialNoise(&rng,green,noise_type,attenuate);
00869
00870 if ((channel & BlueChannel) != 0)
00871 blue=mwcGenerateDifferentialNoise(&rng,blue,noise_type,attenuate);
00872 }
00873
00874 if (((number_channels == 4) || (number_channels == 2)) &&
00875 ((channel & AlphaChannel) != 0))
00876 alpha=mwcGenerateDifferentialNoise(&rng,alpha,noise_type,attenuate);
00877
00878 WriteChannels(q, number_channels, channel, red, green, blue, alpha);
00879
00880 pos += (get_local_size(0) * number_channels);
00881 count--;
00882 }
00883 }
00884 )
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
00898 STRINGIFY(
00899
00900
00901
00902 __kernel void BlurRow(const __global CLQuantum *image,
00903 const unsigned int number_channels,const ChannelType channel,
00904 __constant float *filter,const unsigned int width,
00905 const unsigned int imageColumns,const unsigned int imageRows,
00906 __local float4 *temp,__global float4 *tempImage)
00907 {
00908 const int x = get_global_id(0);
00909 const int y = get_global_id(1);
00910
00911 const int columns = imageColumns;
00912
00913 const unsigned int radius = (width-1)/2;
00914 const int wsize = get_local_size(0);
00915 const unsigned int loadSize = wsize+width;
00916
00917
00918 const int groupX=get_local_size(0)*get_group_id(0);
00919
00920
00921 for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
00922 {
00923 int cx = ClampToCanvas(i + groupX - radius, columns);
00924 temp[i] = ReadFloat4(image, number_channels, columns, cx, y, channel);
00925 }
00926
00927
00928 barrier(CLK_LOCAL_MEM_FENCE);
00929
00930
00931 if (get_global_id(0) < columns)
00932 {
00933
00934 float4 result = (float4) 0;
00935
00936 int i = 0;
00937
00938 for ( ; i+7 < width; )
00939 {
00940 for (int j=0; j < 8; j++)
00941 result+=filter[i+j]*temp[i+j+get_local_id(0)];
00942 i+=8;
00943 }
00944
00945 for ( ; i < width; i++)
00946 result+=filter[i]*temp[i+get_local_id(0)];
00947
00948
00949 tempImage[y*columns+x] = result;
00950 }
00951 }
00952 )
00953
00954 STRINGIFY(
00955
00956
00957
00958 __kernel void BlurColumn(const __global float4 *blurRowData,
00959 const unsigned int number_channels,const ChannelType channel,
00960 __constant float *filter,const unsigned int width,
00961 const unsigned int imageColumns,const unsigned int imageRows,
00962 __local float4 *temp,__global CLQuantum *filteredImage)
00963 {
00964 const int x = get_global_id(0);
00965 const int y = get_global_id(1);
00966
00967 const int columns = imageColumns;
00968 const int rows = imageRows;
00969
00970 unsigned int radius = (width-1)/2;
00971 const int wsize = get_local_size(1);
00972 const unsigned int loadSize = wsize+width;
00973
00974
00975 const int groupX=get_local_size(0)*get_group_id(0);
00976 const int groupY=get_local_size(1)*get_group_id(1);
00977
00978
00979
00980
00981 for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
00982 temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
00983
00984
00985 barrier(CLK_LOCAL_MEM_FENCE);
00986
00987
00988 if (get_global_id(1) < rows)
00989 {
00990
00991 float4 result = (float4) 0;
00992
00993 int i = 0;
00994
00995 for ( ; i+7 < width; )
00996 {
00997 for (int j=0; j < 8; j++)
00998 result+=filter[i+j]*temp[i+j+get_local_id(1)];
00999 i+=8;
01000 }
01001
01002 for ( ; i < width; i++)
01003 result+=filter[i]*temp[i+get_local_id(1)];
01004
01005
01006 WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
01007 }
01008 }
01009 )
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020
01021
01022
01023 STRINGIFY(
01024
01025 static inline float4 ConvertRGBToHSB(const float4 pixel)
01026 {
01027 float4 result=0.0f;
01028 result.w=pixel.w;
01029 float tmax=MagickMax(MagickMax(pixel.x,pixel.y),pixel.z);
01030 if (tmax != 0.0f)
01031 {
01032 float tmin=MagickMin(MagickMin(pixel.x,pixel.y),pixel.z);
01033 float delta=tmax-tmin;
01034
01035 result.y=delta/tmax;
01036 result.z=QuantumScale*tmax;
01037 if (delta != 0.0f)
01038 {
01039 result.x =((pixel.x == tmax) ? 0.0f : ((pixel.y == tmax) ? 2.0f : 4.0f));
01040 result.x+=((pixel.x == tmax) ? (pixel.y-pixel.z) : ((pixel.y == tmax) ?
01041 (pixel.z-pixel.x) : (pixel.x-pixel.y)))/delta;
01042 result.x/=6.0f;
01043 result.x+=(result.x < 0.0f) ? 0.0f : 1.0f;
01044 }
01045 }
01046 return(result);
01047 }
01048
01049 static inline float4 ConvertHSBToRGB(const float4 pixel)
01050 {
01051 float hue=pixel.x;
01052 float saturation=pixel.y;
01053 float brightness=pixel.z;
01054
01055 float4 result=pixel;
01056
01057 if (saturation == 0.0f)
01058 {
01059 result.x=result.y=result.z=ClampToQuantum(QuantumRange*brightness);
01060 }
01061 else
01062 {
01063 float h=6.0f*(hue-floor(hue));
01064 float f=h-floor(h);
01065 float p=brightness*(1.0f-saturation);
01066 float q=brightness*(1.0f-saturation*f);
01067 float t=brightness*(1.0f-(saturation*(1.0f-f)));
01068 int ih = (int)h;
01069
01070 if (ih == 1)
01071 {
01072 result.x=ClampToQuantum(QuantumRange*q);
01073 result.y=ClampToQuantum(QuantumRange*brightness);
01074 result.z=ClampToQuantum(QuantumRange*p);
01075 }
01076 else if (ih == 2)
01077 {
01078 result.x=ClampToQuantum(QuantumRange*p);
01079 result.y=ClampToQuantum(QuantumRange*brightness);
01080 result.z=ClampToQuantum(QuantumRange*t);
01081 }
01082 else if (ih == 3)
01083 {
01084 result.x=ClampToQuantum(QuantumRange*p);
01085 result.y=ClampToQuantum(QuantumRange*q);
01086 result.z=ClampToQuantum(QuantumRange*brightness);
01087 }
01088 else if (ih == 4)
01089 {
01090 result.x=ClampToQuantum(QuantumRange*t);
01091 result.y=ClampToQuantum(QuantumRange*p);
01092 result.z=ClampToQuantum(QuantumRange*brightness);
01093 }
01094 else if (ih == 5)
01095 {
01096 result.x=ClampToQuantum(QuantumRange*brightness);
01097 result.y=ClampToQuantum(QuantumRange*p);
01098 result.z=ClampToQuantum(QuantumRange*q);
01099 }
01100 else
01101 {
01102 result.x=ClampToQuantum(QuantumRange*brightness);
01103 result.y=ClampToQuantum(QuantumRange*t);
01104 result.z=ClampToQuantum(QuantumRange*p);
01105 }
01106 }
01107 return(result);
01108 }
01109
01110 __kernel void Contrast(__global CLQuantum *image,
01111 const unsigned int number_channels,const int sign)
01112 {
01113 const int x=get_global_id(0);
01114 const int y=get_global_id(1);
01115 const unsigned int columns=get_global_size(0);
01116
01117 float4 pixel=ReadAllChannels(image,number_channels,columns,x,y);
01118 if (number_channels < 3)
01119 pixel.y=pixel.z=pixel.x;
01120
01121 pixel=ConvertRGBToHSB(pixel);
01122 float brightness=pixel.z;
01123 brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
01124 brightness=clamp(brightness,0.0f,1.0f);
01125 pixel.z=brightness;
01126 pixel=ConvertHSBToRGB(pixel);
01127
01128 WriteAllChannels(image,number_channels,columns,x,y,pixel);
01129 }
01130 )
01131
01132
01133
01134
01135
01136
01137
01138
01139
01140
01141
01142
01143
01144 STRINGIFY(
01145
01146
01147 __kernel void Histogram(__global CLPixelType * restrict im,
01148 const ChannelType channel,
01149 const unsigned int colorspace,
01150 const unsigned int method,
01151 __global uint4 * restrict histogram)
01152 {
01153 const int x = get_global_id(0);
01154 const int y = get_global_id(1);
01155 const int columns = get_global_size(0);
01156 const int c = x + y * columns;
01157 if ((channel & SyncChannels) != 0)
01158 {
01159 float red=(float)getRed(im[c]);
01160 float green=(float)getGreen(im[c]);
01161 float blue=(float)getBlue(im[c]);
01162
01163 float intensity = GetPixelIntensity(colorspace, method, red, green, blue);
01164 uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
01165 atomic_inc((__global uint *)(&(histogram[pos]))+2);
01166 }
01167 else
01168 {
01169
01170
01171 }
01172 }
01173 )
01174
01175 STRINGIFY(
01176
01177
01178 __kernel void ContrastStretch(__global CLPixelType * restrict im,
01179 const ChannelType channel,
01180 __global CLPixelType * restrict stretch_map,
01181 const float4 white, const float4 black)
01182 {
01183 const int x = get_global_id(0);
01184 const int y = get_global_id(1);
01185 const int columns = get_global_size(0);
01186 const int c = x + y * columns;
01187
01188 uint ePos;
01189 CLPixelType oValue, eValue;
01190 CLQuantum red, green, blue, alpha;
01191
01192
01193 oValue=im[c];
01194
01195 if ((channel & RedChannel) != 0)
01196 {
01197 if (getRedF4(white) != getRedF4(black))
01198 {
01199 ePos = ScaleQuantumToMap(getRed(oValue));
01200 eValue = stretch_map[ePos];
01201 red = getRed(eValue);
01202 }
01203 }
01204
01205 if ((channel & GreenChannel) != 0)
01206 {
01207 if (getGreenF4(white) != getGreenF4(black))
01208 {
01209 ePos = ScaleQuantumToMap(getGreen(oValue));
01210 eValue = stretch_map[ePos];
01211 green = getGreen(eValue);
01212 }
01213 }
01214
01215 if ((channel & BlueChannel) != 0)
01216 {
01217 if (getBlueF4(white) != getBlueF4(black))
01218 {
01219 ePos = ScaleQuantumToMap(getBlue(oValue));
01220 eValue = stretch_map[ePos];
01221 blue = getBlue(eValue);
01222 }
01223 }
01224
01225 if ((channel & AlphaChannel) != 0)
01226 {
01227 if (getAlphaF4(white) != getAlphaF4(black))
01228 {
01229 ePos = ScaleQuantumToMap(getAlpha(oValue));
01230 eValue = stretch_map[ePos];
01231 alpha = getAlpha(eValue);
01232 }
01233 }
01234
01235
01236 im[c]=(CLPixelType)(blue, green, red, alpha);
01237
01238 }
01239 )
01240
01241
01242
01243
01244
01245
01246
01247
01248
01249
01250
01251
01252
01253 STRINGIFY(
01254 __kernel
01255 void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
01256 const unsigned int imageWidth, const unsigned int imageHeight,
01257 __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
01258 const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) {
01259
01260 int2 blockID;
01261 blockID.x = get_global_id(0) / get_local_size(0);
01262 blockID.y = get_global_id(1) / get_local_size(1);
01263
01264
01265 int2 imageAreaOrg;
01266 imageAreaOrg.x = blockID.x * get_local_size(0);
01267 imageAreaOrg.y = blockID.y * get_local_size(1);
01268
01269 int2 midFilterDimen;
01270 midFilterDimen.x = (filterWidth-1)/2;
01271 midFilterDimen.y = (filterHeight-1)/2;
01272
01273 int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
01274
01275
01276 int2 cachedAreaDimen;
01277 cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
01278 cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
01279
01280
01281 int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
01282 int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
01283 int groupSize = get_local_size(0) * get_local_size(1);
01284 for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
01285
01286 int2 cachedAreaIndex;
01287 cachedAreaIndex.x = i % cachedAreaDimen.x;
01288 cachedAreaIndex.y = i / cachedAreaDimen.x;
01289
01290 int2 imagePixelIndex;
01291 imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
01292
01293
01294
01295 imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
01296 imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
01297
01298 pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
01299 }
01300
01301
01302 for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
01303 filterCache[i] = filter[i];
01304 }
01305 barrier(CLK_LOCAL_MEM_FENCE);
01306
01307
01308 int2 imageIndex;
01309 imageIndex.x = imageAreaOrg.x + get_local_id(0);
01310 imageIndex.y = imageAreaOrg.y + get_local_id(1);
01311
01312
01313 if (imageIndex.x >= imageWidth
01314 || imageIndex.y >= imageHeight) {
01315 return;
01316 }
01317
01318 int filterIndex = 0;
01319 float4 sum = (float4)0.0f;
01320 float gamma = 0.0f;
01321 if (((channel & AlphaChannel) == 0) || (matte == 0)) {
01322 int cacheIndexY = get_local_id(1);
01323 for (int j = 0; j < filterHeight; j++) {
01324 int cacheIndexX = get_local_id(0);
01325 for (int i = 0; i < filterWidth; i++) {
01326 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
01327 float f = filterCache[filterIndex];
01328
01329 sum.x += f * p.x;
01330 sum.y += f * p.y;
01331 sum.z += f * p.z;
01332 sum.w += f * p.w;
01333
01334 gamma += f;
01335 filterIndex++;
01336 cacheIndexX++;
01337 }
01338 cacheIndexY++;
01339 }
01340 }
01341 else {
01342 int cacheIndexY = get_local_id(1);
01343 for (int j = 0; j < filterHeight; j++) {
01344 int cacheIndexX = get_local_id(0);
01345 for (int i = 0; i < filterWidth; i++) {
01346
01347 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
01348 float alpha = QuantumScale*p.w;
01349 float f = filterCache[filterIndex];
01350 float g = alpha * f;
01351
01352 sum.x += g*p.x;
01353 sum.y += g*p.y;
01354 sum.z += g*p.z;
01355 sum.w += f*p.w;
01356
01357 gamma += g;
01358 filterIndex++;
01359 cacheIndexX++;
01360 }
01361 cacheIndexY++;
01362 }
01363 gamma = PerceptibleReciprocal(gamma);
01364 sum.xyz = gamma*sum.xyz;
01365 }
01366 CLPixelType outputPixel;
01367 outputPixel.x = ClampToQuantum(sum.x);
01368 outputPixel.y = ClampToQuantum(sum.y);
01369 outputPixel.z = ClampToQuantum(sum.z);
01370 outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
01371
01372 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
01373 }
01374 )
01375
01376 STRINGIFY(
01377 __kernel
01378 void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
01379 const uint imageWidth, const uint imageHeight,
01380 __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
01381 const uint matte, const ChannelType channel) {
01382
01383 int2 imageIndex;
01384 imageIndex.x = get_global_id(0);
01385 imageIndex.y = get_global_id(1);
01386
01387
01388
01389
01390
01391 if (imageIndex.x >= imageWidth
01392 || imageIndex.y >= imageHeight)
01393 return;
01394
01395 int2 midFilterDimen;
01396 midFilterDimen.x = (filterWidth-1)/2;
01397 midFilterDimen.y = (filterHeight-1)/2;
01398
01399 int filterIndex = 0;
01400 float4 sum = (float4)0.0f;
01401 float gamma = 0.0f;
01402 if (((channel & AlphaChannel) == 0) || (matte == 0)) {
01403 for (int j = 0; j < filterHeight; j++) {
01404 int2 inputPixelIndex;
01405 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
01406 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
01407 for (int i = 0; i < filterWidth; i++) {
01408 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
01409 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
01410
01411 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
01412 float f = filter[filterIndex];
01413
01414 sum.x += f * p.x;
01415 sum.y += f * p.y;
01416 sum.z += f * p.z;
01417 sum.w += f * p.w;
01418
01419 gamma += f;
01420
01421 filterIndex++;
01422 }
01423 }
01424 }
01425 else {
01426
01427 for (int j = 0; j < filterHeight; j++) {
01428 int2 inputPixelIndex;
01429 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
01430 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
01431 for (int i = 0; i < filterWidth; i++) {
01432 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
01433 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
01434
01435 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
01436 float alpha = QuantumScale*p.w;
01437 float f = filter[filterIndex];
01438 float g = alpha * f;
01439
01440 sum.x += g*p.x;
01441 sum.y += g*p.y;
01442 sum.z += g*p.z;
01443 sum.w += f*p.w;
01444
01445 gamma += g;
01446
01447
01448 filterIndex++;
01449 }
01450 }
01451 gamma = PerceptibleReciprocal(gamma);
01452 sum.xyz = gamma*sum.xyz;
01453 }
01454
01455 CLPixelType outputPixel;
01456 outputPixel.x = ClampToQuantum(sum.x);
01457 outputPixel.y = ClampToQuantum(sum.y);
01458 outputPixel.z = ClampToQuantum(sum.z);
01459 outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
01460
01461 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
01462 }
01463 )
01464
01465
01466
01467
01468
01469
01470
01471
01472
01473
01474
01475
01476
01477 STRINGIFY(
01478
01479 __kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
01480 , const unsigned int imageWidth, const unsigned int imageHeight
01481 , const int2 offset, const int polarity, const int matte) {
01482
01483 int x = get_global_id(0);
01484 int y = get_global_id(1);
01485
01486 CLPixelType v = inputImage[y*imageWidth+x];
01487
01488 int2 neighbor;
01489 neighbor.y = y + offset.y;
01490 neighbor.x = x + offset.x;
01491
01492 int2 clampedNeighbor;
01493 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
01494 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
01495
01496 CLPixelType r = (clampedNeighbor.x == neighbor.x
01497 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
01498 :(CLPixelType)0;
01499
01500 int sv[4];
01501 sv[0] = (int)v.x;
01502 sv[1] = (int)v.y;
01503 sv[2] = (int)v.z;
01504 sv[3] = (int)v.w;
01505
01506 int sr[4];
01507 sr[0] = (int)r.x;
01508 sr[1] = (int)r.y;
01509 sr[2] = (int)r.z;
01510 sr[3] = (int)r.w;
01511
01512 if (polarity > 0) {
01513 \n #pragma unroll 4\n
01514 for (unsigned int i = 0; i < 4; i++) {
01515 sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
01516 }
01517 }
01518 else {
01519 \n #pragma unroll 4\n
01520 for (unsigned int i = 0; i < 4; i++) {
01521 sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
01522 }
01523
01524 }
01525
01526 v.x = (CLQuantum)sv[0];
01527 v.y = (CLQuantum)sv[1];
01528 v.z = (CLQuantum)sv[2];
01529
01530 if (matte!=0)
01531 v.w = (CLQuantum)sv[3];
01532
01533 outputImage[y*imageWidth+x] = v;
01534
01535 }
01536
01537
01538 )
01539
01540 STRINGIFY(
01541
01542 __kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
01543 , const unsigned int imageWidth, const unsigned int imageHeight
01544 , const int2 offset, const int polarity, const int matte) {
01545
01546 int x = get_global_id(0);
01547 int y = get_global_id(1);
01548
01549 CLPixelType v = inputImage[y*imageWidth+x];
01550
01551 int2 neighbor, clampedNeighbor;
01552
01553 neighbor.y = y + offset.y;
01554 neighbor.x = x + offset.x;
01555 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
01556 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
01557
01558 CLPixelType r = (clampedNeighbor.x == neighbor.x
01559 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
01560 :(CLPixelType)0;
01561
01562
01563 neighbor.y = y - offset.y;
01564 neighbor.x = x - offset.x;
01565 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
01566 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
01567
01568 CLPixelType s = (clampedNeighbor.x == neighbor.x
01569 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
01570 :(CLPixelType)0;
01571
01572
01573 int sv[4];
01574 sv[0] = (int)v.x;
01575 sv[1] = (int)v.y;
01576 sv[2] = (int)v.z;
01577 sv[3] = (int)v.w;
01578
01579 int sr[4];
01580 sr[0] = (int)r.x;
01581 sr[1] = (int)r.y;
01582 sr[2] = (int)r.z;
01583 sr[3] = (int)r.w;
01584
01585 int ss[4];
01586 ss[0] = (int)s.x;
01587 ss[1] = (int)s.y;
01588 ss[2] = (int)s.z;
01589 ss[3] = (int)s.w;
01590
01591 if (polarity > 0) {
01592 \n #pragma unroll 4\n
01593 for (unsigned int i = 0; i < 4; i++) {
01594
01595
01596
01597
01598 sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
01599 }
01600 }
01601 else {
01602 \n #pragma unroll 4\n
01603 for (unsigned int i = 0; i < 4; i++) {
01604
01605
01606
01607 sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
01608 }
01609 }
01610
01611 v.x = (CLQuantum)sv[0];
01612 v.y = (CLQuantum)sv[1];
01613 v.z = (CLQuantum)sv[2];
01614
01615 if (matte!=0)
01616 v.w = (CLQuantum)sv[3];
01617
01618 outputImage[y*imageWidth+x] = v;
01619
01620 }
01621 )
01622
01623
01624
01625
01626
01627
01628
01629
01630
01631
01632
01633
01634
01635 STRINGIFY(
01636
01637
01638 __kernel void Equalize(__global CLPixelType * restrict im,
01639 const ChannelType channel,
01640 __global CLPixelType * restrict equalize_map,
01641 const float4 white, const float4 black)
01642 {
01643 const int x = get_global_id(0);
01644 const int y = get_global_id(1);
01645 const int columns = get_global_size(0);
01646 const int c = x + y * columns;
01647
01648 uint ePos;
01649 CLPixelType oValue, eValue;
01650 CLQuantum red, green, blue, alpha;
01651
01652
01653 oValue=im[c];
01654
01655 if ((channel & SyncChannels) != 0)
01656 {
01657 if (getRedF4(white) != getRedF4(black))
01658 {
01659 ePos = ScaleQuantumToMap(getRed(oValue));
01660 eValue = equalize_map[ePos];
01661 red = getRed(eValue);
01662 ePos = ScaleQuantumToMap(getGreen(oValue));
01663 eValue = equalize_map[ePos];
01664 green = getRed(eValue);
01665 ePos = ScaleQuantumToMap(getBlue(oValue));
01666 eValue = equalize_map[ePos];
01667 blue = getRed(eValue);
01668 ePos = ScaleQuantumToMap(getAlpha(oValue));
01669 eValue = equalize_map[ePos];
01670 alpha = getRed(eValue);
01671
01672
01673 im[c]=(CLPixelType)(blue, green, red, alpha);
01674 }
01675
01676 }
01677
01678
01679
01680
01681 }
01682 )
01683
01684
01685
01686
01687
01688
01689
01690
01691
01692
01693
01694
01695
01696 STRINGIFY(
01697
01698
01699
01700 CLQuantum ApplyFunction(float pixel,const MagickFunction function,
01701 const unsigned int number_parameters,__constant float *parameters)
01702 {
01703 float result = 0.0f;
01704
01705 switch (function)
01706 {
01707 case PolynomialFunction:
01708 {
01709 for (unsigned int i=0; i < number_parameters; i++)
01710 result = result*QuantumScale*pixel + parameters[i];
01711 result *= QuantumRange;
01712 break;
01713 }
01714 case SinusoidFunction:
01715 {
01716 float freq,phase,ampl,bias;
01717 freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
01718 phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
01719 ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
01720 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
01721 result = QuantumRange*(ampl*sin(2.0f*MagickPI*
01722 (freq*QuantumScale*pixel + phase/360.0f)) + bias);
01723 break;
01724 }
01725 case ArcsinFunction:
01726 {
01727 float width,range,center,bias;
01728 width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
01729 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
01730 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
01731 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
01732
01733 result = 2.0f/width*(QuantumScale*pixel - center);
01734 result = range/MagickPI*asin(result)+bias;
01735 result = ( result <= -1.0f ) ? bias - range/2.0f : result;
01736 result = ( result >= 1.0f ) ? bias + range/2.0f : result;
01737 result *= QuantumRange;
01738 break;
01739 }
01740 case ArctanFunction:
01741 {
01742 float slope,range,center,bias;
01743 slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
01744 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
01745 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
01746 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
01747 result = MagickPI*slope*(QuantumScale*pixel-center);
01748 result = QuantumRange*(range/MagickPI*atan(result) + bias);
01749 break;
01750 }
01751 case UndefinedFunction:
01752 break;
01753 }
01754 return(ClampToQuantum(result));
01755 }
01756 )
01757
01758 STRINGIFY(
01759
01760
01761
01762
01763
01764
01765
01766 __kernel void ComputeFunction(__global CLQuantum *image,const unsigned int number_channels,
01767 const ChannelType channel,const MagickFunction function,const unsigned int number_parameters,
01768 __constant float *parameters)
01769 {
01770 const unsigned int x = get_global_id(0);
01771 const unsigned int y = get_global_id(1);
01772 const unsigned int columns = get_global_size(0);
01773 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
01774
01775 float red;
01776 float green;
01777 float blue;
01778 float alpha;
01779
01780 ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
01781
01782 if ((channel & RedChannel) != 0)
01783 red=ApplyFunction(red, function, number_parameters, parameters);
01784
01785 if (number_channels > 2)
01786 {
01787 if ((channel & GreenChannel) != 0)
01788 green=ApplyFunction(green, function, number_parameters, parameters);
01789
01790 if ((channel & BlueChannel) != 0)
01791 blue=ApplyFunction(blue, function, number_parameters, parameters);
01792 }
01793
01794 if (((number_channels == 4) || (number_channels == 2)) &&
01795 ((channel & AlphaChannel) != 0))
01796 alpha=ApplyFunction(alpha, function, number_parameters, parameters);
01797
01798 WriteChannels(p, number_channels, channel, red, green, blue, alpha);
01799 }
01800 )
01801
01802
01803
01804
01805
01806
01807
01808
01809
01810
01811
01812
01813
01814 STRINGIFY(
01815 __kernel void Grayscale(__global CLQuantum *image,const int number_channels,
01816 const unsigned int colorspace,const unsigned int method)
01817 {
01818 const unsigned int x = get_global_id(0);
01819 const unsigned int y = get_global_id(1);
01820 const unsigned int columns = get_global_size(0);
01821 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
01822
01823 float
01824 blue,
01825 green,
01826 red;
01827
01828 red=getPixelRed(p);
01829 green=getPixelGreen(p);
01830 blue=getPixelBlue(p);
01831
01832 CLQuantum intensity=ClampToQuantum(GetPixelIntensity(colorspace, method, red, green, blue));
01833
01834 setPixelRed(p,intensity);
01835 setPixelGreen(p,intensity);
01836 setPixelBlue(p,intensity);
01837 }
01838 )
01839
01840
01841
01842
01843
01844
01845
01846
01847
01848
01849
01850
01851
01852 STRINGIFY(
01853
01854 __kernel void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *tmpImage,
01855 const int radius,
01856 const int imageWidth,
01857 const int imageHeight)
01858 {
01859 const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
01860
01861 int x = get_local_id(0);
01862 int y = get_global_id(1);
01863
01864 if ((x >= imageWidth) || (y >= imageHeight))
01865 return;
01866
01867 global CLPixelType *src = srcImage + y * imageWidth;
01868
01869 for (int i = x; i < imageWidth; i += get_local_size(0)) {
01870 float sum = 0.0f;
01871 float weight = 1.0f;
01872
01873 int j = i - radius;
01874 while ((j + 7) < i) {
01875 for (int k = 0; k < 8; ++k)
01876 sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
01877 weight += 8.0f;
01878 j+=8;
01879 }
01880 while (j < i) {
01881 sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
01882 weight += 1.0f;
01883 ++j;
01884 }
01885
01886 while ((j + 7) < radius + i) {
01887 for (int k = 0; k < 8; ++k)
01888 sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
01889 weight -= 8.0f;
01890 j+=8;
01891 }
01892 while (j < radius + i) {
01893 sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
01894 weight -= 1.0f;
01895 ++j;
01896 }
01897
01898 tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
01899 }
01900 }
01901 )
01902
01903 STRINGIFY(
01904 __kernel void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *blurImage,
01905 const int radius,
01906 const float strength,
01907 const int imageWidth,
01908 const int imageHeight)
01909 {
01910 const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);
01911
01912 int x = get_global_id(0);
01913 int y = get_global_id(1);
01914
01915 if ((x >= imageWidth) || (y >= imageHeight))
01916 return;
01917
01918 global float *src = blurImage + x;
01919
01920 float sum = 0.0f;
01921 float weight = 1.0f;
01922
01923 int j = y - radius;
01924 while ((j + 7) < y) {
01925 for (int k = 0; k < 8; ++k)
01926 sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
01927 weight += 8.0f;
01928 j+=8;
01929 }
01930 while (j < y) {
01931 sum += weight * src[mirrorBottom(j) * imageWidth];
01932 weight += 1.0f;
01933 ++j;
01934 }
01935
01936 while ((j + 7) < radius + y) {
01937 for (int k = 0; k < 8; ++k)
01938 sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
01939 weight -= 8.0f;
01940 j+=8;
01941 }
01942 while (j < radius + y) {
01943 sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
01944 weight -= 1.0f;
01945 ++j;
01946 }
01947
01948 CLPixelType pixel = srcImage[x + y * imageWidth];
01949 float srcVal = dot(RGB, convert_float4(pixel));
01950 float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);
01951 mult = (srcVal + mult) / srcVal;
01952
01953 pixel.x = ClampToQuantum(pixel.x * mult);
01954 pixel.y = ClampToQuantum(pixel.y * mult);
01955 pixel.z = ClampToQuantum(pixel.z * mult);
01956
01957 dstImage[x + y * imageWidth] = pixel;
01958 }
01959 )
01960
01961
01962
01963
01964
01965
01966
01967
01968
01969
01970
01971
01972
01973 STRINGIFY(
01974
01975 static inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue,
01976 float *hue, float *saturation, float *lightness)
01977 {
01978 float
01979 c,
01980 tmax,
01981 tmin;
01982
01983
01984
01985
01986 tmax=MagickMax(QuantumScale*red,MagickMax(QuantumScale*green, QuantumScale*blue));
01987 tmin=MagickMin(QuantumScale*red,MagickMin(QuantumScale*green, QuantumScale*blue));
01988
01989 c=tmax-tmin;
01990
01991 *lightness=(tmax+tmin)/2.0;
01992 if (c <= 0.0)
01993 {
01994 *hue=0.0;
01995 *saturation=0.0;
01996 return;
01997 }
01998
01999 if (tmax == (QuantumScale*red))
02000 {
02001 *hue=(QuantumScale*green-QuantumScale*blue)/c;
02002 if ((QuantumScale*green) < (QuantumScale*blue))
02003 *hue+=6.0;
02004 }
02005 else
02006 if (tmax == (QuantumScale*green))
02007 *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c;
02008 else
02009 *hue=4.0+(QuantumScale*red-QuantumScale*green)/c;
02010
02011 *hue*=60.0/360.0;
02012 if (*lightness <= 0.5)
02013 *saturation=c/(2.0*(*lightness));
02014 else
02015 *saturation=c/(2.0-2.0*(*lightness));
02016 }
02017
02018 static inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness,
02019 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
02020 {
02021 float
02022 b,
02023 c,
02024 g,
02025 h,
02026 tmin,
02027 r,
02028 x;
02029
02030
02031
02032
02033 h=hue*360.0;
02034 if (lightness <= 0.5)
02035 c=2.0*lightness*saturation;
02036 else
02037 c=(2.0-2.0*lightness)*saturation;
02038 tmin=lightness-0.5*c;
02039 h-=360.0*floor(h/360.0);
02040 h/=60.0;
02041 x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
02042 switch ((int) floor(h) % 6)
02043 {
02044 case 0:
02045 {
02046 r=tmin+c;
02047 g=tmin+x;
02048 b=tmin;
02049 break;
02050 }
02051 case 1:
02052 {
02053 r=tmin+x;
02054 g=tmin+c;
02055 b=tmin;
02056 break;
02057 }
02058 case 2:
02059 {
02060 r=tmin;
02061 g=tmin+c;
02062 b=tmin+x;
02063 break;
02064 }
02065 case 3:
02066 {
02067 r=tmin;
02068 g=tmin+x;
02069 b=tmin+c;
02070 break;
02071 }
02072 case 4:
02073 {
02074 r=tmin+x;
02075 g=tmin;
02076 b=tmin+c;
02077 break;
02078 }
02079 case 5:
02080 {
02081 r=tmin+c;
02082 g=tmin;
02083 b=tmin+x;
02084 break;
02085 }
02086 default:
02087 {
02088 r=0.0;
02089 g=0.0;
02090 b=0.0;
02091 }
02092 }
02093 *red=ClampToQuantum(QuantumRange*r);
02094 *green=ClampToQuantum(QuantumRange*g);
02095 *blue=ClampToQuantum(QuantumRange*b);
02096 }
02097
02098 static inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness,
02099 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
02100 {
02101 float
02102 hue,
02103 lightness,
02104 saturation;
02105
02106
02107
02108
02109 ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);
02110 hue+=0.5*(0.01*percent_hue-1.0);
02111 while (hue < 0.0)
02112 hue+=1.0;
02113 while (hue >= 1.0)
02114 hue-=1.0;
02115 saturation*=0.01*percent_saturation;
02116 lightness*=0.01*percent_lightness;
02117 ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
02118 }
02119
02120 __kernel void Modulate(__global CLPixelType *im,
02121 const float percent_brightness,
02122 const float percent_hue,
02123 const float percent_saturation,
02124 const int colorspace)
02125 {
02126
02127 const int x = get_global_id(0);
02128 const int y = get_global_id(1);
02129 const int columns = get_global_size(0);
02130 const int c = x + y * columns;
02131
02132 CLPixelType pixel = im[c];
02133
02134 CLQuantum
02135 blue,
02136 green,
02137 red;
02138
02139 red=getRed(pixel);
02140 green=getGreen(pixel);
02141 blue=getBlue(pixel);
02142
02143 switch (colorspace)
02144 {
02145 case HSLColorspace:
02146 default:
02147 {
02148 ModulateHSL(percent_hue, percent_saturation, percent_brightness,
02149 &red, &green, &blue);
02150 }
02151
02152 }
02153
02154 CLPixelType filteredPixel;
02155
02156 setRed(&filteredPixel, red);
02157 setGreen(&filteredPixel, green);
02158 setBlue(&filteredPixel, blue);
02159 filteredPixel.w = pixel.w;
02160
02161 im[c] = filteredPixel;
02162 }
02163 )
02164
02165
02166
02167
02168
02169
02170
02171
02172
02173
02174
02175
02176
02177 STRINGIFY(
02178 __kernel
02179 void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output,
02180 const unsigned int imageWidth, const unsigned int imageHeight,
02181 const __global float *filter, const unsigned int width, const __global int2* offset,
02182 const float4 bias,
02183 const ChannelType channel, const unsigned int matte) {
02184
02185 int2 currentPixel;
02186 currentPixel.x = get_global_id(0);
02187 currentPixel.y = get_global_id(1);
02188
02189 if (currentPixel.x >= imageWidth
02190 || currentPixel.y >= imageHeight)
02191 return;
02192
02193 float4 pixel;
02194 pixel.x = (float)bias.x;
02195 pixel.y = (float)bias.y;
02196 pixel.z = (float)bias.z;
02197 pixel.w = (float)bias.w;
02198
02199 if (((channel & AlphaChannel) == 0) || (matte == 0)) {
02200
02201 for (int i = 0; i < width; i++) {
02202
02203
02204 int2 samplePixel = currentPixel + offset[i];
02205 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
02206 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
02207 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
02208
02209 pixel.x += (filter[i] * (float)samplePixelValue.x);
02210 pixel.y += (filter[i] * (float)samplePixelValue.y);
02211 pixel.z += (filter[i] * (float)samplePixelValue.z);
02212 pixel.w += (filter[i] * (float)samplePixelValue.w);
02213 }
02214
02215 CLPixelType outputPixel;
02216 outputPixel.x = ClampToQuantum(pixel.x);
02217 outputPixel.y = ClampToQuantum(pixel.y);
02218 outputPixel.z = ClampToQuantum(pixel.z);
02219 outputPixel.w = ClampToQuantum(pixel.w);
02220 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
02221 }
02222 else {
02223
02224 float gamma = 0.0f;
02225 for (int i = 0; i < width; i++) {
02226
02227
02228 int2 samplePixel = currentPixel + offset[i];
02229 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
02230 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
02231
02232 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
02233
02234 float alpha = QuantumScale*samplePixelValue.w;
02235 float k = filter[i];
02236 pixel.x = pixel.x + k * alpha * samplePixelValue.x;
02237 pixel.y = pixel.y + k * alpha * samplePixelValue.y;
02238 pixel.z = pixel.z + k * alpha * samplePixelValue.z;
02239
02240 pixel.w += k * alpha * samplePixelValue.w;
02241
02242 gamma+=k*alpha;
02243 }
02244 gamma = PerceptibleReciprocal(gamma);
02245 pixel.xyz = gamma*pixel.xyz;
02246
02247 CLPixelType outputPixel;
02248 outputPixel.x = ClampToQuantum(pixel.x);
02249 outputPixel.y = ClampToQuantum(pixel.y);
02250 outputPixel.z = ClampToQuantum(pixel.z);
02251 outputPixel.w = ClampToQuantum(pixel.w);
02252 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
02253 }
02254 }
02255 )
02256
02257
02258
02259
02260
02261
02262
02263
02264
02265
02266
02267
02268
02269 STRINGIFY(
02270
02271 float BoxResizeFilter(const float x)
02272 {
02273 return 1.0f;
02274 }
02275 )
02276
02277 STRINGIFY(
02278
02279 float CubicBC(const float x,const __global float* resizeFilterCoefficients)
02280 {
02281
02282
02283
02284
02285
02286
02287
02288
02289
02290
02291
02292
02293
02294
02295
02296
02297
02298
02299
02300
02301
02302
02303
02304
02305
02306
02307
02308
02309
02310 if (x < 1.0)
02311 return(resizeFilterCoefficients[0]+x*(x*
02312 (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
02313 if (x < 2.0)
02314 return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
02315 (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
02316 return(0.0);
02317 }
02318 )
02319
02320 STRINGIFY(
02321 float Sinc(const float x)
02322 {
02323 if (x != 0.0f)
02324 {
02325 const float alpha=(float) (MagickPI*x);
02326 return sinpi(x)/alpha;
02327 }
02328 return(1.0f);
02329 }
02330 )
02331
02332 STRINGIFY(
02333 float Triangle(const float x)
02334 {
02335
02336
02337
02338
02339
02340 return ((x<1.0f)?(1.0f-x):0.0f);
02341 }
02342 )
02343
02344
02345 STRINGIFY(
02346 float Hann(const float x)
02347 {
02348
02349
02350
02351
02352 const float cosine=cos((MagickPI*x));
02353 return(0.5f+0.5f*cosine);
02354 }
02355 )
02356
02357 STRINGIFY(
02358 float Hamming(const float x)
02359 {
02360
02361
02362
02363
02364 const float cosine=cos((MagickPI*x));
02365 return(0.54f+0.46f*cosine);
02366 }
02367 )
02368
02369 STRINGIFY(
02370 float Blackman(const float x)
02371 {
02372
02373
02374
02375
02376
02377
02378
02379 const float cosine=cos((MagickPI*x));
02380 return(0.34f+cosine*(0.5f+cosine*0.16f));
02381 }
02382 )
02383
02384 STRINGIFY(
02385 static inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients)
02386 {
02387 switch (filterType)
02388 {
02389
02390
02391 case SincWeightingFunction:
02392 case SincFastWeightingFunction:
02393 return Sinc(x);
02394 case CubicBCWeightingFunction:
02395 return CubicBC(x,filterCoefficients);
02396 case BoxWeightingFunction:
02397 return BoxResizeFilter(x);
02398 case TriangleWeightingFunction:
02399 return Triangle(x);
02400 case HannWeightingFunction:
02401 return Hann(x);
02402 case HammingWeightingFunction:
02403 return Hamming(x);
02404 case BlackmanWeightingFunction:
02405 return Blackman(x);
02406
02407 default:
02408 return 0.0f;
02409 }
02410 }
02411 )
02412
02413
02414 STRINGIFY(
02415 static inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType
02416 , const ResizeWeightingFunctionType resizeWindowType
02417 , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x)
02418 {
02419 float scale;
02420 float xBlur = fabs(x/resizeFilterBlur);
02421 if (resizeWindowSupport < MagickEpsilon
02422 || resizeWindowType == BoxWeightingFunction)
02423 {
02424 scale = 1.0f;
02425 }
02426 else
02427 {
02428 scale = resizeFilterScale;
02429 scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
02430 }
02431 float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
02432 return weight;
02433 }
02434
02435 )
02436
02437 ;
02438 const char *accelerateKernels2 =
02439
02440 STRINGIFY(
02441
02442 static inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
02443 return (numWorkItems/pixelPerWorkgroup);
02444 }
02445
02446
02447
02448 static inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
02449 const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
02450 int pixelIndex = itemID/numWorkItemsPerPixel;
02451 pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
02452 return pixelIndex;
02453 }
02454
02455 )
02456
02457 STRINGIFY(
02458 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
02459 void ResizeHorizontalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
02460 const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
02461 const unsigned int filteredColumns, const unsigned int filteredRows, const float xFactor,
02462 const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
02463 const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
02464 const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
02465 const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
02466 __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
02467 {
02468
02469 const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
02470 const unsigned int stopX = MagickMin(startX + pixelPerWorkgroup,filteredColumns);
02471 const unsigned int actualNumPixelToCompute = stopX - startX;
02472
02473
02474 float scale = MagickMax(1.0f/xFactor+MagickEpsilon ,1.0f);
02475 const float support = MagickMax(scale*resizeFilterSupport,0.5f);
02476 scale = PerceptibleReciprocal(scale);
02477
02478 const int cacheRangeStartX = MagickMax((int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(int)(0));
02479 const int cacheRangeEndX = MagickMin((int)(cacheRangeStartX + numCachedPixels), (int)inputColumns);
02480
02481
02482 const unsigned int y = get_global_id(1);
02483 const unsigned int pos = getPixelIndex(number_channels, inputColumns, cacheRangeStartX, y);
02484 const unsigned int num_elements = (cacheRangeEndX - cacheRangeStartX) * number_channels;
02485 event_t e = async_work_group_copy(inputImageCache, inputImage + pos, num_elements, 0);
02486 wait_group_events(1, &e);
02487
02488 unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
02489 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
02490 for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
02491 {
02492 const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
02493 const unsigned int chunkStopX = MagickMin(chunkStartX + pixelChunkSize, stopX);
02494 const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
02495
02496
02497 const unsigned int itemID = get_local_id(0);
02498 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
02499
02500 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
02501
02502 float4 filteredPixel = (float4)0.0f;
02503 float density = 0.0f;
02504 float gamma = 0.0f;
02505
02506 if (pixelIndex != -1)
02507 {
02508
02509 const int x = chunkStartX + pixelIndex;
02510
02511
02512 const float bisect = (x+0.5)/xFactor+MagickEpsilon;
02513 const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
02514 const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputColumns);
02515 const unsigned int n = stop - start;
02516
02517
02518 unsigned int numStepsPerWorkItem = n / numItems;
02519 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
02520
02521 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
02522 if (startStep < n)
02523 {
02524 const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
02525
02526 unsigned int cacheIndex = start+startStep-cacheRangeStartX;
02527 for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
02528 {
02529 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
02530 (ResizeWeightingFunctionType) resizeFilterType,
02531 (ResizeWeightingFunctionType) resizeWindowType,
02532 resizeFilterScale, resizeFilterWindowSupport,
02533 resizeFilterBlur, scale*(start + i - bisect + 0.5));
02534
02535 float4 cp = (float4)0.0f;
02536
02537 __local CLQuantum *p = inputImageCache + (cacheIndex*number_channels);
02538 cp.x = (float) *(p);
02539 if (number_channels > 2)
02540 {
02541 cp.y = (float) *(p + 1);
02542 cp.z = (float) *(p + 2);
02543 }
02544
02545 if (alpha_index != 0)
02546 {
02547 cp.w = (float) *(p + alpha_index);
02548
02549 float alpha = weight * QuantumScale * cp.w;
02550
02551 filteredPixel.x += alpha * cp.x;
02552 filteredPixel.y += alpha * cp.y;
02553 filteredPixel.z += alpha * cp.z;
02554 filteredPixel.w += weight * cp.w;
02555 gamma += alpha;
02556 }
02557 else
02558 filteredPixel += ((float4) weight)*cp;
02559
02560 density += weight;
02561 }
02562 }
02563 }
02564
02565
02566 if (itemID < actualNumPixelInThisChunk) {
02567 outputPixelCache[itemID] = (float4)0.0f;
02568 densityCache[itemID] = 0.0f;
02569 if (alpha_index != 0)
02570 gammaCache[itemID] = 0.0f;
02571 }
02572 barrier(CLK_LOCAL_MEM_FENCE);
02573
02574
02575 for (unsigned int i = 0; i < numItems; i++) {
02576 if (pixelIndex != -1) {
02577 if (itemID%numItems == i) {
02578 outputPixelCache[pixelIndex]+=filteredPixel;
02579 densityCache[pixelIndex]+=density;
02580 if (alpha_index != 0)
02581 gammaCache[pixelIndex]+=gamma;
02582 }
02583 }
02584 barrier(CLK_LOCAL_MEM_FENCE);
02585 }
02586
02587 if (itemID < actualNumPixelInThisChunk)
02588 {
02589 float4 filteredPixel = outputPixelCache[itemID];
02590
02591 float gamma = 0.0f;
02592 if (alpha_index != 0)
02593 gamma = gammaCache[itemID];
02594
02595 float density = densityCache[itemID];
02596 if ((density != 0.0f) && (density != 1.0f))
02597 {
02598 density = PerceptibleReciprocal(density);
02599 filteredPixel *= (float4) density;
02600 if (alpha_index != 0)
02601 gamma *= density;
02602 }
02603
02604 if (alpha_index != 0)
02605 {
02606 gamma = PerceptibleReciprocal(gamma);
02607 filteredPixel.x *= gamma;
02608 filteredPixel.y *= gamma;
02609 filteredPixel.z *= gamma;
02610 }
02611
02612 WriteAllChannels(filteredImage, number_channels, filteredColumns, chunkStartX + itemID, y, filteredPixel);
02613 }
02614 }
02615 }
02616 )
02617
02618
02619 STRINGIFY(
02620 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
02621 void ResizeVerticalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
02622 const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
02623 const unsigned int filteredColumns, const unsigned int filteredRows, const float yFactor,
02624 const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
02625 const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
02626 const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
02627 const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
02628 __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
02629 {
02630
02631 const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
02632 const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows);
02633 const unsigned int actualNumPixelToCompute = stopY - startY;
02634
02635
02636 float scale = MagickMax(1.0f/yFactor+MagickEpsilon ,1.0f);
02637 const float support = MagickMax(scale*resizeFilterSupport,0.5f);
02638 scale = PerceptibleReciprocal(scale);
02639
02640 const int cacheRangeStartY = MagickMax((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0));
02641 const int cacheRangeEndY = MagickMin((int)(cacheRangeStartY + numCachedPixels), (int)inputRows);
02642
02643
02644 const unsigned int x = get_global_id(0);
02645 unsigned int pos = getPixelIndex(number_channels, inputColumns, x, cacheRangeStartY);
02646 unsigned int rangeLength = cacheRangeEndY-cacheRangeStartY;
02647 unsigned int stride = inputColumns * number_channels;
02648 for (unsigned int i = 0; i < number_channels; i++)
02649 {
02650 event_t e = async_work_group_strided_copy(inputImageCache + (rangeLength*i), inputImage+pos+i, rangeLength, stride, 0);
02651 wait_group_events(1,&e);
02652 }
02653
02654 unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
02655 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
02656 for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
02657 {
02658 const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
02659 const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY);
02660 const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
02661
02662
02663 const unsigned int itemID = get_local_id(1);
02664 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
02665
02666 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
02667
02668 float4 filteredPixel = (float4)0.0f;
02669 float density = 0.0f;
02670 float gamma = 0.0f;
02671
02672 if (pixelIndex != -1)
02673 {
02674
02675 const int y = chunkStartY + pixelIndex;
02676
02677
02678 const float bisect = (y+0.5)/yFactor+MagickEpsilon;
02679 const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
02680 const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputRows);
02681 const unsigned int n = stop - start;
02682
02683
02684 unsigned int numStepsPerWorkItem = n / numItems;
02685 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
02686
02687 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
02688 if (startStep < n)
02689 {
02690 const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
02691
02692 unsigned int cacheIndex = start+startStep-cacheRangeStartY;
02693 for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
02694 {
02695 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
02696 (ResizeWeightingFunctionType) resizeFilterType,
02697 (ResizeWeightingFunctionType) resizeWindowType,
02698 resizeFilterScale, resizeFilterWindowSupport,
02699 resizeFilterBlur, scale*(start + i - bisect + 0.5));
02700
02701 float4 cp = (float4)0.0f;
02702
02703 __local CLQuantum *p = inputImageCache + cacheIndex;
02704 cp.x = (float) *(p);
02705 if (number_channels > 2)
02706 {
02707 cp.y = (float) *(p + rangeLength);
02708 cp.z = (float) *(p + (rangeLength * 2));
02709 }
02710
02711 if (alpha_index != 0)
02712 {
02713 cp.w = (float) *(p + (rangeLength * alpha_index));
02714
02715 float alpha = weight * QuantumScale * cp.w;
02716
02717 filteredPixel.x += alpha * cp.x;
02718 filteredPixel.y += alpha * cp.y;
02719 filteredPixel.z += alpha * cp.z;
02720 filteredPixel.w += weight * cp.w;
02721 gamma += alpha;
02722 }
02723 else
02724 filteredPixel += ((float4) weight)*cp;
02725
02726 density += weight;
02727 }
02728 }
02729 }
02730
02731
02732 if (itemID < actualNumPixelInThisChunk) {
02733 outputPixelCache[itemID] = (float4)0.0f;
02734 densityCache[itemID] = 0.0f;
02735 if (alpha_index != 0)
02736 gammaCache[itemID] = 0.0f;
02737 }
02738 barrier(CLK_LOCAL_MEM_FENCE);
02739
02740
02741 for (unsigned int i = 0; i < numItems; i++) {
02742 if (pixelIndex != -1) {
02743 if (itemID%numItems == i) {
02744 outputPixelCache[pixelIndex]+=filteredPixel;
02745 densityCache[pixelIndex]+=density;
02746 if (alpha_index != 0)
02747 gammaCache[pixelIndex]+=gamma;
02748 }
02749 }
02750 barrier(CLK_LOCAL_MEM_FENCE);
02751 }
02752
02753 if (itemID < actualNumPixelInThisChunk)
02754 {
02755 float4 filteredPixel = outputPixelCache[itemID];
02756
02757 float gamma = 0.0f;
02758 if (alpha_index != 0)
02759 gamma = gammaCache[itemID];
02760
02761 float density = densityCache[itemID];
02762 if ((density != 0.0f) && (density != 1.0f))
02763 {
02764 density = PerceptibleReciprocal(density);
02765 filteredPixel *= (float4) density;
02766 if (alpha_index != 0)
02767 gamma *= density;
02768 }
02769
02770 if (alpha_index != 0)
02771 {
02772 gamma = PerceptibleReciprocal(gamma);
02773 filteredPixel.x *= gamma;
02774 filteredPixel.y *= gamma;
02775 filteredPixel.z *= gamma;
02776 }
02777
02778 WriteAllChannels(filteredImage, number_channels, filteredColumns, x, chunkStartY + itemID, filteredPixel);
02779 }
02780 }
02781 }
02782 )
02783
02784
02785
02786
02787
02788
02789
02790
02791
02792
02793
02794
02795
02796 STRINGIFY(
02797 __kernel void RotationalBlur(const __global CLQuantum *image,
02798 const unsigned int number_channels,const unsigned int channel,
02799 const float2 blurCenter,__constant float *cos_theta,
02800 __constant float *sin_theta,const unsigned int cossin_theta_size,
02801 __global CLQuantum *filteredImage)
02802 {
02803 const int x = get_global_id(0);
02804 const int y = get_global_id(1);
02805 const int columns = get_global_size(0);
02806 const int rows = get_global_size(1);
02807 unsigned int step = 1;
02808 float center_x = (float) x - blurCenter.x;
02809 float center_y = (float) y - blurCenter.y;
02810 float radius = hypot(center_x, center_y);
02811
02812 float blur_radius = hypot(blurCenter.x, blurCenter.y);
02813
02814 if (radius > MagickEpsilon)
02815 {
02816 step = (unsigned int) (blur_radius / radius);
02817 if (step == 0)
02818 step = 1;
02819 if (step >= cossin_theta_size)
02820 step = cossin_theta_size-1;
02821 }
02822
02823 float4 result = 0.0f;
02824 float normalize = 0.0f;
02825 float gamma = 0.0f;
02826
02827 for (unsigned int i=0; i<cossin_theta_size; i+=step)
02828 {
02829 int cx = ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns);
02830 int cy = ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f,rows);
02831
02832 float4 pixel = ReadAllChannels(image, number_channels, columns, cx, cy);
02833
02834 if ((number_channels == 4) || (number_channels == 2))
02835 {
02836 float alpha = (float)(QuantumScale*pixel.w);
02837
02838 gamma += alpha;
02839
02840 result.x += alpha * pixel.x;
02841 result.y += alpha * pixel.y;
02842 result.z += alpha * pixel.z;
02843 result.w += pixel.w;
02844 }
02845 else
02846 result += pixel;
02847
02848 normalize += 1.0f;
02849 }
02850
02851 normalize = PerceptibleReciprocal(normalize);
02852
02853 if ((number_channels == 4) || (number_channels == 2))
02854 {
02855 gamma = PerceptibleReciprocal(gamma);
02856 result.x *= gamma;
02857 result.y *= gamma;
02858 result.z *= gamma;
02859 result.w *= normalize;
02860 }
02861 else
02862 result *= normalize;
02863
02864 WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
02865 }
02866 )
02867
02868
02869
02870
02871
02872
02873
02874
02875
02876
02877
02878
02879
02880 STRINGIFY(
02881 __kernel void UnsharpMaskBlurColumn(const __global CLQuantum* image,
02882 const __global float4 *blurRowData,const unsigned int number_channels,
02883 const ChannelType channel,const unsigned int columns,
02884 const unsigned int rows,__local float4* cachedData,
02885 __local float* cachedFilter,const __global float *filter,
02886 const unsigned int width,const float gain, const float threshold,
02887 __global CLQuantum *filteredImage)
02888 {
02889 const unsigned int radius = (width-1)/2;
02890
02891
02892 const int groupX = get_group_id(0);
02893 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
02894 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
02895
02896 if ((groupStartY >= 0) && (groupStopY < rows))
02897 {
02898 event_t e = async_work_group_strided_copy(cachedData,
02899 blurRowData+groupStartY*columns+groupX,groupStopY-groupStartY,columns,0);
02900 wait_group_events(1,&e);
02901 }
02902 else
02903 {
02904 for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1))
02905 cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,rows)*columns + groupX];
02906
02907 barrier(CLK_LOCAL_MEM_FENCE);
02908 }
02909
02910 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
02911 wait_group_events(1,&e);
02912
02913
02914 const int cy = get_global_id(1);
02915
02916 if (cy < rows)
02917 {
02918 float4 blurredPixel = (float4) 0.0f;
02919
02920 int i = 0;
02921
02922 for ( ; i+7 < width; )
02923 {
02924 for (int j=0; j < 8; j++, i++)
02925 blurredPixel+=cachedFilter[i+j]*cachedData[i+j+get_local_id(1)];
02926 }
02927
02928 for ( ; i < width; i++)
02929 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
02930
02931 float4 inputImagePixel = ReadFloat4(image,number_channels,columns,groupX,cy,channel);
02932 float4 outputPixel = inputImagePixel - blurredPixel;
02933
02934 float quantumThreshold = QuantumRange*threshold;
02935
02936 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
02937 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
02938
02939
02940 WriteFloat4(filteredImage,number_channels,columns,groupX,cy,channel,outputPixel);
02941 }
02942 }
02943 )
02944
02945 STRINGIFY(
02946 __kernel void UnsharpMask(const __global CLQuantum *image,const unsigned int number_channels,
02947 const ChannelType channel,__constant float *filter,const unsigned int width,
02948 const unsigned int columns,const unsigned int rows,__local float4 *pixels,
02949 const float gain,const float threshold,__global CLQuantum *filteredImage)
02950 {
02951 const unsigned int x = get_global_id(0);
02952 const unsigned int y = get_global_id(1);
02953
02954 const unsigned int radius = (width - 1) / 2;
02955
02956 int row = y - radius;
02957 int baseRow = get_group_id(1) * get_local_size(1) - radius;
02958 int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
02959
02960 while (row < endRow) {
02961 int srcy = (row < 0) ? -row : row;
02962 srcy = (srcy >= rows) ? (2 * rows - srcy - 1) : srcy;
02963
02964 float4 value = 0.0f;
02965
02966 int ix = x - radius;
02967 int i = 0;
02968
02969 while (i + 7 < width) {
02970 for (int j = 0; j < 8; ++j) {
02971 int srcx = ix + j;
02972 srcx = (srcx < 0) ? -srcx : srcx;
02973 srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
02974 value += filter[i + j] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
02975 }
02976 ix += 8;
02977 i += 8;
02978 }
02979
02980 while (i < width) {
02981 int srcx = (ix < 0) ? -ix : ix;
02982 srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
02983 value += filter[i] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
02984 ++i;
02985 ++ix;
02986 }
02987 pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
02988 row += get_local_size(1);
02989 }
02990
02991 barrier(CLK_LOCAL_MEM_FENCE);
02992
02993 const int px = get_local_id(0);
02994 const int py = get_local_id(1);
02995 const int prp = get_local_size(0);
02996 float4 value = (float4)(0.0f);
02997
02998 int i = 0;
02999 while (i + 7 < width) {
03000 for (int j = 0; j < 8; ++j)
03001 value += (float4)(filter[i]) * pixels[px + (py + i + j) * prp];
03002 i += 8;
03003 }
03004 while (i < width) {
03005 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
03006 ++i;
03007 }
03008
03009 if ((x < columns) && (y < rows)) {
03010 float4 srcPixel = ReadFloat4(image, number_channels, columns, x, y, channel);
03011 float4 diff = srcPixel - value;
03012
03013 float quantumThreshold = QuantumRange*threshold;
03014
03015 int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
03016 value = select(srcPixel + diff * gain, srcPixel, mask);
03017
03018 WriteFloat4(filteredImage, number_channels, columns, x, y, channel, value);
03019 }
03020 }
03021 )
03022
03023 STRINGIFY(
03024 __kernel __attribute__((reqd_work_group_size(64, 4, 1)))
03025 void WaveletDenoise(__global CLQuantum *srcImage,__global CLQuantum *dstImage,
03026 const unsigned int number_channels,const unsigned int max_channels,
03027 const float threshold,const int passes,const unsigned int imageWidth,
03028 const unsigned int imageHeight)
03029 {
03030 const int pad = (1 << (passes - 1));
03031 const int tileSize = 64;
03032 const int tileRowPixels = 64;
03033 const float noise[] = { 0.8002, 0.2735, 0.1202, 0.0585, 0.0291, 0.0152, 0.0080, 0.0044 };
03034
03035 CLQuantum stage[48];
03036
03037 local float buffer[64 * 64];
03038
03039 int srcx = (get_group_id(0) + get_global_offset(0) / tileSize) * (tileSize - 2 * pad) - pad + get_local_id(0);
03040 int srcy = (get_group_id(1) + get_global_offset(1) / 4) * (tileSize - 2 * pad) - pad;
03041
03042 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
03043 int pos = (mirrorTop(mirrorBottom(srcx), imageWidth) * number_channels) +
03044 (mirrorTop(mirrorBottom(srcy + i), imageHeight)) * imageWidth * number_channels;
03045
03046 for (int channel = 0; channel < max_channels; ++channel)
03047 stage[(i / 4) + (16 * channel)] = srcImage[pos + channel];
03048 }
03049
03050 for (int channel = 0; channel < max_channels; ++channel) {
03051
03052 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03053 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[(i / 4) + (16 * channel)]);
03054
03055
03056
03057 float tmp[16];
03058 float accum[16];
03059 float pixel;
03060
03061 for (int i = 0; i < 16; i++)
03062 accum[i]=0.0f;
03063
03064 for (int pass = 0; pass < passes; ++pass) {
03065 const int radius = 1 << pass;
03066 const int x = get_local_id(0);
03067 const float thresh = threshold * noise[pass];
03068
03069
03070 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
03071 const int offset = i * tileRowPixels;
03072 if (pass == 0)
03073 tmp[i / 4] = buffer[x + offset];
03074 pixel = 0.5f * tmp[i / 4] + 0.25 * (buffer[mirrorBottom(x - radius) + offset] + buffer[mirrorTop(x + radius, tileSize) + offset]);
03075 barrier(CLK_LOCAL_MEM_FENCE);
03076 buffer[x + offset] = pixel;
03077 }
03078 barrier(CLK_LOCAL_MEM_FENCE);
03079
03080
03081 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
03082 pixel = 0.5f * buffer[x + i * tileRowPixels] + 0.25 * (buffer[x + mirrorBottom(i - radius) * tileRowPixels] + buffer[x + mirrorTop(i + radius, tileRowPixels) * tileRowPixels]);
03083 float delta = tmp[i / 4] - pixel;
03084 tmp[i / 4] = pixel;
03085 if (delta < -thresh)
03086 delta += thresh;
03087 else if (delta > thresh)
03088 delta -= thresh;
03089 else
03090 delta = 0;
03091 accum[i / 4] += delta;
03092 }
03093 barrier(CLK_LOCAL_MEM_FENCE);
03094
03095 if (pass < passes - 1)
03096 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03097 buffer[x + i * tileRowPixels] = tmp[i / 4];
03098 else
03099 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03100 accum[i / 4] += tmp[i / 4];
03101 barrier(CLK_LOCAL_MEM_FENCE);
03102 }
03103
03104 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03105 stage[(i / 4) + (16 * channel)] = ClampToQuantum(accum[i / 4]);
03106
03107 barrier(CLK_LOCAL_MEM_FENCE);
03108 }
03109
03110
03111
03112 if ((get_local_id(0) >= pad) && (get_local_id(0) < tileSize - pad) && (srcx >= 0) && (srcx < imageWidth)) {
03113 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
03114 if ((i >= pad) && (i < tileSize - pad) && (srcy + i >= 0) && (srcy + i < imageHeight)) {
03115 int pos = (srcx * number_channels) + ((srcy + i) * (imageWidth * number_channels));
03116 for (int channel = 0; channel < max_channels; ++channel) {
03117 dstImage[pos + channel] = stage[(i / 4) + (16 * channel)];
03118 }
03119 }
03120 }
03121 }
03122 }
03123 )
03124
03125 ;
03126
03127 #endif // MAGICKCORE_OPENCL_SUPPORT
03128
03129 #if defined(__cplusplus) || defined(c_plusplus)
03130 }
03131 #endif
03132
03133 #endif // MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H