00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
00020 #define MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
00021
00022 #if defined(__cplusplus) || defined(c_plusplus)
00023 extern "C" {
00024 #endif
00025
00026 #if defined(MAGICKCORE_OPENCL_SUPPORT)
00027
00028
00029
00030
00031 #define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
00032 #define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
00033 #define OPENCL_ELSE() "\n #""else " " \n"
00034 #define OPENCL_ENDIF() "\n #""endif " " \n"
00035 #define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
00036 #define STRINGIFY(...) #__VA_ARGS__ "\n"
00037
00038 const char *accelerateKernels =
00039
00040
00041
00042
00043 OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
00044 OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f))
00045 OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
00046 OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
00047 OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
00048 OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
00049 OPENCL_DEFINE(SigmaRandom, (attenuate))
00050 OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
00051 OPENCL_DEFINE(MagickMax(x,y), (((x) > (y)) ? (x) : (y)))
00052 OPENCL_DEFINE(MagickMin(x,y), (((x) < (y)) ? (x) : (y)))
00053
00054
00055
00056
00057 STRINGIFY(
00058 typedef enum
00059 {
00060 UndefinedColorspace,
00061 CMYColorspace,
00062 CMYKColorspace,
00063 GRAYColorspace,
00064 HCLColorspace,
00065 HCLpColorspace,
00066 HSBColorspace,
00067 HSIColorspace,
00068 HSLColorspace,
00069 HSVColorspace,
00070 HWBColorspace,
00071 LabColorspace,
00072 LCHColorspace,
00073 LCHabColorspace,
00074 LCHuvColorspace,
00075 LogColorspace,
00076 LMSColorspace,
00077 LuvColorspace,
00078 OHTAColorspace,
00079 Rec601YCbCrColorspace,
00080 Rec709YCbCrColorspace,
00081 RGBColorspace,
00082 scRGBColorspace,
00083 sRGBColorspace,
00084 TransparentColorspace,
00085 xyYColorspace,
00086 XYZColorspace,
00087 YCbCrColorspace,
00088 YCCColorspace,
00089 YDbDrColorspace,
00090 YIQColorspace,
00091 YPbPrColorspace,
00092 YUVColorspace,
00093 LinearGRAYColorspace
00094 } ColorspaceType;
00095 )
00096
00097 STRINGIFY(
00098 typedef enum
00099 {
00100 UndefinedCompositeOp,
00101 AlphaCompositeOp,
00102 AtopCompositeOp,
00103 BlendCompositeOp,
00104 BlurCompositeOp,
00105 BumpmapCompositeOp,
00106 ChangeMaskCompositeOp,
00107 ClearCompositeOp,
00108 ColorBurnCompositeOp,
00109 ColorDodgeCompositeOp,
00110 ColorizeCompositeOp,
00111 CopyBlackCompositeOp,
00112 CopyBlueCompositeOp,
00113 CopyCompositeOp,
00114 CopyCyanCompositeOp,
00115 CopyGreenCompositeOp,
00116 CopyMagentaCompositeOp,
00117 CopyAlphaCompositeOp,
00118 CopyRedCompositeOp,
00119 CopyYellowCompositeOp,
00120 DarkenCompositeOp,
00121 DarkenIntensityCompositeOp,
00122 DifferenceCompositeOp,
00123 DisplaceCompositeOp,
00124 DissolveCompositeOp,
00125 DistortCompositeOp,
00126 DivideDstCompositeOp,
00127 DivideSrcCompositeOp,
00128 DstAtopCompositeOp,
00129 DstCompositeOp,
00130 DstInCompositeOp,
00131 DstOutCompositeOp,
00132 DstOverCompositeOp,
00133 ExclusionCompositeOp,
00134 HardLightCompositeOp,
00135 HardMixCompositeOp,
00136 HueCompositeOp,
00137 InCompositeOp,
00138 IntensityCompositeOp,
00139 LightenCompositeOp,
00140 LightenIntensityCompositeOp,
00141 LinearBurnCompositeOp,
00142 LinearDodgeCompositeOp,
00143 LinearLightCompositeOp,
00144 LuminizeCompositeOp,
00145 MathematicsCompositeOp,
00146 MinusDstCompositeOp,
00147 MinusSrcCompositeOp,
00148 ModulateCompositeOp,
00149 ModulusAddCompositeOp,
00150 ModulusSubtractCompositeOp,
00151 MultiplyCompositeOp,
00152 NoCompositeOp,
00153 OutCompositeOp,
00154 OverCompositeOp,
00155 OverlayCompositeOp,
00156 PegtopLightCompositeOp,
00157 PinLightCompositeOp,
00158 PlusCompositeOp,
00159 ReplaceCompositeOp,
00160 SaturateCompositeOp,
00161 ScreenCompositeOp,
00162 SoftLightCompositeOp,
00163 SrcAtopCompositeOp,
00164 SrcCompositeOp,
00165 SrcInCompositeOp,
00166 SrcOutCompositeOp,
00167 SrcOverCompositeOp,
00168 ThresholdCompositeOp,
00169 VividLightCompositeOp,
00170 XorCompositeOp,
00171 StereoCompositeOp
00172 } CompositeOperator;
00173 )
00174
00175 STRINGIFY(
00176 typedef enum
00177 {
00178 UndefinedFunction,
00179 ArcsinFunction,
00180 ArctanFunction,
00181 PolynomialFunction,
00182 SinusoidFunction
00183 } MagickFunction;
00184 )
00185
00186 STRINGIFY(
00187 typedef enum
00188 {
00189 UndefinedNoise,
00190 UniformNoise,
00191 GaussianNoise,
00192 MultiplicativeGaussianNoise,
00193 ImpulseNoise,
00194 LaplacianNoise,
00195 PoissonNoise,
00196 RandomNoise
00197 } NoiseType;
00198 )
00199
00200 STRINGIFY(
00201 typedef enum
00202 {
00203 UndefinedPixelIntensityMethod = 0,
00204 AveragePixelIntensityMethod,
00205 BrightnessPixelIntensityMethod,
00206 LightnessPixelIntensityMethod,
00207 MSPixelIntensityMethod,
00208 Rec601LumaPixelIntensityMethod,
00209 Rec601LuminancePixelIntensityMethod,
00210 Rec709LumaPixelIntensityMethod,
00211 Rec709LuminancePixelIntensityMethod,
00212 RMSPixelIntensityMethod
00213 } PixelIntensityMethod;
00214 )
00215
00216 STRINGIFY(
00217 typedef enum
00218 {
00219 BoxWeightingFunction = 0,
00220 TriangleWeightingFunction,
00221 CubicBCWeightingFunction,
00222 HannWeightingFunction,
00223 HammingWeightingFunction,
00224 BlackmanWeightingFunction,
00225 GaussianWeightingFunction,
00226 QuadraticWeightingFunction,
00227 JincWeightingFunction,
00228 SincWeightingFunction,
00229 SincFastWeightingFunction,
00230 KaiserWeightingFunction,
00231 WelchWeightingFunction,
00232 BohmanWeightingFunction,
00233 LagrangeWeightingFunction,
00234 CosineWeightingFunction
00235 } ResizeWeightingFunctionType;
00236 )
00237
00238 STRINGIFY(
00239 typedef enum
00240 {
00241 UndefinedChannel = 0x0000,
00242 RedChannel = 0x0001,
00243 GrayChannel = 0x0001,
00244 CyanChannel = 0x0001,
00245 GreenChannel = 0x0002,
00246 MagentaChannel = 0x0002,
00247 BlueChannel = 0x0004,
00248 YellowChannel = 0x0004,
00249 BlackChannel = 0x0008,
00250 AlphaChannel = 0x0010,
00251 OpacityChannel = 0x0010,
00252 IndexChannel = 0x0020,
00253 ReadMaskChannel = 0x0040,
00254 WriteMaskChannel = 0x0080,
00255 MetaChannel = 0x0100,
00256 CompositeChannels = 0x001F,
00257 AllChannels = 0x7ffffff,
00258
00259
00260
00261
00262
00263
00264
00265 TrueAlphaChannel = 0x0100,
00266 RGBChannels = 0x0200,
00267 GrayChannels = 0x0400,
00268 SyncChannels = 0x20000,
00269 DefaultChannels = AllChannels
00270 } ChannelType;
00271 )
00272
00273
00274
00275
00276
00277 OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
00278
00279 STRINGIFY(
00280 static inline CLQuantum ScaleCharToQuantum(const unsigned char value)
00281 {
00282 return((CLQuantum) value);
00283 }
00284 )
00285
00286 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
00287
00288 STRINGIFY(
00289 static inline CLQuantum ScaleCharToQuantum(const unsigned char value)
00290 {
00291 return((CLQuantum) (257.0f*value));
00292 }
00293 )
00294
00295 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
00296
00297 STRINGIFY(
00298 static inline CLQuantum ScaleCharToQuantum(const unsigned char value)
00299 {
00300 return((CLQuantum) (16843009.0*value));
00301 }
00302 )
00303
00304 OPENCL_ENDIF()
00305
00306 OPENCL_IF((MAGICKCORE_HDRI_SUPPORT == 1))
00307
00308 STRINGIFY(
00309 static inline CLQuantum ClampToQuantum(const float value)
00310 {
00311 return (CLQuantum) clamp(value, 0.0f, QuantumRange);
00312 }
00313 )
00314
00315 OPENCL_ELSE()
00316
00317 STRINGIFY(
00318 static inline CLQuantum ClampToQuantum(const float value)
00319 {
00320 return (CLQuantum) (clamp(value, 0.0f, QuantumRange) + 0.5f);
00321 }
00322 )
00323
00324 OPENCL_ENDIF()
00325
00326 STRINGIFY(
00327 static inline int ClampToCanvas(const int offset,const int range)
00328 {
00329 return clamp(offset, (int)0, range-1);
00330 }
00331 )
00332
00333 STRINGIFY(
00334 static inline uint ScaleQuantumToMap(CLQuantum value)
00335 {
00336 if (value >= (CLQuantum) MaxMap)
00337 return ((uint)MaxMap);
00338 else
00339 return ((uint)value);
00340 }
00341 )
00342
00343 STRINGIFY(
00344 static inline float PerceptibleReciprocal(const float x)
00345 {
00346 float sign = x < (float) 0.0 ? (float) -1.0 : (float) 1.0;
00347 return((sign*x) >= MagickEpsilon ? (float) 1.0/x : sign*((float) 1.0/MagickEpsilon));
00348 }
00349 )
00350
00351 STRINGIFY(
00352
00353 static inline unsigned int getPixelIndex(const unsigned int number_channels,
00354 const unsigned int columns, const unsigned int x, const unsigned int y)
00355 {
00356 return (x * number_channels) + (y * columns * number_channels);
00357 }
00358
00359 static inline float getPixelRed(const __global CLQuantum *p) { return (float)*p; }
00360 static inline float getPixelGreen(const __global CLQuantum *p) { return (float)*(p+1); }
00361 static inline float getPixelBlue(const __global CLQuantum *p) { return (float)*(p+2); }
00362 static inline float getPixelAlpha(const __global CLQuantum *p,const unsigned int number_channels) { return (float)*(p+number_channels-1); }
00363
00364 static inline void setPixelRed(__global CLQuantum *p,const CLQuantum value) { *p=value; }
00365 static inline void setPixelGreen(__global CLQuantum *p,const CLQuantum value) { *(p+1)=value; }
00366 static inline void setPixelBlue(__global CLQuantum *p,const CLQuantum value) { *(p+2)=value; }
00367 static inline void setPixelAlpha(__global CLQuantum *p,const unsigned int number_channels,const CLQuantum value) { *(p+number_channels-1)=value; }
00368
00369 static inline CLQuantum getBlue(CLPixelType p) { return p.x; }
00370 static inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
00371 static inline float getBlueF4(float4 p) { return p.x; }
00372 static inline void setBlueF4(float4* p, float value) { (*p).x = value; }
00373
00374 static inline CLQuantum getGreen(CLPixelType p) { return p.y; }
00375 static inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
00376 static inline float getGreenF4(float4 p) { return p.y; }
00377 static inline void setGreenF4(float4* p, float value) { (*p).y = value; }
00378
00379 static inline CLQuantum getRed(CLPixelType p) { return p.z; }
00380 static inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
00381 static inline float getRedF4(float4 p) { return p.z; }
00382 static inline void setRedF4(float4* p, float value) { (*p).z = value; }
00383
00384 static inline CLQuantum getAlpha(CLPixelType p) { return p.w; }
00385 static inline void setAlpha(CLPixelType* p, CLQuantum value) { (*p).w = value; }
00386 static inline float getAlphaF4(float4 p) { return p.w; }
00387 static inline void setAlphaF4(float4* p, float value) { (*p).w = value; }
00388
00389 static inline void ReadChannels(const __global CLQuantum *p, const unsigned int number_channels,
00390 const ChannelType channel, float *red, float *green, float *blue, float *alpha)
00391 {
00392 if ((channel & RedChannel) != 0)
00393 *red=getPixelRed(p);
00394
00395 if (number_channels > 2)
00396 {
00397 if ((channel & GreenChannel) != 0)
00398 *green=getPixelGreen(p);
00399
00400 if ((channel & BlueChannel) != 0)
00401 *blue=getPixelBlue(p);
00402 }
00403
00404 if (((number_channels == 4) || (number_channels == 2)) &&
00405 ((channel & AlphaChannel) != 0))
00406 *alpha=getPixelAlpha(p,number_channels);
00407 }
00408
00409 static inline float4 ReadAllChannels(const __global CLQuantum *image, const unsigned int number_channels,
00410 const unsigned int columns, const unsigned int x, const unsigned int y)
00411 {
00412 const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
00413
00414 float4 pixel;
00415
00416 pixel.x=getPixelRed(p);
00417
00418 if (number_channels > 2)
00419 {
00420 pixel.y=getPixelGreen(p);
00421 pixel.z=getPixelBlue(p);
00422 }
00423
00424 if ((number_channels == 4) || (number_channels == 2))
00425 pixel.w=getPixelAlpha(p,number_channels);
00426 return(pixel);
00427 }
00428
00429 static inline float4 ReadFloat4(const __global CLQuantum *image, const unsigned int number_channels,
00430 const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel)
00431 {
00432 const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
00433
00434 float red = 0.0f;
00435 float green = 0.0f;
00436 float blue = 0.0f;
00437 float alpha = 0.0f;
00438
00439 ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
00440 return (float4)(red, green, blue, alpha);
00441 }
00442
00443 static inline void WriteChannels(__global CLQuantum *p, const unsigned int number_channels,
00444 const ChannelType channel, float red, float green, float blue, float alpha)
00445 {
00446 if ((channel & RedChannel) != 0)
00447 setPixelRed(p,ClampToQuantum(red));
00448
00449 if (number_channels > 2)
00450 {
00451 if ((channel & GreenChannel) != 0)
00452 setPixelGreen(p,ClampToQuantum(green));
00453
00454 if ((channel & BlueChannel) != 0)
00455 setPixelBlue(p,ClampToQuantum(blue));
00456 }
00457
00458 if (((number_channels == 4) || (number_channels == 2)) &&
00459 ((channel & AlphaChannel) != 0))
00460 setPixelAlpha(p,number_channels,ClampToQuantum(alpha));
00461 }
00462
00463 static inline void WriteAllChannels(__global CLQuantum *image, const unsigned int number_channels,
00464 const unsigned int columns, const unsigned int x, const unsigned int y, float4 pixel)
00465 {
00466 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
00467
00468 setPixelRed(p,ClampToQuantum(pixel.x));
00469
00470 if (number_channels > 2)
00471 {
00472 setPixelGreen(p,ClampToQuantum(pixel.y));
00473 setPixelBlue(p,ClampToQuantum(pixel.z));
00474 }
00475
00476 if ((number_channels == 4) || (number_channels == 2))
00477 setPixelAlpha(p,number_channels,ClampToQuantum(pixel.w));
00478 }
00479
00480 static inline void WriteFloat4(__global CLQuantum *image, const unsigned int number_channels,
00481 const unsigned int columns, const unsigned int x, const unsigned int y, const ChannelType channel,
00482 float4 pixel)
00483 {
00484 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
00485 WriteChannels(p, number_channels, channel, pixel.x, pixel.y, pixel.z, pixel.w);
00486 }
00487
00488 static inline float GetPixelIntensity(const unsigned int colorspace,
00489 const unsigned int method,float red,float green,float blue)
00490 {
00491 float intensity;
00492
00493 if ((colorspace == GRAYColorspace) || (colorspace == LinearGRAYColorspace))
00494 return red;
00495
00496 switch (method)
00497 {
00498 case AveragePixelIntensityMethod:
00499 {
00500 intensity=(red+green+blue)/3.0;
00501 break;
00502 }
00503 case BrightnessPixelIntensityMethod:
00504 {
00505 intensity=MagickMax(MagickMax(red,green),blue);
00506 break;
00507 }
00508 case LightnessPixelIntensityMethod:
00509 {
00510 intensity=(MagickMin(MagickMin(red,green),blue)+
00511 MagickMax(MagickMax(red,green),blue))/2.0;
00512 break;
00513 }
00514 case MSPixelIntensityMethod:
00515 {
00516 intensity=(float) (((float) red*red+green*green+blue*blue)/
00517 (3.0*QuantumRange));
00518 break;
00519 }
00520 case Rec601LumaPixelIntensityMethod:
00521 {
00522
00523
00524
00525
00526
00527
00528
00529
00530 intensity=0.298839*red+0.586811*green+0.114350*blue;
00531 break;
00532 }
00533 case Rec601LuminancePixelIntensityMethod:
00534 {
00535
00536
00537
00538
00539
00540
00541
00542
00543 intensity=0.298839*red+0.586811*green+0.114350*blue;
00544 break;
00545 }
00546 case Rec709LumaPixelIntensityMethod:
00547 default:
00548 {
00549
00550
00551
00552
00553
00554
00555
00556
00557 intensity=0.212656*red+0.715158*green+0.072186*blue;
00558 break;
00559 }
00560 case Rec709LuminancePixelIntensityMethod:
00561 {
00562
00563
00564
00565
00566
00567
00568
00569
00570 intensity=0.212656*red+0.715158*green+0.072186*blue;
00571 break;
00572 }
00573 case RMSPixelIntensityMethod:
00574 {
00575 intensity=(float) (sqrt((float) red*red+green*green+blue*blue)/
00576 sqrt(3.0));
00577 break;
00578 }
00579 }
00580
00581 return intensity;
00582 }
00583
00584 static inline int mirrorBottom(int value)
00585 {
00586 return (value < 0) ? - (value) : value;
00587 }
00588
00589 static inline int mirrorTop(int value, int width)
00590 {
00591 return (value >= width) ? (2 * width - value - 1) : value;
00592 }
00593 )
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607 STRINGIFY(
00608
00609
00610
00611
00612
00613
00614
00615
00616 ulong MWC_AddMod64(ulong a, ulong b, ulong M)
00617 {
00618 ulong v=a+b;
00619
00620 if( (v>=M) || (convert_float(v) < convert_float(a)) )
00621 v=v-M;
00622 return v;
00623 }
00624
00625
00626
00627
00628
00629
00630
00631 ulong MWC_MulMod64(ulong a, ulong b, ulong M)
00632 {
00633 ulong r=0;
00634 while(a!=0){
00635 if(a&1)
00636 r=MWC_AddMod64(r,b,M);
00637 b=MWC_AddMod64(b,b,M);
00638 a=a>>1;
00639 }
00640 return r;
00641 }
00642
00643
00644
00645
00646
00647 ulong MWC_PowMod64(ulong a, ulong e, ulong M)
00648 {
00649 ulong sqr=a, acc=1;
00650 while(e!=0){
00651 if(e&1)
00652 acc=MWC_MulMod64(acc,sqr,M);
00653 sqr=MWC_MulMod64(sqr,sqr,M);
00654 e=e>>1;
00655 }
00656 return acc;
00657 }
00658
00659 uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
00660 {
00661 ulong m=MWC_PowMod64(A, distance, M);
00662 ulong x=curr.x*(ulong)A+curr.y;
00663 x=MWC_MulMod64(x, m, M);
00664 return (uint2)((uint)(x/A), (uint)(x%A));
00665 }
00666
00667 uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
00668 {
00669
00670
00671
00672
00673
00674 enum{ MWC_BASEID = 4077358422479273989UL };
00675
00676 ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
00677 ulong m=MWC_PowMod64(A, dist, M);
00678
00679 ulong x=MWC_MulMod64(MWC_BASEID, m, M);
00680 return (uint2)((uint)(x/A), (uint)(x%A));
00681 }
00682
00684 typedef struct{ uint x; uint c; uint seed0; ulong seed1; } mwc64x_state_t;
00685
00686 void MWC64X_Step(mwc64x_state_t *s)
00687 {
00688 uint X=s->x, C=s->c;
00689
00690 uint Xn=s->seed0*X+C;
00691 uint carry=(uint)(Xn<C);
00692 uint Cn=mad_hi(s->seed0,X,carry);
00693
00694 s->x=Xn;
00695 s->c=Cn;
00696 }
00697
00698 void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
00699 {
00700 uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), s->seed0, s->seed1, distance);
00701 s->x=tmp.x;
00702 s->c=tmp.y;
00703 }
00704
00705 void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
00706 {
00707 uint2 tmp=MWC_SeedImpl_Mod64(s->seed0, s->seed1, 1, 0, baseOffset, perStreamOffset);
00708 s->x=tmp.x;
00709 s->c=tmp.y;
00710 }
00711
00713 uint MWC64X_NextUint(mwc64x_state_t *s)
00714 {
00715 uint res=s->x ^ s->c;
00716 MWC64X_Step(s);
00717 return res;
00718 }
00719
00720
00721
00722
00723
00724 float mwcReadPseudoRandomValue(mwc64x_state_t* rng)
00725 {
00726 return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff);
00727 }
00728
00729 float mwcGenerateDifferentialNoise(mwc64x_state_t* r, float pixel, NoiseType noise_type, float attenuate)
00730 {
00731 float
00732 alpha,
00733 beta,
00734 noise,
00735 sigma;
00736
00737 noise = 0.0f;
00738 alpha=mwcReadPseudoRandomValue(r);
00739 switch(noise_type)
00740 {
00741 case UniformNoise:
00742 default:
00743 {
00744 noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
00745 break;
00746 }
00747 case GaussianNoise:
00748 {
00749 float
00750 gamma,
00751 tau;
00752
00753 if (alpha == 0.0f)
00754 alpha=1.0f;
00755 beta=mwcReadPseudoRandomValue(r);
00756 gamma=sqrt(-2.0f*log(alpha));
00757 sigma=gamma*cospi((2.0f*beta));
00758 tau=gamma*sinpi((2.0f*beta));
00759 noise=pixel+sqrt(pixel)*SigmaGaussian*sigma+QuantumRange*TauGaussian*tau;
00760 break;
00761 }
00762 case ImpulseNoise:
00763 {
00764 if (alpha < (SigmaImpulse/2.0f))
00765 noise=0.0f;
00766 else
00767 if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
00768 noise=QuantumRange;
00769 else
00770 noise=pixel;
00771 break;
00772 }
00773 case LaplacianNoise:
00774 {
00775 if (alpha <= 0.5f)
00776 {
00777 if (alpha <= MagickEpsilon)
00778 noise=(pixel-QuantumRange);
00779 else
00780 noise=(pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
00781 0.5f);
00782 break;
00783 }
00784 beta=1.0f-alpha;
00785 if (beta <= (0.5f*MagickEpsilon))
00786 noise=(pixel+QuantumRange);
00787 else
00788 noise=(pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
00789 break;
00790 }
00791 case MultiplicativeGaussianNoise:
00792 {
00793 sigma=1.0f;
00794 if (alpha > MagickEpsilon)
00795 sigma=sqrt(-2.0f*log(alpha));
00796 beta=mwcReadPseudoRandomValue(r);
00797 noise=(pixel+pixel*SigmaMultiplicativeGaussian*sigma*
00798 cospi((2.0f*beta))/2.0f);
00799 break;
00800 }
00801 case PoissonNoise:
00802 {
00803 float
00804 poisson;
00805 unsigned int i;
00806 poisson=exp(-SigmaPoisson*QuantumScale*pixel);
00807 for (i=0; alpha > poisson; i++)
00808 {
00809 beta=mwcReadPseudoRandomValue(r);
00810 alpha*=beta;
00811 }
00812 noise=(QuantumRange*i/SigmaPoisson);
00813 break;
00814 }
00815 case RandomNoise:
00816 {
00817 noise=(QuantumRange*SigmaRandom*alpha);
00818 break;
00819 }
00820 }
00821 return noise;
00822 }
00823
00824 __kernel
00825 void AddNoise(const __global CLQuantum *image,
00826 const unsigned int number_channels,const ChannelType channel,
00827 const unsigned int length,const unsigned int pixelsPerWorkItem,
00828 const NoiseType noise_type,const float attenuate,const unsigned int seed0,
00829 const unsigned int seed1,const unsigned int numRandomNumbersPerPixel,
00830 __global CLQuantum *filteredImage)
00831 {
00832 mwc64x_state_t rng;
00833 rng.seed0 = seed0;
00834 rng.seed1 = seed1;
00835
00836 uint span = pixelsPerWorkItem * numRandomNumbersPerPixel;
00837 uint offset = span * get_local_size(0) * get_group_id(0);
00838 MWC64X_SeedStreams(&rng, offset, span);
00839
00840 uint pos = get_group_id(0) * get_local_size(0) * pixelsPerWorkItem * number_channels + (get_local_id(0) * number_channels);
00841 uint count = pixelsPerWorkItem;
00842
00843 while (count > 0 && pos < length)
00844 {
00845 const __global CLQuantum *p = image + pos;
00846 __global CLQuantum *q = filteredImage + pos;
00847
00848 float red;
00849 float green;
00850 float blue;
00851 float alpha;
00852
00853 ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
00854
00855 if ((channel & RedChannel) != 0)
00856 red=mwcGenerateDifferentialNoise(&rng,red,noise_type,attenuate);
00857
00858 if (number_channels > 2)
00859 {
00860 if ((channel & GreenChannel) != 0)
00861 green=mwcGenerateDifferentialNoise(&rng,green,noise_type,attenuate);
00862
00863 if ((channel & BlueChannel) != 0)
00864 blue=mwcGenerateDifferentialNoise(&rng,blue,noise_type,attenuate);
00865 }
00866
00867 if (((number_channels == 4) || (number_channels == 2)) &&
00868 ((channel & AlphaChannel) != 0))
00869 alpha=mwcGenerateDifferentialNoise(&rng,alpha,noise_type,attenuate);
00870
00871 WriteChannels(q, number_channels, channel, red, green, blue, alpha);
00872
00873 pos += (get_local_size(0) * number_channels);
00874 count--;
00875 }
00876 }
00877 )
00878
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891 STRINGIFY(
00892
00893
00894
00895 __kernel void BlurRow(const __global CLQuantum *image,
00896 const unsigned int number_channels,const ChannelType channel,
00897 __constant float *filter,const unsigned int width,
00898 const unsigned int imageColumns,const unsigned int imageRows,
00899 __local float4 *temp,__global float4 *tempImage)
00900 {
00901 const int x = get_global_id(0);
00902 const int y = get_global_id(1);
00903
00904 const int columns = imageColumns;
00905
00906 const unsigned int radius = (width-1)/2;
00907 const int wsize = get_local_size(0);
00908 const unsigned int loadSize = wsize+width;
00909
00910
00911 const int groupX=get_local_size(0)*get_group_id(0);
00912
00913
00914 for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
00915 {
00916 int cx = ClampToCanvas(i + groupX - radius, columns);
00917 temp[i] = ReadFloat4(image, number_channels, columns, cx, y, channel);
00918 }
00919
00920
00921 barrier(CLK_LOCAL_MEM_FENCE);
00922
00923
00924 if (get_global_id(0) < columns)
00925 {
00926
00927 float4 result = (float4) 0;
00928
00929 int i = 0;
00930
00931 for ( ; i+7 < width; )
00932 {
00933 for (int j=0; j < 8; j++)
00934 result+=filter[i+j]*temp[i+j+get_local_id(0)];
00935 i+=8;
00936 }
00937
00938 for ( ; i < width; i++)
00939 result+=filter[i]*temp[i+get_local_id(0)];
00940
00941
00942 tempImage[y*columns+x] = result;
00943 }
00944 }
00945 )
00946
00947 STRINGIFY(
00948
00949
00950
00951 __kernel void BlurColumn(const __global float4 *blurRowData,
00952 const unsigned int number_channels,const ChannelType channel,
00953 __constant float *filter,const unsigned int width,
00954 const unsigned int imageColumns,const unsigned int imageRows,
00955 __local float4 *temp,__global CLQuantum *filteredImage)
00956 {
00957 const int x = get_global_id(0);
00958 const int y = get_global_id(1);
00959
00960 const int columns = imageColumns;
00961 const int rows = imageRows;
00962
00963 unsigned int radius = (width-1)/2;
00964 const int wsize = get_local_size(1);
00965 const unsigned int loadSize = wsize+width;
00966
00967
00968 const int groupX=get_local_size(0)*get_group_id(0);
00969 const int groupY=get_local_size(1)*get_group_id(1);
00970
00971
00972
00973
00974 for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
00975 temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
00976
00977
00978 barrier(CLK_LOCAL_MEM_FENCE);
00979
00980
00981 if (get_global_id(1) < rows)
00982 {
00983
00984 float4 result = (float4) 0;
00985
00986 int i = 0;
00987
00988 for ( ; i+7 < width; )
00989 {
00990 for (int j=0; j < 8; j++)
00991 result+=filter[i+j]*temp[i+j+get_local_id(1)];
00992 i+=8;
00993 }
00994
00995 for ( ; i < width; i++)
00996 result+=filter[i]*temp[i+get_local_id(1)];
00997
00998
00999 WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
01000 }
01001 }
01002 )
01003
01004
01005
01006
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016 STRINGIFY(
01017
01018 static inline float4 ConvertRGBToHSB(const float4 pixel)
01019 {
01020 float4 result=0.0f;
01021 result.w=pixel.w;
01022 float tmax=MagickMax(MagickMax(pixel.x,pixel.y),pixel.z);
01023 if (tmax != 0.0f)
01024 {
01025 float tmin=MagickMin(MagickMin(pixel.x,pixel.y),pixel.z);
01026 float delta=tmax-tmin;
01027
01028 result.y=delta/tmax;
01029 result.z=QuantumScale*tmax;
01030 if (delta != 0.0f)
01031 {
01032 result.x =((pixel.x == tmax) ? 0.0f : ((pixel.y == tmax) ? 2.0f : 4.0f));
01033 result.x+=((pixel.x == tmax) ? (pixel.y-pixel.z) : ((pixel.y == tmax) ?
01034 (pixel.z-pixel.x) : (pixel.x-pixel.y)))/delta;
01035 result.x/=6.0f;
01036 result.x+=(result.x < 0.0f) ? 0.0f : 1.0f;
01037 }
01038 }
01039 return(result);
01040 }
01041
01042 static inline float4 ConvertHSBToRGB(const float4 pixel)
01043 {
01044 float hue=pixel.x;
01045 float saturation=pixel.y;
01046 float brightness=pixel.z;
01047
01048 float4 result=pixel;
01049
01050 if (saturation == 0.0f)
01051 {
01052 result.x=result.y=result.z=ClampToQuantum(QuantumRange*brightness);
01053 }
01054 else
01055 {
01056 float h=6.0f*(hue-floor(hue));
01057 float f=h-floor(h);
01058 float p=brightness*(1.0f-saturation);
01059 float q=brightness*(1.0f-saturation*f);
01060 float t=brightness*(1.0f-(saturation*(1.0f-f)));
01061 int ih = (int)h;
01062
01063 if (ih == 1)
01064 {
01065 result.x=ClampToQuantum(QuantumRange*q);
01066 result.y=ClampToQuantum(QuantumRange*brightness);
01067 result.z=ClampToQuantum(QuantumRange*p);
01068 }
01069 else if (ih == 2)
01070 {
01071 result.x=ClampToQuantum(QuantumRange*p);
01072 result.y=ClampToQuantum(QuantumRange*brightness);
01073 result.z=ClampToQuantum(QuantumRange*t);
01074 }
01075 else if (ih == 3)
01076 {
01077 result.x=ClampToQuantum(QuantumRange*p);
01078 result.y=ClampToQuantum(QuantumRange*q);
01079 result.z=ClampToQuantum(QuantumRange*brightness);
01080 }
01081 else if (ih == 4)
01082 {
01083 result.x=ClampToQuantum(QuantumRange*t);
01084 result.y=ClampToQuantum(QuantumRange*p);
01085 result.z=ClampToQuantum(QuantumRange*brightness);
01086 }
01087 else if (ih == 5)
01088 {
01089 result.x=ClampToQuantum(QuantumRange*brightness);
01090 result.y=ClampToQuantum(QuantumRange*p);
01091 result.z=ClampToQuantum(QuantumRange*q);
01092 }
01093 else
01094 {
01095 result.x=ClampToQuantum(QuantumRange*brightness);
01096 result.y=ClampToQuantum(QuantumRange*t);
01097 result.z=ClampToQuantum(QuantumRange*p);
01098 }
01099 }
01100 return(result);
01101 }
01102
01103 __kernel void Contrast(__global CLQuantum *image,
01104 const unsigned int number_channels,const int sign)
01105 {
01106 const int x=get_global_id(0);
01107 const int y=get_global_id(1);
01108 const unsigned int columns=get_global_size(0);
01109
01110 float4 pixel=ReadAllChannels(image,number_channels,columns,x,y);
01111 if (number_channels < 3)
01112 pixel.y=pixel.z=pixel.x;
01113
01114 pixel=ConvertRGBToHSB(pixel);
01115 float brightness=pixel.z;
01116 brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
01117 brightness=clamp(brightness,0.0f,1.0f);
01118 pixel.z=brightness;
01119 pixel=ConvertHSBToRGB(pixel);
01120
01121 WriteAllChannels(image,number_channels,columns,x,y,pixel);
01122 }
01123 )
01124
01125
01126
01127
01128
01129
01130
01131
01132
01133
01134
01135
01136
01137 STRINGIFY(
01138
01139
01140 __kernel void Histogram(__global CLPixelType * restrict im,
01141 const ChannelType channel,
01142 const unsigned int colorspace,
01143 const unsigned int method,
01144 __global uint4 * restrict histogram)
01145 {
01146 const int x = get_global_id(0);
01147 const int y = get_global_id(1);
01148 const int columns = get_global_size(0);
01149 const int c = x + y * columns;
01150 if ((channel & SyncChannels) != 0)
01151 {
01152 float red=(float)getRed(im[c]);
01153 float green=(float)getGreen(im[c]);
01154 float blue=(float)getBlue(im[c]);
01155
01156 float intensity = GetPixelIntensity(colorspace, method, red, green, blue);
01157 uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
01158 atomic_inc((__global uint *)(&(histogram[pos]))+2);
01159 }
01160 else
01161 {
01162
01163
01164 }
01165 }
01166 )
01167
01168 STRINGIFY(
01169
01170
01171 __kernel void ContrastStretch(__global CLPixelType * restrict im,
01172 const ChannelType channel,
01173 __global CLPixelType * restrict stretch_map,
01174 const float4 white, const float4 black)
01175 {
01176 const int x = get_global_id(0);
01177 const int y = get_global_id(1);
01178 const int columns = get_global_size(0);
01179 const int c = x + y * columns;
01180
01181 uint ePos;
01182 CLPixelType oValue, eValue;
01183 CLQuantum red, green, blue, alpha;
01184
01185
01186 oValue=im[c];
01187
01188 if ((channel & RedChannel) != 0)
01189 {
01190 if (getRedF4(white) != getRedF4(black))
01191 {
01192 ePos = ScaleQuantumToMap(getRed(oValue));
01193 eValue = stretch_map[ePos];
01194 red = getRed(eValue);
01195 }
01196 }
01197
01198 if ((channel & GreenChannel) != 0)
01199 {
01200 if (getGreenF4(white) != getGreenF4(black))
01201 {
01202 ePos = ScaleQuantumToMap(getGreen(oValue));
01203 eValue = stretch_map[ePos];
01204 green = getGreen(eValue);
01205 }
01206 }
01207
01208 if ((channel & BlueChannel) != 0)
01209 {
01210 if (getBlueF4(white) != getBlueF4(black))
01211 {
01212 ePos = ScaleQuantumToMap(getBlue(oValue));
01213 eValue = stretch_map[ePos];
01214 blue = getBlue(eValue);
01215 }
01216 }
01217
01218 if ((channel & AlphaChannel) != 0)
01219 {
01220 if (getAlphaF4(white) != getAlphaF4(black))
01221 {
01222 ePos = ScaleQuantumToMap(getAlpha(oValue));
01223 eValue = stretch_map[ePos];
01224 alpha = getAlpha(eValue);
01225 }
01226 }
01227
01228
01229 im[c]=(CLPixelType)(blue, green, red, alpha);
01230
01231 }
01232 )
01233
01234
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246 STRINGIFY(
01247 __kernel
01248 void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
01249 const unsigned int imageWidth, const unsigned int imageHeight,
01250 __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
01251 const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) {
01252
01253 int2 blockID;
01254 blockID.x = get_global_id(0) / get_local_size(0);
01255 blockID.y = get_global_id(1) / get_local_size(1);
01256
01257
01258 int2 imageAreaOrg;
01259 imageAreaOrg.x = blockID.x * get_local_size(0);
01260 imageAreaOrg.y = blockID.y * get_local_size(1);
01261
01262 int2 midFilterDimen;
01263 midFilterDimen.x = (filterWidth-1)/2;
01264 midFilterDimen.y = (filterHeight-1)/2;
01265
01266 int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
01267
01268
01269 int2 cachedAreaDimen;
01270 cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
01271 cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
01272
01273
01274 int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
01275 int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
01276 int groupSize = get_local_size(0) * get_local_size(1);
01277 for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
01278
01279 int2 cachedAreaIndex;
01280 cachedAreaIndex.x = i % cachedAreaDimen.x;
01281 cachedAreaIndex.y = i / cachedAreaDimen.x;
01282
01283 int2 imagePixelIndex;
01284 imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
01285
01286
01287
01288 imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
01289 imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
01290
01291 pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
01292 }
01293
01294
01295 for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
01296 filterCache[i] = filter[i];
01297 }
01298 barrier(CLK_LOCAL_MEM_FENCE);
01299
01300
01301 int2 imageIndex;
01302 imageIndex.x = imageAreaOrg.x + get_local_id(0);
01303 imageIndex.y = imageAreaOrg.y + get_local_id(1);
01304
01305
01306 if (imageIndex.x >= imageWidth
01307 || imageIndex.y >= imageHeight) {
01308 return;
01309 }
01310
01311 int filterIndex = 0;
01312 float4 sum = (float4)0.0f;
01313 float gamma = 0.0f;
01314 if (((channel & AlphaChannel) == 0) || (matte == 0)) {
01315 int cacheIndexY = get_local_id(1);
01316 for (int j = 0; j < filterHeight; j++) {
01317 int cacheIndexX = get_local_id(0);
01318 for (int i = 0; i < filterWidth; i++) {
01319 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
01320 float f = filterCache[filterIndex];
01321
01322 sum.x += f * p.x;
01323 sum.y += f * p.y;
01324 sum.z += f * p.z;
01325 sum.w += f * p.w;
01326
01327 gamma += f;
01328 filterIndex++;
01329 cacheIndexX++;
01330 }
01331 cacheIndexY++;
01332 }
01333 }
01334 else {
01335 int cacheIndexY = get_local_id(1);
01336 for (int j = 0; j < filterHeight; j++) {
01337 int cacheIndexX = get_local_id(0);
01338 for (int i = 0; i < filterWidth; i++) {
01339
01340 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
01341 float alpha = QuantumScale*p.w;
01342 float f = filterCache[filterIndex];
01343 float g = alpha * f;
01344
01345 sum.x += g*p.x;
01346 sum.y += g*p.y;
01347 sum.z += g*p.z;
01348 sum.w += f*p.w;
01349
01350 gamma += g;
01351 filterIndex++;
01352 cacheIndexX++;
01353 }
01354 cacheIndexY++;
01355 }
01356 gamma = PerceptibleReciprocal(gamma);
01357 sum.xyz = gamma*sum.xyz;
01358 }
01359 CLPixelType outputPixel;
01360 outputPixel.x = ClampToQuantum(sum.x);
01361 outputPixel.y = ClampToQuantum(sum.y);
01362 outputPixel.z = ClampToQuantum(sum.z);
01363 outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
01364
01365 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
01366 }
01367 )
01368
01369 STRINGIFY(
01370 __kernel
01371 void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
01372 const uint imageWidth, const uint imageHeight,
01373 __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
01374 const uint matte, const ChannelType channel) {
01375
01376 int2 imageIndex;
01377 imageIndex.x = get_global_id(0);
01378 imageIndex.y = get_global_id(1);
01379
01380
01381
01382
01383
01384 if (imageIndex.x >= imageWidth
01385 || imageIndex.y >= imageHeight)
01386 return;
01387
01388 int2 midFilterDimen;
01389 midFilterDimen.x = (filterWidth-1)/2;
01390 midFilterDimen.y = (filterHeight-1)/2;
01391
01392 int filterIndex = 0;
01393 float4 sum = (float4)0.0f;
01394 float gamma = 0.0f;
01395 if (((channel & AlphaChannel) == 0) || (matte == 0)) {
01396 for (int j = 0; j < filterHeight; j++) {
01397 int2 inputPixelIndex;
01398 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
01399 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
01400 for (int i = 0; i < filterWidth; i++) {
01401 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
01402 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
01403
01404 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
01405 float f = filter[filterIndex];
01406
01407 sum.x += f * p.x;
01408 sum.y += f * p.y;
01409 sum.z += f * p.z;
01410 sum.w += f * p.w;
01411
01412 gamma += f;
01413
01414 filterIndex++;
01415 }
01416 }
01417 }
01418 else {
01419
01420 for (int j = 0; j < filterHeight; j++) {
01421 int2 inputPixelIndex;
01422 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
01423 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
01424 for (int i = 0; i < filterWidth; i++) {
01425 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
01426 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
01427
01428 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
01429 float alpha = QuantumScale*p.w;
01430 float f = filter[filterIndex];
01431 float g = alpha * f;
01432
01433 sum.x += g*p.x;
01434 sum.y += g*p.y;
01435 sum.z += g*p.z;
01436 sum.w += f*p.w;
01437
01438 gamma += g;
01439
01440
01441 filterIndex++;
01442 }
01443 }
01444 gamma = PerceptibleReciprocal(gamma);
01445 sum.xyz = gamma*sum.xyz;
01446 }
01447
01448 CLPixelType outputPixel;
01449 outputPixel.x = ClampToQuantum(sum.x);
01450 outputPixel.y = ClampToQuantum(sum.y);
01451 outputPixel.z = ClampToQuantum(sum.z);
01452 outputPixel.w = ((channel & AlphaChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
01453
01454 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
01455 }
01456 )
01457
01458
01459
01460
01461
01462
01463
01464
01465
01466
01467
01468
01469
01470 STRINGIFY(
01471
01472 __kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
01473 , const unsigned int imageWidth, const unsigned int imageHeight
01474 , const int2 offset, const int polarity, const int matte) {
01475
01476 int x = get_global_id(0);
01477 int y = get_global_id(1);
01478
01479 CLPixelType v = inputImage[y*imageWidth+x];
01480
01481 int2 neighbor;
01482 neighbor.y = y + offset.y;
01483 neighbor.x = x + offset.x;
01484
01485 int2 clampedNeighbor;
01486 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
01487 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
01488
01489 CLPixelType r = (clampedNeighbor.x == neighbor.x
01490 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
01491 :(CLPixelType)0;
01492
01493 int sv[4];
01494 sv[0] = (int)v.x;
01495 sv[1] = (int)v.y;
01496 sv[2] = (int)v.z;
01497 sv[3] = (int)v.w;
01498
01499 int sr[4];
01500 sr[0] = (int)r.x;
01501 sr[1] = (int)r.y;
01502 sr[2] = (int)r.z;
01503 sr[3] = (int)r.w;
01504
01505 if (polarity > 0) {
01506 \n #pragma unroll 4\n
01507 for (unsigned int i = 0; i < 4; i++) {
01508 sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
01509 }
01510 }
01511 else {
01512 \n #pragma unroll 4\n
01513 for (unsigned int i = 0; i < 4; i++) {
01514 sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
01515 }
01516
01517 }
01518
01519 v.x = (CLQuantum)sv[0];
01520 v.y = (CLQuantum)sv[1];
01521 v.z = (CLQuantum)sv[2];
01522
01523 if (matte!=0)
01524 v.w = (CLQuantum)sv[3];
01525
01526 outputImage[y*imageWidth+x] = v;
01527
01528 }
01529
01530
01531 )
01532
01533 STRINGIFY(
01534
01535 __kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
01536 , const unsigned int imageWidth, const unsigned int imageHeight
01537 , const int2 offset, const int polarity, const int matte) {
01538
01539 int x = get_global_id(0);
01540 int y = get_global_id(1);
01541
01542 CLPixelType v = inputImage[y*imageWidth+x];
01543
01544 int2 neighbor, clampedNeighbor;
01545
01546 neighbor.y = y + offset.y;
01547 neighbor.x = x + offset.x;
01548 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
01549 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
01550
01551 CLPixelType r = (clampedNeighbor.x == neighbor.x
01552 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
01553 :(CLPixelType)0;
01554
01555
01556 neighbor.y = y - offset.y;
01557 neighbor.x = x - offset.x;
01558 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
01559 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
01560
01561 CLPixelType s = (clampedNeighbor.x == neighbor.x
01562 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
01563 :(CLPixelType)0;
01564
01565
01566 int sv[4];
01567 sv[0] = (int)v.x;
01568 sv[1] = (int)v.y;
01569 sv[2] = (int)v.z;
01570 sv[3] = (int)v.w;
01571
01572 int sr[4];
01573 sr[0] = (int)r.x;
01574 sr[1] = (int)r.y;
01575 sr[2] = (int)r.z;
01576 sr[3] = (int)r.w;
01577
01578 int ss[4];
01579 ss[0] = (int)s.x;
01580 ss[1] = (int)s.y;
01581 ss[2] = (int)s.z;
01582 ss[3] = (int)s.w;
01583
01584 if (polarity > 0) {
01585 \n #pragma unroll 4\n
01586 for (unsigned int i = 0; i < 4; i++) {
01587
01588
01589
01590
01591 sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
01592 }
01593 }
01594 else {
01595 \n #pragma unroll 4\n
01596 for (unsigned int i = 0; i < 4; i++) {
01597
01598
01599
01600 sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
01601 }
01602 }
01603
01604 v.x = (CLQuantum)sv[0];
01605 v.y = (CLQuantum)sv[1];
01606 v.z = (CLQuantum)sv[2];
01607
01608 if (matte!=0)
01609 v.w = (CLQuantum)sv[3];
01610
01611 outputImage[y*imageWidth+x] = v;
01612
01613 }
01614 )
01615
01616
01617
01618
01619
01620
01621
01622
01623
01624
01625
01626
01627
01628 STRINGIFY(
01629
01630
01631 __kernel void Equalize(__global CLPixelType * restrict im,
01632 const ChannelType channel,
01633 __global CLPixelType * restrict equalize_map,
01634 const float4 white, const float4 black)
01635 {
01636 const int x = get_global_id(0);
01637 const int y = get_global_id(1);
01638 const int columns = get_global_size(0);
01639 const int c = x + y * columns;
01640
01641 uint ePos;
01642 CLPixelType oValue, eValue;
01643 CLQuantum red, green, blue, alpha;
01644
01645
01646 oValue=im[c];
01647
01648 if ((channel & SyncChannels) != 0)
01649 {
01650 if (getRedF4(white) != getRedF4(black))
01651 {
01652 ePos = ScaleQuantumToMap(getRed(oValue));
01653 eValue = equalize_map[ePos];
01654 red = getRed(eValue);
01655 ePos = ScaleQuantumToMap(getGreen(oValue));
01656 eValue = equalize_map[ePos];
01657 green = getRed(eValue);
01658 ePos = ScaleQuantumToMap(getBlue(oValue));
01659 eValue = equalize_map[ePos];
01660 blue = getRed(eValue);
01661 ePos = ScaleQuantumToMap(getAlpha(oValue));
01662 eValue = equalize_map[ePos];
01663 alpha = getRed(eValue);
01664
01665
01666 im[c]=(CLPixelType)(blue, green, red, alpha);
01667 }
01668
01669 }
01670
01671
01672
01673
01674 }
01675 )
01676
01677
01678
01679
01680
01681
01682
01683
01684
01685
01686
01687
01688
01689 STRINGIFY(
01690
01691
01692
01693 CLQuantum ApplyFunction(float pixel,const MagickFunction function,
01694 const unsigned int number_parameters,__constant float *parameters)
01695 {
01696 float result = 0.0f;
01697
01698 switch (function)
01699 {
01700 case PolynomialFunction:
01701 {
01702 for (unsigned int i=0; i < number_parameters; i++)
01703 result = result*QuantumScale*pixel + parameters[i];
01704 result *= QuantumRange;
01705 break;
01706 }
01707 case SinusoidFunction:
01708 {
01709 float freq,phase,ampl,bias;
01710 freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
01711 phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
01712 ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
01713 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
01714 result = QuantumRange*(ampl*sin(2.0f*MagickPI*
01715 (freq*QuantumScale*pixel + phase/360.0f)) + bias);
01716 break;
01717 }
01718 case ArcsinFunction:
01719 {
01720 float width,range,center,bias;
01721 width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
01722 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
01723 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
01724 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
01725
01726 result = 2.0f/width*(QuantumScale*pixel - center);
01727 result = range/MagickPI*asin(result)+bias;
01728 result = ( result <= -1.0f ) ? bias - range/2.0f : result;
01729 result = ( result >= 1.0f ) ? bias + range/2.0f : result;
01730 result *= QuantumRange;
01731 break;
01732 }
01733 case ArctanFunction:
01734 {
01735 float slope,range,center,bias;
01736 slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
01737 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
01738 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
01739 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
01740 result = MagickPI*slope*(QuantumScale*pixel-center);
01741 result = QuantumRange*(range/MagickPI*atan(result) + bias);
01742 break;
01743 }
01744 case UndefinedFunction:
01745 break;
01746 }
01747 return(ClampToQuantum(result));
01748 }
01749 )
01750
01751 STRINGIFY(
01752
01753
01754
01755
01756
01757
01758
01759 __kernel void ComputeFunction(__global CLQuantum *image,const unsigned int number_channels,
01760 const ChannelType channel,const MagickFunction function,const unsigned int number_parameters,
01761 __constant float *parameters)
01762 {
01763 const unsigned int x = get_global_id(0);
01764 const unsigned int y = get_global_id(1);
01765 const unsigned int columns = get_global_size(0);
01766 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
01767
01768 float red;
01769 float green;
01770 float blue;
01771 float alpha;
01772
01773 ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
01774
01775 if ((channel & RedChannel) != 0)
01776 red=ApplyFunction(red, function, number_parameters, parameters);
01777
01778 if (number_channels > 2)
01779 {
01780 if ((channel & GreenChannel) != 0)
01781 green=ApplyFunction(green, function, number_parameters, parameters);
01782
01783 if ((channel & BlueChannel) != 0)
01784 blue=ApplyFunction(blue, function, number_parameters, parameters);
01785 }
01786
01787 if (((number_channels == 4) || (number_channels == 2)) &&
01788 ((channel & AlphaChannel) != 0))
01789 alpha=ApplyFunction(alpha, function, number_parameters, parameters);
01790
01791 WriteChannels(p, number_channels, channel, red, green, blue, alpha);
01792 }
01793 )
01794
01795
01796
01797
01798
01799
01800
01801
01802
01803
01804
01805
01806
01807 STRINGIFY(
01808 __kernel void Grayscale(__global CLQuantum *image,const int number_channels,
01809 const unsigned int colorspace,const unsigned int method)
01810 {
01811 const unsigned int x = get_global_id(0);
01812 const unsigned int y = get_global_id(1);
01813 const unsigned int columns = get_global_size(0);
01814 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
01815
01816 float
01817 blue,
01818 green,
01819 red;
01820
01821 red=getPixelRed(p);
01822 green=getPixelGreen(p);
01823 blue=getPixelBlue(p);
01824
01825 CLQuantum intensity=ClampToQuantum(GetPixelIntensity(colorspace, method, red, green, blue));
01826
01827 setPixelRed(p,intensity);
01828 setPixelGreen(p,intensity);
01829 setPixelBlue(p,intensity);
01830 }
01831 )
01832
01833
01834
01835
01836
01837
01838
01839
01840
01841
01842
01843
01844
01845 STRINGIFY(
01846
01847 __kernel void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *tmpImage,
01848 const int radius,
01849 const int imageWidth,
01850 const int imageHeight)
01851 {
01852 const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
01853
01854 int x = get_local_id(0);
01855 int y = get_global_id(1);
01856
01857 if ((x >= imageWidth) || (y >= imageHeight))
01858 return;
01859
01860 global CLPixelType *src = srcImage + y * imageWidth;
01861
01862 for (int i = x; i < imageWidth; i += get_local_size(0)) {
01863 float sum = 0.0f;
01864 float weight = 1.0f;
01865
01866 int j = i - radius;
01867 while ((j + 7) < i) {
01868 for (int k = 0; k < 8; ++k)
01869 sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
01870 weight += 8.0f;
01871 j+=8;
01872 }
01873 while (j < i) {
01874 sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
01875 weight += 1.0f;
01876 ++j;
01877 }
01878
01879 while ((j + 7) < radius + i) {
01880 for (int k = 0; k < 8; ++k)
01881 sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
01882 weight -= 8.0f;
01883 j+=8;
01884 }
01885 while (j < radius + i) {
01886 sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
01887 weight -= 1.0f;
01888 ++j;
01889 }
01890
01891 tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
01892 }
01893 }
01894 )
01895
01896 STRINGIFY(
01897 __kernel void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *blurImage,
01898 const int radius,
01899 const float strength,
01900 const int imageWidth,
01901 const int imageHeight)
01902 {
01903 const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);
01904
01905 int x = get_global_id(0);
01906 int y = get_global_id(1);
01907
01908 if ((x >= imageWidth) || (y >= imageHeight))
01909 return;
01910
01911 global float *src = blurImage + x;
01912
01913 float sum = 0.0f;
01914 float weight = 1.0f;
01915
01916 int j = y - radius;
01917 while ((j + 7) < y) {
01918 for (int k = 0; k < 8; ++k)
01919 sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
01920 weight += 8.0f;
01921 j+=8;
01922 }
01923 while (j < y) {
01924 sum += weight * src[mirrorBottom(j) * imageWidth];
01925 weight += 1.0f;
01926 ++j;
01927 }
01928
01929 while ((j + 7) < radius + y) {
01930 for (int k = 0; k < 8; ++k)
01931 sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
01932 weight -= 8.0f;
01933 j+=8;
01934 }
01935 while (j < radius + y) {
01936 sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
01937 weight -= 1.0f;
01938 ++j;
01939 }
01940
01941 CLPixelType pixel = srcImage[x + y * imageWidth];
01942 float srcVal = dot(RGB, convert_float4(pixel));
01943 float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);
01944 mult = (srcVal + mult) / srcVal;
01945
01946 pixel.x = ClampToQuantum(pixel.x * mult);
01947 pixel.y = ClampToQuantum(pixel.y * mult);
01948 pixel.z = ClampToQuantum(pixel.z * mult);
01949
01950 dstImage[x + y * imageWidth] = pixel;
01951 }
01952 )
01953
01954
01955
01956
01957
01958
01959
01960
01961
01962
01963
01964
01965
01966 STRINGIFY(
01967
01968 static inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue,
01969 float *hue, float *saturation, float *lightness)
01970 {
01971 float
01972 c,
01973 tmax,
01974 tmin;
01975
01976
01977
01978
01979 tmax=MagickMax(QuantumScale*red,MagickMax(QuantumScale*green, QuantumScale*blue));
01980 tmin=MagickMin(QuantumScale*red,MagickMin(QuantumScale*green, QuantumScale*blue));
01981
01982 c=tmax-tmin;
01983
01984 *lightness=(tmax+tmin)/2.0;
01985 if (c <= 0.0)
01986 {
01987 *hue=0.0;
01988 *saturation=0.0;
01989 return;
01990 }
01991
01992 if (tmax == (QuantumScale*red))
01993 {
01994 *hue=(QuantumScale*green-QuantumScale*blue)/c;
01995 if ((QuantumScale*green) < (QuantumScale*blue))
01996 *hue+=6.0;
01997 }
01998 else
01999 if (tmax == (QuantumScale*green))
02000 *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c;
02001 else
02002 *hue=4.0+(QuantumScale*red-QuantumScale*green)/c;
02003
02004 *hue*=60.0/360.0;
02005 if (*lightness <= 0.5)
02006 *saturation=c/(2.0*(*lightness));
02007 else
02008 *saturation=c/(2.0-2.0*(*lightness));
02009 }
02010
02011 static inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness,
02012 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
02013 {
02014 float
02015 b,
02016 c,
02017 g,
02018 h,
02019 tmin,
02020 r,
02021 x;
02022
02023
02024
02025
02026 h=hue*360.0;
02027 if (lightness <= 0.5)
02028 c=2.0*lightness*saturation;
02029 else
02030 c=(2.0-2.0*lightness)*saturation;
02031 tmin=lightness-0.5*c;
02032 h-=360.0*floor(h/360.0);
02033 h/=60.0;
02034 x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
02035 switch ((int) floor(h) % 6)
02036 {
02037 case 0:
02038 {
02039 r=tmin+c;
02040 g=tmin+x;
02041 b=tmin;
02042 break;
02043 }
02044 case 1:
02045 {
02046 r=tmin+x;
02047 g=tmin+c;
02048 b=tmin;
02049 break;
02050 }
02051 case 2:
02052 {
02053 r=tmin;
02054 g=tmin+c;
02055 b=tmin+x;
02056 break;
02057 }
02058 case 3:
02059 {
02060 r=tmin;
02061 g=tmin+x;
02062 b=tmin+c;
02063 break;
02064 }
02065 case 4:
02066 {
02067 r=tmin+x;
02068 g=tmin;
02069 b=tmin+c;
02070 break;
02071 }
02072 case 5:
02073 {
02074 r=tmin+c;
02075 g=tmin;
02076 b=tmin+x;
02077 break;
02078 }
02079 default:
02080 {
02081 r=0.0;
02082 g=0.0;
02083 b=0.0;
02084 }
02085 }
02086 *red=ClampToQuantum(QuantumRange*r);
02087 *green=ClampToQuantum(QuantumRange*g);
02088 *blue=ClampToQuantum(QuantumRange*b);
02089 }
02090
02091 static inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness,
02092 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
02093 {
02094 float
02095 hue,
02096 lightness,
02097 saturation;
02098
02099
02100
02101
02102 ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);
02103 hue+=0.5*(0.01*percent_hue-1.0);
02104 while (hue < 0.0)
02105 hue+=1.0;
02106 while (hue >= 1.0)
02107 hue-=1.0;
02108 saturation*=0.01*percent_saturation;
02109 lightness*=0.01*percent_lightness;
02110 ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
02111 }
02112
02113 __kernel void Modulate(__global CLPixelType *im,
02114 const float percent_brightness,
02115 const float percent_hue,
02116 const float percent_saturation,
02117 const int colorspace)
02118 {
02119
02120 const int x = get_global_id(0);
02121 const int y = get_global_id(1);
02122 const int columns = get_global_size(0);
02123 const int c = x + y * columns;
02124
02125 CLPixelType pixel = im[c];
02126
02127 CLQuantum
02128 blue,
02129 green,
02130 red;
02131
02132 red=getRed(pixel);
02133 green=getGreen(pixel);
02134 blue=getBlue(pixel);
02135
02136 switch (colorspace)
02137 {
02138 case HSLColorspace:
02139 default:
02140 {
02141 ModulateHSL(percent_hue, percent_saturation, percent_brightness,
02142 &red, &green, &blue);
02143 }
02144
02145 }
02146
02147 CLPixelType filteredPixel;
02148
02149 setRed(&filteredPixel, red);
02150 setGreen(&filteredPixel, green);
02151 setBlue(&filteredPixel, blue);
02152 filteredPixel.w = pixel.w;
02153
02154 im[c] = filteredPixel;
02155 }
02156 )
02157
02158
02159
02160
02161
02162
02163
02164
02165
02166
02167
02168
02169
02170 STRINGIFY(
02171 __kernel
02172 void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output,
02173 const unsigned int imageWidth, const unsigned int imageHeight,
02174 const __global float *filter, const unsigned int width, const __global int2* offset,
02175 const float4 bias,
02176 const ChannelType channel, const unsigned int matte) {
02177
02178 int2 currentPixel;
02179 currentPixel.x = get_global_id(0);
02180 currentPixel.y = get_global_id(1);
02181
02182 if (currentPixel.x >= imageWidth
02183 || currentPixel.y >= imageHeight)
02184 return;
02185
02186 float4 pixel;
02187 pixel.x = (float)bias.x;
02188 pixel.y = (float)bias.y;
02189 pixel.z = (float)bias.z;
02190 pixel.w = (float)bias.w;
02191
02192 if (((channel & AlphaChannel) == 0) || (matte == 0)) {
02193
02194 for (int i = 0; i < width; i++) {
02195
02196
02197 int2 samplePixel = currentPixel + offset[i];
02198 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
02199 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
02200 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
02201
02202 pixel.x += (filter[i] * (float)samplePixelValue.x);
02203 pixel.y += (filter[i] * (float)samplePixelValue.y);
02204 pixel.z += (filter[i] * (float)samplePixelValue.z);
02205 pixel.w += (filter[i] * (float)samplePixelValue.w);
02206 }
02207
02208 CLPixelType outputPixel;
02209 outputPixel.x = ClampToQuantum(pixel.x);
02210 outputPixel.y = ClampToQuantum(pixel.y);
02211 outputPixel.z = ClampToQuantum(pixel.z);
02212 outputPixel.w = ClampToQuantum(pixel.w);
02213 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
02214 }
02215 else {
02216
02217 float gamma = 0.0f;
02218 for (int i = 0; i < width; i++) {
02219
02220
02221 int2 samplePixel = currentPixel + offset[i];
02222 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
02223 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
02224
02225 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
02226
02227 float alpha = QuantumScale*samplePixelValue.w;
02228 float k = filter[i];
02229 pixel.x = pixel.x + k * alpha * samplePixelValue.x;
02230 pixel.y = pixel.y + k * alpha * samplePixelValue.y;
02231 pixel.z = pixel.z + k * alpha * samplePixelValue.z;
02232
02233 pixel.w += k * alpha * samplePixelValue.w;
02234
02235 gamma+=k*alpha;
02236 }
02237 gamma = PerceptibleReciprocal(gamma);
02238 pixel.xyz = gamma*pixel.xyz;
02239
02240 CLPixelType outputPixel;
02241 outputPixel.x = ClampToQuantum(pixel.x);
02242 outputPixel.y = ClampToQuantum(pixel.y);
02243 outputPixel.z = ClampToQuantum(pixel.z);
02244 outputPixel.w = ClampToQuantum(pixel.w);
02245 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
02246 }
02247 }
02248 )
02249
02250
02251
02252
02253
02254
02255
02256
02257
02258
02259
02260
02261
02262 STRINGIFY(
02263
02264 float BoxResizeFilter(const float x)
02265 {
02266 return 1.0f;
02267 }
02268 )
02269
02270 STRINGIFY(
02271
02272 float CubicBC(const float x,const __global float* resizeFilterCoefficients)
02273 {
02274
02275
02276
02277
02278
02279
02280
02281
02282
02283
02284
02285
02286
02287
02288
02289
02290
02291
02292
02293
02294
02295
02296
02297
02298
02299
02300
02301
02302
02303 if (x < 1.0)
02304 return(resizeFilterCoefficients[0]+x*(x*
02305 (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
02306 if (x < 2.0)
02307 return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
02308 (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
02309 return(0.0);
02310 }
02311 )
02312
02313 STRINGIFY(
02314 float Sinc(const float x)
02315 {
02316 if (x != 0.0f)
02317 {
02318 const float alpha=(float) (MagickPI*x);
02319 return sinpi(x)/alpha;
02320 }
02321 return(1.0f);
02322 }
02323 )
02324
02325 STRINGIFY(
02326 float Triangle(const float x)
02327 {
02328
02329
02330
02331
02332
02333 return ((x<1.0f)?(1.0f-x):0.0f);
02334 }
02335 )
02336
02337
02338 STRINGIFY(
02339 float Hann(const float x)
02340 {
02341
02342
02343
02344
02345 const float cosine=cos((MagickPI*x));
02346 return(0.5f+0.5f*cosine);
02347 }
02348 )
02349
02350 STRINGIFY(
02351 float Hamming(const float x)
02352 {
02353
02354
02355
02356
02357 const float cosine=cos((MagickPI*x));
02358 return(0.54f+0.46f*cosine);
02359 }
02360 )
02361
02362 STRINGIFY(
02363 float Blackman(const float x)
02364 {
02365
02366
02367
02368
02369
02370
02371
02372 const float cosine=cos((MagickPI*x));
02373 return(0.34f+cosine*(0.5f+cosine*0.16f));
02374 }
02375 )
02376
02377 STRINGIFY(
02378 static inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients)
02379 {
02380 switch (filterType)
02381 {
02382
02383
02384 case SincWeightingFunction:
02385 case SincFastWeightingFunction:
02386 return Sinc(x);
02387 case CubicBCWeightingFunction:
02388 return CubicBC(x,filterCoefficients);
02389 case BoxWeightingFunction:
02390 return BoxResizeFilter(x);
02391 case TriangleWeightingFunction:
02392 return Triangle(x);
02393 case HannWeightingFunction:
02394 return Hann(x);
02395 case HammingWeightingFunction:
02396 return Hamming(x);
02397 case BlackmanWeightingFunction:
02398 return Blackman(x);
02399
02400 default:
02401 return 0.0f;
02402 }
02403 }
02404 )
02405
02406
02407 STRINGIFY(
02408 static inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType
02409 , const ResizeWeightingFunctionType resizeWindowType
02410 , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x)
02411 {
02412 float scale;
02413 float xBlur = fabs(x/resizeFilterBlur);
02414 if (resizeWindowSupport < MagickEpsilon
02415 || resizeWindowType == BoxWeightingFunction)
02416 {
02417 scale = 1.0f;
02418 }
02419 else
02420 {
02421 scale = resizeFilterScale;
02422 scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
02423 }
02424 float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
02425 return weight;
02426 }
02427
02428 )
02429
02430 ;
02431 const char *accelerateKernels2 =
02432
02433 STRINGIFY(
02434
02435 static inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
02436 return (numWorkItems/pixelPerWorkgroup);
02437 }
02438
02439
02440
02441 static inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
02442 const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
02443 int pixelIndex = itemID/numWorkItemsPerPixel;
02444 pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
02445 return pixelIndex;
02446 }
02447
02448 )
02449
02450 STRINGIFY(
02451 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
02452 void ResizeHorizontalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
02453 const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
02454 const unsigned int filteredColumns, const unsigned int filteredRows, const float xFactor,
02455 const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
02456 const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
02457 const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
02458 const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
02459 __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
02460 {
02461
02462 const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
02463 const unsigned int stopX = MagickMin(startX + pixelPerWorkgroup,filteredColumns);
02464 const unsigned int actualNumPixelToCompute = stopX - startX;
02465
02466
02467 float scale = MagickMax(1.0f/xFactor+MagickEpsilon ,1.0f);
02468 const float support = MagickMax(scale*resizeFilterSupport,0.5f);
02469 scale = PerceptibleReciprocal(scale);
02470
02471 const int cacheRangeStartX = MagickMax((int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(int)(0));
02472 const int cacheRangeEndX = MagickMin((int)(cacheRangeStartX + numCachedPixels), (int)inputColumns);
02473
02474
02475 const unsigned int y = get_global_id(1);
02476 const unsigned int pos = getPixelIndex(number_channels, inputColumns, cacheRangeStartX, y);
02477 const unsigned int num_elements = (cacheRangeEndX - cacheRangeStartX) * number_channels;
02478 event_t e = async_work_group_copy(inputImageCache, inputImage + pos, num_elements, 0);
02479 wait_group_events(1, &e);
02480
02481 unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
02482 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
02483 for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
02484 {
02485 const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
02486 const unsigned int chunkStopX = MagickMin(chunkStartX + pixelChunkSize, stopX);
02487 const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
02488
02489
02490 const unsigned int itemID = get_local_id(0);
02491 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
02492
02493 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
02494
02495 float4 filteredPixel = (float4)0.0f;
02496 float density = 0.0f;
02497 float gamma = 0.0f;
02498
02499 if (pixelIndex != -1)
02500 {
02501
02502 const int x = chunkStartX + pixelIndex;
02503
02504
02505 const float bisect = (x+0.5)/xFactor+MagickEpsilon;
02506 const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
02507 const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputColumns);
02508 const unsigned int n = stop - start;
02509
02510
02511 unsigned int numStepsPerWorkItem = n / numItems;
02512 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
02513
02514 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
02515 if (startStep < n)
02516 {
02517 const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
02518
02519 unsigned int cacheIndex = start+startStep-cacheRangeStartX;
02520 for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
02521 {
02522 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
02523 (ResizeWeightingFunctionType) resizeFilterType,
02524 (ResizeWeightingFunctionType) resizeWindowType,
02525 resizeFilterScale, resizeFilterWindowSupport,
02526 resizeFilterBlur, scale*(start + i - bisect + 0.5));
02527
02528 float4 cp = (float4)0.0f;
02529
02530 __local CLQuantum *p = inputImageCache + (cacheIndex*number_channels);
02531 cp.x = (float) *(p);
02532 if (number_channels > 2)
02533 {
02534 cp.y = (float) *(p + 1);
02535 cp.z = (float) *(p + 2);
02536 }
02537
02538 if (alpha_index != 0)
02539 {
02540 cp.w = (float) *(p + alpha_index);
02541
02542 float alpha = weight * QuantumScale * cp.w;
02543
02544 filteredPixel.x += alpha * cp.x;
02545 filteredPixel.y += alpha * cp.y;
02546 filteredPixel.z += alpha * cp.z;
02547 filteredPixel.w += weight * cp.w;
02548 gamma += alpha;
02549 }
02550 else
02551 filteredPixel += ((float4) weight)*cp;
02552
02553 density += weight;
02554 }
02555 }
02556 }
02557
02558
02559 if (itemID < actualNumPixelInThisChunk) {
02560 outputPixelCache[itemID] = (float4)0.0f;
02561 densityCache[itemID] = 0.0f;
02562 if (alpha_index != 0)
02563 gammaCache[itemID] = 0.0f;
02564 }
02565 barrier(CLK_LOCAL_MEM_FENCE);
02566
02567
02568 for (unsigned int i = 0; i < numItems; i++) {
02569 if (pixelIndex != -1) {
02570 if (itemID%numItems == i) {
02571 outputPixelCache[pixelIndex]+=filteredPixel;
02572 densityCache[pixelIndex]+=density;
02573 if (alpha_index != 0)
02574 gammaCache[pixelIndex]+=gamma;
02575 }
02576 }
02577 barrier(CLK_LOCAL_MEM_FENCE);
02578 }
02579
02580 if (itemID < actualNumPixelInThisChunk)
02581 {
02582 float4 filteredPixel = outputPixelCache[itemID];
02583
02584 float gamma = 0.0f;
02585 if (alpha_index != 0)
02586 gamma = gammaCache[itemID];
02587
02588 float density = densityCache[itemID];
02589 if ((density != 0.0f) && (density != 1.0f))
02590 {
02591 density = PerceptibleReciprocal(density);
02592 filteredPixel *= (float4) density;
02593 if (alpha_index != 0)
02594 gamma *= density;
02595 }
02596
02597 if (alpha_index != 0)
02598 {
02599 gamma = PerceptibleReciprocal(gamma);
02600 filteredPixel.x *= gamma;
02601 filteredPixel.y *= gamma;
02602 filteredPixel.z *= gamma;
02603 }
02604
02605 WriteAllChannels(filteredImage, number_channels, filteredColumns, chunkStartX + itemID, y, filteredPixel);
02606 }
02607 }
02608 }
02609 )
02610
02611
02612 STRINGIFY(
02613 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
02614 void ResizeVerticalFilter(const __global CLQuantum *inputImage, const unsigned int number_channels,
02615 const unsigned int inputColumns, const unsigned int inputRows, __global CLQuantum *filteredImage,
02616 const unsigned int filteredColumns, const unsigned int filteredRows, const float yFactor,
02617 const int resizeFilterType, const int resizeWindowType, const __global float *resizeFilterCubicCoefficients,
02618 const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport,
02619 const float resizeFilterBlur, __local CLQuantum *inputImageCache, const int numCachedPixels,
02620 const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize,
02621 __local float4 *outputPixelCache, __local float *densityCache, __local float *gammaCache)
02622 {
02623
02624 const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
02625 const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows);
02626 const unsigned int actualNumPixelToCompute = stopY - startY;
02627
02628
02629 float scale = MagickMax(1.0f/yFactor+MagickEpsilon ,1.0f);
02630 const float support = MagickMax(scale*resizeFilterSupport,0.5f);
02631 scale = PerceptibleReciprocal(scale);
02632
02633 const int cacheRangeStartY = MagickMax((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0));
02634 const int cacheRangeEndY = MagickMin((int)(cacheRangeStartY + numCachedPixels), (int)inputRows);
02635
02636
02637 const unsigned int x = get_global_id(0);
02638 unsigned int pos = getPixelIndex(number_channels, inputColumns, x, cacheRangeStartY);
02639 unsigned int rangeLength = cacheRangeEndY-cacheRangeStartY;
02640 unsigned int stride = inputColumns * number_channels;
02641 for (unsigned int i = 0; i < number_channels; i++)
02642 {
02643 event_t e = async_work_group_strided_copy(inputImageCache + (rangeLength*i), inputImage+pos+i, rangeLength, stride, 0);
02644 wait_group_events(1,&e);
02645 }
02646
02647 unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
02648 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
02649 for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
02650 {
02651 const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
02652 const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY);
02653 const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
02654
02655
02656 const unsigned int itemID = get_local_id(1);
02657 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
02658
02659 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
02660
02661 float4 filteredPixel = (float4)0.0f;
02662 float density = 0.0f;
02663 float gamma = 0.0f;
02664
02665 if (pixelIndex != -1)
02666 {
02667
02668 const int y = chunkStartY + pixelIndex;
02669
02670
02671 const float bisect = (y+0.5)/yFactor+MagickEpsilon;
02672 const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
02673 const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputRows);
02674 const unsigned int n = stop - start;
02675
02676
02677 unsigned int numStepsPerWorkItem = n / numItems;
02678 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
02679
02680 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
02681 if (startStep < n)
02682 {
02683 const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
02684
02685 unsigned int cacheIndex = start+startStep-cacheRangeStartY;
02686 for (unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
02687 {
02688 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
02689 (ResizeWeightingFunctionType) resizeFilterType,
02690 (ResizeWeightingFunctionType) resizeWindowType,
02691 resizeFilterScale, resizeFilterWindowSupport,
02692 resizeFilterBlur, scale*(start + i - bisect + 0.5));
02693
02694 float4 cp = (float4)0.0f;
02695
02696 __local CLQuantum *p = inputImageCache + cacheIndex;
02697 cp.x = (float) *(p);
02698 if (number_channels > 2)
02699 {
02700 cp.y = (float) *(p + rangeLength);
02701 cp.z = (float) *(p + (rangeLength * 2));
02702 }
02703
02704 if (alpha_index != 0)
02705 {
02706 cp.w = (float) *(p + (rangeLength * alpha_index));
02707
02708 float alpha = weight * QuantumScale * cp.w;
02709
02710 filteredPixel.x += alpha * cp.x;
02711 filteredPixel.y += alpha * cp.y;
02712 filteredPixel.z += alpha * cp.z;
02713 filteredPixel.w += weight * cp.w;
02714 gamma += alpha;
02715 }
02716 else
02717 filteredPixel += ((float4) weight)*cp;
02718
02719 density += weight;
02720 }
02721 }
02722 }
02723
02724
02725 if (itemID < actualNumPixelInThisChunk) {
02726 outputPixelCache[itemID] = (float4)0.0f;
02727 densityCache[itemID] = 0.0f;
02728 if (alpha_index != 0)
02729 gammaCache[itemID] = 0.0f;
02730 }
02731 barrier(CLK_LOCAL_MEM_FENCE);
02732
02733
02734 for (unsigned int i = 0; i < numItems; i++) {
02735 if (pixelIndex != -1) {
02736 if (itemID%numItems == i) {
02737 outputPixelCache[pixelIndex]+=filteredPixel;
02738 densityCache[pixelIndex]+=density;
02739 if (alpha_index != 0)
02740 gammaCache[pixelIndex]+=gamma;
02741 }
02742 }
02743 barrier(CLK_LOCAL_MEM_FENCE);
02744 }
02745
02746 if (itemID < actualNumPixelInThisChunk)
02747 {
02748 float4 filteredPixel = outputPixelCache[itemID];
02749
02750 float gamma = 0.0f;
02751 if (alpha_index != 0)
02752 gamma = gammaCache[itemID];
02753
02754 float density = densityCache[itemID];
02755 if ((density != 0.0f) && (density != 1.0f))
02756 {
02757 density = PerceptibleReciprocal(density);
02758 filteredPixel *= (float4) density;
02759 if (alpha_index != 0)
02760 gamma *= density;
02761 }
02762
02763 if (alpha_index != 0)
02764 {
02765 gamma = PerceptibleReciprocal(gamma);
02766 filteredPixel.x *= gamma;
02767 filteredPixel.y *= gamma;
02768 filteredPixel.z *= gamma;
02769 }
02770
02771 WriteAllChannels(filteredImage, number_channels, filteredColumns, x, chunkStartY + itemID, filteredPixel);
02772 }
02773 }
02774 }
02775 )
02776
02777
02778
02779
02780
02781
02782
02783
02784
02785
02786
02787
02788
02789 STRINGIFY(
02790 __kernel void RotationalBlur(const __global CLQuantum *image,
02791 const unsigned int number_channels,const unsigned int channel,
02792 const float2 blurCenter,__constant float *cos_theta,
02793 __constant float *sin_theta,const unsigned int cossin_theta_size,
02794 __global CLQuantum *filteredImage)
02795 {
02796 const int x = get_global_id(0);
02797 const int y = get_global_id(1);
02798 const int columns = get_global_size(0);
02799 const int rows = get_global_size(1);
02800 unsigned int step = 1;
02801 float center_x = (float) x - blurCenter.x;
02802 float center_y = (float) y - blurCenter.y;
02803 float radius = hypot(center_x, center_y);
02804
02805 float blur_radius = hypot(blurCenter.x, blurCenter.y);
02806
02807 if (radius > MagickEpsilon)
02808 {
02809 step = (unsigned int) (blur_radius / radius);
02810 if (step == 0)
02811 step = 1;
02812 if (step >= cossin_theta_size)
02813 step = cossin_theta_size-1;
02814 }
02815
02816 float4 result = 0.0f;
02817 float normalize = 0.0f;
02818 float gamma = 0.0f;
02819
02820 for (unsigned int i=0; i<cossin_theta_size; i+=step)
02821 {
02822 int cx = ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns);
02823 int cy = ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f,rows);
02824
02825 float4 pixel = ReadAllChannels(image, number_channels, columns, cx, cy);
02826
02827 if ((number_channels == 4) || (number_channels == 2))
02828 {
02829 float alpha = (float)(QuantumScale*pixel.w);
02830
02831 gamma += alpha;
02832
02833 result.x += alpha * pixel.x;
02834 result.y += alpha * pixel.y;
02835 result.z += alpha * pixel.z;
02836 result.w += pixel.w;
02837 }
02838 else
02839 result += pixel;
02840
02841 normalize += 1.0f;
02842 }
02843
02844 normalize = PerceptibleReciprocal(normalize);
02845
02846 if ((number_channels == 4) || (number_channels == 2))
02847 {
02848 gamma = PerceptibleReciprocal(gamma);
02849 result.x *= gamma;
02850 result.y *= gamma;
02851 result.z *= gamma;
02852 result.w *= normalize;
02853 }
02854 else
02855 result *= normalize;
02856
02857 WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
02858 }
02859 )
02860
02861
02862
02863
02864
02865
02866
02867
02868
02869
02870
02871
02872
02873 STRINGIFY(
02874 __kernel void UnsharpMaskBlurColumn(const __global CLQuantum* image,
02875 const __global float4 *blurRowData,const unsigned int number_channels,
02876 const ChannelType channel,const unsigned int columns,
02877 const unsigned int rows,__local float4* cachedData,
02878 __local float* cachedFilter,const __global float *filter,
02879 const unsigned int width,const float gain, const float threshold,
02880 __global CLQuantum *filteredImage)
02881 {
02882 const unsigned int radius = (width-1)/2;
02883
02884
02885 const int groupX = get_group_id(0);
02886 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
02887 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
02888
02889 if ((groupStartY >= 0) && (groupStopY < rows))
02890 {
02891 event_t e = async_work_group_strided_copy(cachedData,
02892 blurRowData+groupStartY*columns+groupX,groupStopY-groupStartY,columns,0);
02893 wait_group_events(1,&e);
02894 }
02895 else
02896 {
02897 for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1))
02898 cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,rows)*columns + groupX];
02899
02900 barrier(CLK_LOCAL_MEM_FENCE);
02901 }
02902
02903 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
02904 wait_group_events(1,&e);
02905
02906
02907 const int cy = get_global_id(1);
02908
02909 if (cy < rows)
02910 {
02911 float4 blurredPixel = (float4) 0.0f;
02912
02913 int i = 0;
02914
02915 for ( ; i+7 < width; )
02916 {
02917 for (int j=0; j < 8; j++, i++)
02918 blurredPixel+=cachedFilter[i+j]*cachedData[i+j+get_local_id(1)];
02919 }
02920
02921 for ( ; i < width; i++)
02922 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
02923
02924 float4 inputImagePixel = ReadFloat4(image,number_channels,columns,groupX,cy,channel);
02925 float4 outputPixel = inputImagePixel - blurredPixel;
02926
02927 float quantumThreshold = QuantumRange*threshold;
02928
02929 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
02930 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
02931
02932
02933 WriteFloat4(filteredImage,number_channels,columns,groupX,cy,channel,outputPixel);
02934 }
02935 }
02936 )
02937
02938 STRINGIFY(
02939 __kernel void UnsharpMask(const __global CLQuantum *image,const unsigned int number_channels,
02940 const ChannelType channel,__constant float *filter,const unsigned int width,
02941 const unsigned int columns,const unsigned int rows,__local float4 *pixels,
02942 const float gain,const float threshold,__global CLQuantum *filteredImage)
02943 {
02944 const unsigned int x = get_global_id(0);
02945 const unsigned int y = get_global_id(1);
02946
02947 const unsigned int radius = (width - 1) / 2;
02948
02949 int row = y - radius;
02950 int baseRow = get_group_id(1) * get_local_size(1) - radius;
02951 int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
02952
02953 while (row < endRow) {
02954 int srcy = (row < 0) ? -row : row;
02955 srcy = (srcy >= rows) ? (2 * rows - srcy - 1) : srcy;
02956
02957 float4 value = 0.0f;
02958
02959 int ix = x - radius;
02960 int i = 0;
02961
02962 while (i + 7 < width) {
02963 for (int j = 0; j < 8; ++j) {
02964 int srcx = ix + j;
02965 srcx = (srcx < 0) ? -srcx : srcx;
02966 srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
02967 value += filter[i + j] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
02968 }
02969 ix += 8;
02970 i += 8;
02971 }
02972
02973 while (i < width) {
02974 int srcx = (ix < 0) ? -ix : ix;
02975 srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
02976 value += filter[i] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
02977 ++i;
02978 ++ix;
02979 }
02980 pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
02981 row += get_local_size(1);
02982 }
02983
02984 barrier(CLK_LOCAL_MEM_FENCE);
02985
02986 const int px = get_local_id(0);
02987 const int py = get_local_id(1);
02988 const int prp = get_local_size(0);
02989 float4 value = (float4)(0.0f);
02990
02991 int i = 0;
02992 while (i + 7 < width) {
02993 for (int j = 0; j < 8; ++j)
02994 value += (float4)(filter[i]) * pixels[px + (py + i + j) * prp];
02995 i += 8;
02996 }
02997 while (i < width) {
02998 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
02999 ++i;
03000 }
03001
03002 if ((x < columns) && (y < rows)) {
03003 float4 srcPixel = ReadFloat4(image, number_channels, columns, x, y, channel);
03004 float4 diff = srcPixel - value;
03005
03006 float quantumThreshold = QuantumRange*threshold;
03007
03008 int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
03009 value = select(srcPixel + diff * gain, srcPixel, mask);
03010
03011 WriteFloat4(filteredImage, number_channels, columns, x, y, channel, value);
03012 }
03013 }
03014 )
03015
03016 STRINGIFY(
03017 __kernel __attribute__((reqd_work_group_size(64, 4, 1)))
03018 void WaveletDenoise(__global CLQuantum *srcImage,__global CLQuantum *dstImage,
03019 const unsigned int number_channels,const unsigned int max_channels,
03020 const float threshold,const int passes,const unsigned int imageWidth,
03021 const unsigned int imageHeight)
03022 {
03023 const int pad = (1 << (passes - 1));
03024 const int tileSize = 64;
03025 const int tileRowPixels = 64;
03026 const float noise[] = { 0.8002, 0.2735, 0.1202, 0.0585, 0.0291, 0.0152, 0.0080, 0.0044 };
03027
03028 CLQuantum stage[48];
03029
03030 local float buffer[64 * 64];
03031
03032 int srcx = (get_group_id(0) + get_global_offset(0) / tileSize) * (tileSize - 2 * pad) - pad + get_local_id(0);
03033 int srcy = (get_group_id(1) + get_global_offset(1) / 4) * (tileSize - 2 * pad) - pad;
03034
03035 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
03036 int pos = (mirrorTop(mirrorBottom(srcx), imageWidth) * number_channels) +
03037 (mirrorTop(mirrorBottom(srcy + i), imageHeight)) * imageWidth * number_channels;
03038
03039 for (int channel = 0; channel < max_channels; ++channel)
03040 stage[(i / 4) + (16 * channel)] = srcImage[pos + channel];
03041 }
03042
03043 for (int channel = 0; channel < max_channels; ++channel) {
03044
03045 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03046 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[(i / 4) + (16 * channel)]);
03047
03048
03049
03050 float tmp[16];
03051 float accum[16];
03052 float pixel;
03053
03054 for (int i = 0; i < 16; i++)
03055 accum[i]=0.0f;
03056
03057 for (int pass = 0; pass < passes; ++pass) {
03058 const int radius = 1 << pass;
03059 const int x = get_local_id(0);
03060 const float thresh = threshold * noise[pass];
03061
03062
03063 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
03064 const int offset = i * tileRowPixels;
03065 if (pass == 0)
03066 tmp[i / 4] = buffer[x + offset];
03067 pixel = 0.5f * tmp[i / 4] + 0.25 * (buffer[mirrorBottom(x - radius) + offset] + buffer[mirrorTop(x + radius, tileSize) + offset]);
03068 barrier(CLK_LOCAL_MEM_FENCE);
03069 buffer[x + offset] = pixel;
03070 }
03071 barrier(CLK_LOCAL_MEM_FENCE);
03072
03073
03074 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
03075 pixel = 0.5f * buffer[x + i * tileRowPixels] + 0.25 * (buffer[x + mirrorBottom(i - radius) * tileRowPixels] + buffer[x + mirrorTop(i + radius, tileRowPixels) * tileRowPixels]);
03076 float delta = tmp[i / 4] - pixel;
03077 tmp[i / 4] = pixel;
03078 if (delta < -thresh)
03079 delta += thresh;
03080 else if (delta > thresh)
03081 delta -= thresh;
03082 else
03083 delta = 0;
03084 accum[i / 4] += delta;
03085 }
03086 barrier(CLK_LOCAL_MEM_FENCE);
03087
03088 if (pass < passes - 1)
03089 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03090 buffer[x + i * tileRowPixels] = tmp[i / 4];
03091 else
03092 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03093 accum[i / 4] += tmp[i / 4];
03094 barrier(CLK_LOCAL_MEM_FENCE);
03095 }
03096
03097 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03098 stage[(i / 4) + (16 * channel)] = ClampToQuantum(accum[i / 4]);
03099
03100 barrier(CLK_LOCAL_MEM_FENCE);
03101 }
03102
03103
03104
03105 if ((get_local_id(0) >= pad) && (get_local_id(0) < tileSize - pad) && (srcx >= 0) && (srcx < imageWidth)) {
03106 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
03107 if ((i >= pad) && (i < tileSize - pad) && (srcy + i >= 0) && (srcy + i < imageHeight)) {
03108 int pos = (srcx * number_channels) + ((srcy + i) * (imageWidth * number_channels));
03109 for (int channel = 0; channel < max_channels; ++channel) {
03110 dstImage[pos + channel] = stage[(i / 4) + (16 * channel)];
03111 }
03112 }
03113 }
03114 }
03115 }
03116 )
03117
03118 ;
03119
03120 #endif // MAGICKCORE_OPENCL_SUPPORT
03121
03122 #if defined(__cplusplus) || defined(c_plusplus)
03123 }
03124 #endif
03125
03126 #endif // MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H