00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
00020 #define MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
00021
00022 #if defined(__cplusplus) || defined(c_plusplus)
00023 extern "C" {
00024 #endif
00025
00026 #if defined(MAGICKCORE_OPENCL_SUPPORT)
00027
00028
00029
00030
00031 #define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
00032 #define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
00033 #define OPENCL_ELSE() "\n #""else " " \n"
00034 #define OPENCL_ENDIF() "\n #""endif " " \n"
00035 #define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
00036 #define STRINGIFY(...) #__VA_ARGS__ "\n"
00037
00038 const char* accelerateKernels =
00039
00040
00041
00042
00043 OPENCL_DEFINE(GetPixelAlpha(pixel),(QuantumRange-(pixel).w))
00044 OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
00045 OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f))
00046 OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
00047 OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
00048 OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
00049 OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
00050 OPENCL_DEFINE(SigmaRandom, (attenuate))
00051 OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
00052 OPENCL_DEFINE(MagickMax(x, y), (((x) > (y)) ? (x) : (y)))
00053 OPENCL_DEFINE(MagickMin(x, y), (((x) < (y)) ? (x) : (y)))
00054
00055
00056
00057
00058 STRINGIFY(
00059 typedef enum
00060 {
00061 UndefinedColorspace,
00062 RGBColorspace,
00063 GRAYColorspace,
00064 TransparentColorspace,
00065 OHTAColorspace,
00066 LabColorspace,
00067 XYZColorspace,
00068 YCbCrColorspace,
00069 YCCColorspace,
00070 YIQColorspace,
00071 YPbPrColorspace,
00072 YUVColorspace,
00073 CMYKColorspace,
00074 sRGBColorspace,
00075 HSBColorspace,
00076 HSLColorspace,
00077 HWBColorspace,
00078 Rec601LumaColorspace,
00079 Rec601YCbCrColorspace,
00080 Rec709LumaColorspace,
00081 Rec709YCbCrColorspace,
00082 LogColorspace,
00083 CMYColorspace,
00084 LuvColorspace,
00085 HCLColorspace,
00086 LCHColorspace,
00087 LMSColorspace,
00088 LCHabColorspace,
00089 LCHuvColorspace,
00090 scRGBColorspace,
00091 HSIColorspace,
00092 HSVColorspace,
00093 HCLpColorspace,
00094 YDbDrColorspace,
00095 LinearGRAYColorspace
00096 } ColorspaceType;
00097 )
00098
00099 STRINGIFY(
00100 typedef enum
00101 {
00102 UndefinedCompositeOp,
00103 NoCompositeOp,
00104 ModulusAddCompositeOp,
00105 AtopCompositeOp,
00106 BlendCompositeOp,
00107 BumpmapCompositeOp,
00108 ChangeMaskCompositeOp,
00109 ClearCompositeOp,
00110 ColorBurnCompositeOp,
00111 ColorDodgeCompositeOp,
00112 ColorizeCompositeOp,
00113 CopyBlackCompositeOp,
00114 CopyBlueCompositeOp,
00115 CopyCompositeOp,
00116 CopyCyanCompositeOp,
00117 CopyGreenCompositeOp,
00118 CopyMagentaCompositeOp,
00119 CopyOpacityCompositeOp,
00120 CopyRedCompositeOp,
00121 CopyYellowCompositeOp,
00122 DarkenCompositeOp,
00123 DstAtopCompositeOp,
00124 DstCompositeOp,
00125 DstInCompositeOp,
00126 DstOutCompositeOp,
00127 DstOverCompositeOp,
00128 DifferenceCompositeOp,
00129 DisplaceCompositeOp,
00130 DissolveCompositeOp,
00131 ExclusionCompositeOp,
00132 HardLightCompositeOp,
00133 HueCompositeOp,
00134 InCompositeOp,
00135 LightenCompositeOp,
00136 LinearLightCompositeOp,
00137 LuminizeCompositeOp,
00138 MinusDstCompositeOp,
00139 ModulateCompositeOp,
00140 MultiplyCompositeOp,
00141 OutCompositeOp,
00142 OverCompositeOp,
00143 OverlayCompositeOp,
00144 PlusCompositeOp,
00145 ReplaceCompositeOp,
00146 SaturateCompositeOp,
00147 ScreenCompositeOp,
00148 SoftLightCompositeOp,
00149 SrcAtopCompositeOp,
00150 SrcCompositeOp,
00151 SrcInCompositeOp,
00152 SrcOutCompositeOp,
00153 SrcOverCompositeOp,
00154 ModulusSubtractCompositeOp,
00155 ThresholdCompositeOp,
00156 XorCompositeOp,
00157
00158
00159
00160
00161 DivideDstCompositeOp,
00162 DistortCompositeOp,
00163 BlurCompositeOp,
00164 PegtopLightCompositeOp,
00165 VividLightCompositeOp,
00166 PinLightCompositeOp,
00167 LinearDodgeCompositeOp,
00168 LinearBurnCompositeOp,
00169 MathematicsCompositeOp,
00170 DivideSrcCompositeOp,
00171 MinusSrcCompositeOp,
00172 DarkenIntensityCompositeOp,
00173 LightenIntensityCompositeOp
00174 } CompositeOperator;
00175 )
00176
00177 STRINGIFY(
00178 typedef enum
00179 {
00180 UndefinedFunction,
00181 PolynomialFunction,
00182 SinusoidFunction,
00183 ArcsinFunction,
00184 ArctanFunction
00185 } MagickFunction;
00186 )
00187
00188 STRINGIFY(
00189 typedef enum
00190 {
00191 UndefinedNoise,
00192 UniformNoise,
00193 GaussianNoise,
00194 MultiplicativeGaussianNoise,
00195 ImpulseNoise,
00196 LaplacianNoise,
00197 PoissonNoise,
00198 RandomNoise
00199 } NoiseType;
00200 )
00201
00202 STRINGIFY(
00203 typedef enum
00204 {
00205 UndefinedPixelIntensityMethod = 0,
00206 AveragePixelIntensityMethod,
00207 BrightnessPixelIntensityMethod,
00208 LightnessPixelIntensityMethod,
00209 Rec601LumaPixelIntensityMethod,
00210 Rec601LuminancePixelIntensityMethod,
00211 Rec709LumaPixelIntensityMethod,
00212 Rec709LuminancePixelIntensityMethod,
00213 RMSPixelIntensityMethod,
00214 MSPixelIntensityMethod
00215 } PixelIntensityMethod;
00216 )
00217
00218 STRINGIFY(
00219 typedef enum {
00220 BoxWeightingFunction = 0,
00221 TriangleWeightingFunction,
00222 CubicBCWeightingFunction,
00223 HanningWeightingFunction,
00224 HammingWeightingFunction,
00225 BlackmanWeightingFunction,
00226 GaussianWeightingFunction,
00227 QuadraticWeightingFunction,
00228 JincWeightingFunction,
00229 SincWeightingFunction,
00230 SincFastWeightingFunction,
00231 KaiserWeightingFunction,
00232 WelshWeightingFunction,
00233 BohmanWeightingFunction,
00234 LagrangeWeightingFunction,
00235 CosineWeightingFunction,
00236 } ResizeWeightingFunctionType;
00237 )
00238
00239 STRINGIFY(
00240 typedef enum
00241 {
00242 UndefinedChannel,
00243 RedChannel = 0x0001,
00244 GrayChannel = 0x0001,
00245 CyanChannel = 0x0001,
00246 GreenChannel = 0x0002,
00247 MagentaChannel = 0x0002,
00248 BlueChannel = 0x0004,
00249 YellowChannel = 0x0004,
00250 AlphaChannel = 0x0008,
00251 OpacityChannel = 0x0008,
00252 MatteChannel = 0x0008,
00253 BlackChannel = 0x0020,
00254 IndexChannel = 0x0020,
00255 CompositeChannels = 0x002F,
00256 AllChannels = 0x7ffffff,
00257
00258
00259
00260 TrueAlphaChannel = 0x0040,
00261 RGBChannels = 0x0080,
00262 GrayChannels = 0x0080,
00263 SyncChannels = 0x0100,
00264 DefaultChannels = ((AllChannels | SyncChannels) &~ OpacityChannel)
00265 } ChannelType;
00266 )
00267
00268
00269
00270
00271
00272 OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
00273
00274 STRINGIFY(
00275 static inline CLQuantum ScaleCharToQuantum(const unsigned char value)
00276 {
00277 return((CLQuantum) value);
00278 }
00279 )
00280
00281 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
00282
00283 STRINGIFY(
00284 static inline CLQuantum ScaleCharToQuantum(const unsigned char value)
00285 {
00286 return((CLQuantum) (257.0f*value));
00287 }
00288 )
00289
00290 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
00291
00292 STRINGIFY(
00293 static inline CLQuantum ScaleCharToQuantum(const unsigned char value)
00294 {
00295 return((CLQuantum) (16843009.0*value));
00296 }
00297 )
00298
00299 OPENCL_ENDIF()
00300
00301 OPENCL_IF((MAGICKCORE_HDRI_SUPPORT == 1))
00302
00303 STRINGIFY(
00304 static inline CLQuantum ClampToQuantum(const float value)
00305 {
00306 return (CLQuantum) value;
00307 }
00308 )
00309
00310 OPENCL_ELSE()
00311
00312 STRINGIFY(
00313 static inline CLQuantum ClampToQuantum(const float value)
00314 {
00315 return (CLQuantum) (clamp(value, 0.0f, QuantumRange) + 0.5f);
00316 }
00317 )
00318
00319 OPENCL_ENDIF()
00320
00321 STRINGIFY(
00322 static inline int ClampToCanvas(const int offset, const int range)
00323 {
00324 return clamp(offset, (int)0, range - 1);
00325 }
00326 )
00327
00328 STRINGIFY(
00329 static inline int ClampToCanvasWithHalo(const int offset, const int range, const int edge, const int section)
00330 {
00331 return clamp(offset, section ? (int)(0 - edge) : (int)0, section ? (range - 1) : (range - 1 + edge));
00332 }
00333 )
00334
00335 STRINGIFY(
00336 static inline uint ScaleQuantumToMap(CLQuantum value)
00337 {
00338 if (value >= (CLQuantum)MaxMap)
00339 return ((uint)MaxMap);
00340 else
00341 return ((uint)value);
00342 }
00343 )
00344
00345 STRINGIFY(
00346 static inline float PerceptibleReciprocal(const float x)
00347 {
00348 float sign = x < (float) 0.0 ? (float)-1.0 : (float) 1.0;
00349 return((sign*x) >= MagickEpsilon ? (float) 1.0 / x : sign*((float) 1.0 / MagickEpsilon));
00350 }
00351 )
00352
00353 STRINGIFY(
00354 static inline float RoundToUnity(const float value)
00355 {
00356 return clamp(value, 0.0f, 1.0f);
00357 }
00358 )
00359
00360 STRINGIFY(
00361
00362 static inline CLQuantum getBlue(CLPixelType p) { return p.x; }
00363 static inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
00364 static inline float getBlueF4(float4 p) { return p.x; }
00365 static inline void setBlueF4(float4* p, float value) { (*p).x = value; }
00366
00367 static inline CLQuantum getGreen(CLPixelType p) { return p.y; }
00368 static inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
00369 static inline float getGreenF4(float4 p) { return p.y; }
00370 static inline void setGreenF4(float4* p, float value) { (*p).y = value; }
00371
00372 static inline CLQuantum getRed(CLPixelType p) { return p.z; }
00373 static inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
00374 static inline float getRedF4(float4 p) { return p.z; }
00375 static inline void setRedF4(float4* p, float value) { (*p).z = value; }
00376
00377 static inline CLQuantum getOpacity(CLPixelType p) { return p.w; }
00378 static inline void setOpacity(CLPixelType* p, CLQuantum value) { (*p).w = value; }
00379 static inline float getOpacityF4(float4 p) { return p.w; }
00380 static inline void setOpacityF4(float4* p, float value) { (*p).w = value; }
00381
00382 static inline void setGray(CLPixelType* p, CLQuantum value) { (*p).z = value; (*p).y = value; (*p).x = value; }
00383
00384 static inline float GetPixelIntensity(const int method, const int colorspace, CLPixelType p)
00385 {
00386 float red = getRed(p);
00387 float green = getGreen(p);
00388 float blue = getBlue(p);
00389
00390 float intensity;
00391
00392 if (colorspace == GRAYColorspace)
00393 return red;
00394
00395 switch (method)
00396 {
00397 case AveragePixelIntensityMethod:
00398 {
00399 intensity = (red + green + blue) / 3.0;
00400 break;
00401 }
00402 case BrightnessPixelIntensityMethod:
00403 {
00404 intensity = MagickMax(MagickMax(red, green), blue);
00405 break;
00406 }
00407 case LightnessPixelIntensityMethod:
00408 {
00409 intensity = (MagickMin(MagickMin(red, green), blue) +
00410 MagickMax(MagickMax(red, green), blue)) / 2.0;
00411 break;
00412 }
00413 case MSPixelIntensityMethod:
00414 {
00415 intensity = (float)(((float)red*red + green*green + blue*blue) /
00416 (3.0*QuantumRange));
00417 break;
00418 }
00419 case Rec601LumaPixelIntensityMethod:
00420 {
00421
00422
00423
00424
00425
00426
00427
00428
00429 intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
00430 break;
00431 }
00432 case Rec601LuminancePixelIntensityMethod:
00433 {
00434
00435
00436
00437
00438
00439
00440
00441
00442 intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
00443 break;
00444 }
00445 case Rec709LumaPixelIntensityMethod:
00446 default:
00447 {
00448
00449
00450
00451
00452
00453
00454
00455
00456 intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
00457 break;
00458 }
00459 case Rec709LuminancePixelIntensityMethod:
00460 {
00461
00462
00463
00464
00465
00466
00467
00468
00469 intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
00470 break;
00471 }
00472 case RMSPixelIntensityMethod:
00473 {
00474 intensity = (float)(sqrt((float)red*red + green*green + blue*blue) /
00475 sqrt(3.0));
00476 break;
00477 }
00478 }
00479
00480 return intensity;
00481
00482 }
00483 )
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497 STRINGIFY(
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507 ulong MWC_AddMod64(ulong a, ulong b, ulong M)
00508 {
00509 ulong v=a+b;
00510
00511 if( (v>=M) || (convert_float(v) < convert_float(a)) )
00512 v=v-M;
00513 return v;
00514 }
00515
00516
00517
00518
00519
00520
00521
00522 ulong MWC_MulMod64(ulong a, ulong b, ulong M)
00523 {
00524 ulong r=0;
00525 while(a!=0){
00526 if(a&1)
00527 r=MWC_AddMod64(r,b,M);
00528 b=MWC_AddMod64(b,b,M);
00529 a=a>>1;
00530 }
00531 return r;
00532 }
00533
00534
00535
00536
00537
00538
00539 ulong MWC_PowMod64(ulong a, ulong e, ulong M)
00540 {
00541 ulong sqr=a, acc=1;
00542 while(e!=0){
00543 if(e&1)
00544 acc=MWC_MulMod64(acc,sqr,M);
00545 sqr=MWC_MulMod64(sqr,sqr,M);
00546 e=e>>1;
00547 }
00548 return acc;
00549 }
00550
00551 uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
00552 {
00553 ulong m=MWC_PowMod64(A, distance, M);
00554 ulong x=curr.x*(ulong)A+curr.y;
00555 x=MWC_MulMod64(x, m, M);
00556 return (uint2)((uint)(x/A), (uint)(x%A));
00557 }
00558
00559 uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
00560 {
00561
00562
00563
00564
00565
00566 enum{ MWC_BASEID = 4077358422479273989UL };
00567
00568 ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
00569 ulong m=MWC_PowMod64(A, dist, M);
00570
00571 ulong x=MWC_MulMod64(MWC_BASEID, m, M);
00572 return (uint2)((uint)(x/A), (uint)(x%A));
00573 }
00574
00576 typedef struct{ uint x; uint c; uint seed0; ulong seed1; } mwc64x_state_t;
00577
00578 void MWC64X_Step(mwc64x_state_t *s)
00579 {
00580 uint X=s->x, C=s->c;
00581
00582 uint Xn=s->seed0*X+C;
00583 uint carry=(uint)(Xn<C);
00584 uint Cn=mad_hi(s->seed0,X,carry);
00585
00586 s->x=Xn;
00587 s->c=Cn;
00588 }
00589
00590 void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
00591 {
00592 uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), s->seed0, s->seed1, distance);
00593 s->x=tmp.x;
00594 s->c=tmp.y;
00595 }
00596
00597 void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
00598 {
00599 uint2 tmp=MWC_SeedImpl_Mod64(s->seed0, s->seed1, 1, 0, baseOffset, perStreamOffset);
00600 s->x=tmp.x;
00601 s->c=tmp.y;
00602 }
00603
00605 uint MWC64X_NextUint(mwc64x_state_t *s)
00606 {
00607 uint res=s->x ^ s->c;
00608 MWC64X_Step(s);
00609 return res;
00610 }
00611
00612
00613
00614
00615
00616 float mwcReadPseudoRandomValue(mwc64x_state_t* rng) {
00617 return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff);
00618 }
00619
00620
00621 float mwcGenerateDifferentialNoise(mwc64x_state_t* r, CLQuantum pixel, NoiseType noise_type, float attenuate) {
00622
00623 float
00624 alpha,
00625 beta,
00626 noise,
00627 sigma;
00628
00629 noise = 0.0f;
00630 alpha=mwcReadPseudoRandomValue(r);
00631 switch(noise_type) {
00632 case UniformNoise:
00633 default:
00634 {
00635 noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
00636 break;
00637 }
00638 case GaussianNoise:
00639 {
00640 float
00641 gamma,
00642 tau;
00643
00644 if (alpha == 0.0f)
00645 alpha=1.0f;
00646 beta=mwcReadPseudoRandomValue(r);
00647 gamma=sqrt(-2.0f*log(alpha));
00648 sigma=gamma*cospi((2.0f*beta));
00649 tau=gamma*sinpi((2.0f*beta));
00650 noise=(float)(pixel+sqrt((float) pixel)*SigmaGaussian*sigma+
00651 QuantumRange*TauGaussian*tau);
00652 break;
00653 }
00654
00655
00656 case ImpulseNoise:
00657 {
00658 if (alpha < (SigmaImpulse/2.0f))
00659 noise=0.0f;
00660 else
00661 if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
00662 noise=(float)QuantumRange;
00663 else
00664 noise=(float)pixel;
00665 break;
00666 }
00667 case LaplacianNoise:
00668 {
00669 if (alpha <= 0.5f)
00670 {
00671 if (alpha <= MagickEpsilon)
00672 noise=(float) (pixel-QuantumRange);
00673 else
00674 noise=(float) (pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
00675 0.5f);
00676 break;
00677 }
00678 beta=1.0f-alpha;
00679 if (beta <= (0.5f*MagickEpsilon))
00680 noise=(float) (pixel+QuantumRange);
00681 else
00682 noise=(float) (pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
00683 break;
00684 }
00685 case MultiplicativeGaussianNoise:
00686 {
00687 sigma=1.0f;
00688 if (alpha > MagickEpsilon)
00689 sigma=sqrt(-2.0f*log(alpha));
00690 beta=mwcReadPseudoRandomValue(r);
00691 noise=(float) (pixel+pixel*SigmaMultiplicativeGaussian*sigma*
00692 cospi((float) (2.0f*beta))/2.0f);
00693 break;
00694 }
00695 case PoissonNoise:
00696 {
00697 float
00698 poisson;
00699 unsigned int i;
00700 poisson=exp(-SigmaPoisson*QuantumScale*pixel);
00701 for (i=0; alpha > poisson; i++)
00702 {
00703 beta=mwcReadPseudoRandomValue(r);
00704 alpha*=beta;
00705 }
00706 noise=(float) (QuantumRange*i/SigmaPoisson);
00707 break;
00708 }
00709 case RandomNoise:
00710 {
00711 noise=(float) (QuantumRange*SigmaRandom*alpha);
00712 break;
00713 }
00714
00715 };
00716 return noise;
00717 }
00718
00719 __kernel
00720 void AddNoise(const __global CLPixelType* inputImage, __global CLPixelType* filteredImage
00721 ,const unsigned int inputPixelCount, const unsigned int pixelsPerWorkItem
00722 ,const ChannelType channel
00723 ,const NoiseType noise_type, const float attenuate
00724 ,const unsigned int seed0, const unsigned int seed1
00725 ,const unsigned int numRandomNumbersPerPixel) {
00726
00727 mwc64x_state_t rng;
00728 rng.seed0 = seed0;
00729 rng.seed1 = seed1;
00730
00731 uint span = pixelsPerWorkItem * numRandomNumbersPerPixel;
00732 uint offset = span * get_local_size(0) * get_group_id(0);
00733
00734 MWC64X_SeedStreams(&rng, offset, span);
00735
00736 uint pos = get_local_size(0) * get_group_id(0) * pixelsPerWorkItem + get_local_id(0);
00737
00738 uint count = pixelsPerWorkItem;
00739
00740 while (count > 0) {
00741 if (pos < inputPixelCount) {
00742 CLPixelType p = inputImage[pos];
00743
00744 if ((channel&RedChannel)!=0) {
00745 setRed(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getRed(p),noise_type,attenuate)));
00746 }
00747
00748 if ((channel&GreenChannel)!=0) {
00749 setGreen(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getGreen(p),noise_type,attenuate)));
00750 }
00751
00752 if ((channel&BlueChannel)!=0) {
00753 setBlue(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getBlue(p),noise_type,attenuate)));
00754 }
00755
00756 if ((channel & OpacityChannel) != 0) {
00757 setOpacity(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getOpacity(p),noise_type,attenuate)));
00758 }
00759
00760 filteredImage[pos] = p;
00761
00762 }
00763 pos += get_local_size(0);
00764 --count;
00765 }
00766 }
00767 )
00768
00769
00770
00771
00772
00773
00774
00775
00776
00777
00778
00779
00780
00781 STRINGIFY(
00782
00783
00784
00785
00786
00787
00788
00789 __kernel void BlurRow(__global CLPixelType *im, __global float4 *filtered_im,
00790 const ChannelType channel, __constant float *filter,
00791 const unsigned int width,
00792 const unsigned int imageColumns, const unsigned int imageRows,
00793 __local CLPixelType *temp)
00794 {
00795 const int x = get_global_id(0);
00796 const int y = get_global_id(1);
00797
00798 const int columns = imageColumns;
00799
00800 const unsigned int radius = (width-1)/2;
00801 const int wsize = get_local_size(0);
00802 const unsigned int loadSize = wsize+width;
00803
00804
00805
00806
00807
00808
00809
00810
00811
00812
00813
00814
00815
00816
00817
00818
00819
00820
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830 const int groupX=get_local_size(0)*get_group_id(0);
00831 const int groupY=get_local_size(1)*get_group_id(1);
00832
00833
00834 for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
00835 {
00836
00837 temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
00838
00839
00840
00841
00842
00843 }
00844
00845
00846 barrier(CLK_LOCAL_MEM_FENCE);
00847
00848
00849 if (get_global_id(0) < columns)
00850 {
00851
00852 float4 result = (float4) 0;
00853
00854 int i = 0;
00855
00856 \n #ifndef UFACTOR \n
00857 \n #define UFACTOR 8 \n
00858 \n #endif \n
00859
00860 for ( ; i+UFACTOR < width; )
00861 {
00862 \n #pragma unroll UFACTOR\n
00863 for (int j=0; j < UFACTOR; j++, i++)
00864 {
00865 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
00866 }
00867 }
00868
00869 for ( ; i < width; i++)
00870 {
00871 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
00872 }
00873
00874 result.x = ClampToQuantum(result.x);
00875 result.y = ClampToQuantum(result.y);
00876 result.z = ClampToQuantum(result.z);
00877 result.w = ClampToQuantum(result.w);
00878
00879
00880 filtered_im[y*columns+x] = result;
00881 }
00882 }
00883 )
00884
00885 STRINGIFY(
00886
00887
00888
00889
00890
00891
00892
00893 __kernel void BlurColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
00894 const ChannelType channel, __constant float *filter,
00895 const unsigned int width,
00896 const unsigned int imageColumns, const unsigned int imageRows,
00897 __local float4 *temp)
00898 {
00899 const int x = get_global_id(0);
00900 const int y = get_global_id(1);
00901
00902
00903
00904 const int columns = imageColumns;
00905 const int rows = imageRows;
00906
00907 unsigned int radius = (width-1)/2;
00908 const int wsize = get_local_size(1);
00909 const unsigned int loadSize = wsize+width;
00910
00911
00912 const int groupX=get_local_size(0)*get_group_id(0);
00913 const int groupY=get_local_size(1)*get_group_id(1);
00914
00915
00916
00917
00918 for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
00919 {
00920 temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
00921 }
00922
00923
00924 barrier(CLK_LOCAL_MEM_FENCE);
00925
00926
00927 if (get_global_id(1) < rows)
00928 {
00929
00930 float4 result = (float4) 0;
00931
00932 int i = 0;
00933
00934 \n #ifndef UFACTOR \n
00935 \n #define UFACTOR 8 \n
00936 \n #endif \n
00937
00938 for ( ; i+UFACTOR < width; )
00939 {
00940 \n #pragma unroll UFACTOR \n
00941 for (int j=0; j < UFACTOR; j++, i++)
00942 {
00943 result+=filter[i]*temp[i+get_local_id(1)];
00944 }
00945 }
00946
00947 for ( ; i < width; i++)
00948 {
00949 result+=filter[i]*temp[i+get_local_id(1)];
00950 }
00951
00952 result.x = ClampToQuantum(result.x);
00953 result.y = ClampToQuantum(result.y);
00954 result.z = ClampToQuantum(result.z);
00955 result.w = ClampToQuantum(result.w);
00956
00957
00958 filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
00959 }
00960
00961 }
00962 )
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976 STRINGIFY(
00977 static inline float ColorDodge(const float Sca,
00978 const float Sa,const float Dca,const float Da)
00979 {
00980
00981
00982
00983 if ((Sca*Da+Dca*Sa) >= Sa*Da)
00984 return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
00985 return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
00986
00987
00988
00989
00990
00991
00992
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016 }
01017
01018 static inline void CompositeColorDodge(const float4 *p,
01019 const float4 *q,float4 *composite) {
01020
01021 float
01022 Da,
01023 gamma,
01024 Sa;
01025
01026 Sa=1.0f-QuantumScale*getOpacityF4(*p);
01027 Da=1.0f-QuantumScale*getOpacityF4(*q);
01028 gamma=RoundToUnity(Sa+Da-Sa*Da);
01029 setOpacityF4(composite, QuantumRange*(1.0-gamma));
01030 gamma=QuantumRange/(fabs(gamma) < MagickEpsilon ? MagickEpsilon : gamma);
01031 setRedF4(composite,gamma*ColorDodge(QuantumScale*getRedF4(*p)*Sa,Sa,QuantumScale*
01032 getRedF4(*q)*Da,Da));
01033 setGreenF4(composite,gamma*ColorDodge(QuantumScale*getGreenF4(*p)*Sa,Sa,QuantumScale*
01034 getGreenF4(*q)*Da,Da));
01035 setBlueF4(composite,gamma*ColorDodge(QuantumScale*getBlueF4(*p)*Sa,Sa,QuantumScale*
01036 getBlueF4(*q)*Da,Da));
01037 }
01038 )
01039
01040 STRINGIFY(
01041 static inline void MagickPixelCompositePlus(const float4 *p,
01042 const float alpha,const float4 *q,
01043 const float beta,float4 *composite)
01044 {
01045 float
01046 gamma;
01047
01048 float
01049 Da,
01050 Sa;
01051
01052
01053
01054 Sa=1.0-QuantumScale*alpha;
01055 Da=1.0-QuantumScale*beta;
01056 gamma=RoundToUnity(Sa+Da);
01057 setOpacityF4(composite,(float) QuantumRange*(1.0-gamma));
01058 gamma=PerceptibleReciprocal(gamma);
01059 setRedF4(composite,gamma*(Sa*getRedF4(*p)+Da*getRedF4(*q)));
01060 setGreenF4(composite,gamma*(Sa*getGreenF4(*p)+Da*getGreenF4(*q)));
01061 setBlueF4(composite,gamma*(Sa*getBlueF4(*p)+Da*getBlueF4(*q)));
01062 }
01063 )
01064
01065 STRINGIFY(
01066 static inline void MagickPixelCompositeBlend(const float4 *p,
01067 const float alpha,const float4 *q,
01068 const float beta,float4 *composite)
01069 {
01070 MagickPixelCompositePlus(p,(float) (QuantumRange-alpha*
01071 (QuantumRange-getOpacityF4(*p))),q,(float) (QuantumRange-beta*
01072 (QuantumRange-getOpacityF4(*q))),composite);
01073 }
01074 )
01075
01076 STRINGIFY(
01077 __kernel
01078 void Composite(__global CLPixelType *image,
01079 const unsigned int imageWidth,
01080 const unsigned int imageHeight,
01081 const unsigned int imageMatte,
01082 const __global CLPixelType *compositeImage,
01083 const unsigned int compositeWidth,
01084 const unsigned int compositeHeight,
01085 const unsigned int compositeMatte,
01086 const unsigned int compose,
01087 const ChannelType channel,
01088 const float destination_dissolve,
01089 const float source_dissolve) {
01090
01091 uint2 index;
01092 index.x = get_global_id(0);
01093 index.y = get_global_id(1);
01094
01095
01096 if (index.x >= imageWidth
01097 || index.y >= imageHeight) {
01098 return;
01099 }
01100 const CLPixelType inputPixel = image[index.y*imageWidth+index.x];
01101 float4 destination;
01102 setRedF4(&destination,getRed(inputPixel));
01103 setGreenF4(&destination,getGreen(inputPixel));
01104 setBlueF4(&destination,getBlue(inputPixel));
01105
01106
01107 const CLPixelType compositePixel
01108 = compositeImage[index.y*imageWidth+index.x];
01109 float4 source;
01110 setRedF4(&source,getRed(compositePixel));
01111 setGreenF4(&source,getGreen(compositePixel));
01112 setBlueF4(&source,getBlue(compositePixel));
01113
01114 if (imageMatte != 0) {
01115 setOpacityF4(&destination,getOpacity(inputPixel));
01116 }
01117 else {
01118 setOpacityF4(&destination,0.0f);
01119 }
01120
01121 if (compositeMatte != 0) {
01122 setOpacityF4(&source,getOpacity(compositePixel));
01123 }
01124 else {
01125 setOpacityF4(&source,0.0f);
01126 }
01127
01128 float4 composite=destination;
01129
01130 CompositeOperator op = (CompositeOperator)compose;
01131 switch (op) {
01132 case ColorDodgeCompositeOp:
01133 CompositeColorDodge(&source,&destination,&composite);
01134 break;
01135 case BlendCompositeOp:
01136 MagickPixelCompositeBlend(&source,source_dissolve,&destination,
01137 destination_dissolve,&composite);
01138 break;
01139 default:
01140
01141 break;
01142 };
01143
01144 CLPixelType outputPixel;
01145 setRed(&outputPixel, ClampToQuantum(getRedF4(composite)));
01146 setGreen(&outputPixel, ClampToQuantum(getGreenF4(composite)));
01147 setBlue(&outputPixel, ClampToQuantum(getBlueF4(composite)));
01148 setOpacity(&outputPixel, ClampToQuantum(getOpacityF4(composite)));
01149 image[index.y*imageWidth+index.x] = outputPixel;
01150 }
01151 )
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162
01163
01164
01165 STRINGIFY(
01166
01167 static inline float3 ConvertRGBToHSB(CLPixelType pixel) {
01168 float3 HueSaturationBrightness;
01169 HueSaturationBrightness.x = 0.0f;
01170 HueSaturationBrightness.y = 0.0f;
01171 HueSaturationBrightness.z = 0.0f;
01172
01173 float r=(float) getRed(pixel);
01174 float g=(float) getGreen(pixel);
01175 float b=(float) getBlue(pixel);
01176
01177 float tmin=MagickMin(MagickMin(r,g),b);
01178 float tmax= MagickMax(MagickMax(r,g),b);
01179
01180 if (tmax!=0.0f) {
01181 float delta=tmax-tmin;
01182 HueSaturationBrightness.y=delta/tmax;
01183 HueSaturationBrightness.z=QuantumScale*tmax;
01184
01185 if (delta != 0.0f) {
01186 HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f));
01187 HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta;
01188 HueSaturationBrightness.x/=6.0f;
01189 HueSaturationBrightness.x += (HueSaturationBrightness.x < 0.0f)?0.0f:1.0f;
01190 }
01191 }
01192 return HueSaturationBrightness;
01193 }
01194
01195 static inline CLPixelType ConvertHSBToRGB(float3 HueSaturationBrightness) {
01196
01197 float hue = HueSaturationBrightness.x;
01198 float brightness = HueSaturationBrightness.z;
01199 float saturation = HueSaturationBrightness.y;
01200
01201 CLPixelType rgb;
01202
01203 if (saturation == 0.0f) {
01204 setRed(&rgb,ClampToQuantum(QuantumRange*brightness));
01205 setGreen(&rgb,getRed(rgb));
01206 setBlue(&rgb,getRed(rgb));
01207 }
01208 else {
01209
01210 float h=6.0f*(hue-floor(hue));
01211 float f=h-floor(h);
01212 float p=brightness*(1.0f-saturation);
01213 float q=brightness*(1.0f-saturation*f);
01214 float t=brightness*(1.0f-(saturation*(1.0f-f)));
01215
01216 float clampedBrightness = ClampToQuantum(QuantumRange*brightness);
01217 float clamped_t = ClampToQuantum(QuantumRange*t);
01218 float clamped_p = ClampToQuantum(QuantumRange*p);
01219 float clamped_q = ClampToQuantum(QuantumRange*q);
01220 int ih = (int)h;
01221 setRed(&rgb, (ih == 1)?clamped_q:
01222 (ih == 2 || ih == 3)?clamped_p:
01223 (ih == 4)?clamped_t:
01224 clampedBrightness);
01225
01226 setGreen(&rgb, (ih == 1 || ih == 2)?clampedBrightness:
01227 (ih == 3)?clamped_q:
01228 (ih == 4 || ih == 5)?clamped_p:
01229 clamped_t);
01230
01231 setBlue(&rgb, (ih == 2)?clamped_t:
01232 (ih == 3 || ih == 4)?clampedBrightness:
01233 (ih == 5)?clamped_q:
01234 clamped_p);
01235 }
01236 return rgb;
01237 }
01238
01239 __kernel void Contrast(__global CLPixelType *im, const unsigned int sharpen)
01240 {
01241
01242 const int sign = sharpen!=0?1:-1;
01243 const int x = get_global_id(0);
01244 const int y = get_global_id(1);
01245 const int columns = get_global_size(0);
01246 const int c = x + y * columns;
01247
01248 CLPixelType pixel = im[c];
01249 float3 HueSaturationBrightness = ConvertRGBToHSB(pixel);
01250 float brightness = HueSaturationBrightness.z;
01251 brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
01252 brightness = clamp(brightness,0.0f,1.0f);
01253 HueSaturationBrightness.z = brightness;
01254
01255 CLPixelType filteredPixel = ConvertHSBToRGB(HueSaturationBrightness);
01256 filteredPixel.w = pixel.w;
01257 im[c] = filteredPixel;
01258 }
01259 )
01260
01261
01262
01263
01264
01265
01266
01267
01268
01269
01270
01271
01272
01273 STRINGIFY(
01274
01275
01276 __kernel void Histogram(__global CLPixelType * restrict im,
01277 const ChannelType channel,
01278 const int method,
01279 const int colorspace,
01280 __global uint4 * restrict histogram)
01281 {
01282 const int x = get_global_id(0);
01283 const int y = get_global_id(1);
01284 const int columns = get_global_size(0);
01285 const int c = x + y * columns;
01286 if ((channel & SyncChannels) != 0)
01287 {
01288 float intensity = GetPixelIntensity(method, colorspace,im[c]);
01289 uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
01290 atomic_inc((__global uint *)(&(histogram[pos]))+2);
01291 }
01292 else
01293 {
01294
01295
01296 }
01297 }
01298 )
01299
01300 STRINGIFY(
01301
01302
01303 __kernel void ContrastStretch(__global CLPixelType * restrict im,
01304 const ChannelType channel,
01305 __global CLPixelType * restrict stretch_map,
01306 const float4 white, const float4 black)
01307 {
01308 const int x = get_global_id(0);
01309 const int y = get_global_id(1);
01310 const int columns = get_global_size(0);
01311 const int c = x + y * columns;
01312
01313 uint ePos;
01314 CLPixelType oValue, eValue;
01315 CLQuantum red, green, blue, opacity;
01316
01317
01318 oValue=im[c];
01319
01320 if ((channel & RedChannel) != 0)
01321 {
01322 if (getRedF4(white) != getRedF4(black))
01323 {
01324 ePos = ScaleQuantumToMap(getRed(oValue));
01325 eValue = stretch_map[ePos];
01326 red = getRed(eValue);
01327 }
01328 }
01329
01330 if ((channel & GreenChannel) != 0)
01331 {
01332 if (getGreenF4(white) != getGreenF4(black))
01333 {
01334 ePos = ScaleQuantumToMap(getGreen(oValue));
01335 eValue = stretch_map[ePos];
01336 green = getGreen(eValue);
01337 }
01338 }
01339
01340 if ((channel & BlueChannel) != 0)
01341 {
01342 if (getBlueF4(white) != getBlueF4(black))
01343 {
01344 ePos = ScaleQuantumToMap(getBlue(oValue));
01345 eValue = stretch_map[ePos];
01346 blue = getBlue(eValue);
01347 }
01348 }
01349
01350 if ((channel & OpacityChannel) != 0)
01351 {
01352 if (getOpacityF4(white) != getOpacityF4(black))
01353 {
01354 ePos = ScaleQuantumToMap(getOpacity(oValue));
01355 eValue = stretch_map[ePos];
01356 opacity = getOpacity(eValue);
01357 }
01358 }
01359
01360
01361 im[c]=(CLPixelType)(blue, green, red, opacity);
01362
01363 }
01364 )
01365
01366
01367
01368
01369
01370
01371
01372
01373
01374
01375
01376
01377
01378 STRINGIFY(
01379 __kernel
01380 void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
01381 const unsigned int imageWidth, const unsigned int imageHeight,
01382 __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
01383 const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) {
01384
01385 int2 blockID;
01386 blockID.x = get_group_id(0);
01387 blockID.y = get_group_id(1);
01388
01389
01390 int2 imageAreaOrg;
01391 imageAreaOrg.x = blockID.x * get_local_size(0);
01392 imageAreaOrg.y = blockID.y * get_local_size(1);
01393
01394 int2 midFilterDimen;
01395 midFilterDimen.x = (filterWidth-1)/2;
01396 midFilterDimen.y = (filterHeight-1)/2;
01397
01398 int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
01399
01400
01401 int2 cachedAreaDimen;
01402 cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
01403 cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
01404
01405
01406 int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
01407 int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
01408 int groupSize = get_local_size(0) * get_local_size(1);
01409 for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
01410
01411 int2 cachedAreaIndex;
01412 cachedAreaIndex.x = i % cachedAreaDimen.x;
01413 cachedAreaIndex.y = i / cachedAreaDimen.x;
01414
01415 int2 imagePixelIndex;
01416 imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
01417
01418
01419
01420 imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
01421 imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
01422
01423 pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
01424 }
01425
01426
01427 for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
01428 filterCache[i] = filter[i];
01429 }
01430 barrier(CLK_LOCAL_MEM_FENCE);
01431
01432
01433 int2 imageIndex;
01434 imageIndex.x = imageAreaOrg.x + get_local_id(0);
01435 imageIndex.y = imageAreaOrg.y + get_local_id(1);
01436
01437
01438 if (imageIndex.x >= imageWidth
01439 || imageIndex.y >= imageHeight) {
01440 return;
01441 }
01442
01443 int filterIndex = 0;
01444 float4 sum = (float4)0.0f;
01445 float gamma = 0.0f;
01446 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
01447 int cacheIndexY = get_local_id(1);
01448 for (int j = 0; j < filterHeight; j++) {
01449 int cacheIndexX = get_local_id(0);
01450 for (int i = 0; i < filterWidth; i++) {
01451 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
01452 float f = filterCache[filterIndex];
01453
01454 sum.x += f * p.x;
01455 sum.y += f * p.y;
01456 sum.z += f * p.z;
01457 sum.w += f * p.w;
01458
01459 gamma += f;
01460 filterIndex++;
01461 cacheIndexX++;
01462 }
01463 cacheIndexY++;
01464 }
01465 }
01466 else {
01467 int cacheIndexY = get_local_id(1);
01468 for (int j = 0; j < filterHeight; j++) {
01469 int cacheIndexX = get_local_id(0);
01470 for (int i = 0; i < filterWidth; i++) {
01471
01472 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
01473 float alpha = QuantumScale*(QuantumRange-p.w);
01474 float f = filterCache[filterIndex];
01475 float g = alpha * f;
01476
01477 sum.x += g*p.x;
01478 sum.y += g*p.y;
01479 sum.z += g*p.z;
01480 sum.w += f*p.w;
01481
01482 gamma += g;
01483 filterIndex++;
01484 cacheIndexX++;
01485 }
01486 cacheIndexY++;
01487 }
01488 gamma = PerceptibleReciprocal(gamma);
01489 sum.xyz = gamma*sum.xyz;
01490 }
01491 CLPixelType outputPixel;
01492 outputPixel.x = ClampToQuantum(sum.x);
01493 outputPixel.y = ClampToQuantum(sum.y);
01494 outputPixel.z = ClampToQuantum(sum.z);
01495 outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
01496
01497 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
01498 }
01499 )
01500
01501 STRINGIFY(
01502 __kernel
01503 void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
01504 const uint imageWidth, const uint imageHeight,
01505 __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
01506 const uint matte, const ChannelType channel) {
01507
01508 int2 imageIndex;
01509 imageIndex.x = get_global_id(0);
01510 imageIndex.y = get_global_id(1);
01511
01512
01513
01514
01515
01516 if (imageIndex.x >= imageWidth
01517 || imageIndex.y >= imageHeight)
01518 return;
01519
01520 int2 midFilterDimen;
01521 midFilterDimen.x = (filterWidth-1)/2;
01522 midFilterDimen.y = (filterHeight-1)/2;
01523
01524 int filterIndex = 0;
01525 float4 sum = (float4)0.0f;
01526 float gamma = 0.0f;
01527 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
01528 for (int j = 0; j < filterHeight; j++) {
01529 int2 inputPixelIndex;
01530 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
01531 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
01532 for (int i = 0; i < filterWidth; i++) {
01533 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
01534 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
01535
01536 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
01537 float f = filter[filterIndex];
01538
01539 sum.x += f * p.x;
01540 sum.y += f * p.y;
01541 sum.z += f * p.z;
01542 sum.w += f * p.w;
01543
01544 gamma += f;
01545
01546 filterIndex++;
01547 }
01548 }
01549 }
01550 else {
01551
01552 for (int j = 0; j < filterHeight; j++) {
01553 int2 inputPixelIndex;
01554 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
01555 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
01556 for (int i = 0; i < filterWidth; i++) {
01557 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
01558 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
01559
01560 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
01561 float alpha = QuantumScale*(QuantumRange-p.w);
01562 float f = filter[filterIndex];
01563 float g = alpha * f;
01564
01565 sum.x += g*p.x;
01566 sum.y += g*p.y;
01567 sum.z += g*p.z;
01568 sum.w += f*p.w;
01569
01570 gamma += g;
01571
01572
01573 filterIndex++;
01574 }
01575 }
01576 gamma = PerceptibleReciprocal(gamma);
01577 sum.xyz = gamma*sum.xyz;
01578 }
01579
01580 CLPixelType outputPixel;
01581 outputPixel.x = ClampToQuantum(sum.x);
01582 outputPixel.y = ClampToQuantum(sum.y);
01583 outputPixel.z = ClampToQuantum(sum.z);
01584 outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
01585
01586 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
01587 }
01588 )
01589
01590
01591
01592
01593
01594
01595
01596
01597
01598
01599
01600
01601
01602 STRINGIFY(
01603
01604 __kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
01605 , const unsigned int imageWidth, const unsigned int imageHeight
01606 , const int2 offset, const int polarity, const int matte) {
01607
01608 int x = get_global_id(0);
01609 int y = get_global_id(1);
01610
01611 CLPixelType v = inputImage[y*imageWidth+x];
01612
01613 int2 neighbor;
01614 neighbor.y = y + offset.y;
01615 neighbor.x = x + offset.x;
01616
01617 int2 clampedNeighbor;
01618 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
01619 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
01620
01621 CLPixelType r = (clampedNeighbor.x == neighbor.x
01622 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
01623 :(CLPixelType)0;
01624
01625 int sv[4];
01626 sv[0] = (int)v.x;
01627 sv[1] = (int)v.y;
01628 sv[2] = (int)v.z;
01629 sv[3] = (int)v.w;
01630
01631 int sr[4];
01632 sr[0] = (int)r.x;
01633 sr[1] = (int)r.y;
01634 sr[2] = (int)r.z;
01635 sr[3] = (int)r.w;
01636
01637 if (polarity > 0) {
01638 \n #pragma unroll 4\n
01639 for (unsigned int i = 0; i < 4; i++) {
01640 sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
01641 }
01642 }
01643 else {
01644 \n #pragma unroll 4\n
01645 for (unsigned int i = 0; i < 4; i++) {
01646 sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
01647 }
01648
01649 }
01650
01651 v.x = (CLQuantum)sv[0];
01652 v.y = (CLQuantum)sv[1];
01653 v.z = (CLQuantum)sv[2];
01654
01655 if (matte!=0)
01656 v.w = (CLQuantum)sv[3];
01657
01658 outputImage[y*imageWidth+x] = v;
01659
01660 }
01661
01662
01663 )
01664
01665
01666
01667 STRINGIFY(
01668
01669 __kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
01670 , const unsigned int imageWidth, const unsigned int imageHeight
01671 , const int2 offset, const int polarity, const int matte) {
01672
01673 int x = get_global_id(0);
01674 int y = get_global_id(1);
01675
01676 CLPixelType v = inputImage[y*imageWidth+x];
01677
01678 int2 neighbor, clampedNeighbor;
01679
01680 neighbor.y = y + offset.y;
01681 neighbor.x = x + offset.x;
01682 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
01683 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
01684
01685 CLPixelType r = (clampedNeighbor.x == neighbor.x
01686 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
01687 :(CLPixelType)0;
01688
01689
01690 neighbor.y = y - offset.y;
01691 neighbor.x = x - offset.x;
01692 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
01693 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
01694
01695 CLPixelType s = (clampedNeighbor.x == neighbor.x
01696 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
01697 :(CLPixelType)0;
01698
01699
01700 int sv[4];
01701 sv[0] = (int)v.x;
01702 sv[1] = (int)v.y;
01703 sv[2] = (int)v.z;
01704 sv[3] = (int)v.w;
01705
01706 int sr[4];
01707 sr[0] = (int)r.x;
01708 sr[1] = (int)r.y;
01709 sr[2] = (int)r.z;
01710 sr[3] = (int)r.w;
01711
01712 int ss[4];
01713 ss[0] = (int)s.x;
01714 ss[1] = (int)s.y;
01715 ss[2] = (int)s.z;
01716 ss[3] = (int)s.w;
01717
01718 if (polarity > 0) {
01719 \n #pragma unroll 4\n
01720 for (unsigned int i = 0; i < 4; i++) {
01721
01722
01723
01724
01725 sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
01726 }
01727 }
01728 else {
01729 \n #pragma unroll 4\n
01730 for (unsigned int i = 0; i < 4; i++) {
01731
01732
01733
01734 sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
01735 }
01736 }
01737
01738 v.x = (CLQuantum)sv[0];
01739 v.y = (CLQuantum)sv[1];
01740 v.z = (CLQuantum)sv[2];
01741
01742 if (matte!=0)
01743 v.w = (CLQuantum)sv[3];
01744
01745 outputImage[y*imageWidth+x] = v;
01746
01747 }
01748
01749
01750 )
01751
01752
01753
01754
01755
01756
01757
01758
01759
01760
01761
01762
01763
01764 STRINGIFY(
01765
01766
01767 __kernel void Equalize(__global CLPixelType * restrict im,
01768 const ChannelType channel,
01769 __global CLPixelType * restrict equalize_map,
01770 const float4 white, const float4 black)
01771 {
01772 const int x = get_global_id(0);
01773 const int y = get_global_id(1);
01774 const int columns = get_global_size(0);
01775 const int c = x + y * columns;
01776
01777 uint ePos;
01778 CLPixelType oValue, eValue;
01779 CLQuantum red, green, blue, opacity;
01780
01781
01782 oValue=im[c];
01783
01784 if ((channel & SyncChannels) != 0)
01785 {
01786 if (getRedF4(white) != getRedF4(black))
01787 {
01788 ePos = ScaleQuantumToMap(getRed(oValue));
01789 eValue = equalize_map[ePos];
01790 red = getRed(eValue);
01791 ePos = ScaleQuantumToMap(getGreen(oValue));
01792 eValue = equalize_map[ePos];
01793 green = getRed(eValue);
01794 ePos = ScaleQuantumToMap(getBlue(oValue));
01795 eValue = equalize_map[ePos];
01796 blue = getRed(eValue);
01797 ePos = ScaleQuantumToMap(getOpacity(oValue));
01798 eValue = equalize_map[ePos];
01799 opacity = getRed(eValue);
01800
01801
01802 im[c]=(CLPixelType)(blue, green, red, opacity);
01803 }
01804
01805 }
01806
01807
01808
01809
01810 }
01811 )
01812
01813
01814
01815
01816
01817
01818
01819
01820
01821
01822
01823
01824
01825 STRINGIFY(
01826
01827
01828
01829
01830 CLPixelType ApplyFunction(CLPixelType pixel,const MagickFunction function,
01831 const unsigned int number_parameters,
01832 __constant float *parameters)
01833 {
01834 float4 result = (float4) 0.0f;
01835 switch (function)
01836 {
01837 case PolynomialFunction:
01838 {
01839 for (unsigned int i=0; i < number_parameters; i++)
01840 result = result*(float4)QuantumScale*convert_float4(pixel) + parameters[i];
01841 result *= (float4)QuantumRange;
01842 break;
01843 }
01844 case SinusoidFunction:
01845 {
01846 float freq,phase,ampl,bias;
01847 freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
01848 phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
01849 ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
01850 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
01851 result.x = QuantumRange*(ampl*sin(2.0f*MagickPI*
01852 (freq*QuantumScale*(float)pixel.x + phase/360.0f)) + bias);
01853 result.y = QuantumRange*(ampl*sin(2.0f*MagickPI*
01854 (freq*QuantumScale*(float)pixel.y + phase/360.0f)) + bias);
01855 result.z = QuantumRange*(ampl*sin(2.0f*MagickPI*
01856 (freq*QuantumScale*(float)pixel.z + phase/360.0f)) + bias);
01857 result.w = QuantumRange*(ampl*sin(2.0f*MagickPI*
01858 (freq*QuantumScale*(float)pixel.w + phase/360.0f)) + bias);
01859 break;
01860 }
01861 case ArcsinFunction:
01862 {
01863 float width,range,center,bias;
01864 width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
01865 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
01866 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
01867 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
01868
01869 result.x = 2.0f/width*(QuantumScale*(float)pixel.x - center);
01870 result.x = range/MagickPI*asin(result.x)+bias;
01871 result.x = ( result.x <= -1.0f ) ? bias - range/2.0f : result.x;
01872 result.x = ( result.x >= 1.0f ) ? bias + range/2.0f : result.x;
01873
01874 result.y = 2.0f/width*(QuantumScale*(float)pixel.y - center);
01875 result.y = range/MagickPI*asin(result.y)+bias;
01876 result.y = ( result.y <= -1.0f ) ? bias - range/2.0f : result.y;
01877 result.y = ( result.y >= 1.0f ) ? bias + range/2.0f : result.y;
01878
01879 result.z = 2.0f/width*(QuantumScale*(float)pixel.z - center);
01880 result.z = range/MagickPI*asin(result.z)+bias;
01881 result.z = ( result.z <= -1.0f ) ? bias - range/2.0f : result.x;
01882 result.z = ( result.z >= 1.0f ) ? bias + range/2.0f : result.x;
01883
01884
01885 result.w = 2.0f/width*(QuantumScale*(float)pixel.w - center);
01886 result.w = range/MagickPI*asin(result.w)+bias;
01887 result.w = ( result.w <= -1.0f ) ? bias - range/2.0f : result.w;
01888 result.w = ( result.w >= 1.0f ) ? bias + range/2.0f : result.w;
01889
01890 result *= (float4)QuantumRange;
01891 break;
01892 }
01893 case ArctanFunction:
01894 {
01895 float slope,range,center,bias;
01896 slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
01897 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
01898 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
01899 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
01900 result = (float4)MagickPI*(float4)slope*((float4)QuantumScale*convert_float4(pixel)-(float4)center);
01901 result = (float4)QuantumRange*((float4)range/(float4)MagickPI*atan(result) + (float4)bias);
01902 break;
01903 }
01904 case UndefinedFunction:
01905 break;
01906 }
01907 return (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
01908 ClampToQuantum(result.z), ClampToQuantum(result.w));
01909 }
01910 )
01911
01912 STRINGIFY(
01913
01914
01915
01916
01917
01918
01919
01920 __kernel void ComputeFunction(__global CLPixelType *im,
01921 const ChannelType channel, const MagickFunction function,
01922 const unsigned int number_parameters, __constant float *parameters)
01923 {
01924 const int x = get_global_id(0);
01925 const int y = get_global_id(1);
01926 const int columns = get_global_size(0);
01927 const int c = x + y * columns;
01928 im[c] = ApplyFunction(im[c], function, number_parameters, parameters);
01929 }
01930 )
01931
01932
01933
01934
01935
01936
01937
01938
01939
01940
01941
01942
01943
01944 STRINGIFY(
01945 __kernel void Grayscale(__global CLPixelType *im,
01946 const int method, const int colorspace)
01947 {
01948
01949 const int x = get_global_id(0);
01950 const int y = get_global_id(1);
01951 const int columns = get_global_size(0);
01952 const int c = x + y * columns;
01953
01954 CLPixelType pixel = im[c];
01955
01956 float
01957 blue,
01958 green,
01959 intensity,
01960 red;
01961
01962 red=(float)getRed(pixel);
01963 green=(float)getGreen(pixel);
01964 blue=(float)getBlue(pixel);
01965
01966 intensity=0.0;
01967
01968 CLPixelType filteredPixel;
01969
01970 switch (method)
01971 {
01972 case AveragePixelIntensityMethod:
01973 {
01974 intensity=(red+green+blue)/3.0;
01975 break;
01976 }
01977 case BrightnessPixelIntensityMethod:
01978 {
01979 intensity=MagickMax(MagickMax(red,green),blue);
01980 break;
01981 }
01982 case LightnessPixelIntensityMethod:
01983 {
01984 intensity=(MagickMin(MagickMin(red,green),blue)+
01985 MagickMax(MagickMax(red,green),blue))/2.0;
01986 break;
01987 }
01988 case MSPixelIntensityMethod:
01989 {
01990 intensity=(float) (((float) red*red+green*green+
01991 blue*blue)/(3.0*QuantumRange));
01992 break;
01993 }
01994 case Rec601LumaPixelIntensityMethod:
01995 {
01996
01997
01998
01999
02000
02001
02002
02003
02004 intensity=0.298839*red+0.586811*green+0.114350*blue;
02005 break;
02006 }
02007 case Rec601LuminancePixelIntensityMethod:
02008 {
02009
02010
02011
02012
02013
02014
02015
02016
02017 intensity=0.298839*red+0.586811*green+0.114350*blue;
02018 break;
02019 }
02020 case Rec709LumaPixelIntensityMethod:
02021 default:
02022 {
02023
02024
02025
02026
02027
02028
02029
02030
02031 intensity=0.212656*red+0.715158*green+0.072186*blue;
02032 break;
02033 }
02034 case Rec709LuminancePixelIntensityMethod:
02035 {
02036
02037
02038
02039
02040
02041
02042
02043
02044 intensity=0.212656*red+0.715158*green+0.072186*blue;
02045 break;
02046 }
02047 case RMSPixelIntensityMethod:
02048 {
02049 intensity=(float) (sqrt((float) red*red+green*green+
02050 blue*blue)/sqrt(3.0));
02051 break;
02052 }
02053
02054 }
02055
02056 setGray(&filteredPixel, ClampToQuantum(intensity));
02057
02058 filteredPixel.w = pixel.w;
02059
02060 im[c] = filteredPixel;
02061 }
02062 )
02063
02064
02065
02066
02067
02068
02069
02070
02071
02072
02073
02074
02075
02076 STRINGIFY(
02077 static inline int mirrorBottom(int value)
02078 {
02079 return (value < 0) ? - (value) : value;
02080 }
02081 static inline int mirrorTop(int value, int width)
02082 {
02083 return (value >= width) ? (2 * width - value - 1) : value;
02084 }
02085
02086 __kernel void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *tmpImage,
02087 const int radius,
02088 const int imageWidth,
02089 const int imageHeight)
02090 {
02091 const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
02092
02093 int x = get_local_id(0);
02094 int y = get_global_id(1);
02095
02096 if ((x >= imageWidth) || (y >= imageHeight))
02097 return;
02098
02099 global CLPixelType *src = srcImage + y * imageWidth;
02100
02101 for (int i = x; i < imageWidth; i += get_local_size(0)) {
02102 float sum = 0.0f;
02103 float weight = 1.0f;
02104
02105 int j = i - radius;
02106 while ((j + 7) < i) {
02107 for (int k = 0; k < 8; ++k)
02108 sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
02109 weight += 8.0f;
02110 j+=8;
02111 }
02112 while (j < i) {
02113 sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
02114 weight += 1.0f;
02115 ++j;
02116 }
02117
02118 while ((j + 7) < radius + i) {
02119 for (int k = 0; k < 8; ++k)
02120 sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
02121 weight -= 8.0f;
02122 j+=8;
02123 }
02124 while (j < radius + i) {
02125 sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
02126 weight -= 1.0f;
02127 ++j;
02128 }
02129
02130 tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
02131 }
02132 }
02133 )
02134
02135 STRINGIFY(
02136 __kernel void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *blurImage,
02137 const int radius,
02138 const float strength,
02139 const int imageWidth,
02140 const int imageHeight)
02141 {
02142 const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);
02143
02144 int x = get_global_id(0);
02145 int y = get_global_id(1);
02146
02147 if ((x >= imageWidth) || (y >= imageHeight))
02148 return;
02149
02150 global float *src = blurImage + x;
02151
02152 float sum = 0.0f;
02153 float weight = 1.0f;
02154
02155 int j = y - radius;
02156 while ((j + 7) < y) {
02157 for (int k = 0; k < 8; ++k)
02158 sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
02159 weight += 8.0f;
02160 j+=8;
02161 }
02162 while (j < y) {
02163 sum += weight * src[mirrorBottom(j) * imageWidth];
02164 weight += 1.0f;
02165 ++j;
02166 }
02167
02168 while ((j + 7) < radius + y) {
02169 for (int k = 0; k < 8; ++k)
02170 sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
02171 weight -= 8.0f;
02172 j+=8;
02173 }
02174 while (j < radius + y) {
02175 sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
02176 weight -= 1.0f;
02177 ++j;
02178 }
02179
02180 CLPixelType pixel = srcImage[x + y * imageWidth];
02181 float srcVal = dot(RGB, convert_float4(pixel));
02182 float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);
02183 mult = (srcVal + mult) / srcVal;
02184
02185 pixel.x = ClampToQuantum(pixel.x * mult);
02186 pixel.y = ClampToQuantum(pixel.y * mult);
02187 pixel.z = ClampToQuantum(pixel.z * mult);
02188
02189 dstImage[x + y * imageWidth] = pixel;
02190 }
02191 )
02192
02193
02194
02195
02196
02197
02198
02199
02200
02201
02202
02203
02204
02205 STRINGIFY(
02206
02207 static inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue,
02208 float *hue, float *saturation, float *lightness)
02209 {
02210 float
02211 c,
02212 tmax,
02213 tmin;
02214
02215
02216
02217
02218 tmax=MagickMax(QuantumScale*red,MagickMax(QuantumScale*green, QuantumScale*blue));
02219 tmin=MagickMin(QuantumScale*red,MagickMin(QuantumScale*green, QuantumScale*blue));
02220
02221 c=tmax-tmin;
02222
02223 *lightness=(tmax+tmin)/2.0;
02224 if (c <= 0.0)
02225 {
02226 *hue=0.0;
02227 *saturation=0.0;
02228 return;
02229 }
02230
02231 if (tmax == (QuantumScale*red))
02232 {
02233 *hue=(QuantumScale*green-QuantumScale*blue)/c;
02234 if ((QuantumScale*green) < (QuantumScale*blue))
02235 *hue+=6.0;
02236 }
02237 else
02238 if (tmax == (QuantumScale*green))
02239 *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c;
02240 else
02241 *hue=4.0+(QuantumScale*red-QuantumScale*green)/c;
02242
02243 *hue*=60.0/360.0;
02244 if (*lightness <= 0.5)
02245 *saturation=c/(2.0*(*lightness));
02246 else
02247 *saturation=c/(2.0-2.0*(*lightness));
02248 }
02249
02250 static inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness,
02251 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
02252 {
02253 float
02254 b,
02255 c,
02256 g,
02257 h,
02258 tmin,
02259 r,
02260 x;
02261
02262
02263
02264
02265 h=hue*360.0;
02266 if (lightness <= 0.5)
02267 c=2.0*lightness*saturation;
02268 else
02269 c=(2.0-2.0*lightness)*saturation;
02270 tmin=lightness-0.5*c;
02271 h-=360.0*floor(h/360.0);
02272 h/=60.0;
02273 x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
02274 switch ((int) floor(h) % 6)
02275 {
02276 case 0:
02277 {
02278 r=tmin+c;
02279 g=tmin+x;
02280 b=tmin;
02281 break;
02282 }
02283 case 1:
02284 {
02285 r=tmin+x;
02286 g=tmin+c;
02287 b=tmin;
02288 break;
02289 }
02290 case 2:
02291 {
02292 r=tmin;
02293 g=tmin+c;
02294 b=tmin+x;
02295 break;
02296 }
02297 case 3:
02298 {
02299 r=tmin;
02300 g=tmin+x;
02301 b=tmin+c;
02302 break;
02303 }
02304 case 4:
02305 {
02306 r=tmin+x;
02307 g=tmin;
02308 b=tmin+c;
02309 break;
02310 }
02311 case 5:
02312 {
02313 r=tmin+c;
02314 g=tmin;
02315 b=tmin+x;
02316 break;
02317 }
02318 default:
02319 {
02320 r=0.0;
02321 g=0.0;
02322 b=0.0;
02323 }
02324 }
02325 *red=ClampToQuantum(QuantumRange*r);
02326 *green=ClampToQuantum(QuantumRange*g);
02327 *blue=ClampToQuantum(QuantumRange*b);
02328 }
02329
02330 static inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness,
02331 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
02332 {
02333 float
02334 hue,
02335 lightness,
02336 saturation;
02337
02338
02339
02340
02341 ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);
02342 hue+=0.5*(0.01*percent_hue-1.0);
02343 while (hue < 0.0)
02344 hue+=1.0;
02345 while (hue >= 1.0)
02346 hue-=1.0;
02347 saturation*=0.01*percent_saturation;
02348 lightness*=0.01*percent_lightness;
02349 ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
02350 }
02351
02352 __kernel void Modulate(__global CLPixelType *im,
02353 const float percent_brightness,
02354 const float percent_hue,
02355 const float percent_saturation,
02356 const int colorspace)
02357 {
02358
02359 const int x = get_global_id(0);
02360 const int y = get_global_id(1);
02361 const int columns = get_global_size(0);
02362 const int c = x + y * columns;
02363
02364 CLPixelType pixel = im[c];
02365
02366 CLQuantum
02367 blue,
02368 green,
02369 red;
02370
02371 red=getRed(pixel);
02372 green=getGreen(pixel);
02373 blue=getBlue(pixel);
02374
02375 switch (colorspace)
02376 {
02377 case HSLColorspace:
02378 default:
02379 {
02380 ModulateHSL(percent_hue, percent_saturation, percent_brightness,
02381 &red, &green, &blue);
02382 }
02383
02384 }
02385
02386 CLPixelType filteredPixel;
02387
02388 setRed(&filteredPixel, red);
02389 setGreen(&filteredPixel, green);
02390 setBlue(&filteredPixel, blue);
02391 filteredPixel.w = pixel.w;
02392
02393 im[c] = filteredPixel;
02394 }
02395 )
02396
02397
02398
02399
02400
02401
02402
02403
02404
02405
02406
02407
02408
02409 STRINGIFY(
02410 __kernel
02411 void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output,
02412 const unsigned int imageWidth, const unsigned int imageHeight,
02413 const __global float *filter, const unsigned int width, const __global int2* offset,
02414 const float4 bias,
02415 const ChannelType channel, const unsigned int matte) {
02416
02417 int2 currentPixel;
02418 currentPixel.x = get_global_id(0);
02419 currentPixel.y = get_global_id(1);
02420
02421 if (currentPixel.x >= imageWidth
02422 || currentPixel.y >= imageHeight)
02423 return;
02424
02425 float4 pixel;
02426 pixel.x = (float)bias.x;
02427 pixel.y = (float)bias.y;
02428 pixel.z = (float)bias.z;
02429 pixel.w = (float)bias.w;
02430
02431 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
02432
02433 for (int i = 0; i < width; i++) {
02434
02435
02436 int2 samplePixel = currentPixel + offset[i];
02437 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
02438 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
02439 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
02440
02441 pixel.x += (filter[i] * (float)samplePixelValue.x);
02442 pixel.y += (filter[i] * (float)samplePixelValue.y);
02443 pixel.z += (filter[i] * (float)samplePixelValue.z);
02444 pixel.w += (filter[i] * (float)samplePixelValue.w);
02445 }
02446
02447 CLPixelType outputPixel;
02448 outputPixel.x = ClampToQuantum(pixel.x);
02449 outputPixel.y = ClampToQuantum(pixel.y);
02450 outputPixel.z = ClampToQuantum(pixel.z);
02451 outputPixel.w = ClampToQuantum(pixel.w);
02452 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
02453 }
02454 else {
02455
02456 float gamma = 0.0f;
02457 for (int i = 0; i < width; i++) {
02458
02459
02460 int2 samplePixel = currentPixel + offset[i];
02461 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
02462 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
02463
02464 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
02465
02466 float alpha = QuantumScale*(QuantumRange-samplePixelValue.w);
02467 float k = filter[i];
02468 pixel.x = pixel.x + k * alpha * samplePixelValue.x;
02469 pixel.y = pixel.y + k * alpha * samplePixelValue.y;
02470 pixel.z = pixel.z + k * alpha * samplePixelValue.z;
02471
02472 pixel.w += k * alpha * samplePixelValue.w;
02473
02474 gamma+=k*alpha;
02475 }
02476 gamma = PerceptibleReciprocal(gamma);
02477 pixel.xyz = gamma*pixel.xyz;
02478
02479 CLPixelType outputPixel;
02480 outputPixel.x = ClampToQuantum(pixel.x);
02481 outputPixel.y = ClampToQuantum(pixel.y);
02482 outputPixel.z = ClampToQuantum(pixel.z);
02483 outputPixel.w = ClampToQuantum(pixel.w);
02484 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
02485 }
02486 }
02487 )
02488
02489
02490
02491
02492
02493
02494
02495
02496
02497
02498
02499
02500
02501 STRINGIFY(
02502 __kernel void RadialBlur(const __global CLPixelType *im, __global CLPixelType *filtered_im,
02503 const float4 bias,
02504 const unsigned int channel, const unsigned int matte,
02505 const float2 blurCenter,
02506 __constant float *cos_theta, __constant float *sin_theta,
02507 const unsigned int cossin_theta_size)
02508 {
02509 const int x = get_global_id(0);
02510 const int y = get_global_id(1);
02511 const int columns = get_global_size(0);
02512 const int rows = get_global_size(1);
02513 unsigned int step = 1;
02514 float center_x = (float) x - blurCenter.x;
02515 float center_y = (float) y - blurCenter.y;
02516 float radius = hypot(center_x, center_y);
02517
02518
02519 float blur_radius = hypot(blurCenter.x, blurCenter.y);
02520
02521 if (radius > MagickEpsilon)
02522 {
02523 step = (unsigned int) (blur_radius / radius);
02524 if (step == 0)
02525 step = 1;
02526 if (step >= cossin_theta_size)
02527 step = cossin_theta_size-1;
02528 }
02529
02530 float4 result;
02531 result.x = (float)bias.x;
02532 result.y = (float)bias.y;
02533 result.z = (float)bias.z;
02534 result.w = (float)bias.w;
02535 float normalize = 0.0f;
02536
02537 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
02538 for (unsigned int i=0; i<cossin_theta_size; i+=step)
02539 {
02540 result += convert_float4(im[
02541 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
02542 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
02543 normalize += 1.0f;
02544 }
02545 normalize = PerceptibleReciprocal(normalize);
02546 result = result * normalize;
02547 }
02548 else {
02549 float gamma = 0.0f;
02550 for (unsigned int i=0; i<cossin_theta_size; i+=step)
02551 {
02552 float4 p = convert_float4(im[
02553 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
02554 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
02555
02556 float alpha = (float)(QuantumScale*(QuantumRange-p.w));
02557 result.x += alpha * p.x;
02558 result.y += alpha * p.y;
02559 result.z += alpha * p.z;
02560 result.w += p.w;
02561 gamma+=alpha;
02562 normalize += 1.0f;
02563 }
02564 gamma = PerceptibleReciprocal(gamma);
02565 normalize = PerceptibleReciprocal(normalize);
02566 result.x = gamma*result.x;
02567 result.y = gamma*result.y;
02568 result.z = gamma*result.z;
02569 result.w = normalize*result.w;
02570 }
02571 filtered_im[y * columns + x] = (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
02572 ClampToQuantum(result.z), ClampToQuantum(result.w));
02573 }
02574 )
02575
02576
02577
02578
02579
02580
02581
02582
02583
02584
02585
02586
02587
02588 STRINGIFY(
02589
02590 float BoxResizeFilter(const float x)
02591 {
02592 return 1.0f;
02593 }
02594 )
02595
02596 STRINGIFY(
02597
02598 float CubicBC(const float x,const __global float* resizeFilterCoefficients)
02599 {
02600
02601
02602
02603
02604
02605
02606
02607
02608
02609
02610
02611
02612
02613
02614
02615
02616
02617
02618
02619
02620
02621
02622
02623
02624
02625
02626
02627
02628
02629 if (x < 1.0)
02630 return(resizeFilterCoefficients[0]+x*(x*
02631 (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
02632 if (x < 2.0)
02633 return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
02634 (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
02635 return(0.0);
02636 }
02637 )
02638
02639 STRINGIFY(
02640 float Sinc(const float x)
02641 {
02642 if (x != 0.0f)
02643 {
02644 const float alpha=(float) (MagickPI*x);
02645 return sinpi(x)/alpha;
02646 }
02647 return(1.0f);
02648 }
02649 )
02650
02651 STRINGIFY(
02652 float Triangle(const float x)
02653 {
02654
02655
02656
02657
02658
02659 return ((x<1.0f)?(1.0f-x):0.0f);
02660 }
02661 )
02662
02663
02664 STRINGIFY(
02665 float Hanning(const float x)
02666 {
02667
02668
02669
02670
02671 const float cosine=cos((MagickPI*x));
02672 return(0.5f+0.5f*cosine);
02673 }
02674 )
02675
02676 STRINGIFY(
02677 float Hamming(const float x)
02678 {
02679
02680
02681
02682
02683 const float cosine=cos((MagickPI*x));
02684 return(0.54f+0.46f*cosine);
02685 }
02686 )
02687
02688 STRINGIFY(
02689 float Blackman(const float x)
02690 {
02691
02692
02693
02694
02695
02696
02697
02698 const float cosine=cos((MagickPI*x));
02699 return(0.34f+cosine*(0.5f+cosine*0.16f));
02700 }
02701 )
02702
02703
02704
02705
02706 STRINGIFY(
02707 static inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients)
02708 {
02709 switch (filterType)
02710 {
02711
02712
02713 case SincWeightingFunction:
02714 case SincFastWeightingFunction:
02715 return Sinc(x);
02716 case CubicBCWeightingFunction:
02717 return CubicBC(x,filterCoefficients);
02718 case BoxWeightingFunction:
02719 return BoxResizeFilter(x);
02720 case TriangleWeightingFunction:
02721 return Triangle(x);
02722 case HanningWeightingFunction:
02723 return Hanning(x);
02724 case HammingWeightingFunction:
02725 return Hamming(x);
02726 case BlackmanWeightingFunction:
02727 return Blackman(x);
02728
02729 default:
02730 return 0.0f;
02731 }
02732 }
02733 )
02734
02735
02736 STRINGIFY(
02737 static inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType
02738 , const ResizeWeightingFunctionType resizeWindowType
02739 , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x)
02740 {
02741 float scale;
02742 float xBlur = fabs(x/resizeFilterBlur);
02743 if (resizeWindowSupport < MagickEpsilon
02744 || resizeWindowType == BoxWeightingFunction)
02745 {
02746 scale = 1.0f;
02747 }
02748 else
02749 {
02750 scale = resizeFilterScale;
02751 scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
02752 }
02753 float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
02754 return weight;
02755 }
02756
02757 )
02758
02759 ;
02760 const char* accelerateKernels2 =
02761
02762 STRINGIFY(
02763
02764 static inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
02765 return (numWorkItems/pixelPerWorkgroup);
02766 }
02767
02768
02769
02770 static inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
02771 const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
02772 int pixelIndex = itemID/numWorkItemsPerPixel;
02773 pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
02774 return pixelIndex;
02775 }
02776
02777 )
02778
02779 STRINGIFY(
02780 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
02781 void ResizeHorizontalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
02782 , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
02783 , const int resizeFilterType, const int resizeWindowType
02784 , const __global float* resizeFilterCubicCoefficients
02785 , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
02786 , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
02787 , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
02788
02789
02790
02791 const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
02792 const unsigned int stopX = MagickMin(startX + pixelPerWorkgroup,filteredColumns);
02793 const unsigned int actualNumPixelToCompute = stopX - startX;
02794
02795
02796 float scale = MagickMax(1.0f/xFactor+MagickEpsilon ,1.0f);
02797 const float support = MagickMax(scale*resizeFilterSupport,0.5f);
02798 scale = PerceptibleReciprocal(scale);
02799
02800 const int cacheRangeStartX = MagickMax((int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(int)(0));
02801 const int cacheRangeEndX = MagickMin((int)(cacheRangeStartX + numCachedPixels), (int)inputColumns);
02802
02803
02804 const unsigned int y = get_global_id(1);
02805 event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0);
02806 wait_group_events(1,&e);
02807
02808 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
02809 for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
02810 {
02811
02812 const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
02813 const unsigned int chunkStopX = MagickMin(chunkStartX + pixelChunkSize, stopX);
02814 const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
02815
02816
02817 const unsigned int itemID = get_local_id(0);
02818 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
02819
02820 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
02821
02822 float4 filteredPixel = (float4)0.0f;
02823 float density = 0.0f;
02824 float gamma = 0.0f;
02825
02826 if (pixelIndex != -1) {
02827
02828
02829 const int x = chunkStartX + pixelIndex;
02830
02831
02832 const float bisect = (x+0.5)/xFactor+MagickEpsilon;
02833 const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
02834 const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputColumns);
02835 const unsigned int n = stop - start;
02836
02837
02838 unsigned int numStepsPerWorkItem = n / numItems;
02839 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
02840
02841 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
02842 if (startStep < n) {
02843 const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
02844
02845 unsigned int cacheIndex = start+startStep-cacheRangeStartX;
02846 if (matte == 0) {
02847
02848 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
02849 float4 cp = convert_float4(inputImageCache[cacheIndex]);
02850
02851 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
02852 , (ResizeWeightingFunctionType)resizeWindowType
02853 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
02854
02855 filteredPixel += ((float4)weight)*cp;
02856 density+=weight;
02857 }
02858
02859
02860 }
02861 else {
02862 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
02863 CLPixelType p = inputImageCache[cacheIndex];
02864
02865 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
02866 , (ResizeWeightingFunctionType)resizeWindowType
02867 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
02868
02869 float alpha = weight * QuantumScale * GetPixelAlpha(p);
02870 float4 cp = convert_float4(p);
02871
02872 filteredPixel.x += alpha * cp.x;
02873 filteredPixel.y += alpha * cp.y;
02874 filteredPixel.z += alpha * cp.z;
02875 filteredPixel.w += weight * cp.w;
02876
02877 density+=weight;
02878 gamma+=alpha;
02879 }
02880 }
02881 }
02882 }
02883
02884
02885 if (itemID < actualNumPixelInThisChunk) {
02886 outputPixelCache[itemID] = (float4)0.0f;
02887 densityCache[itemID] = 0.0f;
02888 if (matte != 0)
02889 gammaCache[itemID] = 0.0f;
02890 }
02891 barrier(CLK_LOCAL_MEM_FENCE);
02892
02893
02894 for (unsigned int i = 0; i < numItems; i++) {
02895 if (pixelIndex != -1) {
02896 if (itemID%numItems == i) {
02897 outputPixelCache[pixelIndex]+=filteredPixel;
02898 densityCache[pixelIndex]+=density;
02899 if (matte!=0) {
02900 gammaCache[pixelIndex]+=gamma;
02901 }
02902 }
02903 }
02904 barrier(CLK_LOCAL_MEM_FENCE);
02905 }
02906
02907 if (itemID < actualNumPixelInThisChunk) {
02908 if (matte==0) {
02909 float density = densityCache[itemID];
02910 float4 filteredPixel = outputPixelCache[itemID];
02911 if (density!= 0.0f && density != 1.0)
02912 {
02913 density = PerceptibleReciprocal(density);
02914 filteredPixel *= (float4)density;
02915 }
02916 filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
02917 , ClampToQuantum(filteredPixel.y)
02918 , ClampToQuantum(filteredPixel.z)
02919 , ClampToQuantum(filteredPixel.w));
02920 }
02921 else {
02922 float density = densityCache[itemID];
02923 float gamma = gammaCache[itemID];
02924 float4 filteredPixel = outputPixelCache[itemID];
02925
02926 if (density!= 0.0f && density != 1.0) {
02927 density = PerceptibleReciprocal(density);
02928 filteredPixel *= (float4)density;
02929 gamma *= density;
02930 }
02931 gamma = PerceptibleReciprocal(gamma);
02932
02933 CLPixelType fp;
02934 fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
02935 , ClampToQuantum(gamma*filteredPixel.y)
02936 , ClampToQuantum(gamma*filteredPixel.z)
02937 , ClampToQuantum(filteredPixel.w));
02938
02939 filteredImage[y*filteredColumns+chunkStartX+itemID] = fp;
02940
02941 }
02942 }
02943
02944 }
02945 }
02946 )
02947
02948
02949 STRINGIFY(
02950 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
02951 void ResizeVerticalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
02952 , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
02953 , const int resizeFilterType, const int resizeWindowType
02954 , const __global float* resizeFilterCubicCoefficients
02955 , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
02956 , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
02957 , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
02958
02959
02960
02961 const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
02962 const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows);
02963 const unsigned int actualNumPixelToCompute = stopY - startY;
02964
02965
02966 float scale = MagickMax(1.0f/yFactor+MagickEpsilon ,1.0f);
02967 const float support = MagickMax(scale*resizeFilterSupport,0.5f);
02968 scale = PerceptibleReciprocal(scale);
02969
02970 const int cacheRangeStartY = MagickMax((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0));
02971 const int cacheRangeEndY = MagickMin((int)(cacheRangeStartY + numCachedPixels), (int)inputRows);
02972
02973
02974 const unsigned int x = get_global_id(0);
02975 event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0);
02976 wait_group_events(1,&e);
02977
02978 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
02979 for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
02980 {
02981
02982 const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
02983 const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY);
02984 const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
02985
02986
02987 const unsigned int itemID = get_local_id(1);
02988 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
02989
02990 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
02991
02992 float4 filteredPixel = (float4)0.0f;
02993 float density = 0.0f;
02994 float gamma = 0.0f;
02995
02996 if (pixelIndex != -1) {
02997
02998
02999 const int y = chunkStartY + pixelIndex;
03000
03001
03002 const float bisect = (y+0.5)/yFactor+MagickEpsilon;
03003 const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
03004 const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputRows);
03005 const unsigned int n = stop - start;
03006
03007
03008 unsigned int numStepsPerWorkItem = n / numItems;
03009 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
03010
03011 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
03012 if (startStep < n) {
03013 const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
03014
03015 unsigned int cacheIndex = start+startStep-cacheRangeStartY;
03016 if (matte == 0) {
03017
03018 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
03019 float4 cp = convert_float4(inputImageCache[cacheIndex]);
03020
03021 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
03022 , (ResizeWeightingFunctionType)resizeWindowType
03023 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
03024
03025 filteredPixel += ((float4)weight)*cp;
03026 density+=weight;
03027 }
03028
03029
03030 }
03031 else {
03032 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
03033 CLPixelType p = inputImageCache[cacheIndex];
03034
03035 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
03036 , (ResizeWeightingFunctionType)resizeWindowType
03037 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
03038
03039 float alpha = weight * QuantumScale * GetPixelAlpha(p);
03040 float4 cp = convert_float4(p);
03041
03042 filteredPixel.x += alpha * cp.x;
03043 filteredPixel.y += alpha * cp.y;
03044 filteredPixel.z += alpha * cp.z;
03045 filteredPixel.w += weight * cp.w;
03046
03047 density+=weight;
03048 gamma+=alpha;
03049 }
03050 }
03051 }
03052 }
03053
03054
03055 if (itemID < actualNumPixelInThisChunk) {
03056 outputPixelCache[itemID] = (float4)0.0f;
03057 densityCache[itemID] = 0.0f;
03058 if (matte != 0)
03059 gammaCache[itemID] = 0.0f;
03060 }
03061 barrier(CLK_LOCAL_MEM_FENCE);
03062
03063
03064 for (unsigned int i = 0; i < numItems; i++) {
03065 if (pixelIndex != -1) {
03066 if (itemID%numItems == i) {
03067 outputPixelCache[pixelIndex]+=filteredPixel;
03068 densityCache[pixelIndex]+=density;
03069 if (matte!=0) {
03070 gammaCache[pixelIndex]+=gamma;
03071 }
03072 }
03073 }
03074 barrier(CLK_LOCAL_MEM_FENCE);
03075 }
03076
03077 if (itemID < actualNumPixelInThisChunk) {
03078 if (matte==0) {
03079 float density = densityCache[itemID];
03080 float4 filteredPixel = outputPixelCache[itemID];
03081 if (density!= 0.0f && density != 1.0)
03082 {
03083 density = PerceptibleReciprocal(density);
03084 filteredPixel *= (float4)density;
03085 }
03086 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
03087 , ClampToQuantum(filteredPixel.y)
03088 , ClampToQuantum(filteredPixel.z)
03089 , ClampToQuantum(filteredPixel.w));
03090 }
03091 else {
03092 float density = densityCache[itemID];
03093 float gamma = gammaCache[itemID];
03094 float4 filteredPixel = outputPixelCache[itemID];
03095
03096 if (density!= 0.0f && density != 1.0) {
03097 density = PerceptibleReciprocal(density);
03098 filteredPixel *= (float4)density;
03099 gamma *= density;
03100 }
03101 gamma = PerceptibleReciprocal(gamma);
03102
03103 CLPixelType fp;
03104 fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
03105 , ClampToQuantum(gamma*filteredPixel.y)
03106 , ClampToQuantum(gamma*filteredPixel.z)
03107 , ClampToQuantum(filteredPixel.w));
03108
03109 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp;
03110
03111 }
03112 }
03113
03114 }
03115 }
03116 )
03117
03118
03119
03120
03121
03122
03123
03124
03125
03126
03127
03128
03129
03130 STRINGIFY(
03131 __kernel void UnsharpMaskBlurColumn(const __global CLPixelType* inputImage,
03132 const __global float4 *blurRowData, __global CLPixelType *filtered_im,
03133 const unsigned int imageColumns, const unsigned int imageRows,
03134 __local float4* cachedData, __local float* cachedFilter,
03135 const ChannelType channel, const __global float *filter, const unsigned int width,
03136 const float gain, const float threshold)
03137 {
03138 const unsigned int radius = (width-1)/2;
03139
03140
03141 const int groupX = get_group_id(0);
03142 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
03143 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
03144
03145 if (groupStartY >= 0
03146 && groupStopY < imageRows) {
03147 event_t e = async_work_group_strided_copy(cachedData
03148 ,blurRowData+groupStartY*imageColumns+groupX
03149 ,groupStopY-groupStartY,imageColumns,0);
03150 wait_group_events(1,&e);
03151 }
03152 else {
03153 for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
03154 cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,imageRows)*imageColumns+ groupX];
03155 }
03156 barrier(CLK_LOCAL_MEM_FENCE);
03157 }
03158
03159 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
03160 wait_group_events(1,&e);
03161
03162
03163
03164 const int cy = get_global_id(1);
03165
03166 if (cy < imageRows) {
03167 float4 blurredPixel = (float4) 0.0f;
03168
03169 int i = 0;
03170
03171 \n #ifndef UFACTOR \n
03172 \n #define UFACTOR 8 \n
03173 \n #endif \n
03174
03175 for ( ; i+UFACTOR < width; )
03176 {
03177 \n #pragma unroll UFACTOR \n
03178 for (int j=0; j < UFACTOR; j++, i++)
03179 {
03180 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
03181 }
03182 }
03183
03184 for ( ; i < width; i++)
03185 {
03186 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
03187 }
03188
03189 blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
03190 ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
03191
03192 float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
03193 float4 outputPixel = inputImagePixel - blurredPixel;
03194
03195 float quantumThreshold = QuantumRange*threshold;
03196
03197 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
03198 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
03199
03200
03201 filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
03202 ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
03203
03204 }
03205 }
03206 )
03207
03208
03209
03210 STRINGIFY(
03211 __kernel void UnsharpMask(__global CLPixelType *im, __global CLPixelType *filtered_im,
03212 __constant float *filter,
03213 const unsigned int width,
03214 const unsigned int imageColumns, const unsigned int imageRows,
03215 __local float4 *pixels,
03216 const float gain, const float threshold, const unsigned int justBlur)
03217 {
03218 const int x = get_global_id(0);
03219 const int y = get_global_id(1);
03220
03221 const unsigned int radius = (width - 1) / 2;
03222
03223 int row = y - radius;
03224 int baseRow = get_group_id(1) * get_local_size(1) - radius;
03225 int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
03226
03227 while (row < endRow) {
03228 int srcy = (row < 0) ? -row : row;
03229 srcy = (srcy >= imageRows) ? (2 * imageRows - srcy - 1) : srcy;
03230
03231 float4 value = 0.0f;
03232
03233 int ix = x - radius;
03234 int i = 0;
03235
03236 while (i + 7 < width) {
03237 for (int j = 0; j < 8; ++j) {
03238 int srcx = ix + j;
03239 srcx = (srcx < 0) ? -srcx : srcx;
03240 srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
03241 value += filter[i + j] * convert_float4(im[srcx + srcy * imageColumns]);
03242 }
03243 ix += 8;
03244 i += 8;
03245 }
03246
03247 while (i < width) {
03248 int srcx = (ix < 0) ? -ix : ix;
03249 srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
03250 value += filter[i] * convert_float4(im[srcx + srcy * imageColumns]);
03251 ++i;
03252 ++ix;
03253 }
03254 pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
03255 row += get_local_size(1);
03256 }
03257
03258
03259 barrier(CLK_LOCAL_MEM_FENCE);
03260
03261
03262 const int px = get_local_id(0);
03263 const int py = get_local_id(1);
03264 const int prp = get_local_size(0);
03265 float4 value = (float4)(0.0f);
03266
03267 int i = 0;
03268 while (i + 7 < width) {
03269 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
03270 value += (float4)(filter[i]) * pixels[px + (py + i + 1) * prp];
03271 value += (float4)(filter[i]) * pixels[px + (py + i + 2) * prp];
03272 value += (float4)(filter[i]) * pixels[px + (py + i + 3) * prp];
03273 value += (float4)(filter[i]) * pixels[px + (py + i + 4) * prp];
03274 value += (float4)(filter[i]) * pixels[px + (py + i + 5) * prp];
03275 value += (float4)(filter[i]) * pixels[px + (py + i + 6) * prp];
03276 value += (float4)(filter[i]) * pixels[px + (py + i + 7) * prp];
03277 i += 8;
03278 }
03279 while (i < width) {
03280 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
03281 ++i;
03282 }
03283 if ((x < imageColumns) && (y < imageRows)) {
03284 if (justBlur == 0) {
03285 float4 srcPixel = convert_float4(im[x + y * imageColumns]);
03286 float4 diff = srcPixel - value;
03287
03288 float quantumThreshold = QuantumRange*threshold;
03289
03290 int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
03291 value = select(srcPixel + diff * gain, srcPixel, mask);
03292 }
03293 filtered_im[x + y * imageColumns] = (CLPixelType)(ClampToQuantum(value.s0), ClampToQuantum(value.s1), ClampToQuantum(value.s2), ClampToQuantum(value.s3));
03294 }
03295 }
03296 )
03297
03298 STRINGIFY(
03299 __kernel __attribute__((reqd_work_group_size(64, 4, 1))) void WaveletDenoise(__global CLPixelType *srcImage, __global CLPixelType *dstImage,
03300 const float threshold,
03301 const int passes,
03302 const int imageWidth,
03303 const int imageHeight)
03304 {
03305 const int pad = (1 << (passes - 1));
03306 const int tileSize = 64;
03307 const int tileRowPixels = 64;
03308 const float noise[] = { 0.8002, 0.2735, 0.1202, 0.0585, 0.0291, 0.0152, 0.0080, 0.0044 };
03309
03310 CLPixelType stage[16];
03311
03312 local float buffer[64 * 64];
03313
03314 int srcx = (get_group_id(0) + get_global_offset(0) / tileSize) * (tileSize - 2 * pad) - pad + get_local_id(0);
03315 int srcy = (get_group_id(1) + get_global_offset(1) / 4) * (tileSize - 2 * pad) - pad;
03316
03317 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
03318 stage[i / 4] = srcImage[mirrorTop(mirrorBottom(srcx), imageWidth) + (mirrorTop(mirrorBottom(srcy + i) , imageHeight)) * imageWidth];
03319 }
03320
03321
03322 for (int channel = 0; channel < 3; ++channel) {
03323
03324 switch (channel) {
03325 case 0:
03326 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03327 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[i / 4].s0);
03328 break;
03329 case 1:
03330 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03331 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[i / 4].s1);
03332 break;
03333 case 2:
03334 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03335 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[i / 4].s2);
03336 break;
03337 }
03338
03339
03340
03341
03342 float tmp[16];
03343 float accum[16];
03344 float pixel;
03345
03346 for (int pass = 0; pass < passes; ++pass) {
03347 const int radius = 1 << pass;
03348 const int x = get_local_id(0);
03349 const float thresh = threshold * noise[pass];
03350
03351 if (pass == 0)
03352 accum[0] = accum[1] = accum[2] = accum[3] = accum[4] = accum[5] = accum[6] = accum[6] = accum[7] = accum[8] = accum[9] = accum[10] = accum[11] = accum[12] = accum[13] = accum[14] = accum[15] = 0.0f;
03353
03354
03355
03356
03357 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
03358 const int offset = i * tileRowPixels;
03359 if (pass == 0)
03360 tmp[i / 4] = buffer[x + offset];
03361 pixel = 0.5f * tmp[i / 4] + 0.25 * (buffer[mirrorBottom(x - radius) + offset] + buffer[mirrorTop(x + radius, tileSize) + offset]);
03362 barrier(CLK_LOCAL_MEM_FENCE);
03363 buffer[x + offset] = pixel;
03364 }
03365 barrier(CLK_LOCAL_MEM_FENCE);
03366
03367 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
03368 pixel = 0.5f * buffer[x + i * tileRowPixels] + 0.25 * (buffer[x + mirrorBottom(i - radius) * tileRowPixels] + buffer[x + mirrorTop(i + radius, tileRowPixels) * tileRowPixels]);
03369 float delta = tmp[i / 4] - pixel;
03370 tmp[i / 4] = pixel;
03371 if (delta < -thresh)
03372 delta += thresh;
03373 else if (delta > thresh)
03374 delta -= thresh;
03375 else
03376 delta = 0;
03377 accum[i / 4] += delta;
03378
03379 }
03380 barrier(CLK_LOCAL_MEM_FENCE);
03381 if (pass < passes - 1)
03382 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03383 buffer[x + i * tileRowPixels] = tmp[i / 4];
03384 else
03385 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03386 accum[i / 4] += tmp[i / 4];
03387 barrier(CLK_LOCAL_MEM_FENCE);
03388 }
03389
03390 switch (channel) {
03391 case 0:
03392 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03393 stage[i / 4].s0 = ClampToQuantum(accum[i / 4]);
03394 break;
03395 case 1:
03396 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03397 stage[i / 4].s1 = ClampToQuantum(accum[i / 4]);
03398 break;
03399 case 2:
03400 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
03401 stage[i / 4].s2 = ClampToQuantum(accum[i / 4]);
03402 break;
03403 }
03404
03405 barrier(CLK_LOCAL_MEM_FENCE);
03406 }
03407
03408
03409
03410 if ((get_local_id(0) >= pad) && (get_local_id(0) < tileSize - pad) && (srcx >= 0) && (srcx < imageWidth)) {
03411
03412 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
03413 if ((i >= pad) && (i < tileSize - pad) && (srcy + i >= 0) && (srcy + i < imageHeight)) {
03414 dstImage[srcx + (srcy + i) * imageWidth] = stage[i / 4];
03415 }
03416 }
03417 }
03418 }
03419 )
03420 ;
03421
03422 #endif // MAGICKCORE_OPENCL_SUPPORT
03423
03424 #if defined(__cplusplus) || defined(c_plusplus)
03425 }
03426 #endif
03427
03428 #endif // MAGICKCORE_ACCELERATE_PRIVATE_H