本文整理自网络,侵删。
阅读提示:
《Delphi图像处理》系列以效率为侧重点,一般代码为PASCAL,核心代码采用BASM。
《C++图像处理》系列以代码清晰,可读性为主,全部使用C++代码。
尽可能保持二者内容一致,可相互对照。
本文代码必须包括文章《Delphi图像处理 -- 数据类型及公用过程》中的ImageData.pas单元。
在图像的处理过程中,经常要用到卷积模板,如图像锐化、图像平滑、高斯模糊、Hough变换等,为此,本人使用Delphi编写了图像通用卷积处理过程和高斯模糊过程,代码如下:
procedure Convolution32(var Dest: TImageData; const Source: TImageData; Template: Pointer; Size, TemplateOffset: Integer);var width, height: Integer; dstOffset, srcOffset: Integer;asm push esi push edi push ebx push ecx call _SetCopyRegs mov width, ecx mov height, edx mov dstOffset, ebx mov srcOffset, eax mov eax, TemplateOffset pop ebx // ebx = Template pxor xmm7, xmm7@@yLoop: // for (y = 0; y < Dst.Height; y ++) push width // {@@xLoop: // for (x = 0; x < Dst.Width; x ++) push esi // { push ebx pxor xmm0, xmm0 // xmm0 = 0 mov edx, size // for (I = 0; I < TemplateSize; I ++)@@iLoop: // { mov ecx, size // for (J = 0; J <= TemplateSize; J ++)@@jLoop: // { movd xmm1, [esi] punpcklbw xmm1, xmm7 punpcklwd xmm1, xmm7 cvtdq2ps xmm1, xmm1 mulps xmm1, [ebx] // xmm1 = pixels[I, J] * Template[i * j] addps xmm0, xmm1 // xmm0 += xmm1 add esi, 4 // esi += 4 add ebx, 16 // edx += 16 loop @@jLoop // } add esi, eax dec edx jnz @@iLoop // } cvtps2dq xmm0, xmm0 packssdw xmm0, xmm7 packuswb xmm0, xmm7 movd [edi], xmm0 // *(ARGB*)edi = xmm0 pop ebx pop esi add esi, 4 // esi += 4 add edi, 4 // edi += 4 dec width jnz @@xLoop // } add esi, srcOffset // esi += srcOffset add edi, dstOffset // edi += dstOffset pop width dec height jnz @@yLoop // } pop ebx pop edi pop esiend;
procedure ConvolutionOne(var Dest: TImageData; const Source: TImageData; Template: Pointer; Size, TemplateOffset: Integer);var width, height: Integer; dstOffset, srcOffset: Integer;asm push esi push edi push ebx push ecx call _SetCopyRegs mov width, ecx mov height, edx mov dstOffset, ebx mov srcOffset, eax mov eax, TemplateOffset pop ebx // ebx = Template pxor mm7, mm7@@yLoop: // for (y = 0; y < Dst.Height; y ++) push width // {@@xLoop: // for (x = 0; x < Dst.Width; x ++) push esi // { push ebx pxor mm0, mm0 // mm0 = 0 mov edx, size // for (I = 0; I < TemplateSize; I ++)@@iLoop: // { mov ecx, size // for (J = 0; J <= TemplateSize; J ++)@@jLoop: // { movd mm1, [esi] punpcklbw mm1, mm7 pmullw mm1, [ebx] // mm1 = pixels[I, J] * Template[i * j] paddsw mm0, mm1 // mm0 += mm1 add esi, 4 // esi += 4 add ebx, 8 // edx += 8 loop @@jLoop // } add esi, eax dec edx jnz @@iLoop // } packuswb mm0, mm7 movd [edi], mm0 // *(ARGB*)edi = mm0 pop ebx pop esi add esi, 4 // esi += 4 add edi, 4 // edi += 4 dec width jnz @@xLoop // } add esi, srcOffset // esi += srcOffset add edi, dstOffset // edi += dstOffset pop width dec height jnz @@yLoop // } emms pop ebx pop edi pop esiend;
procedure CvtTemplate8(Template8, Template: Pointer; n: Integer);asm push eax push ecx pcmpeqw mm7, mm7 psrlq mm7, 16@@Loop: movd mm0, [edx] pshufw mm0, mm0, 0 pand mm0, mm7 movq [eax], mm0 add edx, 4 add eax, 8 loop @@Loop pop ecx pop eax shr ecx, 1 shl ecx, 3 add eax, ecx add eax, 6 mov word ptr[eax], 1 // (short)Template8[n / 2][3] = 1 emmsend;
procedure CvtTemplate16(Template16, Template: Pointer; n: Integer);var nuclear: Single;asm push edx push ecx fldz@@SumLoop: fadd dword ptr[edx] add edx, 4 loop @@SumLoop fstp nuclear fwait pop ecx pop edx cmp nuclear, 0 je @@1 movss xmm7, nuclear shufps xmm7, xmm7, 0@@Loop: movss xmm0, [edx] shufps xmm0, xmm0, 0 divps xmm0, xmm7 movaps [eax], xmm0 add edx, 4 add eax, 16 loop @@Loop jmp @@Exit@@1: push eax push ecx pcmpeqd xmm7, xmm7 psrldq xmm7, 4 // xmm7 >> 32@@Loop1: movss xmm0, [edx] shufps xmm0, xmm0, 0 andps xmm0, xmm7 movaps [eax], xmm0 add edx, 4 add eax, 16 loop @@Loop1 pop ecx pop eax shr ecx, 1 shl ecx, 4 add eax, ecx add eax, 12 fld1 fstp dword ptr[eax] // (float)Template16[n / 2][3] = 1.0@@Exit:end;
procedure CvtTemplate16I(Template16, Template: Pointer; n, nuclear: Integer);asm cmp nuclear, 1 jle @@1 movd xmm7, nuclear pshufd xmm7, xmm7, 0 cvtdq2ps xmm7, xmm7@@Loop: movd xmm0, [edx] pshufd xmm0, xmm0, 0 cvtdq2ps xmm0, xmm0 divps xmm0, xmm7 movaps [eax], xmm0 add edx, 4 add eax, 16 loop @@Loop jmp @@Exit@@1: push eax push ecx pcmpeqd xmm7, xmm7 psrldq xmm7, 4 // xmm7 >> 32@@Loop1: movd xmm0, [edx] pshufd xmm0, xmm0, 0 pand xmm0, xmm7 cvtdq2ps xmm0, xmm0 movaps [eax], xmm0 add edx, 4 add eax, 16 loop @@Loop1 pop ecx pop eax shr ecx, 1 shl ecx, 4 add eax, ecx add eax, 12 fld1 fstp dword ptr[eax] // (float)Template16[n / 2][3] = 1.0@@Exit:end;
procedure ImageConvolutionI(var Data: TImageData; const Template: array of Integer);var i, n, sum: Integer; buffer, pTemplate: Pointer; templateSize, templateOffset: Integer; nuclear: Integer; isFloatOp: Boolean; src: TImageData;begin if Data.AlphaFlag then ArgbConvertPArgb(Data); n := Length(Template); templateSize := Trunc(Sqrt(n)); src := _GetExpandData(Data, templateSize shr 1); templateOffset := src.Stride - (templateSize shl 2); nuclear := 0; for i := 0 to n - 1 do Inc(nuclear, Template[i]); isFloatOp := nuclear > 1; if not isFloatOp then begin sum := 0; for i := 0 to n - 1 do begin Inc(sum, Template[i] * 255); isFloatOp := (sum > 32767) or (sum < -32768); if isFloatOp then Break; end; end; if not isFloatOp then begin buffer := GlobalAllocPtr(GHND, n * Sizeof(TMMType)); CvtTemplate8(buffer, @Template[0], n); ConvolutionOne(Data, src, buffer, templateSize, templateOffset); end else begin buffer := GlobalAllocPtr(GHND, n * 16 + 12); pTemplate := Pointer((Integer(buffer) + 12) and (not 15)); CvtTemplate16I(pTemplate, @Template[0], n, nuclear); Convolution32(Data, src, PTemplate, templateSize, templateOffset); end; GlobalFreePtr(Buffer); FreeImageData(src); if Data.AlphaFlag then PArgbConvertArgb(Data);end;
procedure ImageConvolution(var Data: TImageData; const Template: array of Single);var n: Integer; buffer, pTemplate: Pointer; templateSize, templateOffset: Integer; src: TImageData;begin if Data.AlphaFlag then ArgbConvertPArgb(Data); n := Length(Template); templateSize := Trunc(Sqrt(n)); src := _GetExpandData(Data, templateSize shr 1); templateOffset := src.Stride - (templateSize shl 2); buffer := GlobalAllocPtr(GHND, n * 16 + 12); pTemplate := Pointer((Integer(buffer) + 12) and (not 15)); CvtTemplate16(pTemplate, @Template[0], n); Convolution32(Data, src, PTemplate, templateSize, templateOffset); GlobalFreePtr(Buffer); FreeImageData(src); if Data.AlphaFlag then PArgbConvertArgb(Data);end;
因为要进行图像卷积操作,必须拷贝出源图像,本文过程在拷贝源图像时使用_GetExpandData过程附带扩展了其边框,这样在卷积图像边界像素时不必再进行坐标范围判断,从而简化了代码和提高了运行速度。
考虑不同的应用环境,本文写了浮点卷积矩阵和整数卷积矩阵2个版本。
浮点版本采用SSE浮点运算,每次可同时处理图像像素ARGB4个分量,运行速度比较快;
整数版本有2种处理方法,对于卷积核小于等于1的卷积模板采用了MMX处理,过程速度相当快,当然也要求其模板单个数据项不得超过127,整个模板数据项与255的乘积和不得超过$7fff,一般卷积核小于等于1的卷积模板都远远小于这些值(当然,代码中已经对此作了检查,并做出了相应的处理)。对于卷积核大于1的卷积模板,则转换为浮点数版本处理。
下面给出一个直接调用GdipConvolution过程进行Hough变换的例子:procedure TForm1.Button3Click(Sender: TObject);const// Ray: array[0..8] of Integer = (-1, 0, 1, -1, 0, 1, -1, 0, 1);// Ray: array[0..8] of Integer = (-1, -1, 0, -1, 0, 1, 0, 1, 1); Ray: array[0..8] of Integer = (-1, -1, -1, 0, 0, 0, 1, 1, 1);var bmp: TGpBitmap; g: TGpGraphics; data: TImageData;begin bmp := TGpBitmap.Create('..\media\msn.jpg'); g := TGpGraphics.Create(Canvas.Handle); g.DrawImage(bmp, 0, 0); data := LockGpBitmap(bmp); ImageConvolutionI(Data, Ray); UnlockGpBitmap(bmp, data); g.DrawImage(bmp, data.Width, 0); g.Free; bmp.Free;end;
相关阅读 >>
Delphi利用微软sapi实现语音控制(Delphi,tts,sr,灰色代码)_
Delphi http协议验证访问datasnap rest 服务器
Delphi win32,win64用于单/多线程计数素数的整数性能比较
更多相关阅读请进入《Delphi》频道 >>