C库精巧的处理-memchr

本文分析了C库中memchr函数的汇编实现:先逐字节处理未按4字节对齐的起始部分,对齐之后每次读取4个字节,并利用位运算一次性判断其中是否包含目标字节,最后再逐字节处理不足4字节的尾部。这种做法减少了内存访问次数和指令执行次数,从而显著提高了在内存块中查找某个字节的效率。

我实在感叹这些C库设计者的强大

;-----------------------------------------------------------------------
; memchr - find the first occurrence of a byte in a memory block
;
; C equivalent: void *memchr(const void *buf, int chr, size_t cnt)
; Target:   32-bit x86, MASM syntax. Arguments are read from the stack
;           relative to esp and the routine ends with a plain `ret`.
; In:       at entry (before ebx is pushed):
;             [esp+4]   = buf  (start of the block)
;             [esp+8]   = chr  (only the low byte is used)
;             [esp+0Ch] = cnt  (number of bytes to examine)
; Out:      eax = address of the first byte equal to chr,
;           or 0 if none of the cnt bytes matches (or cnt == 0)
; Modifies: eax, ecx, edx, flags. ebx and edi are saved and restored.
;
; Register roles inside the routine:
;   eax = bytes still to examine (biased by -4 inside the dword loop)
;   edx = current read pointer
;   ebx = search byte (in bl; replicated into all 4 bytes for the loop)
;   ecx = scratch for the data being compared
;   edi = scratch for the zero-byte test (dword loop only)
;
; Strategy:
;   1. Compare byte by byte until edx is 4-byte aligned.
;   2. While at least 4 bytes remain, read one dword per iteration, XOR
;      it with the replicated search byte (matching bytes become 0) and
;      test the result for a zero byte with two magic constants.
;   3. Compare the remaining 0..3 bytes one at a time.
;-----------------------------------------------------------------------
CODESEG

public memchr
memchr proc \
buf:ptr byte, \
chr:byte, \
cnt:dword

; No assembler-generated prologue/epilogue: the code below addresses its
; arguments directly through esp and manages the stack itself.
OPTION PROLOGUE:NONE, EPILOGUE:NONE

; MASM .FPO directive (frame description for debugging tools); see the
; MASM reference for the meaning of the individual fields.
.FPO ( 0, 1, 0, 0, 0, 0 )

mov eax,[esp+0ch] ; eax = cnt (offset valid only before the push below)
push ebx ; save ebx; from here on every argument offset is +4

test eax,eax ; cnt == 0 ?
jz short retnull ; yes: return with eax = 0 (NULL)

mov edx,[esp+8] ; edx = buf (was [esp+4] at entry)
xor ebx,ebx ; clear ebx so only bl carries the search byte

mov bl,[esp+0ch] ; bl = chr (was [esp+8] at entry)

test edx,3 ; is the pointer already 4-byte aligned ?
jz short main_loop_start

; ---- Phase 1: byte-at-a-time until edx is 4-byte aligned ----
str_misaligned: ; simple byte loop until string is aligned
mov cl,byte ptr [edx] ; cl = current byte
add edx,1 ; advance first; `found` compensates with edx - 1
xor cl,bl ; ZF set when the byte equals chr
je short found
sub eax,1 ; counter--
jz short retnull ; count exhausted, eax is 0 here
test edx,3 ; already aligned ?
jne short str_misaligned

; ---- Phase 2 setup: enter the dword loop only if >= 4 bytes remain ----
main_loop_start:
sub eax,4 ; reserve 4 bytes for the first dword
jb short tail_less_then_4 ; borrow: fewer than 4 bytes left

; set all 4 bytes of ebx to [value]
push edi ; Preserve edi
mov edi,ebx ; edi=0/0/0/char
shl ebx,8 ; ebx=0/0/char/0
add ebx,edi ; ebx=0/0/char/char
mov edi,ebx ; edi=0/0/char/char
shl ebx,10h ; ebx=char/char/0/0
add ebx,edi ; ebx = all 4 bytes = [search char]
jmp short main_loop_entry ; eax >= 0 here: a full dword is available

; The dword loop ran out of full dwords: drop the saved edi and finish
; with the byte loop. bl still holds the search byte.
return_from_main:
pop edi

; ---- Phase 3: remaining 0..3 bytes ----
tail_less_then_4:
add eax,4 ; undo the -4 bias: eax = bytes really left (0..3)
jz retnull ; nothing left: return with eax = 0

tail_loop: ; 0 < eax < 4
mov cl,byte ptr [edx] ; cl = current byte
add edx,1 ; advance first; `found` compensates with edx - 1
xor cl,bl ; ZF set when the byte equals chr
je short found
sub eax,1
jnz short tail_loop
; Every path that reaches retnull arrives with eax == 0, which is the
; NULL return value, and with only ebx left on the stack.
retnull:
pop ebx
ret ; _cdecl return

; ---- Phase 2: one aligned dword per iteration ----
main_loop:
sub eax,4 ; reserve the next 4 bytes
jb short return_from_main ; fewer than 4 left: go to the tail loop
; Zero-byte test, in the form
;   (((longword + magic_bits) ^ ~longword) & ~magic_bits) != 0
; with magic_bits = 7efefeffh and ~magic_bits = 81010100h (bits 8, 16,
; 24 and 31). Adding magic_bits makes each non-zero byte carry into the
; bit above it; a masked bit ends up set when that carry did not happen,
; i.e. when a byte of longword may be zero. Because longword is the data
; XORed with the replicated search byte, a zero byte means a match.
main_loop_entry: ;(((longword + magic_bits) ^ ~longword) & ~magic_bits) != 0
mov ecx,dword ptr [edx] ; read 4 bytes

xor ecx,ebx ; ebx is byte\byte\byte\byte; matching bytes become 0
mov edi,7efefeffh ; magic_bits

add edi,ecx ; edi = longword + magic_bits
xor ecx,-1 ; ecx = ~longword

xor ecx,edi ; ecx = (longword + magic_bits) ^ ~longword
add edx,4 ; advance now; the byte_N exits compensate

and ecx,81010100h ; keep only the ~magic_bits positions
je short main_loop ; all clear: no match in this dword

; The test above flagged a possible match in the dword just read.
; Reload it (edx has already moved past it) and compare each byte
; exactly, lowest address first, so the first match is the one returned.
char_is_found:
mov ecx,[edx - 4]
xor cl,bl ; is it byte 0
je short byte_0
xor ch,bl ; is it byte 1
je short byte_1
shr ecx,10h ; is it byte 2 (bytes 2 and 3 move into cl and ch)
xor cl,bl
je short byte_2
xor ch,bl ; is it byte 3
je short byte_3
jmp short main_loop ; taken if bits 24-30 are clear and bit
; 31 is set (false positive of the test: no byte actually matched)

; Exit paths. edx points just past the dword (byte_N) or just past the
; matching byte (found), so the result is edx minus a fixed offset.
byte_3:
pop edi ; restore edi
; `found` is also the exit of both byte loops, where edi was not pushed
; (or has already been popped), so only ebx is restored here.
found:
lea eax,[edx - 1] ; eax = address of the matching byte
pop ebx ; restore ebx
ret ; _cdecl return

byte_2:
lea eax,[edx - 2] ; match at offset 2 of the dword
pop edi ; restore edi
pop ebx ; restore ebx
ret ; _cdecl return

byte_1:
lea eax,[edx - 3] ; match at offset 1 of the dword
pop edi ; restore edi
pop ebx ; restore ebx
ret ; _cdecl return

byte_0:
lea eax,[edx - 4] ; match at offset 0 of the dword
pop edi ; restore edi
pop ebx ; restore ebx
ret ; _cdecl return

memchr endp
end

参考:
http://blog.youkuaiyun.com/ken_2642/article/details/1958347
http://blog.youkuaiyun.com/qcharles/article/details/7679491
simple byte loop until string is aligned
http://blog.youkuaiyun.com/eparg/article/details/1791973
http://blog.youkuaiyun.com/masefee/article/details/7040012
http://blog.youkuaiyun.com/eparg/article/details/1791973
http://www.newsmth.net/bbsanc.php?path=%2Fgroups%2Fcomp.faq%2FCPlusPlus%2Fcodeandtrick%2FM.1088674980.K0
google 搜索mov edx,7efefeffh

转载于:https://www.cnblogs.com/moonflow/archive/2012/11/03/2752885.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值