用位运算来做大小写转换,一次转换八个字符,超快
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/**
 * Upper-case the ASCII letters of a buffer, eight bytes per step.
 *
 * Copies len bytes from src to dest, replacing every ASCII 'a'..'z' with the
 * matching 'A'..'Z', and then stores a terminating NUL at dest[len].
 * Bytes with the high bit set (>= 0x80) are copied through unchanged.
 *
 * @param src   input bytes; at least len bytes must be readable
 * @param dest  output buffer; at least len + 1 bytes must be writable
 * @param len   number of bytes to convert
 */
void
fast_to_upper(const char *src, char *dest, size_t len) {
    const uint64_t low7_mask = UINT64_C(0x7f7f7f7f7f7f7f7f);
    const uint64_t plus_five = UINT64_C(0x0505050505050505);
    const uint64_t case_bit  = UINT64_C(0x2020202020202020);
    const size_t blocks = len / sizeof(uint64_t);
    size_t i;

    for (i = 0; i < blocks; i++) {
        uint64_t word, low, a, b;

        /* memcpy instead of a uint64_t* cast: src/dest may be unaligned and
         * reading char storage through uint64_t* violates strict aliasing. */
        memcpy(&word, src + i * sizeof word, sizeof word);

        /* Work on the low 7 bits so the additions below cannot carry from
         * one byte lane into the next. */
        low = word & low7_mask;
        a = low + plus_five;
        b = low + low7_mask;
        /* Bits 6 and 5 of a lane are both set in (a & b) only when the
         * 7-bit value is in 0x61..0x7a, i.e. 'a'..'z'. */
        a &= b;
        /* Fold bit 6 onto bit 5 and keep only bit 5: 0x20 per lowercase lane. */
        a = a & (a >> 1) & case_bit;
        /* word >> 2 moves each lane's bit 7 onto its bit 5; use it to cancel
         * the mask for non-ASCII bytes so they are left untouched. */
        a &= ~(word >> 2);

        /* Subtracting 0x20 from a lane holding 0x61..0x7a never borrows. */
        word -= a;
        memcpy(dest + i * sizeof word, &word, sizeof word);
    }

    /* Tail: same trick, one byte at a time. */
    for (i *= sizeof(uint64_t); i < len; i++) {
        unsigned c = (unsigned char)src[i];
        unsigned low = c & 0x7fu;
        unsigned m = (low + 0x05u) & (low + 0x7fu);

        m = m & (m >> 1) & 0x20u & ~(c >> 2);
        dest[i] = (char)(c - m);
    }
    dest[len] = 0;
}
/* Demo entry point: upper-cases a sample string and prints the result. */
int main(void)
{
    static const char sample[] = "Hello World!!";
    char upper[1024];

    fast_to_upper(sample, upper, strlen(sample));
    printf("%s\n", upper);
    return 0;
}
补充说明:
首先说明,ASCII
码中,小写转大写字母只需将二进制中的第 5 位置 0 即可,比如 A 的二进制是 01000001 而 a 的二进制是
01100001
小写字母的取值范围是
01100001 到 01111010,也就是说,只有 'a' 到 'z'
满足下面的条件:
1. x - 1
的二进制数值是 011xxxxx
2. x + 5 的二进制数值是
011xxxxx
我们来看后面单循环的版本吧。
1. 首先先对字符
& 0x7f
把最高位消除,防止进位造成问题。
2. a
就是对原本的字符进行 +5
3. b
就是对原本的字符进行 -1 ( 在低 7 位上 +0x7f 等同于 -1 )
4. a =
a&b,根据上面所说,a&b 的开头是 011
的就说明是小写字母,否则则不是
5. a
& (a >>
1),如果是小写字母的话必定开头是 001,否则则不是小写字母。过后再 & 0x20
来把其他位都置空。最终得到的数值,如果原本字符是小写字母,则得到 0x20,否则为
0
6. 开始说的,小写字母的
ascii 码减 0x20 就能得到大写字母了
有人怀疑性能,我来补充
benchmark:
/* Byte-to-byte translation table used by to_upper_by_map(); it is indexed by
 * the unsigned byte value and must be filled by init_map() before use. */
char
lowercase_to_uppercase_map[256];

/**
 * Fill lowercase_to_uppercase_map: entries 'a'..'z' map to 'A'..'Z',
 * every other entry maps to itself.
 */
void
init_map(void) {
    for (int i = 0; i < 256; i++) {
        if (i >= 'a' && i <= 'z') {
            lowercase_to_uppercase_map[i] = (char)(i - 'a' + 'A');
        } else {
            /* Explicit cast: values above 127 are narrowed to char. */
            lowercase_to_uppercase_map[i] = (char)i;
        }
    }
}
/**
 * Upper-case a buffer through the lowercase_to_uppercase_map lookup table.
 *
 * Translates len bytes from src into dest one byte at a time and stores a
 * terminating NUL at dest[len]. The table must have been filled by
 * init_map() beforehand.
 *
 * @param src   input bytes; at least len bytes must be readable
 * @param dest  output buffer; at least len + 1 bytes must be writable
 * @param len   number of bytes to convert
 */
void
to_upper_by_map(const char *src, char *dest, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        /* Index with the unsigned byte value: plain char may be signed, and
         * a negative index would read outside the 256-entry table. */
        dest[i] = lowercase_to_uppercase_map[(unsigned char)src[i]];
    }
    dest[len] = 0;
}
#include <stdlib.h>
#include <sys/time.h>
/* Microseconds elapsed between two gettimeofday() samples. */
static int
elapsed_usec(const struct timeval *start, const struct timeval *end) {
    return (int)((end->tv_sec - start->tv_sec) * 1000000
                 + (end->tv_usec - start->tv_usec));
}

/**
 * Time 1000 passes of to_upper_by_map() ("Method 1") and of fast_to_upper()
 * ("Method 2") over the same 1 MiB buffer of printable ASCII, and print the
 * elapsed microseconds for each to stdout.
 */
void
benchmark(void) {
    const size_t size = (size_t)1 << 20;
    char *src = malloc(size);
    char *dest = malloc(size);
    struct timeval tv_start;
    struct timeval tv_end;

    if (src == NULL || dest == NULL) {
        fprintf(stderr, "benchmark: out of memory\n");
        free(src);
        free(dest);
        return;
    }

    /* Fill with characters in '!'..'~' (94 values); the index stays
     * unsigned int so the multiplication wraps in a well-defined way. */
    for (unsigned int i = 0; i < size - 1; i++) {
        src[i] = (char)((i * 29327u) % 94 + '!');
    }
    /* The terminator goes in the last allocated byte, not one past it. */
    src[size - 1] = 0;

    init_map();

    gettimeofday(&tv_start, NULL);
    for (int i = 0; i < 1000; i++) {
        to_upper_by_map(src, dest, size - 1);
    }
    gettimeofday(&tv_end, NULL);
    printf("Method 1: %d us\n", elapsed_usec(&tv_start, &tv_end));

    gettimeofday(&tv_start, NULL);
    for (int i = 0; i < 1000; i++) {
        fast_to_upper(src, dest, size - 1);
    }
    gettimeofday(&tv_end, NULL);
    printf("Method 2: %d us\n", elapsed_usec(&tv_start, &tv_end));

    free(dest);
    free(src);
}
实际测试:(我本机测试,2014年底的 rMBP,具体
spec 就不贴了)
Method 1:
3280091 us
Method 2:
687517 us
性能大概差四倍多吧