linux内核的atoi测试
v_JULY_v君的问题非常好(请见文章的评论)! 每次都让我思考. 现将linux内核的atoi测试代码贴出来, 为了区别于C标准库的atoi函数, 我把测试的函数名改为matoi:
#include <ctype.h>
#include <string.h>
#include <stdio.h>
/*http://lxr.free-electrons.com/source/lib/kstrtox.h#L4*/
/*
 * Flag (bit 31) that _parse_integer() ORs into its return value when it
 * detects overflow; the remaining bits hold the number of characters consumed.
 */
#define KSTRTOX_OVERFLOW (1U << 31)
/* Forward declarations of the two parsing helpers defined further down. */
const char *_parse_integer_fixup_radix(const char *s, unsigned int *base);
unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res);
/*http://lxr.free-electrons.com/source/arch/powerpc/boot/types.h#L12*/
/* Short integer type aliases used by the functions in this listing. */
typedef int s32;
typedef unsigned int u32;
typedef unsigned long long u64;
/*http://lxr.free-electrons.com/source/drivers/media/pci/ngene/ngene-dvb.c#L127*/
/* NOTE(review): no function in this listing reads or writes this file-scope variable. */
static u32 overflow;
/*http://lxr.free-electrons.com/source/include/linux/kernel.h#L29*/
/* Largest unsigned long long value (all bits set). */
#define ULLONG_MAX (~0ULL)
/* In this test program the macro simply expands to its argument. */
#define unlikely(cond) (cond)
/*http://lxr.free-electrons.com/source/lib/kstrtox.c#L23*/
/*
 * Resolve the radix of a numeric string and skip an optional hex prefix.
 *
 * @s:    start of the number (no sign, no leading white space).
 * @base: in/out radix. If *base is 0 on entry it is auto-detected:
 *        "0x"/"0X" followed by a hex digit selects 16, any other leading
 *        '0' selects 8, everything else selects 10.
 *
 * Returns a pointer to the first character to hand to the digit parser:
 * s + 2 when the radix is 16 and the string starts with "0x"/"0X",
 * otherwise s unchanged.
 *
 * The standard tolower()/isxdigit() are used on unsigned char values;
 * the non-ISO _tolower() macro is only specified for uppercase input.
 */
const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
{
    if (*base == 0) {
        if (s[0] == '0') {
            /* s[1] is at worst the terminating NUL, s[2] is only read after an 'x'. */
            if (tolower((unsigned char)s[1]) == 'x' && isxdigit((unsigned char)s[2]))
                *base = 16;
            else
                *base = 8;
        } else {
            *base = 10;
        }
    }
    if (*base == 16 && s[0] == '0' && tolower((unsigned char)s[1]) == 'x')
        s += 2;
    return s;
}
/*http://lxr.free-electrons.com/source/lib/kstrtox.c#L47*/
/*
 * Convert a non-negative integer string in an explicitly given radix
 * to an unsigned long long.
 *
 * @s:    digits to parse. Parsing stops at the terminating NUL or at the
 *        first character that is not a digit valid in @base.
 * @base: radix. Only 0-9 and a-f/A-F are recognised as digits, so the
 *        largest useful radix is 16.
 * @p:    receives the accumulated value. If overflow occurs the
 *        (incorrect, wrapped) value is still stored.
 *
 * Returns the number of characters consumed, OR-ed with
 * KSTRTOX_OVERFLOW if the value did not fit in unsigned long long.
 *
 * Don't you dare use this function.
 */
unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
{
    unsigned long long res = 0;
    unsigned int rv = 0;
    int overflowed = 0;

    while (*s) {
        const int c = tolower((unsigned char)*s);
        unsigned int val;

        if ('0' <= c && c <= '9')
            val = c - '0';
        else if ('a' <= c && c <= 'f')
            val = c - 'a' + 10;
        else
            break;
        /* Also guarantees base != 0 below, since val >= 0 always holds. */
        if (val >= base)
            break;
        /*
         * Check for overflow only if we are within range of
         * it in the max base we support (16): while the top four bits
         * are clear, res * 16 + 15 still fits.
         */
        if (unlikely(res & (~0ull << 60))) {
            /*
             * res * base + val overflows exactly when
             * res > (ULLONG_MAX - val) / base. Plain C division is
             * sufficient here; "ULLONG_MAX - val / base" would be
             * ULLONG_MAX (val < base) and never trigger.
             */
            if (res > (ULLONG_MAX - val) / base)
                overflowed = 1;
        }
        res = res * base + val;
        rv++;
        s++;
    }
    *p = res;
    if (overflowed)
        rv |= KSTRTOX_OVERFLOW;
    return rv;
}
/*http://lxr.free-electrons.com/source/lib/vsprintf.c#L44*/
/**
 * simple_strtoull - convert a string to an unsigned long long
 * @cp: The start of the string
 * @endp: If non-NULL, set to point just past the last character parsed
 * @base: The number base to use (0 lets the "0x"/"0" prefix decide)
 *
 * The overflow flag reported by _parse_integer() is masked off and
 * otherwise ignored, so an out-of-range input yields a wrapped value.
 *
 * This function is obsolete. Please use kstrtoull instead.
 */
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
{
    unsigned long long value;
    const char *digits = _parse_integer_fixup_radix(cp, &base);
    unsigned int consumed = _parse_integer(digits, base, &value);

    /* FIXME */
    consumed &= ~KSTRTOX_OVERFLOW;
    if (endp)
        *endp = (char *)(digits + consumed);
    return value;
}
/*http://lxr.free-electrons.com/source/lib/vsprintf.c#L83*/
/**
 * simple_strtoul - convert a string to an unsigned long
 * @cp: The start of the string
 * @endp: A pointer to the end of the parsed string will be placed here
 * @base: The number base to use
 *
 * Thin wrapper around simple_strtoull(); the result is narrowed to
 * unsigned long by the usual unsigned conversion.
 *
 * This function is obsolete. Please use kstrtoul instead.
 */
unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
{
    const unsigned long long wide = simple_strtoull(cp, endp, base);

    return (unsigned long)wide;
}
/*http://lxr.free-electrons.com/source/drivers/staging/tidspbridge/rmgr/dbdcd.c#L950*/
/*
* ======== atoi ========
* Purpose:
* This function converts strings in decimal or hex format to integers.
*/
static s32 matoi(const char *psz_buf)
{
char *pch = psz_buf;
s32 base = 0;
while (isspace(*pch))
pch++;
if (*pch == '-' || *pch == '+') {
base = 10;
pch++;
} else if (*pch && tolower(pch[strlen(pch) - 1]) == 'h') {
base = 16;
}
return simple_strtoul(pch, NULL, base);
}
void test(const char* str) {
printf("%s : %d\n", str, matoi(str));
}
/* Run matoi() over a fixed set of in-range and out-of-range decimal strings. */
int main() {
    static const char *const inputs[] = {
        "2147483647",
        "2147483648",
        "-2147483648",
        "-2147483649",
        "10522545459",
        "-10522545459",
    };
    const unsigned int count = sizeof inputs / sizeof inputs[0];
    unsigned int i;

    for (i = 0; i < count; i++)
        test(inputs[i]);
    return 0;
}
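修改的地方在第75行(即_parse_integer中的溢出判断), 原来的代码为:
if (res > div_u64(ULLONG_MAX - val, base))
而div_u64调用的div_u64_rem函数中包含汇编代码, 编译不过(原因尚未可知, 有待进一步研究), 所以我把div_u64调用去掉, 换成了普通的除法. 注意: 与原代码等价的写法是 if (res > (ULLONG_MAX - val) / base); 如果写成 ULLONG_MAX - val/base, 由于此处 val < base, val/base 恒为0, 该判断永远不成立, 溢出标志也就永远不会被置位(不过simple_strtoull本来就忽略了这个标志, 所以不影响下面的输出).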
程序的输出结果(很显然, 对于溢出的情况, 程序没有处理):
2147483647 : 2147483647
2147483648 : -2147483648
10522545459 : 1932610867
-2147483648 : -2147483648
-2147483649 : -2147483647
-10522545459 : 1932610867
Nut/OS的atoi测试
以下是测试代码(在ubuntu 10.4.1, gcc 4.4.3上编译通过, 为了区别于C标准库的函数, 函数名strtol更改为mstrtol, atoi更改为matoi2):
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#define CONST const
/*
 * mstrtol - convert the initial part of a string to a long.
 *
 * nptr:   NUL-terminated string to parse. Leading white space and one
 *         optional '+' or '-' sign are accepted.
 * endptr: if non-NULL, receives the address of the first character that
 *         was not consumed, or nptr itself if no digit was consumed.
 * base:   radix. 0 means auto-detect: "0x"/"0X" selects 16, a leading
 *         '0' selects 8, anything else 10. With base 16 a "0x"/"0X"
 *         prefix is also skipped. The value is not otherwise validated.
 *
 * Returns the converted value. When the range check fires, the result is
 * LONG_MAX (or LONG_MIN for negative input) and errno is set to ERANGE.
 *
 * NOTE(review): known defect, kept on purpose because the text after this
 * listing demonstrates it. The two range checks in the digit loop are
 * written "(acc {<,>} cutoff || acc == cutoff) && c > cutlim", so once acc
 * is already beyond cutoff the overflow is only caught when the next digit
 * also exceeds cutlim; otherwise "acc *= base" overflows. The intended
 * condition is "acc {<,>} cutoff || (acc == cutoff && c > cutlim)".
 */
long mstrtol(CONST char *nptr, char **endptr, int base)
{
register CONST char *s;
register long acc, cutoff;
register int c;
register int neg, any, cutlim;
/*
 * Skip white space and pick up leading +/- sign if any.
 * If base is 0, allow 0x for hex and 0 for octal, else
 * assume decimal; if base is already 16, allow 0x.
 */
s = nptr;
do {
/* Cast so that isspace() receives a non-negative value. */
c = (unsigned char) *s++;
} while (isspace(c));
if (c == '-') {
neg = 1;
/* NOTE(review): unlike the reads above and in the loop, no unsigned char cast here. */
c = *s++;
} else {
neg = 0;
if (c == '+')
c = *s++;
}
if ((base == 0 || base == 16) && c == '0' && (*s == 'x' || *s == 'X')) {
/* Skip the "0x": c becomes the first character after it, s points one past c. */
c = s[1];
s += 2;
base = 16;
}
if (base == 0)
base = c == '0' ? 8 : 10;
/*
 * Compute the cutoff value between legal numbers and illegal
 * numbers. That is the largest legal value, divided by the
 * base. An input number that is greater than this value, if
 * followed by a legal input character, is too big. One that
 * is equal to this value may be valid or not; the limit
 * between valid and invalid numbers is then based on the last
 * digit. For instance, if the range for longs is
 * [-2147483648..2147483647] and the input base is 10,
 * cutoff will be set to 214748364 and cutlim to either
 * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
 * a value > 214748364, or equal but the next digit is > 7 (or 8),
 * the number is too big, and we will return a range error.
 *
 * Set any if any `digits' consumed; make it negative to indicate
 * overflow.
 */
cutoff = neg ? LONG_MIN : LONG_MAX;
cutlim = cutoff % base;
cutoff /= base;
if (neg) {
/* Normalise a positive remainder so that cutlim ends up as a non-negative digit limit. */
if (cutlim > 0) {
cutlim -= base;
cutoff += 1;
}
cutlim = -cutlim;
}
/* c already holds the first candidate digit; the increment fetches the next one. */
for (acc = 0, any = 0;; c = (unsigned char) *s++) {
/* Map the character to its digit value; stop at the first non-alphanumeric. */
if (isdigit(c))
c -= '0';
else if (isalpha(c))
c -= isupper(c) ? 'A' - 10 : 'a' - 10;
else
break;
if (c >= base)
break;
/* After an overflow keep consuming digits but stop accumulating. */
if (any < 0)
continue;
if (neg) {
/* Negative numbers are accumulated downwards (acc -= c). See the defect note above. */
if ((acc < cutoff || acc == cutoff) && c > cutlim) {
any = -1;
acc = LONG_MIN;
errno = ERANGE;
} else {
any = 1;
acc *= base;
acc -= c;
}
} else {
/* See the defect note above: acc > cutoff with c <= cutlim is not caught. */
if ((acc > cutoff || acc == cutoff) && c > cutlim) {
any = -1;
acc = LONG_MAX;
errno = ERANGE;
} else {
any = 1;
acc *= base;
acc += c;
}
}
}
/* s is one past the character that ended the loop, hence s - 1. */
if (endptr != 0)
*endptr = (char *) (any ? s - 1 : nptr);
return (acc);
}
/*
 * matoi2 - parse a decimal integer with mstrtol() and narrow it to int.
 * No end pointer is requested, so trailing characters are ignored.
 */
int matoi2(CONST char *str)
{
    const long parsed = mstrtol(str, (char **) NULL, 10);

    return (int) parsed;
}
/*
 * Read one line from stdin into buf, without the trailing newline.
 *
 * buf: destination buffer.
 * n:   size of buf in bytes. At most n - 1 characters are stored and the
 *      stored text is always NUL-terminated. Characters of an over-long
 *      line that do not fit stay unread on stdin for the next call.
 *
 * Returns the number of characters stored (0 for an empty line), or -1
 * when nothing can be delivered: buf is NULL, n is 0 (buf is then left
 * untouched), or end-of-file / a read error occurred before any
 * character was read.
 */
int mgetline(char* buf, size_t n) {
    size_t idx = 0;
    int c = 0;

    /* With n == 0 a "--n" style countdown would wrap around and overrun buf. */
    if (buf == NULL || n == 0)
        return -1;
    /* idx + 1 < n keeps one byte free for the terminator. */
    while (idx + 1 < n) {
        c = getchar();
        if (c == EOF || c == '\n')
            break;
        buf[idx++] = (char)c;
    }
    buf[idx] = '\0';
    /* Report end of input so that callers looping on ">= 0" terminate. */
    if (c == EOF && idx == 0)
        return -1;
    return (int)idx;
}
/* Size of the line buffer, including the terminating NUL. */
#define MAX_LINE 200
/*
 * Interactive driver: read lines from stdin and print matoi2() of each
 * one until the line "quit" is entered or the input ends.
 */
int main() {
    char buf[MAX_LINE];

    for (;;) {
        int len = mgetline(buf, MAX_LINE);

        /* A negative length is treated as end of input. */
        if (len < 0)
            break;
        /*
         * A zero length with stdin at end-of-file (or in error) means
         * there is no more input, as opposed to a blank line; without
         * this check the loop would never terminate on EOF.
         */
        if (len == 0 && (feof(stdin) || ferror(stdin)))
            break;
        if (strcmp(buf, "quit") == 0)
            break;
        printf("matoi2=%d\n", matoi2(buf));
    }
    return 0;
}
程序的测试结果:
10522545459 matoi2=2147483647 -10522545459 matoi2=-2147483648
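程序貌似对溢出的处理是正确的, 真的吗? 请注意代码的第79和第89行(即for循环中 (acc < cutoff || acc == cutoff) && c > cutlim 和 (acc > cutoff || acc == cutoff) && c > cutlim 这两处溢出判断). 现在我把测试数据换成"10522545454", 与"10522545459"区别在于最后一个字符.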
10522545454 matoi2=1932610862 -10522545454 matoi2=-1932610862
bingo! 正中下怀! 对于字串"10522545454", 在读取最后的数字字符'4'时, 整数1052254545已经大于2147483647/10了, 说明已经溢出, 不应该再判断字串的最后一位4是否大于2147483647%10, 所以第79行应该改为(第89行修改方法类似, 只需把 acc < cutoff 换成 acc > cutoff):
if (acc < cutoff || (acc == cutoff && c > cutlim)) {
修改过后的代码测试正常:
10522545459 matoi2=2147483647 -10522545459 matoi2=-2147483648 10522545454 matoi2=2147483647 -10522545454 matoi2=-2147483648 quit
关于此bug, 我已经邮件通知En-Nut-Discussion.
以下为邮件回复的截图, Uwe Bonnes说: 可以打个补丁到分支. 不过他把单词reasonable给拼错了.