最近想实现一个多语言版的strtotime函数,所以阅读了php源码中strtotime函数的实现,很感谢“胖胖”大大的文章(http://www.phppan.com/2011/06/php-strtotime/),为本人分析strtotime提供了一个大概的思路,阅读本文前请先阅读“胖胖”大大的文章。
先贴上strtotime的分析结果:
1. 使用词法分析器re2c对英文文本的日期时间描述进行分析(/ext/date/lib/parse_date.c中scan())。
2. 针对分析出的token做出相应的操作,即计算出英文文本的日期时间描述与标准时间的差值,结果存放在结构体timelib_rel_time中。
3. 根据差值与标准时间,转换为一个时间戳。
我们以strtotime("last sunday")为例来说明。
关键的三个结构如下:
/*
 * Scanner state passed to the lexer and to the rule helpers.
 * NOTE(review): apart from `time`, this excerpt does not show how the fields
 * are used; the buffer-pointer roles below are not documented here on purpose.
 */
typedef struct Scanner {
    int fd;
    uchar *lim;
    uchar *str;
    uchar *ptr;
    uchar *cur;
    uchar *tok;
    uchar *pos;
    unsigned int line;
    unsigned int len;
    struct timelib_error_container *errors;
    struct timelib_time *time;  /* parse result; timelib_set_relative() writes time->relative */
    const timelib_tzdb *tzdb;
} Scanner;
/*
 * A broken-down date/time together with the pending relative offset and the
 * bookkeeping flags used while parsing and converting it to a timestamp.
 */
typedef struct timelib_time {
    /* Calendar date */
    timelib_sll y;                  /* Year */
    timelib_sll m;                  /* Month */
    timelib_sll d;                  /* Day */

    /* Time of day */
    timelib_sll h;                  /* Hour */
    timelib_sll i;                  /* Minute */
    timelib_sll s;                  /* Second */
    double f;                       /* Fraction */

    /* Timezone information */
    int z;                          /* GMT offset in minutes */
    char *tz_abbr;                  /* Timezone abbreviation (display only) */
    timelib_tzinfo *tz_info;        /* Timezone structure */
    signed int dst;                 /* Flag if we were parsing a DST zone */

    timelib_rel_time relative;      /* Relative offset, applied by do_adjust_relative() */

    timelib_sll sse;                /* Seconds since epoch */

    unsigned int have_time;
    unsigned int have_date;
    unsigned int have_zone;
    unsigned int have_relative;
    unsigned int have_weeknr_day;

    unsigned int sse_uptodate;      /* !0 if the sse member is up to date with the date/time members */
    unsigned int tim_uptodate;      /* !0 if the date/time members are up to date with the sse member */
    unsigned int is_localtime;      /* 1 if the current struct represents localtime, 0 if it is in GMT */
    unsigned int zone_type;         /* 1 time offset,
                                     * 2 TimeZone abbreviation,
                                     * 3 TimeZone identifier */
} timelib_time;
/*
 * A relative offset ("+2 days", "last sunday", ...) collected while parsing;
 * do_adjust_relative() later adds it to the absolute fields of timelib_time.
 */
typedef struct timelib_rel_time {
    timelib_sll y;                       /* Years */
    timelib_sll m;                       /* Months */
    timelib_sll d;                       /* Days */
    timelib_sll h;                       /* Hours */
    timelib_sll i;                       /* Minutes */
    timelib_sll s;                       /* Seconds */

    int weekday;                         /* Stores the day in 'next monday' */
    int weekday_behavior;                /* 0: the current day should *not* be counted when advancing forwards;
                                          * 1: the current day *should* be counted */

    int first_last_day_of;               /* 1 = first, 2 = last (see do_adjust_relative()) */
    int invert;                          /* Whether the difference should be inverted */
    timelib_sll days;                    /* Contains the number of *days*, instead of Y-M-D differences */

    timelib_special special;             /* Filled in for TIMELIB_SPECIAL units (type and amount) */
    unsigned int have_weekday_relative;
    unsigned int have_special_relative;
} timelib_rel_time;
strtotime函数将任何英文文本的日期时间描述解析为 Unix 时间戳。既然这里涉及到英文文本,那么怎么把这个英文文本转换为计算机可以理解的逻辑呢?学过编译原理的同学都知道,编译过程中有词法分析阶段,词法分析就是将字符串转换为token的过程。php解析英文文本的字符串使用了re2c,这个词法分析工具支持正则表达式,/ext/date/lib/parse_date.re 中的scan()就是负责词法分析的函数。
这里需要特别注意的是,/ext/date/lib/parse_date.re是未经re2c处理的原始文件,/ext/date/lib/parse_date.c是经re2c处理后生成的文件。源码分析时阅读/ext/date/lib/parse_date.re就好了:/ext/date/lib/parse_date.c文件中有大量生成的词法分析代码,一大堆switch, goto, 单是scan()函数就有两万多行,伤不起啊!!!
既然re2c使用正则表达式,我们来查看一下匹配"last sunday"的正则表达式:
/* Ordinal words accepted as the count of a relative expression ("third day"). */
reltextnumber = 'first'|'second'|'third'|'fourth'|'fifth'|'sixth'|'seventh'|'eight'|'eighth'|'ninth'|'tenth'|'eleventh'|'twelfth';
/* Direction words; timelib_reltext_lookup maps next => 1, last/previous => -1, this => 0. */
reltexttext = 'next'|'last'|'previous'|'this';
/* A unit name with an optional trailing 's', or 'weeks', or a day name (daytext is defined outside this excerpt). */
reltextunit = (('sec'|'second'|'min'|'minute'|'hour'|'day'|'fortnight'|'forthnight'|'month'|'year') 's'?) | 'weeks' | daytext;
/* A count or direction word, then whitespace (space is defined outside this excerpt), then a unit: e.g. "last sunday". */
relativetext = (reltextnumber|reltexttext) space reltextunit;
"last"是reltexttext,“sunday”是 reltextunit, 所以"last sunday"是被解析为relativetext,在/ext/date/lib/parse_date.re查找relativetext 对应的操作:
relativetext
{
/*
 * Action for the relativetext pattern, e.g. "last sunday".
 * NOTE(review): `ptr` and `s` are not declared in this action; presumably
 * TIMELIB_INIT sets up `ptr` over the matched text and `s` is the Scanner
 * argument of scan() -- confirm against the macro definitions.
 */
timelib_sll i;
int behavior = 0;
DEBUG_OUTPUT("relativetext");
TIMELIB_INIT;
TIMELIB_HAVE_RELATIVE();
/* Consume "<count-or-direction> <unit>" pairs until the end of the string. */
while(*ptr) {
/* Numeric amount for the leading word; also sets `behavior` from the lookup table. */
i = timelib_get_relative_text((char **) &ptr, &behavior);
timelib_eat_spaces((char **) &ptr);
/* Look up the unit word and add amount * unit to s->time->relative. */
timelib_set_relative((char **) &ptr, i, behavior, s);
}
TIMELIB_DEINIT;
return TIMELIB_RELATIVE;
}
timelib_get_relative_text()是分析 “last”这个token,关键的结构如下:
/* One row of a word lookup table such as timelib_reltext_lookup. */
typedef struct _timelib_lookup_table {
    const char *name;   /* word to match (compared case-insensitively); NULL ends a table */
    int         type;   /* copied into *behavior by timelib_lookup_relative_text() */
    int         value;  /* amount returned for this word */
} timelib_lookup_table;
/*
 * Maps the leading word of a relative expression to { name, type, value }:
 * `value` is the signed amount, `type` becomes the weekday behavior.
 * The table is terminated by a NULL name.
 */
static timelib_lookup_table const timelib_reltext_lookup[] = {
    /* forward: ordinals and "next" */
    { "first",    0,  1 },
    { "next",     0,  1 },
    { "second",   0,  2 },
    { "third",    0,  3 },
    { "fourth",   0,  4 },
    { "fifth",    0,  5 },
    { "sixth",    0,  6 },
    { "seventh",  0,  7 },
    { "eight",    0,  8 },
    { "eighth",   0,  8 },
    { "ninth",    0,  9 },
    { "tenth",    0, 10 },
    { "eleventh", 0, 11 },
    { "twelfth",  0, 12 },

    /* backward */
    { "last",     0, -1 },
    { "previous", 0, -1 },

    /* current: the only entry with type 1 */
    { "this",     1,  0 },

    /* sentinel */
    { NULL,       1,  0 }
};
代码如下:
/*
 * Skips leading separators (space, tab, '-' and '/') at *ptr, then resolves
 * the following word through timelib_lookup_relative_text().
 *
 * ptr:      in/out cursor into the text being parsed; advanced past the
 *           separators and the word.
 * behavior: out; set by the lookup when the word is found.
 * Returns the amount reported by timelib_lookup_relative_text().
 */
static timelib_sll timelib_get_relative_text(char **ptr, int *behavior)
{
    for (;;) {
        char c = **ptr;

        if (c != ' ' && c != '\t' && c != '-' && c != '/') {
            break;
        }
        ++*ptr;
    }

    return timelib_lookup_relative_text(ptr, behavior);
}
/*
 * Reads the run of ASCII letters at *ptr and looks it up, case-insensitively,
 * in timelib_reltext_lookup.
 *
 * ptr:      in/out cursor; advanced past the letters that were read.
 * behavior: out; set to the matching entry's `type`. Left untouched when the
 *           word is not in the table.
 * Returns the matching entry's `value`, or 0 when there is no match.
 *
 * The word is compared in place (length check + strncasecmp) instead of being
 * copied into a calloc'ed buffer: the previous version passed the calloc
 * result to memcpy without checking it for NULL.
 */
static timelib_sll timelib_lookup_relative_text(char **ptr, int *behavior)
{
    const char *begin = *ptr;
    const timelib_lookup_table *tp;
    size_t len;

    while ((**ptr >= 'A' && **ptr <= 'Z') || (**ptr >= 'a' && **ptr <= 'z')) {
        ++*ptr;
    }
    len = (size_t)(*ptr - begin);

    for (tp = timelib_reltext_lookup; tp->name; tp++) {
        /* Table names are unique, so the first match is the only match. */
        if (strlen(tp->name) == len && strncasecmp(begin, tp->name, len) == 0) {
            *behavior = tp->type;
            return tp->value;
        }
    }

    return 0;
}
运行完后 i = -1, behavior = 0(请注意 value = tp->value; *behavior = tp->type;)。
接着在timelib_set_relative()中处理“sunday”:
/*
 * Reads the unit word at *ptr and adds `amount` of that unit to the relative
 * offset stored in s->time->relative.
 *
 * ptr:      in/out cursor; advanced past the unit word by timelib_lookup_relunit().
 * amount:   signed count, as returned by timelib_get_relative_text().
 * behavior: stored as relative.weekday_behavior for weekday units.
 * s:        scanner whose time->relative is updated.
 *
 * Does nothing when the unit word is not recognised.
 */
static void timelib_set_relative(char **ptr, timelib_sll amount, int behavior, Scanner *s)
{
    const timelib_relunit *relunit;
    timelib_rel_time *rel;
    timelib_sll weeks;

    /* For "last sunday" this resolves the "sunday" part. */
    relunit = timelib_lookup_relunit(ptr);
    if (!relunit) {
        return;
    }
    rel = &s->time->relative;

    switch (relunit->unit) {
        case TIMELIB_SECOND:
            rel->s += amount * relunit->multiplier;
            break;

        case TIMELIB_MINUTE:
            rel->i += amount * relunit->multiplier;
            break;

        case TIMELIB_HOUR:
            rel->h += amount * relunit->multiplier;
            break;

        case TIMELIB_DAY:
            rel->d += amount * relunit->multiplier;
            break;

        case TIMELIB_MONTH:
            rel->m += amount * relunit->multiplier;
            break;

        case TIMELIB_YEAR:
            rel->y += amount * relunit->multiplier;
            break;

        case TIMELIB_WEEKDAY:
            /* Compute the offset and store it in timelib_rel_time. */
            TIMELIB_HAVE_WEEKDAY_RELATIVE();
            TIMELIB_UNHAVE_TIME();
            /* Positive amounts count from zero whole weeks; others are used as-is. */
            if (amount > 0) {
                weeks = amount - 1;
            } else {
                weeks = amount;
            }
            rel->d += weeks * 7;
            rel->weekday = relunit->multiplier;
            rel->weekday_behavior = behavior;
            break;

        case TIMELIB_SPECIAL:
            TIMELIB_HAVE_SPECIAL_RELATIVE();
            TIMELIB_UNHAVE_TIME();
            rel->special.type = relunit->multiplier;
            rel->special.amount = amount;
            break;
    }
}
timelib_lookup_relunit的关键结构体和代码如下:
/* One row of the unit lookup table timelib_relunit_lookup. */
typedef struct _timelib_relunit {
    const char *name;        /* unit word (compared case-insensitively); NULL ends the table */
    int         unit;        /* TIMELIB_* unit kind, switched on by timelib_set_relative() */
    int         multiplier;  /* scale factor for plain units; weekday number or special type otherwise */
} timelib_relunit;
/*
 * Unit words accepted after the count/direction word, as { name, unit, multiplier }.
 * For plain units the multiplier scales the amount (a week is 7 days, a
 * fortnight 14); for weekdays it is the weekday number (sunday = 0); for
 * special units it is the special type. Terminated by a NULL name.
 */
static timelib_relunit const timelib_relunit_lookup[] = {
    /* seconds */
    { "sec",         TIMELIB_SECOND,  1 },
    { "secs",        TIMELIB_SECOND,  1 },
    { "second",      TIMELIB_SECOND,  1 },
    { "seconds",     TIMELIB_SECOND,  1 },

    /* minutes */
    { "min",         TIMELIB_MINUTE,  1 },
    { "mins",        TIMELIB_MINUTE,  1 },
    { "minute",      TIMELIB_MINUTE,  1 },
    { "minutes",     TIMELIB_MINUTE,  1 },

    /* hours */
    { "hour",        TIMELIB_HOUR,    1 },
    { "hours",       TIMELIB_HOUR,    1 },

    /* day-based units */
    { "day",         TIMELIB_DAY,     1 },
    { "days",        TIMELIB_DAY,     1 },
    { "week",        TIMELIB_DAY,     7 },
    { "weeks",       TIMELIB_DAY,     7 },
    { "fortnight",   TIMELIB_DAY,    14 },
    { "fortnights",  TIMELIB_DAY,    14 },
    { "forthnight",  TIMELIB_DAY,    14 },
    { "forthnights", TIMELIB_DAY,    14 },

    /* months and years */
    { "month",       TIMELIB_MONTH,   1 },
    { "months",      TIMELIB_MONTH,   1 },
    { "year",        TIMELIB_YEAR,    1 },
    { "years",       TIMELIB_YEAR,    1 },

    /* weekday names */
    { "monday",      TIMELIB_WEEKDAY, 1 },
    { "mon",         TIMELIB_WEEKDAY, 1 },
    { "tuesday",     TIMELIB_WEEKDAY, 2 },
    { "tue",         TIMELIB_WEEKDAY, 2 },
    { "wednesday",   TIMELIB_WEEKDAY, 3 },
    { "wed",         TIMELIB_WEEKDAY, 3 },
    { "thursday",    TIMELIB_WEEKDAY, 4 },
    { "thu",         TIMELIB_WEEKDAY, 4 },
    { "friday",      TIMELIB_WEEKDAY, 5 },
    { "fri",         TIMELIB_WEEKDAY, 5 },
    { "saturday",    TIMELIB_WEEKDAY, 6 },
    { "sat",         TIMELIB_WEEKDAY, 6 },
    { "sunday",      TIMELIB_WEEKDAY, 0 },
    { "sun",         TIMELIB_WEEKDAY, 0 },

    /* special units */
    { "weekday",     TIMELIB_SPECIAL, TIMELIB_SPECIAL_WEEKDAY },
    { "weekdays",    TIMELIB_SPECIAL, TIMELIB_SPECIAL_WEEKDAY },

    /* sentinel */
    { NULL,          0,               0 }
};
/*
 * Reads the word at *ptr (up to the next space, tab, comma or end of string)
 * and looks it up, case-insensitively, in timelib_relunit_lookup.
 *
 * ptr: in/out cursor; advanced to the delimiter that ended the word.
 * Returns the matching table entry, or NULL when the word is not a known unit.
 *
 * The word is compared in place (length check + strncasecmp) instead of being
 * copied into a calloc'ed buffer: the previous version passed the calloc
 * result to memcpy without checking it for NULL.
 */
static const timelib_relunit* timelib_lookup_relunit(char **ptr)
{
    const char *begin = *ptr;
    const timelib_relunit *tp;
    size_t len;

    while (**ptr != '\0' && **ptr != ' ' && **ptr != ',' && **ptr != '\t') {
        ++*ptr;
    }
    len = (size_t)(*ptr - begin);

    for (tp = timelib_relunit_lookup; tp->name; tp++) {
        if (strlen(tp->name) == len && strncasecmp(begin, tp->name, len) == 0) {
            return tp;
        }
    }

    return NULL;
}
运行完,可得到结构体timelib_relunit,其中的值是{ "sunday", TIMELIB_WEEKDAY, 0 },
最后在下面的代码中获取一个差值
case TIMELIB_WEEKDAY: // compute the offset and store it in struct timelib_rel_time
TIMELIB_HAVE_WEEKDAY_RELATIVE();
TIMELIB_UNHAVE_TIME();
/* Whole weeks to shift: a positive amount contributes (amount - 1) weeks, any other amount is used as-is, so "last" (-1) gives -7 days. */
s->time->relative.d += (amount > 0 ? amount - 1 : amount) * 7;
/* For weekday units the table's multiplier column holds the weekday number (sunday = 0). */
s->time->relative.weekday = relunit->multiplier;
s->time->relative.weekday_behavior = behavior;
break;
得到差值后,通过下面的三个函数转换为一个时间戳:
/*
 * Folds the relative offset in time->relative into the absolute date/time
 * fields of `time`.
 *
 * Order of operations: weekday adjustment (if flagged), normalize, add the
 * relative y/m/d/h/i/s (if have_relative), apply "first/last day of", then
 * normalize again.
 */
static void do_adjust_relative(timelib_time* time)
{
    if (time->relative.have_weekday_relative) {
        do_adjust_for_weekday(time);
    }
    timelib_do_normalize(time);

    if (time->have_relative) {
        time->y += time->relative.y;
        time->m += time->relative.m;
        time->d += time->relative.d;
        time->h += time->relative.h;
        time->i += time->relative.i;
        time->s += time->relative.s;
    }

    if (time->relative.first_last_day_of == 1) {
        /* first */
        time->d = 1;
    } else if (time->relative.first_last_day_of == 2) {
        /* last: day 0 of the following month, resolved by the normalize below */
        time->d = 0;
        time->m++;
    }

    timelib_do_normalize(time);
}
/*
 * Shifts time->d so that the date lands on the weekday requested in
 * time->relative.weekday, measured from the current day of week of y/m/d.
 * Whole-week offsets stored in time->relative.d are not applied here;
 * do_adjust_relative() adds them afterwards.
 */
static void do_adjust_for_weekday(timelib_time* time) // handle weekday-type relative adjustments
{
timelib_sll current_dow, difference;
current_dow = timelib_day_of_week(time->y, time->m, time->d);
/* NOTE(review): behavior 2 is not described in the timelib_rel_time field comment (only 0 and 1 are) -- confirm its meaning. */
if (time->relative.weekday_behavior == 2)
{
/* Sunday (0) is treated as day 7 in this mode. */
if (time->relative.weekday == 0) {
time->relative.weekday = 7;
}
/* Step back by the current day-of-week number, then forward to the requested weekday. */
time->d -= current_dow;
time->d += time->relative.weekday;
/* Returns without clearing have_weekday_relative, unlike the path below. */
return;
}
difference = time->relative.weekday - current_dow;
/*
 * Wrap the difference forward by a week when:
 *  - the day offset is negative and the target weekday is earlier in the week, or
 *  - the day offset is non-negative and difference <= -weekday_behavior
 *    (behavior 0: same-day targets also wrap; behavior 1: the same day is kept).
 * Example: "last sunday" stored relative.d = -7, so a negative difference is
 * wrapped here and the -7 days are added later.
 */
if ((time->relative.d < 0 && difference < 0) || (time->relative.d >= 0 && difference <= -time->relative.weekday_behavior)) {
difference += 7;
}
if (time->relative.weekday >= 0) {
time->d += difference;
} else {
/* NOTE(review): negative weekday values are not produced by the lookup table shown in this article -- confirm where they come from. */
time->d -= (7 - (abs(time->relative.weekday) - current_dow));
}
/* Mark the weekday adjustment as consumed. */
time->relative.have_weekday_relative = 0;
}
/*
 * Applies the pending special/relative adjustments to `time`, then computes
 * its timestamp and stores it in time->sse.
 *
 * time: date/time to convert; its date/time fields, sse, sse_uptodate and
 *       the have_*relative flags are updated.
 * tzi:  passed through to do_adjust_timezone().
 */
void timelib_update_ts(timelib_time* time, timelib_tzinfo* tzi)
{
    timelib_sll epoch = 0;

    /* Resolve relative parts into absolute y/m/d h:i:s first. */
    do_adjust_special_early(time);
    do_adjust_relative(time);
    do_adjust_special(time);

    /* Sum the contribution of each date/time component. */
    epoch += do_years(time->y);
    epoch += do_months(time->m, time->y);
    epoch += do_days(time->d);
    epoch += do_time(time->h, time->i, time->s);

    /* NOTE(review): this store precedes do_adjust_timezone(), which receives
     * `time` and may read sse -- confirm before treating it as redundant. */
    time->sse = epoch;

    epoch += do_adjust_timezone(time, tzi);
    time->sse = epoch;
    time->sse_uptodate = 1;

    /* The relative parts have been consumed. */
    time->relative.have_special_relative = 0;
    time->relative.have_weekday_relative = 0;
    time->have_relative = 0;
}
[文章作者]曾健生
[作者邮箱]zengjiansheng1@126.com
[作者QQ]190678908
[博客] http://blog.youkuaiyun.com/newjueqi