杂谈：Linux时间管理之gettimeofday实现-CSDN博客

最近花了一些时间看了下Linux的时间管理。为什么要看这个呢？因为一直以来心里总有些疑问，这些疑问时不时地出现，让人很是困惑：

Linux的时间是如何保持的？date调整了时间后为啥下次开机还有效？
那App总调用系统接口比如gettimeofday获取时间是低效率的吗？
定时器是咋弄的？App经常使用的select/epoll等接口都有超时机制，那这个超时机制是如何做到的呢？
时间为啥有分墙上时间，单调时间等？这些有什么区别呢？

我们今天先谈谈gettimeofday的实现，因为这是跟我们打交道比较多的函数，我们有时会写如下的代码：

#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Demo: print the wall-clock time, sleep for about 1 ms, then print the
 * wall-clock time again.
 *
 * Returns 0 on success and 1 if gettimeofday() reports an error.
 */
int main(int argc, char *argv[])
{
  struct timeval tv_begin, tv_end;

  /* The command line is not used by this demo. */
  (void)argc;
  (void)argv;

  if (gettimeofday(&tv_begin, NULL) != 0) {
    perror("gettimeofday");
    return 1;
  }
  /*
   * time_t and suseconds_t are not guaranteed to be `long` (for example a
   * 64-bit time_t on a 32-bit ABI), so cast explicitly to match the
   * printf conversion specifiers.
   */
  printf("start tv_sec %lld tv_usec %ld\n",
         (long long)tv_begin.tv_sec, (long)tv_begin.tv_usec);

  usleep(1000);

  if (gettimeofday(&tv_end, NULL) != 0) {
    perror("gettimeofday");
    return 1;
  }
  printf("end tv_sec %lld tv_usec %ld\n",
         (long long)tv_end.tv_sec, (long)tv_end.tv_usec);

  return 0;
}

如果我们直接去找内核代码，会发现kernel/time/time.c中有下面这个系统调用：

/*
 * gettimeofday system call (excerpt). The "..." line marks code that the
 * article left out, which is why the braces below do not balance.
 *
 * tv: optional user buffer for the time value.
 * tz: optional user buffer for the timezone.
 * Returns 0, or -EFAULT when copying the timezone to user space fails.
 */
SYSCALL_DEFINE2(gettimeofday, struct __kernel_old_timeval __user *, tv,
    struct timezone __user *, tz)
{
   if (likely(tv != NULL)) {
    struct timespec64 ts;

    /* Fill ts; judging by the helper's name this is the real (wall-clock) time. */
    ktime_get_real_ts64(&ts);
    ...
   /* The timezone is optional: sys_tz is copied out only when tz is given. */
   if (unlikely(tz != NULL)) {
    if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
      return -EFAULT;
  }
  return 0;
}

注意如果这个时候你想当然的认为用户态的gettimeofday是直接使用系统调用的话，就可能错了，这个时候我们要先去glibc中看一下，因为这个函数的用户态是glibc(以2.35版本为例子)提供的，我们得去看看glibc的实现，我们发现该函数定义在：glibc2.35\time\gettimeofday.c：

/*
 * User-space gettimeofday as quoted from glibc: it is built on top of
 * __clock_gettime (CLOCK_REALTIME) rather than on a gettimeofday syscall.
 *
 * tv: receives the time converted from the timespec result.
 * tz: if non-null, the pointed-to struct timezone is zero-filled.
 * Returns 0 on success, -1 when __clock_gettime returns non-zero.
 */
int
___gettimeofday (struct timeval *restrict tv, void *restrict tz)
{
  /* No timezone information is provided; the buffer is only cleared.  */
  if (__glibc_unlikely (tz != 0))
    memset (tz, 0, sizeof (struct timezone));

  struct timespec ts;
  if (__clock_gettime (CLOCK_REALTIME, &ts))
    return -1;

  /* Convert the timespec result into the caller's timeval.  */
  TIMESPEC_TO_TIMEVAL (tv, &ts);
  return 0;
}

看到没有，有时候我们的直觉并不可靠，glibc是使用__clock_gettime来实现它的，我们接着看__clock_gettime函数

glibc2.35\sysdeps\unix\sysv\linux\clock_gettime.c:

在__TIMESIZE != 64时

/*
 * __clock_gettime wrapper (excerpt): it calls __clock_gettime64 with a
 * local struct __timespec64.  The "..." line marks code elided by the
 * article, i.e. whatever is done with ret and tp64 afterwards.
 */
int
__clock_gettime (clockid_t clock_id, struct timespec *tp)
{
  int ret;
  struct __timespec64 tp64;

  ret = __clock_gettime64 (clock_id, &tp64);
  ...
  }

在__TIMESIZE == 64时：

/*
 * When __TIMESIZE == 64 the *64 names are plain preprocessor aliases for
 * the unsuffixed functions, so a call to __clock_gettime64 is rewritten
 * to __clock_gettime.  (The closing #endif is not part of this excerpt.)
 */
#if __TIMESIZE == 64
# define __clock_nanosleep_time64 __clock_nanosleep
# define __clock_gettime64 __clock_gettime
# define __timespec_get64 __timespec_get
# define __timespec_getres64 __timespec_getres
注：函数名也可以被宏替换（我被这个困惑了几天）
这里简单举个例子：
/*
 * Return the larger of the two arguments.
 *
 * left, right: values to compare.
 * Returns left when left > right, otherwise right.
 */
int max_a(int left, int right)
{
    return left > right ? left : right;
}
/* A function name can be aliased by a macro: my_max expands to max_a. */
#define my_max max_a

/*
 * Demo entry point: calls max_a through the my_max alias.
 * Always returns 0.
 */
int main(int argc, char *argv[])
{
    /* The command line is not used by this demo. */
    (void)argc;
    (void)argv;

    /* Legal: the preprocessor rewrites my_max to max_a, so max_a is what runs. */
    (void)my_max(1, 2);
    return 0;
}

最终我们看到，无论哪种情况，调用的都是__clock_gettime64函数。如果想知道如何在glibc中找到这个函数，可以参考这篇文章，写得很清楚（https://blog.csdn.net/canpool/article/details/119305926）。

我们看看这个函数__clock_gettime64

\sysdeps\unix\sysv\linux\clock_gettime.c:

/*
 * Read the clock identified by clock_id into *tp.
 *
 * The sources are tried in this order, each one only if it is compiled in:
 *   1. the 64-bit-time vDSO entry (dl_vdso_clock_gettime64),
 *   2. the vDSO entry that uses struct timespec (dl_vdso_clock_gettime),
 *   3. the clock_gettime64 system call,
 *   4. the clock_gettime system call, used only after -ENOSYS from step 3.
 *
 * Returns 0 on success, otherwise the value produced by
 * INLINE_SYSCALL_ERROR_RETURN_VALUE (-r).
 */
int
__clock_gettime64 (clockid_t clock_id, struct __timespec64 *tp)
{
  int r;

  /* If the headers provide no __NR_clock_gettime64, alias it to
     __NR_clock_gettime.  */
#ifndef __NR_clock_gettime64
# define __NR_clock_gettime64 __NR_clock_gettime
#endif

#ifdef HAVE_CLOCK_GETTIME64_VSYSCALL
  int (*vdso_time64) (clockid_t clock_id, struct __timespec64 *tp)
    = GLRO(dl_vdso_clock_gettime64);
  /* When this vDSO entry exists its result is final: both success and
     failure return from here, with no fallback to the system call.  */
  if (vdso_time64 != NULL)
    {
      r = INTERNAL_VSYSCALL_CALL (vdso_time64, 2, clock_id, tp);
      if (r == 0)
  return 0;
      return INLINE_SYSCALL_ERROR_RETURN_VALUE (-r);
    }
#endif

#ifdef HAVE_CLOCK_GETTIME_VSYSCALL
  int (*vdso_time) (clockid_t clock_id, struct timespec *tp)
    = GLRO(dl_vdso_clock_gettime);
  if (vdso_time != NULL)
    {
      struct timespec tp32;
      r = INTERNAL_VSYSCALL_CALL (vdso_time, 2, clock_id, &tp32);
      /* Success with a non-negative tv_sec is converted and returned;
         success with a negative tv_sec falls through to the system call
         below.  */
      if (r == 0 && tp32.tv_sec >= 0)
  {
    *tp = valid_timespec_to_timespec64 (tp32);
    return 0;
  }
      else if (r != 0)
  return INLINE_SYSCALL_ERROR_RETURN_VALUE (-r);

      /* Fallback to syscall if the 32-bit time_t vDSO returns overflows.  */
    }
#endif

  /* Regular system call path; only -ENOSYS continues past this point.  */
  r = INTERNAL_SYSCALL_CALL (clock_gettime64, clock_id, tp);
  if (r == 0)
    return 0;
  if (r != -ENOSYS)
    return INLINE_SYSCALL_ERROR_RETURN_VALUE (-r);

#ifndef __ASSUME_TIME64_SYSCALLS
  /* Fallback code that uses 32-bit support.  */
  struct timespec tp32;
  r = INTERNAL_SYSCALL_CALL (clock_gettime, clock_id, &tp32);
  if (r == 0)
    {
      *tp = valid_timespec_to_timespec64 (tp32);
      return 0;
    }
#endif

  return INLINE_SYSCALL_ERROR_RETURN_VALUE (-r);
}

我们要关注第一个分支：

/* Fragment of __clock_gettime64: the 64-bit-time vDSO path.  The function
   pointer is read from GLRO(dl_vdso_clock_gettime64); when it is non-null
   the call through it decides the result and no system call is made in
   this branch.  */
#ifdef HAVE_CLOCK_GETTIME64_VSYSCALL
  int (*vdso_time64) (clockid_t clock_id, struct __timespec64 *tp)
    = GLRO(dl_vdso_clock_gettime64);
  if (vdso_time64 != NULL)
    {
      r = INTERNAL_VSYSCALL_CALL (vdso_time64, 2, clock_id, tp);
      if (r == 0)
  return 0;
      return INLINE_SYSCALL_ERROR_RETURN_VALUE (-r);
    }
#endif

vdso的影子出现了，它可以让我们避免付出系统调用的代价。vdso的细节我就不在这里说了，有兴趣的可以在网上找找：它利用内存映射的办法，直接把内核的数据映射到用户空间，这样就免去了系统调用的开销，是很有意思的方法。

如何知道我们使用的系统中有哪些系统调用支持vdso了呢，有个办法可以看看当前的linux的vdso支持哪些函数：

1.先在一个 terminal 中执行 cat，等待输入
2.在另外一个terminal中执行下面的：
   a)~# ps aux | grep cat
   b)cat /proc/9869/maps | grep -i vdso(得到映射到用户空间的地址)
   c) gdb /bin/cat 9869
   d) dump memory /tmp/vdso.so 0x7ffe717e6000 0x7ffe717e7000(root，地址来自于步骤b)
   e) objdump -T /tmp/vdso.so 我们得到下面的输出
DYNAMIC SYMBOL TABLE:
0000000000000950  w   DF .text  00000000000000a1  LINUX_2.6   clock_gettime
00000000000008a0 g    DF .text  0000000000000083  LINUX_2.6   __vdso_gettimeofday
0000000000000a00  w   DF .text  000000000000000a  LINUX_2.6   clock_getres
0000000000000a00 g    DF .text  000000000000000a  LINUX_2.6   __vdso_clock_getres
00000000000008a0  w   DF .text  0000000000000083  LINUX_2.6   gettimeofday
0000000000000930 g    DF .text  0000000000000015  LINUX_2.6   __vdso_time
0000000000000930  w   DF .text  0000000000000015  LINUX_2.6   time
0000000000000950 g    DF .text  00000000000000a1  LINUX_2.6   __vdso_clock_gettime
0000000000000000 g    DO *ABS*  0000000000000000  LINUX_2.6   LINUX_2.6
0000000000000a10 g    DF .text  000000000000002a  LINUX_2.6   __vdso_getcpu
0000000000000a10  w   DF .text  000000000000002a  LINUX_2.6   getcpu

以上是用户态，我们简单看看内核态的处理。我们知道内核有周期定时器（先不考虑NOHZ的情况），内核周期性调用tick_sched_timer来做一些更新，其中就包括时间的保持，调用流程如下：

看到没有，内核正是在update_vsyscall中更新vdso的数据结构的。

当然另外一方面，系统还是支持走正常系统调用（陷入内核）这条路径的。

/include/uapi/asm-generic/unistd.h

/*
 * Syscall numbers 112-115 for the clock_* family.  They are defined only
 * when the architecture wants the time32 syscalls or when long is not
 * 32 bits wide.  Each __SC_3264 entry names two handlers for the number:
 * a *32 variant and the plain one (presumably chosen by word size --
 * confirm against the __SC_3264 definition).
 */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_clock_settime 112
__SC_3264(__NR_clock_settime, sys_clock_settime32, sys_clock_settime)
#define __NR_clock_gettime 113
__SC_3264(__NR_clock_gettime, sys_clock_gettime32, sys_clock_gettime)
#define __NR_clock_getres 114
__SC_3264(__NR_clock_getres, sys_clock_getres_time32, sys_clock_getres)
#define __NR_clock_nanosleep 115
__SC_3264(__NR_clock_nanosleep, sys_clock_nanosleep_time32, \
    sys_clock_nanosleep)
#endif

/*
 * clock_gettime system call.
 *
 * which_clock: id of the clock to read.
 * tp:          user-space buffer that receives the result.
 *
 * Returns -EINVAL when no k_clock is found for which_clock, the error
 * from the clock's clock_get_timespec callback if it fails, -EFAULT when
 * the result cannot be written to tp, and otherwise 0 (the callback's
 * success value).
 */
SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
    struct __kernel_timespec __user *, tp)
{
  const struct k_clock *kc = clockid_to_kclock(which_clock);
  struct timespec64 kernel_tp;
  int error;

  if (!kc)
    return -EINVAL;

  /* Read the time into a kernel-side timespec64 first. */
  error = kc->clock_get_timespec(which_clock, &kernel_tp);

  /* Copy out to user space only when the read itself succeeded. */
  if (!error && put_timespec64(&kernel_tp, tp))
    error = -EFAULT;

  return error;
}

总结：这篇文章有点长，但实际上它只是想说明clock_gettime支持vdso，而vdso是通过内存映射的方式把内核态的数据直接映射到用户态。这个过程看似简单，但要想没有跳跃地弄清楚，还真需要些功夫，比如glibc的结构组织、vdso、内核时间更新等等，都很复杂。这篇文章还没有涉及内核维护时间的内容，见下篇文章吧。

转载请注明出处，喜欢的话请关注公众号：评谈网络技术

http://weixin.qq.com/r/OhzN1XDEny3grTS290ko (二维码自动识别)

杂谈：Linux时间管理之gettimeofday实现 - 知乎