perform_dry_run函数总结-CSDN博客

本文链接：https://blog.csdn.net/weixin_44033321/article/details/136430578

还是先把无关紧要的函数语句删掉了，只保留了主要部分。

perform_dry_run

/* Perform dry run of all test cases to confirm that the app is working as
   expected. This is done only for the initial inputs, and only once.

   Walks the global queue from its head. Each entry's file is read in full
   into a temporary buffer and handed to calibrate_case(). Returns early,
   without the final OKF message, if stop_soon is set once a calibration
   returns. Aborts through PFATAL / FATAL when a test case cannot be opened
   or cannot be read in full.

   argv - target command line, passed through to calibrate_case(). */

static void perform_dry_run(char** argv) {

  struct queue_entry* q = queue;

  /* Process every queued test case in order. */

  while (q) {

    u8* use_mem;   /* in-memory copy of the test case contents */
    u8  res;       /* FAULT_* code returned by calibrate_case() */
    s32 fd;

    /* Display name: the part after the last '/'. strrchr() returns NULL when
       the path contains no '/', so fall back to the whole path instead of
       computing NULL + 1. */

    u8* fn    = q->fname;
    u8* slash = strrchr(q->fname, '/');

    if (slash) fn = slash + 1;

    ACTF("Attempting dry run with '%s'...", fn);

    fd = open(q->fname, O_RDONLY);
    if (fd < 0) PFATAL("Unable to open '%s'", q->fname);

    /* Buffer of q->len bytes for the test case data. */

    use_mem = ck_alloc_nozero(q->len);

    /* Anything other than a complete read (including an error) is fatal. */

    if (read(fd, use_mem, q->len) != q->len)
      FATAL("Short read from '%s'", q->fname);

    close(fd);

    /* calibrate_case() executes the test case several times through
       run_target() and reports the resulting fault code. */

    res = calibrate_case(argv, q, use_mem, 0, 1);

    ck_free(use_mem);

    if (stop_soon) return;

    switch (res) {

      case FAULT_NONE:

        /* Only the first queue entry triggers the map coverage check. */

        if (q == queue) check_map_coverage();

        break;

      default:

        /* The other fault codes are not handled in this excerpt. */

        break;

    }

    if (q->var_behavior) WARNF("Instrumentation output varies across runs.");

    q = q->next;

  }

  OKF("All test cases processed.");

}

calibrate_case

/* Calibrate one queue entry: run it repeatedly through run_target(), record
   its trace checksum, detect bytes of the trace that vary between runs, and
   store timing / bitmap statistics on the entry.

   argv       - target command line, passed to init_forkserver() and
                run_target().
   q          - queue entry being calibrated; several of its fields are
                updated (exec_cksum, exec_us, bitmap_size, handicap,
                cal_failed, has_new_cov).
   use_mem    - test case data, q->len bytes.
   handicap   - value stored into q->handicap.
   from_queue - not referenced in this excerpt.

   Returns the fault code of the last run_target() call, or FAULT_NOBITS when
   a first-time calibration outside dumb mode produced no fault and no new
   bits. The global stage_name / stage_cur / stage_max are saved on entry and
   restored before returning. */

static u8 calibrate_case(char** argv, struct queue_entry* q, u8* use_mem,
                         u32 handicap, u8 from_queue) {
  // trace_bits snapshot taken on the first run of this test case
  static u8 first_trace[MAP_SIZE];

  u8  fault = 0, 
      new_bits = 0, 
      var_detected = 0, 
      // hnb: latest has_new_bits() result, i.e. whether new coverage showed up
      hnb = 0,
      first_run = (q->exec_cksum == 0);

  // start and end timestamps of the calibration loop
  u64 start_us, stop_us;

  s32 old_sc = stage_cur, old_sm = stage_max;
  u32 use_tmout = exec_tmout;
  u8* old_sn = stage_name;

  // NOTE(review): cal_failed is bumped before anything runs and reset to 0
  // further down once the loop has completed. Presumably the increment only
  // survives when calibration bails out early; no early exit is visible in
  // this excerpt.
  q->cal_failed++;

  // Run the test case 3 times when fast_cal is set, CAL_CYCLES times
  // otherwise. Repeating is meaningful because the target is not guaranteed
  // to behave identically on every execution.
  stage_name = "calibration";
  stage_max  = fast_cal ? 3 : CAL_CYCLES;

  /* Make sure the forkserver is up before we do anything, and let's not
     count its spin-up time toward binary calibration. */
     
  // Start the fork server if it is not running yet (skipped in dumb mode 1
  // and when no_forkserver is set).
  // Author's notes: https://blog.csdn.net/weixin_44033321/article/details/136401701
  if (dumb_mode != 1 && !no_forkserver && !forksrv_pid)
    init_forkserver(argv);

  // Per the author's note, exec_cksum is always 0 when called from the dry
  // run, so this branch is skipped there; later callers (the author mentions
  // fuzz_one) arrive with a non-zero checksum.
  if (q->exec_cksum) {
  
    memcpy(first_trace, trace_bits, MAP_SIZE);
    hnb = has_new_bits(virgin_bits);
    if (hnb > new_bits) new_bits = hnb;

  }

  // record the start time
  start_us = get_cur_time_us();

  for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {

    u32 cksum;

    if (!first_run && !(stage_cur % stats_update_freq)) show_stats();
    
    // write_to_testcase() is defined outside this excerpt; presumably it
    // makes the test case data available as the target's input (TODO confirm)
    write_to_testcase(use_mem, q->len);

    // run the target; the result is a FAULT_* code (crash, timeout, ...)
    fault = run_target(argv, use_tmout);

    /* stop_soon is set by the handler for Ctrl+C. When it's pressed,
       we want to bail out quickly. */

    // NOTE(review): no stop_soon check follows in this excerpt.

    // checksum of the trace produced by this run
    cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);

    // A mismatch means either this is the first run (exec_cksum still 0) or
    // this run's trace differs from the stored one.
    if (q->exec_cksum != cksum) {

      // has_new_bits() checks trace_bits against virgin_bits, returns 1 or 2
      // when something new is seen, and updates virgin_bits.
      // Author's notes: https://blog.csdn.net/weixin_44033321/article/details/136422289
      hnb = has_new_bits(virgin_bits);

      // new_bits starts at 0 and keeps the highest value seen (1 or 2)
      if (hnb > new_bits) new_bits = hnb;

      // a checksum is already stored, so this is not the first run
      if (q->exec_cksum) {

        u32 i;

        for (i = 0; i < MAP_SIZE; i++) {

          if (!var_bytes[i] && first_trace[i] != trace_bits[i]) {
            // remember which trace bytes vary between runs
            var_bytes[i] = 1;
            // extend the calibration to CAL_CYCLES_LONG rounds
            stage_max    = CAL_CYCLES_LONG;

          }

        }

        var_detected = 1;

      } else {
        // first run: store the checksum and snapshot the trace
        q->exec_cksum = cksum;
        memcpy(first_trace, trace_bits, MAP_SIZE);

      }

    }

  }
  // record the end time
  stop_us = get_cur_time_us();
  // accumulate total calibration time and number of rounds
  total_cal_us     += stop_us - start_us;
  total_cal_cycles += stage_max;

  /* OK, let's collect some stats about the performance of this test case.
     This is used for fuzzing air time calculations in calculate_score(). */
  // per-entry statistics; exec_us is the average time per round
  q->exec_us     = (stop_us - start_us) / stage_max;
  q->bitmap_size = count_bytes(trace_bits);
  q->handicap    = handicap;
  q->cal_failed  = 0;

  total_bitmap_size += q->bitmap_size;
  total_bitmap_entries++;

  // update_bitmap_score() is defined outside this excerpt; presumably it
  // checks whether this entry is a better candidate than the ones already
  // recorded (TODO confirm)
  update_bitmap_score(q);

  /* If this case didn't result in new output from the instrumentation, tell
     parent. This is a non-critical problem, but something to warn the user
     about. */

  if (!dumb_mode && first_run && !fault && !new_bits) fault = FAULT_NOBITS;

// NOTE(review): nothing in this excerpt jumps to this label; the early-exit
// gotos appear to have been trimmed.
abort_calibration:

  // new_bits == 2 means has_new_bits() reported new tuples
  if (new_bits == 2 && !q->has_new_cov) {
    q->has_new_cov = 1;
    queued_with_cov++;
  }

  /* Mark variable paths. */

  if (var_detected) {

    var_byte_count = count_bytes(var_bytes);

    if (!q->var_behavior) {
      mark_as_variable(q);
      queued_variable++;
    }

  }

  // restore the stage globals saved on entry
  stage_name = old_sn;
  stage_cur  = old_sc;
  stage_max  = old_sm;

  if (!first_run) show_stats();

  return fault;

}

run_target


/* Execute target application, monitoring for timeouts. Return status
   information. The called program will update trace_bits[].

   argv    - target command line (not referenced in this excerpt).
   timeout - time limit for the run, in milliseconds.

   Returns FAULT_TMOUT when the child was terminated by SIGKILL after the
   timeout flag was set, FAULT_CRASH for any other terminating signal or for
   the MSAN exit code, FAULT_ERROR when the trace starts with EXEC_FAIL_SIG
   in dumb / no-forkserver mode, and FAULT_NONE otherwise. Returns 0 when
   talking to the fork server fails while stop_soon is set. */

static u8 run_target(char** argv, u32 timeout) {

  /* it is static, and only it_value is ever written here, so it_interval
     stays zero. */

  static struct itimerval it;
  static u32 prev_timed_out = 0;
  static u64 exec_ms = 0;

  int status = 0;
  u32 tb4;

  child_timed_out = 0;

  /* After this memset, trace_bits[] are effectively volatile, so we
     must prevent any earlier operations from venturing into that
     territory. */

  memset(trace_bits, 0, MAP_SIZE);
  MEM_BARRIER();

  /* If we're running in "dumb" mode, we can't rely on the fork server
     logic compiled into the target program, so we will just keep calling
     execve(). There is a bit of code duplication between here and 
     init_forkserver(), but c'est la vie. */

  /* NOTE(review): the dumb-mode branch is empty in this excerpt, so nothing
     here assigns child_pid before the waitpid() call further down. */

  if (dumb_mode == 1 || no_forkserver) {

  } else {

    s32 res;

    /* In non-dumb mode, we have the fork server up and running, so simply
       tell it to have at it, and then read back PID. */

    /* The 4 bytes sent are prev_timed_out, i.e. the timeout flag of the
       previous run. */

    if ((res = write(fsrv_ctl_fd, &prev_timed_out, 4)) != 4) {

      if (stop_soon) return 0;
      RPFATAL(res, "Unable to request new process from fork server (OOM?)");

    }

    if ((res = read(fsrv_st_fd, &child_pid, 4)) != 4) {

      if (stop_soon) return 0;
      RPFATAL(res, "Unable to request new process from fork server (OOM?)");

    }

    if (child_pid <= 0) FATAL("Fork server is misbehaving (OOM?)");

  }

  /* Configure timeout, as requested by user, then wait for child to terminate. */

  it.it_value.tv_sec = (timeout / 1000);
  it.it_value.tv_usec = (timeout % 1000) * 1000;

  setitimer(ITIMER_REAL, &it, NULL);

  /* The SIGALRM handler simply kills the child_pid and sets child_timed_out. */

  if (dumb_mode == 1 || no_forkserver) {

    if (waitpid(child_pid, &status, 0) <= 0) PFATAL("waitpid() failed");

  } else {

    s32 res;

    /* With the fork server, the child's wait status arrives as 4 bytes on
       the status pipe. */

    if ((res = read(fsrv_st_fd, &status, 4)) != 4) {

      if (stop_soon) return 0;
      RPFATAL(res, "Unable to communicate with fork server (OOM?)");

    }

  }

  /* Forget the PID unless the child is merely stopped. */

  if (!WIFSTOPPED(status)) child_pid = 0;

  /* Elapsed time = requested timeout minus what is left on the timer. */

  getitimer(ITIMER_REAL, &it);
  exec_ms = (u64) timeout - (it.it_value.tv_sec * 1000 +
                             it.it_value.tv_usec / 1000);

  /* Disarm the timer. */

  it.it_value.tv_sec = 0;
  it.it_value.tv_usec = 0;

  setitimer(ITIMER_REAL, &it, NULL);

  total_execs++;

  /* Any subsequent operations on trace_bits must not be moved by the
     compiler below this point. Past this location, trace_bits[] behave
     very normally and do not have to be treated as volatile. */

  MEM_BARRIER();

  /* Capture the first 4 bytes of the trace before classify_counts()
     rewrites it; they are compared with EXEC_FAIL_SIG below. */

  tb4 = *(u32*)trace_bits;

#ifdef WORD_SIZE_64
  classify_counts((u64*)trace_bits);
#else
  classify_counts((u32*)trace_bits);
#endif /* ^WORD_SIZE_64 */

  /* Remembered so the next request can pass it to the fork server. */

  prev_timed_out = child_timed_out;

  /* Report outcome to caller. */

  if (WIFSIGNALED(status) && !stop_soon) {

    kill_signal = WTERMSIG(status);

    if (child_timed_out && kill_signal == SIGKILL) return FAULT_TMOUT;

    return FAULT_CRASH;

  }

  /* A somewhat nasty hack for MSAN, which doesn't support abort_on_error and
     must use a special exit code. */

  if (uses_asan && WEXITSTATUS(status) == MSAN_ERROR) {
    kill_signal = 0;
    return FAULT_CRASH;
  }

  if ((dumb_mode == 1 || no_forkserver) && tb4 == EXEC_FAIL_SIG)
    return FAULT_ERROR;

  /* It makes sense to account for the slowest units only if the testcase was run
  under the user defined timeout. */
  if (!(timeout > exec_tmout) && (slowest_exec_ms < exec_ms)) {
    slowest_exec_ms = exec_ms;
  }

  return FAULT_NONE;

}

这里解释下setitimer之后代码的处理逻辑;

setitimer首先设置一个计时器，代码继续往下执行
从forkserver读取child进程的执行状态（read(fsrv_st_fd, &status, 4)）
这个时候有2种情况，
第一种是child代码顺利执行完毕，给read函数返回一个status
第二种是timer到时间了，read还没有读到任何信息，仍在阻塞，那么内核会向fuzzer进程发送一个SIGALRM信号，前面由setup_signal_handlers注册的SIGALRM处理函数handle_timeout会被调用（具体来说，调用kill把child进程杀死，并设置child_timed_out）。child被杀死后，forkserver把它的退出状态写回管道，read结束阻塞，读到的status表明child是被信号终止的。
调用WIFSIGNALED(status)判断child是否被信号终止（即非正常退出），如果是，调用WTERMSIG(status)取得终止它的信号编号。

classify_counts函数

/* Replace the raw hit counts in the trace map at mem with their bucket
   values. The map is walked one 64-bit word at a time (MAP_SIZE >> 3 words).
   Words that are entirely zero are left untouched; the others are translated
   16 bits at a time through count_class_lookup16[]. */

static inline void classify_counts(u64* mem) {

  u64* const map_end = mem + (MAP_SIZE >> 3);

  for (; mem != map_end; mem++) {

    u16* halves;
    u32  h;

    /* Optimize for sparse bitmaps: a non-zero word is the unlikely case. */

    if (!unlikely(*mem)) continue;

    halves = (u16*)mem;

    for (h = 0; h < 4; h++)
      halves[h] = count_class_lookup16[halves[h]];

  }

}

count_class_lookup16

/* Build the 16-bit lookup table from the 8-bit one. For every 16-bit value,
   the high byte and the low byte are each translated through
   count_class_lookup8[] and put back in their original positions, so the
   result is the same as classifying the two bytes one at a time. */

EXP_ST void init_count_class16(void) {

  u32 pair;

  for (pair = 0; pair < 65536; pair++) {

    u32 hi = pair >> 8,
        lo = pair & 0xff;

    count_class_lookup16[pair] =
      (count_class_lookup8[hi] << 8) | count_class_lookup8[lo];

  }

}

count_class_lookup8

/* Maps a raw 8-bit hit count to its bucket value. Every non-zero bucket is
   a single bit, and all hit counts inside one index range share the same
   value. */

static const u8 count_class_lookup8[256] = {

  [0]           = 0,        /* no hits */
  [1]           = 1 << 0,   /*   1 */
  [2]           = 1 << 1,   /*   2 */
  [3]           = 1 << 2,   /*   4 */
  [4 ... 7]     = 1 << 3,   /*   8 */
  [8 ... 15]    = 1 << 4,   /*  16 */
  [16 ... 31]   = 1 << 5,   /*  32 */
  [32 ... 127]  = 1 << 6,   /*  64 */
  [128 ... 255] = 1 << 7    /* 128 */

};

has_new_bits函数

/* Compare the current trace (trace_bits) against virgin_map and clear from
   virgin_map every bit the trace has set.

   Returns 2 if some non-zero trace byte lines up with a still-pristine
   (0xff) virgin byte, i.e. a tuple not seen before; 1 if the only news is a
   different hit count for a known tuple; 0 if nothing is new. Because the
   map is updated, repeating the call on the same trace returns 0.

   When anything is new and virgin_map is virgin_bits, bitmap_changed is set.

   This runs after every exec() over the whole map, so it works on 64-bit
   words and only looks at individual bytes when a word has news. */

static inline u8 has_new_bits(u8* virgin_map) {

  u64* trace_words  = (u64*)trace_bits;
  u64* virgin_words = (u64*)virgin_map;

  u8   verdict = 0;
  u32  w;

  for (w = 0; w < (MAP_SIZE >> 3); w++) {

    u64 hits = trace_words[w];

    /* Almost always the trace word is empty or holds nothing that is still
       set in the virgin map; skip those quickly. */

    if (!unlikely(hits) || !unlikely(hits & virgin_words[w])) continue;

    if (likely(verdict < 2)) {

      u8* cur = (u8*)(trace_words + w);
      u8* vir = (u8*)(virgin_words + w);
      u32 b;

      /* At least a hit-count change; upgrade to 2 if any non-zero trace
         byte meets a pristine virgin byte. */

      verdict = 1;

      for (b = 0; b < 8; b++) {

        if (cur[b] && vir[b] == 0xff) {
          verdict = 2;
          break;
        }

      }

    }

    virgin_words[w] &= ~hits;

  }

  if (verdict && virgin_map == virgin_bits) bitmap_changed = 1;

  return verdict;

}