“ModSecurity2”源码分析

一、相关结构体

/*
 * Rule engine registry. Each table holds one kind of registrable item;
 * the call tree later in this document shows the variables table being
 * filled with apr_table_setn(engine->variables, name, metadata).
 */
struct msre_engine{
  apr_pool_t *mp;                  /* pool used by the engine */
  apr_table_t *variables;          /* variable name -> msre_var_metadata */
  apr_table_t *operators;          /* presumably operator name -> msre_op_metadata */
  apr_table_t *actions;            /* presumably action name -> msre_action_metadata */
  apr_table_t *tfns;               /* transformation functions; created with apr_table_make() */
  apr_table_t *reqbody_processors; /* request body processors */
};
// Metadata struct that msre_engine_variable_register() allocates and fills in
// for every registered variable.
struct msre_var_metadata {
  const char *name;           /* variable name, e.g. "ARGS_POST" */
  unsigned int type;   /* VAR_TYPE_ constants*/
  unsigned int argc_min;      /* minimum number of parameters */
  unsigned int argc_max;      /* maximum number of parameters */
  fn_var_validate_t validate; /* validation callback */
  fn_var_generate_t generate; /* callback invoked during rule processing to produce the values */
  unsigned int is_cacheable;/* 0-no,1-yes*/
  unsigned int availability;/*when does this variable become available?*/
};
/*
 * Name/value string pair in which both parts carry an explicit length.
 * modsec_rec keeps its last matched variable (matched_var) in this form.
 */
struct msc_string {
  char *name;
  unsigned int name_len;   /* length of name */
  char *value;             /* fixed: was misspelled "cahr", which does not compile */
  unsigned int value_len;  /* length of value */
};
// Operator metadata struct: allocated and filled in by
// msre_engine_op_register() for every registered operator.
struct msre_op_metadata {
  const char   *name;            /* operator name, e.g. "contains" */
  fn_op_param_init_t param_init; /* parameter init callback; may be NULL */
  fn_op_execute_t execute;       /* callback invoked from execute_operator() */
};
// Transformation function metadata struct: allocated and filled in by
// msre_engine_tfn_register() for every registered transformation.
struct msre_tfn_metadata {
  const char *name;          /* transformation name, e.g. "lowercase" */
  fn_tfn_execute_t execute;  /* callback that performs the transformation */
};  /* fixed: the terminating ';' was missing */
// Engine action metadata struct: allocated and filled in by
// msre_engine_action_register() for every registered action.
struct msre_action_metadata {
  const char *name;                   /* action name, e.g. "phase" */
  unsigned int type;                  /* action type, e.g. ACTION_DISRUPTIVE */
  unsigned int argc_min;              /* minimum number of parameters */
  unsigned int argc_max;              /* maximum number of parameters */
  unsigned int allow_param_plusminus; /* e.g. NO_PLUS_MINUS */
  unsigned int cardinality;           /* e.g. ACTION_CARDINALITY_ONE */
  unsigned int cardinality_group;     /* e.g. ACTION_CGROUP_NONE */
  fn_action_validate_t validate;      /* validation callback */
  fn_action_init_t init;              /* init callback */
  fn_action_execute_t execute;        /* execute callback; may be NULL */
};
//在modsecurity_tx_init()函数出现的modsec_rec结构体
struct modsec_rec {
  apr_pool_t *mp;
  msc_engine  *modsecurity;
  request_rec *r_early;
  request_rec *r;
  directory_config *dcfg1;
  directory_config *dcfg2;
  directory_config *usercfg;
  directory_config *txcfg;
  
  unsigned int reqbody_should_exist;
  unsigned int reqbody_chunked;
  
  unsigned int phase;
  unsigned int phase_request_headers_complete;
  unsigned int phase_request_body_complete;
  apr_bucket_brigade *if_brigade;
  unsigned int if_seen_eos;
  unsigned int if_status;
  unsigned int if_started_forwarding;

  apr_size_t reqbody_length;

  apr_bucket_brigade *of_brigade;
  unsigned int of_status;
  unsigned int of_done_reading;
  unsigned int of_skipping;
  unsigned int of_partial;
  unsigned int of_is_error;

  unsigned int resbody_status;
  apr_size_t resbody_length;
  char *resbody_data;
  unsigned int resbody_contains_html;
  apr_size_t stream_input_length;
  char *stream_input_data;
  apr_size_t stream_output_length;
  char *stream_output_data;
  unsigned int of_stream_changed;
  unsigned int if_stream_changed;

  apr_array_header_t *error_messages;
  apr_array_header_t *alerts;

  const char *txid;
  const char *sessionid;
  const char *userid;

  const char *server_software;
  const char *local_addr;
  unsigned int local_port;
  const char *local_user;

  /*client*/
  const char *remote_addr;
  unsigned int remote_port;
  const char *remote_user;

  /*useragent*/
  const char *useragent_ip;
  /*request*/
  const char *request_line;
  const char *request_method;
  const char *request_uri;
  const char *query_string;
  const char *request_protocol;
  const char *hostname;
  apr_table_t *request_headers;
  apr_off_t request_content_length;
  const char *request_content_type;
  apr_table_t *arguments;
  apr_table_t *arguments_to_sanitize;
  apr_table_t *request_headers_to_sanitize;
  apr_table_t *response_headers_to_sanitize;
  apr_table_t *request_cookies;
  apr_table_t *pattern_to_sanitize;

  unsigned int urlencoded_error;
  unsigned int inbound_error;
  unsigned int outbound_error;

  unsigned int is_relevant;
  apr_table_t *tx_vars;
  apr_table_t *geo_vars;
  /*response*/
  unsigned int response_status;
  const char *status_line;
  const char *response_protocol;
  apr_table_t *response_headers;
  unsigned int response_headers_sent;
  apr_off_t bytes_sent;
  /* modsecurity request body processing stuff*/
  unsigned int msc_reqbody_storage;  /*on disk or in memory*/
  unsigned int msc_reqbody_spilltodisk;
  unsigned int msc_reqbody_read;
  apr_pool_t *msc_reqbody_mp;
  apr_array_header_t *msc_reqbody_chunks;
  unsigned int msc_reqbody_length;
  int msc_reqbody_chunk_position;
  unsigned int msc_reqbody_chunk_offset;
  msc_data_chunk *msc_reqbody_chunk_current;
  char *msc_reqbody_buffer;
  const char *msc_reqbody_filename;
  int msc_reqbody_fd;
  msc_data_chunk *msc_reqbody_disk_chunk;

  const char *msc_reqbody_processor;
  int msc_reqbody_error;
  cosnt char *msc_reqbody_error_msg;

  apr_size_t msc_reqbody_no_files_length;

  char *msc_full_request_buffer;
  int msc_full_request_length;
  char *multipart_filename;
  char *multipart_name;
  multipart_data *mpd;
  xml_data *xml;
#ifdef WITH_YAJL
  json_data *json
#endif
  /* audit logging */
  char *new_auditlog_boundary;
  char *new_auditlog_filename;
  apr_file_t *new_auditlog_fd;
  unsigned int new_auditlog_size;
  apr_md5_ctx_t new_auditlog_md5ctx;
  
  unsigned int was_intercepted;
  unsigned int rule_was_intercepted;
  unsigned int intercept_phase;
  msre_actionset *intercept_actionset;
  const char *intercept_message;

  /*performance measurement*/
  apr_time_t request_time;
  apr_time_t time_phase1;
  apr_time_t time_phase2;
  apr_time_t time_phase3;
  apr_time_t time_phase4;
  apr_time_t time_phase5;
  apr_time_t time_storage_read;
  apr_time_t time_storage_write;
  apr_time_t time_logging;
  apr_time_t time_gc;
  apr_table_t *perf_rules;
 
  apr_array_header_t *matched_rules;
  msc_string *matched_var;
  int highest_severity;

  /* upload */
  int upload_extract_files;
  int upload_remove_files;
  int upload_files_count;
  /* other*/
  apr_table_t *collections_original;
  apr_table_t *collections;
  apr_table_t *collections_dirty;
  /* rule processing temp pool */
  apr_pool_t *msc_rule_mptmp;
  /* content injection*/
  const char *content_prepend;
  apr_off_t content_prepend_len;
  const char *content_append;
  apr_off_t content_append_len;

  /*data cache*/
  apr_hash_t *tcache;
  apr_size_t tcache_items;
  /*removed rules*/
  apr_array_header_t *removed_rules;
  apr_arrya_header_t *removed_rules_tag;
  apr_array_header_t *removed_rules_msg;
  /*removed targets*/
  apr_table_t *removed_targets;
  unsigned int allow_scope;
  /*matched vars*/
  apr_table_t *matched_vars;
  void *reqbody_processor_ctx;
  htmlDocPtr crypto_html_tree;
#if defined(WITH_LUA)
  #ifdef CACHE_LUA
  lua_State *L;
  #endif
#endif
  int msc_sdbm_delete_error;
};
//在parse_arguments()函数中出现的msc_arg结构体
struct msc_arg {
  const char *name;
  unsigned int name_len; 
  unsigned int name_origin_offset;
  unsigned int name_origin_len;
  const char *value;
  unsigned int value_len;
  unsigned int value_origin_offset;
  unsigned int value_origin_len;
  const char *origin;
};
// The msc_regex_t struct that appears in msc_regexec().
struct msc_regex_t {
  void *re;             /* NOTE(review): presumably the compiled expression - confirm */
  void *pe;             /* NOTE(review): presumably extra/study data for re - confirm */
  const char *pattern;  /* pattern text */
};
// The msre_ruleset struct that appears in msre_ruleset_process_phase().
// It holds one rule array per processing phase; msre_ruleset_process_phase()
// first decides which of these arrays to iterate.
struct msre_ruleset {
  apr_pool_t *mp;
  msre_engine *engine;
  apr_array_header_t *phase_request_headers;
  apr_array_header_t *phase_request_body;
  apr_array_header_t *phase_response_headers;
  apr_array_header_t *phase_response_body;
  apr_array_header_t *phase_logging;
};
// The msre_cache_rec struct that appears in modsecurity_process_phase().
// NOTE(review): presumably one entry of the transformation cache
// (modsec_rec.tcache) consulted in msre_rule_process_normal() - confirm.
struct msre_cache_rec {
  int hits;
  int changed;
  int num;
  const char *path;
  const char *val;     /* cached value */
  apr_size_t val_len;  /* length of val */
};

 1.1 位于apache2/mod_security2.c文件中,有个模块的入口点,这是挂载到apache主程序的入口:

/* Module entry points: this is where the module is hooked into the Apache core. */
module AP_MODULE_DECLARE_DATA security2_module = {
    STANDARD20_MODULE_STUFF,
    create_directory_config, //a: creates and initialises a directory_config
    merge_directory_configs, //b: merges a parent and a child directory_config
    NULL,    /* create_server_config */
    NULL,    /* merge_server_configs */
    module_directives,   //c: table of configuration directives
    register_hooks        //d: registers the Apache hooks
};

   上述代码块中的a是创建一个directory_config结构体变量,然后赋初值,并返回这个结构体的地址。

我们来看看这个结构体内容:

/*
 * Per-directory configuration. create_directory_config() allocates one and
 * assigns its initial values; merge_directory_configs() merges a parent and
 * a child instance. The per-transaction txcfg in modsec_rec has this type.
 */
struct directory_config {
  apr_pool_t *mp;
  msre_ruleset *ruleset;        /* rules processed by msre_ruleset_process_phase() */
  int is_enabled;
  int reqbody_access;
  int reqintercept_oe;
  int reqbody_buffering;
  long int reqbody_inmemory_limit;
  long int reqbody_limit;
  long int reqbody_no_files_limit;
  int resbody_access;
  
  /* Output filter ("of") / input filter ("if") settings */
  long int of_limit;
  apr_table_t *of_mime_types;
  int of_mime_types_cleared;
  int of_limit_action;
  int if_limit_action;

  /* Debug log */
  const char *debuglog_name;
  int debuglog_level;
  apr_file_t *debuglog_fd;
  
  int cookie_format;            /* compared against COOKIES_V0 when parsing cookies */
  int argument_separator;
  const char *cookiev0_separator;

  int rule_inheritance;
  apr_array_header_t *rule_exceptions;

  /* -- Audit log -- */
  
  /* Max rule time */
  int max_rule_time;
  
  /* Whether audit log should be enabled in the context or not */
  int auditlog_flag;
  /* AUDITLOG_SERIAL (single file) or AUDITLOG_CONCURRENT (multiple files) */
  int auditlog_type;
#ifdef WITH_YAJL
  /* AUDITLOGFORMAT_NATIVE or AUDITLOGFORMAT_JSON */
  int auditlog_format;
#endif
  /* Mode for audit log directories and files */
  apr_fileperms_t auditlog_dirperms;
  apr_fileperms_t auditlog_fileperms;
  char *auditlog_name;
  char *auditlog2_name;

  /* The file descriptors for the files above */
  apr_file_t *auditlog_fd;
  apr_file_t *auditlog2_fd;
   /* For the new-style audit log only, the path where audit log entries will be stored */
  char *auditlog_storage_dir;
  /* String of AUDITLOG_PART_* letters; searched with strchr() when writing an entry */
  char *auditlog_parts;
  /* A regular expression that determines if a response status is treated as relevant */
  msc_regex_t *auditlog_relevant_regex;
  /* Upload */
  const char *tmp_dir;
  const char *upload_dir;
  int upload_keep_files;
  int upload_validates_files;
  int upload_filemode; /* int only so NOT_SET works */
  int upload_file_limit;
  
  /* Used only in the configuration phase */
  msre_rule *tmp_chain_starter;
  msre_actionset *tmp_default_actionset;
  apr_table_t *tmp_rule_placeholders;
  /* Misc */
  const char *data_dir;
  const char *webappid;
  const char *sensor_id;
  const char *httpBlkey;
  /* Content injection*/
  int content_injection_enabled;
  /* Stream Inspection */
  int stream_inbody_inspection;
  int stream_outbody_inspection;

  /* Geo Lookup */
  geo_db *geo;
  /* Gsb Lookup */
  gsb_db *gsb;
  /* Unicode map*/
  unicode_map *u_map;
  /*Cache */
  int cache_trans;              /* compared against MODSEC_CACHE_DISABLED during rule processing */
  int cache_trans_incremental;
  apr_size_t cache_trans_min;
  apr_size_t cache_trans_max;
  apr_size_t cache_trans_maxitems;
  
  apr_array_header_t *component_signatures;
  /* Request character encoding */
  const char *request_encoding;
  int disable_backend_compression;
  /* Collection timeout */
  int col_timeout;
  /*hash of ids*/
  apr_hash_t *rule_id_htab;
  /* Hash */
  apr_array_header_t *hash_method;
  const char *crypto_key;
  int crypto_key_len;
  const char *crypto_param_name;
  int hash_is_enabled;
  int hash_enforcement;
  int crypto_key_add;
  int crypto_hash_href_rx;
  int crypto_hash_faction_rx;
  int crypto_hash_location_rx;
  int crypto_hash_iframesrc_rx;
  int crypto_hash_framesrc_rx;
  int crypto_hash_href_pm;
  int crypto_hash_faction_pm;
  int crypto_hash_location_pm;
  int crypto_hash_iframesrc_pm;
  int crypto_hash_framesrc_pm;

  /* xml */
  int xml_external_entity;
};

  上述代码块中的b作用是:合并两个目录配置,参数2和参数3分别是_parent和_child,说明是合并两个父子目录。

  上述代码块中的c中结构体叫做module_directives,这里面跟apache中的module的参数写法一样,调用的函数分别是AP_INIT_TAKE1、AP_INIT_TAKE12等,主要是指令名和参数的个数区别。

   上述代码块中的d是注册Apache的模块钩子。在此钩子中,相继调用了多个函数,比如初始化函数等。

    下面分析register_hooks()中所做的事情:

     1.1.1 注册可选函数

#if (!defined(NO_MODSEC_API))
  /* Export the optional functions.
     Registering them inside the module's register_hooks() adds each one to
     the global optional-function hash table maintained by the Apache core.
     (Fixed: the third registration used the non-existent macro
     API_REGISTER_OPTIONAL_FN instead of APR_REGISTER_OPTIONAL_FN.) */
  APR_REGISTER_OPTIONAL_FN(modsec_register_tfn);
  APR_REGISTER_OPTIONAL_FN(modsec_register_operator);
  APR_REGISTER_OPTIONAL_FN(modsec_register_variable);
  APR_REGISTER_OPTIONAL_FN(modsec_register_reqbody_processor);
#endif

     1.1.2 主要的钩子函数

           1.1.2.1

ap_hook_pre_config(hook_pre_config, NULL, NULL, APR_HOOK_FIRST); 
                上面函数的目的是预配置的初始化,在hook_pre_config()函数中初始化创建ModSecurity引擎,其中modsecurity是全局变量。然后有条件地注册了一个modsec_var_log_handler(),看起来是用于log作用的,这里先不分析这个log函数。
hook_pre_config()
  |->modsecurity=modsecurity_create(mp,MODSEC_ONLINE)
    |->msre_engine_create(msce->mp);
      |->apr_pool_create()
      |->engine=apr_pcalloc()
      |->engine->tfns=apr_table_make()
    |->msre_engine_register_default_variables(msce->msre);//在此函数中使用msre_engine_variable_register()函数向引擎中注册很多个默认变量
      |->msre_engine_variable_register(); 
        |->msre_var_metadata *metadata = apr_pcalloc();
        |->赋值(包括变量名,回调函数等,回调函数放到后面举例介绍)然后apr_table_setn(engine->variables, name,(void *)metadata);
    |->msre_engine_register_default_operators(msce->msre);//注册了很多运算符
      |->msre_engine_op_register()
        |->msre_op_metadata *metadata = apr_pcalloc()
        |->赋值(包括变量名,回调函数等,回调函数放到后面举例介绍)然后apr_table_setn();
    |->msre_engine_register_default_tfns(msce->msre);
      |->msre_engine_tfn_register()
        |->msre_tfn_metadata *metadata = apr_pcalloc()
        |->赋值(包括变量名,回调函数等,回调函数放到后面举例介绍)然后apr_table_setn()
    |->msre_engine_register_default_actions(msce->msre);
      |->msre_engine_action_register()
        |->msre_action_metadata *metadata = apr_pcalloc()
        |->赋值(包括变量名,回调函数等,回调函数放到后面举例介绍)然后apr_table_setn()

               通过举例来说明msre_engine_register_default_variables()函数的需要完成的任务:

/*ARGS_POST*/
/* The argument order appears to mirror the msre_var_metadata fields. */
msre_engine_variable_register(engine,
      "ARGS_POST",                /* name */
      VAR_LIST,                   /* type */
      0, 1,                       /* argc_min, argc_max */
      var_generic_list_validate,  /* validate callback */
      var_args_post_generate,     /* generate callback */
      VAR_CACHE,                  /* is_cacheable */
      PHASE_REQUEST_BODY          /* availability */
);
其中,var_generic_list_validate()函数中主要判断了参数是否是一个正则表达式
var_args_post_generate()函数:
  |->for(i=0;i<arr->nelts;i++) 
       if(strcmp("BODY",arg->origin)!=0) continue;
       if(var->param==NULL)match=1;
       else
         if(var->param_data!=NULL) //正则表达式
           msc_regexec((msc_regex_t *)var->param_data,...)
         else
           if(strcasecmp(arg->name,var->param)==0) match=1//简单的比较
       if(match)  //如果我们有一个匹配,将这个参数添加到集合中
         apr_table_addn(vartab,rvar->name,(void *)rvar)

                 通过举例来说明msre_engine_register_default_operators()函数需要做的工作,此处举两个例子:

1./* contains*/
/* The arguments after engine appear to mirror the msre_op_metadata fields. */
msre_engine_op_register(engine,
     "contains",                /* name */
      NULL,/*init function to flag var substitution*/
      msre_op_contains_execute  /* execute callback */
);
其中,msre_op_contains_execute()函数,参考一个SecRule例子:
SecRule REQUEST_LINE "!@contains .php" t:none,deny,status:403
SecRule ARGS:ip "!@contains %{TX.1}"
|->msre_op_contains_execute() 
  |->expand_macros(msr,str,rule,msr->mp)//在给定的变量中扩展宏("%{NAME}"实体)
  |->for(i=0;i<=i_max;i++) { 
       if(target[i] == match[0]) { 
         if((match_length==1) || (memcmp((match+1),(target+i+1),(match_length-1)) == 0)) 
           return 1;//匹配 
  |->return 0;//没有匹配
2./* detectSQLi */
    /* The arguments after engine appear to mirror the msre_op_metadata fields. */
    msre_engine_op_register(engine,
        "detectSQLi",                /* name */
         NULL,                       /* param_init: none */
         msre_op_detectSQLi_execute  /* execute callback */
    );
其中msre_op_detectSQLi_execute()函数会使用libinjection/目录下的相关文件的函数,具体的分析看源代码,暂不介绍

                 通过举例来说明msre_engine_register_default_tfns()函数的需要完成的任务:

/*lowercase*/
/* The arguments after engine appear to mirror the msre_tfn_metadata fields. */
msre_engine_tfn_register(engine,
        "lowercase",               /* name */
        msre_fn_lowercase_execute  /* execute callback */
);
其中,msre_fn_lowercase_execute()函数具有小写化的功能
|->msre_fn_lowercase_execute()
  |->while(i<input_len) {
       int x = input[i];
       input[i]=tolower(x);
       if(x!=input[i]) changed=1;
       i++;
     } 

               通过举例来说明msre_engine_register_default_actions()函数的需要完成的任务:

/*phase*/
/* The argument order appears to mirror the msre_action_metadata fields. */
msre_engine_action_register(engine,
        "phase",                     /* name */
        ACTION_DISRUPTIVE,           /* type */
        1, 1,                        /* argc_min, argc_max */
        NO_PLUS_MINUS,               /* allow_param_plusminus */
        ACTION_CARDINALITY_ONE,      /* cardinality */
        ACTION_CGROUP_NONE,          /* cardinality_group */
        msre_action_phase_validate,  /* validate callback */
        msre_action_phase_init,      /* init callback */
        NULL                         /* execute callback: none */
    );
其中,msre_action_phase_validate()函数什么也没做,msre_action_phase_init()函数根据参数名将actionset->phase设置成相应的值
/* Translate a symbolic phase name (compared case-insensitively) into its
 * numeric phase; any other parameter leaves actionset->phase untouched here. */
if (strcasecmp(action->param, "request") == 0) {
    actionset->phase = 2;
} else if (strcasecmp(action->param, "response") == 0) {
    actionset->phase = 4;
} else if (strcasecmp(action->param, "logging") == 0) {
    actionset->phase = 5;
}

               1.1.2.2 

ap_hook_post_config(hook_post_config, postconfig_beforeme_list,postconfig_afterme_list,APR_HOOK_REALLY_LAST);

                 由于没有找到ap_hook_post_config()函数的定义,所以上面函数中的postconfig_beforeme_list

        和postconfig_afterme_list参数暂时不清楚,我们将重点放在hook_post_config()函数上:

//此函数是(后配置)模块初始化
|->hook_post_config()
  |->apr_pool_userdata_get(&init_flag,...)//通过apr函数获取在当前池中的key的value
  |->如果init_flag==NULL,调用apr_pool_userdata_set(),否则调用modsecurity_init(modsecurity,mp);//在hook_pre_config()中已经初始化好了modsecurity对象
    |->modsecurity_init()预置modsecurity引擎,这个函数必须在配置处理完成后被调用,因为Apache需要知道正在运行的用户名
      |->rc=apr_global_mutex_create(&msce->auditlog_lock,...)
      |->rc=apr_global_mutex_create(&msce->geo_lock,...)
      |->rc=apr_global_mutex_create(&msce->dbm_lock,...)
  |->real_server_signature=apr_pstrdup(mp, apache_get_server_version()) //存储原始服务器签名
  |->如果real_server_signature不是NULL,则ap_add_version_component()和change_server_signature()//忽略此函数的过程
  |->#if (!(defined(WIN32) || defined(NETWARE))) 则执行内部一系列chroot功能
  |->apr_pool_cleanup_register(mp,(void *)s, module_cleanup,apr_pool_cleanup_null);//在主池被销毁时,为稍后的时间安排主要的清理工作

                1.1.2.3 

ap_hook_child_init(hook_child_init,NULL,NULL,APR_HOOK_MIDDLE);

                  上面函数中hook_child_init()函数为每个新的子进程执行初始化

|->hook_child_init()
  |->modsecurity_child_init(modsecurity);
    |->xmlInitParser();//在任何其他XML调用之前,需要将此过程调用一次
    |->apr_status_t rc = apr_global_mutex_child_init()//apr_global_mutex_child_init在子进程中重新打开互斥锁
    |->apr_global_mutex_child_init()
    |->apr_global_mutex_child_init()

                1.1.2.4 连接进程钩子

ap_hook_process_connection(hook_connection_early, NULL, NULL, APR_HOOK_FIRST)

                 上面函数中hook_connection_early()函数目的是为连接钩子限制繁忙状态的连接数

|->hook_connection_early()
  |->ap_get_scoreboard_worker(sbh)
  |->ws_record=ap_get_scoreboard_worker_from_indexes(i,j)
  |->tree_contains_ip()

                 1.1.2.5 事务进程钩子

ap_hook_post_read_request(hook_request_early,postread_beforeme_list, postread_afterme_list, APR_HOOK_REALLY_FIRST);

                   上面函数中的hook_request_early()函数初始请求处理,在Apache接受请求头之后立即执行,该函数将创建事务上下文。 在下面的函数分析中,有几个定义需要了解一下:

/* Audit log part letters. Each part is written only when its letter occurs
 * in txcfg->auditlog_parts (tested with strchr()). FIRST and LAST alias the
 * lowest ('A') and highest ('K') regular part letters. */
#define AUDITLOG_PART_FIRST                 'A'
#define AUDITLOG_PART_HEADER                'A'
#define AUDITLOG_PART_REQUEST_HEADERS       'B'
#define AUDITLOG_PART_REQUEST_BODY          'C'
#define AUDITLOG_PART_RESPONSE_HEADERS      'D'
#define AUDITLOG_PART_RESPONSE_BODY         'E'
#define AUDITLOG_PART_A_RESPONSE_HEADERS    'F'
#define AUDITLOG_PART_A_RESPONSE_BODY       'G'
#define AUDITLOG_PART_TRAILER               'H'
#define AUDITLOG_PART_FAKE_REQUEST_BODY     'I'
#define AUDITLOG_PART_UPLOADS               'J'
#define AUDITLOG_PART_MATCHEDRULES          'K'
#define AUDITLOG_PART_LAST                  'K'
#define AUDITLOG_PART_ENDMARKER             'Z'
/* Iteration modes used by msre_ruleset_process_phase(). */
/* NEXT_CHAIN: a rule in a chain did not match; skip the rest of that chain. */
#define NEXT_CHAIN 1
/* NEXT_RULE: continue with the next rule (honouring a pending skip count). */
#define NEXT_RULE  2
/* SKIP_RULES: skip every rule until the placeholder with the given ID is hit. */
#define SKIP_RULES 3
|->hook_request_early()
  |->msr=create_tx_context(r);//初始化事务上下文并创建初始配置
    |->msr=apr_pcalloc(r->pool,..)//创建一个新的msr并赋值
    |->apr_allocator_create(&allocator);//创建一个新的分配器
    |->apr_allocator_max_free_set(allocator, 1024);//设置当前的阈值,在该阈值中,分配器应该开始向系统返回块
    |->apr_pool_create_ex(&msr->mp,r->pool,NULL,allocator);//创建新pool,这个函数是线程安全的,因为多个线程可以同时安全地创建同一个父池的子池,类似地,一个线程可以在另一个线程访问父池的同时创建一个子池
    |->apr_allocator_owner_set(allocator, msr->mp);//设置分配器的所有者
    |->msr->dcfg1=ap_get_module_config(r->per_dir_config,&security2_module)
    |->msr->usercfg=create_directory_config()//创建特殊的用户配置,将被用来覆盖默认设置
    |->msr->txcfg=create_directory_config() //创建一个事务上下文并用我们刚从Apache得到的目录配置填充它
    |->msr->txcfg=merge_directory_configs(msr->mp,msr->txcfg,msr->dcfg1)
    |->init_directory_config(msr->txcfg);//初始化目录配置
    |->msr->txid=get_env_var(r, "UNIQUE_ID")//检索指定的环境变量,当mod_unique_id模块注册的时候这个值存在
      |->apr_table_get(r->notes, name)
    |->msr->request_uri=r->uri//这里有很多赋值操作,目的是填充tx字段,从r的字段到msr的相关字段的赋值
    |->msr->request_headers = apr_table_copy(msr->mp,r->headers_in)//创建一个新表,并将另一个表复制到其中
    |->msr->hostname=ap_get_server_name(r)//从请求中获取当前的服务器名称
    |->modsecurity_tx_init(msr) //调用引擎以继续初始化,继续给msr的相关字段赋值
      |->apr_pool_cleanup_register(msr->mp,msr,modsecurity_tx_cleanup,apr_pool_cleanup_null);
      |->apr_table_get(msr->request_headers,"Content-Length");//这里判断了请求是否有正文,总共两种情况有正文
      |->apr_table_get(msr->request_headers,"Content-Type")
      |->parse_arguments()  //解析QUERY_STRING字段值
        |->urldecode_nonstrict_inplace_ex()  //进行urldecode处理
        |->add_argument(msr,arguments,arg) //向msr的成员arguments成员中增加key-value对
          |->apr_table_addn(arguments,log_escape_nq_ex(msr->mp,arg->name,arg->name_len),(void *)arg)
      |->if(msr->txcfg->cookie_format==COOKIES_V0) parse_cookies_v0(msr,te[i].val, msr->request_cookies,";")
        |->apr_strtok(cookie_header,delim,&saveptr)
      |->else parse_cookies_v1(msr, te[i].val,msr->request_cookies)
    |->store_tx_context(msr,r);  //存储事务上下文,可以在随后的阶段、重定向或子请求中找到它
      |->apr_table_setn(r->notes,NOTE_MSR,(void *)msr);//apr_table_setn()向表中添加键/值对。如果另一个元素已经具有相同的键,那么覆盖之
  |->#ifdef REQUEST_EARLY
  |->if (modsecurity_process_phase(msr, PHASE_REQUEST_HEADERS) > 0) //一个事务阶段,由于在modsec_rec结构中已经可用,所以不需要显式地提供阶段号
    |->modsecurity_process_phase_request_headers(msr); //处理进程请求头(REQUEST_HEADERS)阶段
      |->rc=msre_ruleset_process_phase(msr->txcfg->ruleset,msr)
        |->首先确定我们需要使用哪一组规则(包括PHASE_REQUEST_HEADERS,PHASE_REQUEST_BODY等)
        |->apr_table_clear(msr->matched_vars)//从表中删除所有元素
        |->for(i=0;i<arr->nelts;i++)//这是一个循环,针对每一个ruleset中的相应成员(阶段)的元素来做处理,一直到整个函数结束
        |->if(mode==SKIP_RULES) //SKIP_RULES用于跳过所有规则,直到我们用指定的规则ID命中一个占位符,然后在此之后继续执行
        |->if(rule->placeholder != RULE_PH_NONE)//跳过任何标记为占位符的规则
        |->if(mode==NEXT_CHAIN) //当链中的一个规则不匹配时,就会使用NEXT_CHAIN,然后我们需要跳过该链中的剩余规则,以获得可以执行的下一个规则
        |->if((mode == NEXT_RULE)&&(skip>0))//如果我们在这里意味着是NEXT_RULE,如果设置"跳过"参数,则需要跳过
        |->if(((rule->actionset->id!=NULL) && !apr_is_empty_array(msr->removed_rules)) ||(apr_is_empty_array(msr->removed_rules_tag)==0) ||(
           apr_is_empty_array(msr->removed_rules_msg)==0)) //检查该规则是否在运行时被删除,此处的逻辑块不分析
        |->rc=msre_rule_process(rule,msr);//使用一个新的内存子池来处理每个规则
          |->apr_pool_create(&msr->msc_rule_mptmp,msr->mp)//创建规则处理临时池
          |->#if defined(WITH_LUA) msre_rule_process_lua(rule,msr)//处理lua脚本,这里直接不介绍
          |->msre_rule_process_normal(rule,msr) //对给定的事务执行规则
            |->apr_table_get(rule->actionset->actions, "multiMatch")  //获取multiMatch字段的值
            |->for(i=0;i<rule->targets->nelts;i++) {
                  list_count=targets[i]->metadata->generate(msr,targets[i],rule,vartab,mptmp)//这里调用之前初始化的回调函数
            |->for(i=0;i<arr->nelts;i++) //循环一直到函数结尾,循环遍历最终目标列表中的目标,根据需要执行转换,并调用操作符
            |->if(msr->txcfg->cache_trans != MODSEC_CACHE_DISABLED) //判断这是不是var缓存
            |->for(k=0;k<tarr->nelts;k++) //构建转换函数的最终列表
                apr_table_addn(normtab,action->param,(void *)action) //增加t的参数到表中
            |->if(usecache && !multi_match && (crec != NULL) &&(crec == last_crec)) //如果最后一个缓存的tfn是列表中的最后一个,那么我们可以在这里停止并立即执行该操作
            |->rc = execute_operator(var,rule,msr,acting_actionset, mptmp)//根据给定值调用规则操作符,例如: SecRule REQUEST_HEADERS:Content-Type "text/xml" ...或者 SecRule REQUEST_HEADERS:User-Agent "@contains SECRET_PASSWORD"
              |->tarr=apr_table_elts(msr->removed_targets)
              |->telts=(const apr_table_entry_t*)tarr->elts
              |->for(i=0;i<tarr->nelts;i++) //循环处理msr的removed_targets成员
                   rc=msre_ruleset_rule_matches_exception(rule,re) //
                   if(rc>0) rc=fetch_target_exception(rule,msr,var,exceptions)
              |->rc=rule->op_metadata->execute(msr,rule, var, &my_error_msg) //此函数调用了op_metadata的回调执行函数,是最关键的函数之一,另一个是转换metadata的回调执行函数和action_metadata的回调函数
              |->if(((rc==0)&&(rule->op_negated == 0)) || ((rc==1)&&(rule->op_negated==1)))//返回RULE_NO_MATCH
              |->else  //匹配
                   if(rc==0) //记录日志
                   *(const msre_rule **)apr_array_push(msr->matched_rules) = rule;
                   if (var!=NULL && msr !=NULL)//保存最后匹配的var数据给msr->matched_var的各个成员赋值,给创建的变量mvar赋值
                     apr_table_addn(msr->matched_vars, mvar->name, (void *)mvar)
                   if((acting_actionset->severity>0)&&(acting_actionset->severity<msr->highest_severity)&&!rule->actionset->is_chained)
                   msre_perform_nondisruptive_actions(msr,rule,rule->actionset,mptmp)//执行非破坏性操作
                     |->for(i=0;i<tarr->nelts;i++)
                          action->metadata->execute(msr,mptmp,rule,action)//执行action_metadata的回调执行函数
                   if(rule->actionset->is_chained==0)
                     msre_perform_disruptive_actions(msr,rule,acting_actionset,mptmp,my_error_msg)//执行破坏性操作
                       |->for(i=0;i<tarr->nelts;i++)
                            action->metadata->execute(msr,mptmp,rule,action)
                       |->if(actionset->intercept_action_rec->metadata->type==ACTION_DISRUPTIVE)
                            actionset->intercept_action_rec->metadata->execute(msr,mptmp,rule,actionset->intercept_action_rec)
                       |->if((msr->phase==PHASE_LOGGING)||...)
                            apr_array_push(msr->alerts)=msc_alert_message(msr,actionset,NULL,message)//msc_alert_message()格式化一个警告信息
                       |->msc_alert(msr, log_level, actionset, "Warning", message)
            |->tarr=apr_table_elts(normtab)//从normtab表中获取元素的,返回整个元素数组的地址
            |->for(;k<tarr->nelts;k++) 
                 if(multi_match && (k==0||tfnchanged)) //在多匹配模式下,我们在开始时执行一次运算符,然后每次变量被转换函数改变一次
                   rc=execute_operator(var,rule,msr,acting_actionset,mptmp)
                 metadata=(msre_tfn_metadata *)action->param_data;
                 tfnchanged=metadata->execute(mptmp,(unsigned char *)var->value,var->value_len,&rval,&rval_length)//调用metadata的回调函数
                 if(usecache) //这里不介绍,忽略
            |->if(!multi_match || tfnchanged) //如果没有启用多匹配,则执行操作符,或者如果是,我们需要处理最后一个转换的结果
                 rc=execute_operator(var,rule,msr,acting_actionset,mptmp)
        |->if(rc==RULE_NO_MATCH) //如果返回值rc==RULE_NO_MATCH
        |->else if(rc==RULE_MATCH)//如果返回值rc==RULE_MATCH
        |->else if(rc<0) //如果返回值rc小于0,表示规则处理失败(出错,而不是"未匹配")
        |->else   //剩余的情况表示规则处理失败,而且返回码未知
    |->modsecurity_process_phase_request_body(msr)
      |->rc=msre_ruleset_process_phase(msr->txcfg->ruleset,msr)//到这儿,请求体和请求头的处理基本相同
    |->modsecurity_process_phase_response_headers(msr);
      |->rc=msre_ruleset_process_phase(msr->txcfg->ruleset,msr);//到这儿,响应头和请求头的处理基本相同
    |->modsecurity_process_phase_response_body(msr);
      |->msre_ruleset_process_phase(msr->txcfg->ruleset,msr)//到这儿,响应体和请求头的处理基本相同
    |->modsecurity_process_phase_logging(msr);
      |->msre_ruleset_process_phase(msr->txcfg->ruleset,msr)
      |->modsecurity_persist_data(msr)
        |->collection_store(msr,col)
        |->collections_remove_stale(msr,te[i].key)
      |->if(msr->is_relevant==0) //这个请求是否与日志记录有关?
           is_response_status_relevant(msr,msr->r->status) //检查状态
      |->if((msr->txcfg->upload_keep_files==KEEP_FILES_ON)||...)//如果我们想保存这些文件(如果有的话)
      |->sec_audit_logger(msr) //调用审计日志记录器
        |->#ifdef WITH_YAJL  sec_audit_logger_json(msr) //这里不介绍
        |->sec_audit_logger_native(msr) //以本机格式生成审计日志条目
          |->msr->new_auditlog_boundary=create_auditlog_boundary(msr->r)
          |->if(msr->txcfg->auditlog_type != AUDITLOG_CONCURRENT) //串行日志记录-我们已经有一个打开的文件描述符
          |->else
               apr_md5_init(&msr->new_auditlog_md5ctx)//MD5初始化,开始MD5操作,编写新的上下文
               msr->new_auditlog_filename=construct_auditlog_filename(msr->mp,msr->txid)//构造一个文件名,用于存储审计日志条目
               entry_filename=msr->txcfg->auditlog_storage_dir
               entry_basename=file_dirname(msr->mp,entry_filename)
               apr_dir_make_recursive()//在文件系统上创建一个新目录,但行为类似于“mkdir -p”。根据需要创建中间目录。如果路径已经存在,则不会报告错误。
               apr_file_open()
          |->apr_global_mutex_lock(msr->modsecurity->auditlog_lock)
          |->sec_auditlog_write(msr,text,strlen(text))
          |->if(strchr(msr->txcfg->auditlog_parts,AUDITLOG_PART_REQUEST_HEADERS)!=NULL) //REQUEST_HEADERS的日志
          |->if(strchr(msr->txcfg->auditlog_parts,AUDITLOG_PART_REQUEST_BODY)!=NULL) //REQUEST_BODY
          |->if(strchr(msr->txcfg->auditlog_parts,AUDITLOG_PART_RESPONSE_HEADERS) !=NULL) //RESPONSE_HEADERS
          |->if(strchr(msr->txcfg->auditlog_parts,AUDITLOG_PART_RESPONSE_BODY) !=NULL) //RESPONSE_BODY
          |->剩下的if分支在这里不显示
  |->rc=perform_interception(msr)  //使用结构本身指定的方法拦截事务,必须返回一个HTTP状态码,它将被用来终止事务
    |->switch(actionset->intercept_action) //确定如何响应和准备日志消息
         case ACTION_DENY:
         case ACTION_PROXY:
         case ACTION_DROP:
         case ACTION_REDIRECT:
           expand_macros(msr, var, NULL, msr->mp)
         case ACTION_ALLOW:
         case ACTION_PAUSE:
         case ACTION_ALLOW_PHASE:
         case ACTION_ALLOW_REQUEST:
         default:
     |->msc_alert_message(msr,actionset,NULL,message)
     |->msc_alert()

                1.1.2.6 

ap_hook_fixups(hook_request_late, fixups_beforeme_list, NULL,APR_HOOK_REALLY_FIRST)

                      上述函数中的hook_request_late()函数作为处理程序链中的第一个钩子,该函数执行ModSecurity请求处理的第二阶段

|->hook_request_late()
  |->msr=retrieve_tx_context(r) //找到事务上下文并确保我们继续进行
  |->if(msr->phase_request_body_complete) //这个阶段已经完成了吗?
  |->msr->dcfg2=(directory_config *)ap_get_module_config(r->per_dir_config,&security2_module)//获取第二个配置上下文
  |->msr->txcfg=create_directory_config(msr->mp,NULL)//创建一个事务上下文
  |->msr->txcfg=merge_directory_configs(msr->mp,msr->txcfg,msr->dcfg2) 
  |->msr->txcfg=merge_directory_configs(msr->mp,msr->txcfg,msr->usercfg);//使用显式的用户设置更新
  |->init_directory_config(msr->txcfg)
  |->rc=read_request_body(msr,&my_error_msg) //从客户端读取请求体
    |->modsecurity_request_body_start(msr, error_msg) 
    |->bb_in=apr_brigade_create()
    |->do{
          rc=ap_get_brigade(r->input_filters,...)
          for(bucket=APR_BRIGADE_FIRST(bb_in);...) //循环遍历brigade中的Bucket,以便提取可用数据的大小
            rc=apr_bucket_read(bucket,&buf,&buflen,APR_BLOCK_READ)
            if(buflen!=0)
              modsecurity_request_body_store(msr,buf,buflen,error_msg)//存储一大块请求体数据
            if(APR_BUCKET_IS_EOS(bucket))
               finished_reading=1;msr->if_seen_eos=1;
       }while(!finished_reading);
     |->modsecurity_request_body_end(msr,error_msg) //停止接收请求体      

                 1.1.2.7 Logging

ap_hook_error_log(hook_error_log,NULL,NULL,APR_HOOK_MIDDLE)

                        此函数中的hook_error_log()函数在每次Apache都有要写入的错误日志的东西时调用

|->hook_error_log
  |->retrieve_tx_context((request_rec *)info->r) //通过查看主请求和之前的请求来检索之前存储的事务上下文
ap_hook_log_transaction(hook_log_transaction,NULL,transaction_afterme_list,APR_HOOK_MIDDLE)

                        上述函数中的hook_log_transaction()函数在每个事务结束时调用

|->hook_log_transaction()
  |->msr=retrieve_tx_context(r)
    |->msr->response_protocol=get_response_protocol(origr)
    |->sec_guardian_logger(r,origr,msr)  //Guardian日志记录器用于连接到web服务器保护的外部脚本——httpd_guardian。
    |->modsecurity_process_phase(msr, PHASE_LOGGING) //调用引擎来完成剩余的工作

                 1.1.2.8 Filter hooks

ap_hook_insert_filter(hook_insert_filter,NULL,NULL,APR_HOOK_FIRST)

                          上述函数中的hook_insert_filter()在请求处理开始之前调用,这是我们需要决定是否要连接到输出过滤器链的时候

|->hook_insert_filter()
  |->msr=retrieve_tx_context(r)//首先发现事务上下文
  |->ap_add_input_filter("MODSECURITY_IN", msr, r, r->connection) //增加输入过滤器
  |->ap_add_output_filter("MODSECURITY_OUT", msr, r, r->connection)  //增加输出过滤器
ap_hook_insert_error_filter(hook_insert_error_filter,NULL,NULL,APR_HOOK_FIRST)

                             上述函数中的hook_insert_error_filter()在Apache开始处理错误时调用,这是一个插入到输出过滤器链中的机会。

|->hook_insert_error_filter()
  |->msr=retrieve_tx_context(r)
  |->ap_add_output_filter("MODSECURITY_OUT",msr,r,r->connection)//如果输出过滤器已经完成,不要运行此行

              1.1.2.9 注册一个输入过滤器

ap_register_input_filter("MODSECURITY_IN", input_filter, NULL, AP_FTYPE_CONTENT_SET)

                               上述函数用于在系统中注册一个输入过滤器,在执行此注册之后,可以使用ap_add_input_filter()将过滤器添加到过滤器链中,并简单地指定名称。其中input_filter()函数会将先前存储的请求体转发到链。

|->input_filter()
  |->rc=modsecurity_request_body_retrieve_start(msr,&my_error_msg)  //准备转发请求体
  |->rc=modsecurity_request_body_retrieve(msr,&chunk,(unsigned int)nbytes,&my_error_msg)
  |->if(rc==0) modsecurity_request_body_retrieve_end(msr)

               1.1.2.10 注册输出过滤器

ap_register_output_filter("MODSECURITY_OUT", output_filter, NULL, AP_FTYPE_CONTENT_SET - 3)

                                  确保输出过滤器在其他模块之前运行,这样我们就可以得到一个不被修改的更好的请求

|->output_filter()
  |->msr->response_protocol=get_response_protocol(r)
  |->rc=modify_response_header(msr)
  |->rc=modsecurity_process_phase(msr,PHASE_RESPONSE_HEADERS)
  |->if(rc>0) perform_interception(msr) //事务需要被中断
  |->rc=output_filter_init(msr,f,bb_in) //初始化输出过滤器
     switch(rc)
       case -2:
       case -1:
       case 0:
  |->for(bucket=APR_BRIGADE_FIRST(bb_in);...) {//循环遍历brigade中的bucket,以便提取可用数据的大小
       rc=apr_bucket_read(bucket,&buf,&buflen, APR_BLOCK_READ);
       if(APR_BUCKET_IS_EOS(bucket))
         bucket_ci=apr_bucket_heap_create(msr->content_append,...)
         APR_BUCKET_INSERT_BEFORE(bucket,bucket_ci);//在指定的桶前插入一个桶
  |->ap_save_brigade(f,&msr->of_brigade,&bb_in,msr->mp)
  |->flatten_response_body(msr) 
  |->rc=modsecurity_process_phase(msr,PHASE_RESPONSE_BODY);//处理阶段RESPONSE_BODY
  |->if(rc>0) perform_interception(msr)
  |->prepend_content_to_of_brigade(msr, f)
  |->rc=send_of_brigade(msr, f)
  |->if(msr->phase<PHASE_RESPONSE_BODY)
       flatten_response_body(msr)
       modsecurity_process_phase(msr,PHASE_RESPONSE_BODY)
  |->inject_content_to_of_brigade(msr,f)
  |->prepend_content_to_of_brigade(msr, f)
  |->rc=send_of_brigade(msr,f)//将数据发送到过滤器流















ModSecurity Handbook is the definitive guide to ModSecurity, a popular open source web application firewall. Written by Ivan Ristic, who designed and wrote much of ModSecurity, this book will teach you everything you need to know to monitor the activity on your web sites and protect them from attack. Situated between your web sites and the world, web application firewalls provide an additional security layer, monitoring everything that comes in and everything that goes out. They enable you to perform many advanced activities, such as real-time application security monitoring, access control, virtual patching, HTTP traffic logging, continuous passive security assessment, and web application hardening. They can be very effective in preventing application security attacks, such as cross-site scripting, SQL injection, remote file inclusion, and others. Considering that most web sites today suffer from one problem or another, ModSecurity Handbook will help anyone who has a web site to run. The topics covered include: Installation and configuration of ModSecurity Logging of complete HTTP traffic Rule writing IP address, session, and user tracking Session management hardening Whitelisting, blacklisting, and IP reputation management Advanced blocking strategies Integration with other Apache modules Working with rule sets Virtual patching Performance considerations Content injection XML inspection Writing rules in Lua Extending ModSecurity in C The book is suitable for all reader levels: it contains step-by-step installation and configuration instructions for those just starting out, as well as detailed explanations of the internals and discussion of advanced techniques for seasoned users. A comprehensive reference manual is included in the second part of the book. Digital version of ModSecurity Handbook (PDF or EPUB) can be obtained directly from the author, at feistyduck.com.
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值