HandlerSocket是mysql的一个插件,主要是越过parser、optimizer这一层,直接对数据库进行key/value方式的存取;对于大内存的数据库,能极大地提高性能.
通过阅读代码,发现它主要定义了下面几个结构体(其中封装了mysql的TABLE、THD、MYSQL_LOCK、Item等类型)
/*
 * One element of dbcontext's table vector: a TABLE pointer together with a
 * reference count and a "modified" flag. A default-constructed entry holds
 * a null table, a zero refcount and modified == false.
 */
struct tablevec_entry {
  TABLE *table;     /* null until a table is stored here */
  size_t refcount;  /* starts at zero */
  bool modified;    /* starts out false */

  tablevec_entry()
    : table(0),
      refcount(0),
      modified(false)
  {
  }
};
/*
 * Bundles the MySQL Item objects needed to evaluate a get-lock and a
 * release-lock function item on the fixed key "handlersocket_wr".
 * NOTE(review): presumably equivalent to SQL GET_LOCK('handlersocket_wr',
 * timeout) / RELEASE_LOCK('handlersocket_wr') -- confirm against the
 * Item_func_get_lock / Item_func_release_lock implementations.
 */
struct expr_user_lock : private noncopyable {
/*
 * thd:     passed to fix_fields() of every item below.
 * timeout: value wrapped in lck_timeout and handed to the get-lock item.
 */
expr_user_lock(THD *thd, int timeout)
/* 16 is the length of the literal "handlersocket_wr" */
: lck_key("handlersocket_wr", 16, &my_charset_latin1),
lck_timeout(timeout),
lck_func_get_lock(&lck_key, &lck_timeout),
lck_func_release_lock(&lck_key)
{
/* argument items are fixed first, then the function items that use them */
lck_key.fix_fields(thd, 0);
lck_timeout.fix_fields(thd, 0);
lck_func_get_lock.fix_fields(thd, 0);
lck_func_release_lock.fix_fields(thd, 0);
}
/* Evaluates the get-lock item and returns its integer value. */
long long get_lock() {
return lck_func_get_lock.val_int();
}
/* Evaluates the release-lock item and returns its integer value. */
long long release_lock() {
return lck_func_release_lock.val_int();
}
private:
/*
 * Declaration order matters: the function items take the addresses of
 * lck_key and lck_timeout in the constructor's initializer list, so those
 * two members are declared (and therefore constructed) first.
 */
Item_string lck_key;
Item_int lck_timeout;
Item_func_get_lock lck_func_get_lock;
Item_func_release_lock lck_func_release_lock;
};
/*
 * Concrete implementation of the dbcontext_i interface. Holds a THD, a
 * MYSQL_LOCK, and the vector/map of opened tables used by the cmd_*
 * entry points.
 * NOTE(review): the init_thread()/term_thread() pair suggests one instance
 * per worker thread -- confirm against the callers.
 */
struct dbcontext : public dbcontext_i, private noncopyable {
/* d: owning database object; for_write: stored in for_write_flag (see below) */
dbcontext(volatile database *d, bool for_write);
virtual ~dbcontext();
/* thread lifecycle (implementations not visible in this excerpt) */
virtual void init_thread(const void *stack_botton,
volatile int& shutdown_flag);
virtual void term_thread();
virtual bool check_alive();
/* table locking / unlocking; cmd_find_internal checks `lock` after lock_tables_if() */
virtual void lock_tables_if();
virtual void unlock_tables_if();
virtual bool get_commit_error();
virtual void clear_error();
virtual void close_tables_if();
/* reference counting on an entry of table_vec, addressed by index -- presumably; TODO confirm */
virtual void table_addref(size_t tbl_id);
virtual void table_release(size_t tbl_id);
/* request entry points; results are reported through the dbcallback_i */
virtual void cmd_open(dbcallback_i& cb, const cmd_open_args& args);
virtual void cmd_exec(dbcallback_i& cb, const cmd_exec_args& args);
virtual void set_statistics(size_t num_conns, size_t num_active);
private:
/* printf-style; format index is 2 because of the implicit `this` argument */
int set_thread_message(const char *fmt, ...)
__attribute__((format (printf, 2, 3)));
bool parse_fields(TABLE *const table, const char *str,
prep_stmt::fields_type& flds);
/* per-command helpers, presumably dispatched from cmd_exec -- TODO confirm */
void cmd_insert_internal(dbcallback_i& cb, const prep_stmt& pst,
const string_ref *fvals, size_t fvalslen);
void cmd_sql_internal(dbcallback_i& cb, const prep_stmt& pst,
const string_ref *fvals, size_t fvalslen);
void cmd_find_internal(dbcallback_i& cb, const prep_stmt& pst,
ha_rkey_function find_flag, const cmd_exec_args& args);
/*
 * Filter support used by cmd_find_internal: size the buffer, fill it
 * (returns false on failure), then check each row (a negative result ends
 * the scan, another nonzero result skips the row).
 */
size_t calc_filter_buf_size(TABLE *table, const prep_stmt& pst,
const record_filter *filters);
bool fill_filter_buf(TABLE *table, const prep_stmt& pst,
const record_filter *filters, uchar *filter_buf, size_t len);
int check_filter(dbcallback_i& cb, TABLE *table, const prep_stmt& pst,
const record_filter *filters, const uchar *filter_buf);
/* resp_record: called for each matching row when a record response is wanted */
void resp_record(dbcallback_i& cb, TABLE *const table, const prep_stmt& pst);
/* dump_record: called only from the debug-output path of cmd_find_internal */
void dump_record(dbcallback_i& cb, TABLE *const table, const prep_stmt& pst);
/* applies mod_op to the current row; its int result becomes the scan status */
int modify_record(dbcallback_i& cb, TABLE *const table,
const prep_stmt& pst, const cmd_exec_args& args, char mod_op,
size_t& modified_count);
private:
typedef std::vector<tablevec_entry> table_vec_type;
/* presumably (database name, table name) -- TODO confirm order */
typedef std::pair<std::string, std::string> table_name_type;
/* maps a table name pair to a size_t, presumably an index into table_vec */
typedef std::map<table_name_type, size_t> table_map_type;
private:
volatile database *const dbref;
/* when false, cmd_find_internal rejects any modify operation as "readonly" */
bool for_write_flag;
THD *thd;
/* cmd_find_internal treats lock == 0 after lock_tables_if() as a failure */
MYSQL_LOCK *lock;
bool lock_failed;
std::auto_ptr<expr_user_lock> user_lock;
int user_level_lock_timeout;
bool user_level_lock_locked;
bool commit_error;
std::vector<char> info_message_buf;
table_vec_type table_vec;
table_map_type table_map;
};
对表加锁,调用的是mysql的
MYSQL_LOCK *mysql_lock_tables(THD *thd, TABLE **tables, uint count, uint flags)
表解锁
void mysql_unlock_tables(THD *thd, MYSQL_LOCK *sql_lock)
修改完后,提交事务
bool trans_commit_stmt(THD *thd)
关闭表
void close_thread_tables(THD *thd)
打开表
bool open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root,
Open_table_context *ot_ctx)
/*
 * The core function: executes an index lookup (optionally followed by a
 * modify operation on every matching row) and reports the result through
 * the callback.
 *
 * cb:        receives the response (dbcb_resp_short / dbcb_resp_begin /
 *            dbcb_resp_end / dbcb_resp_cancel / dbcb_resp_short_num).
 * pst:       prepared statement; supplies the table id, index number and
 *            the list of returned fields.
 * find_flag: how the first key lookup and the subsequent row stepping are
 *            performed (exact / before / after / or-prev / or-next).
 * args:      key values, limit/skip, optional filters, optional IN-values
 *            and the optional modify operation string.
 *
 * Responses emitted by this function:
 *   (2, "readonly")    modify requested on a context without for_write_flag
 *   (2, "modop")       modify operation is not one of U, D, +, -
 *   (1, "lock_tables") no table lock is held after lock_tables_if()
 *   (2, "tblnum")      table id out of range
 *   (2, "idxnum")      index number out of range
 *   (2, "kpnum")       more key values than the index has key parts
 *   (2, "filterblob")  fill_filter_buf() failed
 *   (1, <handler error code>) the scan ended with an unexpected error
 *   otherwise the record set, or (0, modified_count) when no record
 *   response was requested.
 */
void
dbcontext::cmd_find_internal(dbcallback_i& cb, const prep_stmt& pst,
  ha_rkey_function find_flag, const cmd_exec_args& args)
{
  const bool debug_out = (verbose_level >= 100);
  bool need_resp_record = true;
  char mod_op = 0;
  const string_ref& mod_op_str = args.mod_op;
  if (mod_op_str.size() != 0) {
    if (!for_write_flag) {
      return cb.dbcb_resp_short(2, "readonly");
    }
    mod_op = mod_op_str.begin()[0];
    /* with a modify op, rows are returned only if the op is suffixed by '?' */
    need_resp_record = mod_op_str.size() > 1 && mod_op_str.begin()[1] == '?';
    switch (mod_op) {
    case 'U': /* update */
    case 'D': /* delete */
    case '+': /* increment */
    case '-': /* decrement */
      break;
    default:
      if (debug_out) {
        fprintf(stderr, "unknown modop: %c\n", mod_op);
      }
      return cb.dbcb_resp_short(2, "modop");
    }
  }
  lock_tables_if();
  if (lock == 0) {
    return cb.dbcb_resp_short(1, "lock_tables");
  }
  if (pst.get_table_id() >= table_vec.size()) {
    return cb.dbcb_resp_short(2, "tblnum");
  }
  TABLE *const table = table_vec[pst.get_table_id()].table;
  /* keys */
  if (pst.get_idxnum() >= table->s->keys) {
    return cb.dbcb_resp_short(2, "idxnum");
  }
  KEY& kinfo = table->key_info[pst.get_idxnum()];
  if (args.kvalslen > kinfo.key_parts) {
    return cb.dbcb_resp_short(2, "kpnum");
  }
  uchar *const key_buf = DENA_ALLOCA_ALLOCATE(uchar, kinfo.key_length);
  size_t invalues_idx = 0;
  size_t kplen_sum = prepare_keybuf(args, key_buf, table, kinfo, invalues_idx);
  /* filters */
  uchar *filter_buf = 0;
  if (args.filters != 0) {
    const size_t filter_buf_len = calc_filter_buf_size(table, pst,
      args.filters);
    filter_buf = DENA_ALLOCA_ALLOCATE(uchar, filter_buf_len);
    if (!fill_filter_buf(table, pst, args.filters, filter_buf,
      filter_buf_len)) {
      /*
       * Release both buffers before bailing out, exactly as the normal exit
       * path does. The DENA_ALLOCA_* macros are defined outside this view;
       * if a build backs them with the heap, skipping the free here would
       * leak on every bad filter.
       */
      DENA_ALLOCA_FREE(filter_buf);
      DENA_ALLOCA_FREE(key_buf);
      return cb.dbcb_resp_short(2, "filterblob");
    }
  }
  /* handler */
  table->read_set = &table->s->all_set;
  handler *const hnd = table->file;
  if (!for_write_flag) {
    hnd->init_table_handle_for_HANDLER();
  }
  hnd->ha_index_or_rnd_end();
  hnd->ha_index_init(pst.get_idxnum(), 1);
  if (need_resp_record) {
    cb.dbcb_resp_begin(pst.get_ret_fields().size());
  }
  /* a limit of 0 is treated as 1 */
  const uint32_t limit = args.limit ? args.limit : 1;
  uint32_t skip = args.skip;
  size_t modified_count = 0;
  int r = 0;
  bool is_first = true;
  /*
   * Bitmap with the low kvalslen bits set; it depends only on args, so it is
   * computed once for every index_read_map() call below.
   * NOTE(review): 1U << kvalslen is undefined for kvalslen >= 32. kvalslen is
   * bounded by kinfo.key_parts above -- confirm the server's maximum number
   * of key parts stays below 32.
   */
  const key_part_map kpm = (1U << args.kvalslen) - 1;
  /* cnt only advances on a "hit", so skipped/filtered rows do not count */
  for (uint32_t cnt = 0; cnt < limit + skip;) {
    if (is_first) {
      /* first iteration: position the index on the requested key */
      is_first = false;
      r = hnd->index_read_map(table->record[0], key_buf, kpm, find_flag);
    } else if (args.invalues_keypart >= 0) {
      /* IN-values mode: rebuild the key from the next value and look it up */
      if (++invalues_idx >= args.invalueslen) {
        break;
      }
      kplen_sum = prepare_keybuf(args, key_buf, table, kinfo, invalues_idx);
      r = hnd->index_read_map(table->record[0], key_buf, kpm, find_flag);
    } else {
      /* subsequent iterations: step in the direction implied by find_flag */
      switch (find_flag) {
      case HA_READ_BEFORE_KEY:
      case HA_READ_KEY_OR_PREV:
        r = hnd->index_prev(table->record[0]);
        break;
      case HA_READ_AFTER_KEY:
      case HA_READ_KEY_OR_NEXT:
        r = hnd->index_next(table->record[0]);
        break;
      case HA_READ_KEY_EXACT:
        r = hnd->index_next_same(table->record[0], key_buf, kplen_sum);
        break;
      default:
        r = HA_ERR_END_OF_FILE; /* to finish the loop */
        break;
      }
    }
    if (debug_out) {
      fprintf(stderr, "r=%d\n", r);
      if (r == 0 || r == HA_ERR_RECORD_DELETED) {
        dump_record(cb, table, pst);
      }
    }
    int filter_res = 0;
    if (r != 0) {
      /* no-count */
    } else if (args.filters != 0 && (filter_res = check_filter(cb, table,
      pst, args.filters, filter_buf)) != 0) {
      /* negative: stop scanning; other nonzero: skip this row */
      if (filter_res < 0) {
        break;
      }
    } else if (skip > 0) {
      --skip;
    } else {
      /* hit */
      if (need_resp_record) {
        resp_record(cb, table, pst);
      }
      if (mod_op != 0) {
        r = modify_record(cb, table, pst, args, mod_op, modified_count);
      }
      ++cnt;
    }
    /* in IN-values mode a missing key just moves on to the next value */
    if (args.invalues_keypart >= 0 && r == HA_ERR_KEY_NOT_FOUND) {
      continue;
    }
    if (r != 0 && r != HA_ERR_RECORD_DELETED) {
      break;
    }
  }
  hnd->ha_index_or_rnd_end();
  if (r != 0 && r != HA_ERR_RECORD_DELETED && r != HA_ERR_KEY_NOT_FOUND &&
    r != HA_ERR_END_OF_FILE) {
    /* failed */
    if (need_resp_record) {
      /* revert dbcb_resp_begin() and dbcb_resp_entry() */
      cb.dbcb_resp_cancel();
    }
    cb.dbcb_resp_short_num(1, r);
  } else {
    /* succeeded */
    if (need_resp_record) {
      cb.dbcb_resp_end();
    } else {
      cb.dbcb_resp_short_num(0, modified_count);
    }
  }
  DENA_ALLOCA_FREE(filter_buf);
  DENA_ALLOCA_FREE(key_buf);
}
总的来说,HandlerSocket并没有直接调用innodb的函数,而是调用了其上一层用C++写的handler接口(如index_read_map、index_next、index_prev等),
把传进来的key,封装成索引的查找条件,之后去数据库中查找
锁的粒度较大,是表级的;对于select较多的应用应该较快,对于update较多的应用,性能不一定高
实质上类似于通过C语言直接调用oracle的OCI,性能提高主要还是mysql的实现好吧
,
本文深入探讨了HandlerSocket作为MySQL插件的工作原理,特别是在提高大内存数据库性能方面的应用。文章详细介绍了其通过直接进行key/value存储来绕过MySQL的解析器和优化器层,以及其实现过程中的关键数据结构和函数调用。
182

被折叠的 条评论
为什么被折叠?



