TFS文件系统格式化分析
TFS 的数据服务器(DataServer)在使用之前,需要先对其文件系统进行格式化。下面对相关代码做一下分析。
(1). 格式化文件系统
//格式化文件系统 int BlockFileManager::format_block_file_system(const SysParam::FileSystemParam& fs_param) { int ret = init_super_blk_param(fs_param); if (TFS_SUCCESS != ret) return ret; ret = create_fs_dir(); if (TFS_SUCCESS != ret) return ret; ret = create_fs_super_blk(); if (TFS_SUCCESS != ret) return ret; ret = create_block(C_MAIN_BLOCK); if (TFS_SUCCESS != ret) return ret; ret = create_block(C_EXT_BLOCK); if (TFS_SUCCESS != ret) return ret; return TFS_SUCCESS; } |
(2).初始化超级块
//根据文件参数初始化超级块 int BlockFileManager::init_super_blk_param(const SysParam::FileSystemParam& fs_param) { memset((void *) &super_block_, 0, sizeof(SuperBlock)); //"TAOBAO" memcpy(super_block_.mount_tag_, DEV_TAG, sizeof(super_block_.mount_tag_)); super_block_.time_ = time(NULL); //每个dataserver进程的挂载目录前缀(实际挂载目录类似于/data/tfs1,启动的时候传入序号) strncpy(super_block_.mount_point_, fs_param.mount_name_.c_str(), MAX_DEV_NAME_LEN); TBSYS_LOG(INFO, "super block mount point: %s.", super_block_.mount_point_); int32_t scale = 1024; //mount_maxsize = 209715200//挂载点的可用空间(以K为单位,例如即是G) //这里换算为字节 super_block_.mount_point_use_space_ = fs_param.max_mount_size_ * scale; super_block_.base_fs_type_ = static_cast<BaseFsType> (fs_param.base_fs_type_); if (EXT4 != super_block_.base_fs_type_ && EXT3_FULL != super_block_.base_fs_type_ && EXT3_FTRUN != super_block_.base_fs_type_) { TBSYS_LOG(ERROR, "base fs type is not supported. base fs type: %d", super_block_.base_fs_type_); return TFS_ERROR; } //超级块存储的偏移量默认值是 super_block_.superblock_reserve_offset_ = fs_param.super_block_reserve_offset_; //位图起始偏移量=0+2*超级块大小+一个整形的size super_block_.bitmap_start_offset_ = super_block_.superblock_reserve_offset_ + 2 * sizeof(SuperBlock)+ sizeof(int32_t); //超级块段的平均尺寸(平均文件大小),默认是K super_block_.avg_segment_size_ = fs_param.avg_segment_size_; //block_type_ratio_默认值是,主块/扩展块 super_block_.block_type_ratio_ = fs_param.block_type_ratio_; //INDEXFILE_SAFE_MULT=4 META_INFO_SIZE=sizeof(MetaInfo) int32_t data_ratio = super_block_.avg_segment_size_ / (META_INFO_SIZE * INDEXFILE_SAFE_MULT); //可以得到的数据空间:修正该avail_data_space值大小使之成为data_ratio的整数倍 int64_t avail_data_space = static_cast<int64_t> (super_block_.mount_point_use_space_* (static_cast<float> (data_ratio) / static_cast<float> (data_ratio + 1))); if (avail_data_space <= 0) { TBSYS_LOG( ERROR, "format filesystem fail. 
avail data space: %" PRI64_PREFIX "d, avg segment size: %d, single meta size: %u, data ratio: %d/n", avail_data_space, super_block_.avg_segment_size_, META_INFO_SIZE, data_ratio); return TFS_ERROR; } //main_block_size_默认值是:67108864=64M super_block_.main_block_size_ = fs_param.main_block_size_; //extend_block_size_默认值是:33554432=32M super_block_.extend_block_size_ = fs_param.extend_block_size_; int32_t main_block_count = 0, extend_block_count = 0; //通过可得到的数据空间得到主块的个数,扩展块的个数 calc_block_count(avail_data_space, main_block_count, extend_block_count); super_block_.main_block_count_ = main_block_count; super_block_.extend_block_count_ = extend_block_count; super_block_.used_block_count_ = 0; super_block_.used_extend_block_count_ = 0; super_block_.hash_slot_ratio_ = fs_param.hash_slot_ratio_; //计算每块的文件个数=(super_block_.main_block_size_+ super_block_.extend_block_size_ /fs_param.block_type_ratio_)/super_block_.avg_segment_size_ int32_t per_block_file_num = static_cast<int32_t> ((super_block_.main_block_size_+ static_cast<float> (super_block_.extend_block_size_) / fs_param.block_type_ratio_)/ super_block_.avg_segment_size_); //bucket : file = hash_slot_ratio_ //hash_slot_ratio = 0.5 //每个Block中hash槽和文件个数的比例 int32_t hash_bucket_size = static_cast<int32_t> (super_block_.hash_slot_ratio_* static_cast<float> (per_block_file_num)); //得出槽的内存大小:存放元数据文件的偏移量信息 int32_t hash_bucket_mem_size = hash_bucket_size * sizeof(int32_t); //源数据大小=sizeof(MetaInfo)*每块中的文件数 int32_t meta_info_size = META_INFO_SIZE * per_block_file_num; //映射文件大小=槽的内存大小+源数据大小 int32_t need_mmap_size = hash_bucket_mem_size + meta_info_size; super_block_.hash_slot_size_ = hash_bucket_size; int32_t sz = getpagesize(); //内存页的大小 int32_t count = need_mmap_size / sz; //需要得到页的个数 int32_t remainder = need_mmap_size % sz; //剩余 MMapOption mmap_option; //修正内存文件映射起始大小,使他成为内存页大小的整数倍 mmap_option.first_mmap_size_ = remainder ? 
(count + 1) * sz : count * sz; //每次映射大小 mmap_option.per_mmap_size_ = sz; //INNERFILE_MAX_MULTIPE=30,最大映射文件大小 mmap_option.max_mmap_size_ = mmap_option.first_mmap_size_ * INNERFILE_MAX_MULTIPE; super_block_.mmap_option_ = mmap_option; super_block_.version_ = fs_param.file_system_version_; super_block_.display(); return TFS_SUCCESS; } |
初始化超级块时,调用了下面这个用于计算物理主块和物理扩展块个数的函数:
void BlockFileManager::calc_block_count(const int64_t avail_data_space, int32_t& main_block_count, int32_t& ext_block_count) { //计算公式 //avail_data_space=super_block_.extend_block_size_*ext_block_count+ext_block_count*super_block_.block_type_ratio_*super_block_.main_block_size_ //扩展块的个数=avail_data_space / (super_block_.block_type_ratio_* super_block_.main_block_size_ + super_block_.extend_block_size_) ext_block_count = static_cast<int32_t> (static_cast<float> (avail_data_space) / (super_block_.block_type_ratio_* static_cast<float> (super_block_.main_block_size_) + static_cast<float> (super_block_.extend_block_size_))); //主块的个数=ext_block_count * super_block_.block_type_ratio_ main_block_count = static_cast<int32_t> (static_cast<float> (ext_block_count) * super_block_.block_type_ratio_); TBSYS_LOG(INFO, "cal block count. avail data space: %" PRI64_PREFIX "d, main block count: %d, ext block count: %d", avail_data_space, main_block_count, ext_block_count); } |
(3). 创建文件系统目录
int BlockFileManager::create_fs_dir() { //super_block_.display(); //创建挂载点目录:每个dataserver进程的挂载目录前缀(实际挂载目录类似于/data/tfs1,启动的时候传入序号) int ret = mkdir(super_block_.mount_point_, DIR_MODE); if (ret && errno != EEXIST) { TBSYS_LOG(ERROR, "make extend dir error. dir: %s, ret: %d, error: %d, error desc: %s", super_block_.mount_point_, ret, errno, strerror(errno)); return TFS_ERROR; } std::string extend_dir = super_block_.mount_point_; extend_dir += EXTENDBLOCK_DIR_PREFIX; //创建扩展目录extend_dir=/data/tfs1/extend/ ret = mkdir(extend_dir.c_str(), DIR_MODE); if (ret) { TBSYS_LOG(ERROR, "make extend dir:%s error. ret: %d, error: %d", extend_dir.c_str(), ret, errno); return TFS_ERROR; } //创建索引目录index_dir=/data/tfs1/index/ std::string index_dir = super_block_.mount_point_; index_dir += INDEX_DIR_PREFIX; ret = mkdir(index_dir.c_str(), DIR_MODE); if (ret) { TBSYS_LOG(ERROR, "make index dir error. ret: %d, error: %d", ret, errno); return TFS_ERROR; }
return TFS_SUCCESS; } |
(4). 创建文件系统超级块
//创建文件系统超级块 int BlockFileManager::create_fs_super_blk() { uint32_t bit_map_size = calc_bitmap_count(); int super_block_file_size = 2 * sizeof(SuperBlock) + sizeof(int32_t) + 4 * bit_map_size; char* tmp_buffer = new char[super_block_file_size]; memcpy(tmp_buffer, &super_block_, sizeof(SuperBlock)); memcpy(tmp_buffer + sizeof(SuperBlock), &super_block_, sizeof(SuperBlock)); //两倍的* sizeof(SuperBlock)内存后面* bit_map_size + sizeof(int)的内存全部清零,作为写入的内容 memset(tmp_buffer + 2 * sizeof(SuperBlock), 0, 4 * bit_map_size + sizeof(int)); std::string super_block_file = super_block_.mount_point_; //super_block_file=data/tfs1//fs_super super_block_file += SUPERBLOCK_NAME; FileOperation* super_file_op = new FileOperation(super_block_file, O_RDWR | O_CREAT); //参数:写入缓冲区,写入字节数,写入的偏移量 int ret = super_file_op->pwrite_file(tmp_buffer, super_block_file_size, 0); if (TFS_SUCCESS != ret) { TBSYS_LOG(ERROR, "write super block file error. ret: %d.", ret); return ret; } tbsys::gDelete(super_file_op); tbsys::gDeleteA(tmp_buffer); return TFS_SUCCESS; } |
在该函数里面,调用了计算位图槽个数的函数:
uint32_t BlockFileManager::calc_bitmap_count() { //项目个数=超级块个数+扩展块个数+1 uint32_t item_count = super_block_.main_block_count_ + super_block_.extend_block_count_ + 1; BitMap tmp_bit_map(item_count); //得到槽的个数 uint32_t slot_count = tmp_bit_map.get_slot_count(); TBSYS_LOG(INFO, "cal bitmap count. item count: %u, slot count: %u", item_count, slot_count); return slot_count; } |
对于BitMap类有如下实现方法:
//分别是x80 1000 0000 // 0x40 0100 0000 // 0x20 0010 0000 // 0x10 0001 0000 // 0x08 0000 1000 // 0x04 0000 0100 // 0x02 0000 0010 // 0x01 0000 0001 const unsigned char BitMap::BITMAPMASK[SLOT_SIZE] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 }; |
//set_flag参数默认是false BitMap::BitMap(const uint32_t item_count, const bool set_flag) { assert(item_count != 0); //假定主块是,扩展块是,那么+3+1+8-1=20 在右移位 0001 0100===>0000 0010 //item_count=3+9+1=13 //槽的个数=2 slot_count_ = (item_count + SLOT_SIZE - 1) >> 3; data_ = new char[sizeof(char) * slot_count_]; //分配两个字节缓冲 item_count_ = item_count; memset(data_, set_flag ? 0xFF : 0x0, slot_count_ * sizeof(char)); set_count_ = set_flag ? item_count : 0; mount_ = false; } |
void BitMap::copy(const uint32_t slot_count, const char* bitmap_data) { assert(NULL != data_); assert(slot_count_ == slot_count); //拷贝两个字节 memcpy(data_, bitmap_data, slot_count); set_count_ = 0; //假定主块是,扩展块是,那么+9+1就是item_count_=13 for (uint32_t pos = 0; pos < item_count_; ++pos) { if (test(pos)) { ++set_count_; //已经占用的个数 } } return; } |
//测试该index是否被占用 bool BitMap::test(const uint32_t index) const { //假定主块是9,扩展块是3,那么+9+1=13=item_count_,而SLOT_SIZE=8,而index在到之间 assert(index < item_count_); uint32_t quot = index / SLOT_SIZE; //只有可能是,1 uint32_t rem = index % SLOT_SIZE; //可能是到 return (data_[quot] & BITMAPMASK[rem]) != 0; //&位操作,两个表达式对应位全为时,结果才为,否则为 } |
//设置该index是被占用 void BitMap::set(const uint32_t index) { assert(index < item_count_); uint32_t quot = index / SLOT_SIZE; uint32_t rem = index % SLOT_SIZE; if (!(data_[quot] & BITMAPMASK[rem])) { //一个字节位,设置该位的值为BITMAPMASK[rem] data_[quot] |= BITMAPMASK[rem]; //只要一个位为则结果为 ++set_count_; } } |
//撤销该index的占用 void BitMap::reset(const uint32_t index) { assert(index < item_count_); uint32_t quot = index / SLOT_SIZE; uint32_t rem = index % SLOT_SIZE; if (data_[quot] & BITMAPMASK[rem]) { data_[quot] &= ~BITMAPMASK[rem]; //~取反,顾名思义,变为,变为 --set_count_; } } |
(5). 格式化文件系统时创建块
//格式化文件系统时创建块 int BlockFileManager::create_block(const BlockType block_type) { int32_t prefix_size = sizeof(BlockPrefix); char* block_prefix = new char[prefix_size]; memset(block_prefix, 0, prefix_size); FileFormater* file_formater = NULL; if (EXT4 == super_block_.base_fs_type_) { file_formater = new Ext4FileFormater(); } else if (EXT3_FULL == super_block_.base_fs_type_) { file_formater = new Ext3FullFileFormater(); } else if (EXT3_FTRUN == super_block_.base_fs_type_) { file_formater = new Ext3SimpleFileFormater(); } else { TBSYS_LOG(ERROR, "base fs type is not supported. base fs type: %d", super_block_.base_fs_type_); return TFS_ERROR; } int32_t block_count = 0; int32_t block_size = 0; if (C_MAIN_BLOCK == block_type) { block_count = super_block_.main_block_count_; block_size = super_block_.main_block_size_; } else if (C_EXT_BLOCK == block_type) { block_count = super_block_.extend_block_count_; block_size = super_block_.extend_block_size_; } else { return TFS_ERROR; } for (int32_t i = 1; i <= block_count; ++i) { std::string block_file; std::stringstream tmp_stream; if (C_MAIN_BLOCK == block_type) { //MAINBLOCK_DIR_PREFIX="/" tmp_stream << super_block_.mount_point_ << MAINBLOCK_DIR_PREFIX << i; } else { //EXTENDBLOCK_DIR_PREFIX=/extend/ tmp_stream << super_block_.mount_point_ << EXTENDBLOCK_DIR_PREFIX << (i + super_block_.main_block_count_); } //格式化时创建块 //主块= /data/tfs1/1 /data/tfs1/2 /data/tfs1/3 //对应扩展块 /data/tfs1/extend/1+主块个数 /data/tfs1/extend/2+主块个数 /data/tfs1/extend/3+主块个数 tmp_stream >> block_file; FileOperation* file_op = new FileOperation(block_file, O_RDWR | O_CREAT); int ret = file_op->open_file(); if (ret < 0) { TBSYS_LOG(ERROR, "allocate space error. ret: %d, error: %d, error desc: %s/n", ret, errno, strerror(errno)); return ret; } ret = file_formater->block_file_format(file_op->get_fd(), block_size); if (TFS_SUCCESS != ret) { TBSYS_LOG(ERROR, "allocate space error. 
ret: %d, error: %d, error desc: %s/n", ret, errno, strerror(errno)); return ret; } //格式化块文件后在先写入块前缀 ret = file_op->pwrite_file(block_prefix, prefix_size, 0); if (TFS_SUCCESS != ret) { TBSYS_LOG(ERROR, "write block file error. physcial block id: %d, block type: %d, ret: %d.", i, block_type, ret); return ret; } tbsys::gDelete(file_op); } tbsys::gDelete(file_formater); return TFS_SUCCESS; }
|