问题背景:在做安卓系统开发的时候,会编译生成很多个相同的文件,时常不知道此次编译更新的是哪一个文件,结果修改并未推到设备上生效,白白花时间去定位日志,最后才发现是文件没有更新所致;查找文件本身也需要很长时间。因此想写一个监控所有文件的服务:给文件建立索引以达到快速查找的目的,在文件发生修改时更新文件的最后修改时间,在文件被删除时删除已存储的索引。由于客户机平时还要用来做开发,而且文件的总数目达200万,因此既要降低内存消耗和CPU占用,又要保证速度,难度就上来了。
整个结构的核心是Linux的文件监控接口inotify:需要将inotify的wd保存下来,同时wd需要与被监控文件的全路径建立映射关系,这样在操作系统通知时就可以获知是哪一个路径下的文件发生了修改,进而去更新修改时间。但是全部保存全路径会造成空间的浪费,因此我决定对字典树进行改良,用以保存文件路径:根节点保存根目录字符串,叶节点保存最底层文件夹的名字,wd映射到叶节点的指针,通过wd对应的事件可以获取到发生改变的文件的名称。对文件的监控使用redis的AE方案进行优化。降内存和查找是目前被卡住(block)的点:根据测试,内存占用达200M,将文件列表以特定的形式存储起来并建立索引是比较好的方案,但是组织和查找需要更加优秀的算法,目前在考虑用sqlite作为内存和磁盘的交换媒介。
Linux监控文件并快速查找的实现(一):监控文件
#include <unistd.h>
#include <sys/stat.h>
#include <stdint.h>
#include <stdio.h>
#include <malloc.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
#include <dirent.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/inotify.h>
#include <linux/limits.h>
#include "trieTree.h"
static int inotifyFd = -1;
static trieNode *rootNode = NULL;
/*
 * Tell whether fileName refers to a regular file.
 *
 * Returns 1 when stat() succeeds on fileName and the reported mode is a
 * regular file, 0 in every other case (including stat() failure).
 */
int fileExist(char *fileName){
    struct stat info;

    if(stat(fileName, &info) != 0){
        return 0;
    }
    return S_ISREG(info.st_mode) ? 1 : 0;
}
/*
 * Tell whether dirName refers to a directory.
 *
 * Returns 1 when stat() succeeds on dirName and the reported mode is a
 * directory, 0 in every other case (including stat() failure).
 */
int dirExists(char *dirName){
    struct stat info;

    if(stat(dirName, &info) != 0){
        return 0;
    }
    return S_ISDIR(info.st_mode) ? 1 : 0;
}
/*
 * Place an inotify watch on dirName.
 *
 * The file-level inotify descriptor (inotifyFd) is created on the first call
 * in which it is still -1; later calls reuse it.  The watch covers attribute
 * changes, close-after-write, create, delete, delete-self, modify, move-self
 * and moved-from/moved-to events.
 *
 * Returns whatever inotify_add_watch() returns.
 */
int monitorDir(char *dirName){
    const uint32_t watchMask = IN_ATTRIB | IN_CLOSE_WRITE | IN_CREATE
                             | IN_DELETE | IN_DELETE_SELF | IN_MODIFY
                             | IN_MOVE_SELF | IN_MOVED_FROM | IN_MOVED_TO;

    if(-1 == inotifyFd){
        inotifyFd = inotify_init();
    }
    return inotify_add_watch(inotifyFd, dirName, watchMask);
}
/*
 * Register the pairing of an inotify watch descriptor with a trie node.
 *
 * The entry is inserted into wdManager->nodes at the index reported by
 * findPos(wd), shifting the following entries up by one slot, and
 * node->notifyFd is set to wd.  When no spare slot is left the storage is
 * reallocated to hold twice the current number of entries.
 *
 * wd:   value returned by monitorDir().  A negative value means the watch
 *       could not be created; it is stored on the node but NOT inserted
 *       into the table, so the table only ever holds real descriptors.
 * node: trie node to associate with wd.
 *
 * The process is aborted if the table cannot be grown (size overflow or
 * allocation failure).
 */
void loadWdManager(int wd, trieNode *node){
    if(wd < 0){
        Log("skip invalid watch descriptor %d", wd);
        node->notifyFd = wd;
        return;
    }

    /* First entry: nothing to search or shift. */
    if(wdManager->len == 0){
        wdManager->len++;
        wdManager->nodes[0].notifyFd = wd;
        wdManager->nodes[0].tail = node;
        node->notifyFd = wd;
        return;
    }

    /* Capacity is derived from the size of the heap block itself. */
    size_t capacity = (malloc_usable_size(wdManager) - sizeof(WdManager)) / sizeof(Wd);
    if((size_t)wdManager->len + 1 > capacity){
        Log("expand memory for wd manager");

        /*
         * The size arithmetic must not live inside assert(): with NDEBUG the
         * whole expression would vanish and the new size would stay 0.
         */
        size_t entryBytes = 0;
        size_t totalBytes = 0;
        if(__builtin_mul_overflow((size_t)wdManager->len, 2 * sizeof(Wd), &entryBytes)
           || __builtin_add_overflow(entryBytes, sizeof(WdManager), &totalBytes)){
            Log("wd manager size overflow");
            abort();
        }

        /* realloc keeps the header and existing entries; the old pointer
         * stays valid if it fails, so it is only replaced on success. */
        WdManager *biggerStore = realloc(wdManager, totalBytes);
        if(biggerStore == NULL){
            Log("out of memory while expanding wd manager");
            abort();
        }
        wdManager = biggerStore;
    }

    /* Open a gap at the insertion point and fill it in. */
    uint32_t pos = findPos(wd);
    memmove(wdManager->nodes + pos + 1, wdManager->nodes + pos, sizeof(Wd) * (wdManager->len - pos));
    wdManager->len++;
    wdManager->nodes[pos].notifyFd = wd;
    wdManager->nodes[pos].tail = node;
    node->notifyFd = wd;
}
/*
 * Recursively walk dirName and register every sub-directory.
 *
 * For each entry other than "." and "..":
 *   - entries whose path does not fit the buffer, or that stat() cannot
 *     inspect, are reported and skipped;
 *   - entries that are neither directories nor regular files are reported
 *     and skipped;
 *   - regular files need no action;
 *   - directories are added below `node` with addSubNode(), watched with
 *     monitorDir(), recorded with loadWdManager(), and then walked.
 *
 * A problem with one entry never stops the scan of its siblings.
 *
 * dirName: path of the directory to walk.
 * node:    trie node that represents dirName.
 *
 * NOTE(review): stat() follows symbolic links, so a link pointing at an
 * ancestor directory is walked again until the path no longer fits in
 * PATH_MAX - confirm whether links should be followed at all.
 * NOTE(review): pointers of the form node->subNode + index are kept after
 * later addSubNode() calls on the same node; this presumably relies on
 * addSubNode() never relocating subNode - verify against trieTree.
 */
void accessSubFile(char *dirName, trieNode *node){
    DIR *dir = opendir(dirName);
    if(dir == NULL){
        printf("open dir %s fail, errno: %d\n", dirName, errno);
        return;
    }

    struct dirent *dirEntry;
    struct stat statEntry;
    char fullPath[PATH_MAX + 1];

    while((dirEntry = readdir(dir)) != NULL){
        if(strcmp(dirEntry->d_name, ".") == 0 || strcmp(dirEntry->d_name, "..") == 0){
            continue;
        }

        /* A truncated path could name a different file, so refuse it. */
        int written = snprintf(fullPath, sizeof(fullPath), "%s/%s", dirName, dirEntry->d_name);
        if(written < 0 || (size_t)written >= sizeof(fullPath)){
            printf("path %s/%s is too long, skipped\n", dirName, dirEntry->d_name);
            continue;
        }

        /* One stat() is enough to classify the entry; nothing is opened. */
        if(stat(fullPath, &statEntry) == -1){
            printf("stat %s fail, errno: %d\n", fullPath, errno);
            continue;
        }

        if(!S_ISDIR(statEntry.st_mode)){
            if(!S_ISREG(statEntry.st_mode)){
                printf("skip %s: neither regular file nor directory\n", fullPath);
            }
            continue;
        }

        uint32_t index = addSubNode(node, dirEntry->d_name);
        loadWdManager(monitorDir(fullPath), node->subNode + index);
        accessSubFile(fullPath, node->subNode + index);
    }

    closedir(dir);
}
/*
 * Print a human-readable description of one inotify event.
 *
 * Output, in order: the watch descriptor; when the descriptor is present in
 * wdManager, the data of its trie node and of every ancestor reached through
 * `pre` (one Log line each, leaf first); the cookie when non-zero; the names
 * of all flags set in the mask, space separated; and the file name when the
 * event carries one.
 *
 * i: event to describe.
 */
static void displayInotifyEvent(struct inotify_event *i){
    static const struct {
        uint32_t bit;
        const char *label;
    } maskNames[] = {
        { IN_ACCESS,        "IN_ACCESS" },
        { IN_ATTRIB,        "IN_ATTRIB" },
        { IN_CLOSE_NOWRITE, "IN_CLOSE_NOWRITE" },
        { IN_CLOSE_WRITE,   "IN_CLOSE_WRITE" },
        { IN_CREATE,        "IN_CREATE" },
        { IN_DELETE,        "IN_DELETE" },
        { IN_DELETE_SELF,   "IN_DELETE_SELF" },
        { IN_IGNORED,       "IN_IGNORED" },
        { IN_ISDIR,         "IN_ISDIR" },
        { IN_MODIFY,        "IN_MODIFY" },
        { IN_MOVE_SELF,     "IN_MOVE_SELF" },
        { IN_MOVED_FROM,    "IN_MOVED_FROM" },
        { IN_MOVED_TO,      "IN_MOVED_TO" },
        { IN_OPEN,          "IN_OPEN" },
        { IN_Q_OVERFLOW,    "IN_Q_OVERFLOW" },
        { IN_UNMOUNT,       "IN_UNMOUNT" },
    };

    printf(" wd=%2d; ", i->wd);

    /*
     * findPos() is also used as an insertion index, so it may point one past
     * the last entry for a descriptor that is not stored; check the bound
     * before touching the slot.
     */
    uint32_t pos = findPos(i->wd);
    if(pos < wdManager->len && wdManager->nodes[pos].notifyFd == i->wd){
        for(trieNode *node = wdManager->nodes[pos].tail; node != NULL; node = node->pre){
            Log("%s", node->data);
        }
    }

    if(i->cookie > 0){
        /* cookie is unsigned */
        printf("cookie = %4u; ", (unsigned)i->cookie);
    }

    printf("mask = ");
    for(size_t k = 0; k < sizeof(maskNames) / sizeof(maskNames[0]); k++){
        if(i->mask & maskNames[k].bit){
            /* trailing space keeps adjacent flag names apart */
            printf("%s ", maskNames[k].label);
        }
    }
    printf("\n");

    if(i->len > 0){
        printf(" name = %s\n ", i->name);
    }
}
int main(int argc, char *argv[]){
if(argc < 2){
printf("parameter lose, exist!\n");
return -1;
}
char absPath[PATH_MAX + 1];
if(realpath(argv[1], absPath) == NULL){
printf("transform %s to real path fail, errno: %d\n", argv[1], errno);
return -1;
}
const char *output = NULL;
if(argc == 3){
output = argv[2];
}
struct stat statbuf;
if(stat(absPath, &statbuf) != 0){
printf("open file %s failed, errno is %d\n", absPath, errno);
return -1;
}
rootNode = initTrieTree(absPath);
wdManager = (WdManager *)malloc(sizeof(WdManager) + 2 * sizeof(Wd));
memset(wdManager, 0, sizeof(*wdManager));
wdManager->len = 0;
// printf("%p %p %p\n",wdManager, wdManager + 1, (wdManager->nodes));
// printf("%d %d\n", sizeof(WdManager), sizeof(Wd));
// return 0;
loadWdManager(monitorDir(absPath), rootNode);
if(S_ISDIR(statbuf.st_mode)){//文件夹
accessSubFile(absPath, rootNode);
}
// debugAllNode(rootNode, 0);
for(;;){
char buf[PATH_MAX + 1];
ssize_t numRead;
numRead = read(inotifyFd, buf, PATH_MAX + 1);
if(numRead <= 0){
printf("read error %ld\n", numRead);
return 0;
}
printf("read %ld bytes from inotify fd\n", numRead);
for(char *p = buf; p < buf + numRead;){
struct inotify_event *event = (struct inotify_event *)p;
displayInotifyEvent(event);
p += sizeof(struct inotify_event) + event->len;
}
}
free(wdManager);
return 0;
}
#include <limits.h>
/*
 * Stand-alone demo loop: watch every path given on the command line for all
 * inotify events and print each event with displayInotifyEvent().
 *
 * argc/argv: argv[1..argc-1] are the paths to watch; with no path, or with
 *            "--help" as the first argument, a usage line is printed and the
 *            function returns.
 *
 * Returns 0 in all cases (usage, set-up failure, or read() failure).
 */
int wmain(int argc, char *argv[]){
    if(argc < 2 || strcmp(argv[1], "--help") == 0){
        printf("%s pathname...\n", argc > 0 ? argv[0] : "wmain");
        return 0;
    }

    /* Local descriptor, named so it does not shadow the file-level one. */
    int fd = inotify_init();
    if(fd == -1){
        printf("error in inotify_init\n");
        return 0;
    }

    /* argv[0] is the program name, so the paths start at index 1. */
    for(int j = 1; j < argc; j++){
        int wd = inotify_add_watch(fd, argv[j], IN_ALL_EVENTS);
        if(wd == -1){
            printf("error in inotify_add_watch\n");
            close(fd);
            return 0;
        }
        printf("watching %s using wd %d\n", argv[j], wd);
    }

    /* The buffer is cast to struct inotify_event, so it must be aligned
     * for that type. */
    char buf[PATH_MAX + 1] __attribute__((aligned(__alignof__(struct inotify_event))));

    for(;;){
        ssize_t numRead = read(fd, buf, sizeof(buf));
        if(numRead <= 0){
            printf("read error %zd\n", numRead);
            close(fd);
            return 0;
        }
        printf("read %zd bytes from inotify fd\n", numRead);

        /* Events are variable length: header followed by `len` name bytes. */
        for(char *p = buf; p < buf + numRead;){
            struct inotify_event *event = (struct inotify_event *)p;
            displayInotifyEvent(event);
            p += sizeof(struct inotify_event) + event->len;
        }
    }
}