效果:
root@localhost :/home/James/mypro/Linux-Pro/dir_operation# ./detect_dir_loop /home/James/mypro/
Error occured when getting realpath of [link2]: No such file or directory
Error occured when getting realpath of [link3]: No such file or directory
Error occured when getting realpath of [link5]: No such file or directory
Error occured when getting realpath of [link4]: No such file or directory
Error occured when getting realpath of [link6]: No such file or directory
Error occured when getting realpath of [link1]: No such file or directory
Error occured when getting realpath of [bar]: Too many levels of symbolic links
/home/James/mypro/Linux-Pro/dir_operation/mnt1/dir1/link1 recursively reference itself!
/home/James/mypro/Linux-Pro/dir_operation/mnt1/dir2/link2/link1 recursively reference itself!
正确性检测:
root@localhost :/home/James/proto3-standard/external/alsa-lib/include# grep -Ri "xxxxxxxxxx" /home/James/mypro/
grep: /home/James/mypro/Linux-Pro/Toys/symlink/link2: 没有那个文件或目录
grep: /home/James/mypro/Linux-Pro/Toys/symlink/link3: 没有那个文件或目录
grep: /home/James/mypro/Linux-Pro/Toys/symlink/link5: 没有那个文件或目录
grep: /home/James/mypro/Linux-Pro/Toys/symlink/link4: 没有那个文件或目录
grep: /home/James/mypro/Linux-Pro/Toys/symlink/link6: 没有那个文件或目录
grep: /home/James/mypro/Linux-Pro/Toys/symlink/link1: 没有那个文件或目录
grep: /home/James/mypro/Linux-Pro/Toys/symlink/bar: 符号连接的层数过多
grep: 警告: /home/James/mypro/Linux-Pro/dir_operation/mnt1/dir1/link1: 递归目录循环
grep: 警告: /home/James/mypro/Linux-Pro/dir_operation/mnt1/dir2/link2/link1: 递归目录循环
由上可见它是正确的。
速度:对一个约8G大小的目录树做完整检查,大约需要17秒,如下:
root@localhost :/home/James/mypro/Linux-Pro/dir_operation# time ./detect_dir_loop /home/James/proto3-standard/
Error occured when getting realpath of [fsl_cache.h]: No such file or directory
/home/James/proto3-standard/external/alsa-lib/include/alsa recursively reference itself!
Error occured when getting realpath of [target]: No such file or directory
Error occured when getting realpath of [testsuite]: No such file or directory
Error occured when getting realpath of [en-US]: No such file or directory
Error occured when getting realpath of [testsuite]: No such file or directory
Error occured when getting realpath of [en-US]: No such file or directory
real 0m17.783s
user 0m0.444s
sys 0m2.340s
源码:
/**
* detect_dir_loop.c
*
* detect recursive references in a specified directory
**/
#include <unistd.h>
#include <limits.h>
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <dirent.h>
#include <sys/stat.h>
#include <string.h>
#include <errno.h>
/* Capacity of each path stack: init_stack() allocates this many slots and
 * push_stack() reports "stack overflow" once they are all in use. */
#define MAX_NESTED_DIR_DEPTH 256
/* Set to a non-zero value to make PDEBUG() print trace output via printf(). */
#define DEBUG 0
#if DEBUG
#define PDEBUG(fmt, arg...) printf(fmt, ##arg)
#else
/* Tracing disabled: PDEBUG() expands to nothing. */
#define PDEBUG(fmt, arg...)
#endif
/* Fixed-capacity stack of heap-allocated path strings. */
struct stack
{
char **stack; /* slot array allocated by init_stack(); unused slots hold NULL */
int top; /* number of stored elements; stack[top-1] is the most recent one */
};
/* Resolved (absolute) paths of the directories currently being traversed;
 * check_collision() compares new directories against these. */
struct stack stack_dir; /* store absolute path of a directory */
/* Path names exactly as they were handed to parse_dir()/parse_link(); only
 * used to print where a recursive reference was found. */
struct stack stack_relative_path;
/* Classify a path with lstat() and dispatch to parse_dir()/parse_link(). */
static void parse(const char *path);
/* Traverse a directory, reporting it instead if it collides with stack_dir. */
static void parse_dir(const char *dir);
/* Follow a symbolic link; traverse its target when that is a directory. */
static void parse_link(const char *link);
/* Allocate the slot array of a stack and mark the stack empty. */
static void init_stack(struct stack *st);
/* Free every stored string and the slot array itself. */
static void clean_stack(struct stack *st);
static void push_stack(const char *dir, struct stack *st); /* alloc memory, push a copy of dir onto the stack; called only after check_collision has been called */
static void pop_stack(struct stack *st); /* pop the top element out, free corresponding memory */
/* Return the most recently pushed string, or NULL when the stack is empty. */
static char* top_stack(struct stack *st);
/* Print the stack contents starting from the oldest element, using sep as separator. */
static void print_stack_from_bottom_to_up(struct stack *st, const char *sep);
static int check_collision(char *dir, struct stack *st); /* check whether dir conflicts with an entry of st; if so, it recursively references itself */
/**
 * parse - classify a path and hand it to the matching handler.
 * @path: path name to examine; must not be NULL.
 *
 * The names "." and ".." are skipped outright. Everything else is examined
 * with lstat(), so a symbolic link is seen as a link rather than as its
 * target. Directories are passed to parse_dir(), symbolic links to
 * parse_link(), and every other file type is ignored. A failing lstat() is
 * reported on stderr and the path is skipped.
 */
static void parse(const char *path)
{
    struct stat info;

    assert(path != NULL);

    /* never follow the self/parent entries returned by readdir() */
    if (!strcmp(path, ".") || !strcmp(path, ".."))
        return;

    if (lstat(path, &info) < 0) {
        fprintf(stderr, "lstat [%s] failed: %s \n",
                path, strerror(errno));
        return;
    }

    if (S_ISDIR(info.st_mode)) {
        parse_dir(path);
        return;
    }

    if (S_ISLNK(info.st_mode)) {
        parse_link(path);
        return;
    }

    /* regular files, devices, sockets, ...: nothing to do */
}
/**
 * parse_dir - traverse one directory, or report it as a recursive reference.
 * @dir: directory name, resolved relative to the current working directory.
 *       parse() only calls this for paths that lstat() reported as directories.
 *
 * The resolved path of @dir is checked against stack_dir. On a collision the
 * location is printed and nothing is traversed. Otherwise the resolved path
 * and @dir are pushed onto stack_dir / stack_relative_path, the process
 * changes into @dir, every entry is passed to parse(), and finally the
 * working directory and both stacks are restored.
 *
 * Every exit path releases the realpath() buffer and leaves both stacks
 * balanced, including the cases where the directory cannot be opened or
 * entered.
 */
static void parse_dir(const char *dir)
{
    char *real_path;
    DIR *dp;
    struct dirent *entry;

    PDEBUG("parse_dir: %s\n", dir);

    real_path = realpath(dir, NULL);
    if (real_path == NULL) {
        fprintf(stderr, "Error occured when getting realpath of [%s]: %s \n",
                dir, strerror(errno));
        return;
    }

    if (check_collision(real_path, &stack_dir) != 0) {
        /* detected a recursive reference */
        print_stack_from_bottom_to_up(&stack_relative_path, "/");
        printf("%s recursively reference itself! \n", dir);
        free(real_path);
        return;
    }

    /* push_stack() stores its own copy, so the realpath() buffer can go */
    push_stack(real_path, &stack_dir);
    free(real_path);
    real_path = NULL;
    push_stack(dir, &stack_relative_path);

    dp = opendir(dir);
    if (dp == NULL) {
        fprintf(stderr, "cannot open diretory [%s]: %s\n",
                dir, strerror(errno));
        goto out_pop;
    }

    /* entries returned by readdir() are bare names, so we must be inside dir */
    if (chdir(dir) != 0) {
        fprintf(stderr, "cannot enter directory [%s]: %s\n",
                dir, strerror(errno));
        goto out_close;
    }

    while ((entry = readdir(dp)) != NULL)
        parse(entry->d_name);

    /* restore current dir; if this fails every later relative path is wrong */
    if (chdir("..") != 0) {
        fprintf(stderr, "cannot leave directory [%s]: %s\n",
                dir, strerror(errno));
        exit(EXIT_FAILURE);
    }

out_close:
    closedir(dp);
out_pop:
    pop_stack(&stack_relative_path);
    pop_stack(&stack_dir);
}
/**
 * parse_link - follow a symbolic link and traverse its target directory.
 * @link: name of the symbolic link, relative to the current working
 *        directory. parse() only calls this for paths lstat() reported as
 *        symbolic links.
 *
 * The link is resolved with realpath(). Targets that are not directories are
 * ignored. A directory target is checked against stack_dir: on a collision
 * the location is printed; otherwise the resolved path and @link are pushed,
 * the process changes into the target, every entry is passed to parse(), and
 * the previous working directory and both stacks are restored.
 *
 * All exits go through one cleanup path so the realpath()/getcwd() buffers
 * are always freed and the stacks stay balanced.
 */
static void parse_link(const char *link)
{
    char *real_path = NULL;
    char *cur_dir = NULL;
    DIR *dp = NULL;
    struct dirent *entry;
    struct stat statbuf;
    int pushed = 0;

    /* parse_link is called from parse, so link must be a valid link */
    PDEBUG("parse_link: %s\n", link);

    real_path = realpath(link, NULL);
    if (real_path == NULL) {
        fprintf(stderr, "Error occured when getting realpath of [%s]: %s \n",
                link, strerror(errno));
        return;
    }

    if (lstat(real_path, &statbuf) < 0) {
        fprintf(stderr, "lstat [%s] failed: %s \n",
                real_path, strerror(errno));
        goto out;
    }

    if (!S_ISDIR(statbuf.st_mode))
        goto out; /* link does not point to a directory */

    if (check_collision(real_path, &stack_dir) != 0) {
        /* detected a recursive reference */
        print_stack_from_bottom_to_up(&stack_relative_path, "/");
        printf("%s recursively reference itself! \n", link);
        goto out;
    }

    /* Remember where we are: the target may live anywhere, so ".." would not
     * bring us back. Size 0 asks the C library to allocate a buffer that is
     * large enough (glibc/BSD extension), avoiding a fixed length limit. */
    cur_dir = getcwd(NULL, 0);
    if (cur_dir == NULL) {
        fprintf(stderr, "cannot get current directory: %s\n",
                strerror(errno));
        goto out;
    }

    push_stack(real_path, &stack_dir);
    push_stack(link, &stack_relative_path);
    pushed = 1;

    dp = opendir(real_path);
    if (dp == NULL) {
        fprintf(stderr, "cannot open diretory [%s]: %s\n",
                link, strerror(errno));
        goto out;
    }

    /* entries returned by readdir() are bare names, so enter the target */
    if (chdir(real_path) != 0) {
        fprintf(stderr, "cannot enter directory [%s]: %s\n",
                link, strerror(errno));
        goto out;
    }

    while ((entry = readdir(dp)) != NULL)
        parse(entry->d_name);

    /* if we cannot get back, every later relative path would be wrong */
    if (chdir(cur_dir) != 0) {
        fprintf(stderr, "cannot return to directory [%s]: %s\n",
                cur_dir, strerror(errno));
        exit(EXIT_FAILURE);
    }

out:
    if (dp != NULL)
        closedir(dp);
    if (pushed) {
        pop_stack(&stack_relative_path);
        pop_stack(&stack_dir);
    }
    free(cur_dir);
    free(real_path);
}
/* Prepare both global path stacks (stack_dir first, then
 * stack_relative_path) before the traversal starts. */
static void init_resource()
{
    struct stack *stacks[] = { &stack_dir, &stack_relative_path };
    size_t k;

    for (k = 0; k < sizeof stacks / sizeof stacks[0]; k++)
        init_stack(stacks[k]);
}
/* Release both global path stacks, in the reverse order of init_resource():
 * stack_relative_path first, then stack_dir. */
static void clean_resource()
{
    struct stack *stacks[] = { &stack_relative_path, &stack_dir };
    size_t k;

    for (k = 0; k < sizeof stacks / sizeof stacks[0]; k++)
        clean_stack(stacks[k]);
}
/**
 * init_stack - allocate the slot array of a stack and mark it empty.
 * @st: stack to initialise.
 *
 * Allocates MAX_NESTED_DIR_DEPTH slots, sets each of them to NULL and resets
 * the element count. Terminates the program if the allocation fails.
 */
static void init_stack(struct stack *st)
{
    char **slot;
    char **end;

    st->stack = (char **)malloc(MAX_NESTED_DIR_DEPTH * sizeof(char *));
    if (!st->stack) {
        fprintf(stderr, "allocate stack failed: %s \n", strerror(errno));
        exit(EXIT_FAILURE);
    }

    /* every unused slot must read as NULL; push_stack() asserts on that */
    end = st->stack + MAX_NESTED_DIR_DEPTH;
    for (slot = st->stack; slot != end; ++slot)
        *slot = NULL;

    st->top = 0; /* the stack currently holds `top` elements */
}
/**
 * clean_stack - release everything owned by a stack.
 * @st: stack previously set up by init_stack().
 *
 * Frees every stored string (free(NULL) is harmless for empty slots), then
 * the slot array itself, and resets the element count to zero.
 */
static void clean_stack(struct stack *st)
{
    char **slot = st->stack;
    char **end = st->stack + MAX_NESTED_DIR_DEPTH;

    while (slot != end) {
        free(*slot);
        *slot = NULL;
        ++slot;
    }

    free(st->stack);
    st->top = 0;
}
/**
 * print_stack_from_bottom_to_up - print the stacked path components in order.
 * @st:  stack whose elements are printed, oldest element first.
 * @sep: separator written after each element; must not be NULL.
 *
 * Every element is followed by @sep so that the caller can append one more
 * path component directly after the output. The oldest element is the path
 * the user passed on the command line; it only gets a separator when it does
 * not already end with one, so both "/some/dir" and "/some/dir/" yield
 * "/some/dir/" rather than gluing the next component onto "dir".
 */
static void print_stack_from_bottom_to_up(struct stack *st, const char *sep)
{
    size_t sep_len;
    int i;

    assert(sep != NULL);
    sep_len = strlen(sep);

    for (i = 0; i < st->top; i++) {
        const char *elem = st->stack[i];
        size_t len = strlen(elem);
        int ends_with_sep = len >= sep_len &&
                            strcmp(elem + (len - sep_len), sep) == 0;

        printf("%s", elem);
        /* only the first element may already carry its own separator */
        if (i > 0 || !ends_with_sep)
            printf("%s", sep);
    }
}
/**
 * top_stack - peek at the most recently pushed element.
 * @st: stack to inspect.
 *
 * Returns the stored string without removing it, or NULL when the stack is
 * empty. The string stays owned by the stack.
 */
static char* top_stack(struct stack *st)
{
    assert(st->top >= 0);

    return (st->top == 0) ? NULL : st->stack[st->top - 1];
}
/**
 * pop_stack - remove the newest element and free its string.
 * @st: stack to pop from.
 *
 * Popping an empty stack only prints an error on stderr. Otherwise the freed
 * slot is reset to NULL, which push_stack() relies on.
 */
static void pop_stack(struct stack *st)
{
    int idx;

    assert(st->top >= 0);

    if (st->top == 0) {
        fprintf(stderr, "Error in pop stack: stack empty!\n");
        return;
    }

    idx = --st->top;
    free(st->stack[idx]);
    st->stack[idx] = NULL;
}
/**
 * push_stack - store a private copy of a path on top of a stack.
 * @path: NUL-terminated string to copy; must not be NULL.
 * @st:   stack to push onto.
 *
 * The stack owns the copy; it is released by pop_stack() or clean_stack().
 * Terminates the program when the stack already holds MAX_NESTED_DIR_DEPTH
 * elements or when memory cannot be allocated.
 */
static void push_stack(const char *path, struct stack *st)
{
    size_t len;
    char *copy;

    assert(path != NULL);
    assert(st->top >= 0);

    /* The capacity check must come before any access to stack[top]: when the
     * stack is full, stack[top] lies one past the end of the slot array. */
    if (st->top >= MAX_NESTED_DIR_DEPTH) {
        fprintf(stderr, "Error in push_stack: stack overflow!\n");
        exit(EXIT_FAILURE);
    }
    assert(st->stack[st->top] == NULL); /* every pop resets its slot to NULL */

    len = strlen(path);
    copy = malloc(len + 1);
    if (copy == NULL) {
        fprintf(stderr, "Memory allocation in push_stack failed\n");
        exit(EXIT_FAILURE);
    }
    memcpy(copy, path, len + 1); /* includes the terminating NUL */

    st->stack[st->top] = copy;
    st->top++;
}
/**
 * is_or_is_parent - test whether one directory is, or contains, another.
 * @dir2: candidate ancestor, as an absolute path.
 * @dir1: path to test against, as an absolute path.
 *
 * Returns 1 when @dir2 is the same path as @dir1 or one of its ancestors,
 * 0 otherwise. The comparison respects path-component boundaries: "/a/b" is
 * an ancestor of "/a/b/c" but not of "/a/bc", even though it is a string
 * prefix of both. Both paths are expected in the normalised form produced by
 * realpath() (no trailing '/', except for the root directory itself).
 */
static int is_or_is_parent(const char *dir2, const char *dir1)
{
    size_t i = 0;

    /* dir2 and dir1 are both absolute paths */
    assert(dir2[0] == '/');
    assert(dir1[0] == '/');

    /* skip the common prefix */
    while (dir2[i] != '\0' && dir2[i] == dir1[i])
        i++;

    if (dir2[i] != '\0')
        return 0; /* the paths diverge, or dir1 is shorter than dir2 */

    /* dir2 is a string prefix of dir1; accept it only on a component edge */
    if (dir1[i] == '\0' || dir1[i] == '/')
        return 1; /* identical, or dir1 continues with a new component */

    /* dir2 itself ends in '/', which for a normalised path means root */
    return i > 0 && dir2[i - 1] == '/';
}
/**
 * check_collision - test a directory against every entry of a stack.
 * @dir: absolute path of the directory about to be entered; must not be NULL.
 * @st:  stack of paths to compare against.
 *
 * Returns 1 as soon as is_or_is_parent(dir, entry) is true for some stored
 * entry, which the callers treat as a recursive reference, and 0 when no
 * entry matches.
 */
static int check_collision(char *dir, struct stack *st)
{
    int found = 0;
    int idx;

    assert(dir != NULL);
    assert(dir[0] == '/'); /* dir must be an absolute path */

    for (idx = 0; !found && idx < st->top; ++idx) {
        if (is_or_is_parent(dir, st->stack[idx]))
            found = 1;
    }

    return found;
}
/**
 * main - detect recursive directory references below one path.
 * @argc: argument count; exactly one path argument is required.
 * @argv: argv[1] is the path to examine.
 *
 * A wrong argument count is a user error, so it is answered with a usage
 * message and a failure status rather than an assert(), which would abort
 * without explanation and disappear entirely when built with NDEBUG.
 */
int main(int argc, char *argv[])
{
    const char *targetpath;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <path>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "detect_dir_loop");
        return EXIT_FAILURE;
    }

    targetpath = argv[1];
    init_resource();
    parse(targetpath);
    clean_resource();
    return EXIT_SUCCESS;
}
后记:
1. C语言没有内存自动回收机制,在内存处理上要小心。说实话,自己写个stack,然后每次得到real path之后还要记得释放内存,这个真的有点让人不爽。
2. 主要算法是“递归下降分析”。递归递归就解决了。(数学和算法就是让人看起来比他原来要聪明)
3. 该算法中,限制了目录的嵌套层数是256。说实在的,目录嵌套了256层是为了啥?肯定有问题!所以,如果目录嵌套超过256层,会有stack overflow提示。虽然可以做stack自动扩展,但是我认为更好的处理方法就是提示stack overflow,让执行者知道有个目录嵌套过多!
4. 参见http://www.oschina.net/question/158589_56229 (Linux对symbolic link的限制)。注意这里即使有超过40层的link,只要总数没有超过256(否则会stack overflow),也可以正确解析。原因是我们做的是path resolution:每次遇到link,都会先解析出绝对路径,然后再进栈。于是系统调用lstat等函数时,得到的是一个最多只带一个link的路径名,自然不会达到40这个数目。