HDFS Common Commands Practice

This post walks through a complete workflow that uses Hadoop to simulate log generation, collect the logs, upload them to HDFS, and query them: a scheduled task generates logs, a collector uploads them to the Hadoop distributed file system, and a small API queries the uploaded log files.


hadoop fs -ls /   list the files under the root directory

hadoop fs -mkdir -p /aaa/bbb   recursively create a directory tree

hadoop fs -cp /xxx /xxxx   copy a file

hadoop fs -cp /xxx/ /xxxxx   copy a directory recursively (directories are copied with their contents)

hadoop fs -put /xxx /   upload a local file to the root of the file system

hadoop fs -get /xxxx /xxxx   download a file to the specified local directory

hadoop fs -rm /xxx.txt   delete a file

hadoop fs -rm -r /xxx   recursively delete the files under a directory

hadoop fs -mv /xxx /xxx   move a file

hadoop fs -cat /xxx   print the contents of a file

hadoop fs -tail /xxxx.log   print the last kilobyte of a file
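
The same operations are also available programmatically through the org.apache.hadoop.fs.FileSystem API that the rest of this post uses. As a minimal sketch (the class name here is made up for illustration; the NameNode address hdfs://centos01:9000 and user root are taken from the code later in this post):

    package com.gc.hadoop.api;

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Hypothetical illustration class, not part of the original exercise
    public class FsShellEquivalents {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(new URI("hdfs://centos01:9000/"), conf, "root");

            fs.mkdirs(new Path("/aaa/bbb"));                    // hadoop fs -mkdir -p /aaa/bbb
            for (FileStatus s : fs.listStatus(new Path("/"))) { // hadoop fs -ls /
                System.out.println(s.getPath());
            }
            fs.delete(new Path("/aaa"), true);                  // hadoop fs -rm -r /aaa
            fs.close();
        }
    }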

1 Hadoop practice (development environment: JDK 1.8, Eclipse)

(1) Simulate log generation with a TimerTask scheduled job

a. The log4j configuration file

    # Root logger: pass everything to the rolling-file appender
    log4j.rootLogger=ALL,logRollingFile

    # Write bare messages (%m%n) to d://logs//access.log, rolling at 5MB, keeping 50 backups
    log4j.appender.logRollingFile=org.apache.log4j.RollingFileAppender
    log4j.appender.logRollingFile.layout=org.apache.log4j.PatternLayout
    log4j.appender.logRollingFile.layout.ConversionPattern=%m%n
    log4j.appender.logRollingFile.Threshold=INFO
    log4j.appender.logRollingFile.ImmediateFlush=TRUE
    log4j.appender.logRollingFile.Append=TRUE
    log4j.appender.logRollingFile.File=d://logs//access.log
    log4j.appender.logRollingFile.MaxFileSize=5MB
    log4j.appender.logRollingFile.MaxBackupIndex=50
    log4j.appender.logRollingFile.Encoding=UTF-8
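
With this configuration, log4j writes to d://logs//access.log and, when the file reaches 5MB, rolls it over to access.log.1 (then .2, and so on, keeping at most 50 backups). The collector in step (2) relies on exactly this behavior: a file whose name no longer ends in .log has been rolled over, is no longer being written, and is therefore safe to move and upload.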

b. The scheduled task that simulates log generation

    package com.gc.hadoop.logProduct;

    import java.util.Random;
    import java.util.TimerTask;

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    /**
     * 1 Simulate log file generation:
     *    a. define a word array
     *    b. pick entries with random indexes
     * @author guochao
     */
    public class LogProduct extends TimerTask {
        Logger log = LoggerFactory.getLogger(LogProduct.class);
        String[] str = {"ab","cd","ef","gg","hi","word","count","java",
                "html","css","jsp","jquery","js","servlet","easyUI",
                "springmvc","spring","springboot","springcloud","dubbo","redis","solr","rabbitmq","hadoop",
                "mysql","thread","list","set","map","hashmap","hashTable","hello"};

        @Override
        public void run() {
            Random r = new Random(System.currentTimeMillis());
            StringBuilder builder = null;
            for (int i = 0; i < 10000; i++) {
                builder = new StringBuilder();
                // nextInt(bound) yields a uniform index and avoids the
                // Math.abs(Integer.MIN_VALUE) overflow corner case
                int random = r.nextInt(str.length);
                builder.append(str[random] + " " + str[i % str.length]);
                log.info(builder.toString());
            }
        }
    }
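
Each run of this task writes 10,000 two-word lines. Scheduled once per second by the launcher in step (3), that is on the order of 100KB of log output per second, so the 5MB MaxFileSize is reached in well under a minute and the appender keeps producing rolled-over files for the collector to pick up.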

(2) Write the logCollect scheduled task that collects the logs

    package com.gc.hadoop.logProduct;

    import java.io.File;
    import java.io.FilenameFilter;
    import java.net.URI;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.TimerTask;
    import java.util.UUID;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    /**
     * Log collection scheduled task:
     *    a. read the finished log files from the log directory
     *    b. move them to the pending-upload directory
     *    c. move uploaded files to the backUp directory
     * @author guochao
     */
    public class DataCollection extends TimerTask {

        @Override
        public void run() {
            // log root directory
            File logDir = new File("D:\\logs");
            if (!logDir.exists()) {
                System.out.println("Log directory does not exist!");
                return;
            }
            // finished (rolled-over) logs end with a digit, e.g. access.log.1,
            // so accept every file that no longer ends with ".log"
            File[] logFiles = logDir.listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    return !name.endsWith(".log");
                }
            });
            // move the finished logs to the upload directory d://upload//
            // (HH = 24-hour clock, so timestamps stay unique in the afternoon)
            SimpleDateFormat simple = new SimpleDateFormat("yyyyMMddHHmmss");
            String format = simple.format(new Date());
            String uploadDir = "d://upload//";
            File upLoad = new File(uploadDir);
            if (!upLoad.exists()) {
                upLoad.mkdir(); // create the upload directory
            }
            for (File file : logFiles) {
                // strip the two-character rollover suffix (".1" .. ".9") and stamp the name with the time
                file.renameTo(new File(uploadDir + file.getName().substring(0, file.getName().length() - 2) + format + ".log"));
            }
            // read the files waiting in the upload directory
            File[] uploadLogFiles = upLoad.listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    return name.startsWith("access");
                }
            });
            // build the backup directory
            simple = new SimpleDateFormat("yyyy-MM-dd");
            String backUpDir = "d://backup//" + simple.format(new Date());
            File backup = new File(backUpDir);
            if (!backup.exists()) {
                backup.mkdirs();
            }
            if (uploadLogFiles != null && uploadLogFiles.length > 0) {
                Configuration conf = new Configuration();
                try {
                    FileSystem fs = FileSystem.get(new URI("hdfs://centos01:9000/"), conf, "root");
                    Path logPath = new Path("/logs/" + simple.format(new Date()));
                    if (!fs.exists(logPath)) {
                        fs.mkdirs(logPath);
                    }
                    for (File upload : uploadLogFiles) {
                        // upload the log file to HDFS; the UUID suffix keeps names unique
                        fs.copyFromLocalFile(new Path(upload.getPath()), new Path(logPath.toString() + "/" + upload.getName() + UUID.randomUUID()));
                        // move the uploaded file into the backup directory
                        upload.renameTo(new File(backUpDir + "\\" + upload.getName()));
                    }
                    fs.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }
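
The two-step move is deliberate: renaming a file into d://upload// is effectively atomic on the local filesystem, so the task never uploads a file log4j may still be writing, and if a run fails midway the file is either still in the upload directory (retried on the next run) or already in the backup directory (upload complete). Note that File.renameTo returns false rather than throwing when it fails, so a production version should check that return value.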

(3) Write the launcher class

    package com.gc.hadoop.dataCollection;

    import java.util.Timer;

    import com.gc.hadoop.logProduct.CleanBackUpFile;
    import com.gc.hadoop.logProduct.DataCollection;
    import com.gc.hadoop.logProduct.LogProduct;

    /**
     * a. simulate log file generation
     * b. collect the files
     * c. clean up the backup files
     * @author guochao
     */
    public class logDataCollection {
        public static void main(String[] args) {
            Timer timer = new Timer();
            // generate logs: start immediately, run once per second
            LogProduct task = new LogProduct();
            timer.schedule(task, 0, 1 * 1000);
            // collect and upload files: start immediately, run every 2 minutes
            timer.schedule(new DataCollection(), 0, 2 * 60 * 1000);
            // clean the backup files: first run after 1 minute, then every 5 minutes
            timer.schedule(new CleanBackUpFile(), 60 * 1000, 5 * 60 * 1000);
        }
    }
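
The post imports CleanBackUpFile but never shows it. Below is a minimal sketch of what such a task might look like, assuming it deletes backup subdirectories under d://backup// that are older than 24 hours; the retention policy and the implementation are assumptions, not the author's code:

    package com.gc.hadoop.logProduct;

    import java.io.File;
    import java.util.TimerTask;

    /**
     * Hypothetical sketch of the backup-cleanup task: the original post
     * never shows this class, so the 24-hour retention policy is assumed.
     */
    public class CleanBackUpFile extends TimerTask {
        @Override
        public void run() {
            File backupRoot = new File("d:\\backup");
            File[] dirs = backupRoot.listFiles();
            if (dirs == null) {
                return; // backup root does not exist yet
            }
            long cutoff = System.currentTimeMillis() - 24L * 60 * 60 * 1000;
            for (File dir : dirs) {
                if (dir.lastModified() < cutoff) {
                    deleteRecursively(dir);
                }
            }
        }

        // File.delete only removes empty directories, so delete bottom-up
        private void deleteRecursively(File f) {
            File[] children = f.listFiles();
            if (children != null) {
                for (File child : children) {
                    deleteRecursively(child);
                }
            }
            f.delete();
        }
    }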

(4) Write a simple API to query the files

    package com.gc.hadoop.api;

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.LocatedFileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.RemoteIterator;
    import org.junit.Test;

    /**
     * Query the uploaded log files
     * @author guochao
     */
    public class HadoopTest {
        /**
         * List all files
         * @throws Exception
         */
        @Test
        public void listFsFile() throws Exception {
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(new URI("hdfs://centos01:9000/"), conf, "root");
            // true = recurse into subdirectories
            RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("hdfs://centos01:9000/"), true);
            while (listFiles.hasNext()) {
                LocatedFileStatus file = listFiles.next();
                // getLen() is the actual file size; getBlockSize() would be the HDFS block size
                System.out.println("File size: " + file.getLen());
                System.out.println("File path: " + file.getPath().toString());
            }
        }

        /**
         * Delete the files under a given directory
         */
        @Test
        public void deleteFile() throws Exception {
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(new URI("hdfs://centos01:9000/"), conf, "root");
            boolean delete = fs.delete(new Path("hdfs://centos01:9000/logs"), true);
            System.out.println(delete);
        }
    }
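
Note that fs.listFiles(path, true) iterates recursively and yields only files, never directories, which is why the listing below contains only the uploaded log files. These tests also need Hadoop client jars matching the cluster version on the classpath; on Windows, some local file operations may additionally require the winutils binaries, so set HADOOP_HOME accordingly if they fail.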

Results:

    File path: hdfs://centos01:9000/logs/2019-03-16/access.log.120190316011625.log0f37d549-9903-4e8a-8092-3a675fc54f30

    File path: hdfs://centos01:9000/logs/2019-03-16/access.log.320190316011625.loge969e645-c7b6-43b3-b616-40087f947b20

    File path: hdfs://centos01:9000/logs/2019-03-16/access.log20190316011839.log138106ab-c8c0-4bef-a82a-9ff0fe6075f8

A small exercise for day one.
————————————————
Copyright notice: this is an original article by CSDN blogger "Master_slaves", released under the CC 4.0 BY-SA license. Please include the original source link and this notice when reposting.
Original link: https://blog.youkuaiyun.com/Master_chaoAndQi/article/details/88563894
