Hadoop usage (HDFS)
1. Install the JDK
2. Download and install Hadoop
3. Configure environment variables:
export HADOOP_HOME=/path/to/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
4. Configure the Hadoop parameters (see the config sketch below)
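A minimal sketch of the two core configuration files (under $HADOOP_HOME/etc/hadoop/), assuming a single-node pseudo-distributed setup; the hostname, port and temp directory below are assumptions and should match your environment.
core-site.xml:
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/path/to/hadoop/tmp</value>  <!-- assumed location; pick any writable path -->
  </property>
</configuration>
hdfs-site.xml:
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>  <!-- single node, so one replica -->
  </property>
</configuration>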
Configure passwordless SSH:
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ssh localhost
Format the NameNode and start HDFS:
hdfs namenode -format
start-dfs.sh
Then run
jps
and check that NameNode, DataNode and SecondaryNameNode are present.
Uploading and downloading files
hadoop fs and hdfs dfs have the same effect; -f forces overwriting an existing target.
hdfs dfs -mkdir -p /user/hadoop
hdfs dfs -ls /user
hadoop fs -put /tmp/localfile.txt /user/hadoop
hdfs dfs -put /path/to/local/file /user/hadoop
hdfs dfs -put -f /path/to/local/mysql-9.2.0-macos15-arm64.dmg /user/hadoop
hdfs dfs -rm /user/hadoop/mysql-9.2.0-macos15-arm64.dmg
hdfs dfs -cat /user/hadoop/employee_data/part-m-00000
hadoop fs -get /user/hadoop/mysql-9.2.0-macos15-arm64.dmg /Users/xxx/Downloads/123.dmg
# View the logs and status of a YARN application
yarn logs -applicationId <application_id>
yarn application -status application_1749456317232_0004
Batch operations
hadoop fs -put /localpath/*.txt /hdfs/directory/
Using a pipe
hadoop fs -cat /hdfs/input/file.txt | grep "error" > /local/output/errors.txt
Setting permissions
hadoop fs -chmod 777 /hdfs/directory/file.txt
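The permission commands also work recursively, and ownership can be changed as well; a quick sketch (the user and group names are placeholders, and -chown generally requires HDFS superuser privileges):
hadoop fs -chmod -R 755 /hdfs/directory/
hadoop fs -chown -R hadoop:hadoop /hdfs/directory/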
# Print the directory tree recursively
hadoop fs -ls -R /hdfs/directory/
# Recursive delete; be careful, this cannot be undone
hadoop fs -rm -r /hdfs/directory/
# Empty the trash
hadoop fs -expunge
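Note that -rm only moves files to the per-user .Trash directory when trash is enabled; a minimal core-site.xml sketch to enable it (the retention value, in minutes, is an assumption):
<property>
  <name>fs.trash.interval</name>
  <value>1440</value>  <!-- keep trashed files for 24 hours -->
</property>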
Working with files from Java. Maven dependencies:
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.3.5</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>3.3.5</version>
</dependency>
<dependency>
    <groupId>com.google.protobuf</groupId>
    <artifactId>protobuf-java</artifactId>
    <version>3.7.1</version>
</dependency>
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;

public class HdfsFileUploader {
    public static void main(String[] args) {
        // Configure the HDFS connection
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://namenode:8020");
        // Initialize the HDFS client
        FileSystem fs = null;
        try {
            fs = FileSystem.get(URI.create("hdfs://namenode:8020"), conf);
            // Local file to upload and the target HDFS path
            Path localFile = new Path("/local/path/to/source.txt");
            Path hdfsFile = new Path("/hdfs/path/to/destination.txt");
            // Upload the file
            fs.copyFromLocalFile(localFile, hdfsFile);
            System.out.println("File uploaded successfully.");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (fs != null) {
                try {
                    fs.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
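Since FileSystem implements Closeable, an equivalent and slightly more compact sketch uses try-with-resources; the class name here is illustrative, and the URI and paths are the same placeholders as above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;

public class HdfsFileUploaderCompact {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://namenode:8020");
        // try-with-resources closes the FileSystem automatically
        try (FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:8020"), conf)) {
            fs.copyFromLocalFile(new Path("/local/path/to/source.txt"),
                    new Path("/hdfs/path/to/destination.txt"));
            System.out.println("File uploaded successfully.");
        }
    }
}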
Download
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.URI;

public class HdfsFileDownloader {
    private static final Logger LOG = LoggerFactory.getLogger(HdfsFileDownloader.class);

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://namenode:8020");
        FileSystem fs = null;
        try {
            fs = FileSystem.get(URI.create("hdfs://namenode:8020"), conf);
            // HDFS source path and local destination path
            Path hdfsSourceFile = new Path("/hdfs/path/to/source.txt");
            Path localDestFile = new Path("/local/path/to/destination.txt");
            // Download the file
            fs.copyToLocalFile(hdfsSourceFile, localDestFile);
            System.out.println("File downloaded successfully.");
        } catch (Exception e) {
            LOG.error("Error during file download: ", e);
        } finally {
            if (fs != null) {
                try {
                    fs.close();
                } catch (Exception e) {
                    LOG.error("Error closing FileSystem: ", e);
                }
            }
        }
    }
}