Hadoop usage (HDFS)
1. Install the JDK
2. Download and install Hadoop
3. Configure environment variables:
export HADOOP_HOME=/path/to/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
4. Configure the Hadoop parameters (see the config sketch below)
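A minimal sketch of the two core configuration files (under $HADOOP_HOME/etc/hadoop/), assuming a single-node pseudo-distributed setup; the hostname, port and temp directory below are assumptions and should match your environment.
core-site.xml:
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/path/to/hadoop/tmp</value>  <!-- assumed location; pick any writable path -->
  </property>
</configuration>
hdfs-site.xml:
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>  <!-- single node, so one replica -->
  </property>
</configuration>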
Configure passwordless SSH:
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ssh localhost
Format the NameNode and start HDFS:
hdfs namenode -format
start-dfs.sh
Then run
jps
and check that NameNode, DataNode and SecondaryNameNode are present.
Uploading and downloading files
hadoop fs and hdfs dfs have the same effect; -f forces overwriting an existing target.
hdfs dfs -mkdir -p /user/hadoop
hdfs dfs -ls /user
hadoop fs -put /tmp/localfile.txt /user/hadoop
hdfs dfs -put /path/to/local/file /user/hadoop
hdfs dfs -put -f /path/to/local/mysql-9.2.0-macos15-arm64.dmg /user/hadoop
hdfs dfs -rm /user/hadoop/mysql-9.2.0-macos15-arm64.dmg
hdfs dfs -cat /user/hadoop/employee_data/part-m-00000
hadoop fs -get /user/hadoop/mysql-9.2.0-macos15-arm64.dmg /Users/xxx/Downloads/123.dmg
# View the logs and status of a YARN application
yarn logs -applicationId <application_id>
yarn application -status application_1749456317232_0004
Batch operations
hadoop fs -put /localpath/*.txt /hdfs/directory/
Using a pipe
hadoop fs -cat /hdfs/input/file.txt | grep "error" > /local/output/errors.txt
Setting permissions
hadoop fs -chmod 777 /hdfs/directory/file.txt
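The permission commands also work recursively, and ownership can be changed as well; a quick sketch (the user and group names are placeholders, and -chown generally requires HDFS superuser privileges):
hadoop fs -chmod -R 755 /hdfs/directory/
hadoop fs -chown -R hadoop:hadoop /hdfs/directory/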
# Print the directory tree recursively
hadoop fs -ls -R /hdfs/directory/
# Recursive delete; be careful, this cannot be undone
hadoop fs -rm -r /hdfs/directory/
# Empty the trash
hadoop fs -expunge
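Note that -rm only moves files to the per-user .Trash directory when trash is enabled; a minimal core-site.xml sketch to enable it (the retention value, in minutes, is an assumption):
<property>
  <name>fs.trash.interval</name>
  <value>1440</value>  <!-- keep trashed files for 24 hours -->
</property>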
Working with files from Java. Maven dependencies:
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.3.5</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>3.3.5</version>
</dependency>
<dependency>
    <groupId>com.google.protobuf</groupId>
    <artifactId>protobuf-java</artifactId>
    <version>3.7.1</version>
</dependency>
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;

public class HdfsFileUploader {
    public static void main(String[] args) {
        // Configure the HDFS connection
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://namenode:8020");
        // Initialize the HDFS client
        FileSystem fs = null;
        try {
            fs = FileSystem.get(URI.create("hdfs://namenode:8020"), conf);
            // Local file to upload and the target HDFS path
            Path localFile = new Path("/local/path/to/source.txt");
            Path hdfsFile = new Path("/hdfs/path/to/destination.txt");
            // Upload the file
            fs.copyFromLocalFile(localFile, hdfsFile);
            System.out.println("File uploaded successfully.");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (fs != null) {
                try {
                    fs.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
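Since FileSystem implements Closeable, an equivalent and slightly more compact sketch uses try-with-resources; the class name here is illustrative, and the URI and paths are the same placeholders as above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;

public class HdfsFileUploaderCompact {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://namenode:8020");
        // try-with-resources closes the FileSystem automatically
        try (FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:8020"), conf)) {
            fs.copyFromLocalFile(new Path("/local/path/to/source.txt"),
                    new Path("/hdfs/path/to/destination.txt"));
            System.out.println("File uploaded successfully.");
        }
    }
}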
Download
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.URI;

public class HdfsFileDownloader {
    private static final Logger LOG = LoggerFactory.getLogger(HdfsFileDownloader.class);

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://namenode:8020");
        FileSystem fs = null;
        try {
            fs = FileSystem.get(URI.create("hdfs://namenode:8020"), conf);
            // HDFS source path and local destination path
            Path hdfsSourceFile = new Path("/hdfs/path/to/source.txt");
            Path localDestFile = new Path("/local/path/to/destination.txt");
            // Download the file
            fs.copyToLocalFile(hdfsSourceFile, localDestFile);
            System.out.println("File downloaded successfully.");
        } catch (Exception e) {
            LOG.error("Error during file download: ", e);
        } finally {
            if (fs != null) {
                try {
                    fs.close();
                } catch (Exception e) {
                    LOG.error("Error closing FileSystem: ", e);
                }
            }
        }
    }
}