Word counting with Hadoop, saving the results to HBase, and troubleshooting: java.lang.ClassNotFoundException: org.apache.hadoop.io.compress.SnappyCodec

This article walks through a Hadoop MapReduce job that counts words and stores the results in HBase, and resolves the java.lang.ClassNotFoundException: org.apache.hadoop.io.compress.SnappyCodec error hit at runtime. The error occurs in the Java runtime environment and is usually caused by missing Snappy support; adding the corresponding dependency or fixing the environment configuration resolves it.



Design: use the MapReduce framework to count the words in a given file, then save the counts to HBase:

The program is as follows:

import java.io.IOException; 
import java.util.StringTokenizer; 
 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.hbase.HBaseConfiguration; 
import org.apache.hadoop.hbase.HColumnDescriptor; 
import org.apache.hadoop.hbase.HTableDescriptor; 
import org.apache.hadoop.hbase.client.HBaseAdmin; 
import org.apache.hadoop.hbase.client.Put; 
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; 
import org.apache.hadoop.hbase.mapreduce.TableReducer; 
import org.apache.hadoop.hbase.util.Bytes; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.io.NullWritable; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 
//import org.apache.hadoop.io.compress.SnappyCodec;
 
 
public class WordCountHBase { 
 
  // Mapper: emits (word, 1) for every token in the input
  public static class Map extends 
      Mapper<LongWritable, Text, Text, IntWritable> { 
    private final static IntWritable one = new IntWritable(1); 
    private Text word = new Text(); 
 
    public void map(LongWritable key, Text value, Context context) 
        throws IOException, InterruptedException { 
      StringTokenizer itr = new StringTokenizer(value.toString()); 
      while (itr.hasMoreTokens()) { 
        word.set(itr.nextToken()); 
        context.write(word, one); 
      } 
    } 
  } 
 
  // Reducer: sums the counts and writes one Put per word into HBase
  public static class Reduce extends 
      TableReducer<Text, IntWritable, NullWritable> { 
 
    public void reduce(Text key, Iterable<IntWritable> values, 
        Context context) throws IOException, InterruptedException { 
 
      int sum = 0; 
 
      for (IntWritable val : values) {
        sum += val.get();
      }
 
      // One Put per word: the word itself is the row key
      Put put = new Put(Bytes.toBytes(key.toString()));
      // Column family "content", qualifier "count", value is the count as a string
      put.add(Bytes.toBytes("content"), Bytes.toBytes("count"),
          Bytes.toBytes(String.valueOf(sum)));
 
      context.write(NullWritable.get(), put); 
    } 
  } 
 
  // Create the HBase table, dropping and recreating it if it already exists
  public static void createHBaseTable(String tableName) throws IOException {
    // Table descriptor
    HTableDescriptor htd = new HTableDescriptor(tableName); 
    // Column family descriptor
    HColumnDescriptor col = new HColumnDescriptor("content"); 
    htd.addFamily(col); 
 
    // HBase configuration
    Configuration conf = HBaseConfiguration.create(); 
    // ZooKeeper quorum (the slave nodes)
    conf.set("hbase.zookeeper.quorum", "172.16.2.34,172.16.2.54,172.16.2.57");
    conf.set("hbase.zookeeper.property.clientPort", "2181");

    // HBase master
    conf.set("hbase.master", "172.16.2.42:60000");
    HBaseAdmin hAdmin = new HBaseAdmin(conf); 
 
    if (hAdmin.tableExists(tableName)) { 
      System.out.println("Table already exists; recreating it.");
      hAdmin.disableTable(tableName); 
      hAdmin.deleteTable(tableName); 
    } 
 
    System.out.println("Creating table: " + tableName);
    hAdmin.createTable(htd); 
  } 
 
  public static void main(String[] args) throws Exception { 
    String tableName = "wordcount"; 
    // Step 1: create the HBase table
    WordCountHBase.createHBaseTable(tableName); 
 
    // Step 2: run the MapReduce job
    // HBase settings
    Configuration conf = new Configuration(); 
    
    conf.set("hbase.zookeeper.quorum", "172.16.2.34,172.16.2.54,172.16.2.57"); 
    conf.set("hbase.zookeeper.property.clientPort", "2181"); 
    conf.set("hbase.master", "172.16.2.42:60000");

    // Hadoop settings
    conf.set("fs.default.name", "hdfs://172.16.2.42:9000/");
    conf.set("hadoop.job.user", "hadoop");
    conf.set("mapred.job.tracker", "172.16.2.42:9001");

    // Submitting from Eclipse to the Hadoop cluster requires packaging the job
    // as a jar first. In theory this should not be necessary, but I never got
    // it to work without it.
    conf.set("mapred.jar", "WordCountHBase.jar");
    
    conf.set(TableOutputFormat.OUTPUT_TABLE, tableName); 
 
    Job job = new Job(conf, "New Word Count");
    job.setJarByClass(WordCountHBase.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    // The map output types differ from the job's output types, so set them explicitly
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TableOutputFormat.class);
    // The HDFS input path is taken from the command line
    FileInputFormat.addInputPath(job, new Path(args[0]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

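Once the job finishes, every word occupies one row of the wordcount table, with its count stored under content:count. As a quick sanity check, a single word can be read back with the same 0.98-era client API. The sketch below is mine, not part of the original article: it reuses the cluster settings from above, and the row key "hello" is just an example word.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class ReadWordCount {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "172.16.2.34,172.16.2.54,172.16.2.57");
    conf.set("hbase.zookeeper.property.clientPort", "2181");

    // The row key is the word itself, exactly as the reducer wrote it
    HTable table = new HTable(conf, "wordcount");
    Get get = new Get(Bytes.toBytes("hello"));
    Result result = table.get(get);
    byte[] value = result.getValue(Bytes.toBytes("content"), Bytes.toBytes("count"));
    System.out.println("count = " + (value == null ? "(row not found)" : Bytes.toString(value)));
    table.close();
  }
}

Running scan 'wordcount' in the HBase shell gives the same view across all rows at once.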
At runtime, however, the job can fail as soon as HBase touches Snappy-compressed data. A typical failure looks like the following: reading an HFile fails, and the root cause at the bottom of the trace is that libhadoop was built without Snappy support:

java.lang.Exception: org.apache.hadoop.hbase.io.hfile.CorruptHFileException: Problem reading HFile Trailer from file hdfs://192.168.8.201:8020/apps/hbase/data/data/gt_dw/profile_gid_lbs_locvalue/7ef0422f73082b2d140d755a08ab6904/lbs/75c83b238e0b4be496eecf33eed5e5c3
    at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462) ~[hadoop-mapreduce-client-common-2.7.2.jar:na]
    at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522) ~[hadoop-mapreduce-client-common-2.7.2.jar:na]
Caused by: org.apache.hadoop.hbase.io.hfile.CorruptHFileException: Problem reading HFile Trailer from file hdfs://192.168.8.201:8020/apps/hbase/data/data/gt_dw/profile_gid_lbs_locvalue/7ef0422f73082b2d140d755a08ab6904/lbs/75c83b238e0b4be496eecf33eed5e5c3
    at org.apache.hadoop.hbase.io.hfile.HFile.pickReaderVersion(HFile.java:463) ~[hbase-server-0.98.13-hadoop2.jar:0.98.13-hadoop2]
    at org.apache.hadoop.hbase.io.hfile.HFile.createReader(HFile.java:506) ~[hbase-server-0.98.13-hadoop2.jar:0.98.13-hadoop2]
    at com.glab.fz.etl.hfile.util.HFileInputFormat$HFileRecordReader.initialize(HFileInputFormat.java:60) ~[classes/:na]
    at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:548) ~[hadoop-mapreduce-client-core-2.7.2.jar:na]
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:786) ~[hadoop-mapreduce-client-core-2.7.2.jar:na]
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341) ~[hadoop-mapreduce-client-core-2.7.2.jar:na]
    at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243) ~[hadoop-mapreduce-client-common-2.7.2.jar:na]
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[na:1.8.0_431]
    at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_431]
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[na:1.8.0_431]
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[na:1.8.0_431]
    at java.lang.Thread.run(Thread.java:750) ~[na:1.8.0_431]
Caused by: java.lang.RuntimeException: native snappy library not available: this version of libhadoop was built without snappy support.
    at org.apache.hadoop.io.compress.SnappyCodec.checkNativeCodeLoaded(SnappyCodec.java:65) ~[hadoop-common-2.7.2.jar:na]
    at org.apache.hadoop.io.compress.SnappyCodec.getDecompressorType(SnappyCodec.java:193) ~[hadoop-common-2.7.2.jar:na]
    at org.apache.hadoop.io.compress.CodecPool.getDecompressor(CodecPool.java:178) ~[hadoop-common-2.7.2.jar:na]
    at org.apache.hadoop.hbase.io.compress.Compression$Algorithm.getDecompressor(Compression.java:327) ~[hbase-common-0.98.13-hadoop2.jar:0.98.13-hadoop2]
    at org.apache.hadoop.hbase.io.compress.Compression.decompress(Compression.java:422) ~[hbase-common-0.98.13-hadoop2.jar:0.98.13-hadoop2]
    at org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext.prepareDecoding(HFileBlockDefaultDecodingContext.java:91) ~[hbase-common-0.98.13-hadoop2.jar:0.98.13-hadoop2]
    at org.apache.hadoop.hbase.io.hfile.HFileBlock.unpack(HFileBlock.java:507) ~[hbase-server-0.98.13-hadoop2.jar:0.98.13-hadoop2]
    at org.apache.hadoop.hbase.io.hfile.HFileBlock$AbstractFSReader$1.nextBlock(HFileBlock.java:1255) ~[hbase-server-0.98.13-hadoop2.jar:0.98.13-hadoop2]
    at org.apache.hadoop.hbase.io.hfile.HFileBlock$AbstractFSReader$1.nextBlockWithBlockType(HFileBlock.java:1261) ~[hbase-server-0.98.13-hadoop2.jar:0.98.13-hadoop2]
    at org.apache.hadoop.hbase.io.hfile.HFileReaderV2.<init>(HFileReaderV2.java:147) ~[hbase-server-0.98.13-hadoop2.jar:0.98.13-hadoop2]
    at org.apache.hadoop.hbase.io.hfile.HFileReaderV3.<init>(HFileReaderV3.java:73) ~[hbase-server-0.98.13-hadoop2.jar:0.98.13-hadoop2]
    at org.apache.hadoop.hbase.io.hfile.HFile.pickReaderVersion(HFile.java:453) ~[hbase-server-0.98.13-hadoop2.jar:0.98.13-hadoop2]
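Both the ClassNotFoundException from the title and the "native snappy library not available" root cause above boil down to the runtime not finding Snappy, which is why adding the dependency or fixing the environment resolves it. Things worth checking, in rough order: run hadoop checknative -a to see whether the native hadoop and snappy libraries are detected; for the ClassNotFoundException variant, make sure the hadoop-common jar (which contains org.apache.hadoop.io.compress.SnappyCodec) is on the job's classpath; for the native variant, make sure libhadoop and libsnappy are on java.library.path (typically -Djava.library.path=$HADOOP_HOME/lib/native); and if Snappy is not actually needed, recreate the column family with COMPRESSION => 'NONE'. The same check can be made from Java. Below is a minimal diagnostic sketch of mine, not from the original article; it uses only Hadoop's own NativeCodeLoader utility, and the SnappyCheck class name is hypothetical:

import org.apache.hadoop.util.NativeCodeLoader;

// Hypothetical diagnostic class; run it with the same classpath and
// -Djava.library.path that the MapReduce job uses.
public class SnappyCheck {
  public static void main(String[] args) {
    // True only if libhadoop was found on java.library.path and loaded
    boolean loaded = NativeCodeLoader.isNativeCodeLoaded();
    System.out.println("libhadoop loaded: " + loaded);
    if (loaded) {
      // False when libhadoop was compiled without Snappy, which is exactly
      // the "built without snappy support" condition in the trace above
      System.out.println("snappy supported: " + NativeCodeLoader.buildSupportsSnappy());
    }
  }
}

If the first line prints false, fix java.library.path; if the second prints false, install Snappy and rebuild (or reinstall) the native Hadoop libraries, or drop Snappy compression from the table.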