Spark: org.apache.spark.SparkException: Task not serializable

This post describes a problem hit while using Java and Spark SQL to read an HBase table, and how it was solved. The error appeared after the code was refactored into the HBaseOperationImpl implementation class, because the anonymous inner class used in the map call is not serializable. The fix is to extract that anonymous inner class into a serializable class of its own.


While recently debugging Java code that uses Spark SQL to read an HBase table, I ran into the exception in the title. Below I first describe how the problem appeared, and then how it was fixed.

First, when the code is run in a standalone class, it successfully returns the HBase data and displays it:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

import scala.Tuple2;

public class testSQLFinal {
    public static void main(String[] args) throws IOException {
        // Spark properties
        System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        SparkSession spark = SparkSession.builder()
                .appName("wu_java_read_hbase_register_to_table")
                .master("local[4]")
                .getOrCreate();
        JavaSparkContext context = new JavaSparkContext(spark.sparkContext());

        // HBase connection parameters
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "192.168.80.182,192.168.80.183,192.168.80.184");
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
        configuration.set("hbase.master", "192.168.80.181:60000");
        Scan scan = new Scan();
        String tableName = "t1";
        configuration.set(TableInputFormat.INPUT_TABLE, tableName);

        // HBase wrapper class: creates the HBase instance and exposes CRUD helpers
        IHBaseOperation ihBaseOperation = HBaseOperationImpl.getInstance(configuration);

        // Column family and qualifier, e.g. info:name
        final Map<String, String> cfq = ihBaseOperation.getFamiliesAndQualifiersAByTable("default", "t1");

        // Read the table
        ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
        String ScanToString = Base64.encodeBytes(proto.toByteArray());
        configuration.set(TableInputFormat.SCAN, ScanToString);
        JavaPairRDD<ImmutableBytesWritable, Result> myRDD =
                context.newAPIHadoopRDD(configuration, TableInputFormat.class, ImmutableBytesWritable.class, Result.class);

        JavaRDD<Row> personsRDD = myRDD.map(new Function<Tuple2<ImmutableBytesWritable, Result>, Row>() {
            @Override
            public Row call(Tuple2<ImmutableBytesWritable, Result> tuple) throws Exception {
                System.out.println("====tuple==========" + tuple);
                Result result = tuple._2();
                String rowkey = Bytes.toString(result.getRow());
                List<String> list = new ArrayList<String>();
                list.add(rowkey);
                for (Map.Entry<String, String> entry : cfq.entrySet()) {
                    String cf = entry.getValue();
                    String col = entry.getKey();
                    String s = Bytes.toString(result.getValue(Bytes.toBytes(cf), Bytes.toBytes(col)));
                    list.add(s);
                }
                // Convert each tuple into a Row
                String[] fields = list.toArray(new String[list.size()]);
                return RowFactory.create(fields);
            }
        });

        List<StructField> structFields = new ArrayList<StructField>();
        structFields.add(DataTypes.createStructField("rowkey", DataTypes.StringType, true));
        List<String> fields = new ArrayList<String>(cfq.keySet());
        for (int i = 0; i < fields.size(); i++) {
            structFields.add(DataTypes.createStructField(fields.get(i), DataTypes.StringType, true));
        }
        // Map the HBase table to a schema
        StructType schema = DataTypes.createStructType(structFields);
        Dataset<Row> stuDf = spark.createDataFrame(personsRDD, schema);
        // Register a temp view and query it
        stuDf.createOrReplaceTempView("c1");
        Dataset<Row> nameDf = spark.sql("select * from c1");
        nameDf.show();
    }
}

Second, for reusability, I wrapped the logic above into the HBaseOperationImpl implementation class, as follows:

    public Dataset<Row> getAllDatas(String nameSpace, String tableName) throws IOException {
        Scan scan = new Scan();
        final Map<String, String> columnFamiliesQualier = getFamiliesAndQualifiersAByTable(nameSpace, tableName);
        ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
        String ScanToString = Base64.encodeBytes(proto.toByteArray());
        conf.set(TableInputFormat.SCAN, ScanToString);
        // JavaSparkContext context = new JavaSparkContext(spark.sparkContext());
        JavaSparkContext context = sparkAppConf.javaSparkContext();
        JavaPairRDD<ImmutableBytesWritable, Result> myRDD =
                context.newAPIHadoopRDD(conf, TableInputFormat.class, ImmutableBytesWritable.class, Result.class);
        JavaRDD<Row> rowRDD = myRDD.map(new Function<Tuple2<ImmutableBytesWritable, Result>, Row>() {
            // After wrapping the code into this method, the exception is thrown here
            @Override
            public Row call(Tuple2<ImmutableBytesWritable, Result> tuple) throws Exception {
                System.out.println("====tuple==========" + tuple);
                Result result = tuple._2();
                String rowkey = Bytes.toString(result.getRow());
                List<String> list = new ArrayList<String>();
                list.add(rowkey);
                for (Map.Entry<String, String> entry : columnFamiliesQualier.entrySet()) {
                    String cf = entry.getValue();
                    String col = entry.getKey();
                    String s = Bytes.toString(result.getValue(Bytes.toBytes(cf), Bytes.toBytes(col)));
                    list.add(s);
                }
                // Convert each tuple into a Row
                String[] fields = list.toArray(new String[list.size()]);
                return RowFactory.create(fields);
            }
        });
        List<StructField> structFields = new ArrayList<StructField>();
        structFields.add(DataTypes.createStructField("rowkey", DataTypes.StringType, true));
        List<String> fields = new ArrayList<String>(columnFamiliesQualier.keySet());
        for (int i = 0; i < fields.size(); i++) {
            structFields.add(DataTypes.createStructField(fields.get(i), DataTypes.StringType, true));
        }
        StructType schema = DataTypes.createStructType(structFields);
        Dataset<Row> stuDf = spark.createDataFrame(rowRDD, schema);
        // Register a temp view and query it
        stuDf.createOrReplaceTempView("temp");
        Dataset<Row> nameDf = spark.sql("select * from temp");
        return nameDf;
    }
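Why does the same code fail once it lives in an instance method? The anonymous Function is now an inner class of HBaseOperationImpl, so it carries an implicit reference to the enclosing instance. When Spark ships the map closure to the executors it has to serialize that whole object, including its Configuration and Spark context fields, and that is what triggers "Task not serializable". In the standalone testSQLFinal class the anonymous class was created inside a static main method, so there was no enclosing instance to capture and only the serializable cfq map went into the closure. The following minimal sketch (ClosureCaptureDemo and SerializableTask are made-up names for illustration) reproduces the same failure with plain Java serialization:

import java.io.ByteArrayOutputStream;
import java.io.NotSerializableException;
import java.io.ObjectOutputStream;
import java.io.Serializable;

public class ClosureCaptureDemo {

    // Stands in for a Spark Function, which must be Serializable
    interface SerializableTask extends Serializable {
        String run();
    }

    // Field of the enclosing class, standing in for Configuration / SparkSession etc.
    private final String tag = "demo";

    // Anonymous class created in an instance method: it captures ClosureCaptureDemo.this,
    // just like the anonymous Function above captures the HBaseOperationImpl instance
    SerializableTask buildTask() {
        return new SerializableTask() {
            @Override
            public String run() {
                return tag; // touching an outer field forces the capture of the enclosing instance
            }
        };
    }

    public static void main(String[] args) throws Exception {
        SerializableTask task = new ClosureCaptureDemo().buildTask();
        try (ObjectOutputStream out = new ObjectOutputStream(new ByteArrayOutputStream())) {
            // Fails because the captured enclosing instance is not serializable,
            // which is exactly what Spark reports as "Task not serializable"
            out.writeObject(task);
        } catch (NotSerializableException e) {
            System.out.println("Serialization failed: " + e);
        }
    }
}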

Solution:

Extract the anonymous inner class used inside map into a separate serializable class, and have this method call that class instead.

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;

import scala.Tuple2;

public class Mapper implements Function<Tuple2<ImmutableBytesWritable, Result>, Row>, Serializable {
    private static final long serialVersionUID = 42L;

    private Map<String, String> columnFamiliesQualier;

    public Mapper(Map<String, String> columnFamiliesQualier) {
        this.columnFamiliesQualier = columnFamiliesQualier;
    }

    @Override
    public Row call(Tuple2<ImmutableBytesWritable, Result> tuple) throws IOException {
        // System.out.println("====tuple==========" + tuple);
        Result result = tuple._2();
        String rowkey = Bytes.toString(result.getRow());
        List<String> list = new ArrayList<String>();
        list.add(rowkey);
        for (Map.Entry<String, String> entry : columnFamiliesQualier.entrySet()) {
            String cf = entry.getValue();
            String col = entry.getKey();
            String s = Bytes.toString(result.getValue(Bytes.toBytes(cf), Bytes.toBytes(col)));
            list.add(s);
        }
        // Build a Row from each HBase row: rowkey first, then the cell values
        String[] fields = list.toArray(new String[list.size()]);
        return RowFactory.create(fields);
    }
}
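With the extracted class in place, getAllDatas only needs to hand the column map to it. A minimal sketch of the changed line (reusing the variable names from the method above) looks like this:

// Inside getAllDatas: replace the anonymous Function with the serializable Mapper.
// Only the column map is captured now, not the enclosing HBaseOperationImpl instance.
JavaRDD<Row> rowRDD = myRDD.map(new Mapper(columnFamiliesQualier));

A static nested class inside HBaseOperationImpl would work just as well, as long as it does not reference the enclosing instance.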

The solution was based on the following posts:

https://stackoverflow.com/questions/30828595/spark-notserializableexception

http://bighow.org/questions/30828595/spark-notserializableexception

https://blog.youkuaiyun.com/javastart/article/details/50845767

http://mangocool.com/detail_1_1439971291385.html

 
