Deserialize Avro data from a SequenceFile
// Parse the Avro schema that describes the record bytes stored in each SequenceFile value
Schema avroSchema = new Schema.Parser().parse(new File("src/main/resources/***.avsc"));
Configuration conf = new Configuration(); // your Hadoop env config
Path sequencePath = new Path("hdfs://ip:9000/xx.file");
SequenceFile.Reader sequenceReader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(sequencePath));
int i = 0;
try {
    Text key = new Text();
    BytesWritable value = new BytesWritable();
    // The datum reader depends only on the schema, so build it once outside the loop
    DatumReader<GenericRecord> avroReader = new GenericDatumReader<>(avroSchema);
    GenericRecord record = null;
    BinaryDecoder decoder = null;
    while (sequenceReader.next(key, value)) {
        i++;
        // Only the first getLength() bytes of the BytesWritable's backing array are valid
        decoder = DecoderFactory.get().binaryDecoder(value.getBytes(), 0, value.getLength(), decoder);
        record = avroReader.read(record, decoder);
    }
} finally {
    IOUtils.closeStream(sequenceReader);
}
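For reference, a minimal sketch of the writer side that produces such a file: each SequenceFile value holds one Avro record serialized with a BinaryEncoder. The method name writeAvroToSequenceFile, the inline "User" schema, and the key layout are illustrative only, and the usual org.apache.avro.* / org.apache.hadoop.* imports are assumed.

public static void writeAvroToSequenceFile(String pathstr) throws IOException {
    // Illustrative schema: a record "User" with two fields
    Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"User\",\"fields\":["
          + "{\"name\":\"id\",\"type\":\"long\"},{\"name\":\"name\",\"type\":\"string\"}]}");
    Configuration conf = new Configuration();
    SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(new Path(pathstr)),
            SequenceFile.Writer.keyClass(Text.class),
            SequenceFile.Writer.valueClass(BytesWritable.class));
    try {
        DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        BinaryEncoder encoder = null;
        for (long id = 0; id < 10; id++) {
            GenericRecord record = new GenericData.Record(schema);
            record.put("id", id);
            record.put("name", "user-" + id);
            bos.reset();
            encoder = EncoderFactory.get().binaryEncoder(bos, encoder); // reuse the encoder
            datumWriter.write(record, encoder);
            encoder.flush(); // the binary encoder is buffered, flush before reading the bytes
            writer.append(new Text(String.valueOf(id)), new BytesWritable(bos.toByteArray()));
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}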
Read the schema and data of an Avro file on HDFS
// FsInput lives in the avro-mapred artifact
public static void readAvroFromHDFS(String pathstr) throws IOException {
    Path path = new Path(pathstr);
    Configuration config = new Configuration(); // make this your Hadoop env config
    SeekableInput input = new FsInput(path, config);
    DatumReader<GenericRecord> reader = new GenericDatumReader<>();
    try (FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader)) {
        // The schema is embedded in the Avro container file's header, so no .avsc file is needed
        Schema avroSchema = fileReader.getSchema();
        System.out.println(avroSchema);
        for (GenericRecord datum : fileReader) {
            System.out.println(datum);
        }
    }
}
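For the reverse direction, a minimal sketch of writing GenericRecords into an Avro container file on HDFS; the method name writeAvroToHDFS and its parameters are illustrative, and DataFileWriter comes from the same org.apache.avro.file package as DataFileReader.

public static void writeAvroToHDFS(String pathstr, Schema schema, List<GenericRecord> records) throws IOException {
    Configuration config = new Configuration();
    Path path = new Path(pathstr);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(datumWriter)) {
        // create() writes the schema into the container file header
        fileWriter.create(schema, path.getFileSystem(config).create(path));
        for (GenericRecord record : records) {
            fileWriter.append(record);
        }
    }
}

After writing, calling readAvroFromHDFS(pathstr) above prints that same schema followed by every record.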