I grabbed a few images to use as test data: five in total (the pink ones).
First, upload them to HDFS:
hdfs://172.16.11.222:9000/JpgSequence
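For reference, the upload itself is a couple of commands with the standard HDFS CLI (./jpgs here is just a placeholder for whatever local directory holds the images):

hdfs dfs -mkdir -p /JpgSequence
hdfs dfs -put ./jpgs/*.jpg hdfs://172.16.11.222:9000/JpgSequence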
Target location for the generated SequenceFile:
"hdfs://172.16.11.222:9000/Sequence/bb.txt";
The concrete steps and explanations are all annotated as comments in the code itself, so let's go straight to the code.
One note up front: the SequenceFile API differs between Hadoop versions, so the code varies accordingly. This post uses Hadoop 2.x, which is fairly recent.
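To make the version difference concrete, here is a minimal sketch (my own illustration, not from the original post) of the two ways a SequenceFile.Writer can be created. The option-based form is the Hadoop 2.x style; the FileSystem-based overload is the older one, deprecated in 2.x but still working, and it is what main() in the code below uses:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class WriterStyles {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("hdfs://172.16.11.222:9000/Sequence/bb.txt");

        // Hadoop 2.x style: option objects, no FileSystem handle needed
        SequenceFile.Writer newStyle = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(path),
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(BytesWritable.class));
        newStyle.close();

        // Pre-2.x style: pass the FileSystem explicitly (deprecated in 2.x)
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        SequenceFile.Writer oldStyle = SequenceFile.createWriter(fs, conf, path,
                Text.class, BytesWritable.class);
        oldStyle.close();
    }
}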
Prerequisites: HBase is already running;
a student table with an info column family exists (create one yourself, e.g. with the shell command below).
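If the table does not exist yet, it takes one line in the HBase shell:

hbase(main)> create 'student', 'info'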
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.ReflectionUtils;

import java.net.URI;

/**
 * Created by Administrator on 2017/7/24.
 */
public class SequenceFileTest {

    static String PATH = "hdfs://172.16.11.222:9000/Sequence/bb.txt";
    static SequenceFile.Writer writer = null;

    public static void main(String[] args) throws Exception {
        // Step 1: walk the image directory and pack the files into a SequenceFile.
        // (Already run once, so it is commented out here.)
        // Configuration conf = new Configuration();
        // String path = "hdfs://172.16.11.222:9000/JpgSequence";
        // URI uri = new URI(path);
        // FileSystem fileSystem = FileSystem.get(uri, conf);
        // writer = SequenceFile.createWriter(fileSystem, conf, new Path(PATH),
        //         Text.class, BytesWritable.class);
        // listFileAndWriteToSequenceFile(fileSystem, path);
        // writer.close(); // close the writer, or the SequenceFile may be left incomplete

        // Step 2: read the SequenceFile back and load it into HBase.
        readSequenceFileAndWriteToHBase(new Path(PATH));
    }

    /**
     * Recursively walk a directory and append every file to the SequenceFile:
     * key = full file path (Text), value = raw file bytes (BytesWritable).
     */
    public static void listFileAndWriteToSequenceFile(FileSystem fileSystem, String path) throws Exception {
        final FileStatus[] listStatuses = fileSystem.listStatus(new Path(path));
        for (FileStatus fileStatus : listStatuses) {
            if (fileStatus.isFile()) {
                Text fileText = new Text(fileStatus.getPath().toString());
                System.out.println(fileText.toString());
                FSDataInputStream in = fileSystem.open(new Path(fileText.toString()));
                // IOUtils.toByteArray() already drains the whole stream,
                // so no extra in.read() call is needed afterwards
                byte[] buffer = IOUtils.toByteArray(in);
                in.close();
                BytesWritable value = new BytesWritable(buffer);
                // Append one record to the SequenceFile
                writer.append(fileText, value);
                System.out.println(fileText + " written to the SequenceFile");
            }
            if (fileStatus.isDirectory()) {
                listFileAndWriteToSequenceFile(fileSystem, fileStatus.getPath().toString());
            }
        }
    }

    /**
     * Read the SequenceFile and load every record into HBase.
     * (A tableName parameter really ought to be added here.)
     */
    public static void readSequenceFileAndWriteToHBase(Path path1) throws Exception {
        Configuration conf1 = new Configuration();
        conf1.set("fs.default.name", "hdfs://172.16.11.222:9000");

        // HBase connection settings
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "172.16.11.221,172.16.11.222,172.16.11.223");
        conf.set("hbase.zookeeper.property.clientPort", "2800");
        // Raise this value so HBase does not time out on large cells
        conf.set("dfs.socket.timeout", "180000");
        // Target table
        HTable htable = new HTable(conf, "student");

        // Create the reader -- this is the Hadoop 2.x style, via Reader.Option
        SequenceFile.Reader.Option option1 = SequenceFile.Reader.file(path1);
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(conf1, option1);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf1);
            BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf1);
            long position = reader.getPosition();
            while (reader.next(key, value)) {
                String syncSeen = reader.syncSeen() ? "*" : "";
                System.out.printf("[%s%s]\t%s\t%s\n", position, syncSeen, key, value);

                String temp = key.toString();
                // The key is the full HDFS path, e.g.
                // hdfs://172.16.11.222:9000/JpgSequence/化2.jpg
                // and can be split up to build a custom row key:
                // temp = temp.substring(temp.indexOf("hdfs://") + 7);
                // String[] keyCat = temp.split("/");
                // String tempIp = temp.split("/")[0].split(":")[0];   // 172.16.11.222
                // String port   = temp.split("/")[0].split(":")[1];   // 9000
                // String path   = temp.split("/")[1];                 // JpgSequence
                // String data   = temp.split("/")[keyCat.length - 1]; // 化2.jpg
                // The row key design is up to you; combine the pieces however you like.
                String rowKey = temp; // here the key is used unchanged
                System.out.println(rowKey);

                // Value: copy the raw bytes out of the BytesWritable.
                // (value.toString() would give a hex dump, not the image bytes.)
                byte[] imageBytes = value.copyBytes();

                // Build the Put: row key, column family, column qualifier, value
                Put put = new Put(Bytes.toBytes(rowKey));
                put.add("info".getBytes(), temp.getBytes(), imageBytes);
                htable.put(put);
                System.out.println(rowKey + " ... loaded into HBase");

                position = reader.getPosition(); // beginning of next record
            }
        } finally {
            org.apache.hadoop.io.IOUtils.closeStream(reader);
            htable.close();
        }

        // Below is the original version from the web (old-style reader API);
        // I could not get it to work:
        // BytesWritable val = new BytesWritable();
        // Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf1);
        // val = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf1);
        //
        // while (reader.next(key, val)) {
        //     String temp = key.toString();
        //     temp = temp.substring(temp.indexOf("Image") + 6, temp.indexOf("."));
        //     String[] tmp = temp.split("/");
        //     // row key design
        //     String rowKey = Integer.valueOf(tmp[0]) - 1 + "_"
        //             + Integer.valueOf(tmp[1]) / 2 + "_" + Integer.valueOf(tmp[2]) / 2;
        //     System.out.println(rowKey);
        //     // specify row key, column family, qualifier, value
        //     Put put = new Put(Bytes.toBytes(rowKey));
        //     put.add("picinfo".getBytes(), temp.getBytes(), val.getBytes());
        //     htable.put(put);
        // }
        // org.apache.hadoop.io.IOUtils.closeStream(reader);
    }
}
Here is a screenshot of the result (the output is too long, so only the last few lines are shown):
hbase(main)> scan 'student'