HBase – Hadoop Database,是一个高可靠性、高性能、面向列、可伸缩的分布式存储系统,利用HBase技术可在廉价PC Server上搭建起大规模结构化存储集群。
HBase是Google Bigtable的开源实现,类似Google Bigtable利用GFS作为其文件存储系统,HBase利用Hadoop HDFS作为其文件存储系统;Google运行MapReduce来处理Bigtable中的海量数据,HBase同样利用Hadoop MapReduce来处理HBase中的海量数据;Google Bigtable利用 Chubby作为协同服务,HBase利用Zookeeper作为对应。
HDFS:Hadoop分布式文件系统被设计成适合运行在通用硬件(commodity hardware)上的分布式文件系统。它和现有的分布式文件系统有很多共同点。但同时,它和其他的分布式文件系统的区别也是很明显的。HDFS是一个高度容错性的系统,适合部署在廉价的机器上。HDFS能提供高吞吐量的数据访问,非常适合大规模数据集上的应用。HDFS放宽了一部分POSIX约束,来实现流式读取文件系统数据的目的。HDFS在最开始是作为Apache Nutch搜索引擎项目的基础架构而开发的。HDFS是Apache Hadoop Core项目的一部分。
关于hbase和hdfs的一些基本操作:
1. start hdfs and hbase
$ start-dfs.sh
$ start-hbase.sh
2. stop hdfs and hbase
$ stop-hbase.sh
$ stop-dfs.sh
3. hdfs directory is ~/work/hdfs
4. To compile your java code MyCode.java (implementing class MyCode)
$ javac MyCode.java
then to run it
$ java MyCode <args>
5. compile and run HDFSTest.java
$ javac HDFSTest.java
$ java HDFSTest
6. compile and run HBaseTest.java
$ javac HBaseTest.java
$ java HBaseTest
check if we have successfully created mytable and put the new row
start hbase shell and run command in hbase shell
$ hbase shell
hbase(main):001:0> scan 'mytable'
ROW COLUMN+CELL
abc column=mycf:a, timestamp=1428459927307, value=789
1 row(s) in 1.8950 seconds
hbase(main):002:0> disable 'mytable'
0 row(s) in 1.9050 seconds
hbase(main):003:0> drop 'mytable'
0 row(s) in 1.2320 seconds
hbase(main):004:0> exit
其中HBaseTest.java代码如下:
/*
* Make sure that the classpath contains all the hbase libraries
*
* Compile:
* javac HBaseTest.java
*
* Run:
* java HBaseTest
*/
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.log4j.*;
public class HBaseTest {
    /**
     * Creates the HBase table "mytable" (column family "mycf") if it does not
     * already exist, then inserts a single cell: row "abc", column mycf:a,
     * value "789".
     */
    public static void main(String[] args) throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
        // Quiet the verbose HBase/ZooKeeper log output.
        Logger.getRootLogger().setLevel(Level.WARN);

        String tableName = "mytable";

        // Table schema: a single column family named "mycf".
        HTableDescriptor descriptor = new HTableDescriptor(TableName.valueOf(tableName));
        descriptor.addFamily(new HColumnDescriptor("mycf"));

        // Create the table unless it already exists.
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        if (!admin.tableExists(tableName)) {
            admin.createTable(descriptor);
            System.out.println("table "+tableName+ " created successfully");
        } else {
            System.out.println("Table already exists");
        }
        admin.close();

        // Equivalent of the shell command: put "mytable","abc","mycf:a","789"
        HTable table = new HTable(conf, tableName);
        Put row = new Put("abc".getBytes());
        row.add("mycf".getBytes(), "a".getBytes(), "789".getBytes());
        table.put(row);
        table.close();
        System.out.println("put successfully");
    }
}
HDFS代码如下:
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
/**
 *compile HDFSTest.java
*
* javac HDFSTest.java
*
*execute HDFSTest.java
*
* java HDFSTest
*
*/
public class HDFSTest {
    /**
     * Opens hdfs://localhost:9000/hw1/README.txt and prints it to stdout
     * line by line.
     *
     * @throws IOException        if the file cannot be opened or read
     * @throws URISyntaxException if the hard-coded URI is malformed
     */
    public static void main(String[] args) throws IOException, URISyntaxException {
        String file = "hdfs://localhost:9000/hw1/README.txt";
        Configuration conf = new Configuration();
        // try-with-resources: the original leaked fs and the reader if
        // open()/readLine() threw; both are closed here on every path.
        try (FileSystem fs = FileSystem.get(URI.create(file), conf);
             BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(file))))) {
            String s;
            while ((s = in.readLine()) != null) {
                System.out.println(s);
            }
        }
    }
}
作业内容:学习HBase和HDFS的基本编程使用
具体要求:
//根据distinct key排序,然后相同的会在一起,在输出中把每一个和前一个比较,如果相同则跳过,如果不同则输出。
import java.util.ArrayList;
import java.util.List;
import java.io.*;
import java.util.Comparator;
import java.util.Collections;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.log4j.*;
/*@author wanqeiqiang
*@Time 2017-3-28 18:45*/
/**
 * Compares two rows (each a String[]) attribute by attribute, i.e.
 * lexicographically over the array elements. Used to sort the projected
 * records so that duplicates become adjacent.
 */
class ComparatorString implements Comparator<Object>
{
    /**
     * @param o1 first row, must be a String[]
     * @param o2 second row, must be a String[]
     * @return a negative, zero, or positive int per the Comparator contract
     */
    @Override
    public int compare(Object o1, Object o2)
    {
        String[] a = (String[]) o1;
        String[] b = (String[]) o2;
        // Compare up to the shorter length; the original indexed b with a's
        // length and could throw ArrayIndexOutOfBoundsException. It also
        // carried a dead empty branch (if(i<a.length-1){ ; }) that did nothing.
        int n = Math.min(a.length, b.length);
        for (int i = 0; i < n; i++) {
            int cmp = a[i].compareTo(b[i]);
            if (cmp != 0) {
                return cmp;
            }
        }
        return 0;
    }
}
/**
 * Homework driver: reads an HDFS file of '|'-separated records, keeps the
 * rows whose selected column satisfies a comparison, projects the distinct
 * columns, removes duplicates, and stores the result in the HBase table
 * "Result" (column family "res").
 */
public class Hw1Grp5 {
    /**
     * Expected arguments (positional):
     *   args[0]  R=&lt;path&gt;                  e.g. "R=/hw1/lineitem.tbl"
     *   args[1]  select:R&lt;i&gt;,&lt;op&gt;,&lt;num&gt;   op in {gt,ge,eq,ne,le,lt}
     *   args[2]  distinct:R&lt;i&gt;,R&lt;j&gt;,...
     *
     * @throws IOException        on HDFS/HBase failures
     * @throws URISyntaxException if the constructed HDFS URI is malformed
     */
    public static void main(String[] args) throws IOException, URISyntaxException {
        // R=<path> -> full HDFS URL
        String file = "hdfs://localhost:9000" + args[0].substring(2);

        // select:R<i>,<op>,<num>
        String[] select = args[1].substring(7).split(",");
        int selectCol = Integer.parseInt(select[0].substring(1));
        String op = select[1];
        double threshold = Double.parseDouble(select[2]);

        // distinct:R<i>,R<j>,... -> column indices to project
        String[] distinct = args[2].substring(9).split(",");
        int[] distinctCols = new int[distinct.length];
        for (int i = 0; i < distinct.length; i++) {
            distinctCols[i] = Integer.parseInt(distinct[i].substring(1));
        }

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(file), conf);
        Path path = new Path(file);
        FSDataInputStream in_stream = fs.open(path);
        BufferedReader in = new BufferedReader(new InputStreamReader(in_stream));

        // Single read loop: the original duplicated this loop six times, once
        // per operator; matches() now evaluates the operator per record.
        List<String[]> lst = new ArrayList<String[]>();
        String s;
        while ((s = in.readLine()) != null) {
            String[] line = s.split("\\|");
            double value = Double.parseDouble(line[selectCol]);
            if (matches(op, value, threshold)) {
                lst.add(project(line, distinctCols));
            }
        }

        // Sort so equal rows are adjacent, then drop duplicates and store.
        Collections.sort(lst, new ComparatorString());
        List<String[]> list_distinct = listDistinct(lst);
        putHbase(list_distinct, distinct);

        in.close();
        fs.close();
    }

    /** Evaluates value &lt;op&gt; threshold for op in {gt,ge,eq,ne,le,lt}; any other op matches nothing (as in the original, which simply skipped unknown ops). */
    private static boolean matches(String op, double value, double threshold) {
        if (op.equals("gt")) return value >  threshold;
        if (op.equals("ge")) return value >= threshold;
        if (op.equals("eq")) return value == threshold;
        if (op.equals("ne")) return value != threshold;
        if (op.equals("le")) return value <= threshold;
        if (op.equals("lt")) return value <  threshold;
        return false;
    }

    /** Copies the columns at the given indices out of one parsed record. */
    private static String[] project(String[] line, int[] cols) {
        String[] out = new String[cols.length];
        for (int j = 0; j < cols.length; j++) {
            out[j] = line[cols[j]];
        }
        return out;
    }

    /**
     * Removes adjacent duplicate rows from a SORTED list (rows compared
     * element by element).
     *
     * @param list sorted rows; may be empty (the original crashed on empty
     *             input via list.get(0))
     * @return a new list containing the first occurrence of each row
     */
    public static List<String[]> listDistinct(List<String[]> list) {
        List<String[]> list_distinct = new ArrayList<String[]>();
        if (list.isEmpty()) {
            return list_distinct;
        }
        list_distinct.add(list.get(0));
        for (int i = 1; i < list.size(); i++) {
            String[] prev = list.get(i - 1);
            String[] cur = list.get(i);
            // Advance while elements agree; a surviving index means "different".
            int j = 0;
            while (j < cur.length && prev[j].equals(cur[j])) {
                j++;
            }
            if (j < cur.length) {
                list_distinct.add(cur);
            }
        }
        return list_distinct;
    }

    /**
     * Writes the rows into HBase table "Result", column family "res".
     * Row key = row index in the list; qualifier = the distinct-column label
     * (e.g. "R1"). A pre-existing "Result" table is dropped and recreated so
     * stale rows from a previous run do not linger.
     *
     * @param list     deduplicated rows to store
     * @param distinct column labels, one per element of each row
     */
    public static void putHbase(List<String[]> list, String[] distinct) throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
        Logger.getRootLogger().setLevel(Level.WARN);
        String tableName = "Result";
        HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
        htd.addFamily(new HColumnDescriptor("res"));
        Configuration configuration = HBaseConfiguration.create();
        HBaseAdmin hAdmin = new HBaseAdmin(configuration);
        if (hAdmin.tableExists(tableName)) {
            System.out.println("Table already exists");
            hAdmin.disableTable(tableName);
            hAdmin.deleteTable(tableName);
            hAdmin.createTable(htd);
        } else {
            hAdmin.createTable(htd);
            System.out.println("table "+tableName+ " created successfully");
        }
        hAdmin.close();

        HTable table = new HTable(configuration, tableName);
        for (int i = 0; i < list.size(); i++) {
            String[] row = list.get(i);
            // One Put per row (the original issued one Put/RPC per cell);
            // all cells land under the same row key either way.
            Put put = new Put(String.valueOf(i).getBytes());
            for (int j = 0; j < distinct.length; j++) {
                put.add("res".getBytes(), distinct[j].getBytes(), row[j].getBytes());
            }
            table.put(put);
        }
        table.close();
        System.out.println("put successfully");
    }
}