In this article, you will learn how to perform CRUD operations on HBase using the HBase Client API. The example code targets HBase 0.98.x and is compatible with HBase 1.x.
First, let's look at how CRUD maps across SQL, the HBase Client API, and the HBase Shell:
SQL | HBase Client API | HBase Shell
---|---|---
select | Get/Scan | get/scan
insert | Put | put
delete | Delete | delete
update | Put | put
As with any RDBMS, you first need to set up a connection to HBase:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
// Classes used by the CRUD examples below
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.util.Bytes;
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "localhost");
conf.set("hbase.zookeeper.property.clientPort", "2181");
HBaseAdmin admin = new HBaseAdmin(conf);
String tableName = "employees";
HTable table = new HTable(conf, tableName);
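HTable and HBaseAdmin are the 0.98-style entry points and are deprecated in the HBase 1.x client. If you are on 1.x, a minimal sketch of the equivalent setup with ConnectionFactory (same quorum settings as above) would look like this:
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;
// One heavyweight Connection per application; Table and Admin are cheap, short-lived handles.
Connection connection = ConnectionFactory.createConnection(conf);
Admin admin = connection.getAdmin();
Table table = connection.getTable(TableName.valueOf("employees"));
// ... use table / admin ...
table.close();
admin.close();
connection.close();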
Put
Insert a single row of data:
String tableName = "employees";
HTable htable = new HTable(conf, tableName);
Put put = new Put(Bytes.toBytes("Development_999999"));
byte[] familyName = Bytes.toBytes("cf1");
String[] columnNames = new String[] { "p-birth_date", "p-first_name",
"p-last_name","p-gender", "p-hire_date", "d-from_date", "d-to_date" };
String[] values = new String[] { "1970-03-10", "John", "Li", "M", "1988-05-08", "1988-05-08", "9999-01-01" };
put.add(familyName, Bytes.toBytes(columnNames[0]), Bytes.toBytes( toDateValue(values[0]) ));
put.add(familyName, Bytes.toBytes(columnNames[1]), Bytes.toBytes( values[1] ));
put.add(familyName, Bytes.toBytes(columnNames[2]), Bytes.toBytes( values[2] ));
put.add(familyName, Bytes.toBytes(columnNames[3]), Bytes.toBytes( values[3] ));
put.add(familyName, Bytes.toBytes(columnNames[4]), Bytes.toBytes( toDateValue(values[4]) ));
put.add(familyName, Bytes.toBytes(columnNames[5]), Bytes.toBytes( toDateValue(values[5]) ));
put.add(familyName, Bytes.toBytes(columnNames[6]), Bytes.toBytes( toDateValue(values[6]) ));
htable.put(put);
htable.close();
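The toDateValue() helper called above is not shown in this article. A minimal sketch, assuming the input looks like "1970-03-10" and is stored as a normalized yyyy-MM-dd string (both the format and the behavior are assumptions), might be:
import java.text.ParseException;
import java.text.SimpleDateFormat;
// Hypothetical helper, not part of the original example.
private static String toDateValue(String raw) {
    SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
    fmt.setLenient(false);
    try {
        // Validate and normalize the date string before it is written to HBase.
        return fmt.format(fmt.parse(raw));
    } catch (ParseException e) {
        throw new IllegalArgumentException("Unparsable date: " + raw, e);
    }
}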
Get
Retrieve a single row. Now let's read back the row we just Put:
String tableName = "employees";
HTable htable = new HTable(conf, tableName);
Get get = new Get(Bytes.toBytes("Development_999999"));
Result result = htable.get(get);
System.out.println( "result size: " + result.size() );
System.out.println( "result string: " + result.toString() );
for (int i=0; i<result.size(); i++) {
System.out.println( String.format("result[%d]: %s", i, result.raw()[i].toString()));
}
htable.close();
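If you only need a few cells rather than walking the raw KeyValue array, Get.addColumn() limits what is fetched and Result.getValue() returns a single cell. A small sketch using the column names from the Put example above:
Get get = new Get(Bytes.toBytes("Development_999999"));
get.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("p-first_name")); // fetch only this column
Result result = htable.get(get);
byte[] firstName = result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("p-first_name"));
System.out.println("p-first_name: " + Bytes.toString(firstName));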
Scan
Scan all rows in the table:
String tableName = "employees";
HTable htable = new HTable(conf, tableName);
Scan scan = new Scan();
ResultScanner scanner = htable.getScanner(scan);
for (Result res : scanner) {
System.out.println(res);
}
scanner.close();
htable.close();
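A bare Scan walks the entire table, which can be expensive. Start/stop row keys and column selection narrow it down; the row-key range below is only an illustration based on the Development_* keys used in this article (the stop row is exclusive):
Scan scan = new Scan();
scan.setStartRow(Bytes.toBytes("Development_000000")); // inclusive
scan.setStopRow(Bytes.toBytes("Development_999999"));  // exclusive
scan.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("p-first_name"));
scan.setCaching(100); // number of rows fetched per RPC
ResultScanner scanner = htable.getScanner(scan);
try {
    for (Result res : scanner) {
        System.out.println(Bytes.toString(res.getRow()) + " => "
                + Bytes.toString(res.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("p-first_name"))));
    }
} finally {
    scanner.close();
}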
Delete
Delete a single row:
String tableName = "employees";
HTable htable = new HTable(conf, tableName);
Delete delete = new Delete(Bytes.toBytes("Development_999999"));
htable.delete(delete);
htable.close();
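A Delete built only from the row key removes the whole row. To remove just part of it, narrow the Delete with the column/family methods; a small sketch using the 0.98 method names (in 1.x the equivalents are addColumns/addFamily):
Delete delete = new Delete(Bytes.toBytes("Development_999999"));
delete.deleteColumns(Bytes.toBytes("cf1"), Bytes.toBytes("d-to_date")); // all versions of one column
// delete.deleteFamily(Bytes.toBytes("cf1"));                           // or an entire column family
htable.delete(delete);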
Now that we've covered the Hello World version of the API, the next topic will be how to use HBase Bulk Load.
In the line you wrote:
Put put = new Put(Bytes.toBytes("Development_999999"));
is Development_999999 the row key?
Yes, it is.
I followed your approach to connect and insert data, but something is going wrong. This is my first time using HBase and I can't tell where the problem is. Could you point me in the right direction?
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
public class Input_HBase {
public static void main(String avgs[]) throws IOException{
FileReader fr = new FileReader("D:\\DataIDSeq\\download-20190618T065704Z-001\\part-r-00000_Virus7538_VS_HomoSapiens20160721_26s_Len100_HumanAndVirus_NonPolyTandem.csv");
BufferedReader bfr = new BufferedReader(fr);
String str;
String data[];
Configuration config = HBaseConfiguration.create();
config.set("hbase.zookeeper.quorum","10.96.3.69:2181");
config.set("hbase.zookeeper.property.clientPort","60000");
HBaseAdmin admin = new HBaseAdmin(config);
String tablename = "Virus_Search";
HTable hTable = new HTable(config,tablename);
while((str = bfr.readLine())!=null){
data = new String(str).trim().split(",");
for(int i = 0;i<data.length;i++){
Put p = new Put(Bytes.toBytes(data[0]));
byte[] familyName1 = Bytes.toBytes("Seq");
byte[] familyName2 = Bytes.toBytes("Len");
byte[] familyName3 = Bytes.toBytes("VirusID");
String [] Column1_1 = new String[]{"seq","len","virusid"};
p.add(familyName1,Bytes.toBytes(Column1_1[0]),Bytes.toBytes(data[0]));
p.add(familyName2,Bytes.toBytes(Column1_1[1]),Bytes.toBytes(data[1]));
p.add(familyName3,Bytes.toBytes(Column1_1[2]),Bytes.toBytes(data[2]));
hTable.put(p);
System.out.println("data inserted");
}
}
hTable.close();
}
}
The error message is below:
"C:\Program Files\Java\jdk1.8.0_231\bin\java.exe" "-javaagent:C:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2019.2.3\lib\idea_rt.jar=55024:C:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2019.2.3\bin" -Dfile.encoding=UTF-8 -classpath "C:\Program Files\Java\jdk1.8.0_231\jre\lib\charsets.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\deploy.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\ext\access-bridge-64.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\ext\cldrdata.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\ext\dnsns.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\ext\jaccess.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\ext\jfxrt.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\ext\localedata.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\ext\mysql-connector-java-5.1.25-bin.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\ext\nashorn.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\ext\sunec.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\ext\sunjce_provider.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\ext\sunmscapi.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\ext\sunpkcs11.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\ext\zipfs.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\javaws.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\jce.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\jfr.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\jfxswt.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\jsse.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\management-agent.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\plugin.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\resources.jar;C:\Program Files\Java\jdk1.8.0_231\jre\lib\rt.jar;D:\Code\InputData_HBase\out\production\InputData_HBase;E:\Hbase_Java_Connect_JARRRRRRRRRR\hadoop-common-2.7.3.2.6.3.0-235.jar;E:\Hbase_Java_Connect_JARRRRRRRRRR\hbase-client-1.1.2.2.6.3.0-235.jar;E:\Hbase_Java_Connect_JARRRRRRRRRR\hbase-common-1.1.2.2.6.3.0-235.jar" Input_HBase
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/commons/logging/LogFactory
at org.apache.hadoop.conf.Configuration.<clinit>(Configuration.java:186)
at Input_HBase.main(Input_HBase.java:25)
Caused by: java.lang.ClassNotFoundException: org.apache.commons.logging.LogFactory
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:355)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
... 2 more
Process finished with exit code 1
at Input_HBase.main(Input_HBase.java:25)
Caused by: java.lang.ClassNotFoundException: org.apache.commons.logging.LogFactory
The error message already tells you that this class cannot be found. Please check whether the jar that provides org.apache.commons.logging (i.e. commons-logging) is on your classpath.
Thank you for the answer!