本文共 1780 字,大约阅读时间需要 5 分钟。
利用Python调用HBase,需要先安装thrift和hbase-thrift这两个包。
启动HBase的thrift服务:bin/hbase-daemon.sh start thrift,默认端口是9090。
mysql 到hbase的数据同步:
1、put
2、Importtsv
3、编写MapReduce Job导入
4、sqoop
简单code:
#!/usr/bin/env python
#coding=utf-8
import sys
sys.path.append('/usr/lib/python2.6/site-packages/hbase')
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import *
import csv
from hbase.ttypes import ColumnDescriptor, Mutation, BatchMutation, TRegionInfo
from hbase.ttypes import IOError, AlreadyExists
######
def client_conn(host="172.16.10.87", port=9090):
    """Open a buffered, binary-protocol Thrift connection and return an HBase client.

    Args:
        host: Address of the HBase Thrift server. Defaults to the address
            that was previously hard-coded here.
        port: TCP port of the Thrift server. Defaults to 9090.

    Returns:
        An ``Hbase.Client`` bound to a transport that has already been opened.

    Raises:
        Whatever ``transport.open()`` raises when the connection cannot be
        established (presumably a Thrift transport exception -- confirm
        against the installed thrift version).

    Note:
        Only the client is returned; the transport object is not handed back
        to the caller, so the caller has no direct handle to close it.
    """
    sock = TSocket.TSocket(host, port)
    # Wrap the raw socket in a buffered transport before layering the protocol.
    transport = TTransport.TBufferedTransport(sock)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = Hbase.Client(protocol)
    transport.open()
    return client
def __del__(transport=None):
    """Close a Thrift transport.

    The original body referenced a name ``transport`` that only exists as a
    local variable inside ``client_conn``, so calling this function raised
    ``NameError``. The transport can now be passed in explicitly.

    Args:
        transport: Object exposing ``close()``. When omitted, a module-level
            variable named ``transport`` is used if one exists; otherwise
            the call does nothing.

    Note:
        A module-level function named ``__del__`` is not a finalizer; Python
        never invokes it automatically. It has to be called explicitly.
    """
    if transport is None:
        # Preserve the original lookup of a global ``transport`` when present.
        transport = globals().get("transport")
    if transport is not None:
        transport.close()
if __name__ == "__main__":
    # Demo script: runs a handful of HBase Thrift calls. The createTable call
    # below is commented out, so the tables used here ('name2', 'ca_record',
    # 't2', 't3') must already exist on the server.
    #
    # Every print uses the single-argument call form, which behaves the same
    # as a Python 2 print statement and as the Python 3 print function.
    client = client_conn()

    # List the table names.
    print(client.getTableNames())

    # Create a table (disabled example).
    #client.createTable('name2',[ColumnDescriptor(name="user_id:",maxVersions=1),ColumnDescriptor(name="user_name"),])

    # Write data: one cell into row 'a1' of 'name2', one into row '1' of 'ca_record'.
    client.mutateRow('name2', 'a1', [Mutation(column='user_id:1', value="1")])
    client.mutateRow('ca_record', '1', [Mutation(column='user_id:0', value='5')])

    # Read the row back.
    for r in client.getRow('name2', 'a1'):
        print('row %s' % r.row)
        # NOTE(review): '\br' is a backspace followed by 'r'; possibly a typo
        # for a line break. Kept as-is to preserve the output.
        print('\br')
        # Guard against the cell being absent instead of failing on None.value.
        cell = r.columns.get("user_id:1")
        print('value %s' % (cell.value if cell is not None else None))

    # Delete a table (disabled example).
    #client.disableTable("t1")
    #client.deleteTable("t1")
    print(client.getTableNames())

    # Fetch row keys of a table through a scanner.
    #print client.scannerGet(client.scannerOpen('t2',"cmd",["a"]))
    # An empty start row makes the scanner begin at the first row.
    scanner_id = client.scannerOpen('t2', "", ["a"])
    try:
        print(client.scannerGet(scanner_id))
    finally:
        # Always close the scanner so it is not left open on the server.
        client.scannerClose(scanner_id)

    print("------")
    print(client.getColumnDescriptors('t3'))
#在自己的开发环境中,只要安装好thrift及hbase-thrift的包,import的时候就不会出问题
本文转自 DBAspace 51CTO博客,原文链接:http://blog.51cto.com/dbaspace/1950002
转载地址:http://wsgja.baihongyu.com/