最近在研究腾讯的 TBDS 产品，但是没有相关的 HDFS SDK，因此自己开发了一个。此 SDK 同时兼容其它平台的 HDFS 认证方式（InsecureClient、TokenClient）。
安装tbds-hdfs
pip install tbds-hdfs
使用方法
获取client
from hdfs import TbdsClient

# Credentials issued by the TBDS platform (placeholders).
SecretId = 'xxxxxxx'
SecretKey = 'xxxxxxx'
# High-availability form is supported: separate the namenode URLs with ';'.
url = 'http://xxxx:50070;http://xxxxx:50070'

# All three arguments are passed positionally: Python rejects a positional
# argument that follows a keyword argument (the original
# `TbdsClient(url=..., SecretId, SecretKey)` was a SyntaxError).
# NOTE(review): argument order (url, SecretId, SecretKey) is taken from the
# original call - confirm against the TbdsClient signature.
client = TbdsClient(url, SecretId, SecretKey)
读取文件
# Read the whole file into memory in one go.
with client.read('features') as reader:
    features = reader.read().decode()

# Deserialize a JSON document directly from the stream.
from json import load

with client.read('model.json', encoding='utf-8') as reader:
    model = load(reader)

# Stream the file in chunks instead of loading it all at once.
with client.read('features', chunk_size=8096) as reader:
    for chunk in reader:
        pass

# Read line by line: with a delimiter the reader yields one piece per line.
with client.read('samples.csv', encoding='utf-8', delimiter='\n') as reader:
    for line in reader:
        pass
写入文件
# Copy a local file to HDFS line by line. The writer is opened without an
# encoding, so each line is encoded to bytes before it is written.
with open('samples') as reader, client.write('samples', overwrite=True) as writer:
    for line in reader:
        writer.write(line.encode('utf8'))

# Serialize an object to a JSON file on HDFS.
# `model` is any JSON-serializable object.
from json import dump

with client.write('model.json', encoding='utf-8') as writer:
    dump(model, writer)
其它文件操作方法
# Get the content summary of a file or folder.
# NOTE(review): in the upstream hdfs (HdfsCLI) client, content() returns
# summary metadata (size, file/directory counts), not the file's data -
# confirm TbdsClient behaves the same.
content = client.content('dat')
# List the names of the entries inside a folder.
fnames = client.list('dat')
# Get the status of a file or folder.
status = client.status('dat/features')
# Rename (move) a file.
client.rename('dat/features', 'features')
# Delete a file or folder; recursive=True is passed to delete the folder
# 'dat' together with its contents.
client.delete('dat', recursive=True)