# coding=utf-8
import os
from multiprocessing import cpu_count
import numpy as np
import paddle
import paddle.fluid as fluid
#import paddlehub as hub
class classify():
    """Text classifier: a training stub plus batch prediction with a saved model.

    All configuration lives in the class attributes below. Every method only
    reads attributes through ``self``, so the methods work both on an instance
    (``classify().train()``) and with the class itself passed as ``self``
    (``classify.train(classify)``), which is the call style used by this
    script's entry point.
    """

    data_root_path = ""
    # Vocabulary file: its first line holds a dict literal mapping token -> id.
    dict_path = "data/data9658/dict.txt"
    # Samples to predict: one sample per line, comma-separated integer ids.
    test_data_path = "data/data9658/Test_IDs.txt"
    # Directory the inference model is loaded from.
    model_save_dir = "work/classify_nn/"
    # Output file: one predicted category name per line.
    save_path = 'work/result.txt'

    def train(self):
        """Train the model (placeholder) and then run prediction via ``test``."""
        # TODO: the training code is intentionally left for the student to
        # fill in. ``test`` expects a saved inference model in
        # ``model_save_dir`` once training is done.
        print('训练模型保存完成!')
        # ``self`` is either the class itself (legacy ``classify.train(classify)``
        # call) or an instance. Looking ``test`` up on the class and passing
        # ``self`` explicitly supports both; the original ``self.test(self)``
        # raised TypeError when called on an instance.
        owner = self if isinstance(self, type) else type(self)
        owner.test(self)

    def get_data(self, sentence):
        """Convert ``sentence`` into a list of integer ids using the vocabulary.

        Args:
            sentence: iterable of tokens (e.g. a string, iterated per character).

        Returns:
            list[int]: one id per token; tokens missing from the vocabulary
            are mapped to the id of ``'<unk>'``.

        Raises:
            KeyError: if a token is unknown and the vocabulary has no
                ``'<unk>'`` entry.
        """
        import ast

        # Read the vocabulary from the first line of the dictionary file.
        # SECURITY: the original used eval() here, which executes arbitrary
        # code found in the file. ast.literal_eval only accepts Python
        # literals, which is all a serialized dict needs.
        with open(self.dict_path, 'r', encoding='utf-8') as f_data:
            dict_txt = dict(ast.literal_eval(f_data.readline()))
        # Translate each token, falling back to '<unk>' for unknown ones.
        return [int(dict_txt[s if s in dict_txt else '<unk>']) for s in sentence]

    def test(self):
        """Predict a category for every sample in ``test_data_path``.

        Reads the samples, loads the inference model from ``model_save_dir``
        once, runs prediction in four consecutive chunks and writes one
        category name per sample to ``save_path``. The output file is
        overwritten on every run so predictions from earlier runs do not pile
        up in it.
        """
        # Load the samples to predict. Blank lines (e.g. a trailing newline at
        # the end of the file) are skipped instead of crashing on int('').
        data = []
        with open(self.test_data_path, 'r', encoding='utf-8') as test_data:
            for line in test_data:
                line = line.strip()
                if line:
                    data.append([int(word) for word in line.split(',')])
        print ('数据加载完毕,数据长度:',len(data))
        # To classify raw text instead, encode it with get_data() and use the
        # encoded list as the only sample in ``data``.

        # Category names, indexed by the predicted class id.
        names = ["财经", "彩票", "房产", "股票", "家居", "教育", "科技",
                 "社会", "时尚", "时政", "体育", "星座", "游戏", "娱乐"]

        # GPU 0 hosts both the input tensors and the executor; switch to
        # fluid.CPUPlace() on a machine without a GPU (slower).
        place = fluid.CUDAPlace(0)
        infer_exe = fluid.Executor(place)
        # Run parameter initialization before loading the saved model.
        infer_exe.run(fluid.default_startup_program())
        print('feeder')
        # Load the inference program, its input variable names and its output
        # targets once, instead of once per chunk.
        print('loading model')
        [infer_program, feeded_var_names, target_var] = fluid.io.load_inference_model(
            dirname=self.model_save_dir, executor=infer_exe)

        # Chunk boundaries: three equal quarters, the last chunk takes the rest.
        quarter = len(data) // 4
        bounds = [0, quarter, 2 * quarter, 3 * quarter, len(data)]

        with open(self.save_path, 'w', encoding='utf-8') as ans:
            for part, (start, stop) in enumerate(zip(bounds, bounds[1:]), start=1):
                print('loading %d/4 data' % part)
                chunk = data[start:stop]
                if not chunk:
                    # Fewer than four samples leave some chunks empty.
                    continue
                # Sequence length of every sample, as the LoD information.
                base_shape = [[len(c) for c in chunk]]
                print('loading tensor')
                tensor_words = fluid.create_lod_tensor(chunk, base_shape, place)
                result = infer_exe.run(program=infer_program,
                                       feed={feeded_var_names[0]: tensor_words},
                                       fetch_list=target_var)
                print('writting')
                # The last column of the ascending argsort is the index of the
                # highest score of each sample; computed once per chunk.
                top_labels = np.argsort(result)[0][:, -1]
                ans.writelines(names[lab] + "\n" for lab in top_labels)
        print('测试输出已生成!')
# Script entry point: run the training step, which triggers prediction itself.
if __name__ == "__main__":
    # No instance is created; the class object itself is handed in as `self`.
    entry_point = classify.train
    entry_point(classify)
    # To run prediction only, call classify.test(classify) here instead.
# IPython/Jupyter shell escapes (the leading `!` is only valid inside a notebook,
# not in a plain Python script): remove any old submit script, download a fresh
# copy, then run it on the generated result file.
# NOTE(review): the last argument ("密码", i.e. "password") looks like a
# placeholder for the real submission password - confirm before running.
!rm -rf submit.sh
!wget -O submit.sh http://ai-studio-static.bj.bcebos.com/script/submit.sh
!sh submit.sh work/result.txt 密码
# 请点击此处查看本环境基本用法。
# Please click here for more detailed instructions.