1. # coding=utf-8
    2. import os
    3. from multiprocessing import cpu_count
    4. import numpy as np
    5. import paddle
    6. import paddle.fluid as fluid
    7. #import paddlehub as hub
    class classify():
        """Batch text classifier built on a saved PaddlePaddle inference model.

        All configuration lives in class attributes, and every method only reads
        attributes through ``self``.  The methods therefore work both on a normal
        instance (``classify().train()``) and in the unbound style used by the
        script entry point (``classify.train(classify)``), where the class itself
        stands in for ``self``.
        """

        data_root_path = ""
        # Vocabulary file: its first line is evaluated to a token -> id mapping.
        dict_path = "data/data9658/dict.txt"
        # Samples to classify: one line per sample, comma-separated integer ids.
        test_data_path = "data/data9658/Test_IDs.txt"
        # Directory holding the saved inference model.
        model_save_dir = "work/classify_nn/"
        # Predicted label names are appended here, one per line.
        save_path = 'work/result.txt'

        # Label names indexed by predicted class id (finance, lottery, real
        # estate, stocks, home, education, technology, society, fashion,
        # politics, sports, horoscope, games, entertainment).
        _LABEL_NAMES = ["财经", "彩票", "房产", "股票", "家居", "教育", "科技",
                        "社会", "时尚", "时政", "体育", "星座", "游戏", "娱乐"]
        # The test set is fed to the model in this many consecutive slices
        # instead of as one large tensor.
        _NUM_CHUNKS = 4

        def train(self):
            """Train the model (left as an exercise), then run prediction.

            The training code itself is intentionally absent: students are
            expected to fill it in and save the model to ``model_save_dir``.
            """
            #
            # Students fill in the training code here
            # to complete model training.
            #
            print('训练模型保存完成!')
            # Dispatch through the class so this works whether `self` is an
            # instance or the class object itself; `self.test(self)` only
            # worked in the latter case and raised TypeError on instances.
            owner = self if isinstance(self, type) else type(self)
            owner.test(self)

        def get_data(self, sentence):
            """Encode ``sentence`` as a list of integer ids using the vocabulary.

            Args:
                sentence: iterable of tokens; a ``str`` is walked one character
                    at a time.

            Returns:
                list[int]: one id per token.  Tokens missing from the vocabulary
                are mapped to the id of ``'<unk>'``.

            Raises:
                KeyError: if an unknown token is met and the vocabulary has no
                    ``'<unk>'`` entry.
            """
            # Read the vocabulary dictionary.
            with open(self.dict_path, 'r', encoding='utf-8') as f_data:
                # SECURITY: eval() executes whatever the first line of the file
                # contains.  Only point dict_path at trusted files; if the file
                # is a plain dict literal, ast.literal_eval is a safer parser.
                dict_txt = dict(eval(f_data.readlines()[0]))

            # Convert the tokens to ids, falling back to '<unk>' for unknowns.
            data = []
            for s in sentence:
                if s not in dict_txt:
                    s = '<unk>'
                data.append(int(dict_txt[s]))
            return data

        @staticmethod
        def _read_samples(path):
            """Parse ``path`` into a list of samples (lists of int word ids).

            Each non-blank line holds one sample as comma-separated integers.
            Blank lines are skipped rather than crashing on ``int('')``.
            """
            samples = []
            with open(path, 'r', encoding='utf-8') as test_data:
                for line in test_data:
                    line = line.strip()
                    if not line:
                        continue
                    samples.append([int(word) for word in line.split(',')])
            return samples

        @staticmethod
        def _split_chunks(data, parts):
            """Cut ``data`` into ``parts`` consecutive slices.

            The first ``parts - 1`` slices hold ``len(data) // parts`` items each
            and the last slice takes the remainder, so slices may be empty when
            ``data`` is shorter than ``parts``.
            """
            step = len(data) // parts
            bounds = [i * step for i in range(parts)] + [len(data)]
            return [data[lo:hi] for lo, hi in zip(bounds, bounds[1:])]

        def test(self):
            """Classify every sample in ``test_data_path``.

            Loads the inference model from ``model_save_dir`` once, runs it over
            the test samples slice by slice and appends the predicted label name
            of each sample, one per line, to ``save_path``.

            NOTE: ``save_path`` is opened in append mode, so running this more
            than once accumulates results in the same file.
            """
            # Load the samples to predict.
            data = self._read_samples(self.test_data_path)
            print ('数据加载完毕,数据长度:',len(data))

            # Create the executor on the first GPU (CPU is comparatively slow;
            # swap in fluid.CPUPlace() to run without CUDA).
            place = fluid.CUDAPlace(0)
            infer_exe = fluid.Executor(place)
            # Initialise parameters.
            infer_exe.run(fluid.default_startup_program())
            print('feeder')
            # Fetch the inference program, input variable names and classifier
            # output from the saved model.  Done once and reused for every
            # slice instead of reloading the model per slice.
            print('loading model')
            [infer_program, feeded_var_names, target_var] = fluid.io.load_inference_model(
                dirname=self.model_save_dir, executor=infer_exe)

            total = self._NUM_CHUNKS
            chunks = self._split_chunks(data, total)
            for index, chunk in enumerate(chunks, start=1):
                print('loading %d/%d data' % (index, total))
                if not chunk:
                    # Fewer samples than slices: nothing to feed for this one.
                    continue

                print('loading tensor')
                # Number of words in each sample of the slice.
                base_shape = [[len(c) for c in chunk]]
                tensor_words = fluid.create_lod_tensor(chunk, base_shape, place)
                # Run the prediction.
                result = infer_exe.run(program=infer_program,
                                       feed={feeded_var_names[0]: tensor_words},
                                       fetch_list=target_var)

                # Write the results.
                print('writting')
                # Index of the highest score per sample, computed once for the
                # whole slice rather than re-sorting for every row.
                labels = np.argsort(result[0], axis=-1)[:, -1]
                with open(self.save_path, 'a', encoding='utf-8') as ans:
                    ans.writelines(self._LABEL_NAMES[lab] + "\n" for lab in labels)
            print('测试输出已生成!')
    if __name__ == "__main__":
        # Script entry point.  `train` is called unbound with the class object
        # standing in for `self`; it finishes by running prediction.
        # To run prediction only against an already saved model, call
        # classify.test(classify) instead.
        classify.train(classify)
    # Notebook shell cell: the leading `!` is the IPython shell escape, so these
    # lines run in a notebook, not in a plain Python script.
    # Remove any stale copy of the submission script and download a fresh one.
    !rm -rf submit.sh
    !wget -O submit.sh http://ai-studio-static.bj.bcebos.com/script/submit.sh
    # Submit the prediction file.  The last argument (密码, "password") looks like
    # a placeholder for the real submission password; confirm before running.
    !sh submit.sh work/result.txt 密码

    请点击此处查看本环境基本用法.

    Please click here for more detailed instructions.