使用 GPT-3.5 模型构建油管频道问答机器人

在 ChatGPT API(也就是 GPT-3.5-Turbo 模型)推出后,因其价格低、效果好而深受大家喜爱,所以 LangChain 也加入了专属的链和模型。我们跟着下面这个例子来看看如何使用它。

  import os
  from langchain.document_loaders import YoutubeLoader
  from langchain.embeddings.openai import OpenAIEmbeddings
  from langchain.vectorstores import Chroma
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain.chains import ChatVectorDBChain, ConversationalRetrievalChain
  from langchain.chat_models import ChatOpenAI
  from langchain.prompts.chat import (
      ChatPromptTemplate,
      SystemMessagePromptTemplate,
      HumanMessagePromptTemplate,
  )

  # Question-answering bot over the transcript of one YouTube video.
  #
  # NOTE(review): `os` and `ChatVectorDBChain` are imported but not used in
  # this listing; `os` is presumably meant for setting the OpenAI API key in
  # the environment -- confirm before removing either import.

  # Load the YouTube video.
  loader = YoutubeLoader.from_youtube_url('https://www.youtube.com/watch?v=Dj60HHy-Kqk')
  # Convert the loaded data into documents.
  documents = loader.load()

  # Initialise the text splitter.
  text_splitter = RecursiveCharacterTextSplitter(
      chunk_size=1000,
      chunk_overlap=20,
  )
  # Split the YouTube documents into chunks.
  documents = text_splitter.split_documents(documents)

  # Initialise the OpenAI embeddings.
  embeddings = OpenAIEmbeddings()
  # Store the chunks in the vector store.
  vector_store = Chroma.from_documents(documents, embeddings)
  # Build a retriever on top of the vector store.
  retriever = vector_store.as_retriever()

  # System prompt for the answering step. `{context}` receives the retrieved
  # transcript chunks and `{chat_history}` the conversation so far. The
  # template is written as joined single-line literals so that its value does
  # not depend on how this listing is indented.
  system_template = (
      "\n"
      "Use the following context to answer the user's question.\n"
      "If you don't know the answer, say you don't, don't try to make it up. "
      "And answer in Chinese.\n"
      "-----------\n"
      "{context}\n"
      "-----------\n"
      "{chat_history}\n"
  )

  # Initial message list; think of it as the `messages` argument sent to OpenAI.
  messages = [
      SystemMessagePromptTemplate.from_template(system_template),
      HumanMessagePromptTemplate.from_template('{question}'),
  ]

  # Build the prompt object.
  prompt = ChatPromptTemplate.from_messages(messages)

  # Build the question-answering chain. The prompt above is an *answering*
  # prompt, so it is handed to the document-combining step through
  # `combine_docs_chain_kwargs`. `condense_question_prompt` only controls how a
  # follow-up question is rewritten into a standalone one, and is left at its
  # default.
  qa = ConversationalRetrievalChain.from_llm(
      ChatOpenAI(temperature=0.1, max_tokens=2048),
      retriever,
      combine_docs_chain_kwargs={'prompt': prompt},
  )

  # Conversation so far, as (question, answer) pairs.
  chat_history = []
  while True:
      try:
          question = input('问题:')
      except (EOFError, KeyboardInterrupt):
          # End the session quietly on Ctrl-D / Ctrl-C instead of a traceback.
          print()
          break
      if not question.strip():
          # Nothing to ask; do not spend an API call on blank input.
          continue
      # Send the question; `chat_history` is a required input that carries the
      # conversation history.
      result = qa({'question': question, 'chat_history': chat_history})
      chat_history.append((question, result['answer']))
      print(result['answer'])

我们可以看到,它能很准确地围绕这个油管视频进行问答。

image-20230406211923672

使用流式回答也很方便

  from langchain.callbacks.base import CallbackManager
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

  # Streaming variant: build the chat model with streaming enabled and a
  # callback manager that holds a StreamingStdOutCallbackHandler.
  # NOTE(review): `ChatOpenAI` comes from the import in the previous listing,
  # and `chat_prompt_with_values` is not defined anywhere in this snippet --
  # presumably a chat prompt already filled in with values; confirm where it
  # is created.
  _stream_callbacks = CallbackManager([StreamingStdOutCallbackHandler()])
  chat = ChatOpenAI(
      temperature=0,
      verbose=True,
      streaming=True,
      callback_manager=_stream_callbacks,
  )
  resp = chat(chat_prompt_with_values.to_messages())