使用 LangChain 集成 InternLM

LangChain 是一个用于开发由大语言模型驱动的应用程序的框架。

安装

pip install langchain langchain-community

基本用法

初始化 InternLM

# NOTE: besides the LangChain packages, this snippet needs `transformers`
# and a deep-learning backend such as PyTorch to be installed.
from langchain_community.llms import HuggingFacePipeline
import transformers

# Load the model and tokenizer.
# The InternLM model card loads this checkpoint with trust_remote_code=True
# because the Hub repository ships its own modeling/tokenizer code.
model_name = "InternLM/internlm3-8b-instruct"
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
)

# Build the text-generation pipeline.
# do_sample=True is required for `temperature` to have any effect; with
# greedy decoding transformers ignores the temperature setting.
pipe = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
)

# Wrap the pipeline as a LangChain LLM.
llm = HuggingFacePipeline(pipeline=pipe)

简单的文本生成

# PromptTemplate lives in langchain-core; the legacy `langchain.prompts`
# path is not available in current `langchain` releases.
from langchain_core.prompts import PromptTemplate

template = """问题: {question}

回答: """

# from_template() infers the input variables ("question") from the
# placeholders, so they do not have to be listed by hand.
prompt = PromptTemplate.from_template(template)

# Pipe the formatted prompt into the LLM created in the previous step.
llm_chain = prompt | llm

response = llm_chain.invoke({"question": "什么是机器学习?"})
print(response)

构建聊天机器人

# The legacy ConversationChain / ConversationBufferMemory helpers are
# deprecated, so the conversation loop is built directly from langchain-core
# primitives: a prompt, the LLM, and an explicit message history.
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
from langchain_core.prompts import PromptTemplate

# Conversation memory: stores the messages exchanged so far.
memory = InMemoryChatMessageHistory()

# Conversation chain. The prompt is modelled on the default prompt of the
# legacy ConversationChain and ends with "AI:" so that a plain
# text-completion model continues with the assistant's turn.
conversation_prompt = PromptTemplate.from_template(
    "The following is a friendly conversation between a human and an AI. "
    "The AI is talkative and provides lots of specific details from its "
    "context. If the AI does not know the answer to a question, it "
    "truthfully says it does not know.\n\n"
    "Current conversation:\n{history}\nHuman: {input}\nAI:"
)
conversation = conversation_prompt | llm


def chat(user_input):
    """Send one user turn to the model and record both turns in memory.

    Args:
        user_input: The text typed by the user.

    Returns:
        The text generated by the LLM for this turn.
    """
    reply = conversation.invoke(
        {
            # Render earlier turns as "Human: ..." / "AI: ..." lines.
            "history": get_buffer_string(memory.messages),
            "input": user_input,
        }
    )
    # Store the turn only after generation succeeded, so a failed call
    # does not leave a dangling user message in the history.
    memory.add_messages(
        [HumanMessage(content=user_input), AIMessage(content=reply)]
    )
    return reply


# Have a conversation.
response1 = chat("你好,我是李明")
print("回应1:", response1)

response2 = chat("你还记得我的名字吗?")
print("回应2:", response2)

高级应用

RAG (检索增强生成)

# NOTE: the Chroma vector store needs the `chromadb` package and
# HuggingFaceEmbeddings needs `sentence-transformers`; install them in
# addition to the LangChain packages.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import CharacterTextSplitter

# Prepare the documents.
documents = ["这里是一些示例文档内容..."]

# Split the text into chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.create_documents(documents)

# Build the vector store.
embeddings = HuggingFaceEmbeddings()
docsearch = Chroma.from_documents(texts, embeddings)

# Prompt that "stuffs" every retrieved chunk into a single context block,
# which is what the legacy RetrievalQA chain did with chain_type="stuff".
rag_prompt = PromptTemplate.from_template(
    "Use the following pieces of context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, don't try "
    "to make up an answer.\n\n"
    "{context}\n\n"
    "Question: {question}\n"
    "Helpful Answer:"
)


def format_docs(docs):
    """Join the page content of the retrieved documents into one string."""
    return "\n\n".join(doc.page_content for doc in docs)


# Build the RAG chain: the incoming question is sent to the retriever to
# fetch context and is also passed through unchanged to the prompt.
qa = (
    {
        "context": docsearch.as_retriever() | format_docs,
        "question": RunnablePassthrough(),
    }
    | rag_prompt
    | llm
    | StrOutputParser()
)

# Ask a question.
query = "告诉我关于文档的内容"
response = qa.invoke(query)
print(response)