RAG CheatSheet

March 19, 2024

langchain+ollama

Commonly used embedding models for local documents:

  • SBert
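
For example, an SBERT model can be used locally through LangChain's HuggingFaceEmbeddings wrapper; a minimal sketch (assumes the sentence-transformers package is installed; all-MiniLM-L6-v2 is just one common choice, not prescribed by this cheatsheet):

from langchain_community.embeddings import HuggingFaceEmbeddings

# all-MiniLM-L6-v2 is a widely used SBERT model for local embeddings
sbert = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector = sbert.embed_query("local document retrieval")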

Loading models with ollama

Online

[src] https://ollama.com/library
ollama run gemma:2b
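
A few related ollama commands, for reference (assuming a local Ollama install):

ollama pull gemma:2b    # download the model without starting a chat
ollama list             # list models available locally
ollama rm gemma:2b      # remove a local model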

Offline

1. Create a model configuration file

Create a model configuration file, e.g. named Modelfile, whose contents point at the concrete model file to load:

FROM ./Meta-Llama-3-8B-Instruct-Q6_K.gguf
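
A Modelfile can also set inference parameters and a system prompt; a minimal sketch (the parameter values and system message here are illustrative, not from the original source):

FROM ./Meta-Llama-3-8B-Instruct-Q6_K.gguf
PARAMETER temperature 0.7
PARAMETER num_ctx 4096
SYSTEM You are a helpful assistant.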

2. Build the corresponding Ollama model

Build the Ollama model with the following command:

ollama create lm3 -f ./Modelfile

Here lm3 is the alias under which we will use the model in Ollama.

The general form of the command is:

ollama create choose-a-model-name -f <location of the file e.g. ./Modelfile>

3. Use the model

Now we can use it. Since this model has no content moderation, we can let our imagination run free:

ollama run lm3 "Tell me a humorous joke"
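
Once created, the model can also be queried through Ollama's local REST API; a minimal sketch using curl:

curl http://localhost:11434/api/generate -d '{
  "model": "lm3",
  "prompt": "Tell me a humorous joke",
  "stream": false
}'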

langchain

pip install langchain
pip install langchain_community
pip install langchain-chroma
pip show langchain

#!/usr/bin/env python
# coding=utf-8
from langchain_core.documents import Document

documents = [
    Document(
        page_content="Dogs are great companions, known for their loyalty and friendliness.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Cats are independent pets that often enjoy their own space.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata={"source": "fish-pets-doc"},
    ),
    Document(
        page_content="Parrots are intelligent birds capable of mimicking human speech.",
        metadata={"source": "bird-pets-doc"},
    ),
    Document(
        page_content="Rabbits are social animals that need plenty of space to hop around.",
        metadata={"source": "mammal-pets-doc"},
    ),
]

# Build a vector store from the documents
from langchain_chroma import Chroma
from langchain_community.embeddings import OllamaEmbeddings
oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="nomic-embed-text")
vectorstore = Chroma.from_documents(documents, embedding=oembed)
# vectorstore.similarity_search("cat")

# Ad-hoc ways to query the documents
# from langchain_core.runnables import RunnableLambda
# docs = vectorstore.similarity_search(question)
# retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)  # select top result
# retriever.batch(["cat", "shark"])

retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)

# Retrieval QA chain:
# first find the document chunks most relevant to the question's embedding,
# then pass those chunks as context, together with the question, to the
# Ollama model to get a more accurate answer
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_community.llms import Ollama
message = """
Answer this question using the provided context only.

{question}

Context:
{context}
"""
prompt = ChatPromptTemplate.from_messages([("human", message)])
llm = Ollama(model="lm3")
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm
response = rag_chain.invoke("tell me about cats")
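
The retriever returns a list of Document objects, which gets stringified into {context}; to pass only the page text, a common variant (a sketch, not from the original) pipes the retriever through a small formatting function:

def format_docs(docs):
    # join just the text of the retrieved chunks
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)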

The base Embeddings class in LangChain exposes two methods, as sketched below:

  • embed_documents: embeds multiple documents
  • embed_query: embeds a single query text
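
A quick sketch of both methods, reusing the nomic-embed-text model served by Ollama above:

from langchain_community.embeddings import OllamaEmbeddings

oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="nomic-embed-text")

# embed_documents: one vector per input text
doc_vectors = oembed.embed_documents(["Dogs are loyal.", "Cats are independent."])

# embed_query: a single vector for one query string
query_vector = oembed.embed_query("tell me about cats")

print(len(doc_vectors), len(query_vector))  # 2, then the embedding dimension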

load document

csv

from langchain_community.document_loaders.csv_loader import CSVLoader

file_path = (
    "../../../docs/integrations/document_loaders/example_data/mlb_teams_2012.csv"
)

loader = CSVLoader(file_path=file_path)
data = loader.load()

for record in data[:2]:
    print(record)
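
CSVLoader also accepts csv_args (passed through to Python's csv.DictReader) and a source_column that fills each row's source metadata; a sketch (the column name is an assumption about the CSV's header):

loader = CSVLoader(
    file_path=file_path,
    csv_args={"delimiter": ","},
    source_column="Team",  # assumed column; used as each Document's metadata["source"]
)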

load html

1
2
3
4
5
6
7
8
9
10
11
12
pip install bs4 # for loading web documents

# Load The Odyssey with a web loader
from langchain_community.document_loaders import WebBaseLoader
odyssey_url = "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
loader = WebBaseLoader(odyssey_url)
data = loader.load()

# Split the loaded Documents into chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)
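
To load only part of a page, WebBaseLoader can pass a bs4.SoupStrainer through bs_kwargs so BeautifulSoup parses just the matching elements; a sketch (which tag to keep is an assumption about the page structure):

import bs4
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    web_paths=(odyssey_url,),
    bs_kwargs=dict(parse_only=bs4.SoupStrainer("body")),  # keep only <body> content
)
data = loader.load()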

load pdf - pymupdf

import textwrap

from langchain.document_loaders import PyMuPDFLoader
from langchain.chains import RetrievalQA
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# This will load the PDF file
def load_pdf_data(file_path):
    # Creating a PyMuPDFLoader object with file_path
    loader = PyMuPDFLoader(file_path=file_path)

    # loading the PDF file
    docs = loader.load()

    # returning the loaded document
    return docs

# Responsible for splitting the documents into several chunks
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    # Initializing the RecursiveCharacterTextSplitter with
    # chunk_size and chunk_overlap
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    # Splitting the documents into chunks
    chunks = text_splitter.split_documents(documents=documents)

    # returning the document chunks
    return chunks

# function for loading the embedding model
# (model_path is kept for API compatibility but unused here: the embeddings
# are served by the local Ollama instance)
def load_embedding_model(model_path, normalize_embedding=True):
    return OllamaEmbeddings(base_url="http://localhost:11434", model="nomic-embed-text")


# Function for creating embeddings and storing them in Chroma
def create_embeddings(chunks, embedding_model, storing_path="vectorstore"):
    # Creating the vector store; persist_directory saves it to disk
    # (Chroma has no save_local; that method belongs to FAISS)
    vectorstore = Chroma.from_documents(
        chunks, embedding_model, persist_directory=storing_path
    )

    # returning the vectorstore
    return vectorstore

# A plain instruction-style prompt (not used by the QA chain below)
prompt = """
### System:
You are an AI Assistant that follows instructions extremely well. \
Help as much as you can.
### User:
{prompt}
### Response:
"""

template = """
### System:
You are a respectful and honest assistant. You have to answer the user's \
questions using only the context provided to you. If you don't know the answer, \
just say you don't know. Don't try to make up an answer.
### Context:
{context}
### User:
{question}
### Response:
"""

# Creating the chain for Question Answering
def load_qa_chain(retriever, llm, prompt):
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,  # here we are using the vectorstore as a retriever
        chain_type="stuff",
        return_source_documents=True,  # including source documents in output
        chain_type_kwargs={'prompt': prompt},  # customizing the prompt
    )

# Prettifying the response
def get_response(query, chain):
    # Getting response from chain
    response = chain({'query': query})

    # Wrapping the text for better output in Jupyter Notebook
    wrapped_text = textwrap.fill(response['result'], width=100)
    print(wrapped_text)

from lang_funcs import *
from langchain.llms import Ollama
from langchain import PromptTemplate

# Loading orca-mini from Ollama
llm = Ollama(model="orca-mini", temperature=0)

# Loading the Embedding Model
embed = load_embedding_model(model_path="all-MiniLM-L6-v2")

# loading and splitting the documents
docs = load_pdf_data(file_path="data/ml_book.pdf")
documents = split_docs(documents=docs)

# creating vectorstore
vectorstore = create_embeddings(documents, embed)

# converting vectorstore to a retriever
retriever = vectorstore.as_retriever()

# Creating the prompt from the template which we created before
prompt = PromptTemplate.from_template(template)

# Creating the chain
chain = load_qa_chain(retriever, llm, prompt)

get_response("What is random forest?", chain)
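
Because the chain was built with return_source_documents=True, the raw chain output also carries the retrieved chunks; a quick sketch of inspecting them:

response = chain({'query': "What is random forest?"})
for doc in response['source_documents']:
    print(doc.metadata.get('source'), doc.page_content[:80])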

Frontends

C/S (desktop clients)

AnythingLLM
GPT4All

B/S (web UIs)

open-webui/open-webui: User-friendly WebUI for LLMs (formerly Ollama WebUI), https://github.com/open-webui/open-webui
