RAG CheatSheet

March 19, 2024

langchain+ollama

Commonly used embedding models for local documents:

  • SBert
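
For example, an SBERT model can be used locally through LangChain's HuggingFaceEmbeddings wrapper; a minimal sketch (assumes the sentence-transformers package is installed; all-MiniLM-L6-v2 is just one common choice, not prescribed by this cheatsheet):

from langchain_community.embeddings import HuggingFaceEmbeddings

# all-MiniLM-L6-v2 is a widely used SBERT model for local embeddings
sbert = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector = sbert.embed_query("local document retrieval")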

Loading models with ollama

Online

[src] https://ollama.com/library
ollama run gemma:2b
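
A few related ollama commands, for reference (assuming a local Ollama install):

ollama pull gemma:2b    # download the model without starting a chat
ollama list             # list models available locally
ollama rm gemma:2b      # remove a local model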

Offline

1. Create a model configuration file

Create a model configuration file, e.g. named Modelfile, whose contents point at the concrete model file to load:

FROM ./Meta-Llama-3-8B-Instruct-Q6_K.gguf
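
A Modelfile can also set inference parameters and a system prompt; a minimal sketch (the parameter values and system message here are illustrative, not from the original source):

FROM ./Meta-Llama-3-8B-Instruct-Q6_K.gguf
PARAMETER temperature 0.7
PARAMETER num_ctx 4096
SYSTEM You are a helpful assistant.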

2. Build the corresponding Ollama model

Build the Ollama model with the following command:

ollama create lm3 -f ./Modelfile

Here lm3 is the alias under which we will use the model in Ollama.

The general form of the command is:

ollama create choose-a-model-name -f <location of the file e.g. ./Modelfile>

3. Use the model

Now we can use it. Since this model has no content moderation, we can let our imagination run free:

ollama run lm3 "Tell me a humorous joke"
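
Once created, the model can also be queried through Ollama's local REST API; a minimal sketch using curl:

curl http://localhost:11434/api/generate -d '{
  "model": "lm3",
  "prompt": "Tell me a humorous joke",
  "stream": false
}'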

langchain

pip install langchain
pip install langchain_community
pip install langchain-chroma
pip show langchain

#!/usr/bin/env python
# coding=utf-8
from langchain_core.documents import Document

documents = [
    Document(
        page_content="Dogs are great companions, known for their loyalty and friendliness.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Cats are independent pets that often enjoy their own space.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata={"source": "fish-pets-doc"},
    ),
    Document(
        page_content="Parrots are intelligent birds capable of mimicking human speech.",
        metadata={"source": "bird-pets-doc"},
    ),
    Document(
        page_content="Rabbits are social animals that need plenty of space to hop around.",
        metadata={"source": "mammal-pets-doc"},
    ),
]

# Build a vector store from the documents
from langchain_chroma import Chroma
from langchain_community.embeddings import OllamaEmbeddings
oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="nomic-embed-text")
vectorstore = Chroma.from_documents(documents, embedding=oembed)
# vectorstore.similarity_search("cat")

# Ad-hoc ways to query the documents
# from langchain_core.runnables import RunnableLambda
# docs = vectorstore.similarity_search(question)
# retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)  # select top result
# retriever.batch(["cat", "shark"])

retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)

# Retrieval QA chain:
# first find the document chunks most relevant to the question's embedding,
# then pass those chunks as context, together with the question, to the
# Ollama model to get a more accurate answer
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_community.llms import Ollama
message = """
Answer this question using the provided context only.

{question}

Context:
{context}
"""
prompt = ChatPromptTemplate.from_messages([("human", message)])
llm = Ollama(model="lm3")
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm
response = rag_chain.invoke("tell me about cats")
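
The retriever returns a list of Document objects, which gets stringified into {context}; to pass only the page text, a common variant (a sketch, not from the original) pipes the retriever through a small formatting function:

def format_docs(docs):
    # join just the text of the retrieved chunks
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)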

The base Embeddings class in LangChain exposes two methods, as sketched below:

  • embed_documents: embeds multiple documents
  • embed_query: embeds a single query text
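
A quick sketch of both methods, reusing the nomic-embed-text model served by Ollama above:

from langchain_community.embeddings import OllamaEmbeddings

oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="nomic-embed-text")

# embed_documents: one vector per input text
doc_vectors = oembed.embed_documents(["Dogs are loyal.", "Cats are independent."])

# embed_query: a single vector for one query string
query_vector = oembed.embed_query("tell me about cats")

print(len(doc_vectors), len(query_vector))  # 2, then the embedding dimension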

load document

csv

from langchain_community.document_loaders.csv_loader import CSVLoader

file_path = (
    "../../../docs/integrations/document_loaders/example_data/mlb_teams_2012.csv"
)

loader = CSVLoader(file_path=file_path)
data = loader.load()

for record in data[:2]:
    print(record)
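
CSVLoader also accepts csv_args (passed through to Python's csv.DictReader) and a source_column that fills each row's source metadata; a sketch (the column name is an assumption about the CSV's header):

loader = CSVLoader(
    file_path=file_path,
    csv_args={"delimiter": ","},
    source_column="Team",  # assumed column; used as each Document's metadata["source"]
)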

load html

1
2
3
4
5
6
7
8
9
10
11
12
pip install bs4 # for loading web documents

# Load The Odyssey with a web loader
from langchain_community.document_loaders import WebBaseLoader
odyssey_url = "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
loader = WebBaseLoader(odyssey_url)
data = loader.load()

# Split the loaded Documents into chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)
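
To load only part of a page, WebBaseLoader can pass a bs4.SoupStrainer through bs_kwargs so BeautifulSoup parses just the matching elements; a sketch (which tag to keep is an assumption about the page structure):

import bs4
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    web_paths=(odyssey_url,),
    bs_kwargs=dict(parse_only=bs4.SoupStrainer("body")),  # keep only <body> content
)
data = loader.load()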

load pdf - pymupdf

import textwrap

from langchain.document_loaders import PyMuPDFLoader
from langchain.chains import RetrievalQA
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# This will load the PDF file
def load_pdf_data(file_path):
    # Creating a PyMuPDFLoader object with file_path
    loader = PyMuPDFLoader(file_path=file_path)

    # loading the PDF file
    docs = loader.load()

    # returning the loaded document
    return docs

# Responsible for splitting the documents into several chunks
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    # Initializing the RecursiveCharacterTextSplitter with
    # chunk_size and chunk_overlap
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    # Splitting the documents into chunks
    chunks = text_splitter.split_documents(documents=documents)

    # returning the document chunks
    return chunks

# function for loading the embedding model
# (model_path is kept for API compatibility but unused here: the embeddings
# are served by the local Ollama instance)
def load_embedding_model(model_path, normalize_embedding=True):
    return OllamaEmbeddings(base_url="http://localhost:11434", model="nomic-embed-text")


# Function for creating embeddings and storing them in Chroma
def create_embeddings(chunks, embedding_model, storing_path="vectorstore"):
    # Creating the vector store; persist_directory saves it to disk
    # (Chroma has no save_local; that method belongs to FAISS)
    vectorstore = Chroma.from_documents(
        chunks, embedding_model, persist_directory=storing_path
    )

    # returning the vectorstore
    return vectorstore

# A plain instruction-style prompt (not used by the QA chain below)
prompt = """
### System:
You are an AI Assistant that follows instructions extremely well. \
Help as much as you can.
### User:
{prompt}
### Response:
"""

template = """
### System:
You are a respectful and honest assistant. You have to answer the user's \
questions using only the context provided to you. If you don't know the answer, \
just say you don't know. Don't try to make up an answer.
### Context:
{context}
### User:
{question}
### Response:
"""

# Creating the chain for Question Answering
def load_qa_chain(retriever, llm, prompt):
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,  # here we are using the vectorstore as a retriever
        chain_type="stuff",
        return_source_documents=True,  # including source documents in output
        chain_type_kwargs={'prompt': prompt},  # customizing the prompt
    )

# Prettifying the response
def get_response(query, chain):
    # Getting response from chain
    response = chain({'query': query})

    # Wrapping the text for better output in Jupyter Notebook
    wrapped_text = textwrap.fill(response['result'], width=100)
    print(wrapped_text)

from lang_funcs import *
from langchain.llms import Ollama
from langchain import PromptTemplate

# Loading orca-mini from Ollama
llm = Ollama(model="orca-mini", temperature=0)

# Loading the Embedding Model
embed = load_embedding_model(model_path="all-MiniLM-L6-v2")

# loading and splitting the documents
docs = load_pdf_data(file_path="data/ml_book.pdf")
documents = split_docs(documents=docs)

# creating vectorstore
vectorstore = create_embeddings(documents, embed)

# converting vectorstore to a retriever
retriever = vectorstore.as_retriever()

# Creating the prompt from the template which we created before
prompt = PromptTemplate.from_template(template)

# Creating the chain
chain = load_qa_chain(retriever, llm, prompt)

get_response("What is random forest?", chain)
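
Because the chain was built with return_source_documents=True, the raw chain output also carries the retrieved chunks; a quick sketch of inspecting them:

response = chain({'query': "What is random forest?"})
for doc in response['source_documents']:
    print(doc.metadata.get('source'), doc.page_content[:80])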

Frontends

C/S (desktop clients)

AnythingLLM
GPT4All

B/S (web UIs)

open-webui/open-webui: User-friendly WebUI for LLMs (formerly Ollama WebUI), https://github.com/open-webui/open-webui
