Using different embedding models with LlamaIndex.

7

OpenAI's GPT embedding models are used in all of LlamaIndex's examples, even though they appear to be the most expensive and worst-performing embedding models compared to T5 and the sentence-transformers models (see the comparison below).

How do I use all-roberta-large-v1 as the embedding model, together with OpenAI's GPT3 as the "response builder"? I'm not even sure whether I can use one model to create/retrieve the embedding tokens and then use another model to generate the response based on the retrieved embeddings.

Example

Here is an example of what I'm looking for:

documents = SimpleDirectoryReader('data').load_data()

# Use Roberta or any other open-source model to generate embeddings
index = ???????.from_documents(documents)

# Use GPT3 here
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")

print(response)

Model comparison

[Image: embedding model comparison chart (source linked in the original post)]


1
Thanks for asking this! Regarding the part "I'm not even sure whether I can use one model to create/retrieve the embedding tokens and then use another model to generate the response based on the retrieved embeddings" - did you figure out whether this is possible, and whether the response quality is still good? - Ire00
Did you ever figure this out? - undefined
4 Answers

3
You can set it in the service_context, using either a local model or a model from HuggingFace:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LangchainEmbedding, ServiceContext

embed_model = LangchainEmbedding(
  HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)
service_context = ServiceContext.from_defaults(embed_model=embed_model)

You can either pass this service_context around explicitly, or set it globally:
from llama_index import set_global_service_context

set_global_service_context(service_context)
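
For completeness, here is a minimal end-to-end sketch of what the question asks for, assuming the same ServiceContext API as above. The all-roberta-large-v1 checkpoint from the question is dropped in for the embeddings (any sentence-transformers model name should work the same way), and gpt-3.5-turbo stands in for the "GPT3" response generator; the 'data' directory and the query are placeholders.

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import (LangchainEmbedding, ServiceContext,
                         SimpleDirectoryReader, VectorStoreIndex)
from llama_index.llms import OpenAI

# Open-source sentence-transformers model produces the embeddings ...
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-roberta-large-v1")
)

# ... while OpenAI is only used to synthesize the answer from the retrieved text
service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
    llm=OpenAI(model="gpt-3.5-turbo"),
)

documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)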

This doesn't answer the question. Can you elaborate? - undefined

1
Take a look at this code. It creates two different LLMs: one for the embeddings and another one for evaluating "context + query".
import os
import sys

import llama_index.indices.vector_store.retrievers
import llama_index.query_engine.retriever_query_engine
import llama_index.response_synthesizers
import llama_index.retrievers

if 'OPENAI_API_KEY' not in os.environ:
  sys.stderr.write("""
  You haven't set up your API key yet.
  
  If you don't have an API key yet, visit:
  
  https://platform.openai.com/signup

  1. Make an account or sign in
  2. Click "View API Keys" from the top right menu.
  3. Click "Create new secret key"

  Then, open the Secrets Tool and add OPENAI_API_KEY as a secret.
  """)
  exit(1)

import streamlit as st
from llama_index import (
    ServiceContext,
    SimpleDirectoryReader,
    VectorStoreIndex,
)
from llama_index.llms import OpenAI

st.set_page_config(page_title="LlamaIndex Q&A with Lyft Financials",
                   page_icon="🦙",
                   layout="centered",
                   initial_sidebar_state="auto",
                   menu_items=None)

st.title("LlamaIndex 🦙 Q&A with Lyft Financials")


@st.cache_resource(show_spinner=False)
def load_data():
  """
    Loads and indexes the Lyft 2021 financials using the VectorStoreIndex.
    
    Returns:
    - VectorStoreIndex: Indexed representation of the Lyft 10-K.
  """
  with st.spinner(
      text="Loading and indexing the Lyft 10-K. This may take a while..."):
    reader = SimpleDirectoryReader(input_dir="./data", recursive=True)
    docs = reader.load_data()
    service_context__embedding = ServiceContext.from_defaults(
        llm=OpenAI(
            model="text-ada-001",
            temperature=0.0,
        ),
        system_prompt=
        "You are an AI assistant creating text embedding for financial reports."
    )
    index = VectorStoreIndex.from_documents(
        docs, service_context=service_context__embedding)
    return index


# Create Index
index = load_data()

retriever = llama_index.indices.vector_store.retrievers.VectorIndexRetriever(
    index=index,
    similarity_top_k=3,
)

llm_context_query__service_context = ServiceContext.from_defaults(
    llm=OpenAI(
        model="gpt-3.5-turbo",
        temperature=0.1,
    ),
    system_prompt=
    "You are an AI assistant answering questions related to  financial reports fragments."
)

# configure response synthesizer
# text_qa_template=text_qa_template,
# refine_template=refine_template,

response_synthesizer = llama_index.response_synthesizers.get_response_synthesizer(
    response_mode="refine",
    service_context=llm_context_query__service_context,
    use_async=False,
    streaming=False,
)

query_engine = (
    llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine(
        retriever=retriever,
        response_synthesizer=response_synthesizer,
    ))

# Take input from the user
user_input = st.text_input("Enter Your Query", "")

# Display the input
if st.button("Submit"):
  st.write(f"Your Query: {user_input}")

  with st.spinner("Thinking..."):
    # Query the index
    result = query_engine.query(user_input)
    print(result.source_nodes)
    # Display the results
    st.write(f"Answer: {str(result)}")

1
Good example, tested and working. - undefined

0
Here is an example that uses:
  • Chroma for the vector store.
  • sentence-transformers/all-MiniLM-L6-v2 for the embeddings.
  • GPT-3.5 for the final LLM query.
It does this by using one ServiceContext for indexing and another ServiceContext for the LLM query.
import os
import chromadb
import llama_index
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index import ServiceContext, VectorStoreIndex, SimpleDirectoryReader, LangchainEmbedding
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.llms import OpenAI
from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine
from llama_index.indices.vector_store.retrievers import VectorIndexRetriever

docs_to_load = "/path/to/ingester/data-for-ingestion/small-batch"
user_input = "What are these documents about?"

def setup_environment():
    os.environ['OPENAI_API_KEY'] = 'sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
    os.environ['LLAMA_INDEX_CACHE_DIR'] = 'cache'

def load_documents(path):
    return SimpleDirectoryReader(
        path,
        recursive=True,
        required_exts=[".pdf"],
        filename_as_id=True,
    ).load_data()

def setup_index(documents):
    db = chromadb.PersistentClient(path="./chroma_db")
    chroma_collection = db.get_or_create_collection("quickstart")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'))
    service_context_embedding = ServiceContext.from_defaults(embed_model=embed_model)
    return VectorStoreIndex.from_documents(documents, storage_context=storage_context, service_context=service_context_embedding)

def setup_query_engine(index):
    service_context_llm = ServiceContext.from_defaults(
        llm=OpenAI(
            model="gpt-3.5-turbo",
            temperature=0.1,
        ),
        system_prompt="You are an AI assistant answering questions related to PDF documents."
    )
    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=10,
    )
    response_synthesizer = llama_index.response_synthesizers.get_response_synthesizer(
        response_mode="compact",
        service_context=service_context_llm,
        use_async=False,
        streaming=False,
    )
    return RetrieverQueryEngine(
        retriever=retriever,
        response_synthesizer=response_synthesizer,
    )

def main():
    setup_environment()
    documents = load_documents(docs_to_load)
    index = setup_index(documents)
    query_engine = setup_query_engine(index)
    result = query_engine.query(user_input)
    print(f"Answer: {str(result)}")

if __name__ == "__main__":
    main()
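
If you specifically want the all-roberta-large-v1 model from the question, it should just be a matter of swapping the model name passed to HuggingFaceEmbeddings inside setup_index (an untested assumption; any sentence-transformers checkpoint name should work the same way):

    # Hypothetical swap: use the model from the question instead of MiniLM
    embed_model = LangchainEmbedding(
        HuggingFaceEmbeddings(model_name='sentence-transformers/all-roberta-large-v1')
    )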

0
Here is another snippet that uses open-source embeddings and OpenAI for the LLM query.

import os
import logging
import sys
import llama_index
from llama_index import SimpleDirectoryReader, VectorStoreIndex, ServiceContext, StorageContext
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.llms import OpenAI
from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine
from llama_index.indices.vector_store.retrievers import VectorIndexRetriever
from llama_index import load_index_from_storage
from icecream import ic

# Setup environment and logging
os.environ['OPENAI_API_KEY'] = 'your-openai-api-key'
os.environ['LLAMA_INDEX_CACHE_DIR'] = 'cache'
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# User-defined variables
docs_to_load = "/path/to/your/documents"
persist_dir = "/path/to/your/persistent/directory"
user_input = "Your query here"
gpt_model = "gpt-4"

# Initialize embedding model: either an explicit HuggingFace model, or "local"
# to let LlamaIndex pick its default local embedding model
embed_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
# embed_model = "local"  # alternative to the explicit HuggingFace model above
service_context_embeddings = ServiceContext.from_defaults(embed_model=embed_model)

# Try to load the existing index
try:
    logging.info("Trying to load existing directory.")
    storage_context_persisted = StorageContext.from_defaults(persist_dir=persist_dir)
    index = load_index_from_storage(storage_context=storage_context_persisted, service_context=service_context_embeddings)
    logging.info("Loaded existing directory.")
except FileNotFoundError:
    logging.error("Failed to load existing directory, creating a new one.")
    documents = SimpleDirectoryReader(docs_to_load, recursive=True, required_exts=[".pdf"], filename_as_id=True).load_data()
    storage_context_empty = StorageContext.from_defaults()
    index = VectorStoreIndex.from_documents(documents=documents, storage_context=storage_context_empty, service_context=service_context_embeddings)
    index.storage_context.persist(persist_dir=persist_dir)

# Initialize retriever
retriever = VectorIndexRetriever(index=index, similarity_top_k=10)
nodes = retriever.retrieve(user_input)
ic(nodes)

# Initialize LLM service context and response synthesizer
service_context_llm = ServiceContext.from_defaults(llm=OpenAI(model=gpt_model))
response_synthesizer = llama_index.response_synthesizers.get_response_synthesizer(
    response_mode="compact",
    service_context=service_context_llm,
    use_async=False,
    streaming=False,
)

# Initialize and run query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)
response = query_engine.query(user_input)
logging.info(response)
