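"""Streamlit chat app that answers questions about the Streamlit docs.

It builds a LlamaIndex vector index over the docs in ./data, optionally merges
in a user-uploaded PDF, and serves a chat UI over the combined index.
Run with the standard Streamlit command: `streamlit run <this_file>.py`.
"""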
import os
import shutil
import tempfile

import openai
import streamlit as st
from llama_index import (
    ServiceContext,
    SimpleDirectoryReader,
    VectorStoreIndex,
    download_loader,
)
from llama_index.llms import OpenAI
def create_vector_embedding_from_pdf(uploaded_pdf):
    """Save an uploaded PDF to a temp file, parse it with PyMuPDFReader, and
    build a VectorStoreIndex over the parsed documents."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        # Write the contents of the uploaded file to the temporary file
        shutil.copyfileobj(uploaded_pdf, temp_file)
        temp_file_path = temp_file.name  # Get the file path

    # Parse after the temp file is closed so it can be reopened on all platforms
    PyMuPDFReader = download_loader("PyMuPDFReader")
    loader = PyMuPDFReader()
    documents = loader.load_data(file_path=temp_file_path, metadata=True)
    index = VectorStoreIndex.from_documents(documents)
    os.remove(temp_file_path)
    return documents, index  # TODO: build more code to return a dataframe
st.set_page_config(
    page_title="Chat with the Streamlit docs, powered by LlamaIndex",
    page_icon="🦙",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items=None,
)
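# The OpenAI key is read from Streamlit's secrets store (.streamlit/secrets.toml), e.g.:
#   openai_key = "sk-..."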
openai.api_key = st.secrets.openai_key
st.title("Chat with the Streamlit docs, powered by LlamaIndex 💬🦙")
st.info(
    "Check out the full tutorial to build this app in our "
    "[blog post](https://blog.streamlit.io/build-a-chatbot-with-custom-data-sources-powered-by-llamaindex/)",
    icon="📃",
)
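# The docs to index are expected under ./data (see SimpleDirectoryReader below).
# st.cache_resource caches the built index once per process, so it is reused
# across reruns and sessions instead of being rebuilt on every interaction.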
@st.cache_resource(show_spinner=False)
def load_data():
    """Load the Streamlit docs from ./data and build a vector index over them."""
    with st.spinner(text="Loading and indexing the Streamlit docs – hang tight! This should take 1-2 minutes."):
        reader = SimpleDirectoryReader(input_dir="./data", recursive=True)
        docs = reader.load_data()
        service_context = ServiceContext.from_defaults(
            llm=OpenAI(
                model="gpt-3.5-turbo",
                temperature=0.5,
                system_prompt=(
                    "You are an expert on the Streamlit Python library and your job is to answer technical questions. "
                    "Assume that all questions are related to the Streamlit Python library. "
                    "Keep your answers technical and based on facts – do not hallucinate features."
                ),
            )
        )
        index = VectorStoreIndex.from_documents(docs, service_context=service_context)
        return index
streamlit_docs_index = load_data()
uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
if uploaded_file is not None:
    documents, pdf_index = create_vector_embedding_from_pdf(uploaded_file)
    # VectorStoreIndex.insert() takes a single Document, mutates the index in
    # place, and returns None – so insert each PDF document into the docs index
    # rather than assigning insert()'s return value. Note that because the docs
    # index is cached, inserted PDF documents persist across reruns.
    for doc in documents:
        streamlit_docs_index.insert(doc)
combined_index = streamlit_docs_index
if "messages" not in st.session_state.keys(): # Initialize the chat messages history
st.session_state.messages = [
{"role": "assistant", "content": "Ask me a question about Streamlit's open-source Python library!"}
]
if "chat_engine" not in st.session_state.keys(): # Initialize the chat engine
st.session_state.chat_engine = combined_index.as_chat_engine(chat_mode="condense_question", verbose=True)
if prompt := st.chat_input("Your question"):  # Prompt for user input and save to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
for message in st.session_state.messages:  # Display the prior chat messages
    with st.chat_message(message["role"]):
        st.write(message["content"])
# If the last message is not from the assistant, generate a new response
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = st.session_state.chat_engine.chat(prompt)
            st.write(response.response)
            message = {"role": "assistant", "content": response.response}
            st.session_state.messages.append(message)  # Add response to message history