import os
import shutil
import tempfile

import openai
import streamlit as st
from llama_index import (
    ServiceContext,
    SimpleDirectoryReader,
    VectorStoreIndex,
    download_loader,
)
from llama_index.llms import OpenAI
# import pdfplumber


def create_vector_embedding_from_pdf(uploaded_file):
    """Index an uploaded PDF: save it to a temp file, parse it with PyMuPDF,
    and build a vector index over the extracted documents."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        # Write the contents of the uploaded file to the temporary file
        shutil.copyfileobj(uploaded_file, temp_file)
        temp_file_path = temp_file.name  # Get the file path
    PyMuPDFReader = download_loader("PyMuPDFReader")
    loader = PyMuPDFReader()
    documents = loader.load(temp_file_path, metadata=True)
    index = VectorStoreIndex.from_documents(documents)
    os.remove(temp_file_path)  # Clean up the temporary file
    # TODO: build more code to return a dataframe
    return documents, index


st.set_page_config(
    page_title="Chat with the Streamlit docs, powered by LlamaIndex",
    page_icon="🦙",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items=None,
)
openai.api_key = st.secrets.openai_key
st.title("Chat with the Streamlit docs, powered by LlamaIndex 💬🦙")
st.info(
    "Check out the full tutorial to build this app in our [blog post](https://blog.streamlit.io/build-a-chatbot-with-custom-data-sources-powered-by-llamaindex/)",
    icon="📃",
)


@st.cache_resource(show_spinner=False)
def load_data():
    """Load the Streamlit docs from ./data and index them (cached across reruns)."""
    with st.spinner(text="Loading and indexing the Streamlit docs – hang tight! This should take 1-2 minutes."):
        reader = SimpleDirectoryReader(input_dir="./data", recursive=True)
        docs = reader.load_data()
        service_context = ServiceContext.from_defaults(
            llm=OpenAI(
                model="gpt-3.5-turbo",
                temperature=0.5,
                system_prompt=(
                    "You are an expert on the Streamlit Python library and your job is to answer technical questions."
                    " Assume that all questions are related to the Streamlit Python library."
                    " Keep your answers technical and based on facts – do not hallucinate features."
                ),
            )
        )
        index = VectorStoreIndex.from_documents(docs, service_context=service_context)
        return index


streamlit_docs_index = load_data()

uploaded_file = st.file_uploader("Choose your .pdf file", type="pdf")
if uploaded_file is not None:
    documents, pdf_index = create_vector_embedding_from_pdf(uploaded_file)
    # insert() mutates the index in place and returns None, so add the PDF's
    # documents one by one and keep using the same index object.
    for doc in documents:
        streamlit_docs_index.insert(doc)
combined_index = streamlit_docs_index

if "messages" not in st.session_state:  # Initialize the chat messages history
    st.session_state.messages = [
        {"role": "assistant", "content": "Ask me a question about Streamlit's open-source Python library!"}
    ]

if "chat_engine" not in st.session_state:  # Initialize the chat engine
    st.session_state.chat_engine = combined_index.as_chat_engine(chat_mode="condense_question", verbose=True)

if prompt := st.chat_input("Your question"):  # Prompt for user input and save to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

for message in st.session_state.messages:  # Display the prior chat messages
    with st.chat_message(message["role"]):
        st.write(message["content"])

# If the last message is not from the assistant, generate a new response
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = st.session_state.chat_engine.chat(prompt)
            st.write(response.response)
            message = {"role": "assistant", "content": response.response}
            st.session_state.messages.append(message)  # Add response to message history
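
# Running this script (a sketch; the file name and version pin below are
# assumptions, since the script uses the legacy pre-0.10 llama_index API
# with ServiceContext and download_loader):
#
#   pip install streamlit "llama-index<0.10" pymupdf openai
#   streamlit run app.py
#
# It expects the Streamlit docs to be available under ./data and an
# `openai_key` entry in .streamlit/secrets.toml.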