From 3cbd3ff8aa6f0213b73b8f0eb8141ab6d3452c58 Mon Sep 17 00:00:00 2001
From: FelixStuyck
Date: Fri, 7 Jun 2024 14:15:42 +0200
Subject: [PATCH 1/3] Fill in parameters

---
 main.py | 152 +++++++++++++++++---------------------------------------
 1 file changed, 46 insertions(+), 106 deletions(-)

diff --git a/main.py b/main.py
index 97b87d4..7b4095a 100644
--- a/main.py
+++ b/main.py
@@ -1,107 +1,47 @@
+import getpass
 import os
-import streamlit as st
-import pickle
-import PyPDF2
-from PyPDF2 import PdfReader
-from langchain.document_loaders import PyPDFLoader
-from langchain.chains.summarize import load_summarize_chain
-import tempfile
-import time
-import langchain
-from langchain import OpenAI
-from langchain.chains import RetrievalQAWithSourcesChain
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.document_loaders import UnstructuredURLLoader
-from langchain.embeddings import OpenAIEmbeddings
-from langchain.vectorstores import FAISS
-from langchain.chains.question_answering import load_qa_chain
-
-
-OPENAI_API_KEY ='User Your Open AI API KEY'
-url_file_path = "url_faiss_store_openai.pkl"
-
-# Streamlit setup
-st.set_page_config(
-    page_title="Personal AI Assistant",
-    layout="wide",
-    initial_sidebar_state="expanded"
-)
-st.markdown("<div>🤖 Personal AI Assistant</div>", unsafe_allow_html=True)
-st.sidebar.markdown("<div>Assistant Console</div>", unsafe_allow_html=True)
-
-# ---- URL Loading & Embedding ----
-num_links = st.sidebar.slider("How many links do you want to input?", min_value=1, max_value=5, value=1)
-urls = [st.sidebar.text_input(f"URL {i+1}", key=f"url{i}") for i in range(num_links)]
-if urls:
-    loader = UnstructuredURLLoader(urls=urls)
-    data = loader.load()
-    text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", "."], chunk_size=1000)
-    url_docs = text_splitter.split_documents(data)
-    if url_docs:
-        embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
-        url_vectorindex_openai = FAISS.from_documents(url_docs, embeddings)
-        with open(url_file_path, "wb") as f:
-            pickle.dump(url_vectorindex_openai, f)
-
-# ---- PDF Loading & Embedding ----
-uploaded_file = st.sidebar.file_uploader("Upload a PDF file", type=['pdf'])
-if uploaded_file:
-    pdf_reader = PdfReader(uploaded_file)
-    pdf_text = ""
-    for page in pdf_reader.pages:
-        pdf_text += page.extract_text()
-    text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", "."], chunk_size= 500)
-    pdf_docs = text_splitter.split_text(pdf_text)
-    if pdf_docs:
-        embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
-        pdf_vectors = FAISS.from_texts(pdf_docs, embeddings)
-
-
-# ---- Query Interface ----
-llm = OpenAI(temperature=0.9, max_tokens=500, openai_api_key=OPENAI_API_KEY)
-data_source = st.selectbox("What do you want to inquire about?", ["URL", "PDF"])
-
-if data_source == "URL":
-    query_url = st.text_input('Ask your question about URLs:')
-    if query_url:
-        if os.path.exists(url_file_path):  # Ensure URL database exists
-            with open(url_file_path, "rb") as f:
-                vectorstore = pickle.load(f)
-            chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
-            result = chain({"question": query_url}, return_only_outputs=True)
-            st.header("Answer based on URLs:")
-            st.subheader(result['answer'])
-
-elif data_source == "PDF":
-    query_pdf = st.text_input('Ask your question about PDFs:')
-    if query_pdf:
-        docs = pdf_vectors.similarity_search(query_pdf)
-
-        chain = load_qa_chain(llm, chain_type="stuff")
-        response = chain.run(input_documents=docs, question=query_pdf)
-
-        st.write(response)
-
-    if st.button("Summarize PDF"):
-        def summarize_pdfs_from_folder(pdfs_folder):
-            summaries = []
-            for pdf_file in pdfs_folder:
-                with tempfile.NamedTemporaryFile(delete=False) as temp_file:
-                    temp_path = temp_file.name
-                    temp_file.write(pdf_file.getvalue())
-                loader = PyPDFLoader(temp_path)
-                docs = loader.load_and_split()
-                chain = load_summarize_chain(llm, chain_type="map_reduce")
-                summary = chain.run(docs)
-                summaries.append(summary)
-                os.remove(temp_path)
-            return summaries
-
-        summaries = summarize_pdfs_from_folder([uploaded_file])
-        for summary in summaries:
-            st.write(summary)
-
-
-
-
-
+import configparser
+
+# Get the OpenAI key from the local config file
+config_path = r"C:\Users\felixstuyck\OneDrive - Finvision\Documenten\Python\AI assistant config.ini"
+config = configparser.ConfigParser()
+config.read(config_path)
+OPEN_AI_KEY = config['API_KEY_Assistent']['API_KEY']
+# Export the key itself; fall back to an interactive prompt if the config value is empty
+os.environ["OPENAI_API_KEY"] = OPEN_AI_KEY or getpass.getpass("OpenAI API key: ")
+
+from langchain_community.document_loaders import TextLoader
+from langchain_community.vectorstores import FAISS
+from langchain_openai import OpenAIEmbeddings
+from langchain_text_splitters import CharacterTextSplitter
+
+# Load and process the document
+loader = TextLoader(r"C:\Users\felixstuyck\OneDrive - Finvision\Documenten\Stow stored procedure.txt")
+documents = loader.load()
+text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+docs = text_splitter.split_documents(documents)
+
+# Generate embeddings and create FAISS index
+embeddings = OpenAIEmbeddings()
+db = FAISS.from_documents(docs, embeddings)
+print(f"Number of documents in the index: {db.index.ntotal}")
+
+# Query the vectorstore
+query = "What did the president say about Ketanji Brown Jackson"
+docs = db.similarity_search(query)
+print(f"Top result content: {docs[0].page_content}")
+
+# Use as retriever
+retriever = db.as_retriever()
+docs = retriever.invoke(query)
+print(f"Top result content (retriever): {docs[0].page_content}")
+
+# Similarity search with score (L2 distance: lower means more similar)
+docs_and_scores = db.similarity_search_with_score(query)
+for doc, score in docs_and_scores:
+    print(f"Content: {doc.page_content}, Score: {score}")
+
+# Save and reload FAISS index (newer langchain-community requires opting in to pickle deserialization)
+db.save_local("faiss_index")
+new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
+docs = new_db.similarity_search(query)
+print(f"Top result content (loaded index): {docs[0].page_content}")
\ No newline at end of file
From a9be21f54d5e9de81db0cd840eebcb1627e2cb8c Mon Sep 17 00:00:00 2001
From: FelixStuyck
Date: Fri, 7 Jun 2024 14:17:12 +0200
Subject: [PATCH 2/3] New package

---
 Test FAISS package.py | 46 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 Test FAISS package.py

diff --git a/Test FAISS package.py b/Test FAISS package.py
new file mode 100644
index 0000000..5c22075
--- /dev/null
+++ b/Test FAISS package.py
@@ -0,0 +1,46 @@
+import getpass
+import os
+import configparser
+from langchain_community.document_loaders import TextLoader
+from langchain_community.vectorstores import FAISS
+from langchain_openai import OpenAIEmbeddings
+from langchain_text_splitters import CharacterTextSplitter
+
+# Get the OpenAI key from the local config file
+config_path = r"C:\Users\felixstuyck\OneDrive - Finvision\Documenten\Python\AI assistant config.ini"
+config = configparser.ConfigParser()
+config.read(config_path)
+OPEN_AI_KEY = config['API_KEY_Assistent']['API_KEY']
+# Export the key itself; fall back to an interactive prompt if the config value is empty
+os.environ["OPENAI_API_KEY"] = OPEN_AI_KEY or getpass.getpass("OpenAI API key: ")
+
+# Load and process the document
+loader = TextLoader(r"C:\Users\felixstuyck\OneDrive - Finvision\Documenten\Stow stored procedure.txt")
+documents = loader.load()
+text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+docs = text_splitter.split_documents(documents)
+
+# Generate embeddings and create FAISS index
+embeddings = OpenAIEmbeddings()
+db = FAISS.from_documents(docs, embeddings)
+print(f"Number of documents in the index: {db.index.ntotal}")
+
+# Query the vectorstore
+query = "What did the president say about Ketanji Brown Jackson"
+docs = db.similarity_search(query)
+print(f"Top result content: {docs[0].page_content}")
+
+# Use as retriever
+retriever = db.as_retriever()
+docs = retriever.invoke(query)
+print(f"Top result content (retriever): {docs[0].page_content}")
+
+# Similarity search with score (L2 distance: lower means more similar)
+docs_and_scores = db.similarity_search_with_score(query)
+for doc, score in docs_and_scores:
+    print(f"Content: {doc.page_content}, Score: {score}")
+
+# Save and reload FAISS index (newer langchain-community requires opting in to pickle deserialization)
+db.save_local("faiss_index")
+new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
+docs = new_db.similarity_search(query)
+print(f"Top result content (loaded index): {docs[0].page_content}")
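
Note: the query string above is carried over from the LangChain tutorial even though the indexed file is a stored-procedure document. Once save_local() has written faiss_index/, the index can be reloaded in a separate process and the retriever tuned. A sketch assuming the script above has already run and OPENAI_API_KEY is set; the query, search_type, and k values are illustrative:

    from langchain_community.vectorstores import FAISS
    from langchain_openai import OpenAIEmbeddings

    # Reload the index written by save_local(); recent langchain-community
    # releases require explicitly opting in to pickle deserialization.
    db = FAISS.load_local("faiss_index", OpenAIEmbeddings(),
                          allow_dangerous_deserialization=True)

    # as_retriever() accepts the standard vector-store search options:
    # "mmr" re-ranks for diversity, k caps the number of chunks returned.
    retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 4})
    docs = retriever.invoke("What does the Stow stored procedure insert?")
    print(docs[0].page_content)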
From 231aece9c7d3f9657f7da1b7a11bacacdf272908 Mon Sep 17 00:00:00 2001
From: FelixStuyck <108720292+FelixStuyck@users.noreply.github.com>
Date: Fri, 7 Jun 2024 14:40:47 +0200
Subject: [PATCH 3/3] Update README.md

---
 README.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/README.md b/README.md
index f58a8d6..1d75346 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,9 @@
 # Personal AI Assistant: Your Web and PDF Guide
+## How to run on local desktop
+* Install the required packages
+* Open a Command Prompt (CMD)
+* Navigate to the folder containing the script, e.g. cd C:\Users\felixstuyck\Documents
+* streamlit run "Ai assistent script.py"
 
 ![ezgif com-gif-maker (1)](https://github.com/Abhi0323/Generative-AI-based-Personal-Assistant/assets/112967999/8718ba7f-e075-4a42-bbef-9a6e94ff50a3)
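
Note: one possible end-to-end CMD session for the README steps above; the package list is a guess based on the repository's imports and may need adjusting to your environment:

    :: install the packages the Streamlit app imports (assumed list)
    pip install streamlit langchain langchain-community langchain-openai faiss-cpu pypdf2 unstructured
    :: change to the folder that holds the script, then launch it
    cd C:\Users\felixstuyck\Documents
    streamlit run "Ai assistent script.py"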