
from llama_index import SimpleDirectoryReader, StorageContext, ServiceContext
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.vector_stores import PGVectorStore
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms.vllm import VllmServer
import psycopg2
import json
import urllib.parse
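# Remote vLLM server used as the LLM backend for answer synthesis; max_new_tokens and
# temperature are the generation settings applied to its /generate endpoint.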
vllm_instance = VllmServer(
api_url="http://takenaiassistent.iptime.org:14220/generate", max_new_tokens=2000, temperature=0.2
)
# print(vllm_instance.complete("hello?"))
# max_length must be set to the dense output dimension of the embedding weights (the sparse
# path has problems of its own), or it will crash for high-dimension multilingual models.
# This is not mentioned anywhere in the documentation; see:
# https://github.com/run-llama/llama_index/issues/9272
# The dimension is set manually instead of letting llama_index decide it because the library's
# default parameters are tuned for OpenAI embeddings, which causes problems everywhere else.
DENSE_EMBED_DIMENSION = 384
embed_model = HuggingFaceEmbedding(model_name='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', max_length=DENSE_EMBED_DIMENSION)  # optionally pass device="cuda:1"
service_context = ServiceContext.from_defaults(llm=vllm_instance, embed_model=embed_model)
documents = SimpleDirectoryReader("./data/paul_graham").load_data()
print("Document ID:", documents[0].doc_id)
with open('db_config.json', 'r') as file:
db_config = json.load(file)
table_name = db_config.pop("tablename")  # pop so the remaining keys can be passed straight to psycopg2
db_name = db_config["dbname"]
conn = psycopg2.connect(**db_config)
# conn.autocommit = True
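# NOTE: this raw psycopg2 connection is not handed to PGVectorStore below (from_params opens
# its own connection); presumably it is kept for manual setup such as creating the database
# or the pgvector extension.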
vector_store = PGVectorStore.from_params(
database=db_name,
host=db_config['host'],
password=urllib.parse.quote_plus(db_config['password']),  # URL-encode to handle special characters
port=db_config['port'],
user=db_config['user'],
table_name=table_name,
embed_dim=DENSE_EMBED_DIMENSION
)
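# from_vector_store attaches to an existing, already-populated pgvector table; the documents
# loaded above are not inserted here (that would require StorageContext.from_defaults(...)
# plus VectorStoreIndex.from_documents).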
index = VectorStoreIndex.from_vector_store(vector_store=vector_store, service_context=service_context)
query_engine = index.as_query_engine()
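# as_query_engine() uses the default retrieval settings; similarity_top_k (and other
# retriever/synthesizer kwargs) can be passed here to control how many chunks are pulled
# from pgvector per query.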
response = query_engine.query("what did author do?")
print(response)
response = query_engine.query("What happened in the mid 1980s?")
print(response)
response = query_engine.query("저자 그래이엄은 어느 대학교를 갔어? 문서에 있는 내용으로 답해줘")
print(response)
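# Sketch (not part of the original script): inspect which chunks were retrieved for the last
# answer, using the standard llama_index Response.source_nodes attribute.
for node_with_score in response.source_nodes:
    print(node_with_score.score, node_with_score.node.get_content()[:200])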