"""Query an existing pgvector-backed LlamaIndex store through a remote vLLM server.

Reads Postgres credentials from ``db_config.json``, attaches to an
already-populated PGVectorStore table, and runs a few sample queries.
"""

import json
# `import urllib` alone does not guarantee the `parse` submodule is loaded;
# the previous code only worked because a dependency imported it transitively.
import urllib.parse

import psycopg2
from llama_index import ServiceContext, SimpleDirectoryReader, StorageContext
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.llms.vllm import VllmServer
from llama_index.vector_stores import PGVectorStore

# Remote vLLM generation endpoint used as the LLM for answer synthesis.
vllm_instance = VllmServer(
    api_url="http://takenaiassistent.iptime.org:14220/generate",
    max_new_tokens=2000,
    temperature=0.2,
)
# print(vllm_instance.complete("hello?"))

# max_length must be set explicitly or high-dimension multilingual models
# crash — see https://github.com/run-llama/llama_index/issues/9272
# The embed dimension is also set manually because llama_index's defaults are
# tuned for OpenAI embeddings and cause problems with other models.
DENSE_EMBED_DIMENSION = 384

# NOTE(review): max_length is a token-sequence cap, not the embedding size;
# passing the 384 embed dimension here truncates inputs at 384 tokens. The
# author's own note says 512 is the required value — confirm and decouple.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    max_length=DENSE_EMBED_DIMENSION,
)  # , device="cuda:1"

service_context = ServiceContext.from_defaults(llm=vllm_instance, embed_model=embed_model)

# Loaded only for the doc-id printout below; the index is attached to an
# already-populated vector store and does not re-ingest these documents.
documents = SimpleDirectoryReader("./data/paul_graham").load_data()
print("Document ID:", documents[0].doc_id)

with open("db_config.json", "r") as file:
    db_config = json.load(file)
table_name = db_config["tablename"]
db_config.pop("tablename")
db_name = db_config["dbname"]

# Connectivity sanity check only — PGVectorStore opens its own connection,
# so close the probe instead of leaking it for the life of the process.
conn = psycopg2.connect(**db_config)
# conn.autocommit = True
conn.close()

vector_store = PGVectorStore.from_params(
    database=db_name,
    host=db_config["host"],
    # quote_plus so special characters survive the SQLAlchemy URL build
    password=urllib.parse.quote_plus(db_config["password"]),
    port=db_config["port"],
    user=db_config["user"],
    table_name=table_name,
    embed_dim=DENSE_EMBED_DIMENSION,
)

index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store, service_context=service_context
)
query_engine = index.as_query_engine()

response = query_engine.query("what did author do?")
print(response)
response = query_engine.query("What happened in the mid 1980s?")
print(response)
response = query_engine.query("저자 그래이엄은 어느 대학교를 갔어? 문서에 있는 내용으로 답해줘")
print(response)