from llama_index import SimpleDirectoryReader, ServiceContext
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.vector_stores import PGVectorStore
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms.vllm import VllmServer
import psycopg2
import json
import urllib.parse

vllm_instance = VllmServer(
    api_url="http://takenaiassistent.iptime.org:14220/generate",
    max_new_tokens=2000,
    temperature=0.2,
)
# print(vllm_instance.complete("hello?"))

# max_length must be set to the dense output dimension of the embedding
# weights (the sparse path has its own execution problems), otherwise
# high-dimension multilingual models crash. None of this is mentioned in
# the docs; see https://github.com/run-llama/llama_index/issues/9272
#
# The dimension is set manually instead of letting llama_index infer it,
# because the library defaults are tuned for OpenAI embeddings and cause
# problems everywhere else.
DENSE_EMBED_DIMENSION = 384
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    max_length=DENSE_EMBED_DIMENSION,
    # device="cuda:1",
)

service_context = ServiceContext.from_defaults(llm=vllm_instance, embed_model=embed_model)

documents = SimpleDirectoryReader("./data/paul_graham").load_data()
print("Document ID:", documents[0].doc_id)

with open("db_config.json", "r") as file:
    db_config = json.load(file)
table_name = db_config.pop("tablename")  # remaining keys are psycopg2 connection params
db_name = db_config["dbname"]

conn = psycopg2.connect(**db_config)
# conn.autocommit = True

vector_store = PGVectorStore.from_params(
    database=db_name,
    host=db_config["host"],
    password=urllib.parse.quote_plus(db_config["password"]),  # quote special characters for the connection URI
    port=db_config["port"],
    user=db_config["user"],
    table_name=table_name,
    embed_dim=DENSE_EMBED_DIMENSION,
)

index = VectorStoreIndex.from_vector_store(vector_store=vector_store, service_context=service_context)
query_engine = index.as_query_engine()

response = query_engine.query("What did the author do?")
print(response)
response = query_engine.query("What happened in the mid 1980s?")
print(response)
# Korean query to exercise the multilingual embedding model; it asks:
# "Which university did the author Graham attend? Answer from what is in the document."
response = query_engine.query("저자 그래이엄은 어느 대학교를 갔어? 문서에 있는 내용으로 답해줘")
print(response)
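# ---------------------------------------------------------------------------
# For reference, a minimal db_config.json with the keys this script reads.
# The values below are placeholders (assumed, not real deployment settings);
# everything except "tablename" is passed straight to psycopg2.connect(),
# so those keys must be valid libpq connection parameters.
#
# {
#     "tablename": "paul_graham_essay",
#     "dbname": "vector_db",
#     "host": "localhost",
#     "port": 5432,
#     "user": "postgres",
#     "password": "changeme"
# }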
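# A quick sanity check for the dimension workaround above: embed a probe
# string and confirm the vector length matches DENSE_EMBED_DIMENSION before
# anything is written to the pgvector table. A sketch only, using the
# standard get_text_embedding() method from llama_index's embedding base
# class; run it right after constructing embed_model if issue 9272 regresses.
#
#   probe = embed_model.get_text_embedding("dimension check")
#   assert len(probe) == DENSE_EMBED_DIMENSION, f"got {len(probe)} dims"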