윤영준 2023-12-28
Hello Yona
@08f80ab8a029b1d5a2630d5fbf81e27eccf87671
 
README.md (added)
+++ README.md
@@ -0,0 +1,1 @@
+# LangChain_LLM
 
app.py (added)
+++ app.py
@@ -0,0 +1,23 @@
+from fastapi import FastAPI
+from langchain.prompts import ChatPromptTemplate
+from model.weight_load_llamacpp import get_neuralchat7bv3_2, get_mixtral
+# from langchain.chat_models import ChatAnthropic, ChatOpenAI
+from langserve import add_routes
+
+
+app = FastAPI(
+    title="LangChain Server",
+    version="1.0",
+    description="A simple API server using LangChain's Runnable interfaces",
+)
+
+# Minimal prompt/model wiring for the /sql route (assumed: a plain
+# question template served by the Mixtral loader below).
+prompt = ChatPromptTemplate.from_template("{question}")
+model = get_mixtral()
+
+add_routes(
+    app,
+    prompt | model,
+    path="/sql",
+)
 
main.py (added)
+++ main.py
@@ -0,0 +1,20 @@
+import os
+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # enumerate GPUs in PCI bus order
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # expose only the second GPU
+
+from model.weight_load_llamacpp import get_mixtral
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+
+
+template = """Question: {question}
+
+Answer: Let's work this out in a step by step way to be sure we have the right answer."""
+
+prompt = PromptTemplate(template=template, input_variables=["question"])
+
+llm_chain = LLMChain(prompt=prompt, llm=get_mixtral())
+question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
+llm_chain.run(question)
+
+
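LLMChain.run is deprecated in newer LangChain releases in favor of runnable (pipe) composition. A sketch of the same chain in that style, under the same loader and template:

from model.weight_load_llamacpp import get_mixtral
from langchain.prompts import PromptTemplate

prompt = PromptTemplate.from_template(
    "Question: {question}\n\n"
    "Answer: Let's work this out in a step by step way "
    "to be sure we have the right answer."
)
chain = prompt | get_mixtral()  # prompt output is piped straight into the LLM
print(chain.invoke({"question": "What NFL team won the Super Bowl in the year Justin Bieber was born?"}))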
 
model/weight_load_llamacpp.py (added)
+++ model/weight_load_llamacpp.py
@@ -0,0 +1,39 @@
+import os
+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # enumerate GPUs in PCI bus order
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # expose only the second GPU
+
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.llms import LlamaCpp
+
+callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+
+def get_mixtral():
+    mixtral8x7b = LlamaCpp(
+        model_path="model/weight/mixtral-8x7b-instruct-v0.1.Q6_K.gguf",
+        temperature=0.75,
+        n_gpu_layers=33,
+        n_ctx=20000,
+        n_threads=30,
+        n_batch=32,
+        max_tokens=2024,
+        top_p=1.0,  # nucleus-sampling threshold; must lie in (0, 1]
+        callback_manager=callback_manager,
+        verbose=True,  # Verbose is required to pass to the callback manager
+    )
+    return mixtral8x7b
+
+def get_neuralchat7bv3_2():
+    neuralchat7bv3_2 = LlamaCpp(
+        model_path="model/weight/neural-chat-7b-v3-2.Q5_K_M.gguf",
+        temperature=0.75,
+        n_gpu_layers=33,
+        n_ctx=20000,
+        n_threads=30,
+        n_batch=32,
+        max_tokens=512,
+        top_p=1,
+        callback_manager=callback_manager,
+        verbose=True,  # Verbose is required to pass to the callback manager
+    )
+    return neuralchat7bv3_2
\ No newline at end of file
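
Either loader can be smoke-tested directly: the returned LlamaCpp model implements the Runnable interface, and tokens stream to stdout through the StreamingStdOutCallbackHandler. The prompt string here is illustrative:

from model.weight_load_llamacpp import get_mixtral

llm = get_mixtral()
# A plain string prompt works for an LLM-type runnable.
text = llm.invoke("Explain GGUF quantization in one sentence.")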