윤영준 2023-12-28
Hello Yona
@08f80ab8a029b1d5a2630d5fbf81e27eccf87671
 
README.md (added)
+++ README.md
@@ -0,0 +1,1 @@
+# LangChain_LLM
 
app.py (added)
+++ app.py
@@ -0,0 +1,23 @@
+from fastapi import FastAPI
+from langchain.prompts import ChatPromptTemplate
+from model.weight_load_llamacpp import get_neuralchat7bv3_2, get_mixtral
+# from langchain.chat_models import ChatAnthropic, ChatOpenAI
+from langserve import add_routes
+
+
+app = FastAPI(
+    title="LangChain Server",
+    version="1.0",
+    description="A simple API server using LangChain's Runnable interfaces",
+)
+
+# Minimal prompt/model wiring for the /sql route (assumed: a plain
+# question template served by the Mixtral loader below).
+prompt = ChatPromptTemplate.from_template("{question}")
+model = get_mixtral()
+
+add_routes(
+    app,
+    prompt | model,
+    path="/sql",
+)
 
main.py (added)
+++ main.py
@@ -0,0 +1,20 @@
+import os
+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # enumerate GPUs in PCI bus order
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # expose only the second GPU
+
+from model.weight_load_llamacpp import get_mixtral
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+
+
+template = """Question: {question}
+
+Answer: Let's work this out in a step by step way to be sure we have the right answer."""
+
+prompt = PromptTemplate(template=template, input_variables=["question"])
+
+llm_chain = LLMChain(prompt=prompt, llm=get_mixtral())
+question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
+llm_chain.run(question)
+
+
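LLMChain.run is deprecated in newer LangChain releases in favor of runnable (pipe) composition. A sketch of the same chain in that style, under the same loader and template:

from model.weight_load_llamacpp import get_mixtral
from langchain.prompts import PromptTemplate

prompt = PromptTemplate.from_template(
    "Question: {question}\n\n"
    "Answer: Let's work this out in a step by step way "
    "to be sure we have the right answer."
)
chain = prompt | get_mixtral()  # prompt output is piped straight into the LLM
print(chain.invoke({"question": "What NFL team won the Super Bowl in the year Justin Bieber was born?"}))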
 
model/weight_load_llamacpp.py (added)
+++ model/weight_load_llamacpp.py
@@ -0,0 +1,39 @@
+import os
+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # enumerate GPUs in PCI bus order
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # expose only the second GPU
+
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.llms import LlamaCpp
+
+callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+
+def get_mixtral():
+    mixtral8x7b = LlamaCpp(
+        model_path="model/weight/mixtral-8x7b-instruct-v0.1.Q6_K.gguf",
+        temperature=0.75,
+        n_gpu_layers=33,
+        n_ctx=20000,
+        n_threads=30,
+        n_batch=32,
+        max_tokens=2024,
+        top_p=1.0,  # nucleus-sampling threshold; must lie in (0, 1]
+        callback_manager=callback_manager,
+        verbose=True,  # Verbose is required to pass to the callback manager
+    )
+    return mixtral8x7b
+
+def get_neuralchat7bv3_2():
+    neuralchat7bv3_2 = LlamaCpp(
+        model_path="model/weight/neural-chat-7b-v3-2.Q5_K_M.gguf",
+        temperature=0.75,
+        n_gpu_layers=33,
+        n_ctx=20000,
+        n_threads=30,
+        n_batch=32,
+        max_tokens=512,
+        top_p=1,
+        callback_manager=callback_manager,
+        verbose=True,  # Verbose is required to pass to the callback manager
+    )
+    return neuralchat7bv3_2
\ No newline at end of file
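
Either loader can be smoke-tested directly: the returned LlamaCpp model implements the Runnable interface, and tokens stream to stdout through the StreamingStdOutCallbackHandler. The prompt string here is illustrative:

from model.weight_load_llamacpp import get_mixtral

llm = get_mixtral()
# A plain string prompt works for an LLM-type runnable.
text = llm.invoke("Explain GGUF quantization in one sentence.")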