Langchain-chatchat

hxy included in 科研

2024-09-20 847 words 4 minutes

Contents

部署

按照项目的 README.md 进行配置

需要git clone huggingface库到本地/自己服务器
安装llama_index注意细节

1
2


pip install llama_index==0.41.0
# Author's note: do not mistakenly type `pip install llama-index` here.
# NOTE(review): pip normalizes "-" and "_" in project names, so both spellings
# should resolve to the same package; confirm whether this warning is needed.

clone前运行命令

1
2


# Source the clash profile script into the current shell
# (presumably this is what defines `proxy_on`; confirm on the target machine).
source /etc/profile.d/clash.sh
proxy_on # turn on the clash service

也可在autodl上开启镜像

源码技术栈

webui streamlit

1
2
3
4
5
6
7


# Render a select box for choosing the dialogue mode.
# Options come from `dialogue_modes`, the initially selected entry is `index`,
# `on_mode_change` is registered as the on_change callback, and the widget is
# keyed as "dialogue_mode". The selected value is bound to `dialogue_mode`.
dialogue_mode = st.selectbox(
    "请选择对话模式：",
    dialogue_modes,
    index=index,
    on_change=on_mode_change,
    key="dialogue_mode",
)

asyncio

asyncio 是 Python 标准库中的异步 I/O（协程）库。所谓「异步 IO」，就是你发起一个 IO 操作后不必等它结束，可以继续做其他事情；当它结束时，你会得到通知。

示例

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11


async def chat():
    """Return an EventSourceResponse fed by the inner ``chat_iterator``.

    This is an excerpt: the parts marked ``...`` are elided in the source.
    """
    async def chat_iterator():
        ...
        # Start the chain call as a background task so the loop below can
        # consume tokens while it runs. `wrap_done` receives the awaitable and
        # `callback.done` (presumably to signal completion; confirm in
        # wrap_done's definition).
        task = asyncio.create_task(wrap_done(
            chain.acall({"input": query}),
            callback.done),
        )
        # Yield one item per token produced by the callback's async iterator.
        async for token in callback.iter():
            yield ...
        # Wait for the background chain call before the generator finishes.
        await task
    return EventSourceResponse(chat_iterator())

fastapi前后端交互

前端发送请求

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


def chat_chat(
        self,
        query: str,
        conversation_id: str = None,
        history_len: int = -1,
        history: List[Dict] = None,
        stream: bool = True,
        model: str = LLM_MODELS[0],
        temperature: float = TEMPERATURE,
        max_tokens: int = None,
        prompt_name: str = "default",
        embedding_model: str = "None",
        **kwargs,
):
    """Send a chat request to the ``/chat/chat`` endpoint of api.py.

    Every named argument is forwarded in the JSON payload; note that
    ``model`` is sent under the key ``"model_name"``.

    Args:
        query: User input, sent as ``"query"``.
        conversation_id: Sent as ``"conversation_id"``.
        history_len: Sent as ``"history_len"``.
        history: Prior messages, sent as ``"history"``. ``None`` (the
            default) is treated as an empty list.
        stream: Sent as ``"stream"`` in the payload.
        model: Sent as ``"model_name"``.
        temperature: Sent as ``"temperature"``.
        max_tokens: Sent as ``"max_tokens"``.
        prompt_name: Sent as ``"prompt_name"``.
        embedding_model: Sent as ``"embedding_model"``.
        **kwargs: Passed through to ``self.post``.

    Returns:
        Whatever ``self._httpx_stream2generator(response, as_json=True)``
        returns for the POST response.
    """
    # Use a fresh list per call instead of a shared mutable default, so that
    # no state can leak between calls through the default argument.
    if history is None:
        history = []

    data = {
        "query": query,
        "conversation_id": conversation_id,
        "history_len": history_len,
        "history": history,
        "stream": stream,
        "model_name": model,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "prompt_name": prompt_name,
        "embedding_model": embedding_model
    }

    # The HTTP call itself is always made with stream=True; the `stream`
    # argument only affects the payload sent to the server.
    response = self.post("/chat/chat", json=data, stream=True, **kwargs)
    return self._httpx_stream2generator(response, as_json=True)

后端注册路由

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14


# Register the chat-related POST routes on `app`.
# Each `app.post(...)` call is immediately applied to a handler function
# (decorator-call form), binding that handler to the given path.
app.post("/chat/chat",
             tags=["Chat"],
             summary="与llm模型对话(通过LLMChain)",
             )(chat) # the handler function goes in the trailing parentheses

# Search-engine chat endpoint, handled by `search_engine_chat`.
app.post("/chat/search_engine_chat",
        tags=["Chat"],
        summary="与搜索引擎对话",
    )(search_engine_chat)

# Feedback endpoint, handled by `chat_feedback`.
app.post("/chat/feedback",
        tags=["Chat"],
        summary="返回llm模型对话评分",
    )(chat_feedback)

封装返回类

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24


# Common response envelope: a status code, a status message and an optional
# data payload.
class BaseResponse(BaseModel):
    # Status code; defaults to 200.
    code: int = pydantic.Field(200, description="API status code")
    # Status message; defaults to "success".
    msg: str = pydantic.Field("success", description="API status message")
    # Payload of any type; defaults to None.
    data: Any = pydantic.Field(None, description="API data")

    class Config:
        # Example payload attached to the model's schema
        # (presumably shown in the generated API docs; confirm).
        schema_extra = {
            "example": {
                "code": 200,
                "msg": "success",
            }
        }

# Response envelope whose payload is a list of strings; inherits `code` and
# `msg` from BaseResponse and narrows `data`.
class ListResponse(BaseResponse):
    # Overrides the base `data` field; the Ellipsis default marks it required.
    data: List[str] = pydantic.Field(..., description="List of names")

    class Config:
        # Example payload attached to the model's schema, including sample data.
        schema_extra = {
            "example": {
                "code": 200,
                "msg": "success",
                "data": ["doc1.docx", "doc2.pdf", "doc3.txt"],
            }
        }

事实上，看懂这些代码后，自己完全可以独立写一个langchain-chatchat。

agent实战

LangChain 框架提供了一套便捷的机制，使大模型能够通过 agent 调用工具。步骤如下：

编写函数

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19


# search_internet.py
def search_result2docs(search_results):
    """Convert search results into a list of ``Document`` objects.

    For each result, ``"snippet"`` becomes the page content, ``"link"`` is
    stored in the metadata as ``"source"`` and ``"title"`` as ``"filename"``.
    Any key that is absent falls back to an empty string.

    :param search_results: iterable of result mappings
    :return: list with one ``Document`` per result, in input order
    """
    return [
        Document(
            page_content=item.get("snippet", ""),
            metadata={
                "source": item.get("link", ""),
                "filename": item.get("title", ""),
            },
        )
        for item in search_results
    ]

def search_internet(query: str):
    """Search with DuckDuckGo and return the result snippets as one string.

    Requests 10 results for ``query``, converts them with
    ``search_result2docs`` and joins each document's page content with
    newlines.

    :param query: search query text
    :return: newline-joined page contents of the search results
    """
    hits = DuckDuckGoSearchAPIWrapper().results(query, 10)
    return "\n".join(doc.page_content for doc in search_result2docs(hits))

# Argument schema passed as `args_schema` for the search_internet tool.
# NOTE(review): the field is named `location` although its description says it
# carries the search query; renaming would change the schema, so confirm with
# callers before touching it.
class SearchInternetInput(BaseModel):
    location: str = Field(description="Query for Internet search")

设置prompt模版

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18


# Prompt templates for the agent, keyed by template name.
# The "default" template describes a Question / Thought / Action /
# Action Input / Observation loop and uses the placeholders {tools},
# {tool_names}, {history}, {input} and {agent_scratchpad}.
agent_prompt_templates = {
    "default": (
        'Answer the following questions as best you can. If it is in order, you can use some tools appropriately. '
        'You have access to the following tools:\n\n'
        '{tools}\n\n'
        'Use the following format:\n'
        # Fixed typo: the original text read "answer1".
        'Question: the input question you must answer\n'
        'Thought: you should always think about what to do and what tools to use.\n'
        'Action: the action to take, should be one of [{tool_names}]\n'
        'Action Input: the input to the action\n'
        'Observation: the result of the action\n'
        '... (this Thought/Action/Action Input/Observation can be repeated zero or more times)\n'
        'Thought: I now know the final answer\n'
        'Final Answer: the final answer to the original input question\n'
        'Begin!\n\n'
        'history: {history}\n\n'
        'Question: {input}\n\n'
        'Thought: {agent_scratchpad}\n'
    ),
}

编写工具

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17


## Note: if you intend to use AgentLM, use the English versions here.
# Tools offered to the agent. Each entry wraps a plain function together with
# the name and description given here and an args schema class.
tools = [
    Tool.from_function(
        func=search_internet,
        name="search_internet",
        description="Use this tool to use duckduckgo search engine to search the internet",
        args_schema=SearchInternetInput,
    ),
    Tool.from_function(
        func=fetch_pages,
        name="fetch_pages",
        description="Use this tool to fetch web source code",
        args_schema=FetchPagesInput,
    )
]

# Names of all registered tools, in list order.
tool_names = [tool.name for tool in tools]

初始化模型并调用工具

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


# Initialize the ChatOpenAI model with a model name, API key and base URL.
# The api_key value is a placeholder and must be replaced with a real key.
llm = ChatOpenAI(
    model="deepseek-coder",
    api_key="your_deepseek_api_key",
    base_url="https://api.deepseek.com/beta"
)

# Select the "default" agent prompt text.
agent_prompt_template = agent_prompt_templates['default']

# Build the prompt object from the template text and the tool list.
# `CustomPromptTemplate` is defined elsewhere.
prompt_template_agent = CustomPromptTemplate(
    template=agent_prompt_template,
    tools=tools,
    input_variables=["input", "intermediate_steps", "history"]
)

# `CustomOutputParser` is defined elsewhere.
output_parser = CustomOutputParser()
llm_chain = LLMChain(llm=llm, prompt=prompt_template_agent)

# Single-action agent: generation stops at the "Observation" markers, and
# only the tools named in `tool_names` are allowed.
agent = LLMSingleActionAgent(
                llm_chain=llm_chain,
                output_parser=output_parser,
                stop=["\nObservation:", "Observation"],
                allowed_tools=tool_names,
            )

# Executor built from the agent and the tools, with verbose output enabled.
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent,
                                                    tools=tools,
                                                    verbose=True)

# Run one query with an empty history and print the "output" entry of the result.
query = "写一段pytorch代码，实现transformer。请结合搜索结果。"
result = agent_executor.invoke({"input": query, "history": []})
print(result['output'])

除上述写法外，还有一种更简单的编写方式：

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38


from langchain.tools import Tool
from langchain.agents import initialize_agent, AgentType
from langchain_openai import ChatOpenAI

# Define the calculate function
def calculate(expression: str) -> float:
    """
    Evaluate a mathematical expression.

    Prints a marker line and the expression before evaluating it. If anything
    raises, the error is not propagated; a string of the form
    ``"Error: <message>"`` is returned instead.

    :param expression: mathematical expression, e.g. "3 + 5" or "10 / 2"
    :return: the evaluated result, or an "Error: ..." string on failure
    """
    try:
        print("调用工具")
        print(expression)
        # SECURITY NOTE(review): eval() executes arbitrary Python. The
        # expression comes from model output, so treat it as untrusted and
        # replace this with a restricted arithmetic evaluator before exposing
        # the tool to real users.
        outcome = eval(expression)
    except Exception as exc:
        outcome = "Error: " + str(exc)
    return outcome

# Wrap the function as a Tool
calc_tool = Tool(
    name="calculate",
    func=calculate,
    description="A tool to perform mathematical calculations. Input should be a mathematical expression, e.g., '3 + 5' or '10 / 2'."
)
# Define the tool list
tools = [calc_tool]
# Create the agent
# NOTE(review): `llm` is not defined in this snippet; presumably a ChatOpenAI
# instance is created beforehand. Confirm before running.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,  # use the zero-shot ReAct agent
    verbose=True  # print detailed logs
)
# User input
user_input = "2的10次方是多少?"
# Invoke the agent; the result is stored in `response` and not printed here.
response = agent.run(user_input)