Compare commits

...

4 Commits

Author SHA1 Message Date
yujj128
62fbc0014e Merge branch 'dev' of http://106.13.42.156:33077/lei_y601/sqlbot_agent into dev 2025-10-15 16:52:44 +08:00
yujj128
2b10826568 q_a优化,提示词优化 2025-10-15 16:52:35 +08:00
yujj128
987db89d7d Merge branch 'dev' of http://106.13.42.156:33077/lei_y601/sqlbot_agent into dev
# Conflicts:
#	main_service.py
2025-10-15 10:29:26 +08:00
yujj128
cfc694b821 去掉错误引用 2025-10-15 10:28:34 +08:00
7 changed files with 128 additions and 22 deletions

21
.env
View File

@@ -1,8 +1,17 @@
IS_FIRST_LOAD=True
#CHAT_MODEL_BASE_URL=https://api.siliconflow.cn
#CHAT_MODEL_API_KEY=sk-iyhiltycmrfnhrnbljsgqjrinhbztwdplyvuhfihcdlepole
#CHAT_MODEL_NAME=zai-org/GLM-4.5
#CHAT_MODEL_BASE_URL=http://10.226.128.221:8000/v1
#CHAT_MODEL_API_KEY=sk-iyhiltycmrfnhrnbljsgqjrinhbztwdplyvuhfihcdlepole
#CHAT_MODEL_NAME=deepseek-70b
#CHAT_MODEL_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
#CHAT_MODEL_API_KEY=sk-72575159d3ec43a68c6e222a15719bed
#CHAT_MODEL_NAME=qwen-plus
CHAT_MODEL_BASE_URL=https://api.siliconflow.cn
CHAT_MODEL_API_KEY=sk-iyhiltycmrfnhrnbljsgqjrinhbztwdplyvuhfihcdlepole
CHAT_MODEL_NAME=zai-org/GLM-4.5
CHAT_MODEL_API_KEY=sk-tnmkzvzbipohjfbqxhictewzdgrrxoghbmicrfjgxbgdkjfq
CHAT_MODEL_NAME=Qwen/Qwen3-32B
#使用ai中台的模型
EMBEDDING_MODEL_BASE_URL=http://10.225.128.2:13206/member1/small-model/bge/encode
EMBEDDING_MODEL_API_KEY=sk-iyhiltycmrfnhrnbljsgqjrinhbztwdplyvuhfihcdlepole
@@ -32,5 +41,11 @@ MYSQL_DATABASE_DBNAME=test
#达梦数据库
DAMENG_DATABASE_HOST=10.254.193.63
DAMENG_DATABASE_PORT=5236
DAMENG_DATABASE_PASSWORD=
DAMENG_DATABASE_PASSWORD=Ai@0923!!
DAMENG_DATABASE_USER=ai_view
TTL_CACHE=43200
SESSION_LENGTH=2
ALLOWED_USERS=ea17e939-10f1-492d-adf1-e6ac42f89d81,5fba57a1-0d5f-4988-bcc9-a2b340f8adf1,25bb2ddb-d26d-486f-9aab-5497a9f1e10e,793b49e9-98e6-4f15-8b56-148a691d3022,c76720b5-15f7-4d23-a213-19da38b64d89,103bdc74-a88c-4e1e-a379-794937443c77,c4b2e802-58ba-4927-82fe-f938deb41cf0,1cf59aed-bbf0-4f1c-9246-1d96cc5e2719,6b28c87e-061d-4aca-9a12-71c52e677e73,4c105cfa-ce70-4eca-8d36-c518eda89005,cc93a83b-b145-43f4-80af-fe4ce8a55a04

View File

@@ -7,8 +7,7 @@ from service.cus_vanna_srevice import CustomVanna, QdrantClient, TTLCacheWrapper
from decouple import config
import flask
from util import load_ddl_doc
from flask import Flask, Response, jsonify, request, send_from_directory
from flask import Flask, Response, jsonify, request
logger = logging.getLogger(__name__)

View File

@@ -183,13 +183,24 @@ class OpenAICompatibleLLM(VannaBase):
stop=None,
temperature=self.temperature,
)
# data = {
# "model": self.model,
# "prompt": prompt,
# "max_tokens": self.max_tokens,
# "temperature": self.temperature,
# }
# response = requests.post(
# url=f"{self.api_base}/completions",
# headers=self.headers,
# json=data
# )
else:
if num_tokens > 3500:
model = "kimi"
else:
model = "doubao"
print(f"Using model {model} for {num_tokens} tokens (approx)")
print(f"5.Using model {model} for {num_tokens} tokens (approx)")
response = self.client.chat.completions.create(
model=model,
@@ -231,6 +242,7 @@ class OpenAICompatibleLLM(VannaBase):
logger.info(f"sys_temp:{sys_temp}")
llm_response = self.submit_prompt(
[{'role': 'system', 'content': sys_temp}, {'role': 'user', 'content': user_temp}], **kwargs)
llm_response = str(llm_response.strip())
logger.info(f"llm_response:{llm_response}")
result = {"resp": orjson.loads(extract_nested_json(llm_response))}
logger.info(f"llm_response:{llm_response}")

View File

@@ -52,10 +52,12 @@ template:
<rule-detail>若数据库引擎是 PostgreSQL, Oracle, ClickHouse, 达梦数据库, AWS Redshift, Elasticsearch则schema、表名、字段名、别名使用双引号如 "schema_name"."table_name"。</rule-detail>
<rule-detail>若数据库引擎是 MySQL, Doris则表名、字段名、别名使用反引号如 `table_name`。</rule-detail>
<rule-detail>生成的SQL必须避免与数据库关键字冲突。</rule-detail>
<rule-detail>注意列名(别名)定义和使用的先后顺序:例如列别名是在 SELECT 阶段定义的,而 GROUP BY 阶段先于 SELECT 阶段执行,因此不允许在 GROUP BY 阶段引用 SELECT 阶段定义的列别名。</rule-detail>
</rule>
<rule>
<rule-title>数据查询与排序</rule-title>
<rule-detail>若未明确指定查询字段涉及人员信息时默认返回相关性最强的前10个字段。</rule-detail>
<rule-detail>当查询涉及部门表 org_orgs 时,必须过滤启用状态(enable = 1)和删除标志(dr = 0),且必须限制 code 字段值包含 'CYJ'(例如 "code" LIKE '%CYJ%')。</rule-detail>
<rule-detail>若查询字段为 VARCHAR 或 TEXT 类型但需要计算,必须先进行合理的类型转换(如 CAST(... AS NUMERIC))。</rule-detail>
<rule-detail>若查询包含日期/时间字段:
- **默认行为**:若提问未指定排序,**默认按时间字段降序排序**(即最新数据在前)。
@@ -271,22 +273,32 @@ template:
<chat-examples>
<example>
<input>
<sql>SELECT `u`.`email` AS `email`, `u`.`id` AS `id`, `u`.`account` AS `account`, `u`.`enable` AS `enable`, `u`.`create_time` AS `create_time`, `u`.`language` AS `language`, `u`.`default_oid` AS `default_oid`, `u`.`name` AS `name`, `u`.`phone` AS `phone`, FROM `per_user` `u` LIMIT 1000</sql>
<sql>SELECT `u`.`email` AS `邮箱`, `u`.`id` AS `ID`, `u`.`account` AS `账号`, `u`.`enable` AS `启用状态`, `u`.`create_time` AS `创建时间`, `u`.`language` AS `语言`, `u`.`default_oid` AS `所属组织id`, `u`.`name` AS `姓名`, `u`.`phone` AS `电话` FROM `per_user` `u` LIMIT 1000</sql>
<user-question>查询所有用户信息</user-question>
<chart-type></chart-type>
</input>
<output>
{{"type":"table","title":"所有用户信息","columns":[{{"name":"邮箱","value":"email"}},{{"name":"ID","value":"id"}},{{"name":"账号","value":"account"}},{{"name":"启用状态","value":"enable"}},{{"name":"创建时间","value":"create_time"}},{{"name":"语言","value":"language"}},{{"name":"所属组织ID","value":"default_oid"}},{{"name":"姓名","value":"name"}},{{"name":"Phone","value":"phone"}}]}}
{{"type":"table","title":"所有用户信息","columns":[{{"name":"邮箱","value":"email"}},{{"name":"ID","value":"id"}},{{"name":"账号","value":"account"}},{{"name":"启用状态","value":"enable"}},{{"name":"创建时间","value":"create_time"}},{{"name":"语言","value":"language"}},{{"name":"所属组织id","value":"default_oid"}},{{"name":"姓名","value":"name"}},{{"name":"电话","value":"phone"}}]}}
</output>
</example>
<example>
<input>
<sql>SELECT `o`.`name` AS `org_name`, COUNT(`u`.`id`) AS `user_count` FROM `per_user` `u` JOIN `per_org` `o` ON `u`.`default_oid` = `o`.`id` GROUP BY `o`.`name` ORDER BY `user_count` DESC LIMIT 1000</sql>
<sql>SELECT `o`.`name` AS `org_name`, COUNT(`u`.`id`) AS `人数` FROM `per_user` `u` JOIN `per_org` `o` ON `u`.`default_oid` = `o`.`id` GROUP BY `o`.`name` ORDER BY `人数` DESC LIMIT 1000</sql>
<user-question>饼图展示各个组织的人员数量</user-question>
<chart-type> pie </chart-type>
</input>
<output>
{{"type":"pie","title":"组织人数统计","axis":{{"y":{{"name":"人数","value":"user_count"}},"series":{{"name":"组织名称","value":"org_name"}}}}}}
{{"type":"pie","title":"组织人数统计","axis":{{"y":{{"name":"人数","value":"人数"}},"series":{{"name":"org_name","value":"org_name"}}}}}}
</output>
</example>
<example>
<input>
<sql>SELECT COUNT(*) AS "总人数", SUM(CASE WHEN p."gender" = '1' THEN 1 ELSE 0 END) AS "男员工数", SUM(CASE WHEN p."gender" = '2' THEN 1 ELSE 0 END) AS "女员工数" FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p WHERE p."dr" = 0 ORDER BY "总人数" DESC LIMIT 1000;</sql>
<user-question>表格展示不同性别人员数量,及人员总数</user-question>
<chart-type> bar </chart-type>
</input>
<output>
{{"type":"table","title":"员工性别统计","columns":[{{"name":"总人数","value":"总人数"}},{{"name":"男员工数","value":"男员工数"}},{{"name":"女员工数","value":"女员工数"}}]}}
</output>
</example>
</chat-examples>

View File

@@ -8,7 +8,7 @@ question_and_answer = [
FROM YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database p
WHERE p.internal_dept IN (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
WITH "name" LIKE '%综合处%'
WITH "name"||"shortname" LIKE '%综合处%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p.dr = 0
@@ -198,7 +198,7 @@ question_and_answer = [
from YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database
where internal_dept in (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
WITH "name" LIKE '%数信中心%'
WITH "name"||"shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
@@ -224,7 +224,7 @@ question_and_answer = [
JOIN IUAP_APDOC_BASEDOC.org_orgs o ON p.internal_dept = o.id
WHERE p.internal_dept IN (
SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs"
START WITH "name" LIKE '%数信中心%'
START WITH "name"||"shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p.dr = 0
@@ -262,7 +262,7 @@ question_and_answer = [
AND ps."date_value" LIKE '2025-10%'
and p.internal_dept in (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
WITH "name" LIKE '%数信中心%'
WITH "name" || "shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p."dr" = 0
@@ -279,7 +279,7 @@ question_and_answer = [
'''
},
{
"question": "员工年龄段分布图",
"question": "数信中心员工年龄段分布图",
"answer": '''
SELECT
CASE
@@ -301,7 +301,10 @@ question_and_answer = [
WHERE "parentid" IN (
SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs"
)
WHERE name || shortname LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
) OR id IN (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs"
WHERE name || shortname LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%')
)
GROUP BY
CASE
@@ -328,14 +331,15 @@ question_and_answer = [
WHERE p."dr" = 0
AND o."id" IN (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
WITH "name" LIKE '%综合处%'
WITH "name"||"shortname" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid")
AND p."birthday" IS NOT NULL
AND p."birthday" != ''
ORDER BY o."name" ASC, p."birthday" DESC
LIMIT 1000;
'''
},{
},
{
"question": "查询综合处下面的员工的平均年龄",
"answer": '''
SELECT ROUND(AVG(2025 - CAST(SUBSTR(p."birthday", 1, 4) AS INT)), 2) AS "平均年龄"
@@ -344,12 +348,45 @@ question_and_answer = [
WHERE p."dr" = 0
AND o."id" IN (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
WITH "name" LIKE '%综合处%'
WITH "name"||"shortname" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid")
AND p."birthday" IS NOT NULL
AND p."birthday" != ''
LIMIT 1000;
'''
},
{
"question": "综合处有几个人,男员工,女员工分别有多少",
"answer": '''
SELECT COUNT(*) AS "总人数",
SUM(CASE WHEN p."gender" = '1' THEN 1 ELSE 0 END) AS "男员工数",
SUM(CASE WHEN p."gender" = '2' THEN 1 ELSE 0 END) AS "女员工数"
FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p
WHERE p."internal_dept" IN (
SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs"
START WITH "name" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p."dr" = 0
LIMIT 1000;
'''
},
{
"question": "综合处女员工有多少",
"answer": '''
SELECT (SELECT COUNT(*)
FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p
WHERE p."internal_dept" IN (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs"
START WITH "name"||"shortname" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p."gender" = '2'
AND p."dr" = 0
) AS "女员工数"
'''
}
]

View File

@@ -149,7 +149,7 @@ person_database_ddl = """
"type": "VARCHAR(108)",
"comment": "内部单位ID",
"role": "dimension",
"tags": ["组织信息", "内部架构"]
"tags": ["组织信息", "内部架构ID"]
},
{
"name": "person_type",
@@ -963,6 +963,35 @@ org_orgs_ddl = '''
"role": "dimension",
"tags": ["部门名称","部门简称","部门缩写"]
},
{
"name": "enable",
"type": "INT",
"comment": "启用状态",
"value":{
"1":"启用",
"2":"未启用"
},
"role": "dimension",
"tags": ["部门启用","部门状态","是否启用"]
},
{
"name": "level",
"type": "INT",
"comment": "部门或单位层级",
"role": "dimension",
"tags": ["部门层级","级别"]
},
{
"name": "dr",
"type": "INT",
"comment": "删除标志",
"value": {
"0": "正常",
"1": "删除"
},
"role": "dimension",
"tags": ["状态标识", "软删除", "枚举"]
}
],
"tags": ["部门id","部门信息","部门名称"]

View File

@@ -1,7 +1,8 @@
import logging
from typing import Optional
from orjson import orjson
logger = logging.getLogger(__name__)
keywords = {
# "gender":{"1":"男","2":"女"},
"person_status":{"1":"草稿","2":"审批中","3":"制卡中","4":"已入库","5":"停用"},
@@ -43,7 +44,8 @@ def extract_nested_json(text):
stack = []
start_index = -1
results = []
if not text:
logger.warning("extract_nested_json: text is empty")
for i, char in enumerate(text):
if char in '{[':
if not stack: # 记录起始位置