diff --git a/service/cus_vanna_srevice.py b/service/cus_vanna_srevice.py
index db94799..56a96c0 100644
--- a/service/cus_vanna_srevice.py
+++ b/service/cus_vanna_srevice.py
@@ -182,13 +182,24 @@ class OpenAICompatibleLLM(VannaBase):
stop=None,
temperature=self.temperature,
)
+ # data = {
+ # "model": self.model,
+ # "prompt": prompt,
+ # "max_tokens": self.max_tokens,
+ # "temperature": self.temperature,
+ # }
+ # response = requests.post(
+ # url=f"{self.api_base}/completions",
+ # headers=self.headers,
+ # json=data
+ # )
else:
if num_tokens > 3500:
model = "kimi"
else:
model = "doubao"
- print(f"Using model {model} for {num_tokens} tokens (approx)")
+ print(f"5.Using model {model} for {num_tokens} tokens (approx)")
response = self.client.chat.completions.create(
model=model,
@@ -231,6 +242,7 @@ class OpenAICompatibleLLM(VannaBase):
logger.info(f"sys_temp:{sys_temp}")
llm_response = self.submit_prompt(
[{'role': 'system', 'content': sys_temp}, {'role': 'user', 'content': user_temp}], **kwargs)
+ llm_response = str(llm_response or "").strip()  # coerce before stripping: a None/non-str reply must not raise AttributeError here
logger.info(f"llm_response:{llm_response}")
result = {"resp": orjson.loads(extract_nested_json(llm_response))}
logger.info(f"llm_response:{llm_response}")
diff --git a/template.yaml b/template.yaml
index cf4878f..c12fdcc 100644
--- a/template.yaml
+++ b/template.yaml
@@ -52,10 +52,12 @@ template:
若数据库引擎是 PostgreSQL, Oracle, ClickHouse, 达梦数据库, AWS Redshift, Elasticsearch,则schema、表名、字段名、别名使用双引号,如 "schema_name"."table_name"。若数据库引擎是 MySQL, Doris,则表名、字段名、别名使用反引号,如 `table_name`。生成的SQL必须避免与数据库关键字冲突。
+ 注意列名定义和使用的先后顺序,例如:SELECT阶段定义了列名,如果GROUP BY阶段先于SELECT阶段执行,则不允许在GROUP BY阶段引用该列名。数据查询与排序若未明确指定查询字段,涉及人员信息时,默认返回相关性最强的前10个字段。
+ 当涉及部门表org_orgs的查询时注意enable启用状态和dr删除标志,且必须限制code字段值包含'CYJ'。若查询字段为 VARCHAR 或 TEXT 类型但需要计算,必须先进行合理的类型转换(如 CAST(... AS NUMERIC))。若查询包含日期/时间字段:
- **默认行为**:若提问未指定排序,**默认按时间字段降序排序**(即最新数据在前)。
@@ -271,22 +273,32 @@ template:
- SELECT `u`.`email` AS `email`, `u`.`id` AS `id`, `u`.`account` AS `account`, `u`.`enable` AS `enable`, `u`.`create_time` AS `create_time`, `u`.`language` AS `language`, `u`.`default_oid` AS `default_oid`, `u`.`name` AS `name`, `u`.`phone` AS `phone`, FROM `per_user` `u` LIMIT 1000
+ SELECT `u`.`email` AS `邮箱`, `u`.`id` AS `ID`, `u`.`account` AS `账号`, `u`.`enable` AS `启用状态`, `u`.`create_time` AS `创建时间`, `u`.`language` AS `语言`, `u`.`default_oid` AS `所属组织id`, `u`.`name` AS `姓名`, `u`.`phone` AS `电话` FROM `per_user` `u` LIMIT 1000查询所有用户信息
- SELECT `o`.`name` AS `org_name`, COUNT(`u`.`id`) AS `user_count` FROM `per_user` `u` JOIN `per_org` `o` ON `u`.`default_oid` = `o`.`id` GROUP BY `o`.`name` ORDER BY `user_count` DESC LIMIT 1000
+ SELECT `o`.`name` AS `org_name`, COUNT(`u`.`id`) AS `人数` FROM `per_user` `u` JOIN `per_org` `o` ON `u`.`default_oid` = `o`.`id` GROUP BY `o`.`name` ORDER BY `人数` DESC LIMIT 1000饼图展示各个组织的人员数量 pie
+
+
+
+ SELECT COUNT(*) AS "总人数", SUM(CASE WHEN p."gender" = '1' THEN 1 ELSE 0 END) AS "男员工数", SUM(CASE WHEN p."gender" = '2' THEN 1 ELSE 0 END) AS "女员工数" FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p WHERE p."dr" = 0 ORDER BY "总人数" DESC LIMIT 1000;
+ 表格展示不同性别人员数量,及人员总数
+ bar
+
+
diff --git a/util/q_and_a_dict.py b/util/q_and_a_dict.py
index 0fcfe96..9a6ce39 100644
--- a/util/q_and_a_dict.py
+++ b/util/q_and_a_dict.py
@@ -8,7 +8,7 @@ question_and_answer = [
FROM YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database p
WHERE p.internal_dept IN (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
- WITH "name" LIKE '%综合处%'
+ WITH "name"||"shortname" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p.dr = 0
@@ -198,7 +198,7 @@ question_and_answer = [
from YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database
where internal_dept in (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
- WITH "name" LIKE '%数信中心%'
+ WITH "name"||"shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
@@ -224,7 +224,7 @@ question_and_answer = [
JOIN IUAP_APDOC_BASEDOC.org_orgs o ON p.internal_dept = o.id
WHERE p.internal_dept IN (
SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs"
- START WITH "name" LIKE '%数信中心%'
+ START WITH "name"||"shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p.dr = 0
@@ -262,7 +262,7 @@ question_and_answer = [
AND ps."date_value" LIKE '2025-10%'
and p.internal_dept in (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
- WITH "name" LIKE '%数信中心%'
+ WITH "name" || "shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p."dr" = 0
@@ -279,7 +279,7 @@ question_and_answer = [
'''
},
{
- "question": "员工年龄段分布图",
+ "question": "数信中心员工年龄段分布图",
"answer": '''
SELECT
CASE
@@ -301,7 +301,10 @@ question_and_answer = [
WHERE "parentid" IN (
SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs"
- )
+ WHERE "name" || "shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
+ ) OR "id" IN (SELECT "id"
+ FROM "IUAP_APDOC_BASEDOC"."org_orgs"
+ WHERE "name" || "shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%')
)
GROUP BY
CASE
@@ -328,14 +331,15 @@ question_and_answer = [
WHERE p."dr" = 0
AND o."id" IN (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
- WITH "name" LIKE '%综合处%'
+ WITH "name"||"shortname" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid")
AND p."birthday" IS NOT NULL
AND p."birthday" != ''
ORDER BY o."name" ASC, p."birthday" DESC
LIMIT 1000;
'''
- },{
+ },
+ {
"question": "查询综合处下面的员工的平均年龄",
"answer": '''
SELECT ROUND(AVG(2025 - CAST(SUBSTR(p."birthday", 1, 4) AS INT)), 2) AS "平均年龄"
@@ -344,12 +348,45 @@ question_and_answer = [
WHERE p."dr" = 0
AND o."id" IN (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
- WITH "name" LIKE '%综合处%'
+ WITH "name"||"shortname" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid")
AND p."birthday" IS NOT NULL
AND p."birthday" != ''
LIMIT 1000;
'''
+ },
+ {
+ "question": "综合处有几个人,男员工,女员工分别有多少",
+ "answer": '''
+ SELECT COUNT(*) AS "总人数",
+ SUM(CASE WHEN p."gender" = '1' THEN 1 ELSE 0 END) AS "男员工数",
+ SUM(CASE WHEN p."gender" = '2' THEN 1 ELSE 0 END) AS "女员工数"
+ FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p
+ WHERE p."internal_dept" IN (
+ SELECT "id"
+ FROM "IUAP_APDOC_BASEDOC"."org_orgs"
+ START WITH "name"||"shortname" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
+ CONNECT BY PRIOR "id" = "parentid"
+ )
+ AND p."dr" = 0
+ LIMIT 1000;
+ '''
+ },
+ {
+ "question": "综合处女员工有多少",
+ "answer": '''
+ SELECT (SELECT COUNT(*)
+ FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p
+ WHERE p."internal_dept" IN (SELECT "id"
+ FROM "IUAP_APDOC_BASEDOC"."org_orgs"
+ START WITH "name"||"shortname" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
+ CONNECT BY PRIOR "id" = "parentid"
+ )
+ AND p."gender" = '2'
+ AND p."dr" = 0
+ ) AS "女员工数"
+ '''
}
+
]
diff --git a/util/train_ddl.py b/util/train_ddl.py
index e8455c3..0ba94f8 100644
--- a/util/train_ddl.py
+++ b/util/train_ddl.py
@@ -963,6 +963,35 @@ org_orgs_ddl = '''
"role": "dimension",
"tags": ["部门名称","部门简称","部门缩写"]
},
+ {
+ "name": "enable",
+ "type": "INT",
+ "comment": "启用状态",
+ "value":{
+ "1":"启用",
+ "2":"未启用"
+ },
+ "role": "dimension",
+ "tags": ["部门启用","部门状态","是否启用"]
+ },
+ {
+ "name": "level",
+ "type": "INT",
+ "comment": "部门或单位层级",
+ "role": "dimension",
+ "tags": ["部门层级","级别"]
+ },
+ {
+ "name": "dr",
+ "type": "INT",
+ "comment": "删除标志",
+ "value": {
+ "0": "正常",
+ "1": "删除"
+ },
+ "role": "dimension",
+ "tags": ["状态标识", "软删除", "枚举"]
+ }
],
"tags": ["部门id","部门信息","部门名称"]
diff --git a/util/utils.py b/util/utils.py
index 93c7429..a41daee 100644
--- a/util/utils.py
+++ b/util/utils.py
@@ -1,7 +1,8 @@
+import logging
from typing import Optional
from orjson import orjson
-
+logger = logging.getLogger(__name__)
keywords = {
# "gender":{"1":"男","2":"女"},
"person_status":{"1":"草稿","2":"审批中","3":"制卡中","4":"已入库","5":"停用"},
@@ -43,7 +44,8 @@ def extract_nested_json(text):
stack = []
start_index = -1
results = []
-
+ if not text:
+ logger.warning("extract_nested_json: text is empty")
for i, char in enumerate(text):
if char in '{[':
if not stack: # 记录起始位置