Compare commits

...

3 Commits

4 changed files with 74 additions and 25 deletions

View File

@@ -243,7 +243,10 @@ class OpenAICompatibleLLM(VannaBase):
[{'role': 'system', 'content': sys_temp}, {'role': 'user', 'content': user_temp}], **kwargs)
llm_response = str(llm_response.strip())
logger.info(f"llm_response:{llm_response}")
result = {"resp": orjson.loads(extract_nested_json(llm_response))}
#优化中.......
result = extract_nested_json(llm_response)
logger.info(f"result:{result}")
result = {"resp": orjson.loads(result)}
logger.info(f"llm_response:{llm_response}")
sql = check_and_get_sql(llm_response)
logger.info(f"sql:{sql}")

View File

@@ -56,7 +56,7 @@ template:
</rule>
<rule>
<rule-title>数据查询与排序</rule-title>
<rule-detail>若未明确指定查询字段,涉及人员信息时,默认返回相关性最强的前10个字段。</rule-detail>
<rule-detail>若未明确指定查询字段,涉及人员信息时,一般返回相关性最强的前10个字段。</rule-detail>
<rule-detail>当涉及部门表org_orgs的查询时注意enable启用状态和dr删除标志且必须限制code字段值包含'CYJ'。</rule-detail>
<rule-detail>若查询字段为 VARCHAR 或 TEXT 类型但需要计算,必须先进行合理的类型转换(如 CAST(... AS NUMERIC))。</rule-detail>
<rule-detail>若查询包含日期/时间字段:
@@ -67,6 +67,7 @@ template:
<rule>
<rule-title>聚合与计算</rule-title>
<rule-detail>使用了聚合函数(如 COUNT(), SUM(), AVG())的SQL必须配置相应的 GROUP BY 子句。</rule-detail>
<rule-detail>禁止使用AGE函数表达式计算年龄</rule-detail>
<rule-detail>使用了函数(如 COUNT(), CAST(), SUM())的字段,必须为其指定一个英文别名。</rule-detail>
<rule-detail>计算占比或百分比时,结果保留两位小数,并以 '%' 符号结尾。示例: ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM table), 2) || '%' (PostgreSQL语法)</rule-detail>
<rule-detail>若查询结果包含枚举字段(如 gender=1,2),必须使用 CASE WHEN 语句将其转换为可读的标签。示例: SELECT CASE WHEN "gender" = '1' THEN '男' WHEN "gender" = '2' THEN '女' END AS "gender"</rule-detail>

View File

@@ -1,6 +1,6 @@
question_and_answer = [
{"question": "综合处有多少员工,男女员工分别有多少",
{"question": "XX部有多少员工,男女员工分别有多少",
"answer": '''
SELECT COUNT(*) AS "总人数",
SUM(CASE WHEN gender = '1' THEN 1 ELSE 0 END) AS "男员工数",
@@ -8,7 +8,7 @@ question_and_answer = [
FROM YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database p
WHERE p.internal_dept IN (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
WITH "name"||"shortname" LIKE '%综合处%'
WITH "name"||"shortname" LIKE '%XX部%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p.dr = 0
@@ -192,13 +192,13 @@ question_and_answer = [
END LIMIT 1000
'''
}, {
"question": "数信中心下有多少员工",
"question": "XX中心下有多少员工",
"answer": '''
select count(*)
from YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database
where internal_dept in (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
WITH "name"||"shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
WITH "name"||"shortname" LIKE '%XX中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
@@ -217,14 +217,14 @@ question_and_answer = [
'''
},
{
"question": "数信中心下各个处室分别有多少人",
"question": "XX中心下各个处室分别有多少人",
"answer": '''
SELECT o.name AS "处室名称", COUNT(p.id) AS "人数"
FROM YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database p
JOIN IUAP_APDOC_BASEDOC.org_orgs o ON p.internal_dept = o.id
WHERE p.internal_dept IN (
SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs"
START WITH "name"||"shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
START WITH "name"||"shortname" LIKE '%XX中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p.dr = 0
@@ -248,7 +248,7 @@ question_and_answer = [
'''
},
{
"question": "10月数信中心有哪些有员工请假",
"question": "10月XX中心有哪些有员工请假",
"answer": '''
SELECT p."id" AS "id",
p."code" AS "工号",
@@ -262,7 +262,7 @@ question_and_answer = [
AND ps."date_value" LIKE '2025-10%'
and p.internal_dept in (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
WITH "name" || "shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
WITH "name" || "shortname" LIKE '%XX中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p."dr" = 0
@@ -279,7 +279,7 @@ question_and_answer = [
'''
},
{
"question": "数信中心员工年龄段分布图",
"question": "XX中心员工年龄段分布图",
"answer": '''
SELECT
CASE
@@ -301,10 +301,10 @@ question_and_answer = [
WHERE "parentid" IN (
SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs"
WHERE name || shortname LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
WHERE name || shortname LIKE '%XX中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
) OR id IN (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs"
WHERE name || shortname LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%')
WHERE name || shortname LIKE '%XX中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%')
)
GROUP BY
CASE
@@ -318,7 +318,7 @@ question_and_answer = [
ORDER BY "age_group" ASC LIMIT 1000
'''
},{
"question": "查询综合处下面的员工以及他们的年龄",
"question": "查询XX部下面的员工以及他们的年龄",
"answer": '''
SELECT p."id" AS "员工ID",
p."name" AS "姓名",
@@ -331,7 +331,7 @@ question_and_answer = [
WHERE p."dr" = 0
AND o."id" IN (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
WITH "name"||"shortname" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
WITH "name"||"shortname" LIKE '%XX部%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid")
AND p."birthday" IS NOT NULL
AND p."birthday" != ''
@@ -340,7 +340,7 @@ question_and_answer = [
'''
},
{
"question": "查询综合处下面的员工的平均年龄",
"question": "查询XX部下面的员工的平均年龄",
"answer": '''
SELECT ROUND(AVG(2025 - CAST(SUBSTR(p."birthday", 1, 4) AS INT)), 2) AS "平均年龄"
FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p
@@ -348,7 +348,7 @@ question_and_answer = [
WHERE p."dr" = 0
AND o."id" IN (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
WITH "name"||"shortname" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
WITH "name"||"shortname" LIKE '%XX部%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid")
AND p."birthday" IS NOT NULL
AND p."birthday" != ''
@@ -356,7 +356,7 @@ question_and_answer = [
'''
},
{
"question": "综合处有几个人,男员工,女员工分别有多少",
"question": "XX部有几个人,男员工,女员工分别有多少",
"answer": '''
SELECT COUNT(*) AS "总人数",
SUM(CASE WHEN p."gender" = '1' THEN 1 ELSE 0 END) AS "男员工数",
@@ -365,7 +365,7 @@ question_and_answer = [
WHERE p."internal_dept" IN (
SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs"
START WITH "name" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
START WITH "name" LIKE '%XX部%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p."dr" = 0
@@ -373,19 +373,54 @@ question_and_answer = [
'''
},
{
"question": "综合女员工有多少",
"question": "XX综合女员工有多少",
"answer": '''
SELECT (SELECT COUNT(*)
FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p
WHERE p."internal_dept" IN (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs"
START WITH "name"||"shortname" LIKE '%综合%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
START WITH "name"||"shortname" LIKE '%XX综合%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p."gender" = '2'
AND p."dr" = 0
) AS "女员工数"
'''
},
{
"question": "XX中心今天各个处室在岗人员数量",
"answer": '''
SELECT o.name AS "处室名称", COUNT(p.id) AS "人数"
FROM YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database p
JOIN IUAP_APDOC_BASEDOC.org_orgs o ON p.internal_dept = o.id
WHERE p.internal_dept IN (
SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs"
START WITH "name"||"shortname" LIKE '%XX中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p.id IN (SELECT person_id FROM "YJOA_APPSERVICE_DB"."t_yj_person_status" WHERE status = '1001' AND date_value = TO_CHAR(SYSDATE, 'yyyy-MM-dd') AND dr = 0)
AND p.dr = 0
GROUP BY o.name
ORDER BY "人数" DESC
LIMIT 1000
'''
},
{
"question": "XX中心下的XX管理处有多少人",
"answer": '''
SELECT o.name AS "处室名称", COUNT(p.id) AS "人数"
FROM YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database p
JOIN IUAP_APDOC_BASEDOC.org_orgs o ON p.internal_dept = o.id
WHERE p.internal_dept IN (
SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs"
START WITH "name"||"shortname" LIKE '%XX中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p.dr = 0 AND o.name LIKE '%XX管理处%'
GROUP BY o.name
ORDER BY "人数" DESC
LIMIT 1000
'''
}

View File

@@ -3,9 +3,15 @@ train_document='''
查询地址籍贯公司单位时尽量使用like查询;
查询人员信息时,由于数据表字段过多,只需要展示人员关键信息字段(id、工号、姓名、单位)以及用户问题中需要查询的字段;
表字段信息以及字段枚举信息在values下注意相关字段枚举值的转换;
出生日期字段是varchar类型计算年龄时需转换成合适的格式
查询单位时通过orgs表查询且需要基于parentID查询递归查询单位下的子单位
查询内部单位时则可以直接查询人员信息表通过like模糊查询;
没有明确说明查询外部单位都默认查询通过orgs查询单位;
数信中心和数信部并非同一个部门,注意区分,不要混淆
数信中心下还有多个部门,这些部门里的人员也隶属于数信中心
根据部门名称查询部门时,除了全称name,简称shortname也是重要信息
数信部是简称
部门表org_orgs中的level字段 {1:公司, 2:一级部门, 3:二级部门, .....}
'''
person_database_ddl = """
@@ -896,8 +902,12 @@ person_ac_area = '''
"name": "area",
"type": "Int",
"comment": "区域位置",
"value": {
"1":"党校",
"2":"凯莱"
},
"role": "dimension",
"tags": ["门禁所属区域"]
"tags": ["门禁所属区域","枚举"]
},
{
"name": "region",
@@ -907,14 +917,14 @@ person_ac_area = '''
"1":"北京",
"2":"成都",
"3":"秭归",
"4":"林芝市区",
"5":"拉萨",
"4":"拉萨",
"5":"林芝",
"6":"米林",
"7":"派镇",
"8":"墨脱",
},
"role": "dimension",
"tags": ["门禁所属地区"]
"tags": ["门禁所属地区","枚举"]
},
],