# Conflicts:
#	util/q_and_a_dict.py
#	util/train_ddl.py
This commit is contained in:
yujj128
2025-10-14 15:54:10 +08:00
4 changed files with 86 additions and 68 deletions

View File

@@ -209,7 +209,7 @@ class OpenAICompatibleLLM(VannaBase):
logger.info("Start to generate_sql_2 in cus_vanna_srevice")
question_sql_list = self.get_similar_question_sql(question, **kwargs)
if question_sql_list and len(question_sql_list)>2:
question_sql_list=question_sql_list[:1]
question_sql_list=question_sql_list[:2]
ddl_list = self.get_related_ddl(question, **kwargs)
#doc_list = self.get_related_documentation(question, **kwargs)

View File

@@ -164,7 +164,7 @@ template:
<description>这些都可能是外部单位的名称</description>
</terminology>
<terminology>
<words><word>数信中心</word><word>建设处</word></words>
<words><word>数信中心</word><word>建设处</word><word>规划发展部</word></words>
<description>这些都可能是单位的名称</description>
</terminology>
</terminologies>

View File

@@ -16,11 +16,10 @@ question_and_answer = [
SELECT "id" AS "id",
"code" AS "工号",
"name" AS "姓名",
"external_unit" AS "外部单位"
FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database"
WHERE
"external_unit" LIKE '%联通%' LIMIT 1000
WHERE "external_unit" LIKE '%联通%' LIMIT 1000
'''
},
{
@@ -37,26 +36,7 @@ question_and_answer = [
AND ps."date_value" LIKE '2025-09%' LIMIT 1000
'''
},
{
"question": "联通的员工8月份有哪些迟到旷工的",
"answer": '''
SELECT DISTINCT p."code" AS "工号",
p."name" AS "姓名",
p."internal_unit" AS "内部单位",
p."external_unit" AS "外部单位",
CASE
WHEN ps."status" = '1006' THEN '迟到,早退'
WHEN ps."status" = '1005' THEN '旷工' END AS "人员状态",
ps."date_value" AS "日期"
FROM "YJOA_APPSERVICE_DB"."t_yj_person_status" ps
JOIN "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p ON ps."person_id" = p."code"
WHERE (p."internal_unit" LIKE '%联通%' OR p."external_unit" LIKE '%联通%')
AND ps."status" IN ('1005', '1006')
AND ps."date_value" LIKE '2025-08%'
AND ps."dr" = 0
ORDER BY ps."date_value" DESC LIMIT 1000
'''
},
{
"question": "博士和硕士分别有哪些员工",
"answer": '''
@@ -75,7 +55,7 @@ question_and_answer = [
ORDER BY "最高学位", "姓名" LIMIT 1000
'''
}, {
"question": "8月份在藏超过10天的有哪些员工",
"question": "8月份在藏,超过10天的有哪些员工",
"answer": '''
SELECT DISTINCT p."code" AS "工号",
p."name" AS "姓名",
@@ -126,16 +106,7 @@ question_and_answer = [
ORDER BY ps."date_value" DESC LIMIT 1000
'''
},
{
"question": "8月份有多人迟到",
"answer": '''
SELECT count(distinct person_id)
FROM "YJOA_APPSERVICE_DB"."t_yj_person_status" ps
WHERE ps."status" = '1006'
AND ps."date_value" LIKE '2025-08%'
AND ps."dr" = 0 LIMIT 1000
'''
},
{
"question": "负责智能体相关工作的是哪些员工",
"answer": '''
@@ -148,18 +119,6 @@ question_and_answer = [
FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database"
WHERE "work_content" LIKE '%智能体%' LIMIT 1000
'''
}, {
"question": "9月旷工迟到分别有多少人",
"answer": '''
SELECT CASE WHEN "status" = '1006' THEN '迟到' WHEN "status" = '1005' THEN '旷工' END AS "status_name",
COUNT(DISTINCT "person_id") AS "person_count"
FROM "YJOA_APPSERVICE_DB"."t_yj_person_status"
WHERE "status" IN ('1006', '1005')
AND "date_value" LIKE '2025-09%'
AND "dr" = 0
GROUP BY "status"
ORDER BY "status" LIMIT 1000
'''
}, {
"question": "在研发基地工作的有哪些员工",
"answer": '''
@@ -177,34 +136,35 @@ question_and_answer = [
{
"question": "查询张三9月在林芝的打卡记录",
"answer": '''
SELECT a."person_name" AS "姓名",
a."person_id" AS "人员ID",
a."attendance_time" AS "考勤时间",
a."attendance_address" AS "考勤地址",
SELECT a."person_name" AS "姓名",
a."person_id" AS "人员ID",
a."attendance_time" AS "考勤时间",
a."attendance_address" AS "考勤地址",
CASE
WHEN a."status" = 0 THEN '在岗'
WHEN a."status" = 1 THEN '出差'
WHEN a."status" = 2 THEN '休假' END AS "状态",
WHEN a."status" = 2 THEN '休假' END AS "状态",
CASE
WHEN a."enter_or_exit" = 0 THEN ''
WHEN a."enter_or_exit" = 1
THEN '' END AS "进出类型",
THEN '' END AS "进出类型",
CASE
WHEN b."region" = 1 THEN '水科院大厦'
WHEN b."region" = 1 THEN '北京'
WHEN b."region" = 2 THEN '成都'
WHEN b."region" = 3 THEN '秭归'
WHEN b."region" = 5 THEN '林芝' END AS "地区",
a."access_control_point" AS "门禁点"
WHEN b."region" = 5 THEN '林芝' END AS "地区", a."access_control_point" AS "门禁点"
FROM "YJOA_APPSERVICE_DB"."t_yj_person_attendance" a
JOIN "YJOA_APPSERVICE_DB"."t_yj_person_ac_area" b ON a."access_control_point" = b."ac_point"
JOIN "YJOA_APPSERVICE_DB"."t_yj_person_ac_area" b
ON a."access_control_point" = b."ac_point"
WHERE a."person_name" = '张三'
AND b."region" = 5
AND a."attendance_time" >= '2025-09-01'
AND a."attendance_time" < '2025-10-01'
AND a."attendance_time"
< '2025-10-01'
AND a."dr" = 0
ORDER BY a."attendance_time" DESC LIMIT 1000
'''
},{
}, {
"question": "查询张三9月份有多少天在岗",
"answer": '''
SELECT p."code" AS "工号",
@@ -226,15 +186,16 @@ question_and_answer = [
ELSE p."external_unit"
END LIMIT 1000
'''
},{
}, {
"question": "数信中心 部门下有多少员工",
"answer": '''
select count(*)
from YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database
where internal_dept in (select id
from IUAP_APDOC_BASEDOC.org_orgs
where parentid in
(select id from IUAP_APDOC_BASEDOC.org_orgs where name like '数信中心'))
where internal_dept in (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
WITH "name" LIKE '%数信中心%'
CONNECT BY PRIOR "id" = "parentid"
)
'''
@@ -256,11 +217,58 @@ question_and_answer = [
SELECT o.name AS "处室名称", COUNT(p.id) AS "人数"
FROM YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database p
JOIN IUAP_APDOC_BASEDOC.org_orgs o ON p.internal_dept = o.id
WHERE o.parentid IN (SELECT id FROM IUAP_APDOC_BASEDOC.org_orgs WHERE name LIKE '数信中心')
WHERE o.parentid IN (SELECT id FROM IUAP_APDOC_BASEDOC.org_orgs WHERE name LIKE '%数信中心%')
AND p.dr = 0
GROUP BY o.name
ORDER BY "人数" DESC LIMIT 1000
'''
}, {
"question": "张三9月在林芝工作有多少天",
"answer": '''
SELECT count(distinct (TO_CHAR(a."attendance_time", 'yyyy-MM-dd'))) as count
FROM "YJOA_APPSERVICE_DB"."t_yj_person_attendance" a LEFT JOIN "YJOA_APPSERVICE_DB"."t_yj_person_ac_area" b
ON a."access_control_point" = b."ac_point"
WHERE a."person_name" = '张三'
and b.region=5
AND a."attendance_time" >= '2025-09-01'
AND a."attendance_time"
< '2025-10-01'
AND a."dr" = 0 LIMIT 1000
'''
},
{
"question": "10月数信中心有哪些有员工请假",
"answer": '''
SELECT p."id" AS "id",
p."code" AS "工号",
p."name" AS "姓名",
p."internal_unit" AS "内部单位",
p."external_unit" AS "外部单位",
CASE WHEN ps."status" = '1003' THEN '休假,请假' ELSE ps."status" END AS "状态"
FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p
INNER JOIN "YJOA_APPSERVICE_DB"."t_yj_person_status" ps ON p."code" = ps."person_id"
WHERE ps."status" = '1003'
AND ps."date_value" LIKE '2025-10%'
and p.internal_dept in (SELECT "id"
FROM "IUAP_APDOC_BASEDOC"."org_orgs" START
WITH "name" LIKE '%数信中心%'
CONNECT BY PRIOR "id" = "parentid"
)
AND p."dr" = 0
AND ps."dr" = 0 LIMIT 1000
'''
},
{
"question": "有多少个人迟到9月",
"answer": '''
SELECT COUNT(DISTINCT ps."person_id") AS "迟到人数"
FROM "YJOA_APPSERVICE_DB"."t_yj_person_status" ps
WHERE ps."status" = '1006'
AND ps."date_value" LIKE '2025-09%' LIMIT 1000
'''
}
},
{
"question": "数信中心员工年龄段分布图",

View File

@@ -480,6 +480,16 @@ person_database_ddl = """
"""
train_document='''
语法为达梦数据库语法;
查询地址籍贯公司单位时尽量使用like查询;
查询人员信息时,由于数据表字段过多,只需要展示人员关键信息字段(id、工号、姓名、单位)以及用户问题中需要查询的字段;
表字段信息以及字段枚举信息在values下注意相关字段枚举值的转换;
查询单位时通过orgs表查询,且需要基于parentid递归查询该单位下的子单位;
查询内部单位时则可以直接查询人员信息表通过like模糊查询;
没有明确说明查询外部单位时,默认通过orgs表查询单位;
'''
person_attendance_rule_ddl='''
{
"db_name":"YJOA_APPSERVICE_DB",
@@ -554,7 +564,7 @@ person_status_ddl='''
{
"db_name":"YJOA_APPSERVICE_DB",
"table_name": "t_yj_person_status",
"table_comment": "人员状态记录表,记录人员每日考勤状态信息包括西藏地区标识",
"table_comment": "人员状态记录表,记录人员每日考勤汇总状态信息包括西藏地区标识",
"columns": [
{
"name": "id",
@@ -638,7 +648,7 @@ person_attendance_ddl = '''
{
"db_name": "YJOA_APPSERVICE_DB",
"table_name": "t_yj_person_attendance",
"table_comment": "人员考勤记录表,存储员工的打卡记录、考勤状态和位置信息",
"table_comment": "人员考勤记录打卡表,存储员工的打卡记录、考勤状态和位置信息",
"columns": [
{
"name": "id",