From 6064710f4f14724a6e4eef2a545be6234d4c5f9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9B=B7=E9=9B=A8?= Date: Sun, 28 Sep 2025 16:44:58 +0800 Subject: [PATCH] =?UTF-8?q?feat=EF=BC=9A=E4=BF=AE=E6=94=B9ddql=EF=BC=8C?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0qa=E9=97=AE=E7=AD=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env | 10 +- main_service.py | 5 +- service/cus_vanna_srevice.py | 11 +- template.yaml | 351 ++++++++++----------------- util/load_ddl_doc.py | 7 +- util/q_and_a_dict.py | 206 ++++++++++++++++ util/train_ddl.py | 447 ++++------------------------------- 7 files changed, 396 insertions(+), 641 deletions(-) create mode 100644 util/q_and_a_dict.py diff --git a/.env b/.env index 61466c7..99b4a00 100644 --- a/.env +++ b/.env @@ -2,11 +2,11 @@ IS_FIRST_LOAD=True CHAT_MODEL_BASE_URL=https://api.siliconflow.cn CHAT_MODEL_API_KEY=sk-iyhiltycmrfnhrnbljsgqjrinhbztwdplyvuhfihcdlepole -CHAT_MODEL_NAME=Qwen/Qwen3-Next-80B-A3B-Instruct +CHAT_MODEL_NAME=zai-org/GLM-4.5 EMBEDDING_MODEL_BASE_URL=https://api.siliconflow.cn EMBEDDING_MODEL_API_KEY=sk-iyhiltycmrfnhrnbljsgqjrinhbztwdplyvuhfihcdlepole -EMBEDDING_MODEL_NAME=Qwen/Qwen3-Embedding-8B +EMBEDDING_MODEL_NAME=BAAI/bge-m3 #向量数据库 #type:memory/remote,如果设置为remote,将IS_FIRST_LOAD 设置成false @@ -30,7 +30,7 @@ MYSQL_DATABASE_DBNAME=test #达梦数据库 -DAMENG_DATABASE_HOST=10.254.192.191 +DAMENG_DATABASE_HOST=10.254.193.63 DAMENG_DATABASE_PORT=5236 -DAMENG_DATABASE_PASSWORD=SYSDBA -DAMENG_DATABASE_USER=SYSDBA +DAMENG_DATABASE_PASSWORD= +DAMENG_DATABASE_USER=ai_view diff --git a/main_service.py b/main_service.py index 2989c43..4ee7fbe 100644 --- a/main_service.py +++ b/main_service.py @@ -50,6 +50,7 @@ def create_vana(): "model": config('CHAT_MODEL_NAME', default=''), }, ) + return vn @@ -111,7 +112,7 @@ def generate_sql_2(): data["type"]="success" return jsonify(data) except Exception as e: - logger.error("generate sql failed:{e}") + logger.error(f"generate sql failed:{e}") return jsonify({"type": "error", "error": str(e)}) @@ -170,7 +171,7 @@ def run_sql_2(id: str, sql: str): ) except Exception as e: - logger.error("run sql failed:{e}") + logger.error(f"run sql failed:{e}") return jsonify({"type": "sql_error", "error": str(e)}) if __name__ == '__main__': diff --git a/service/cus_vanna_srevice.py b/service/cus_vanna_srevice.py index 986ede0..f4dcfe0 100644 --- a/service/cus_vanna_srevice.py +++ b/service/cus_vanna_srevice.py @@ -17,7 +17,7 @@ from datetime import datetime import logging from util import train_ddl logger = logging.getLogger(__name__) - +import traceback class OpenAICompatibleLLM(VannaBase): def __init__(self, client=None, config_file=None): VannaBase.__init__(self, config=config_file) @@ -186,6 +186,9 @@ class OpenAICompatibleLLM(VannaBase): try: logger.info("Start to generate_sql_2 in cus_vanna_srevice") question_sql_list = self.get_similar_question_sql(question, **kwargs) + if question_sql_list and len(question_sql_list)>2: + question_sql_list=question_sql_list[:1] + ddl_list = self.get_related_ddl(question, **kwargs) #doc_list = self.get_related_documentation(question, **kwargs) template = get_base_template() @@ -194,7 +197,8 @@ class OpenAICompatibleLLM(VannaBase): # --------基于提示词,生成sql以及图表类型 sys_temp = sql_temp['system'].format(engine=config("DB_ENGINE", default='mysql'), lang='中文', schema=ddl_list, documentation=[train_ddl.train_document], - data_training=question_sql_list) + retrieved_examples_data=question_sql_list, + data_training=question_sql_list,) logger.info(f"sys_temp:{sys_temp}") user_temp = sql_temp['user'].format(question=question, current_time=datetime.now().strftime('%Y-%m-%d %H:%M:%S')) @@ -223,7 +227,8 @@ class OpenAICompatibleLLM(VannaBase): logger.info("Finish to generate_sql_2 in cus_vanna_srevice") return result except Exception as e: - logger.info("cus_vanna_srevice failed-------------------") + logger.info("cus_vanna_srevice failed-------------------: ") + traceback.print_exc() raise e def generate_rewritten_question(self, last_question: str, new_question: str, **kwargs) -> str: diff --git a/template.yaml b/template.yaml index 6e7ffc9..29e193c 100644 --- a/template.yaml +++ b/template.yaml @@ -8,267 +8,169 @@ template: sql: system: | - 你是"SQLBOT",智能问数小助手,可以根据用户提问,专业生成SQL与可视化图表。 - 你当前的任务是根据给定的表结构和用户问题生成SQL语句、可能适合展示的图表类型以及该SQL中所用到的表名。 - 我们会在块内提供给你信息,帮助你生成SQL: - 内有等信息; - 其中,:提供数据库引擎及版本信息; - :以 M-Schema 格式提供数据库表结构信息; - :提供一组术语,块内每一个就是术语,其中同一个内的多个代表术语的多种叫法,也就是术语与它的同义词,即该术语对应的描述,其中也可能是能够用来参考的计算公式,或者是一些其他的查询条件 - :提供一组SQL示例,你可以参考这些示例来生成你的回答,其中内是提问,内是对于该提问的解释或者对应应该回答的SQL示例 - 用户的提问在内,内则会提供上次执行你提供的SQL时会出现的错误信息,内的会告诉你用户当前提问的时间 + 你是"SQLBOT",一个专业的智能问数助手。你的核心能力是根据用户的问题、数据库表结构以及相关背景信息,精准地生成SQL查询语句,并推荐合适的可视化图表类型。 + 为此,你需要仔细分析在 块内提供给你的信息,它们包括: + :数据库引擎及其版本,决定了SQL的语法规范。 + :以M-Schema格式定义的数据库表结构,包括表名、字段名、字段类型、主键和注释以及values下该字段的枚举值。 + :业务术语库。每个包含一组同义词和对应的描述,它们是连接用户问题和数据字段的桥梁,请务必利用。 + :[RAG核心区] 通过检索与当前问题最相关的历史问答对。**这是最高优先级的s参考**,优先从中寻找与用户问题意图或表述最相似的案例来指导你生成SQL。 + :通用SQL示例库。当中没有足够参考时,可在此处寻找相似的用法、函数模板或Join思路作为补充参考。 + :数据库或业务相关的补充文档。 + :[可选] 上一次生成的SQL执行失败时的错误信息,用于修正和优化你的输出。 + :[可选] 背景信息,如当前提问时间等。 + 用户的提问位于 块内。 - - 你必须遵守以下规则: + + 优先级遵循: > > > 。历史成功经验是你的第一指南。 + 理解意图:仔细分析用户问题,结合背景信息,准确识别查询的指标、维度、筛选条件和时间范围。 + 安全第一:严格限制为只读查询(SELECT)。绝不允许生成任何修改、删除或危害数据库数据的SQL(如 INSERT, UPDATE, DELETE, DROP, TRUNCATE 等)。 + 忠于事实:严禁编造 中未提供的表、字段或关系。 + + --- + --- A. 核心格式与结构规则 + --- - 请使用语言:{lang} 回答,若有深度思考过程,则思考过程也需要使用 {lang} 输出 + 返回格式 + 输出必须是严格的JSON格式。 + 若成功生成SQL,格式为:{{"success": true, "sql": "生成的SQL语句", "tables": ["表名1", "表名2", ...], "chart-type": "图表类型"}} + 若因任何原因无法生成SQL,格式为:{{"success": false, "message": "清晰说明无法生成的原因 (例如: 问题与数据库不相关 / 缺少必要的表或字段 / 问题意图不明确)"}} - 你只能生成查询用的SQL语句,不得生成增删改相关或操作数据库以及操作数据库数据的SQL + 语言要求 + 使用 {lang} 语言进行所有输出,包括思考过程(如果有的话)。 + + --- + --- B. SQL生成规范规则 + --- + + 表与字段引用 + 必须为每个表生成一个英文别名(不带 AS 关键字),例如:`FROM user u`。 + 查询字段禁止使用星号(*),必须显式写出所有需要的字段名。 + 字段名和别名不能自动翻译,必须使用英文字符。 + 若数据库引擎是 PostgreSQL, Oracle, ClickHouse, 达梦数据库, AWS Redshift, Elasticsearch,则schema、表名、字段名、别名使用双引号,如 "schema_name"."table_name"。 + 若数据库引擎是 MySQL, Doris,则表名、字段名、别名使用反引号,如 `table_name`。 + 生成的SQL必须避免与数据库关键字冲突。 - 如果因为客观原因无法生成sql,请合理分析无法生成的原因并反馈给用户 + 数据查询与排序 + 若未明确指定查询字段,涉及人员信息时,默认返回相关性最强的前10个字段。 + 若查询字段为 VARCHAR 或 TEXT 类型但需要计算,必须先进行合理的类型转换(如 CAST(... AS NUMERIC))。 + 若查询包含日期/时间字段: + - **默认行为**:若提问未指定排序,**默认按时间字段降序排序**(即最新数据在前)。 + - **格式化**:若提问要求时间/日期/年月/年,且未指定格式,则分别格式化为 'yyyy-MM-dd HH:mm:ss' / 'yyyy-MM-dd' / 'yyyy-MM' / 'yyyy',语法需适配当前数据库引擎。(达梦数据库如果时间字段是varchar类型也可以) + - 涉及查询人员信息时,如果用户没明确指出要查询哪些字段,主要查询相关性较强的10个字段即可,如果指定要查询所有信息,请返回所有字段信息 + 聚合与计算 + 使用了聚合函数(如 COUNT(), SUM(), AVG())的SQL,必须配置相应的 GROUP BY 子句。 + 使用了函数(如 COUNT(), CAST(), SUM())的字段,必须为其指定一个英文别名。 + 计算占比或百分比时,结果保留两位小数,并以 '%' 符号结尾。示例:ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM table), 2) || '%' (PostgreSQL语法) + 若查询结果包含枚举字段(如 gender=1,2),必须使用 CASE WHEN 语句将其转换为可读的标签。示例: SELECT CASE WHEN "gender" = '1' THEN '男' WHEN "gender" = '2' THEN '女' END AS "gender" - 不要编造内没有提供给你的表结构 + 关联与限制 + 多表关联时,优先使用 中标记为 "Primary key"/"ID"/"主键" 的字段作为关联条件。 + 若用户未指定数据条数,**查询SQL必须包含1000条的限制**。若用户指定的限制大于1000,也按1000处理。 + - PostgreSQL: ... LIMIT 1000 + + + --- + --- C. 图表与业务理解规则 + --- + + 图表类型选择 + 若问题与图表展示无关,chart-type 一律使用 "table"。 + 若问题与图表展示相关,根据查询意图推荐最合适的图表类型,参考以下原则: + - **折线图**:展示数据随时间(或其他连续维度)的**趋势**。 + - **柱状图/条形图**:展示不同**分类**之间的**数值对比**。柱状图常用于分类较少,条形图常用于分类较多或分类名较长。 + - **饼图**:展示单一维度各部分占**整体的比例**,且分类不宜过多(建议少于7个)。 + - **表格**:用于展示**详细的原始数据**,或用户明确要求查表的场景。 + + 返回的chart-type值必须是 table, column, bar, line, pie 中的一个。 - 当需要计算的字段类型为varchar或者text时,请根据需求转换为合理的类型格式进行计算 - - - 生成的SQL必须符合内提供数据库引擎的规范 - - - 若用户提问中提供了参考SQL,你需要判断该SQL是否是查询语句 - - - 请注意区分'哪些'和'多少'的区别,哪些是指具体信息,多少是指数量,请注意甄别 - - - 如遇字符串类型的日期要计算时,务必转化为合理的日期格式进行计算 - - - 请使用JSON格式返回你的回答: - 若能生成,则返回格式如:{{"success":true,"sql":"你生成的SQL语句","tables":["该SQL用到的表名1","该SQL用到的表名2",...],"chart-type":"table"}} - 若不能生成,则返回格式如:{{"success":false,"message":"说明无法生成SQL的原因"}} - - - 如果问题是图表展示相关,可参考的图表类型为表格(table)、柱状图(column)、条形图(bar)、折线图(line)或饼图(pie), 返回的JSON内chart-type值则为 table/column/bar/line/pie 中的一个 - 图表类型选择原则推荐:趋势 over time 用 line,分类对比用 column/bar,占比用 pie,原始数据查看用 table - - - 如果问题是图表展示相关且与生成SQL查询无关时,请参考上一次回答的SQL来生成SQL - - - 返回的JSON字段中,tables字段为你回答的SQL中所用到的表名,不要包含schema和database,用数组返回 - - - 提问中如果有涉及数据源名称或数据源描述的内容,则忽略数据源的信息,直接根据剩余内容生成SQL - - - 根据表结构生成SQL语句,需给每个表名生成一个别名(不要加AS) - - - SQL查询中不能使用星号(*),必须明确指定字段名 - - - SQL查询的字段名不要自动翻译,别名必须为英文 - - - 生成sql时,如果返回字段中有枚举字段,请根据枚举字段选项值生成对应的case when语句 - - SELECT - CASE - WHEN gender = 1 THEN '男' - WHEN gender = 2 THEN '女' - ELSE gender - END AS gender, - COUNT(*) AS count - FROM person GROUP BY gender; - - - - SQL查询的字段若是函数字段,如 COUNT(),CAST() 等,必须加上别名 - - - SQL查询的如果用了聚合函数,如SUM(),COUNT()等,必须配合GROUP BY使用 - - - 计算占比,百分比类型字段,保留两位小数,以%结尾 - - - 生成SQL时,必须避免与数据库关键字冲突 - - - 如数据库引擎是 PostgreSQL、Oracle、ClickHouse、达梦(DM)、AWS Redshift、Elasticsearch,则在schema、表名、字段名、别名外层加双引号; - 如数据库引擎是 MySQL、Doris,则在表名、字段名、别名外层加反引号; - 如数据库引擎是 Microsoft SQL Server,则在schema、表名、字段名、别名外层加方括号。 - - 以PostgreSQL为例,查询Schema为TEST表TABLE下前1000条id字段,则生成的SQL为: - SELECT "id" FROM "TEST"."TABLE" LIMIT 1000 - - 注意在表名外双引号的位置,千万不要生成为: - SELECT "id" FROM "TEST.TABLE" LIMIT 1000 - 以Microsoft SQL Server为例,查询Schema为TEST表TABLE下前1000条id字段,则生成的SQL为: - SELECT TOP 1000 [id] FROM [TEST].[TABLE] - - 注意在表名外方括号的位置,千万不要生成为: - SELECT TOP 1000 [id] FROM [TEST.TABLE] - - - - 如果生成SQL的字段内有时间格式的字段: - - 若提问中没有指定查询顺序,则默认按时间升序排序 - - 若提问是时间,且没有指定具体格式,则格式化为yyyy-MM-dd HH:mm:ss的格式 - - 若提问是日期,且没有指定具体格式,则格式化为yyyy-MM-dd的格式 - - 若提问是年月,且没有指定具体格式,则格式化为yyyy-MM的格式 - - 若提问是年,且没有指定具体格式,则格式化为yyyy的格式 - - 生成的格式化语法需要适配对应的数据库引擎。 - - - 生成的SQL查询结果可以用来进行图表展示,需要注意排序字段的排序优先级,例如: - - 柱状图或折线图:适合展示在横轴的字段优先排序,若SQL包含分类字段,则分类字段次一级排序 - - - 如果用户没有指定数据条数的限制,输出的查询SQL必须加上1000条的数据条数限制 - 如果用户指定的限制大于1000,则按1000处理 - - 以PostgreSQL为例,查询Schema为TEST表TABLE下id字段,则生成的SQL为: - SELECT "id" FROM "TEST"."TABLE" LIMIT 1000 - 以Microsoft SQL Server为例,查询Schema为TEST表TABLE下id字段,则生成的SQL为: - SELECT TOP 1000 [id] FROM [TEST].[TABLE] - - - - 若需关联多表,优先使用中标记为"Primary key"/"ID"/"主键"的字段作为关联条件。 - - - 我们目前的情况适用于单指标、多分类的场景(展示table除外) + 术语与问题解析 + 充分利用 中的词是用户可能使用的提问方式, 中可能包含计算公式或精确的查询条件,是理解问题并将其翻译为SQL的关键。 + 注意区分“哪些”(具体信息)和“多少”(数量)的区别。 + 若用户提问中提及参考SQL,需先判断该SQL是否为一个合法的、只读的查询语句。 + 忽略问题中提到的“数据源名称”或“数据源描述”等无关信息,聚焦于核心的业务需求。 - - ### 以下帮助你理解问题及返回格式的例子,不要将内的表结构用来回答用户的问题,内的为后续用户提问传入的内容,为根据模版与输入的输出回答 + ### 以下 块帮助你理解问题及返回格式,**请勿将此块内的任何表结构用于回答用户的问题**。 - PostgreSQL17.6 (Debian 17.6-1.pgdg12+1) - - 【DB_ID】 Sample_Database, 样例数据库 - 【Schema】 - # Table: Sample_Database.sample_country_gdp, 各国GDP数据 - [ - (id: bigint, Primary key, ID), - (country: varchar, 国家), - (continent: varchar, 所在洲, examples:['亚洲','美洲','欧洲','非洲']), - (year: varchar, 年份, examples:['2020','2021','2022']), - (gdp: bigint, GDP(美元)), - ] - - + 达梦数据库 + + 【DB_ID】 Sample_Database, 样例数据库 + 【Schema】 + # Table: Sample_Database.sample_country_gdp, 各国GDP数据 + [ + (id: bigint, Primary key, ID), + (country: varchar, 国家), + (continent: varchar, 所在洲, examples:['亚洲','美洲','欧洲','非洲']), + (year: varchar, 年份, examples:['2020','2021','2022']), + (gdp: bigint, GDP(美元)), + ] + + - - GDP - 国内生产总值 - - 指在一个季度或一年,一个国家或地区的经济中所生产出的全部最终产品和劳务的价值。 + GDP国内生产总值 + 指在一个季度或一年,一个国家或地区的经济中所生产出的全部最终产品和劳务的价值。 - - 中国 - 中国大陆 - - 查询SQL时若作为查询条件,将"中国"作为查询用的值 + 中国中国大陆 + 查询SQL时若作为查询条件,将"中国"作为查询用的值。 - + - - - 今天天气如何? - - - {{"success":false,"message":"我是智能问数小助手,我无法回答您的问题,该问题与当前数据库问数不相关,数据库中无天气等信息。","status":0}} - + 今天天气如何? + {{"success":false,"message":"我是智能问数小助手,我无法回答您的问题。'天气'与当前数据库中的信息(如国家GDP)不相关,数据库中无天气相关数据表或字段。"}} - - 张三的年龄是多大 - - - {{"success":true,"sql":"SELECT name, FLOOR(MONTHS_BETWEEN(SYSDATE, birthday) / 12) AS age FROM YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database","tables":["t_pr3rl2oj_yj_person_database"],"chart-type":"columns"}} - + 请清空数据库 + {{"success":false,"message":"我的职责是进行数据查询。'清空数据库'属于破坏性操作,我无法生成此类SQL语句。"}} - - 请清空数据库 - - - {{"success":false,"message":"我是智能问数小助手,我只能查询数据,不能操作数据库来修改数据或者修改表结构。"}} - + 2025-08-08 11:23:00 + 查询各个国家每年的GDP + {{"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"year\" DESC, \"country\" ASC LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"line"}} - - 查询所有用户 - - - {{"success":false,"message":"抱歉,提供的表结构无法生成您需要的SQL"}} - + 2025-08-08 11:23:00 + 使用饼图展示去年各个国家的GDP + {{"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"pie"}} - - - - 2025-08-08 11:23:00 - - - 查询各个国家每年的GDP - - - {{"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\" LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"line"}} - - - - - - - 2025-08-08 11:23:00 - - - 使用饼图展示去年各个国家的GDP - - {{"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"pie"}} - - - - - - - - 2025-08-08 11:24:00 - - - 查询今年中国大陆的GDP - - {{"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国' LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"table"}} - - + 2025-08-08 11:24:00 + 查询今年中国大陆的GDP + {{"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国' LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"table"}} - - ### 下面是提供的信息 + ### --- 真实任务开始 --- + ### 下面是为你提供的完整信息 - {engine} - - {schema} - - - {documentation} - - {data_training} + {engine} + {schema} + {documentation} + + + 电网雅江联通 + 这些都可能是内部或者外部单位的名称 + + + + + + {retrieved_examples_data} + + + + ### 响应, 请根据上述要求直接返回JSON结果: ```json @@ -282,6 +184,7 @@ template: {question} + 注意查询结果枚举值转换 chart: diff --git a/util/load_ddl_doc.py b/util/load_ddl_doc.py index b11ed78..6cd21fd 100644 --- a/util/load_ddl_doc.py +++ b/util/load_ddl_doc.py @@ -1,8 +1,8 @@ from service.cus_vanna_srevice import CustomVanna from util import train_ddl - +from util import q_and_a_dict table_ddls = [ - train_ddl.ddl_sql,train_ddl.attendance_ddl + train_ddl.person_ddl_sql,train_ddl.rule_ddl,train_ddl.user_status_ddl ] list_documentions = [ train_ddl.train_document, @@ -17,3 +17,6 @@ def add_ddl(vn: CustomVanna): def add_documentation(vn: CustomVanna): for doc in list_documentions: vn.add_documentation(doc) + for d in q_and_a_dict.question_and_answer: + vn.add_question_sql(question=d['question'], sql=d['answer']) + diff --git a/util/q_and_a_dict.py b/util/q_and_a_dict.py new file mode 100644 index 0000000..c8eab68 --- /dev/null +++ b/util/q_and_a_dict.py @@ -0,0 +1,206 @@ +question_and_answer = [ + {"question": "考勤地点有哪些", + "answer": ''' + SELECT DISTINCT CASE "region" + WHEN '1' THEN '北京' + WHEN '2' THEN '成都' + WHEN '3' THEN '秭归' + WHEN '4' THEN '拉萨' + WHEN '5' THEN '林芝' END AS "考勤地点" + FROM "YJOA_APPSERVICE_DB"."t_yj_person_attendance_rules" + WHERE "region" IS NOT NULL LIMIT 1000 + ''' + }, + {"question": "成都的考勤规则是什么", + "answer": ''' + SELECT "region" AS "region_code", + CASE + WHEN "region" = '1' THEN '北京' + WHEN "region" = '2' THEN '成都' + WHEN "region" = '3' THEN '秭归' + WHEN "region" = '4' THEN '拉萨' + WHEN "region" = '5' THEN '林芝' END AS "region_name", + "morning_check_time" AS "morning_check_time", + "afternoon_check_time" AS "afternoon_check_time", + "before_lunch_time" AS "before_lunch_time", + "after_lunch_time" AS "after_lunch_time" + FROM "YJOA_APPSERVICE_DB"."t_yj_person_attendance_rules" + WHERE "region" = '2' LIMIT 1000 + ''' + }, + {"question": "所有员工男女各有多少人", + "answer": ''' + SELECT CASE WHEN "gender" = '1' THEN '男' WHEN "gender" = '2' THEN '女' END AS "gender", + COUNT(*) AS "person_count" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" + WHERE "dr" = 0 + GROUP BY "gender" + ORDER BY "gender" ASC LIMIT 1000 + ''' + }, + { + "question": "联通下面有哪些员工", + "answer": ''' + SELECT "id" AS "id", + "code" AS "工号", + "name" AS "姓名", + "internal_unit" AS "内部单位", + "external_unit" AS "外部单位" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" + WHERE "internal_unit" LIKE '%联通%' + OR "external_unit" LIKE '%联通%' LIMIT 1000 + ''' + }, + { + "question": "9月有多少哪些员工在休假", + "answer": ''' + SELECT DISTINCT p."id" AS "id", + p."code" AS "工号", + p."name" AS "姓名", + p."internal_unit" AS "内部单位", + p."external_unit" AS "外部单位" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p + INNER JOIN "YJOA_APPSERVICE_DB"."t_yj_person_status" ps ON p."code" = ps."person_id" + WHERE ps."status" = '1003' + AND ps."date_value" LIKE '2025-09%' LIMIT 1000 + ''' + }, + { + "question": "联通的员工,8月份有哪些迟到,旷工的", + "answer": ''' + SELECT DISTINCT p."code" AS "工号", + p."name" AS "姓名", + p."internal_unit" AS "内部单位", + p."external_unit" AS "外部单位", + CASE + WHEN ps."status" = '1006' THEN '迟到,早退' + WHEN ps."status" = '1005' THEN '旷工' END AS "人员状态", + ps."date_value" AS "日期" + FROM "YJOA_APPSERVICE_DB"."t_yj_person_status" ps + JOIN "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p ON ps."person_id" = p."code" + WHERE (p."internal_unit" LIKE '%联通%' OR p."external_unit" LIKE '%联通%') + AND ps."status" IN ('1005', '1006') + AND ps."date_value" LIKE '2025-08%' + AND ps."dr" = 0 + ORDER BY ps."date_value" DESC LIMIT 1000 + ''' + }, + { + "question": "博士和硕士分别有哪些员工", + "answer": ''' + SELECT DISTINCT "name" AS "姓名", + "code" AS "工号", + "internal_unit" AS "内部单位", + "external_unit" AS "外部单位", + CASE + WHEN "highest_degree" = '1' THEN '学士学位' + WHEN "highest_degree" = '2' THEN '硕士学位' + WHEN "highest_degree" = '3' THEN '博士学位' + WHEN "highest_degree" = '4' THEN '无' END AS "最高学位" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" + WHERE "highest_degree" IN ('2', '3') + AND "dr" = '0' + ORDER BY "最高学位", "姓名" LIMIT 1000 + ''' + }, { + "question": "8月份在藏超过10天的有哪些员工", + "answer": ''' + SELECT DISTINCT p."code" AS "工号", + p."name" AS "姓名", + CASE + WHEN p."internal_unit" IS NOT NULL AND p."internal_unit" != '' THEN p."internal_unit" + ELSE p."external_unit" END AS "单位", + COUNT(ps."id") AS "在藏天数" + FROM YJOA_APPSERVICE_DB."t_yj_person_status" ps + JOIN YJOA_APPSERVICE_DB."t_pr3rl2oj_yj_person_database" p ON ps."person_id" = p."code" + WHERE ps."is_in_tibet" = 1 + AND ps."dr" = 0 + AND p."dr" = 0 + AND ps."date_value" LIKE '2025-08%' + GROUP BY p."code", p."name", + CASE + WHEN p."internal_unit" IS NOT NULL AND p."internal_unit" != '' THEN p."internal_unit" + ELSE p."external_unit" END + HAVING COUNT(ps."id") > 10 + ORDER BY COUNT(ps."id") DESC LIMIT 1000 + ''' + }, + { + "question": "张三 8月的考勤查询", + "answer": ''' + SELECT DISTINCT p."code" AS "工号", + p."name" AS "姓名", + p."internal_unit" AS "内部单位", + p."external_unit" AS "外部单位", + CASE + WHEN ps."status" = '1001' THEN '在岗' + WHEN ps."status" = '1002' THEN '出差' + WHEN ps."status" = '1003' THEN '休假,请假' + WHEN ps."status" = '1005' THEN '旷工' + WHEN ps."status" = '1006' THEN '迟到,早退' + WHEN ps."status" = '1007' THEN '休息日' + WHEN ps."status" = '4001' THEN 'am在岗pm缺勤' + WHEN ps."status" = '4002' THEN 'am缺勤pm在岗' + WHEN ps."status" = '6001' THEN 'am在岗pm早退' + WHEN ps."status" = '6002' THEN 'am迟到pm在岗' + WHEN ps."status" = '6004' THEN 'am迟到pm缺勤' + WHEN ps."status" = '4006' THEN 'am缺勤pm早退' END AS "人员状态", + ps."date_value" AS "日期" + FROM "YJOA_APPSERVICE_DB"."t_yj_person_status" ps + JOIN "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p ON ps."person_id" = p."code" + WHERE p."name" = '张三' + AND ps."date_value" LIKE '2025-08%' + AND ps."dr" = 0 + ORDER BY ps."date_value" DESC LIMIT 1000 + ''' + }, + { + "question": "8月份有多人迟到", + "answer": ''' + SELECT count(distinct person_id) + FROM "YJOA_APPSERVICE_DB"."t_yj_person_status" ps + WHERE ps."status" = '1006' + AND ps."date_value" LIKE '2025-08%' + AND ps."dr" = 0 LIMIT 1000 + ''' + }, + { + "question": "负责智能体相关工作的是哪些员工", + "answer": ''' + SELECT "id" AS "id", + "code" AS "工号", + "name" AS "姓名", + "internal_unit" AS "内部单位", + "external_unit" AS "外部单位", + "work_content" AS "工作内容" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" + WHERE "work_content" LIKE '%智能体%' LIMIT 1000 + ''' + },{ + "question": "9月,旷工,迟到分别有多少人", + "answer": ''' + SELECT CASE WHEN "status" = '1006' THEN '迟到' WHEN "status" = '1005' THEN '旷工' END AS "status_name", + COUNT(DISTINCT "person_id") AS "person_count" + FROM "YJOA_APPSERVICE_DB"."t_yj_person_status" + WHERE "status" IN ('1006', '1005') + AND "date_value" LIKE '2025-09%' + AND "dr" = 0 + GROUP BY "status" + ORDER BY "status" LIMIT 1000 + ''' + },{ + "question": "在研发基地工作的有哪些员工", + "answer": ''' + SELECT "id" AS "id", + "code" AS "工号", + "name" AS "姓名", + "internal_unit" AS "内部单位", + "external_unit" AS "外部单位", + "office_address" AS "办公地点" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" + WHERE ("office_address" LIKE '%研发基地%' OR "office_city" LIKE '%研发基地%') + AND "dr" = '0' LIMIT 1000 + ''' + } + +] diff --git a/util/train_ddl.py b/util/train_ddl.py index d386a79..6dba9ee 100644 --- a/util/train_ddl.py +++ b/util/train_ddl.py @@ -1,5 +1,5 @@ -ddl_sql = """ -[ +person_ddl_sql = """ + { "db_name":"YJOA_APPSERVICE_DB", "table_name": "t_pr3rl2oj_yj_person_database", @@ -17,7 +17,7 @@ ddl_sql = """ "5":"停用" }, "role": "dimension", - "tags": ["状态信息"] + "tags": ["状态信息","枚举"] }, { "name": "gender", @@ -28,7 +28,7 @@ ddl_sql = """ "2": "女" }, "role": "dimension", - "tags": ["基本信息", "人口属性"] + "tags": ["基本信息", "人口属性","枚举"] }, { "name": "id_card", @@ -74,7 +74,7 @@ ddl_sql = """ "0": "否" }, "role": "dimension", - "tags": ["员工类型", "身份标识"] + "tags": ["员工类型", "身份标识","枚举"] }, { "name": "phone_number", @@ -104,7 +104,7 @@ ddl_sql = """ "7":"来访人员", }, "role": "dimension", - "tags": ["证件信息", "通行权限"] + "tags": ["证件信息", "通行权限","枚举"] }, { "name": "expire_time", @@ -154,7 +154,7 @@ ddl_sql = """ "WQT":"其他外部人员" }, "role": "dimension", - "tags": ["分类信息", "人员分类"] + "tags": ["分类信息", "人员分类","枚举"] }, { "name": "id", @@ -163,13 +163,7 @@ ddl_sql = """ "role": "dimension", "tags": ["主键", "ID标识"] }, - { - "name": "pubts", - "type": "DATETIME(39)", - "comment": "发布时间", - "role": "dimension", - "tags": ["时间信息", "系统字段"] - }, + { "name": "dr", "type": "INT", @@ -186,7 +180,7 @@ ddl_sql = """ "type": "VARCHAR(600)", "comment": "编码", "role": "dimension", - "tags": ["编码信息", "业务编码"] + "tags": ["编码信息", "工号"] }, { "name": "ytenant_id", @@ -195,132 +189,7 @@ ddl_sql = """ "role": "dimension", "tags": ["租户信息", "系统隔离"] }, - { - "name": "sourcegrand_id", - "type": "VARCHAR(108)", - "comment": "来源表id", - "role": "dimension", - "tags": ["数据来源", "关联信息"] - }, - { - "name": "first_id", - "type": "VARCHAR(108)", - "comment": "来源单据主表id", - "role": "dimension", - "tags": ["数据来源", "关联信息"] - }, - { - "name": "firstchild_id", - "type": "VARCHAR(108)", - "comment": "来源单据子表id", - "role": "dimension", - "tags": ["数据来源", "关联信息"] - }, - { - "name": "firstbusiobj", - "type": "VARCHAR(108)", - "comment": "来源业务对象", - "role": "dimension", - "tags": ["业务对象", "数据来源"] - }, - { - "name": "firstcode", - "type": "VARCHAR(600)", - "comment": "来源单据号", - "role": "dimension", - "tags": ["单据信息", "业务编码"] - }, - { - "name": "verifystate", - "type": "INT", - "comment": "单据状态", - "role": "dimension", - "tags": ["审批状态", "业务流程"] - }, - { - "name": "auditor", - "type": "VARCHAR(180)", - "comment": "终审审批人", - "role": "dimension", - "tags": ["审批信息", "操作人"] - }, - { - "name": "audit_time", - "type": "DATETIME(39)", - "comment": "审批日期", - "role": "dimension", - "tags": ["时间信息", "审批流程"] - }, - { - "name": "auditnote", - "type": "VARCHAR(600)", - "comment": "当前审批人", - "role": "dimension", - "tags": ["审批信息", "操作人"] - }, - { - "name": "procinst_id", - "type": "VARCHAR(108)", - "comment": "流程实例ID", - "role": "dimension", - "tags": ["流程信息", "实例标识"] - }, - { - "name": "bizflow_id", - "type": "VARCHAR(108)", - "comment": "业务流id", - "role": "dimension", - "tags": ["业务流程", "流标识"] - }, - { - "name": "bizflowname", - "type": "VARCHAR(600)", - "comment": "流程名称", - "role": "dimension", - "tags": ["业务流程", "名称描述"] - }, - { - "name": "source_id", - "type": "VARCHAR(108)", - "comment": "上游单据主表id", - "role": "dimension", - "tags": ["数据关联", "上游信息"] - }, - { - "name": "sourcechild_id", - "type": "VARCHAR(108)", - "comment": "上游单据子表id", - "role": "dimension", - "tags": ["数据关联", "上游信息"] - }, - { - "name": "bizflowinstance_id", - "type": "VARCHAR(108)", - "comment": "业务流实例id", - "role": "dimension", - "tags": ["业务流程", "实例标识"] - }, - { - "name": "sourcebusiobj", - "type": "VARCHAR(108)", - "comment": "上游业务对象", - "role": "dimension", - "tags": ["业务对象", "上游信息"] - }, - { - "name": "sourcecode", - "type": "VARCHAR(600)", - "comment": "上游单据号", - "role": "dimension", - "tags": ["单据信息", "业务编码"] - }, - { - "name": "bizflow_makebillcode", - "type": "VARCHAR(600)", - "comment": "单据转换规则编码", - "role": "dimension", - "tags": ["业务流程", "规则编码"] - }, + { "name": "create_time", "type": "DATETIME(39)", @@ -335,27 +204,8 @@ ddl_sql = """ "role": "dimension", "tags": ["时间信息", "系统记录"] }, - { - "name": "creator", - "type": "VARCHAR(180)", - "comment": "创建人", - "role": "dimension", - "tags": ["操作人信息", "系统记录"] - }, - { - "name": "modifier", - "type": "VARCHAR(180)", - "comment": "修改人", - "role": "dimension", - "tags": ["操作人信息", "系统记录"] - }, - { - "name": "worker_id", - "type": "VARCHAR(200)", - "comment": "工号", - "role": "dimension", - "tags": ["员工信息", "身份标识"] - }, + + { "name": "to_dept", "type": "VARCHAR(600)", @@ -419,7 +269,7 @@ ddl_sql = """ "9":"博士 " }, "role": "dimension", - "tags": ["教育信息", "学历背景"] + "tags": ["教育信息", "学历背景","枚举"] }, { "name": "highest_degree", @@ -432,7 +282,7 @@ ddl_sql = """ "4":"无" }, "role": "dimension", - "tags": ["教育信息", "学位背景"] + "tags": ["教育信息", "学位背景","枚举"] }, { "name": "graduate_school", @@ -600,196 +450,26 @@ ddl_sql = """ "comment": "关联租户信息" } ], - "examples": [ - { - "question": "查询内部员工的数量", - "sql": "SELECT COUNT(*) as person_count FROM YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database WHERE is_internal = '1' AND dr = 0" - }, - { - "question": "按性别统计人员分布情况", - "sql": "SELECT gender, COUNT(*) as person_count FROM YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database WHERE dr = 0 GROUP BY gender" - }, - { - "question": "查询最近一个月新增的人员信息", - "sql": "SELECT * FROM YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database WHERE create_time >= ADD_MONTHS(TRUNC(SYSDATE), -1) AND dr = 0" - } - ], + "tags": ["人员管理", "人力资源", "审批流程", "基本信息", "工作信息"], -}, -{ - "db_name":"YJOA_APPSERVICE_DB", - "table_name": "t_yj_person_attendance", - "table_comment": "人员考勤记录表,记录人员的考勤打卡信息", - "columns": [ - { - "name": "person_name", - "type": "VARCHAR(50)", - "comment": "人员姓名", - "role": "dimension", - "tags": ["基本信息", "身份标识"] - }, - { - "name": "id", - "type": "VARCHAR(200)", - "comment": "主键ID", - "role": "dimension", - "tags": ["主键", "ID标识"] - }, - { - "name": "person_id", - "type": "VARCHAR(200)", - "comment": "人员ID", - "role": "dimension", - "tags": ["人员标识", "关联信息"] - }, - { - "name": "phone_number", - "type": "VARCHAR(50)", - "comment": "手机号码", - "role": "dimension", - "tags": ["联系方式", "通讯信息"] - }, - { - "name": "attendance_time", - "type": "DATETIME(39)", - "comment": "考勤时间", - "role": "dimension", - "tags": ["时间信息", "考勤记录"] - }, - { - "name": "attendance_address", - "type": "VARCHAR(200)", - "comment": "考勤地点", - "role": "dimension", - "tags": ["位置信息", "考勤记录"] - }, - { - "name": "status", - "type": "INT", - "comment": "状态", - "value":{ - "1":"在岗", - "2":"出差", - "3":"休假" - }, - "role": "dimension", - "tags": ["状态信息", "考勤状态"] - }, - { - "name": "original_id", - "type": "VARCHAR(200)", - "comment": "原数据ID", - "role": "dimension", - "tags": ["数据来源", "原始标识"] - }, - { - "name": "source", - "type": "VARCHAR(50)", - "comment": "数据来源", - "role": "dimension", - "tags": ["数据来源", "系统标识"] - }, - { - "name": "dr", - "type": "INT", - "comment": "逻辑删除标志", - "role": "dimension", - "tags": ["系统状态", "数据状态"] - }, - { - "name": "create_time", - "type": "DATETIME(39)", - "comment": "创建时间", - "role": "dimension", - "tags": ["时间信息", "系统记录"] - }, - { - "name": "enter_or_exit", - "type": "INT", - "comment": "进出类型", - "value":{ - "1":"进", - "2":"出" - }, - "role": "dimension", - "tags": ["考勤类型", "进出标识"] - }, - { - "name": "access_control_point", - "type": "VARCHAR(50)", - "comment": "门禁点", - "role": "dimension", - "tags": ["位置信息", "门禁设备"] - }, - { - "name": "bv_st", - "type": "VARCHAR(20)", - "comment": "上午打卡时间", - "role": "dimension", - "tags": ["时间信息", "业务时间"] - }, - { - "name": "bv_et", - "type": "VARCHAR(20)", - "comment": "下午打卡时间", - "role": "dimension", - "tags": ["时间信息", "业务时间"] - }, - { - "name": "bv_st_field", - "type": "VARCHAR(50)", - "comment": "午休前打卡时间", - "role": "dimension", - "tags": ["时间信息", "业务字段"] - }, - { - "name": "bv_et_field", - "type": "VARCHAR(50)", - "comment": "午休后打卡时间", - "role": "dimension", - "tags": ["时间信息", "业务字段"] - }, - { - "name": "bv_go_type", - "type": "VARCHAR(8)", - "comment": "打卡类型", - "role": "dimension", - "tags": ["业务类型", "分类信息"] - }, - ], - "relationships": [ - { - "from": "person_id", - "to_table": "t_pr3rl2oj_yj_person_database", - "to_field": "id", - "type": "foreign_key", - "comment": "关联人员基本信息表" - } - ], - "examples": [ - { - "question": "查询今日考勤总人次", - "sql": "SELECT COUNT(*) as attendance_count FROM YJOA_APPSERVICE_DB.t_yj_person_attendance WHERE TO_CHAR(attendance_time, 'YYYY-MM-DD') = TO_CHAR(SYSDATE, 'YYYY-MM-DD') AND dr = '0'" - }, - { - "question": "按人员统计本月考勤次数", - "sql": "SELECT person_name, COUNT(*) as attendance_count FROM YJOA_APPSERVICE_DB.t_yj_person_attendance WHERE TO_CHAR(attendance_time, 'YYYY-MM') = TO_CHAR(SYSDATE, 'YYYY-MM') AND dr = '0' GROUP BY person_name, person_id" - }, - { - "question": "查询最近一周的考勤记录", - "sql": "SELECT * FROM YJOA_APPSERVICE_DB.t_yj_person_attendance WHERE attendance_time >= SYSDATE - 7 AND dr = '0' ORDER BY attendance_time DESC" - }, - { - "question": "统计各门禁点的考勤分布", - "sql": "SELECT access_control_point, COUNT(*) as attendance_count FROM YJOA_APPSERVICE_DB.t_yj_person_attendance WHERE dr = '0' GROUP BY access_control_point" - } - ], - "tags": ["考勤管理", "人员考勤", "门禁记录", "时间统计", "考勤分析"] -}, +} + +""" + +train_document=''' + 语法为达梦数据库语法; + 查询地址,籍贯,公司,单位时,尽量使用like查询; + 查询人员信息时,由于数据表字段过多。只需要展示人员关键信息字段,id,工号,姓名,单位以及用户问题中需要查询的字段; + 表字段信息以及字段枚举信息在values下,注意相关字段枚举值的转换; + 查询单位信息时,内部单位和外部单位都需要查询,用OR条件查询; + + ''' + +rule_ddl=''' { "db_name":"YJOA_APPSERVICE_DB", "table_name": "t_yj_person_attendance_rules", - "table_comment": "人员考勤规则表,定义考勤时间规则和区域设置", + "table_comment": "人员考勤规则表,定义考勤时间规则,考勤地点设置", "columns": [ { "name": "id", @@ -838,7 +518,7 @@ ddl_sql = """ "5": "林芝" }, "role": "dimension", - "tags": ["区域设置", "地理信息"] + "tags": [ "考勤的位置","非办公区域不要混淆","枚举"] }, ], "relationships": [ @@ -850,26 +530,12 @@ ddl_sql = """ "comment": "关联区域配置信息" } ], - "examples": [ - { - "question": "查询所有考勤规则列表", - "sql": "SELECT * FROM t_yj_person_attendance_rules" - }, - { - "question": "统计各区域的考勤规则数量", - "sql": "SELECT region, COUNT(*) as rule_count FROM YJOA_APPSERVICE_DB.t_yj_person_attendance_rules GROUP BY region" - }, - { - "question": "查询特定区域的考勤时间设置", - "sql": "SELECT morning_check_time, afternoon_check_time, before_lunch_time, after_lunch_time FROM YJOA_APPSERVICE_DB.t_yj_person_attendance_rules WHERE region = '北京'" - }, - { - "question": "检查是否存在重复的考勤规则", - "sql": "SELECT region, morning_check_time, afternoon_check_time, COUNT(*) as rule_count FROM YJOA_APPSERVICE_DB.t_yj_person_attendance_rules GROUP BY region, morning_check_time, afternoon_check_time HAVING COUNT(*) > 1" - } - ], + "tags": ["考勤规则", "时间设置", "区域配置", "考勤管理", "规则定义"] -}, +} +''' + +user_status_ddl=''' { "db_name":"YJOA_APPSERVICE_DB", "table_name": "t_yj_person_status", @@ -896,9 +562,9 @@ ddl_sql = """ "value":{ "1001":"在岗", "1002":"出差", - "1003":"休假", + "1003":"休假,请假", "1005":"旷工", - "1006":"迟到早退", + "1006":"迟到,早退", "1007":"休息日", "4001":"am在岗pm缺勤", "4002":"am缺勤pm在岗", @@ -908,7 +574,7 @@ ddl_sql = """ "4006":"am缺勤pm早退" }, "role": "dimension", - "tags": ["状态信息", "人员状态"] + "tags": ["状态信息", "人员状态","枚举","迟到早退都是:1006"] }, { "name": "date_value", @@ -943,41 +609,12 @@ ddl_sql = """ { "from": "person_id", "to_table": "t_pr3rl2oj_yj_person_database", - "to_field": "id", + "to_field": "code", "type": "foreign_key", "comment": "关联人员基本信息表" } ], - "examples": [ - { - "question": "查询今日人员状态记录数量", - "sql": "SELECT COUNT(*) as status_count FROM YJOA_APPSERVICE_DB.t_yj_person_status WHERE date_value = DATE_FORMAT(CURRENT_DATE(), '%Y-%m-%d') AND dr = 0" - }, - { - "question": "统计各状态的人员分布", - "sql": "SELECT status, COUNT(*) as status_count FROM YJOA_APPSERVICE_DB.t_yj_person_status WHERE dr = '0' GROUP BY status" - }, - { - "question": "查询在西藏地区的人员数量", - "sql": "SELECT COUNT(DISTINCT person_id) as tibet_person_count FROM YJOA_APPSERVICE_DB.t_yj_person_status WHERE is_in_tibet = 1 AND dr = 0" - }, - { - "question": "按日期统计人员状态记录", - "sql": "SELECT date_value, status, COUNT(*) as status_count FROM YJOA_APPSERVICE_DB.t_yj_person_status WHERE dr = '0' GROUP BY date_value, status ORDER BY date_value DESC" - }, - { - "question": "查询特定人员的状态历史记录", - "sql": "SELECT date_value, status, is_in_tibet FROM YJOA_APPSERVICE_DB.t_yj_person_status WHERE person_id = '123' AND dr = 0 ORDER BY date_value DESC" - } - ], + "tags": ["人员状态", "状态记录", "地区管理", "西藏标识", "每日状态"] } -] -""" - -train_document=''' - 语法为达梦数据库语法; - 查询地址,籍贯,公司,单位时,尽量使用like查询; - 查询人员信息时,由于数据表字段过多。只需要展示人员关键信息字段,id,工号,姓名,单位以及用户问题中需要查询的字段; - 表字段信息以及字段枚举信息: - ''' \ No newline at end of file +''' \ No newline at end of file