diff --git a/service/cus_vanna_srevice.py b/service/cus_vanna_srevice.py index db94799..56a96c0 100644 --- a/service/cus_vanna_srevice.py +++ b/service/cus_vanna_srevice.py @@ -182,13 +182,24 @@ class OpenAICompatibleLLM(VannaBase): stop=None, temperature=self.temperature, ) + # data = { + # "model": self.model, + # "prompt": prompt, + # "max_tokens": self.max_tokens, + # "temperature": self.temperature, + # } + # response = requests.post( + # url=f"{self.api_base}/completions", + # headers=self.headers, + # json=data + # ) else: if num_tokens > 3500: model = "kimi" else: model = "doubao" - print(f"Using model {model} for {num_tokens} tokens (approx)") + print(f"5.Using model {model} for {num_tokens} tokens (approx)") response = self.client.chat.completions.create( model=model, @@ -231,6 +242,7 @@ class OpenAICompatibleLLM(VannaBase): logger.info(f"sys_temp:{sys_temp}") llm_response = self.submit_prompt( [{'role': 'system', 'content': sys_temp}, {'role': 'user', 'content': user_temp}], **kwargs) + llm_response = str(llm_response.strip()) logger.info(f"llm_response:{llm_response}") result = {"resp": orjson.loads(extract_nested_json(llm_response))} logger.info(f"llm_response:{llm_response}") diff --git a/template.yaml b/template.yaml index cf4878f..c12fdcc 100644 --- a/template.yaml +++ b/template.yaml @@ -52,10 +52,12 @@ template: 若数据库引擎是 PostgreSQL, Oracle, ClickHouse, 达梦数据库, AWS Redshift, Elasticsearch,则schema、表名、字段名、别名使用双引号,如 "schema_name"."table_name"。 若数据库引擎是 MySQL, Doris,则表名、字段名、别名使用反引号,如 `table_name`。 生成的SQL必须避免与数据库关键字冲突。 + 注意列名定义和使用的先后顺序,例如:SELECT阶段定义了列名,如果GROUP BY阶段先于SELECT阶段执行时,是不许在GROUP BY阶段引用列名的。 数据查询与排序 若未明确指定查询字段,涉及人员信息时,默认返回相关性最强的前10个字段。 + 当涉及部门表org_orgs的查询时注意enable启用状态和dr删除标志,且必须限制code字段值包含'CYJ'。 若查询字段为 VARCHAR 或 TEXT 类型但需要计算,必须先进行合理的类型转换(如 CAST(... 
AS NUMERIC))。 若查询包含日期/时间字段: - **默认行为**:若提问未指定排序,**默认按时间字段降序排序**(即最新数据在前)。 @@ -271,22 +273,32 @@ template: - SELECT `u`.`email` AS `email`, `u`.`id` AS `id`, `u`.`account` AS `account`, `u`.`enable` AS `enable`, `u`.`create_time` AS `create_time`, `u`.`language` AS `language`, `u`.`default_oid` AS `default_oid`, `u`.`name` AS `name`, `u`.`phone` AS `phone`, FROM `per_user` `u` LIMIT 1000 + SELECT `u`.`email` AS `邮箱`, `u`.`id` AS `ID`, `u`.`account` AS `账号`, `u`.`enable` AS `启用状态`, `u`.`create_time` AS `创建时间`, `u`.`language` AS `语言`, `u`.`default_oid` AS `所属组织id`, `u`.`name` AS `姓名`, `u`.`phone` AS `电话` FROM `per_user` `u` LIMIT 1000 查询所有用户信息 - {{"type":"table","title":"所有用户信息","columns":[{{"name":"邮箱","value":"email"}},{{"name":"ID","value":"id"}},{{"name":"账号","value":"account"}},{{"name":"启用状态","value":"enable"}},{{"name":"创建时间","value":"create_time"}},{{"name":"语言","value":"language"}},{{"name":"所属组织ID","value":"default_oid"}},{{"name":"姓名","value":"name"}},{{"name":"Phone","value":"phone"}}]}} + {{"type":"table","title":"所有用户信息","columns":[{{"name":"邮箱","value":"email"}},{{"name":"ID","value":"id"}},{{"name":"账号","value":"account"}},{{"name":"启用状态","value":"enable"}},{{"name":"创建时间","value":"create_time"}},{{"name":"语言","value":"language"}},{{"name":"所属组织id","value":"default_oid"}},{{"name":"姓名","value":"name"}},{{"name":"电话","value":"phone"}}]}} - SELECT `o`.`name` AS `org_name`, COUNT(`u`.`id`) AS `user_count` FROM `per_user` `u` JOIN `per_org` `o` ON `u`.`default_oid` = `o`.`id` GROUP BY `o`.`name` ORDER BY `user_count` DESC LIMIT 1000 + SELECT `o`.`name` AS `org_name`, COUNT(`u`.`id`) AS `人数` FROM `per_user` `u` JOIN `per_org` `o` ON `u`.`default_oid` = `o`.`id` GROUP BY `o`.`name` ORDER BY `人数` DESC LIMIT 1000 饼图展示各个组织的人员数量 pie - {{"type":"pie","title":"组织人数统计","axis":{{"y":{{"name":"人数","value":"user_count"}},"series":{{"name":"组织名称","value":"org_name"}}}}}} + 
{{"type":"pie","title":"组织人数统计","axis":{{"y":{{"name":"人数","value":"人数"}},"series":{{"name":"org_name","value":"org_name"}}}}}} + + + + + SELECT COUNT(*) AS "总人数", SUM(CASE WHEN p."gender" = '1' THEN 1 ELSE 0 END) AS "男员工数", SUM(CASE WHEN p."gender" = '2' THEN 1 ELSE 0 END) AS "女员工数" FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p WHERE p."dr" = 0 ORDER BY "总人数" DESC LIMIT 1000; + 表格展示不同性别人员数量,及人员总数 + bar + + + {{'type': 'table', 'title': '员工性别统计', 'columns': [{{"name":"总人数","value":"total_persons"}},{{"name":"男员工数","value":"male_count"}},{{"name":"女员工数","value":"female_count"}}]}} diff --git a/util/q_and_a_dict.py b/util/q_and_a_dict.py index 0fcfe96..9a6ce39 100644 --- a/util/q_and_a_dict.py +++ b/util/q_and_a_dict.py @@ -8,7 +8,7 @@ question_and_answer = [ FROM YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database p WHERE p.internal_dept IN (SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs" START - WITH "name" LIKE '%综合处%' + WITH "name"||"shortname" LIKE '%综合处%' CONNECT BY PRIOR "id" = "parentid" ) AND p.dr = 0 @@ -198,7 +198,7 @@ question_and_answer = [ from YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database where internal_dept in (SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs" START - WITH "name" LIKE '%数信中心%' + WITH "name"||"shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%' CONNECT BY PRIOR "id" = "parentid" ) @@ -224,7 +224,7 @@ question_and_answer = [ JOIN IUAP_APDOC_BASEDOC.org_orgs o ON p.internal_dept = o.id WHERE p.internal_dept IN ( SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs" - START WITH "name" LIKE '%数信中心%' + START WITH "name"||"shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%' CONNECT BY PRIOR "id" = "parentid" ) AND p.dr = 0 @@ -262,7 +262,7 @@ question_and_answer = [ AND ps."date_value" LIKE '2025-10%' and p.internal_dept in (SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs" START - WITH "name" LIKE '%数信中心%' + WITH "name" || "shortname" LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" 
LIKE '%CYJ%' CONNECT BY PRIOR "id" = "parentid" ) AND p."dr" = 0 @@ -279,7 +279,7 @@ question_and_answer = [ ''' }, { - "question": "员工年龄段分布图", + "question": "数信中心员工年龄段分布图", "answer": ''' SELECT CASE @@ -301,7 +301,10 @@ question_and_answer = [ WHERE "parentid" IN ( SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs" - ) + WHERE name || shortname LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%' + ) OR id IN (SELECT "id" + FROM "IUAP_APDOC_BASEDOC"."org_orgs" + WHERE name || shortname LIKE '%数信中心%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%') ) GROUP BY CASE @@ -328,14 +331,15 @@ question_and_answer = [ WHERE p."dr" = 0 AND o."id" IN (SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs" START - WITH "name" LIKE '%综合处%' + WITH "name"||"shortname" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%' CONNECT BY PRIOR "id" = "parentid") AND p."birthday" IS NOT NULL AND p."birthday" != '' ORDER BY o."name" ASC, p."birthday" DESC LIMIT 1000; ''' - },{ + }, + { "question": "查询综合处下面的员工的平均年龄", "answer": ''' SELECT ROUND(AVG(2025 - CAST(SUBSTR(p."birthday", 1, 4) AS INT)), 2) AS "平均年龄" @@ -344,12 +348,45 @@ question_and_answer = [ WHERE p."dr" = 0 AND o."id" IN (SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs" START - WITH "name" LIKE '%综合处%' + WITH "name"||"shortname" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%' CONNECT BY PRIOR "id" = "parentid") AND p."birthday" IS NOT NULL AND p."birthday" != '' LIMIT 1000; ''' + }, + { + "question": "综合处有几个人,男员工,女员工分别有多少", + "answer": ''' + SELECT COUNT(*) AS "总人数", + SUM(CASE WHEN p."gender" = '1' THEN 1 ELSE 0 END) AS "男员工数", + SUM(CASE WHEN p."gender" = '2' THEN 1 ELSE 0 END) AS "女员工数" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p + WHERE p."internal_dept" IN ( + SELECT "id" + FROM "IUAP_APDOC_BASEDOC"."org_orgs" + START WITH "name" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%' + CONNECT BY PRIOR "id" = "parentid" + ) + AND p."dr" = 0 + LIMIT 1000; + ''' + }, + 
{ + "question": "综合处女员工有多少", + "answer": ''' + SELECT (SELECT COUNT(*) + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p + WHERE p."internal_dept" IN (SELECT "id" + FROM "IUAP_APDOC_BASEDOC"."org_orgs" + START WITH "name"||"shortname" LIKE '%综合处%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%' + CONNECT BY PRIOR "id" = "parentid" + ) + AND p."gender" = '2' + AND p."dr" = 0 + ) AS "女员工数" + ''' } + ] diff --git a/util/train_ddl.py b/util/train_ddl.py index e8455c3..0ba94f8 100644 --- a/util/train_ddl.py +++ b/util/train_ddl.py @@ -963,6 +963,35 @@ org_orgs_ddl = ''' "role": "dimension", "tags": ["部门名称","部门简称","部门缩写"] }, + { + "name": "enable", + "type": "INT", + "comment": "启用状态", + "value":{ + "1":"启用", + "2":"未启用" + }, + "role": "dimension", + "tags": ["部门启用","部门状态","是否启用"] + }, + { + "name": "level", + "type": "INT", + "comment": "部门或单位层级", + "role": "dimension", + "tags": ["部门层级","级别"] + }, + { + "name": "dr", + "type": "INT", + "comment": "删除标志", + "value": { + "0": "正常", + "1": "删除" + }, + "role": "dimension", + "tags": ["状态标识", "软删除", "枚举"] + } ], "tags": ["部门id","部门信息","部门名称"] diff --git a/util/utils.py b/util/utils.py index 93c7429..a41daee 100644 --- a/util/utils.py +++ b/util/utils.py @@ -1,7 +1,8 @@ +import logging from typing import Optional from orjson import orjson - +logger = logging.getLogger(__name__) keywords = { # "gender":{"1":"男","2":"女"}, "person_status":{"1":"草稿","2":"审批中","3":"制卡中","4":"已入库","5":"停用"}, @@ -43,7 +44,8 @@ def extract_nested_json(text): stack = [] start_index = -1 results = [] - + if not text: + logger.warning("extract_nested_json: text is empty") for i, char in enumerate(text): if char in '{[': if not stack: # 记录起始位置