diff --git a/main_service.py b/main_service.py index 4681deb..37854a2 100644 --- a/main_service.py +++ b/main_service.py @@ -57,7 +57,7 @@ def create_vana(): def init_vn(vn): logger.info("--------------init vana-----connect to datasouce db----") - connect_database(vn) + # connect_database(vn) load_ddl_doc.add_ddl(vn) load_ddl_doc.add_documentation(vn) if config('IS_FIRST_LOAD', default=False, cast=bool): @@ -98,7 +98,6 @@ def generate_sql_2(): """ logger.info("Start to generate sql in main") question = flask.request.args.get("question") - if question is None: return jsonify({"type": "error", "error": "No question provided"}) try: @@ -185,6 +184,7 @@ def run_sql_2(id: str, sql: str): """ logger.info("Start to run sql in main") try: + user_id = request.args.get("user_id") if not vn.run_sql_is_set: return jsonify( { @@ -198,7 +198,7 @@ def run_sql_2(id: str, sql: str): # print(df_count,"is type",type(df_count)) # total_count = df_count.to_dict(orient="records")[0]["total_count"] # logger.info("Total count is {0}".format(total_count)) - df = vn.run_sql(sql=sql) + df = vn.run_sql_2(sql=sql) result = df.to_dict(orient='records') logger.info("df ---------------{0} {1}".format(result,type(result))) return jsonify( diff --git a/service/cus_vanna_srevice.py b/service/cus_vanna_srevice.py index f8b7400..3c9ab01 100644 --- a/service/cus_vanna_srevice.py +++ b/service/cus_vanna_srevice.py @@ -26,7 +26,7 @@ class OpenAICompatibleLLM(VannaBase): VannaBase.__init__(self, config=config_file) # default parameters - can be overrided using config self.temperature = 0.6 - self.max_tokens = 5000 + self.max_tokens = 10000 if "temperature" in config_file: self.temperature = config_file["temperature"] @@ -125,17 +125,21 @@ class OpenAICompatibleLLM(VannaBase): def submit_prompt(self, prompt, **kwargs) -> str: if prompt is None: + print("test1") raise Exception("Prompt is None") if len(prompt) == 0: + print("test2") raise Exception("Prompt is empty") print(prompt) num_tokens = 0 for message 
in prompt: num_tokens += len(message["content"]) / 4 + print("test3 {0}".format(num_tokens)) if kwargs.get("model", None) is not None: + print("test4") model = kwargs.get("model", None) print( f"Using model {model} for {num_tokens} tokens (approx)" @@ -148,6 +152,7 @@ class OpenAICompatibleLLM(VannaBase): temperature=self.temperature, ) elif kwargs.get("engine", None) is not None: + print("test5") engine = kwargs.get("engine", None) print( f"Using model {engine} for {num_tokens} tokens (approx)" @@ -160,6 +165,7 @@ class OpenAICompatibleLLM(VannaBase): temperature=self.temperature, ) elif self.config is not None and "engine" in self.config: + print("test6") print( f"Using engine {self.config['engine']} for {num_tokens} tokens (approx)" ) @@ -171,10 +177,11 @@ class OpenAICompatibleLLM(VannaBase): temperature=self.temperature, ) elif self.config is not None and "model" in self.config: + print("test7") print( f"Using model {self.config['model']} for {num_tokens} tokens (approx)" ) - print(self.config) + print("config is ",self.config) response = self.client.chat.completions.create( model=self.config["model"], messages=prompt, @@ -194,6 +201,7 @@ class OpenAICompatibleLLM(VannaBase): # json=data # ) else: + print("test8") if num_tokens > 3500: model = "kimi" else: @@ -271,15 +279,26 @@ class OpenAICompatibleLLM(VannaBase): traceback.print_exc() raise e + def run_sql_2(self,sql): + try: + return self.run_sql(sql) + except Exception as e: + logger.error("run_sql failed {0}".format(sql)) + raise e + def generate_rewritten_question(self, last_question: str, new_question: str, **kwargs) -> str: logger.info(f"generate_rewritten_question---------------{new_question}") if last_question is None: return new_question - + print("last question {0}".format(last_question)) + print("new question {0}".format(new_question)) prompt = [ self.system_message( - "Your goal is to combine a sequence of questions into a singular question if they are related. 
If the second question does not relate to the first question and is fully self-contained, return the second question. Return just the new combined question with no additional explanations. The question should theoretically be answerable with a single SQL statement."), - self.user_message(new_question), + "你的目标是将一系列相关问题合并成一个单一的问题。" + "合并准则一、如果第二个问题与第一个问题无关且本身是完整独立的,则直接返回第二个问题。" + "合并准则二、如果第二个问题与第一个问题相关,且要基于第一个问题的前提,请合并两个问题为一个问题,只需返回合并后的新问题,不要添加任何额外解释。" + "合并准则三、理论上,合并后的问题应该能够通过单个SQL语句来回答"), + self.user_message("First question: " + last_question + "\nSecond question: " + new_question), ] return self.submit_prompt(prompt=prompt, **kwargs) @@ -312,6 +331,11 @@ class CustomQdrant_VectorStore(Qdrant_VectorStore): "model": self.embedding_model_name, "sentences": [data], } + # request_body = { + # "model": self.embedding_model_name, + # "encoding_format": "float", + # "input": [data], + # } request_body.update(kwargs) response = requests.post( @@ -325,7 +349,9 @@ class CustomQdrant_VectorStore(Qdrant_VectorStore): ) result = response.json() embeddings = result['embeddings'] + # embeddings = result['data'][0]['embedding'] return embeddings[0] + # return embeddings class CustomVanna(CustomQdrant_VectorStore, OpenAICompatibleLLM): def __init__(self, llm_config=None, vector_store_config=None): @@ -412,6 +438,64 @@ class TTLCacheWrapper: return False return self.get(id=id, field=field) is not None + def items(self): + """遍历所有未过期的缓存键值对""" + current_time = time.time() + items = [] + + for (id, field), expiry in self._expiry_times.items(): + # 检查是否过期 + if current_time <= expiry: + value = self.get(id=id, field=field) + if value is not None: + items.append({ + 'id': id, + 'field': field, + 'value': value, + 'expires_at': expiry, + 'time_left': expiry - current_time + }) + + return items + + def get_latest_by_id(self, id: str, limit: int = 1, field_filter: str = None): + """ + 获取指定ID下时间最近的缓存项 + + Args: + id: 要查询的ID + limit: 返回最近几条记录,默认1条 + field_filter: 可选的字段过滤,如只获取特定字段 + + Returns: 
+ 按时间倒序排列的缓存项列表 + """ + current_time = time.time() + matched_items = [] + + # 找出该ID下所有未过期的缓存项 + for (cache_id, field), expiry in self._expiry_times.items(): + if cache_id == id and current_time <= expiry: + # 字段过滤 + if field_filter and field != field_filter: + continue + + value = self.get(id=id, field=field) + if value is not None: + matched_items.append({ + 'id': id, + 'field': field, + 'value': value, + 'expires_at': expiry, + 'created_time': expiry - self.ttl, # 估算创建时间 + 'time_left': expiry - current_time + }) + + # 按过期时间倒序排列(最近创建的排在前面) + matched_items.sort(key=lambda x: x['expires_at'], reverse=True) + + return matched_items[:limit] + # 代理其他方法到原始cache def __getattr__(self, name): return getattr(self.cache, name) \ No newline at end of file diff --git a/template.yaml b/template.yaml index 418e40a..2c02b5b 100644 --- a/template.yaml +++ b/template.yaml @@ -54,7 +54,7 @@ template: 生成的SQL必须避免与数据库关键字冲突。 注意列名定义和使用的先后顺序,例如:SELECT阶段定义了列名,如果GROUP BY阶段先与SELECT阶段执行时,是不许在GROUP BY阶段引用列名的。 在ORDER BY、GROUP BY、WHERE子句中不要使用SELECT中定义的别名 - 当SELECT中同时包含聚合列和非聚合列时,需要在GROUP BY中指定所有非聚合列 + 注意!当SELECT列中同时包含聚合列(COUNT(), SUM(), AVG())和非聚合列时,必须要在GROUP BY子句中指定所有非聚合列 递归 WITH 子句必须具有列别名列表 CONNECT BY子查询是独立的,无法访问外部查询的表别名,因此涉及CONNECT BY子查询时,里面禁止使用表别名 @@ -66,13 +66,36 @@ template: 若查询包含日期/时间字段: - **默认行为**:若提问未指定排序,**默认按时间字段降序排序**(即最新数据在前)。 - **格式化**:若提问要求时间/日期/年月/年,且未指定格式,则分别格式化为 'yyyy-MM-dd HH:mm:ss' / 'yyyy-MM-dd' / 'yyyy-MM' / 'yyyy',语法需适配当前数据库引擎。(达梦数据库如果时间字段是varchar类型也可以) + - **特殊情况**:时间格式最大为 23:59:59,禁止24:00:00 这不是合法的时间值,查询某一天的数据,结束时间应该是下一天的00:00:00 + + 如果涉及查询男女比例,请参考示例:SELECT CASE WHEN p."gender"='1' THEN '男员工' WHEN gender='2' THEN '女员工' END AS "性别",sum(p."gender") AS "人数"..... + + + 术语标准化规则 + + [重要指令]解析用户问题,识别并替换所有已知的等价短语,将前面的短语换成后面的等价短语,: + ** "数信部" -> "数字信息部" (必须替换!) + ** "安质部" -> "安全质量部" (必须替换!) 
+ 例如:用户:查询数信部 → SQL: LIKE '%数字信息部%' + 用户:查询安质部 → SQL: LIKE '%安全质量部%' + 例如:查询数信部有多少人->查询数字信息部有多少人 - 解析用户问题,识别并替换所有已知的等价短语,将前面的短语换成后面得等价短语,: - ** 数信部 -> 数字信息部 - ** 安质部 -> 安全质量部 - 例如:查询数信部有多少人->查询数字信息部有多少人 + 禁止自动联想或替换: + - "数信部" 不等于 "数信中心" + - "数字信息部" 不等于 "数字信息中心" + + 数信中心和数信部都是部门,而非单位 + + + external_dept和external_unit是外部部门和外部单位,字段值直接是部门名称和单位名称 + + + internal_dept和internal_unit的字段值是内部部门和内部单位编号,而非名称,涉及内部单位或部门时需要在部门表中递归查询 + 例如:"internal_dept" IN (SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs" START WITH ("name" || "shortname") LIKE '%数信中心%' AND "dr" = 0 AND "enable" = 1 AND "code" LIKE '%CYJ%' CONNECT BY PRIOR "id" = "parentid") + + 聚合与计算 @@ -95,7 +118,7 @@ template: 若用户未指定数据条数,**查询SQL必须包含1000条的限制**。若用户指定的限制大于1000,也按1000处理。 - PostgreSQL: ... LIMIT 1000 - 涉及查询最大,最多,最小,最少等查询是,添加,limit 1 + 涉及查询最大,最多,最小,最少等查询是,查询语句尾部添加,limit 1 --- --- C. 图表与业务理解规则 @@ -120,7 +143,12 @@ template: 图表字段注意事项 - 基于参考SQL的查询结果,请确保图表配置中的value字段与SQL结果字段别名完全一致。例如SELECT "external_unit" AS "外部单位" 图表配置的value应为“外部单位”而非“external_unit”。 + + 基于参考SQL的查询结果,图表配置里的所有value字段必须严格匹配SQL查询列的字段别名或字段名(有别名时优先别名),字段数量也得对应上。 + 例如SELECT "external_unit" AS "外部单位名" FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database"。 + 图表配置应为:"chart": {{"columns": [{{"name": "外部单位","value": "外部单位名"}}],"title": "外部单位信息","type": "table"}} + **注意**:一定是别名与value字段对应,如这里的别名是"外部单位名",因此value字段也应该是"外部单位名",value:外部单位名 + ### 以下 块帮助你理解问题及返回格式,**请勿将此块内的任何表结构用于回答用户的问题**。 @@ -192,6 +220,10 @@ template: 数信中心建设处规划发展部综合处 这些都可能是单位的名称,属于内部部门 + + "数信部"务必替换成"数字信息部" + "安质部"务必替换成"安全质量部" + @@ -251,7 +283,10 @@ template: 你需要在JSON内生成一个图表的标题,放在"title"字段内,这个标题需要尽量精简 - 基于参考SQL的查询结果,请确保图表配置中的value字段与SQL结果字段别名完全一致。例如SELECT "external_unit" AS "外部单位" 图表配置的value应为“外部单位”而非“external_unit” + 基于参考SQL的查询结果,图表配置里的所有value字段必须严格匹配SQL查询列的字段别名或字段名(有别名时优先别名),字段数量也得对应上。 + 例如SELECT "外部单位" AS "外部单位名" FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database"。 + 图表配置应为:"chart": {{"columns": [{{"name": "外部单位","value": "外部单位名"}}],"title": "外部单位信息","type": "table"}} + 
**注意**:一定是别名与value对应,如这里的别名是"外部单位名",因此value字段也应该是"外部单位名",value:外部单位名 涉及查询男女性别比例时建议采用表格或者柱状图展示,禁止采用饼状图 diff --git a/util/load_ddl_doc.py b/util/load_ddl_doc.py index 3ced32e..5003d56 100644 --- a/util/load_ddl_doc.py +++ b/util/load_ddl_doc.py @@ -6,6 +6,7 @@ import random table_ddls = [ train_ddl.person_database_ddl,train_ddl.person_status_ddl, train_ddl.person_attendance_ddl,train_ddl.person_ac_area, + train_ddl.person_ac_position, train_ddl.org_orgs_ddl ] diff --git a/util/q_and_a_dict.py b/util/q_and_a_dict.py index c622909..8bf1da7 100644 --- a/util/q_and_a_dict.py +++ b/util/q_and_a_dict.py @@ -242,7 +242,7 @@ question_and_answer = [ "answer": ''' SELECT COUNT(DISTINCT ps."person_id") AS "迟到人数" FROM "YJOA_APPSERVICE_DB"."t_yj_person_status" ps - WHERE ps."status" = '1006' + WHERE ps."status" IN ('1006','1009','6002','6004') AND ps."date_value" LIKE '2025-09%' LIMIT 1000 ''' }, diff --git a/util/q_and_a_test1.py b/util/q_and_a_test1.py index 629bf2d..ce6503b 100644 --- a/util/q_and_a_test1.py +++ b/util/q_and_a_test1.py @@ -26,17 +26,25 @@ question_and_answer = [ "category": "员工基本信息查询" }, { - "question": "在研发基地工作的有哪些员工", + "question": "在成都研发基地工作的有哪些员工", "answer": ''' - SELECT "id" AS "id", - "code" AS "工号", - "name" AS "姓名", - "internal_unit" AS "内部单位", - "external_unit" AS "外部单位", - "office_address" AS "办公地点" - FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" - WHERE ("office_address" LIKE '%研发基地%' OR "office_city" LIKE '%研发基地%') - AND "dr" = '0' LIMIT 1000 + SELECT DISTINCT + p."code" AS "工号", + p."name" AS "姓名", + p."work_unit" AS "工作单位", + CASE + WHEN ac."region" = 2 THEN '成都' + ELSE '其他' + END AS "工作地区" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p + LEFT JOIN "YJOA_APPSERVICE_DB"."t_yj_person_attendance" a + ON p."code" = a."person_id" + LEFT JOIN "YJOA_APPSERVICE_DB"."t_yj_person_ac_area" ac + ON a."access_control_point" = ac."ac_point" + WHERE p."dr" = 0 + AND a."dr" = 0 + AND (a."attendance_address" LIKE '%成都研发基地%' OR 
ac."region" = 2) + LIMIT 1000; ''', "tags": ["员工", "工作地", "条件筛选查询"], "category": "员工基本信息查询" @@ -64,6 +72,21 @@ question_and_answer = [ "tags": ["员工", "部门", "人数"], "category": "员工统计与分析" }, + { + "question": "数信部下有多少员工", + "answer": """ + SELECT COUNT(*) + FROM YJOA_APPSERVICE_DB.t_pr3rl2oj_yj_person_database p + WHERE internal_dept in (SELECT "id" + FROM "IUAP_APDOC_BASEDOC"."org_orgs" + START WITH "name"||"shortname" LIKE '%数字信息部%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%' + CONNECT BY PRIOR "id" = "parentid" + ) + AND p.dr=0 + """, + "tags": ["员工", "部门", "人数"], + "category": "员工统计与分析" + }, { "question": "XX中心下各个处室分别有多少人", "answer": """ @@ -483,7 +506,7 @@ question_and_answer = [ "tags": ["员工", "部门", "考勤", "在岗人数"], "category": "考勤管理" }, - { + { "question": "XX部门昨天在岗人员数量", "answer": ''' SELECT COUNT(p.id) AS "人数" @@ -502,18 +525,18 @@ question_and_answer = [ ''', "tags": ["员工", "部门", "考勤", "在岗人数"], "category": "考勤管理" - }, - { + }, + { "question": "XX中心的李四十月份休息了多少天", "answer": """ SELECT COUNT(*) AS rest_days FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p INNER JOIN "YJOA_APPSERVICE_DB"."t_yj_person_status" ps ON p."code" = ps."person_id" - WHERE p."name" = '谭杰明' + WHERE p."name" = '李四' AND p."internal_dept" IN ( SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs" - START WITH ("name" LIKE '%数信中心%' OR "shortname" LIKE '%数信中心%') + START WITH ("name" LIKE '%XX中心%' OR "shortname" LIKE '%XX中心%') AND "dr" = 0 AND "enable" = 1 AND "code" LIKE '%CYJ%' CONNECT BY PRIOR "id" = "parentid" ) @@ -550,42 +573,66 @@ question_and_answer = [ "category": "考勤管理" }, { - "question": "xx部门9月有多少个人迟到", + "question": "xx部门10月有哪些人迟到", "answer": ''' - SELECT p."id" AS "id", - p."code" AS "工号", - p."name" AS "姓名", - p."internal_unit" AS "内部单位", - p."external_unit" AS "外部单位", - CASE WHEN ps."status" = '1003' THEN '休假,请假' ELSE ps."status" END AS "状态" - FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p - INNER JOIN "YJOA_APPSERVICE_DB"."t_yj_person_status" ps ON 
p."code" = ps."person_id" - WHERE ps."status" = '1003' - AND ps."date_value" LIKE '2025-10%' - and p.internal_dept in (SELECT "id" - FROM "IUAP_APDOC_BASEDOC"."org_orgs" START - WITH "name" || "shortname" LIKE '%xx部门%' AND "dr"=0 AND "enable"=1 AND "code" LIKE '%CYJ%' - CONNECT BY PRIOR "id" = "parentid" - ) - AND p."dr" = 0 - AND ps."dr" = 0 LIMIT 1000 + SELECT p."id" AS "id", + p."code" AS "工号", + p."name" AS "姓名", + p."internal_unit" AS "内部单位", + p."external_unit" AS "外部单位", + CASE + WHEN ps."status" = '1006' THEN '迟到早退' + WHEN ps."status" = '1009' THEN '迟到' + WHEN ps."status" = '6002' THEN 'am迟到pm在岗' + WHEN ps."status" = '6004' THEN 'am迟到pm缺勤' + ELSE ps."status" + END AS "状态" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p + INNER JOIN "YJOA_APPSERVICE_DB"."t_yj_person_status" ps ON p."code" = ps."person_id" + WHERE ps."status" IN ('1006','1009','6002','6004') + AND ps."date_value" LIKE '2025-10%' + AND p."internal_dept" IN ( + SELECT "id" + FROM "IUAP_APDOC_BASEDOC"."org_orgs" + START WITH ("name" || "shortname") LIKE '%xx部门%' + AND "dr" = 0 AND "enable" = 1 AND "code" LIKE '%CYJ%' + CONNECT BY PRIOR "id" = "parentid" + ) + AND p."dr" = 0 + AND ps."dr" = 0 + LIMIT 1000; ''', "tags": ["员工", "部门", "考勤", "迟到"], "category": "考勤管理" }, + { - "question": "9月有哪些员工在休假", + "question": "XX中心9月有哪些员工在加班", "answer": ''' - SELECT DISTINCT p."id" AS "id", - p."code" AS "工号", - p."name" AS "姓名", - p."internal_unit" AS "内部单位", - p."external_unit" AS "外部单位" - FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p - INNER JOIN "YJOA_APPSERVICE_DB"."t_yj_person_status" ps ON p."code" = ps."person_id" - WHERE ps."status" = '1003' - AND ps."date_value" LIKE '2025-09%' LIMIT 1000 - ''', + SELECT DISTINCT p."id" AS "id", + p."code" AS "工号", + p."name" AS "姓名", + p."internal_unit" AS "内部单位", + p."external_unit" AS "外部单位", + CASE WHEN ps."status" = '1008' THEN '加班' + ELSE '其他' + END AS "状态" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p + JOIN 
"YJOA_APPSERVICE_DB"."t_yj_person_status" ps ON p."code" = ps."person_id" + JOIN "IUAP_APDOC_BASEDOC"."org_orgs" o ON p."internal_dept" = o."id" + WHERE p."internal_dept" IN (SELECT "id" + FROM "IUAP_APDOC_BASEDOC"."org_orgs" + START WITH "name"||"shortname" LIKE '%XX中心%' + AND "dr" = 0 + AND "enable" = 1 + AND "code" LIKE '%CYJ%' + CONNECT BY PRIOR "id" = "parentid" + ) + AND p."dr" = 0 + AND ps."status" = '1008' + AND ps."date_value" LIKE '2025-09%' + LIMIT 1000 + ''', "tags": ["员工", "考勤", "休假"], "category": "考勤管理" }, @@ -600,9 +647,12 @@ question_and_answer = [ WHEN ps."status" = '1001' THEN '在岗' WHEN ps."status" = '1002' THEN '出差' WHEN ps."status" = '1003' THEN '休假,请假' + WHEN ps."status" = '1004' THEN '缺勤' WHEN ps."status" = '1005' THEN '旷工' WHEN ps."status" = '1006' THEN '迟到,早退' WHEN ps."status" = '1007' THEN '休息日' + WHEN ps."status" = '1008' THEN '加班' + WHEN ps."status" = '1009' THEN '迟到' WHEN ps."status" = '4001' THEN 'am在岗pm缺勤' WHEN ps."status" = '4002' THEN 'am缺勤pm在岗' WHEN ps."status" = '6001' THEN 'am在岗pm早退' @@ -655,7 +705,7 @@ question_and_answer = [ INNER JOIN "IUAP_APDOC_BASEDOC"."org_orgs" o ON p."internal_dept" = o."id" WHERE ps."date_value" IN ('2025-10-20', '2025-10-21', '2025-10-22') - AND ps."status" = '1006' + AND ps."status" IN ('1006','1009','6002','6004') AND p."dr" = 0 AND ps."dr" = 0 AND o."dr" = 0 @@ -692,7 +742,7 @@ question_and_answer = [ AND "dr" = 0 AND "enable" = 1 AND "code" LIKE '%CYJ%' CONNECT BY PRIOR "id" = "parentid" ) - AND ps."status" = '1006' + AND ps."status" IN ('1006','1009','6002','6004') AND ps."date_value" BETWEEN '2025-10-16' AND '2025-10-22' @@ -758,6 +808,34 @@ question_and_answer = [ "tags": ["员工", "部门", "考勤", "工作地", "区域", "工作天数统计"], "category": "工作地考勤统计分析" + }, + { + "question": "XX中心员工在成都工作的天数", + "answer": ''' + SELECT p."code" AS "工号", + p."name" AS "姓名", + COUNT(DISTINCT TO_CHAR(a."attendance_time", 'yyyy-MM-dd')) AS "在成都工作天数" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p + LEFT JOIN 
"YJOA_APPSERVICE_DB"."t_yj_person_attendance" a ON p."code" = a."person_id" + LEFT JOIN "YJOA_APPSERVICE_DB"."t_yj_person_ac_area" ac + ON a."access_control_point" = ac."ac_point" + WHERE p."dr" = 0 + AND a."dr" = 0 + AND (ac."region" = 2 OR a."attendance_address" LIKE '%成都%') + AND a."attendance_time" >= '2025-09-01' + AND a."attendance_time" < '2025-10-01' + AND p."internal_dept" IN (SELECT "id" + FROM "IUAP_APDOC_BASEDOC"."org_orgs" START + WITH ("name" LIKE '%XX中心%' OR "shortname" LIKE '%XX中心%') + AND "dr" = 0 AND "enable" = 1 AND "code" LIKE '%CYJ%' + CONNECT BY PRIOR "id" = "parentid" + ) + GROUP BY p."code", p."name" + ORDER BY "在成都工作天数" DESC LIMIT 1000; + ''', + "tags": ["员工", "部门", "考勤", "工作地", "区域", "工作天数统计"], + "category": "工作地考勤统计分析" + }, { "question": "查询张三9月在林芝的打卡记录", @@ -779,7 +857,11 @@ question_and_answer = [ WHEN b."region" = 2 THEN '成都' WHEN b."region" = 3 THEN '秭归' WHEN b."region" = 4 THEN '拉萨' - WHEN b."region" = 5 THEN '林芝' END AS "地区", + WHEN b."region" = 5 THEN '林芝' + WHEN b."region" = 6 THEN '米林' + WHEN b."region" = 7 THEN '派镇' + WHEN b."region" = 8 THEN '墨脱' + END AS "地区", b."ac_point" AS "门禁点" FROM "YJOA_APPSERVICE_DB"."t_yj_person_attendance" a JOIN "YJOA_APPSERVICE_DB"."t_yj_person_ac_area" b @@ -890,6 +972,34 @@ question_and_answer = [ "tags": ["员工", "个人", "考勤", "工作地", "区域", "天数", "最值"], "category": "工作地考勤统计分析" }, + { + "question": "XX信息部9月谁在林芝工作天数最多", + "answer": ''' + SELECT p."code" AS "工号", + p."name" AS "姓名", + COUNT(DISTINCT TO_CHAR(a."attendance_time", 'yyyy-MM-dd')) AS "在林芝工作天数" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p + LEFT JOIN "YJOA_APPSERVICE_DB"."t_yj_person_attendance" a + ON p."code" = a."person_id" + LEFT JOIN "YJOA_APPSERVICE_DB"."t_yj_person_ac_area" ac + ON a."access_control_point" = ac."ac_point" + WHERE p."dr" = 0 + AND a."dr" = 0 + AND ac."region" = 5 + AND p."internal_dept" IN ( + SELECT "id" + FROM "IUAP_APDOC_BASEDOC"."org_orgs" + START WITH ("name" LIKE '%XX信息部%' OR "shortname" LIKE 
'%XX信息部%') AND "dr" = 0 AND "enable" = 1 AND "code" LIKE '%CYJ%' + CONNECT BY PRIOR "id" = "parentid" + ) + AND TO_CHAR(a."attendance_time", 'yyyy-MM') = '2025-09' + GROUP BY p."code", p."name" + ORDER BY "在林芝工作天数" DESC + LIMIT 1 + ''', + "tags": ["员工", "个人", "考勤", "工作地", "区域", "天数", "最值"], + "category": "工作地考勤统计分析" + }, { "question": "XX中心的员工九月在林芝工作的天数排名", "answer": ''' @@ -917,19 +1027,99 @@ question_and_answer = [ ''', "tags": ["员工", "个人", "考勤", "工作地", "区域", "天数", "排名"], "category": "工作地考勤统计分析" - }, { + }, + { "question": "张三10月份是否有迟到", "answer": ''' SELECT CASE WHEN COUNT(*) > 0 THEN '是' ELSE '否' END AS "是否迟到" FROM "YJOA_APPSERVICE_DB"."t_yj_person_status" ps JOIN "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p ON ps."person_id" = p."code" WHERE p."name" = '张三' - AND ps."status" = '1006' + AND ps."status" IN ('1006','1009','6002','6004') AND ps."date_value" LIKE '2025-10%' AND ps."dr" = 0 AND p."dr" = 0 ''', - "tags": ["员工", "个人", "考勤"], + "tags": ["员工", "个人", "考勤","迟到"], + "category": "考勤" + }, + { + "question": "李四9月份是否有缺勤", + "answer": ''' + SELECT CASE + WHEN COUNT(*) > 0 THEN '是' ELSE '否' + END AS "是否缺勤", "date_value" AS "缺勤日期" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p + INNER JOIN "YJOA_APPSERVICE_DB"."t_yj_person_status" ps + ON p."code" = ps."person_id" + WHERE p."name" = '李四' + AND ps."status" IN ('1004','4001','4002','4006','6004') + AND ps."date_value" LIKE '2025-09%' + AND p."dr" = 0 AND ps."dr" = 0 + GROUP BY "date_value" + ORDER BY "date_value" DESC + LIMIT 1000 + ''', + "tags": ["员工", "个人", "考勤","缺勤"], + "category": "考勤" + }, + { + "question": "9月份XX中心哪些人存在早退现象", + "answer": ''' + SELECT p."id" AS "id", + p."code" AS "code", + p."name" AS "name", + ps."date_value" AS "早退日期", + CASE + WHEN ps."status" = '1006' THEN '迟到早退' + WHEN ps."status" = '6001' THEN 'am在岗pm早退' + WHEN ps."status" = '4006' THEN 'am缺勤pm早退' + ELSE ps."status" + END AS "status" + FROM "YJOA_APPSERVICE_DB"."t_yj_person_status" ps + INNER JOIN 
"YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p + ON p."code" = ps."person_id" + WHERE ps."status" IN ('1006','6001','4006') + AND ps."date_value" LIKE '2025-09%' + AND ps."dr" = 0 + AND p."dr" = 0 + AND p."internal_dept" IN ( + SELECT "id" FROM "IUAP_APDOC_BASEDOC"."org_orgs" + START WITH "name" || "shortname" LIKE '%XX中心%' + AND "dr"=0 AND "enable"=1 + AND "code" LIKE '%CYJ%' + CONNECT BY PRIOR "id" = "parentid" + ) + ORDER BY ps."date_value" DESC + LIMIT 1000 + ''', + "tags": ["员工", "个人", "考勤","早退"], + "category": "考勤" + }, + { + "question": "XX中心张三今天的工作了多长时间", + "answer": ''' + SELECT p."code" AS "工号", + p."name" AS "姓名", + SUM(ps."work_time") AS "工作时长_分钟" + FROM "YJOA_APPSERVICE_DB"."t_pr3rl2oj_yj_person_database" p + INNER JOIN "YJOA_APPSERVICE_DB"."t_yj_person_status" ps + ON p."code" = ps."person_id" + WHERE p."name" = '张三' + AND p."internal_dept" IN ( + SELECT "id" + FROM "IUAP_APDOC_BASEDOC"."org_orgs" + START WITH ("name" LIKE '%XX中心%' OR "shortname" LIKE '%XX中心%') + AND "dr" = 0 + AND "enable" = 1 + AND "code" LIKE '%CYJ%' + CONNECT BY PRIOR "id" = "parentid") + AND ps."date_value" = '2025-10-25' + AND p."dr" = 0 + AND ps."dr" = 0 + GROUP BY p."code", p."name" LIMIT 1000 + ''', + "tags": ["员工", "个人", "考勤", "工作时长"], "category": "考勤" } diff --git a/util/train_ddl.py b/util/train_ddl.py index cbb1814..a0d6c97 100644 --- a/util/train_ddl.py +++ b/util/train_ddl.py @@ -10,6 +10,7 @@ train_document=''' 根据部门名称查询部门时除了全称name,简称shortname也重要信息 internal_dept和internal_unit是部门编号不是名称,注意区分 查询部门信息时尽量使用internal_dept而非internal_unit + 数信部不是数信中心,两者不能等价 ''' person_database_ddl = """ @@ -102,7 +103,7 @@ person_database_ddl = """ "type": "VARCHAR(600)", "comment": "外部部门", "role": "dimension", - "tags": ["组织信息", "外部部门"] + "tags": ["组织信息", "外部部门","外部部门名称"] }, { "name": "pass_type", @@ -132,14 +133,14 @@ person_database_ddl = """ "type": "VARCHAR(108)", "comment": "内部部门ID", "role": "dimension", - "tags": ["组织信息", "内部架构"] + "tags": ["组织信息", "内部架构","部门ID"] }, { "name": 
"input_dept", "type": "VARCHAR(108)", "comment": "录入部门ID", "role": "dimension", - "tags": ["操作部门", "组织信息"] + "tags": ["操作部门", "组织信息","录入部门ID"] }, { "name": "name", @@ -153,7 +154,7 @@ person_database_ddl = """ "type": "VARCHAR(108)", "comment": "内部单位ID", "role": "dimension", - "tags": ["组织信息", "内部架构ID"] + "tags": ["组织信息", "内部单位ID"] }, { "name": "person_type", @@ -192,9 +193,9 @@ person_database_ddl = """ { "name": "code", "type": "VARCHAR(600)", - "comment": "编码", + "comment": "用户ID编码", "role": "dimension", - "tags": ["编码信息", "工号"] + "tags": ["用户ID", "工号"] }, { "name": "ytenant_id", @@ -372,8 +373,12 @@ person_database_ddl = """ "name": "is_subcontractor", "type": "VARCHAR(108)", "comment": "是否分包商", + "value":{ + "1":"是", + "0":"否", + }, "role": "dimension", - "tags": ["供应商类型", "合作模式"] + "tags": ["供应商类型", "合作模式","枚举信息"] }, { "name": "general_contractor_unit", @@ -443,14 +448,14 @@ person_database_ddl = """ "to_table": "IUAP_APDOC_BASEDOC.org_orgs", "to_field": ["id","parentid"], "type": "foreign_key", - "comment": "关联部门表" + "comment": "关联部门表,部门ID或父部门ID" }, { "from": "internal_dept", "to_table": "IUAP_APDOC_BASEDOC.org_orgs", "to_field": ["id","parentid"], "type": "foreign_key", - "comment": "关联部门表" + "comment": "关联部门表,部门ID或父部门ID" }, ], @@ -459,18 +464,16 @@ person_database_ddl = """ """ - - person_attendance_rule_ddl=''' { "db_name":"YJOA_APPSERVICE_DB", "table_name": "t_yj_person_attendance_rules", - "table_comment": "人员考勤规则表,定义考勤时间规则,考勤地点设置", + "table_comment": "考勤规则表,定义不同地点考勤时间规则,考勤地点设置", "columns": [ { "name": "id", "type": "VARCHAR(50)", - "comment": "主键ID", + "comment": "规则ID", "role": "dimension", "tags": ["主键", "ID标识"] }, @@ -491,14 +494,14 @@ person_attendance_rule_ddl=''' { "name": "before_lunch_time", "type": "VARCHAR(50)", - "comment": "午餐前时间", + "comment": "午休前时间", "role": "dimension", "tags": ["时间规则", "考勤分段"] }, { "name": "after_lunch_time", "type": "VARCHAR(50)", - "comment": "午餐后时间", + "comment": "午休后时间", "role": "dimension", "tags": ["时间规则", 
"考勤分段"] }, @@ -511,10 +514,13 @@ person_attendance_rule_ddl=''' "2":"成都", "3":"秭归", "4":"拉萨", - "5": "林芝" + "5":"林芝", + "6":"米林", + "7":"派镇", + "8":"墨脱" }, "role": "dimension", - "tags": [ "考勤的地区位置","非办公区域,不要混淆","枚举"] + "tags": [ "考勤的地区位置","非办公区域,不要混淆","枚举","考勤规则"] }, ], "relationships": [ @@ -558,10 +564,13 @@ person_status_ddl=''' "value":{ "1001":"在岗", "1002":"出差", - "1003":"休假,请假", + "1003":"休假", + "1004":"缺勤", "1005":"旷工", - "1006":"迟到,早退", + "1006":"迟到早退", "1007":"休息日", + "1008":"加班", + "1009":"迟到", "4001":"am在岗pm缺勤", "4002":"am缺勤pm在岗", "6001":"am在岗pm早退", @@ -570,7 +579,7 @@ person_status_ddl=''' "4006":"am缺勤pm早退" }, "role": "dimension", - "tags": ["状态信息", "人员状态","枚举","迟到早退都是:1006"] + "tags": ["状态信息", "人员状态","枚举","考勤状态","迟到包含(1006,1009,6002,6004)","早退包含(1006,6001,4006)","缺勤包含(1004,4001,4002,4006,6004)"] }, { "name": "date_value", @@ -579,6 +588,42 @@ person_status_ddl=''' "role": "dimension", "tags": ["时间信息", "日期记录"] }, + { + "name": "practical_attendance", + "type": "Int", + "comment": "实际出勤", + "value":{ + "1":"已出勤", + "0":"未出勤", + }, + "role": "dimension", + "tags": ["出勤信息", "状态记录","枚举信息"] + }, + { + "name": "is_ought_attendance", + "type": "Int", + "comment": "是否应出勤", + "value":{ + "1":"是", + "0":"否", + }, + "role": "dimension", + "tags": ["是否应出勤", "枚举信息"] + }, + { + "name": "work_area", + "type": "VARCHAR(50)", + "comment": "工作地区", + "role": "dimension", + "tags": ["工作地区", "地域信息"] + }, + { + "name": "work_time", + "type": "Int", + "comment": "工作时间,单位-分钟", + "role": "dimension", + "tags": ["工作时间", "时长"] + }, { "name": "dr", "type": "INT", @@ -593,12 +638,60 @@ person_status_ddl=''' "role": "dimension", "tags": ["时间信息", "系统记录"] }, + { + "name": "vacation_type", + "type": "VARCHAR(50)", + "comment": "休假类型", + "value": { + "1": "年休假", + "2": "补休假", + "3": "探亲假", + "4": "离岗", + "5": "婚假", + "6": "丧假", + "7": "病假", + "8": "事假", + "9": "产假", + "10": "其他", + "11": "轮休假", + "12": "家属探亲" + }, + "role": "dimension", + "tags": ["休假信息", "请假类型", "枚举", "考勤管理"] + }, { 
"name": "is_in_tibet", "type": "INT", "comment": "是否在西藏地区", "role": "dimension", "tags": ["地区标识", "地理位置"] + }, + { + "name": "business_trip_destination", + "type": "VARCHAR(50)", + "comment": "出差目的地", + "role": "dimension", + "tags": ["出差信息", "地理位置", "目的地"] + }, + { + "name": "year", + "type": "VARCHAR(5)", + "comment": "年", + "value": { + "example": "2025" + }, + "role": "dimension", + "tags": ["时间信息", "年度", "时间维度"] + }, + { + "name": "month", + "type": "VARCHAR(10)", + "comment": "月", + "value": { + "example": "2025-08" + }, + "role": "dimension", + "tags": ["时间信息", "月度", "时间维度"] } ], "relationships": [ @@ -687,12 +780,10 @@ person_attendance_ddl = ''' "type": "VARCHAR(50)", "comment": "数据来源", "value": { - "APP": "手机应用", - "DEVICE": "考勤设备", - "SYSTEM": "系统导入" + "example":"大华门禁" }, "role": "dimension", - "tags": ["来源系统", "数据渠道"] + "tags": ["来源系统", "数据渠道","门禁系统源"] }, { "name": "dr", @@ -724,13 +815,6 @@ person_attendance_ddl = ''' "role": "dimension", "tags": ["门禁位置", "打卡设备点"] }, - { - "name": "by_go_type", - "type": "VARCHAR(8)", - "comment": "打卡类型", - "role": "dimension", - "tags": ["类型标识", "打卡类型"] - } ], "relationships": [ { @@ -756,7 +840,7 @@ person_rules_ddl = ''' { "db_name": "YJOA_APPSERVICE_DB", "table_name": "t_yj_person_rules", - "table_comment": "人员考勤规则关联表,关联人员与考勤规则", + "table_comment": "人员考勤规则关系表,关联人员与考勤规则", "columns": [ { "name": "id", @@ -802,7 +886,7 @@ person_rules_ddl = ''' { "from": "person_id", "to_table": "t_yj_person_database", - "to_field": "id", + "to_field": "code", "type": "foreign_key", "comment": "关联人员基本信息表" }, @@ -847,13 +931,6 @@ person_ac_position = ''' "type": "foreign_key", "comment": "关联门禁区域关系表" }, - { - "from": "position", - "to_table": "t_yj_person_ac_area", - "to_field": "area", - "type": "foreign_key", - "comment": "关联门禁区域关系表" - }, "tags": ["门禁控制点","门禁位置"] } ''' @@ -862,7 +939,7 @@ person_ac_area = ''' { "db_name":"YJOA_APPSERVICE_DB", "table_name": "t_yj_person_ac_area", - "table_comment": "门禁区域关系表", + "table_comment": 
"门禁与区域关系表", "columns": [ { "name": "ac_point", @@ -901,7 +978,7 @@ person_ac_area = ''' }, ], - "tags": ["门禁详情","门禁区域位置","门禁地区信息","枚举"] + "tags": ["门禁详情","门禁与区域位置关联信息","门禁地区信息","枚举"] } ''' @@ -914,9 +991,9 @@ org_orgs_ddl = ''' { "name": "id", "type": "VARCHAR(36)", - "comment": "主键ID", + "comment": "部门ID", "role": "dimension", - "tags": ["主键", "id标识"] + "tags": ["主键", "id标识","部门ID"] }, { "name": "parentid",