class DBEXCEL(Base):
    """ORM model for the ``db_excel`` table (one row per uploaded Excel task)."""

    __tablename__ = 'db_excel'
    # Each task corresponds to one folder ID
    id = Column(String(100), primary_key=True)
    # 0 = pending, 1 = succeeded, 2 = failed
    status = Column(Integer, nullable=False, default=0)
    file_name = Column(String(100), nullable=True)
    # Extracted data; may be stored as JSON
    excel_info = Column(Text, nullable=True)
    # Error message etc.
    message = Column(Text, nullable=True)
100644 --- a/service/file_service.py +++ b/service/file_service.py @@ -1,6 +1,6 @@ import json from sqlalchemy import update -from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy +from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy, DBEXCEL import uuid from datetime import datetime from decouple import config @@ -18,10 +18,11 @@ import pypandoc logger = logging.getLogger(__name__) -BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//') -ZIP_PATH = config('ZIP_PATh', default='E://pyptoject//yj_resume//zip//') -# # BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//') +# ZIP_PATH = config('ZIP_PATh', default='E://pyptoject//yj_resume//zip//') +# +BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//') +ZIP_PATH = config('ZIP_PATh', default='D://PycharmProject//yj_resume//zip//') import pandas as pd import zipfile @@ -189,13 +190,26 @@ async def upload_and_format_file(dir_id, files: List[UploadFile]) -> (bool, str) logger.info(f"upload_and_format_file in service dir_id {dir_id}") pathxx = pathlib.Path(BASE_PATH).joinpath(dir_id) pathxx.mkdir(parents=True, exist_ok=True) + data = [] for file in files: + # id = str(uuid.uuid4()) name, fix = os.path.splitext(file.filename) if fix not in ['.xls', '.xlsx']: continue with open(pathxx.joinpath(dir_id + fix), 'wb') as f: file_content = await file.read() f.write(file_content) + data.append(DBEXCEL(id=dir_id, status=0, file_name=dir_id + '.xlsx')) + session = SqliteSqlalchemy().session + try: + session.bulk_save_objects(data) + session.commit() + except Exception as e: + print(f"Failed to save DBEXCEL error {e}") + session.rollback() + return False, f"Failed to save DBEXCEL error {e}" + finally: + session.close() return True, "success" diff --git a/service/format_template_resume.py b/service/format_template_resume.py index fdee51d..d4fff64 100644 --- a/service/format_template_resume.py +++ b/service/format_template_resume.py @@ -1,10 +1,20 @@ +import json 
# Code tables used by map_data(): each top-level key is a field name of a
# converted row, each inner dict maps the standardised label to its code.
excel_mapping = {
    'politics': {'群众': '1', '中共党员': '2', '民主党派': '3', '共青团员': '4'},
    'education': {'小学及以下': '1', '初中': '2', '高中、技校': '3', '中专': '4',
                  '大专': '5', '大学本科': '6', '硕士研究生': '7', '博士研究生': '8'},
    'nation': {'汉族': '1', '蒙古族': '2', '回族': '3', '藏族': '4', '维吾尔族': '5',
               '苗族': '6', '彝族': '7', '壮族': '8', '布依族': '9', '朝鲜族': '10',
               '满族': '11', '侗族': '12', '瑶族': '13', '白族': '14', '土家族': '15',
               '哈尼族': '16', '哈萨克族': '17', '傣族': '18', '黎族': '19', '傈僳族': '20',
               '佤族': '21', '畲族': '22', '高山族': '23', '拉祜族': '24', '水族': '25',
               '东乡族': '26', '纳西族': '27', '景颇族': '28', '柯尔克孜族': '29', '土族': '30',
               '达斡尔族': '31', '仫佬族': '32', '羌族': '33', '布朗族': '34', '撒拉族': '35',
               '毛南族': '36', '仡佬族': '37', '锡伯族': '38', '阿昌族': '39', '普米族': '40',
               '塔吉克族': '41', '怒族': '42', '乌孜别克族': '43', '俄罗斯族': '44', '鄂温克族': '45',
               '德昂族': '46', '保安族': '47', '裕固族': '48', '京族': '49', '塔塔尔族': '50', '独龙族': '51',
               '鄂伦春族': '52', '赫哲族': '53', '门巴族': '54', '珞巴族': '55', '基诺族': '56', '其他': '57'},
    'address': {'拉萨市': '540100', '昌都地区': '542100', '山南地区': '542200', '日喀则地区': '542300',
                '那曲地区': '542400', '阿里地区': '542500', '林芝地区': '542600', '藏外地区': '549999'},
}

# Cell values that stand for "no data" in the uploaded sheets.
_EMPTY_MARKERS = ('/', '\\', 'None', 'nan', '无')

# Keyword found in the raw education text -> standard education label.
# Ordered from lowest to highest level; see _normalize_education().
_EDUCATION_KEYWORDS = {
    '无': '小学及以下',
    '小学': '小学及以下',
    '初中': '初中',
    '高中': '高中、技校',
    '高职': '高中、技校',
    '中专': '中专',
    '大专': '大专',
    '本科': '大学本科',
    '硕士': '硕士研究生',
    '博士': '博士研究生',
}

# Keyword found in the raw birthplace text -> standard address label.
_ADDRESS_KEYWORDS = {
    '拉萨': '拉萨市',
    '昌都': '昌都地区',
    '山南': '山南地区',
    '日喀则': '日喀则地区',
    '那曲': '那曲地区',
    '阿里': '阿里地区',
    '林芝': '林芝地区',
}


def _as_text(value) -> str:
    """Return *value* as a string, turning ``None`` into ``''``."""
    return '' if value is None else str(value)


def _normalize_education(full_time, in_service):
    """Return the standard education label, or ``None`` if nothing matches.

    The full-time value is preferred; the in-service value is used when the
    full-time one is missing or is a "no data" marker. Every keyword is tried
    and the last match wins, so with the table ordered from lowest to highest
    level a text naming several levels resolves to the highest one.
    """
    full_time = _as_text(full_time)
    if full_time and full_time not in _EMPTY_MARKERS + ('否', 'null'):
        raw = full_time
    else:
        raw = _as_text(in_service)
    raw = raw.lower().replace(' ', '')

    label = None
    for keyword, standard in _EDUCATION_KEYWORDS.items():
        if keyword in raw:
            label = standard
    return label


def convert_excel_data(old_dict: dict) -> dict:
    """Build one standardised summary row from a raw resume dict.

    Args:
        old_dict: Raw data keyed by the sheet's Chinese headers (spaced and
            un-spaced header variants are both looked up).

    Returns:
        A dict with ``name``, ``sex``, ``origin``, ``politics``, ``nation``
        and ``address``; ``education`` is present only when the raw text
        contains a known education keyword.
    """
    new_dict = {}
    new_dict['name'] = dict_get_mul_key(['姓名', '姓 名'], old_dict)
    new_dict['sex'] = dict_get_mul_key(['性别', '性 别'], old_dict)
    new_dict['origin'] = dict_get_mul_key(['籍贯', '籍 贯'], old_dict)

    # Look the join date up once, under both header spellings, so the
    # "no data" markers are honoured whichever spelling the sheet uses.
    join_time = _as_text(dict_get_mul_key(['入党时间', '入 党 时 间'], old_dict))
    is_party_member = bool(join_time) and join_time not in _EMPTY_MARKERS
    new_dict['politics'] = '中共党员' if is_party_member else '群众'

    address = _as_text(dict_get_mul_key(['出 生 地', '出生地'], old_dict))
    nation = _as_text(dict_get_mul_key(['民族', '民 族'], old_dict))
    new_dict['nation'] = nation

    # Education: a missing "全日制教育" / "在职教育" entry must not crash.
    education = _normalize_education(old_dict.get("全日制教育"), old_dict.get("在职教育"))
    if education is not None:
        new_dict['education'] = education

    # Address: anything that names none of the known regions is "藏外地区".
    new_dict['address'] = '藏外地区'
    for keyword, standard in _ADDRESS_KEYWORDS.items():
        if keyword in address:
            new_dict['address'] = standard

    # Nation: substring match in either direction, last match wins. An empty
    # value matches every label and therefore ends up as the final entry,
    # '其他'.
    for label in excel_mapping['nation']:
        if label in nation or nation in label:
            new_dict['nation'] = label
    return new_dict


def map_data(data_list):
    """Replace standardised labels with their codes from ``excel_mapping``.

    Rows are modified in place. Empty values and labels without a code are
    left untouched.

    Returns:
        The same ``data_list`` object.
    """
    for row in data_list:
        for field, codes in excel_mapping.items():
            label = row.get(field)
            if not label:
                continue
            code = codes.get(label, '')
            if code:
                row[field] = code
    return data_list
def _find_placeholder_columns(ws, template_row):
    """Return ``{column index: field name}`` for ``{{field}}`` cells in the template row."""
    pattern = re.compile(r'\{\{(\w+)\}\}')
    columns = {}
    for col in range(1, ws.max_column + 1):
        value = ws.cell(row=template_row, column=col).value
        if isinstance(value, str) and '{{' in value:
            matches = pattern.findall(value)
            if matches:
                # Only the first placeholder in a cell is used.
                columns[col] = matches[0]
    return columns


def _copy_side(side):
    """Return a new border Side with the same style and colour, or ``None``."""
    if not side:
        return None
    return styles.Side(style=side.style, color=side.color)


def _copy_row_format(ws, source_row, target_row):
    """Copy named style, alignment and border from one row to another (no values)."""
    for col in range(1, ws.max_column + 1):
        source_cell = ws.cell(row=source_row, column=col)
        target_cell = ws.cell(row=target_row, column=col)
        target_cell.style = source_cell.style

        alignment = source_cell.alignment
        if alignment:
            # Alignment is rebuilt explicitly from the source attributes.
            target_cell.alignment = styles.Alignment(
                horizontal=alignment.horizontal,
                vertical=alignment.vertical,
                text_rotation=alignment.text_rotation,
                wrap_text=alignment.wrap_text,
                shrink_to_fit=alignment.shrink_to_fit,
                indent=alignment.indent,
            )

        border = getattr(source_cell, 'border', None)
        if border:
            target_cell.border = styles.Border(
                left=_copy_side(border.left),
                right=_copy_side(border.right),
                top=_copy_side(border.top),
                bottom=_copy_side(border.bottom),
            )


def format_and_write_excel_file(dir_id, data_list, template_row=5):
    """Fill ``template.xlsx`` with ``data_list`` and save ``<dir_id>_out.xlsx``.

    The template is loaded from the current working directory. Cells in
    ``template_row`` holding ``{{field}}`` placeholders define which column
    receives which field. The first record overwrites the template row; each
    further record gets a newly inserted row formatted like the row above it.

    Args:
        dir_id: Task folder name under ``BASE_PATH``; also the output prefix.
        data_list: Dicts keyed by placeholder field name.
        template_row: 1-based row of the template that holds the placeholders.

    Returns:
        ``True`` when the workbook was saved, ``False`` when any step failed
        (the error is logged with its traceback instead of being raised).
    """
    logger.info("Start to format and write excel file ")
    try:
        output_path = pathlib.Path(BASE_PATH).joinpath(dir_id).joinpath(dir_id + '_out.xlsx')
        template_path = Path.cwd().joinpath('template.xlsx')
        wb = load_workbook(template_path)
        ws = wb.active

        placeholder_columns = _find_placeholder_columns(ws, template_row)
        logger.info(f"找到占位符列: {placeholder_columns}")

        for index, data in enumerate(data_list):
            target_row = template_row + index
            if index > 0:
                ws.insert_rows(target_row)
                _copy_row_format(ws, target_row - 1, target_row)

            # Missing or None fields leave the cell blank.
            for col, field in placeholder_columns.items():
                ws.cell(row=target_row, column=col).value = data.get(field)

        wb.save(output_path)
    except Exception as e:
        logger.exception(f"format and write excel file failed {e}")
        return False
    return True
def format_and_write_file(dir_id: str, ctx: dict):
    """Render ``template.docx`` with ``ctx`` and save it in the task folder.

    The template is loaded from the current working directory and the result
    is written to ``BASE_PATH/<dir_id>/<name>.docx``.

    Args:
        dir_id: Task folder name under ``BASE_PATH``.
        ctx: Template context; ``ctx['name']`` becomes the file name.
    """
    logger.info(f'format_and_write_file dir id is {dir_id}')
    # A missing *or empty* name falls back to a random id; otherwise the
    # file would be called just ".docx".
    user_name = ctx.get('name') or str(uuid.uuid4())
    template_path = Path.cwd().joinpath('template.docx')
    template = DocxTemplate(template_path)
    template.render(ctx)
    target_path = pathlib.Path(BASE_PATH).joinpath(dir_id).joinpath(user_name + '.docx')
    logger.debug("saving rendered resume to %s", target_path)
    template.save(target_path)


def format_excel_to_words(dir_id: str, dict_data_list: list[dict]):
    """Produce the summary workbook and one Word resume per input record.

    Steps: convert and code the records for the summary sheet, write the
    workbook, record the outcome in the ``DBEXCEL`` row whose id is
    ``dir_id``, then render every record into a ``.docx`` file.

    Args:
        dir_id: Task folder name; also the id of the ``DBEXCEL`` row.
        dict_data_list: Raw records; nothing is done when it is empty or None.
    """
    if not dict_data_list:
        return

    logger.info("dict_data_list is {0}".format(dict_data_list))
    excel_data_list = map_data([convert_excel_data(data) for data in dict_data_list])
    logger.info(f"excel map data is {excel_data_list}")
    format_and_write_excel_file(dir_id, excel_data_list)

    # The workbook writer logs its own errors instead of raising, so success
    # is confirmed by checking that the output file is actually there.
    out_name = dir_id + '_out.xlsx'
    excel_written = pathlib.Path(BASE_PATH).joinpath(dir_id).joinpath(out_name).is_file()

    # Keep a copy of the extracted data in the database for later aggregation.
    session = SqliteSqlalchemy().session
    try:
        values = {'excel_info': json.dumps(excel_data_list, ensure_ascii=False)}
        if excel_written:
            values.update(status=1, file_name=out_name)
        else:
            # status 2 marks the task as failed
            values.update(status=2, message=f"excel file {out_name} was not written")
        session.execute(update(DBEXCEL).where(DBEXCEL.id == dir_id).values(**values))
        session.commit()
    except Exception as e:
        session.rollback()
        logger.error(f"Failed to update DBEXCEL for {dir_id} error {e}")
    finally:
        session.close()

    for dict_data in dict_data_list:
        new_data = convert_data(dict_data)
        logger.debug("converted resume data: %s", new_data)
        format_and_write_file(dir_id, new_data)