导出excel

This commit is contained in:
yujj128
2025-12-12 15:31:14 +08:00
parent 5e70e79365
commit b383a52bdc
8 changed files with 254 additions and 14 deletions

View File

@@ -4,8 +4,8 @@ from sqlalchemy.orm import declarative_base, sessionmaker
Base = declarative_base()
from decouple import config
DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')
# DB_PATH = config('DB_PATH', default='D://PycharmProject//yj_resume//main.sqlite3')
# DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')
DB_PATH = config('DB_PATH', default='D://PycharmProject//yj_resume//main.sqlite3')
class DBTASK(Base):
__tablename__ = 'db_task'
@@ -34,6 +34,19 @@ class DBRESUME(Base):
# 错误信息等
message = Column(Text, nullable=True)
class DBEXCEL(Base):
    """ORM model for the ``db_excel`` table: one row per uploaded Excel task."""
    __tablename__ = 'db_excel'
    # Each task corresponds to one folder ID, which is used as the primary key.
    id = Column(String(100), primary_key=True)
    # 0 = pending, 1 = success, 2 = failed.
    status = Column(Integer, nullable=False, default=0)
    file_name = Column(String(100), nullable=True)
    # Extracted data; may be stored as a JSON string.
    excel_info = Column(Text, nullable=True)
    # Error message or other diagnostic text.
    message = Column(Text, nullable=True)
class SqliteSqlalchemy(object):
def __init__(self):

View File

@@ -10,4 +10,4 @@ openpyxl
python-multipart
Pillow>=10.0.0
numpy
pypandoc
openpyxl

View File

@@ -4,8 +4,8 @@ import pandas as pd
import pathlib
from decouple import config
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
# BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
# BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
# 导出数据到excel

View File

@@ -1,6 +1,6 @@
import json
from sqlalchemy import update
from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy
from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy, DBEXCEL
import uuid
from datetime import datetime
from decouple import config
@@ -18,10 +18,11 @@ import pypandoc
logger = logging.getLogger(__name__)
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
ZIP_PATH = config('ZIP_PATh', default='E://pyptoject//yj_resume//zip//')
#
# BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
# ZIP_PATH = config('ZIP_PATh', default='E://pyptoject//yj_resume//zip//')
#
BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
ZIP_PATH = config('ZIP_PATh', default='D://PycharmProject//yj_resume//zip//')
import pandas as pd
import zipfile
@@ -189,13 +190,26 @@ async def upload_and_format_file(dir_id, files: List[UploadFile]) -> (bool, str)
logger.info(f"upload_and_format_file in service dir_id {dir_id}")
pathxx = pathlib.Path(BASE_PATH).joinpath(dir_id)
pathxx.mkdir(parents=True, exist_ok=True)
data = []
for file in files:
# id = str(uuid.uuid4())
name, fix = os.path.splitext(file.filename)
if fix not in ['.xls', '.xlsx']:
continue
with open(pathxx.joinpath(dir_id + fix), 'wb') as f:
file_content = await file.read()
f.write(file_content)
data.append(DBEXCEL(id=dir_id, status=0, file_name=dir_id + '.xlsx'))
session = SqliteSqlalchemy().session
try:
session.bulk_save_objects(data)
session.commit()
except Exception as e:
print(f"Failed to save DBEXCEL error {e}")
session.rollback()
return False, f"Failed to save DBEXCEL error {e}"
finally:
session.close()
return True, "success"

View File

@@ -1,10 +1,20 @@
import json
import re
from docxtpl import DocxTemplate
from pathlib import Path
from decouple import config
import pathlib,logging
import uuid
from sqlalchemy import update
from openpyxl import load_workbook,styles
from db.sql_db import SqliteSqlalchemy, DBEXCEL
logger = logging.getLogger(__name__)
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
# BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
context = {
'name': '张三',
'sex': '',
@@ -34,6 +44,93 @@ context = {
}
# Lookup tables keyed by export field name ('politics', 'education', 'nation',
# 'address').  Each inner dict maps a normalized display label to the code
# string that replaces it in the exported sheet.
# NOTE(review): the 'address' codes look like administrative-division codes;
# confirm against the template's specification.
excel_mapping = {
    'politics': {'群众': '1', '中共党员': '2', '民主党派': '3', '共青团员': '4'},
    'education': {'小学及以下': '1', '初中': '2', '高中、技校': '3', '中专': '4',
                  '大专': '5', '大学本科': '6', '硕士研究生': '7', '博士研究生': '8'},
    'nation': {'汉族': '1', '蒙古族': '2', '回族': '3', '藏族': '4', '维吾尔族': '5',
               '苗族': '6', '彝族': '7', '壮族': '8', '布依族': '9', '朝鲜族': '10',
               '满族': '11', '侗族': '12', '瑶族': '13', '白族': '14', '土家族': '15',
               '哈尼族': '16', '哈萨克族': '17', '傣族': '18', '黎族': '19', '傈僳族': '20',
               '佤族': '21', '畲族': '22', '高山族': '23', '拉祜族': '24', '水族': '25',
               '东乡族': '26', '纳西族': '27', '景颇族': '28', '柯尔克孜族': '29', '土族': '30',
               '达斡尔族': '31', '仫佬族': '32', '羌族': '33', '布朗族': '34', '撒拉族': '35',
               '毛南族': '36', '仡佬族': '37', '锡伯族': '38', '阿昌族': '39', '普米族': '40',
               '塔吉克族': '41', '怒族': '42', '乌孜别克族': '43', '俄罗斯族': '44', '鄂温克族': '45',
               '德昂族': '46', '保安族': '47', '裕固族': '48', '京族': '49', '塔塔尔族': '50', '独龙族': '51',
               '鄂伦春族': '52', '赫哲族': '53', '门巴族': '54', '珞巴族': '55', '基诺族': '56', '其他': '57'},
    'address': {'拉萨市': '540100', '昌都地区': '542100', '山南地区': '542200', '日喀则地区': '542300',
                '那曲地区': '542400', '阿里地区': '542500', '林芝地区': '542600', '藏外地区': '549999'}}
def _as_text(value) -> str:
    """Return *value* as a stripped string; ``None`` becomes ``''``."""
    return '' if value is None else str(value).strip()


def convert_excel_data(old_dict: dict) -> dict:
    """Normalize one parsed record into the fields used by the Excel export.

    Args:
        old_dict: Raw record keyed by the Chinese field labels of the source
            document (e.g. '姓名', '民族', '全日制教育').

    Returns:
        A dict with the keys ``name``, ``sex``, ``origin``, ``politics``,
        ``nation``, ``education`` and ``address``.  ``politics``,
        ``education`` and ``address`` hold labels that are keys of the
        corresponding ``excel_mapping`` tables; ``nation`` does too whenever
        the raw value matches a known label.
    """
    # Placeholder strings that mean "no value" for the party join date.
    empty_party_markers = ('/', '\\', 'None', 'nan', '')
    # Placeholder strings that mean "no value" for the education fields.
    empty_edu_markers = ('/', '\\', 'None', 'nan', '', 'null')

    # Keyword -> standard education label.  The '' key is a substring of every
    # string, so it acts as the fallback; later (more specific) matches
    # overwrite it because the dict is scanned in order.
    pre_edu_mapping = {
        '': '小学及以下',
        '小学': '小学及以下',
        '初中': '初中',
        '高中': '高中、技校',
        '高职': '高中、技校',
        '中专': '中专',
        '大专': '大专',
        '本科': '大学本科',
        '硕士': '硕士研究生',
        '博士': '博士研究生',
    }
    # Keyword -> standard address label.
    pre_addr_mapping = {
        '拉萨': '拉萨市',
        '昌都': '昌都地区',
        '山南': '山南地区',
        '日喀则': '日喀则地区',
        '那曲': '那曲地区',
        '阿里': '阿里地区',
        '林芝': '林芝地区',
    }
    pre_nation_mapping = excel_mapping.get('nation')

    new_dict = {}
    new_dict['name'] = dict_get_mul_key(['姓名', '姓 名'], old_dict)
    new_dict['sex'] = dict_get_mul_key(['性别', '性 别'], old_dict)
    new_dict['origin'] = dict_get_mul_key(['籍贯', '籍 贯'], old_dict)

    # Look the join date up once so BOTH spellings of the label are honored by
    # the placeholder check as well (previously only '入党时间' was checked,
    # which presumably made every record using '入 党 时 间' fall back to
    # '群众' -- dict_get_mul_key's miss value is not visible here).
    party_date = dict_get_mul_key(['入党时间', '入 党 时 间'], old_dict)
    is_party_member = bool(party_date) and party_date not in empty_party_markers
    new_dict['politics'] = '中共党员' if is_party_member else '群众'

    address = dict_get_mul_key(['出 生 地', '出生地'], old_dict)
    nation = dict_get_mul_key(['民族', '民 族'], old_dict)
    new_dict['nation'] = nation

    # Education: prefer full-time education, fall back to on-the-job
    # education.  Missing/None values are treated as empty instead of raising.
    full_time_edu = _as_text(old_dict.get("全日制教育"))
    on_job_edu = _as_text(old_dict.get("在职教育"))
    education = full_time_edu if full_time_edu not in empty_edu_markers else on_job_edu
    education = education.lower().replace(' ', '')
    for keyword, label in pre_edu_mapping.items():
        if keyword in education:
            new_dict['education'] = label

    # Address: anything that matches no known keyword is '藏外地区'.
    new_dict['address'] = '藏外地区'
    for keyword, label in pre_addr_mapping.items():
        if keyword in address:
            new_dict['address'] = label
    logger.debug("address %r normalized to %s", address, new_dict['address'])

    # Nation: accept both abbreviated and full spellings by matching in either
    # direction; the last matching label wins.
    for label in pre_nation_mapping:
        if label in nation or nation in label:
            new_dict['nation'] = label
    return new_dict
def map_data(data_list):
    """Replace label values with their export codes, in place.

    For every row and every field listed in ``excel_mapping``, a non-empty
    value that has a non-empty code in that field's table is overwritten with
    the code.  Values without a code are left untouched.

    Args:
        data_list: List of row dicts (mutated in place).

    Returns:
        The same ``data_list`` object.
    """
    for position, _ in enumerate(data_list):
        record = data_list[position]
        for field, code_table in excel_mapping.items():
            label = record.get(field)
            if not label:
                continue
            code = code_table.get(label, '')
            if not code:
                continue
            data_list[position][field] = code_table.get(label)
    return data_list
def dict_get_mul_key(keys: list, dict_data: dict):
for k in keys:
@@ -52,38 +149,154 @@ def convert_data(old_dict: dict) -> dict:
new_dict['address'] = dict_get_mul_key(['出 生 地', '出生地'], old_dict)
new_dict['education'] = dict_get_mul_key(['学历', '学 历'], old_dict)
new_dict['degree'] = dict_get_mul_key(['学位', '学 位'], old_dict)
new_dict['politics'] = '党员' if len(dict_get_mul_key(['入党时间', '入 党 时 间'], old_dict)) > 0 else '群众'
new_dict['politics'] = '党员' if len(dict_get_mul_key(['入党时间', '入 党 时 间'], old_dict)) > 0 and dict_get_mul_key(['入党时间'], old_dict) not in ('/','\\','None','nan','') else '群众'
new_dict['department'] = dict_get_mul_key(['部门', '部 门'], old_dict)
new_dict['position'] = dict_get_mul_key(['现任职务', '现 任 职 务'], old_dict)
new_dict['phone'] = dict_get_mul_key(['手机号', '手 机 号'], old_dict)
new_dict['title'] = dict_get_mul_key(['专业技术职务', '职 称'], old_dict)
new_dict['start_work_time'] = dict_get_mul_key(['开始工作时间', '开始 工作 时间'], old_dict)
new_dict['id_number'] = dict_get_mul_key(['身份证', '身 份 证'], old_dict)
new_dict['id_number'] = dict_get_mul_key(['身份证', '身 份 证','身份证号码','身份证号'], old_dict)
new_dict['honor'] = dict_get_mul_key(['奖惩情况', '奖惩 情况'], old_dict)
new_dict['work_text'] = dict_get_mul_key(['简历', '简 历'], old_dict)
return new_dict
def _clone_side(side):
    """Return a new ``Side`` with the same style and color, or None if absent."""
    if not side:
        return None
    return styles.Side(style=side.style, color=side.color)


def _copy_row_style(ws, source_row, target_row):
    """Copy named style, alignment and border of every cell from one row to another."""
    for col in range(1, ws.max_column + 1):
        source_cell = ws.cell(row=source_row, column=col)
        target_cell = ws.cell(row=target_row, column=col)
        target_cell.style = source_cell.style

        # Alignment has to be rebuilt explicitly; assigning the named style
        # alone does not carry it over.
        alignment = source_cell.alignment
        if alignment:
            target_cell.alignment = styles.Alignment(
                horizontal=alignment.horizontal,
                vertical=alignment.vertical,
                text_rotation=alignment.text_rotation,
                wrap_text=alignment.wrap_text,
                shrink_to_fit=alignment.shrink_to_fit,
                indent=alignment.indent,
            )

        # Borders are rebuilt side by side for the same reason.
        border = getattr(source_cell, 'border', None)
        if border:
            target_cell.border = styles.Border(
                left=_clone_side(border.left),
                right=_clone_side(border.right),
                top=_clone_side(border.top),
                bottom=_clone_side(border.bottom),
            )


def format_and_write_excel_file(dir_id, data_list, template_row=5):
    """Fill ``template.xlsx`` with *data_list* and save it as ``<dir_id>_out.xlsx``.

    The template row contains ``{{field}}`` placeholders; each placeholder
    column is filled from the matching key of every record.  The first record
    overwrites the template row itself, every further record gets a freshly
    inserted row whose style is copied from the row above.

    Args:
        dir_id: Task folder id; the output is written to
            ``BASE_PATH/<dir_id>/<dir_id>_out.xlsx``.
        data_list: List of dicts keyed by placeholder name.
        template_row: 1-based row of the template that holds the placeholders.

    Returns:
        None.  Any failure is logged (with traceback) and swallowed, so
        callers are not interrupted.
    """
    logger.info("Start to format and write excel file ")
    try:
        output_path = pathlib.Path(BASE_PATH).joinpath(dir_id, dir_id + '_out.xlsx')
        template_path = Path.cwd().joinpath('template.xlsx')
        wb = load_workbook(template_path)
        ws = wb.active

        # Map column index -> placeholder name found in the template row.
        pattern = re.compile(r'\{\{(\w+)\}\}')
        placeholder_columns = {}
        for col in range(1, ws.max_column + 1):
            template_value = ws.cell(row=template_row, column=col).value
            if isinstance(template_value, str) and '{{' in template_value:
                matches = pattern.findall(template_value)
                if matches:
                    placeholder_columns[col] = matches[0]
        logger.info(f"找到占位符列: {placeholder_columns}")

        for index, data in enumerate(data_list):
            target_row = template_row + index
            if index > 0:
                # Insert a new row and give it the formatting of the previous one.
                ws.insert_rows(target_row)
                _copy_row_style(ws, target_row - 1, target_row)

            # Missing or None values leave the cell blank (this also clears
            # the placeholder text on the template row).
            for col, field in placeholder_columns.items():
                ws.cell(row=target_row, column=col).value = data.get(field)

        wb.save(output_path)
    except Exception as e:
        # Best-effort export: keep swallowing, but preserve the traceback.
        logger.exception(f"format and write excel file failed {e}")
def format_and_write_file(dir_id: str, ctx: dict):
    """Render ``template.docx`` with *ctx* and save it under the task folder.

    Args:
        dir_id: Task folder id; the document is written to
            ``BASE_PATH/<dir_id>/<name>.docx``.
        ctx: Template context.  ``ctx['name']`` is used as the file name.

    The template is read from the current working directory.
    """
    logger.info(f'format_and_write_file dir id is {dir_id}')
    # Use `or` rather than a .get() default so that a present-but-empty name
    # also falls back to a unique id; otherwise every unnamed record would be
    # written to the same '.docx' file and overwrite the previous one.
    user_name = ctx.get('name') or str(uuid.uuid4())
    template_path = Path.cwd().joinpath('template.docx')
    logger.debug(f"rendering {template_path} with ctx {ctx}")

    template = DocxTemplate(template_path)
    template.render(ctx)

    output_path = pathlib.Path(BASE_PATH).joinpath(dir_id, user_name + '.docx')
    template.save(output_path)
    logger.debug(f"saved {output_path}")
def format_excel_to_words(dir_id: str, dict_data_list: list[dict]):
    """Export parsed records as one summary workbook plus one Word file each.

    Steps:
      1. Normalize and code the records, then write ``<dir_id>_out.xlsx``.
      2. Store the coded records as JSON on the task's ``DBEXCEL`` row and
         mark it successful (status 1) so they can be aggregated later.
      3. Render one Word document per record.

    Args:
        dir_id: Task folder id, also the primary key of the ``DBEXCEL`` row.
        dict_data_list: Raw records keyed by the source field labels.  Nothing
            happens when it is empty or None.
    """
    if not dict_data_list:
        return
    logger.info("dict_data_list is {0}".format(dict_data_list))

    excel_data_list = map_data([convert_excel_data(data) for data in dict_data_list])
    logger.info(f"excel map data is {excel_data_list}")
    format_and_write_excel_file(dir_id, excel_data_list)

    # Also persist the coded data so a later job can build a combined workbook.
    session = SqliteSqlalchemy().session
    try:
        save_data = json.dumps(excel_data_list, ensure_ascii=False)
        session.execute(
            update(DBEXCEL)
            .where(DBEXCEL.id == dir_id)
            .values(status=1, file_name=dir_id + '_out.xlsx', excel_info=save_data)
        )
        session.commit()
    except Exception:
        session.rollback()
        # Still best-effort (Word export continues), but no longer silent.
        logger.exception(f"Failed to update DBEXCEL for dir_id {dir_id}")
    finally:
        session.close()

    for dict_data in dict_data_list:
        new_data = convert_data(dict_data)
        logger.debug(f"word template data is {new_data}")
        format_and_write_file(dir_id, new_data)

BIN
service/template.xlsx Normal file

Binary file not shown.

BIN
template.docx Normal file

Binary file not shown.

BIN
template.xlsx Normal file

Binary file not shown.