导出excel
This commit is contained in:
17
db/sql_db.py
17
db/sql_db.py
@@ -4,8 +4,8 @@ from sqlalchemy.orm import declarative_base, sessionmaker
|
||||
Base = declarative_base()
|
||||
from decouple import config
|
||||
|
||||
DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')
|
||||
# DB_PATH = config('DB_PATH', default='D://PycharmProject//yj_resume//main.sqlite3')
|
||||
# DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')
|
||||
DB_PATH = config('DB_PATH', default='D://PycharmProject//yj_resume//main.sqlite3')
|
||||
|
||||
class DBTASK(Base):
|
||||
__tablename__ = 'db_task'
|
||||
@@ -34,6 +34,19 @@ class DBRESUME(Base):
|
||||
# 错误信息等
|
||||
message = Column(Text, nullable=True)
|
||||
|
||||
class DBEXCEL(Base):
    """ORM model for the ``db_excel`` table: one row per Excel export task."""

    __tablename__ = 'db_excel'

    # Each task corresponds to one folder ID, which doubles as the primary key.
    id = Column(String(100), primary_key=True)

    # 0 = pending, 1 = succeeded, 2 = failed.
    status = Column(Integer, nullable=False, default=0)

    file_name = Column(String(100), nullable=True)

    # Extracted data; JSON may be used as the representation.
    excel_info = Column(Text, nullable=True)

    # Error details and similar messages.
    message = Column(Text, nullable=True)
|
||||
|
||||
|
||||
|
||||
class SqliteSqlalchemy(object):
|
||||
def __init__(self):
|
||||
|
||||
@@ -10,4 +10,4 @@ openpyxl
|
||||
python-multipart
|
||||
Pillow>=10.0.0
|
||||
numpy
|
||||
pypandoc
|
||||
openpyxl
|
||||
|
||||
@@ -4,8 +4,8 @@ import pandas as pd
|
||||
import pathlib
|
||||
from decouple import config
|
||||
|
||||
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
|
||||
# BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
|
||||
# BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
|
||||
BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
|
||||
|
||||
|
||||
# 导出数据到excel
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import json
|
||||
from sqlalchemy import update
|
||||
from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy
|
||||
from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy, DBEXCEL
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from decouple import config
|
||||
@@ -18,10 +18,11 @@ import pypandoc
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
|
||||
ZIP_PATH = config('ZIP_PATh', default='E://pyptoject//yj_resume//zip//')
|
||||
#
|
||||
# BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
|
||||
# ZIP_PATH = config('ZIP_PATh', default='E://pyptoject//yj_resume//zip//')
|
||||
#
|
||||
BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
|
||||
ZIP_PATH = config('ZIP_PATh', default='D://PycharmProject//yj_resume//zip//')
|
||||
import pandas as pd
|
||||
import zipfile
|
||||
|
||||
@@ -189,13 +190,26 @@ async def upload_and_format_file(dir_id, files: List[UploadFile]) -> (bool, str)
|
||||
logger.info(f"upload_and_format_file in service dir_id {dir_id}")
|
||||
pathxx = pathlib.Path(BASE_PATH).joinpath(dir_id)
|
||||
pathxx.mkdir(parents=True, exist_ok=True)
|
||||
data = []
|
||||
for file in files:
|
||||
# id = str(uuid.uuid4())
|
||||
name, fix = os.path.splitext(file.filename)
|
||||
if fix not in ['.xls', '.xlsx']:
|
||||
continue
|
||||
with open(pathxx.joinpath(dir_id + fix), 'wb') as f:
|
||||
file_content = await file.read()
|
||||
f.write(file_content)
|
||||
data.append(DBEXCEL(id=dir_id, status=0, file_name=dir_id + '.xlsx'))
|
||||
session = SqliteSqlalchemy().session
|
||||
try:
|
||||
session.bulk_save_objects(data)
|
||||
session.commit()
|
||||
except Exception as e:
|
||||
print(f"Failed to save DBEXCEL error {e}")
|
||||
session.rollback()
|
||||
return False, f"Failed to save DBEXCEL error {e}"
|
||||
finally:
|
||||
session.close()
|
||||
return True, "success"
|
||||
|
||||
|
||||
|
||||
@@ -1,10 +1,20 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from docxtpl import DocxTemplate
|
||||
from pathlib import Path
|
||||
from decouple import config
|
||||
import pathlib,logging
|
||||
import uuid
|
||||
from sqlalchemy import update
|
||||
|
||||
from openpyxl import load_workbook,styles
|
||||
|
||||
from db.sql_db import SqliteSqlalchemy, DBEXCEL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
|
||||
# BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
|
||||
BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
|
||||
context = {
|
||||
'name': '张三',
|
||||
'sex': '男',
|
||||
@@ -34,6 +44,93 @@ context = {
|
||||
|
||||
}
|
||||
|
||||
# Code tables used when exporting to the Excel template: each field maps a
# normalised display value to the string code written into the sheet.
# politics / education / nation are numbered consecutively from '1' in the
# order listed, so the codes are generated with enumerate(); address uses
# explicit region codes.
excel_mapping = {
    'politics': {
        label: str(code)
        for code, label in enumerate(
            ('群众', '中共党员', '民主党派', '共青团员'),
            start=1,
        )
    },
    'education': {
        label: str(code)
        for code, label in enumerate(
            (
                '小学及以下', '初中', '高中、技校', '中专',
                '大专', '大学本科', '硕士研究生', '博士研究生',
            ),
            start=1,
        )
    },
    'nation': {
        label: str(code)
        for code, label in enumerate(
            (
                '汉族', '蒙古族', '回族', '藏族', '维吾尔族',
                '苗族', '彝族', '壮族', '布依族', '朝鲜族',
                '满族', '侗族', '瑶族', '白族', '土家族',
                '哈尼族', '哈萨克族', '傣族', '黎族', '傈僳族',
                '佤族', '畲族', '高山族', '拉祜族', '水族',
                '东乡族', '纳西族', '景颇族', '柯尔克孜族', '土族',
                '达斡尔族', '仫佬族', '羌族', '布朗族', '撒拉族',
                '毛南族', '仡佬族', '锡伯族', '阿昌族', '普米族',
                '塔吉克族', '怒族', '乌孜别克族', '俄罗斯族', '鄂温克族',
                '德昂族', '保安族', '裕固族', '京族', '塔塔尔族',
                '独龙族', '鄂伦春族', '赫哲族', '门巴族', '珞巴族',
                '基诺族', '其他',
            ),
            start=1,
        )
    },
    'address': {
        '拉萨市': '540100',
        '昌都地区': '542100',
        '山南地区': '542200',
        '日喀则地区': '542300',
        '那曲地区': '542400',
        '阿里地区': '542500',
        '林芝地区': '542600',
        '藏外地区': '549999',
    },
}
|
||||
|
||||
|
||||
def convert_excel_data(old_dict: dict) -> dict:
    """Normalise one parsed resume record into the fields of the Excel export.

    The returned dict carries ``name``, ``sex``, ``origin``, ``politics``,
    ``nation`` and ``address``, plus ``education`` when a known education
    level is recognised in the source text. Values are still display labels;
    converting them to codes is done separately via ``excel_mapping``.

    Args:
        old_dict: Raw record keyed by the (Chinese) headers of the source form.

    Returns:
        A new dict with the normalised fields; ``old_dict`` is not modified.
    """

    def _as_text(value) -> str:
        # Missing cells may arrive as None or as non-string values (e.g. NaN);
        # coerce them so the string checks below cannot raise.
        return '' if value is None else str(value).strip()

    # Cell contents that mean "not filled in".
    not_filled = ('/', '\\', 'None', 'nan', '无')
    no_full_time_education = not_filled + ('否', 'null')

    pre_edu_mapping = {
        '无': '小学及以下',
        '小学': '小学及以下',
        '初中': '初中',
        '高中': '高中、技校',
        '高职': '高中、技校',
        '中专': '中专',
        '大专': '大专',
        '本科': '大学本科',
        '硕士': '硕士研究生',
        '博士': '博士研究生',
    }
    pre_addr_mapping = {
        '拉萨': '拉萨市',
        '昌都': '昌都地区',
        '山南': '山南地区',
        '日喀则': '日喀则地区',
        '那曲': '那曲地区',
        '阿里': '阿里地区',
        '林芝': '林芝地区',
    }
    pre_nation_mapping = excel_mapping.get('nation') or {}

    new_dict = {}
    new_dict['name'] = dict_get_mul_key(['姓名', '姓 名'], old_dict)
    new_dict['sex'] = dict_get_mul_key(['性别', '性 别'], old_dict)
    new_dict['origin'] = dict_get_mul_key(['籍贯', '籍 贯'], old_dict)

    # Look the join date up once, under both header spellings, so that a
    # placeholder stored under the spaced header is also treated as "not a
    # party member" (previously only the unspaced header was checked).
    join_time = _as_text(dict_get_mul_key(['入党时间', '入 党 时 间'], old_dict))
    if join_time and join_time not in not_filled:
        new_dict['politics'] = '中共党员'
    else:
        new_dict['politics'] = '群众'

    address = _as_text(dict_get_mul_key(['出 生 地', '出生地'], old_dict))
    nation = _as_text(dict_get_mul_key(['民族', '民 族'], old_dict))
    new_dict['nation'] = nation

    # Education: prefer full-time education, fall back to in-service education.
    full_time = _as_text(old_dict.get("全日制教育"))
    if full_time and full_time not in no_full_time_education:
        education = full_time
    else:
        education = _as_text(old_dict.get("在职教育"))
    education = education.lower().replace(' ', '')
    # No break on purpose: when several keywords occur, the one listed last
    # in pre_edu_mapping wins (same as the previous behaviour).
    for keyword, level in pre_edu_mapping.items():
        if keyword in education:
            new_dict['education'] = level

    # Address: anything not matching a listed prefecture counts as "outside".
    addr_found = False
    for keyword, region in pre_addr_mapping.items():
        if keyword in address:
            logger.debug("藏内地区")
            addr_found = True
            new_dict['address'] = region
    if not addr_found:
        logger.debug("藏外地区")
        new_dict['address'] = '藏外地区'

    # Ethnicity: accept short forms in either direction ("汉" <-> "汉族").
    # An empty value is skipped, because '' is a substring of every key and
    # would otherwise be rewritten to the last entry of the table.
    if nation:
        for label in pre_nation_mapping:
            if label in nation or nation in label:
                new_dict['nation'] = label

    return new_dict
|
||||
|
||||
def map_data(data_list):
    """Replace display labels with their codes from ``excel_mapping``.

    For every row and every field listed in ``excel_mapping``, a truthy value
    that has a non-empty code in the table is overwritten with that code;
    anything else is left as it is. Rows are updated in place and the same
    list object is returned.
    """
    for record in data_list:
        for field, code_table in excel_mapping.items():
            label = record.get(field)
            if not label:
                continue
            code = code_table.get(label, '')
            if code:
                record[field] = code
    return data_list
|
||||
|
||||
def dict_get_mul_key(keys: list, dict_data: dict):
|
||||
for k in keys:
|
||||
@@ -52,38 +149,154 @@ def convert_data(old_dict: dict) -> dict:
|
||||
new_dict['address'] = dict_get_mul_key(['出 生 地', '出生地'], old_dict)
|
||||
new_dict['education'] = dict_get_mul_key(['学历', '学 历'], old_dict)
|
||||
new_dict['degree'] = dict_get_mul_key(['学位', '学 位'], old_dict)
|
||||
new_dict['politics'] = '党员' if len(dict_get_mul_key(['入党时间', '入 党 时 间'], old_dict)) > 0 else '群众'
|
||||
new_dict['politics'] = '党员' if len(dict_get_mul_key(['入党时间', '入 党 时 间'], old_dict)) > 0 and dict_get_mul_key(['入党时间'], old_dict) not in ('/','\\','None','nan','无') else '群众'
|
||||
new_dict['department'] = dict_get_mul_key(['部门', '部 门'], old_dict)
|
||||
new_dict['position'] = dict_get_mul_key(['现任职务', '现 任 职 务'], old_dict)
|
||||
new_dict['phone'] = dict_get_mul_key(['手机号', '手 机 号'], old_dict)
|
||||
new_dict['title'] = dict_get_mul_key(['专业技术职务', '职 称'], old_dict)
|
||||
new_dict['start_work_time'] = dict_get_mul_key(['开始工作时间', '开始 工作 时间'], old_dict)
|
||||
new_dict['id_number'] = dict_get_mul_key(['身份证', '身 份 证'], old_dict)
|
||||
new_dict['id_number'] = dict_get_mul_key(['身份证', '身 份 证','身份证号码','身份证号'], old_dict)
|
||||
new_dict['honor'] = dict_get_mul_key(['奖惩情况', '奖惩 情况'], old_dict)
|
||||
new_dict['work_text'] = dict_get_mul_key(['简历', '简 历'], old_dict)
|
||||
return new_dict
|
||||
|
||||
|
||||
def _clone_border_side(side):
    """Return a new Side with the same style and colour, or None if absent."""
    if not side:
        return None
    return styles.Side(style=side.style, color=side.color)


def _copy_cell_format(source_cell, target_cell):
    """Copy named style, alignment and outer borders; the value is untouched."""
    target_cell.style = source_cell.style

    alignment = source_cell.alignment
    if alignment:
        # Alignment has to be rebuilt explicitly for the new cell.
        target_cell.alignment = styles.Alignment(
            horizontal=alignment.horizontal,
            vertical=alignment.vertical,
            text_rotation=alignment.text_rotation,
            wrap_text=alignment.wrap_text,
            shrink_to_fit=alignment.shrink_to_fit,
            indent=alignment.indent,
        )

    border = getattr(source_cell, 'border', None)
    if border:
        target_cell.border = styles.Border(
            left=_clone_border_side(border.left),
            right=_clone_border_side(border.right),
            top=_clone_border_side(border.top),
            bottom=_clone_border_side(border.bottom),
        )


def _find_placeholder_columns(ws, template_row, column_count):
    """Map column index -> first ``{{field}}`` name found in the template row."""
    pattern = re.compile(r'\{\{(\w+)\}\}')
    placeholder_columns = {}
    for col in range(1, column_count + 1):
        value = ws.cell(row=template_row, column=col).value
        if value and isinstance(value, str) and '{{' in value:
            match = pattern.search(value)
            if match:
                placeholder_columns[col] = match.group(1)
    return placeholder_columns


def format_and_write_excel_file(dir_id, data_list, template_row=5):
    """Fill ``template.xlsx`` with ``data_list`` and save it as ``<dir_id>_out.xlsx``.

    The template is loaded from the current working directory. Cells in
    ``template_row`` that contain ``{{field}}`` mark the columns to fill; the
    first record is written into the template row itself and each further
    record gets a newly inserted row that copies the formatting of the row
    above it. The result is saved to ``BASE_PATH/<dir_id>/<dir_id>_out.xlsx``.

    Args:
        dir_id: Task folder ID; used for the output directory and file name.
        data_list: List of dicts keyed by placeholder field name.
        template_row: 1-based row index holding the placeholders.

    Returns:
        None. Any failure is logged (with traceback) and not re-raised.
    """
    logger.info("Start to format and write excel file ")
    try:
        outpath = pathlib.Path(BASE_PATH).joinpath(dir_id)
        # Saving fails if the task folder has not been created yet.
        outpath.mkdir(parents=True, exist_ok=True)
        output_path = outpath.joinpath(dir_id + '_out.xlsx')
        template_path = Path.cwd().joinpath('template.xlsx')

        wb = load_workbook(template_path)
        ws = wb.active
        # Inserting rows does not change the column count; read it once.
        column_count = ws.max_column

        placeholder_columns = _find_placeholder_columns(
            ws, template_row, column_count
        )
        logger.info(f"找到占位符列: {placeholder_columns}")

        if not data_list:
            # Nothing to write: do not leave raw "{{field}}" text in the output.
            for col in placeholder_columns:
                ws.cell(row=template_row, column=col).value = None

        for index, data in enumerate(data_list):
            target_row = template_row + index

            if index > 0:
                # Insert a new row and copy the formatting of the row above.
                ws.insert_rows(target_row)
                for col in range(1, column_count + 1):
                    _copy_cell_format(
                        ws.cell(row=target_row - 1, column=col),
                        ws.cell(row=target_row, column=col),
                    )

            # Fill in the data; missing or None fields leave the cell blank.
            for col, field in placeholder_columns.items():
                ws.cell(row=target_row, column=col).value = data.get(field)

        wb.save(output_path)
    except Exception as e:
        # logger.exception keeps the traceback, which the old message lost.
        logger.exception(f"format and write excel file failed {e}")
|
||||
|
||||
|
||||
def format_and_write_file(dir_id: str, ctx: dict):
    """Render ``template.docx`` with ``ctx`` and save it under the task folder.

    The template is loaded from the current working directory and the result
    is written to ``BASE_PATH/<dir_id>/<name>.docx``, where ``<name>`` is
    ``ctx['name']`` or a random UUID when no usable name is present.

    Args:
        dir_id: Task folder ID.
        ctx: Template context passed to ``DocxTemplate.render``.
    """
    logger.info(f'format_and_write_file dir id is {dir_id}')

    # ctx.get('name', default) only falls back when the key is absent; an
    # empty or None name would produce ".docx" or a TypeError, so fall back
    # on any falsy value and coerce to str.
    raw_name = ctx.get('name')
    user_name = str(raw_name).strip() if raw_name else ''
    # Characters that are not allowed in file names would break the save.
    user_name = re.sub(r'[\\/:*?"<>|]', '_', user_name)
    if not user_name:
        user_name = str(uuid.uuid4())

    template_path = Path.cwd().joinpath('template.docx')
    logger.debug("docx template path: %s", template_path)
    template = DocxTemplate(template_path)
    logger.debug("ctx %s", ctx)
    template.render(ctx)

    out_dir = pathlib.Path(BASE_PATH).joinpath(dir_id)
    out_dir.mkdir(parents=True, exist_ok=True)
    # NOTE(review): two records with the same name overwrite each other's
    # file - confirm whether that is acceptable for callers.
    output_path = out_dir.joinpath(user_name + '.docx')
    logger.debug("writing docx to %s", output_path)
    template.save(output_path)
|
||||
|
||||
|
||||
|
||||
|
||||
def format_excel_to_words(dir_id: str, dict_data_list: list[dict]):
    """Export parsed records to the summary workbook, the DB and Word files.

    Steps, in order:
      1. Normalise and code the records, then write ``<dir_id>_out.xlsx``.
      2. Store the coded records as JSON on the ``DBEXCEL`` row of ``dir_id``
         for later aggregation, together with the task status.
      3. Render one Word document per record.

    Args:
        dir_id: Task folder ID (also the ``DBEXCEL`` primary key).
        dict_data_list: Raw records; nothing happens when it is empty or None.
    """
    if not dict_data_list:
        return
    logger.info("dict_data_list is {0}".format(dict_data_list))

    excel_data_list = map_data(
        [convert_excel_data(data) for data in dict_data_list]
    )
    logger.info(f"excel map data is {excel_data_list}")
    format_and_write_excel_file(dir_id, excel_data_list)

    # The writer logs its own errors instead of raising, so check for the
    # output file rather than unconditionally recording success.
    out_name = dir_id + '_out.xlsx'
    excel_written = pathlib.Path(BASE_PATH).joinpath(dir_id, out_name).exists()

    # Also persist a copy in the database for later aggregated exports.
    session = SqliteSqlalchemy().session
    try:
        save_data = json.dumps(excel_data_list, ensure_ascii=False)
        if excel_written:
            values = dict(status=1, file_name=out_name, excel_info=save_data)
        else:
            values = dict(
                status=2,
                excel_info=save_data,
                message='excel output file was not generated',
            )
        session.execute(
            update(DBEXCEL).where(DBEXCEL.id == dir_id).values(**values)
        )
        session.commit()
    except Exception:
        # Best effort, as before - but no longer silent.
        logger.exception("Failed to update DBEXCEL for dir_id %s", dir_id)
        session.rollback()
    finally:
        session.close()

    for dict_data in dict_data_list:
        new_data = convert_data(dict_data)
        logger.debug("converted word data: %s", new_data)
        format_and_write_file(dir_id, new_data)
|
||||
|
||||
BIN
service/template.xlsx
Normal file
BIN
service/template.xlsx
Normal file
Binary file not shown.
BIN
template.docx
Normal file
BIN
template.docx
Normal file
Binary file not shown.
BIN
template.xlsx
Normal file
BIN
template.xlsx
Normal file
Binary file not shown.
Reference in New Issue
Block a user