feat: add file formatting, download, and compression
db/sql_db.py
@@ -12,6 +12,7 @@ class DBTASK(Base):
     id = Column(String(100), primary_key=True)
     name = Column(String(100), nullable=False)
     create_time = Column(DateTime, nullable=False)
+    task_type = Column(String(20), nullable=False)
     # 0 means pending, 1 success, 2 failed
     status = Column(Integer, nullable=False, default=0)
     success_num = Column(Integer, nullable=False, default=0)
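The new `task_type` column is the discriminator that lets parse tasks and format tasks share the single `DBTASK` table. A minimal sketch of writing and reading a row under this schema, assuming the model and session factory from `db.sql_db` (the column values are illustrative only, not from the commit):

    from datetime import datetime
    import uuid

    from db.sql_db import DBTASK, SqliteSqlalchemy

    session = SqliteSqlalchemy().session
    # A format task; status 0 = pending, per the comment on the model.
    task = DBTASK(id=str(uuid.uuid4()), name="demo task", create_time=datetime.now(),
                  task_type='format', status=0, success_num=0, total_num=1, fail_num=0)
    session.add(task)
    session.commit()

    # Newest format tasks first, mirroring what get_task_list('format') returns.
    rows = (session.query(DBTASK)
            .filter(DBTASK.task_type == 'format')
            .order_by(DBTASK.create_time.desc())
            .all())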
main.py
@@ -2,40 +2,45 @@ from fastapi import FastAPI
 import uvicorn
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from typing import List
-from service.file_service import check_and_create_directory, upload_and_save_file, fetch_files
+from service.file_service import download_format_words, check_and_create_directory, upload_and_format_file, \
+    upload_and_save_file, fetch_files, fetch_and_format_file
 from service import excel_service
 from service.db_service import get_task_list
 from fastapi.responses import FileResponse
 import threading
 from logging_config import LOGGING_CONFIG
 import logging
+import pandas as pd

 logger = logging.getLogger(__name__)

 app = FastAPI()

 import concurrent.futures
-executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
+executor = concurrent.futures.ThreadPoolExecutor(max_workers=20)


 @app.get("/api/v1/hw")
 def read_root():
     return {"Hello": "World"}


 # Upload files and parse them; the parsing itself runs asynchronously
-@app.post("/upload_files_and_parse")
+@app.post("/yj_resume/upload_files_and_parse")
 async def create_upload_files(files: List[UploadFile] = File(...)):
-    dir_id = check_and_create_directory(files)
+    dir_id = check_and_create_directory(files, 'parse')
     if not dir_id:
         return {"result": False, "code": 500, "message": "create directory failed"}
     flag, message = await upload_and_save_file(dir_id, files)
     logger.info(f"flag is {flag}")
     if flag:
         # flag, message = await fetch_files(dir_id)
         executor.submit(fetch_files, dir_id)
     return {"result": flag, "message": message, "task_id": dir_id}


-@app.get("/export_task_data_to_excel")
+@app.get("/yj_resume/export_task_data_to_excel")
 def export_task_data_to_excel(task_id: str):
     path_xx = excel_service.export_task_data_to_excel(task_id)
     if not path_xx:
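One caveat on the fire-and-forget `executor.submit(fetch_files, dir_id)` pattern: when the returned `Future` is discarded, any exception raised inside the worker is silently swallowed. A hedged sketch of one way to surface such failures in the log; the `_log_failure` callback and `risky_task` stand-in are illustrative, not part of this change:

    import concurrent.futures
    import logging

    logger = logging.getLogger(__name__)
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=20)

    def _log_failure(fut):
        # fut.exception() returns whatever the worker raised, if anything.
        exc = fut.exception()
        if exc is not None:
            logger.error("background task failed", exc_info=exc)

    def risky_task(dir_id):
        # Stand-in for fetch_files(dir_id); raises to demonstrate the callback.
        raise RuntimeError(f"task {dir_id} failed")

    future = executor.submit(risky_task, "demo-dir")
    future.add_done_callback(_log_failure)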
@@ -47,12 +52,43 @@ def export_task_data_to_excel(task_id: str):
     )


-@app.get("/parse_task_list")
+@app.get("/yj_resume/parse_task_list")
 def parse_task_list():
-    data = get_task_list()
+    data = get_task_list('parse')
     return {"data": data, "code": 200}


+@app.get("/yj_resume/format_task_list")
+def format_task_list():
+    data = get_task_list('format')
+    return {"data": data, "code": 200}
+
+
+@app.post("/yj_resume/upload_files_and_format")
+async def upload_files_and_format(files: List[UploadFile] = File(...)):
+    dir_id = check_and_create_directory(files, 'format')
+    if not dir_id:
+        return {"result": False, "code": 500, "message": "create directory failed"}
+    flag, message = await upload_and_format_file(dir_id, files)
+    logger.info(f"flag is {flag}")
+    if flag:
+        executor.submit(fetch_and_format_file, dir_id)
+    return {"result": flag, "message": message, "task_id": dir_id}
+
+
+@app.get("/yj_resume/download_format_words")
+def download_format_words_route(task_id: str):
+    path_xx = download_format_words(task_id)
+    if not path_xx:
+        raise HTTPException(status_code=404, detail="file not found")
+    return FileResponse(
+        path=path_xx,
+        media_type="application/octet-stream",  # generic binary stream
+        filename=f"{task_id}.zip"  # file name the browser uses when saving
+    )


 if __name__ == '__main__':
     logger.info("start server")
     uvicorn.run(app, host="0.0.0.0", port=3006)
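A quick way to exercise the new download route is FastAPI's bundled `TestClient`. The sketch below is illustrative only and assumes a task whose zip archive already exists under `ZIP_PATH`:

    from fastapi.testclient import TestClient

    from main import app

    client = TestClient(app)
    resp = client.get("/yj_resume/download_format_words", params={"task_id": "demo-task-id"})
    if resp.status_code == 200:
        # The server streams the archive; save it under the name the route advertises.
        with open("demo-task-id.zip", "wb") as fh:
            fh.write(resp.content)
    else:
        print(resp.status_code, resp.json())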

service/db_service.py
@@ -1,9 +1,10 @@
 from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy


-def get_task_list():
-    task_list = SqliteSqlalchemy().session.query(DBTASK).order_by(DBTASK.create_time.desc()).all()
-    result=[]
+def get_task_list(task_type):
+    # filter(), not filter_by(): filter_by() accepts only keyword arguments,
+    # so a column expression like DBTASK.task_type == task_type would raise a TypeError.
+    task_list = SqliteSqlalchemy().session.query(DBTASK).filter(DBTASK.task_type == task_type).order_by(
+        DBTASK.create_time.desc()).all()
+    result = []
     for task in task_list:
         result.append({
             "id": task.id,
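For reference, the keyword form would also work here; the two spellings below are equivalent queries (a sketch under the same `db.sql_db` assumptions as above):

    from db.sql_db import DBTASK, SqliteSqlalchemy

    session = SqliteSqlalchemy().session
    # Column-expression form, as used in get_task_list:
    parse_tasks = session.query(DBTASK).filter(DBTASK.task_type == 'parse').all()
    # Keyword form; filter_by() targets the query's primary entity:
    parse_tasks_kw = session.query(DBTASK).filter_by(task_type='parse').all()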

service/file_service.py
@@ -1,6 +1,5 @@
 import json
-

 from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy
 import uuid
 from datetime import datetime
@@ -12,16 +11,19 @@ import os
 import asyncio
 import logging
 from logging_config import LOGGING_CONFIG

+from service.format_template_resume import format_excel_to_words
 from service.parse_resume2_doc import extra_resume

 logger = logging.getLogger(__name__)
-BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
+BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
+ZIP_PATH = config('ZIP_PATH', default='E://pyptoject//yj_resume//zip//')
 import pandas as pd
+import zipfile


-def check_and_create_directory(files):
+def check_and_create_directory(files, task_type):
     logger.info("check_and_create_directory in service")
     # Create a task record first
     if not files or len(files) == 0:
@@ -31,8 +33,8 @@ def check_and_create_directory(files):
     current_time = datetime.now()
     # Format the timestamp as a string
     formatted_time = current_time.strftime("%Y-%m-%d-%H-%M-%S")
-    task = DBTASK(id=id, create_time=datetime.now(), status=0, success_num=0, total_num=len(files),
-                  fail_num=0, name=f"Parse task ({formatted_time})")
+    task = DBTASK(id=id, task_type=task_type, create_time=datetime.now(), status=0, success_num=0,
+                  total_num=len(files), fail_num=0, name=f"Parse task ({formatted_time})")

     session = SqliteSqlalchemy().session
     try:
@@ -74,8 +76,8 @@ async def upload_and_save_file(dir_id, files: List[UploadFile]) -> (bool, str):
     session.close()
     return True, "success"


 def fetch_files(dir_id) -> (bool, str):
     logger.info(f"start fetching files task {dir_id} in service")
     if not os.path.exists(BASE_PATH):
         logger.info(f"directory {BASE_PATH} does not exist")
@@ -83,13 +85,13 @@ def fetch_files(dir_id) -> (bool, str):
     file_extensions = ['.docx', '.doc']
     files_list = []
     dir_path = pathlib.Path(BASE_PATH).joinpath(dir_id)
-    for root,dirs,files in os.walk(dir_path):
+    for root, dirs, files in os.walk(dir_path):
         for file in files:
-            _,ext = os.path.splitext(file)
+            _, ext = os.path.splitext(file)
             if file_extensions and ext not in file_extensions:
                 logger.error(f"file {file} has an unexpected extension")
                 continue
-            file_path = os.path.join(root,file)
+            file_path = os.path.join(root, file)
             if os.path.isfile(file_path):
                 files_list.append(file_path)
             else:
@@ -106,10 +108,10 @@ def fetch_files(dir_id) -> (bool, str):
         logger.info(f"file content is {result}")
         if not result:
             logger.warning(f"file {file_name} extracted empty content")
-            update_fail_mapping.append({'id':id, 'status':0,
-                                        'message': f"task {dir_id} => file {file_name} extracted empty content"})
+            update_fail_mapping.append({'id': id, 'status': 0,
+                                        'message': f"task {dir_id} => file {file_name} extracted empty content"})
             continue
-        update_success_mapping.append({'id':id, 'status':1,'data_info': result})
+        update_success_mapping.append({'id': id, 'status': 1, 'data_info': result})
     session = SqliteSqlalchemy().session
     logger.info(f"update success mapping => {update_success_mapping}")
     logger.info(f"update fail mapping => {update_fail_mapping}")
@@ -120,8 +122,8 @@ def fetch_files(dir_id) -> (bool, str):
     session.bulk_update_mappings(DBRESUME, update_data)

     if update_fail_mapping:
-        session.bulk_update_mappings(DBTASK, [{'id':dir_id, 'status':2, 'success_num':success_num,
-                                               'fail_num':fail_num,'message':f'fail => {update_fail_mapping}'}])
+        session.bulk_update_mappings(DBTASK, [{'id': dir_id, 'status': 2, 'success_num': success_num,
+                                               'fail_num': fail_num, 'message': f'fail => {update_fail_mapping}'}])
     else:
         session.bulk_update_mappings(DBTASK, [{'id': dir_id, 'status': 1,
                                                'success_num': success_num, 'fail_num': fail_num}])
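`bulk_update_mappings` matches each mapping to a row by the model's primary key and turns the remaining keys into SET clauses. A hedged, self-contained sketch of the task-status update performed above (the id and counts are illustrative):

    from db.sql_db import DBTASK, SqliteSqlalchemy

    session = SqliteSqlalchemy().session
    # Each dict must carry the primary key ('id'); the other keys are updated.
    session.bulk_update_mappings(DBTASK, [
        {'id': 'demo-dir-id', 'status': 1, 'success_num': 3, 'fail_num': 0},
    ])
    session.commit()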
@@ -136,5 +138,48 @@ def fetch_files(dir_id) -> (bool, str):
     return True, 'success'
+
+
+async def upload_and_format_file(dir_id, files: List[UploadFile]) -> (bool, str):
+    logger.info(f"upload_and_format_file in service dir_id {dir_id}")
+    pathxx = pathlib.Path(BASE_PATH).joinpath(dir_id)
+    pathxx.mkdir(parents=True, exist_ok=True)
+    for file in files:
+        name, fix = os.path.splitext(file.filename)
+        if fix not in ['.xls', '.xlsx']:
+            continue
+        with open(pathxx.joinpath(dir_id + fix), 'wb') as f:
+            file_content = await file.read()
+            f.write(file_content)
+    return True, "success"
+
+
+def zip_file_folder(dir_id):
+    pathxx = pathlib.Path(BASE_PATH).joinpath(dir_id)
+    output_filename = pathlib.Path(ZIP_PATH).joinpath(dir_id + ".zip")
+    with zipfile.ZipFile(output_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
+        # Walk every file and subdirectory in the folder
+        for root, dirs, files in os.walk(pathxx):
+            for file in files:
+                # Build the full file path
+                file_path = os.path.join(root, file)
+                # Store the file under a path relative to the source folder
+                zipf.write(file_path, arcname=os.path.relpath(file_path, pathxx))
+
+
+def fetch_and_format_file(dir_id) -> (bool, str):
+    logger.info(f"fetch_and_format_file in service dir_id {dir_id}")
+    pathxx = pathlib.Path(BASE_PATH).joinpath(dir_id)
+    pathx_1 = pathxx.joinpath(dir_id + ".xlsx")
+    if not pathx_1.exists():
+        pathx_1 = pathxx.joinpath(dir_id + ".xls")
+    data = pd.read_excel(pathx_1)
+    data_dict = data.to_dict('records')
+    logger.info(f"excel rows: {data_dict}")
+    format_excel_to_words(dir_id, data_dict)
+    zip_file_folder(dir_id)
+    return True, 'success'
+
+
+def download_format_words(task_id):
+    pathxx = pathlib.Path(ZIP_PATH).joinpath(task_id + ".zip")
+    if not pathxx.exists():
+        return None
+    return pathxx
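The `fetch_and_format_file` step above relies on `DataFrame.to_dict('records')`, which turns each spreadsheet row into a flat dict keyed by column header; those dicts are what `convert_data` consumes below. A small illustration with synthetic data:

    import pandas as pd

    df = pd.DataFrame({'姓名': ['张三'], '性别': ['男']})
    print(df.to_dict('records'))  # [{'姓名': '张三', '性别': '男'}] -- one dict per row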

service/format_template_resume.py
@@ -1,6 +1,10 @@
 from docxtpl import DocxTemplate
 from pathlib import Path

+from decouple import config
+import pathlib, logging
+import uuid
+logger = logging.getLogger(__name__)
+BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
 context = {
     'name': '张三',
     'sex': '男',
@@ -29,7 +33,58 @@ context = {
     '''

 }
 file_path = Path.cwd().joinpath('template.docx')
 template = DocxTemplate(file_path)
 template.render(context)
 template.save('E://resu//output.docx')
+
+
+def dict_get_mul_key(keys: list, dict_data: dict):
+    # Return the value of the first key present; Excel headers vary in internal spacing.
+    for k in keys:
+        if k in dict_data:
+            return dict_data[k]
+    return ''
+
+
+def convert_data(old_dict: dict) -> dict:
+    new_dict = {}
+    new_dict['name'] = dict_get_mul_key(['姓名', '姓 名'], old_dict)
+    new_dict['sex'] = dict_get_mul_key(['性别', '性 别'], old_dict)
+    new_dict['nation'] = dict_get_mul_key(['民族', '民 族'], old_dict)
+    new_dict['brith'] = dict_get_mul_key(['出生年月', '出生年月(岁)'], old_dict)
+    new_dict['origin'] = dict_get_mul_key(['籍贯', '籍 贯'], old_dict)
+    new_dict['address'] = dict_get_mul_key(['出 生 地', '出生地'], old_dict)
+    new_dict['education'] = dict_get_mul_key(['学历', '学 历'], old_dict)
+    new_dict['degree'] = dict_get_mul_key(['学位', '学 位'], old_dict)
+    new_dict['politics'] = '党员' if len(dict_get_mul_key(['入党时间', '入 党 时 间'], old_dict)) > 0 else '群众'
+    new_dict['department'] = dict_get_mul_key(['部门', '部 门'], old_dict)
+    new_dict['position'] = dict_get_mul_key(['现任职务', '现 任 职 务'], old_dict)
+    new_dict['phone'] = dict_get_mul_key(['手机号', '手 机 号'], old_dict)
+    new_dict['title'] = dict_get_mul_key(['专业技术职务', '职 称'], old_dict)
+    new_dict['start_work_time'] = dict_get_mul_key(['开始工作时间', '开始 工作 时间'], old_dict)
+    new_dict['id_number'] = dict_get_mul_key(['身份证', '身 份 证'], old_dict)
+    new_dict['honor'] = dict_get_mul_key(['奖惩情况', '奖惩 情况'], old_dict)
+    new_dict['work_text'] = dict_get_mul_key(['简历', '简 历'], old_dict)
+    return new_dict
+
+
+def format_and_write_file(dir_id: str, ctx: dict):
+    logger.info(f'format_and_write_file dir id is {dir_id}')
+    user_name = ctx.get('name', str(uuid.uuid4()))
+    file_path = Path.cwd().joinpath('template.docx')
+    logger.info(f'template path is {file_path}')
+    template = DocxTemplate(file_path)
+    template.render(ctx)
+    pathxx = pathlib.Path(BASE_PATH).joinpath(dir_id)
+    pathxx = pathxx.joinpath(user_name + '.docx')
+    logger.info(f'output path is {pathxx}')
+    template.save(pathxx)
+
+
+def format_excel_to_words(dir_id: str, dict_data_list: list[dict]):
+    if not dict_data_list or len(dict_data_list) < 1:
+        return
+    for dict_data in dict_data_list:
+        # TODO: also persist a copy to the database for later aggregation into Excel
+        new_data = convert_data(dict_data)
+        logger.info(f'converted row: {new_data}')
+        format_and_write_file(dir_id, new_data)
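docxtpl renders Word files through Jinja2, so `template.docx` is an ordinary .docx whose text carries tags like `{{ name }}` and `{{ work_text }}` matching the keys produced by `convert_data`. A minimal end-to-end sketch; the template path and keys here are assumptions based on the code above:

    from docxtpl import DocxTemplate

    # template.docx must contain Jinja2 tags such as {{ name }} and {{ sex }}.
    template = DocxTemplate('template.docx')
    template.render({'name': '张三', 'sex': '男'})
    template.save('output.docx')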