Extract resumes and write results to the database

db/sql_db.py
@@ -3,7 +3,8 @@ from sqlalchemy.orm import declarative_base, sessionmaker
 # Declare the declarative base object
 Base = declarative_base()
 from decouple import config
-DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')
+
+DB_PATH = config('DB_PATH', default='D://PycharmProject//yj_resume//main.sqlite3')
 
 
 class DBTASK(Base):
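
A side note on the DB_PATH change above: python-decouple resolves a key from os.environ first, then from a .env or settings.ini file, and only then falls back to the default= literal, so the machine-specific Windows path is just a last resort. A minimal sketch of that resolution order (the .env value shown is hypothetical, not part of the commit):

    # python-decouple lookup order: os.environ -> .env / settings.ini -> default=.
    # With a .env file at the project root containing:
    #     DB_PATH=/srv/yj_resume/main.sqlite3
    # the hard-coded Windows default below is never used.
    from decouple import config

    DB_PATH = config('DB_PATH', default='D://PycharmProject//yj_resume//main.sqlite3')
    DB_URL = f"sqlite:///{DB_PATH}"  # the URL form SQLAlchemy's create_engine expects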

logging_config.py (new file, 62 lines)
@@ -0,0 +1,62 @@
+# logging_config.py
+import logging
+import logging.config
+from pathlib import Path
+
+# Make sure the logs directory exists
+log_dir = Path("logs")
+log_dir.mkdir(exist_ok=True)
+
+LOGGING_CONFIG = {
+    "version": 1,
+    "disable_existing_loggers": False,
+    "formatters": {
+        "default": {
+            "format": "%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s",
+        },
+        "detailed": {
+            "format": "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s",
+        }
+    },
+    "handlers": {
+        "console": {
+            "class": "logging.StreamHandler",
+            "level": "INFO",
+            "formatter": "default",
+            "stream": "ext://sys.stdout"
+        },
+        "file": {
+            "class": "logging.handlers.RotatingFileHandler",  # rotates automatically
+            "level": "INFO",
+            "formatter": "detailed",
+            "filename": "logs/resume.log",
+            "maxBytes": 10485760,  # 10 MB
+            "backupCount": 5,  # keep 5 backups
+            "encoding": "utf8"
+        },
+    },
+    "root": {
+        "level": "INFO",
+        "handlers": ["console", "file"]
+    },
+    "loggers": {
+        "uvicorn": {
+            "level": "INFO",
+            "handlers": ["console", "file"],
+            "propagate": False
+        },
+        "uvicorn.error": {
+            "level": "INFO",
+            "handlers": ["console", "file"],
+            "propagate": False
+        },
+        "uvicorn.access": {
+            "level": "WARNING",  # warnings and above only, to avoid flooding the log
+            "handlers": ["file"],  # write to the file only
+            "propagate": False
+        }
+    }
+}
+
+# Apply the configuration
+logging.config.dictConfig(LOGGING_CONFIG)
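
Because logging.config.dictConfig() runs at import time, importing this module anywhere (main.py does `from logging_config import LOGGING_CONFIG`) installs these handlers process-wide. A minimal usage sketch; the consumer module name is hypothetical:

    # some_service.py (hypothetical consumer)
    import logging_config  # noqa: F401 -- the import itself applies dictConfig
    import logging

    logger = logging.getLogger(__name__)   # inherits the root handlers
    logger.info("parsed 3 resumes")        # goes to stdout and logs/resume.log
    logger.debug("raw extractor output")   # dropped: the root level is INFO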

main.py (15 changed lines)
@@ -2,8 +2,13 @@ from fastapi import FastAPI
 import uvicorn
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from typing import List
-from service.file_service import check_and_create_directory, upload_and_save_file
+from service.file_service import check_and_create_directory, upload_and_save_file, fetch_files
 from service import excel_service
+import threading
+from logging_config import LOGGING_CONFIG
+import logging
+
+logger = logging.getLogger(__name__)
 
 app = FastAPI()
 
@@ -19,11 +24,11 @@ async def create_upload_files(files: List[UploadFile] = File(...)):
     dir_id = check_and_create_directory(files)
     if not dir_id:
         return {"result": False, "code": 500, "message": "create directory failed"}
     flag, message = await upload_and_save_file(dir_id, files)
+    logger.info(f"flag is {flag}")
     if flag:
-        # Trigger the async task to parse the files TODO
-        pass
-    return {"result": flag, "message": message}
+        flag, message = await fetch_files(dir_id)
+    return {"result": flag, "message": message, "task_id": dir_id}
 
 
 @app.get("/export_task_data_to_excel")
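
The endpoint now kicks off parsing inline and returns the task id alongside the result. A hypothetical smoke test for it; the POST route path "/uploadfiles/" is an assumption, since the decorator sits outside this hunk:

    from fastapi.testclient import TestClient
    from main import app

    client = TestClient(app)
    files = [("files", ("a.docx", b"fake-docx-bytes",
                        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"))]
    resp = client.post("/uploadfiles/", files=files)  # route path assumed
    print(resp.json())  # expect {"result": ..., "message": ..., "task_id": ...}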

service/excel_service.py
@@ -4,7 +4,8 @@ import pandas as pd
 import pathlib
 from decouple import config
 
-BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
+# BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
+BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//')
 
 
 # Export data to Excel

service/file_service.py
@@ -1,3 +1,7 @@
+import json
+
+from pymupdf import message
+
 from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy
 import uuid
 from datetime import datetime
@@ -6,11 +10,19 @@ import pathlib
 from fastapi import File, UploadFile
 from typing import List
 import os
+import asyncio
+import logging
+
+from service.parse_resume2_doc import extra_resume
+
+logger = logging.getLogger(__name__)
+BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//')
 
-BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
 
 
 def check_and_create_directory(files):
+    logger.info("check_and_create_directory in service")
     # Create a task record first
     if not files or len(files) == 0:
         return None
@@ -32,19 +44,20 @@ def check_and_create_directory(files):
 
 
 async def upload_and_save_file(dir_id, files: List[UploadFile]) -> (bool, str):
+    logger.info(f"upload_and_save_file in service dir_id {dir_id}")
     pathxx = pathlib.Path(BASE_PATH).joinpath(dir_id)
     pathxx.mkdir(parents=True, exist_ok=True)
     data = []
-    i = 0
     for file in files:
         name, fix = os.path.splitext(file.filename)
+        id = str(uuid.uuid4())
         if fix not in ['.doc', '.docx']:
             continue
-        i = i + 1
-        with open(pathxx.joinpath(str(i) + fix), 'wb') as f:
+        with open(pathxx.joinpath(id + fix), 'wb') as f:
             file_content = await file.read()
             f.write(file_content)
-        data.append(DBRESUME(id=str(uuid.uuid4()), task_id=dir_id, status=0, file_name=str(i) + fix))
+        data.append(DBRESUME(id=id, task_id=dir_id, status=0, file_name=id + fix))
     session = SqliteSqlalchemy().session
     try:
         session.bulk_save_objects(data)
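
The switch from a running counter to a UUID above also makes the saved filename double as the DBRESUME primary key, which is what lets fetch_files below map each file back to its row. A tiny sketch of that round trip (the path is hypothetical):

    import os
    import uuid

    rid = str(uuid.uuid4())
    saved_path = "/base/task-1/" + rid + ".docx"   # what upload_and_save_file writes
    # fetch_files recovers the row id from the file stem:
    stem = os.path.splitext(os.path.basename(saved_path))[0]
    assert stem == rid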
@@ -52,8 +65,72 @@ async def upload_and_save_file(dir_id, files: List[UploadFile]) -> (bool, str):
     except Exception as e:
         print(f"Failed to save DBRESUME error {e}")
         session.rollback()
-        return False, f"Failed to save DBRESUME error {e}"
+        return False, f"Failed to save DBRESUME error {e}", []
     finally:
         session.close()
     return True, "success"
+
+
+async def fetch_files(dir_id) -> (bool, str):
+    logger.info(f"start fetching files task {dir_id} in service")
+    if not os.path.exists(BASE_PATH):
+        logger.info(f"directory {BASE_PATH} does not exist")
+        return None
+    file_extensions = ['.docx', '.doc']
+    files_list = []
+    dir_path = pathlib.Path(BASE_PATH).joinpath(dir_id)
+    for root, dirs, files in os.walk(dir_path):
+        for file in files:
+            _, ext = os.path.splitext(file)
+            if file_extensions and ext not in file_extensions:
+                logger.error(f"file {file} has an unexpected extension")
+                continue
+            file_path = os.path.join(root, file)
+            if os.path.isfile(file_path):
+                files_list.append(file_path)
+            else:
+                logger.error(f"path {file_path} is not a file")
+    update_success_mapping = []
+    update_fail_mapping = []
+    for file in files_list:
+        logger.info(f"file is {file} {os.path.basename(file)}")
+        file_name = os.path.basename(file)
+        id = os.path.splitext(file_name)[0]
+        result = extra_resume(file)
+        result = json.dumps(result, ensure_ascii=False)
+        logger.info(f"result type is {type(result)}")
+        logger.info(f"file content is {result}")
+        if not result:
+            logger.warning(f"file {file_name} produced an empty extraction")
+            update_fail_mapping.append({'id': id, 'status': 0,
+                                        'message': f"task {dir_id} => file {file_name} produced an empty extraction"})
+            continue
+        update_success_mapping.append({'id': id, 'status': 1, 'data_info': result})
+    session = SqliteSqlalchemy().session
+    logger.info(f"update success mapping => {update_success_mapping}")
+    logger.info(f"update fail mapping => {update_fail_mapping}")
+    success_num = len(update_success_mapping)
+    fail_num = len(update_fail_mapping)
+    try:
+        update_data = update_success_mapping + update_fail_mapping
+        session.bulk_update_mappings(DBRESUME, update_data)
+
+        if update_fail_mapping:
+            session.bulk_update_mappings(DBTASK, [{'id': dir_id, 'status': 2, 'success_num': success_num,
+                                                   'fail_num': fail_num, 'message': f'fail => {update_fail_mapping}'}])
+        else:
+            session.bulk_update_mappings(DBTASK, [{'id': dir_id, 'status': 1,
+                                                   'success_num': success_num, 'fail_num': fail_num}])
+        session.commit()
+    except Exception as e:
+        logger.error(f"update failed => task {dir_id} error {e}")
+        session.rollback()
+        return False, f"Failed to update DBRESUME error {e}"
+    finally:
+        session.close()
+
+    return True, 'success'
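
fetch_files leans on SQLAlchemy's legacy bulk API: each mapping dict must carry the primary key, and the remaining keys become the SET clause of one UPDATE per row. A minimal, self-contained sketch of that pattern, with a simplified table standing in for DBRESUME:

    from sqlalchemy import create_engine, Column, Integer, String
    from sqlalchemy.orm import declarative_base, sessionmaker

    Base = declarative_base()

    class Resume(Base):
        __tablename__ = "resume"
        id = Column(String, primary_key=True)
        status = Column(Integer, default=0)
        data_info = Column(String)

    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()
    session.add_all([Resume(id="a"), Resume(id="b")])
    session.commit()

    # Each dict must include the primary key; the other keys become SET clauses.
    # Mappings with differing key sets (as in fetch_files) are batched per key set.
    session.bulk_update_mappings(Resume, [
        {"id": "a", "status": 1, "data_info": '{"name": "..."}'},
        {"id": "b", "status": 0},
    ])
    session.commit()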

service/parse_resume2_doc.py
@@ -376,9 +376,9 @@ def extra_resume(file_path):
     return res
 
 
-if __name__ == "__main__":
-    # Usage example
-    docx_file = "../1.报名登记表.docx"  # replace with your own file
-    print(extra_resume(docx_file))
+# if __name__ == "__main__":
+#     # Usage example
+#     docx_file = "../1.报名登记表.docx"  # replace with your own file
+#     print(extra_resume(docx_file))
 
 
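
With the __main__ block commented out, parse_resume2_doc is import-only; a quick manual check from a REPL still works (the sample path is hypothetical, any local .doc/.docx will do):

    from service.parse_resume2_doc import extra_resume

    result = extra_resume("../1.报名登记表.docx")
    print(type(result), result)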