From a124651a7efdccdb959b098e2a13383a367866e1 Mon Sep 17 00:00:00 2001
From: yujj128
Date: Sat, 6 Dec 2025 16:44:53 +0800
Subject: [PATCH] =?UTF-8?q?=E7=AE=80=E5=8E=86=E6=8F=90=E5=8F=96,=E5=86=99?=
 =?UTF-8?q?=E5=85=A5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review amendments (this section is ignored by git-am):
 - fetch_files() and upload_and_save_file() return a (flag, message) pair on
   every path, because main.py unpacks exactly two values.
 - fetch_files() tests for an empty extraction before json.dumps(), isolates
   per-file extraction errors, and keeps resume content out of INFO logs.
 - The stray "from pymupdf import message" import is dropped; the local name
   "id" no longer shadows the builtin.
 - The commit id and the post-image blob ids on the "index" lines of
   logging_config.py, main.py, service/file_service.py and
   service/parse_resume2_doc.py predate these amendments.

 db/sql_db.py                 |  3 +-
 logging_config.py            | 62 +++++++++++++++++++++++++
 main.py                      | 13 ++++--
 service/excel_service.py     |  3 +-
 service/file_service.py      | 99 ++++++++++++++++++++++++++++++++++++---
 service/parse_resume2_doc.py |  8 ++--
 6 files changed, 172 insertions(+), 16 deletions(-)
 create mode 100644 logging_config.py

diff --git a/db/sql_db.py b/db/sql_db.py
index c95daf9..c3aff6d 100644
--- a/db/sql_db.py
+++ b/db/sql_db.py
@@ -3,7 +3,8 @@ from sqlalchemy.orm import declarative_base, sessionmaker
 # 申明基类对象
 Base = declarative_base()
 from decouple import config
-DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')
+
+DB_PATH = config('DB_PATH', default='D://PycharmProject//yj_resume//main.sqlite3')
 
 
 class DBTASK(Base):
diff --git a/logging_config.py b/logging_config.py
new file mode 100644
index 0000000..bdab2b1
--- /dev/null
+++ b/logging_config.py
@@ -0,0 +1,62 @@
+"""Central logging configuration; importing this module applies it."""
+import logging
+import logging.config
+from pathlib import Path
+
+# Make sure the logs directory exists before the file handler opens its file
+log_dir = Path("logs")
+log_dir.mkdir(exist_ok=True)
+
+LOGGING_CONFIG = {
+    "version": 1,
+    "disable_existing_loggers": False,
+    "formatters": {
+        "default": {
+            "format": "%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s",
+        },
+        "detailed": {
+            "format": "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s",
+        }
+    },
+    "handlers": {
+        "console": {
+            "class": "logging.StreamHandler",
+            "level": "INFO",
+            "formatter": "default",
+            "stream": "ext://sys.stdout"
+        },
+        "file": {
+            "class": "logging.handlers.RotatingFileHandler",  # rotates automatically
+            "level": "INFO",
+            "formatter": "detailed",
+            "filename": str(log_dir / "resume.log"),
+            "maxBytes": 10485760,  # 10MB
+            "backupCount": 5,  # keep 5 backups
+            "encoding": "utf8"
+        },
+    },
+    "root": {
+        "level": "INFO",
+        "handlers": ["console", "file"]
+    },
+    "loggers": {
+        "uvicorn": {
+            "level": "INFO",
+            "handlers": ["console", "file"],
+            "propagate": False
+        },
+        "uvicorn.error": {
+            "level": "INFO",
+            "handlers": ["console", "file"],
+            "propagate": False
+        },
+        "uvicorn.access": {
+            "level": "WARNING",  # warnings and above only, to avoid flooding the log
+            "handlers": ["file"],  # written to the file only
+            "propagate": False
+        }
+    }
+}
+
+# Apply the configuration
+logging.config.dictConfig(LOGGING_CONFIG)
diff --git a/main.py b/main.py
index 365622a..792c6a5 100644
--- a/main.py
+++ b/main.py
@@ -2,8 +2,13 @@ from fastapi import FastAPI
 import uvicorn
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from typing import List
-from service.file_service import check_and_create_directory, upload_and_save_file
+from service.file_service import check_and_create_directory, upload_and_save_file, fetch_files
 from service import excel_service
+import threading
+from logging_config import LOGGING_CONFIG  # noqa: F401 -- importing it applies the logging config
+import logging
+
+logger = logging.getLogger(__name__)
 
 app = FastAPI()
 
@@ -19,11 +24,11 @@ async def create_upload_files(files: List[UploadFile] = File(...)):
     dir_id = check_and_create_directory(files)
     if not dir_id:
         return {"result": False, "code": 500, "message": "create directory failed"}
     flag, message = await upload_and_save_file(dir_id, files)
+    logger.info(f"flag is {flag}")
     if flag:
-        # 触发异步任务,解析文件 TODO
-        pass
-    return {"result": flag, "message": message}
+        flag, message = await fetch_files(dir_id)
+    return {"result": flag, "message": message, "task_id": dir_id}
 
 
 @app.get("/export_task_data_to_excel")
diff --git a/service/excel_service.py b/service/excel_service.py
index 3920057..da0adec 100644
--- a/service/excel_service.py
+++ b/service/excel_service.py
@@ -4,7 +4,8 @@ import pandas as pd
 import pathlib
 from decouple import config
 
-BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
+# BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
+BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//')
 
 
 # 导出数据到excel
diff --git a/service/file_service.py b/service/file_service.py
index 38bdc94..22955d1 100644
--- a/service/file_service.py
+++ b/service/file_service.py
@@ -1,3 +1,5 @@
+import json
+
 from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy
 import uuid
 from datetime import datetime
@@ -6,11 +8,17 @@ import pathlib
 from fastapi import File, UploadFile
 from typing import List
 import os
+import asyncio
+import logging
+
+from service.parse_resume2_doc import extra_resume
 
-BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
+logger = logging.getLogger(__name__)
+BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//')
 
 
 def check_and_create_directory(files):
+    logger.info("check_and_create_directory in service")
     # 先创建一个task
     if not files or len(files) == 0:
         return None
@@ -32,19 +40,19 @@ def check_and_create_directory(files):
 
 
 async def upload_and_save_file(dir_id, files: List[UploadFile]) -> (bool, str):
+    logger.info(f"upload_and_save_file in service dir_id {dir_id}")
     pathxx = pathlib.Path(BASE_PATH).joinpath(dir_id)
     pathxx.mkdir(parents=True, exist_ok=True)
     data = []
-    i = 0
     for file in files:
         name, fix = os.path.splitext(file.filename)
         if fix not in ['.doc', '.docx']:
             continue
-        i = i + 1
-        with open(pathxx.joinpath(str(i) + fix), 'wb') as f:
+        resume_id = str(uuid.uuid4())
+        with open(pathxx.joinpath(resume_id + fix), 'wb') as f:
             file_content = await file.read()
             f.write(file_content)
-        data.append(DBRESUME(id=str(uuid.uuid4()), task_id=dir_id, status=0, file_name=str(i) + fix))
+        data.append(DBRESUME(id=resume_id, task_id=dir_id, status=0, file_name=resume_id + fix))
     session = SqliteSqlalchemy().session
     try:
         session.bulk_save_objects(data)
@@ -52,8 +60,87 @@ async def upload_and_save_file(dir_id, files: List[UploadFile]) -> (bool, str):
     except Exception as e:
-        print(f"Failed to save DBRESUME error {e}")
+        logger.error(f"Failed to save DBRESUME error {e}")
         session.rollback()
         return False, f"Failed to save DBRESUME error {e}"
     finally:
         session.close()
     return True, "success"
 
+
+async def fetch_files(dir_id) -> (bool, str):
+    """Parse every stored resume of a task and persist the results.
+
+    Walks ``BASE_PATH/<dir_id>`` for ``.doc``/``.docx`` files, runs
+    ``extra_resume`` on each and writes the JSON-encoded result to the
+    ``DBRESUME`` row whose id equals the file stem, then records the
+    success/failure counters on the ``DBTASK`` row.
+
+    Returns a ``(flag, message)`` tuple on every path so that callers can
+    always unpack it.
+    """
+    logger.info(f"start fetching files task {dir_id} in service")
+    if not os.path.exists(BASE_PATH):
+        logger.error(f"目录{BASE_PATH}不存在")
+        return False, f"目录{BASE_PATH}不存在"
+
+    file_extensions = ('.docx', '.doc')
+    files_list = []
+    dir_path = pathlib.Path(BASE_PATH).joinpath(dir_id)
+    for root, _dirs, files in os.walk(dir_path):
+        for file in files:
+            if os.path.splitext(file)[1] not in file_extensions:
+                logger.error(f"文件{file}格式不符合预期")
+                continue
+            file_path = os.path.join(root, file)
+            if os.path.isfile(file_path):
+                files_list.append(file_path)
+            else:
+                logger.error(f"路径下{file_path}不是文件")
+
+    update_success_mapping = []
+    update_fail_mapping = []
+    for file in files_list:
+        file_name = os.path.basename(file)
+        # upload_and_save_file() stores each file as "<DBRESUME.id><ext>".
+        resume_id = os.path.splitext(file_name)[0]
+        try:
+            result = extra_resume(file)
+        except Exception as e:
+            # One unparsable resume must not abort the rest of the task.
+            logger.exception(f"file {file_name} 提取失败")
+            update_fail_mapping.append({'id': resume_id, 'status': 0,
+                                        'message': f"task {dir_id} => file {file_name} 提取失败: {e}"})
+            continue
+        # Test for emptiness BEFORE serialising: json.dumps() turns an empty
+        # result into "{}"/"null", which is a truthy string.
+        if not result:
+            logger.warning(f"file {file_name} 提取为空")
+            update_fail_mapping.append({'id': resume_id, 'status': 0,
+                                        'message': f"task {dir_id} => file {file_name} 提取为空"})
+            continue
+        update_success_mapping.append({'id': resume_id, 'status': 1,
+                                       'data_info': json.dumps(result, ensure_ascii=False)})
+
+    success_num = len(update_success_mapping)
+    fail_num = len(update_fail_mapping)
+    logger.info(f"task {dir_id} parsed => success {success_num}, fail {fail_num}")
+    # Parsed resume content is personal data: keep it out of INFO-level logs.
+    logger.debug(f"update success mapping => {update_success_mapping}")
+    logger.debug(f"update fail mapping => {update_fail_mapping}")
+
+    task_update = {'id': dir_id, 'status': 1, 'success_num': success_num, 'fail_num': fail_num}
+    if update_fail_mapping:
+        # Any failed resume switches the task to status 2 and records the failures.
+        task_update.update(status=2, message=f'fail => {update_fail_mapping}')
+
+    session = SqliteSqlalchemy().session
+    try:
+        session.bulk_update_mappings(DBRESUME, update_success_mapping + update_fail_mapping)
+        session.bulk_update_mappings(DBTASK, [task_update])
+        session.commit()
+    except Exception as e:
+        logger.error(f"update failed => task {dir_id} error {e}")
+        session.rollback()
+        return False, f"Failed to update DBRESUME error {e}"
+    finally:
+        session.close()
+    return True, 'success'
diff --git a/service/parse_resume2_doc.py b/service/parse_resume2_doc.py
index f6aa705..d4664ef 100644
--- a/service/parse_resume2_doc.py
+++ b/service/parse_resume2_doc.py
@@ -376,9 +376,9 @@ def extra_resume(file_path):
     return res
 
 
-if __name__ == "__main__":
-    # 使用方法
-    docx_file = "../1.报名登记表.docx"  # 替换为你的文件
-    print(extra_resume(docx_file))
+# if __name__ == "__main__":
+#     # Usage
+#     docx_file = "../1.报名登记表.docx"  # replace with your own file
+#     print(extra_resume(docx_file))
 
 