Compare commits

...

4 Commits

Author SHA1 Message Date
yujj128
5f3c61c18c Merge branch 'dev' of http://106.13.42.156:33077/lei_y601/yj_resume 2025-12-06 18:00:31 +08:00
yujj128
0f666f18c1 空值特殊处理 2025-12-06 18:00:04 +08:00
雷雨
c00328ed8d feat:处理文件修改为异步 2025-12-06 17:39:21 +08:00
yujj128
16583dbb06 简历提取,写入 2025-12-06 17:04:05 +08:00
5 changed files with 22 additions and 16 deletions

View File

@@ -4,7 +4,7 @@ from sqlalchemy.orm import declarative_base, sessionmaker
Base = declarative_base()
from decouple import config
DB_PATH = config('DB_PATH', default='D://PycharmProject//yj_resume//main.sqlite3')
DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')
class DBTASK(Base):

15
main.py
View File

@@ -2,7 +2,7 @@ from fastapi import FastAPI
import uvicorn
from fastapi import FastAPI, File, UploadFile, HTTPException
from typing import List
from service.file_service import check_and_create_directory, upload_and_save_file,fetch_files
from service.file_service import check_and_create_directory, upload_and_save_file, fetch_files
from service import excel_service
from service.db_service import get_task_list
from fastapi.responses import FileResponse
@@ -14,7 +14,8 @@ logger = logging.getLogger(__name__)
app = FastAPI()
import concurrent.futures
executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
@app.get("/api/v1/hw")
def read_root():
    """Return a fixed greeting payload for the ``/api/v1/hw`` GET route."""
    greeting = {"Hello": "World"}
    return greeting
@@ -26,11 +27,12 @@ async def create_upload_files(files: List[UploadFile] = File(...)):
dir_id = check_and_create_directory(files)
if not dir_id:
return {"result": False, "code": 500, "message": "create directory failed"}
flag, message= await upload_and_save_file(dir_id, files)
flag, message = await upload_and_save_file(dir_id, files)
logger.info(f"flag is {flag}")
if flag:
flag,message = await fetch_files(dir_id)
return {"result": flag, "message": message,"task_id": dir_id}
#flag, message = await fetch_files(dir_id)
executor.submit(fetch_files, dir_id)
return {"result": flag, "message": message, "task_id": dir_id}
@app.get("/export_task_data_to_excel")
@@ -52,4 +54,5 @@ def parse_task_list():
if __name__ == '__main__':
uvicorn.run(app, host="127.0.0.1", port=3006)
logger.info("start server")
uvicorn.run(app, host="0.0.0.0", port=3006)

View File

@@ -4,8 +4,8 @@ import pandas as pd
import pathlib
from decouple import config
# BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//')
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
#BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
# 导出数据到excel

View File

@@ -16,15 +16,16 @@ import logging
from service.parse_resume2_doc import extra_resume
logger = logging.getLogger(__name__)
BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//')
#BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
def check_and_create_directory(files):
logger.info("check_and_create_directory in service")
# 先创建一个task
if not files or len(files) == 0:
logger.warning("check_and_create_directory is empty")
return None
id = str(uuid.uuid4())
current_time = datetime.now()
@@ -68,17 +69,17 @@ async def upload_and_save_file(dir_id, files: List[UploadFile]) -> (bool, str):
except Exception as e:
print(f"Failed to save DBRESUME error {e}")
session.rollback()
return False, f"Failed to save DBRESUME error {e}",[]
return False, f"Failed to save DBRESUME error {e}"
finally:
session.close()
return True, "success"
async def fetch_files(dir_id) -> (bool, str):
def fetch_files(dir_id) -> (bool, str):
logger.info(f"start fetching files task {dir_id} in service")
if not os.path.exists(BASE_PATH):
logger.info(f"目录{BASE_PATH}不存在")
return None
return False, f"Failed to fetch file 目录{BASE_PATH}不存在"
file_extensions = ['.docx', '.doc']
files_list = []
dir_path = pathlib.Path(BASE_PATH).joinpath(dir_id)

View File

@@ -124,10 +124,12 @@ class EnhancedDocxExtractor:
spec_coll = ['全日制教育','在职教育']
if current_key_cell['text'].replace('\n','') in spec_coll :
if not value_cell['text']:
value_cell['text'] = 'False'
value_cell['text'] = ""
else:
value_cell['text'] = 'True'
value_cell['text'] = ''
if not value_cell['text']:
value_cell['text'] = "None"
if value_cell['text'] and (key_row, key_col + 1) not in visited:
# 检查这个值是否与前一个键提取的值相同(可能是合并单元格)
if not self._is_key_duplicate_merged_cell(structure[key_row][key_col]['text'], kv_pairs):