Compare commits
4 commits: ec0995d08a ... 5f3c61c18c
| Author | SHA1 | Date |
|---|---|---|
|  | 5f3c61c18c |  |
|  | 0f666f18c1 |  |
|  | c00328ed8d |  |
|  | 16583dbb06 |  |
```diff
@@ -4,7 +4,7 @@ from sqlalchemy.orm import declarative_base, sessionmaker
 Base = declarative_base()
 from decouple import config
 
-DB_PATH = config('DB_PATH', default='D://PycharmProject//yj_resume//main.sqlite3')
+DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')
 
 
 class DBTASK(Base):
```
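Note: this hunk only swaps the hard-coded `default=` path between two machines. Since the project already uses python-decouple, the per-machine path can come from a `.env` file or an environment variable instead, so the source never has to change. A minimal sketch, assuming a `.env` file in the project root; the path values are placeholders, not paths defined by this change:

```python
# .env (per machine, not committed) -- hypothetical contents:
# DB_PATH=E://pyptoject//yj_resume//main.sqlite3

from decouple import config

# config() checks the environment first, then the .env/settings.ini file,
# and only falls back to `default` when the key is missing everywhere.
DB_PATH = config('DB_PATH', default='main.sqlite3')
```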
main.py (15 changes)
```diff
@@ -2,7 +2,7 @@ from fastapi import FastAPI
 import uvicorn
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from typing import List
-from service.file_service import check_and_create_directory, upload_and_save_file,fetch_files
+from service.file_service import check_and_create_directory, upload_and_save_file, fetch_files
 from service import excel_service
 from service.db_service import get_task_list
 from fastapi.responses import FileResponse
```
```diff
@@ -14,7 +14,8 @@ logger = logging.getLogger(__name__)
 
 app = FastAPI()
 
-
+import concurrent.futures
+executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
 @app.get("/api/v1/hw")
 def read_root():
     return {"Hello": "World"}
```
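A module-level `ThreadPoolExecutor` is introduced here to move resume parsing off the request path. One thing the diff does not add is a shutdown path for the pool. A minimal sketch of how that could look with FastAPI's `on_event` shutdown hook (newer FastAPI versions prefer lifespan handlers; the handler name below is illustrative, not part of this change):

```python
import concurrent.futures
from fastapi import FastAPI

app = FastAPI()
executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)

@app.on_event("shutdown")
def shutdown_executor() -> None:
    # Stop accepting new work and let in-flight parsing finish
    # so background tasks are not interrupted mid-write.
    executor.shutdown(wait=True)
```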
```diff
@@ -26,11 +27,12 @@ async def create_upload_files(files: List[UploadFile] = File(...)):
     dir_id = check_and_create_directory(files)
     if not dir_id:
         return {"result": False, "code": 500, "message": "create directory failed"}
-    flag, message= await upload_and_save_file(dir_id, files)
+    flag, message = await upload_and_save_file(dir_id, files)
     logger.info(f"flag is {flag}")
     if flag:
-        flag,message = await fetch_files(dir_id)
-        return {"result": flag, "message": message,"task_id": dir_id}
+        #flag, message = await fetch_files(dir_id)
+        executor.submit(fetch_files, dir_id)
+        return {"result": flag, "message": message, "task_id": dir_id}
 
 
 @app.get("/export_task_data_to_excel")
```
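With `executor.submit(fetch_files, dir_id)` the endpoint no longer waits for parsing, but it also discards the `(flag, message)` result and any exception raised in the worker thread. A minimal sketch of one way to keep those visible in the logs, meant for inside `create_upload_files` and assuming the same `executor`, `logger`, `fetch_files`, and `dir_id`; `_log_fetch_result` is a hypothetical helper, not part of this change:

```python
import concurrent.futures

def _log_fetch_result(future: concurrent.futures.Future) -> None:
    # Runs once fetch_files finishes in the worker thread.
    exc = future.exception()
    if exc is not None:
        logger.error(f"fetch_files failed: {exc}")
    else:
        flag, message = future.result()
        logger.info(f"fetch_files finished: flag={flag}, message={message}")

future = executor.submit(fetch_files, dir_id)
future.add_done_callback(_log_fetch_result)
```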
```diff
@@ -52,4 +54,5 @@ def parse_task_list():
 
 
 if __name__ == '__main__':
-    uvicorn.run(app, host="127.0.0.1", port=3006)
+    logger.info("start server")
+    uvicorn.run(app, host="0.0.0.0", port=3006)
```
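Binding to `0.0.0.0` exposes the API on all interfaces, which suits deployment but not every local run. Since the project already uses python-decouple, the host and port could be configurable too. A minimal sketch for the bottom of main.py, assuming the existing `app` and `logger`; the `HOST`/`PORT` keys are assumptions, not keys this change defines:

```python
from decouple import config
import uvicorn

if __name__ == '__main__':
    host = config('HOST', default='0.0.0.0')
    port = config('PORT', default=3006, cast=int)  # cast: env values arrive as strings
    logger.info(f"start server on {host}:{port}")
    uvicorn.run(app, host=host, port=port)
```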
```diff
@@ -4,8 +4,8 @@ import pandas as pd
 import pathlib
 from decouple import config
 
 # BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
-BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//')
+BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
 #BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
 
 # 导出数据到excel
```
```diff
@@ -16,15 +16,16 @@ import logging
 from service.parse_resume2_doc import extra_resume
 
 logger = logging.getLogger(__name__)
-BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//')
 
 #BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
+BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
+
 
 def check_and_create_directory(files):
     logger.info("check_and_create_directory in service")
     # 先创建一个task
     if not files or len(files) == 0:
         logger.warning("check_and_create_directory is empty")
         return None
     id = str(uuid.uuid4())
     current_time = datetime.now()
```
```diff
@@ -68,17 +69,17 @@ async def upload_and_save_file(dir_id, files: List[UploadFile]) -> (bool, str):
     except Exception as e:
         print(f"Failed to save DBRESUME error {e}")
         session.rollback()
-        return False, f"Failed to save DBRESUME error {e}",[]
+        return False, f"Failed to save DBRESUME error {e}"
     finally:
         session.close()
     return True, "success"
 
-async def fetch_files(dir_id) -> (bool, str):
+def fetch_files(dir_id) -> (bool, str):
 
     logger.info(f"start fetching files task {dir_id} in service")
     if not os.path.exists(BASE_PATH):
         logger.info(f"目录{BASE_PATH}不存在")
-        return None
+        return False, f"Failed to fetch file 目录{BASE_PATH}不存在"
     file_extensions = ['.docx', '.doc']
     files_list = []
     dir_path = pathlib.Path(BASE_PATH).joinpath(dir_id)
```
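Dropping `async` from `fetch_files` matters for the `executor.submit(fetch_files, dir_id)` call in main.py: a `ThreadPoolExecutor` simply calls the function in a worker thread, so submitting a coroutine function would only create a coroutine object that is never awaited. A minimal, self-contained sketch of the difference (the function names are illustrative):

```python
import concurrent.futures

def sync_task(x: int) -> int:
    return x * 2  # runs to completion inside the worker thread

async def async_task(x: int) -> int:
    return x * 2  # would need an event loop to actually execute

executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)

print(executor.submit(sync_task, 21).result())   # 42
print(executor.submit(async_task, 21).result())  # a coroutine object, never awaited
executor.shutdown()
```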
```diff
@@ -124,10 +124,12 @@ class EnhancedDocxExtractor:
-                if not value_cell['text']:
-                    value_cell['text'] = 'False'
-                else:
-                    value_cell['text'] = 'True'
+                spec_coll = ['全日制教育','在职教育']
+                if current_key_cell['text'].replace('\n','') in spec_coll :
+                    if not value_cell['text']:
+                        value_cell['text'] = "否"
+                    else:
+                        value_cell['text'] = '是'
 
                 if not value_cell['text']:
                     value_cell['text'] = "None"
                 if value_cell['text'] and (key_row, key_col + 1) not in visited:
                     # 检查这个值是否与前一个键提取的值相同(可能是合并单元格)
                     if not self._is_key_duplicate_merged_cell(structure[key_row][key_col]['text'], kv_pairs):
```
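The extractor now fills the 全日制教育/在职教育 cells with 是/否 ("yes"/"no") instead of the English strings 'True'/'False'. If this mapping is needed elsewhere, it could live in a small helper. A hypothetical sketch under those assumptions, not code from this change:

```python
# Hypothetical helper: map an "is this cell filled in?" check to 是/否,
# but only for the special education keys; other values pass through.
SPECIAL_KEYS = ['全日制教育', '在职教育']

def normalize_flag_cell(key_text: str, value_text: str) -> str:
    if key_text.replace('\n', '') in SPECIAL_KEYS:
        return '是' if value_text else '否'
    return value_text
```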