Compare commits
4 Commits
ec0995d08a ... 5f3c61c18c

| Author | SHA1 | Date |
|---|---|---|
| | 5f3c61c18c | |
| | 0f666f18c1 | |
| | c00328ed8d | |
| | 16583dbb06 | |
@@ -4,7 +4,7 @@ from sqlalchemy.orm import declarative_base, sessionmaker
 Base = declarative_base()
 from decouple import config
 
-DB_PATH = config('DB_PATH', default='D://PycharmProject//yj_resume//main.sqlite3')
+DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')
 
 
 class DBTASK(Base):
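For context on the line being changed: `config()` from python-decouple resolves a key from the process environment or a local `.env`/`settings.ini` file before falling back to the supplied default, so the machine-specific path above can be overridden without editing the module. A minimal sketch of that behaviour (the `.env` entry shown is illustrative, not part of this repository):

```python
# Illustrative .env next to the project root (not in this repo):
#   DB_PATH=main.sqlite3
from decouple import config

# The environment variable wins first, then the .env entry,
# and only then the hard-coded default below.
DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')
print(DB_PATH)
```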
main.py (15 lines changed)
@@ -2,7 +2,7 @@ from fastapi import FastAPI
 import uvicorn
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from typing import List
-from service.file_service import check_and_create_directory, upload_and_save_file,fetch_files
+from service.file_service import check_and_create_directory, upload_and_save_file, fetch_files
 from service import excel_service
 from service.db_service import get_task_list
 from fastapi.responses import FileResponse
@@ -14,7 +14,8 @@ logger = logging.getLogger(__name__)
 
 app = FastAPI()
 
+import concurrent.futures
+executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
 @app.get("/api/v1/hw")
 def read_root():
     return {"Hello": "World"}
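The two added lines create a module-level pool of at most ten worker threads. A minimal stdlib-only sketch of how `submit()` behaves, independent of this project (`slow_job` is an illustrative stand-in):

```python
import concurrent.futures
import time

executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)

def slow_job(task_id: str) -> str:
    time.sleep(1)                 # stand-in for real work
    return f"task {task_id} done"

# submit() returns immediately with a Future; the work runs on a pool thread.
future = executor.submit(slow_job, "abc")
print(future.done())              # usually False right after submit
print(future.result())            # blocks until the job finishes -> "task abc done"
```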
@@ -26,11 +27,12 @@ async def create_upload_files(files: List[UploadFile] = File(...)):
     dir_id = check_and_create_directory(files)
     if not dir_id:
         return {"result": False, "code": 500, "message": "create directory failed"}
-    flag, message= await upload_and_save_file(dir_id, files)
+    flag, message = await upload_and_save_file(dir_id, files)
     logger.info(f"flag is {flag}")
     if flag:
-        flag,message = await fetch_files(dir_id)
-    return {"result": flag, "message": message,"task_id": dir_id}
+        #flag, message = await fetch_files(dir_id)
+        executor.submit(fetch_files, dir_id)
+    return {"result": flag, "message": message, "task_id": dir_id}
 
 
 @app.get("/export_task_data_to_excel")
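The effect of this hunk is that the upload endpoint (`create_upload_files`) no longer awaits `fetch_files`; it hands the directory id to the thread pool and responds right away. A hedged sketch of the resulting fire-and-forget pattern (the route path and `parse_directory` are illustrative stand-ins, not names from this repository):

```python
import concurrent.futures
from fastapi import FastAPI

app = FastAPI()
executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)

def parse_directory(dir_id: str) -> None:
    # Stand-in for fetch_files(dir_id): runs on a worker thread,
    # so its runtime no longer delays the HTTP response.
    ...

@app.post("/uploadfiles-sketch")
async def create_upload_files_sketch(dir_id: str):
    executor.submit(parse_directory, dir_id)   # fire-and-forget
    # The caller gets the task id back immediately and can poll for results later.
    return {"result": True, "message": "accepted", "task_id": dir_id}
```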
@@ -52,4 +54,5 @@ def parse_task_list():
 
 
 if __name__ == '__main__':
-    uvicorn.run(app, host="127.0.0.1", port=3006)
+    logger.info("start server")
+    uvicorn.run(app, host="0.0.0.0", port=3006)
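The host change is behavioural, not cosmetic: `127.0.0.1` accepts connections only from the loopback interface, while `0.0.0.0` listens on every interface of the machine, so the service becomes reachable from the network (which may warrant a firewall or reverse proxy in front). A minimal sketch of the two options:

```python
import uvicorn
from fastapi import FastAPI

app = FastAPI()

if __name__ == '__main__':
    # host="127.0.0.1" -> local-only; host="0.0.0.0" -> all interfaces on port 3006.
    uvicorn.run(app, host="0.0.0.0", port=3006)
```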
@@ -4,8 +4,8 @@ import pandas as pd
 import pathlib
 from decouple import config
 
-# BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
-BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//')
+BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
+#BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
 
 
 # 导出数据到excel
@@ -16,15 +16,16 @@ import logging
 from service.parse_resume2_doc import extra_resume
 
 logger = logging.getLogger(__name__)
-BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//')
+#BASE_PATH = config('BASE_PATH', default='D://PycharmProject//yj_resume//uploads//')
 
 
+BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
 
 
 def check_and_create_directory(files):
     logger.info("check_and_create_directory in service")
     # 先创建一个task
     if not files or len(files) == 0:
+        logger.warning("check_and_create_directory is empty")
         return None
     id = str(uuid.uuid4())
     current_time = datetime.now()
@@ -68,17 +69,17 @@ async def upload_and_save_file(dir_id, files: List[UploadFile]) -> (bool, str):
     except Exception as e:
         print(f"Failed to save DBRESUME error {e}")
         session.rollback()
-        return False, f"Failed to save DBRESUME error {e}",[]
+        return False, f"Failed to save DBRESUME error {e}"
     finally:
         session.close()
     return True, "success"
 
-async def fetch_files(dir_id) -> (bool, str):
+def fetch_files(dir_id) -> (bool, str):
 
     logger.info(f"start fetching files task {dir_id} in service")
     if not os.path.exists(BASE_PATH):
         logger.info(f"目录{BASE_PATH}不存在")
-        return None
+        return False, f"Failed to fetch file 目录{BASE_PATH}不存在"
     file_extensions = ['.docx', '.doc']
     files_list = []
     dir_path = pathlib.Path(BASE_PATH).joinpath(dir_id)
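Changing `fetch_files` from `async def` to a plain `def` is what makes the `executor.submit(fetch_files, dir_id)` call in main.py actually run the work: submitting an `async def` function to a thread pool only builds a coroutine object that nothing ever awaits. A small self-contained illustration of the difference (function names are illustrative):

```python
import concurrent.futures

executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)

async def coro_job() -> str:
    return "ran"

def plain_job() -> str:
    return "ran"

# The pool calls coro_job(), which merely returns an un-awaited coroutine
# object; its body never executes (Python later warns "never awaited").
print(executor.submit(coro_job).result())   # <coroutine object coro_job at ...>

# A plain function executes on the worker thread as expected.
print(executor.submit(plain_job).result())  # "ran"
```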
@@ -124,10 +124,12 @@ class EnhancedDocxExtractor:
             spec_coll = ['全日制教育','在职教育']
             if current_key_cell['text'].replace('\n','') in spec_coll :
                 if not value_cell['text']:
-                    value_cell['text'] = 'False'
+                    value_cell['text'] = "否"
                 else:
-                    value_cell['text'] = 'True'
+                    value_cell['text'] = '是'
 
+            if not value_cell['text']:
+                value_cell['text'] = "None"
             if value_cell['text'] and (key_row, key_col + 1) not in visited:
                 # 检查这个值是否与前一个键提取的值相同(可能是合并单元格)
                 if not self._is_key_duplicate_merged_cell(structure[key_row][key_col]['text'], kv_pairs):
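This hunk changes the placeholder values the extractor writes into empty table cells: for the yes/no style fields '全日制教育' (full-time education) and '在职教育' (on-the-job education) an empty cell now becomes '否' (no) and a non-empty one '是' (yes), and any other empty value falls back to the string "None" so the later `if value_cell['text']` check still passes. A condensed sketch of that rule (`normalise_value` is an illustrative helper, not a method of `EnhancedDocxExtractor`):

```python
def normalise_value(key_text: str, value_text: str) -> str:
    # Yes/no style fields from the resume table.
    spec_coll = ['全日制教育', '在职教育']
    if key_text.replace('\n', '') in spec_coll:
        return '是' if value_text else '否'
    # Other empty cells get the literal string "None" so they are not dropped.
    return value_text if value_text else 'None'

assert normalise_value('全日制教育', '') == '否'
assert normalise_value('在职教育', 'yes-like value') == '是'
assert normalise_value('毕业院校', '') == 'None'
```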