Merge branch 'dev' of http://106.13.42.156:33077/lei_y601/yj_resume
# Conflicts: # requirements.txt
This commit is contained in:
0
db/__init__.py
Normal file
0
db/__init__.py
Normal file
46
db/sql_db.py
Normal file
46
db/sql_db.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from sqlalchemy import Column, DateTime, Integer, BigInteger, String, create_engine, Boolean, Text
|
||||
from sqlalchemy.orm import declarative_base, sessionmaker
|
||||
# Declare the declarative base class that all ORM models inherit from
|
||||
Base = declarative_base()
|
||||
from decouple import config
|
||||
DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')
|
||||
|
||||
|
||||
class DBTASK(Base):
    """ORM model mapped to the ``db_task`` table (one row per task)."""

    __tablename__ = 'db_task'

    # Primary key: string identifier, up to 100 characters.
    id = Column(String(100), primary_key=True)
    # Creation timestamp; required, and no default is defined on the column.
    create_time = Column(DateTime, nullable=False)
    # Task state: 0 = pending, 1 = succeeded, 2 = failed.
    status = Column(Integer, default=0, nullable=False)
    # Counters, each required and defaulting to 0.
    # NOTE(review): presumably success/total/fail item counts for the task -- confirm.
    success_num = Column(Integer, default=0, nullable=False)
    total_num = Column(Integer, default=0, nullable=False)
    fail_num = Column(Integer, default=0, nullable=False)
    # Optional free-form text attached to the task.
    message = Column(Text, nullable=True)
|
||||
|
||||
|
||||
class DBRESUME(Base):
    """ORM model mapped to the ``db_resume`` table (one row per resume file)."""

    __tablename__ = 'db_resume'

    # Primary key: string identifier, up to 100 characters.
    id = Column(String(100), primary_key=True)
    # Owning task; each task corresponds to one folder ID.
    task_id = Column(String(100), nullable=False)
    # Processing state: 0 = pending, 1 = succeeded, 2 = failed.
    status = Column(Integer, default=0, nullable=False)
    # Optional file name, up to 100 characters.
    file_name = Column(String(100), nullable=True)
    # Extracted data; may be stored as a JSON document.
    data_info = Column(Text, nullable=True)
    # Error information and similar notes.
    message = Column(Text, nullable=True)
|
||||
|
||||
|
||||
class SqliteSqlalchemy(object):
    """Hand out SQLAlchemy sessions bound to the SQLite database at ``DB_PATH``.

    Each instance exposes a fresh session as ``self.session``; the caller is
    responsible for closing it.

    The engine and the session factory are created on first use and then shared
    by every later instance. Building a new engine per instantiation would
    allocate a new connection pool and re-run the table creation check every
    time a session is requested.
    """

    # Process-wide engine and session factory, created lazily on first use.
    _engine = None
    _session_factory = None

    def __init__(self):
        if SqliteSqlalchemy._engine is None:
            # Create the SQLite engine (echo=True logs the emitted SQL).
            engine = create_engine(f'sqlite:///{DB_PATH}', echo=True)
            # Create any tables that do not exist yet.
            Base.metadata.create_all(engine, checkfirst=True)
            SqliteSqlalchemy._session_factory = sessionmaker(bind=engine)
            # Publish the engine last so a failure above leaves the cache unset.
            SqliteSqlalchemy._engine = engine
        # A new session per instance, bound to the shared engine.
        self.session = SqliteSqlalchemy._session_factory()
|
||||
|
||||
|
||||
|
||||
|
||||
26
main.py
26
main.py
@@ -1,5 +1,10 @@
|
||||
from fastapi import FastAPI
|
||||
import uvicorn
|
||||
import uvicorn
|
||||
from fastapi import FastAPI, File, UploadFile, HTTPException
|
||||
from typing import List
|
||||
from service.file_service import check_and_create_directory, upload_and_save_file
|
||||
from service import excel_service
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
@@ -8,7 +13,24 @@ def read_root():
|
||||
return {"Hello": "World"}
|
||||
|
||||
|
||||
# Upload files and parse them; parsing is intended to run asynchronously
|
||||
@app.post("/upload_files_and_parse")
async def create_upload_files(files: List[UploadFile] = File(...)):
    """Register a task for the uploaded files, store them, and report the outcome.

    Returns a dict with ``result`` and ``message``; when the task/directory id
    cannot be obtained the dict additionally carries ``"code": 500``.
    """
    task_dir_id = check_and_create_directory(files)
    if task_dir_id:
        saved, detail = await upload_and_save_file(task_dir_id, files)
        # TODO: when ``saved`` is true, trigger the asynchronous parsing job.
        return {"result": saved, "message": detail}
    return {"result": False, "code": 500, "message": "create directory failed"}
|
||||
|
||||
|
||||
@app.get("/export_task_data_to_excel")
def export_task_data_to_excel(task_id: str):
    """Export the data of task ``task_id`` to Excel and return the service's status message."""
    return {"message": excel_service.export_task_data_to_excel(task_id)}
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Start the server exactly once. A second uvicorn.run() call after this
    # one would launch another server as soon as the first one returns.
    uvicorn.run(app, host="127.0.0.1", port=3006)
|
||||
|
||||
@@ -2,6 +2,12 @@ python-docx
|
||||
fastapi
|
||||
uvicorn
|
||||
docxtpl
|
||||
SQLAlchemy
|
||||
python-decouple
|
||||
python-multipart
|
||||
pandas
|
||||
openpyxl
|
||||
|
||||
python-multipart
|
||||
PyMuPDF>=1.23.0
|
||||
paddlepaddle>=2.5.0
|
||||
|
||||
30
service/excel_service.py
Normal file
30
service/excel_service.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy
|
||||
import json
|
||||
import pandas as pd
|
||||
import pathlib
|
||||
from decouple import config
|
||||
|
||||
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
|
||||
|
||||
|
||||
# Export data to Excel
|
||||
def export_to_excel(task_id):
    """Write the extracted data of a task's successful resumes to an Excel file.

    Loads every ``DBRESUME`` row with the given ``task_id`` and ``status == 1``,
    decodes each row's ``data_info`` JSON, and writes the result to
    ``BASE_PATH/<task_id>/<task_id>.xlsx`` without the index column.

    Args:
        task_id: Identifier of the task; also used as the folder and file name.

    Raises:
        json.JSONDecodeError: If a row's ``data_info`` is not valid JSON.
    """
    session = SqliteSqlalchemy().session
    try:
        # Fetch every successfully processed resume of this task.
        resumes = session.query(DBRESUME).filter_by(task_id=task_id, status=1).all()
        # data_info is a nullable column; rows without a payload cannot be
        # decoded (json.loads(None) raises TypeError), so they are skipped.
        rows = [json.loads(resume.data_info) for resume in resumes if resume.data_info]
    finally:
        # Always release the session, even if the query or decoding fails.
        session.close()

    # Export to Excel; make sure the task folder exists before writing into it.
    task_dir = pathlib.Path(BASE_PATH).joinpath(task_id)
    task_dir.mkdir(parents=True, exist_ok=True)
    pd.DataFrame(rows).to_excel(task_dir.joinpath(f"{task_id}.xlsx"), index=False)
|
||||
|
||||
|
||||
def export_task_data_to_excel(task_id):
    """Export a task's data to Excel if the task exists and is neither pending nor failed.

    Args:
        task_id: Identifier of the ``DBTASK`` row to export.

    Returns:
        A status message: the "not finished or failed" text when the task is
        missing or has status 0 or 2, otherwise the "export succeeded" text.
    """
    session = SqliteSqlalchemy().session
    try:
        task = session.query(DBTASK).filter_by(id=task_id).first()
        # Evaluate the status while the session is still open.
        exportable = task is not None and task.status not in (0, 2)
    finally:
        # Always release the session; the original never closed it.
        session.close()

    if not exportable:
        return "任务未完成或者失败"
    export_to_excel(task_id)
    return "导出成功"
|
||||
59
service/file_service.py
Normal file
59
service/file_service.py
Normal file
@@ -0,0 +1,59 @@
|
||||
from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from decouple import config
|
||||
import pathlib
|
||||
from fastapi import File, UploadFile
|
||||
from typing import List
|
||||
import os
|
||||
|
||||
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
|
||||
|
||||
|
||||
def check_and_create_directory(files):
    """Create and persist a new pending ``DBTASK`` for an upload batch.

    Args:
        files: The uploaded files; only their count is used here
            (stored as ``total_num``).

    Returns:
        The new task id (a UUID4 string), or ``None`` when ``files`` is empty
        or the task row could not be saved.
    """
    # Nothing uploaded: no task is created.
    if not files:
        return None

    task_id = str(uuid.uuid4())
    new_task = DBTASK(
        id=task_id,
        create_time=datetime.now(),
        status=0,
        success_num=0,
        total_num=len(files),
        fail_num=0,
    )

    db_session = SqliteSqlalchemy().session
    try:
        db_session.add(new_task)
        db_session.commit()
    except Exception as e:
        print(f"Failed to save DBTASK info error {e}")
        db_session.rollback()
        return None
    finally:
        db_session.close()
    return task_id
|
||||
|
||||
|
||||
async def upload_and_save_file(dir_id, files: List[UploadFile]) -> "tuple[bool, str]":
    """Store the uploaded Word documents of a task and register them in the database.

    Files whose extension is not ``.doc``/``.docx`` (compared case-insensitively)
    are skipped. Accepted files are written to ``BASE_PATH/<dir_id>/`` under
    sequential names (``1.docx``, ``2.doc``, ...) and one pending ``DBRESUME``
    row is created per stored file.

    Note that when no file is accepted, nothing is written and the function
    still returns ``(True, "success")``.

    Args:
        dir_id: Task id; used as the folder name and as ``DBRESUME.task_id``.
        files: The uploaded files.

    Returns:
        ``(True, "success")`` when the rows were committed, otherwise
        ``(False, <error message>)``.
    """
    allowed_suffixes = ('.doc', '.docx')
    target_dir = pathlib.Path(BASE_PATH).joinpath(dir_id)
    target_dir.mkdir(parents=True, exist_ok=True)

    records = []
    for file in files:
        # An upload may carry no filename; treat that as "no extension".
        # Lower-case the suffix so that e.g. "CV.DOCX" is not silently dropped.
        suffix = os.path.splitext(file.filename or '')[1].lower()
        if suffix not in allowed_suffixes:
            continue
        # Accepted files are numbered 1, 2, ... in upload order.
        stored_name = f"{len(records) + 1}{suffix}"
        # Read first so a failed read does not leave an empty file on disk.
        content = await file.read()
        with open(target_dir.joinpath(stored_name), 'wb') as f:
            f.write(content)
        records.append(DBRESUME(id=str(uuid.uuid4()), task_id=dir_id, status=0, file_name=stored_name))

    session = SqliteSqlalchemy().session
    try:
        session.bulk_save_objects(records)
        session.commit()
    except Exception as e:
        print(f"Failed to save DBRESUME error {e}")
        session.rollback()
        return False, f"Failed to save DBRESUME error {e}"
    finally:
        session.close()
    return True, "success"
|
||||
|
||||
Reference in New Issue
Block a user