Merge branch 'dev' of http://106.13.42.156:33077/lei_y601/yj_resume
# Conflicts: # requirements.txt
This commit is contained in:
0
db/__init__.py
Normal file
0
db/__init__.py
Normal file
46
db/sql_db.py
Normal file
46
db/sql_db.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
from sqlalchemy import Column, DateTime, Integer, BigInteger, String, create_engine, Boolean, Text
|
||||||
|
from sqlalchemy.orm import declarative_base, sessionmaker
|
||||||
|
# 申明基类对象
|
||||||
|
Base = declarative_base()
|
||||||
|
from decouple import config
|
||||||
|
DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')
|
||||||
|
|
||||||
|
|
||||||
|
class DBTASK(Base):
    """ORM model for the ``db_task`` table.

    Each row carries a status code, success / total / fail counters and an
    optional free-text message.
    """

    __tablename__ = 'db_task'

    id = Column(String(100), primary_key=True)
    create_time = Column(DateTime, nullable=False)
    # Status code: 0 = pending, 1 = succeeded, 2 = failed.
    status = Column(Integer, default=0, nullable=False)
    success_num = Column(Integer, default=0, nullable=False)
    total_num = Column(Integer, default=0, nullable=False)
    fail_num = Column(Integer, default=0, nullable=False)
    message = Column(Text, nullable=True)
|
||||||
|
|
||||||
|
|
||||||
|
class DBRESUME(Base):
    """ORM model for the ``db_resume`` table.

    Each row references a task through ``task_id`` and carries a status
    code, a file name, the extracted data and an optional message.
    """

    __tablename__ = 'db_resume'

    id = Column(String(100), primary_key=True)
    # Every task corresponds to one folder id.
    task_id = Column(String(100), nullable=False)
    # Status code: 0 = pending, 1 = succeeded, 2 = failed.
    status = Column(Integer, default=0, nullable=False)
    file_name = Column(String(100), nullable=True)
    # Extracted data; may be stored as a JSON string.
    data_info = Column(Text, nullable=True)
    # Error details and similar notes.
    message = Column(Text, nullable=True)
|
||||||
|
|
||||||
|
|
||||||
|
class SqliteSqlalchemy(object):
    """Hand out SQLAlchemy sessions bound to the SQLite file at ``DB_PATH``.

    Usage is unchanged: ``SqliteSqlalchemy().session`` yields a new session
    that the caller is responsible for closing.

    The engine and session factory are created once and cached on the class.
    Previously every instantiation built a new engine and re-ran
    ``create_all``, and none of those engines was ever disposed.
    """

    # Engine and session factory shared by all instances; created lazily.
    _engine = None
    _session_factory = None

    def __init__(self):
        if SqliteSqlalchemy._engine is None:
            # Create the SQLite engine.
            engine = create_engine(f'sqlite:///{DB_PATH}', echo=True)
            # Create any tables that do not exist yet.
            Base.metadata.create_all(engine, checkfirst=True)
            SqliteSqlalchemy._session_factory = sessionmaker(bind=engine)
            # Published last so a concurrent caller never sees the engine
            # without its session factory.
            SqliteSqlalchemy._engine = engine
        # Each instance still gets its own session object.
        self.session = SqliteSqlalchemy._session_factory()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
26
main.py
26
main.py
@@ -1,5 +1,10 @@
|
|||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
import uvicorn
|
import uvicorn
|
||||||
|
from fastapi import FastAPI, File, UploadFile, HTTPException
|
||||||
|
from typing import List
|
||||||
|
from service.file_service import check_and_create_directory, upload_and_save_file
|
||||||
|
from service import excel_service
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
|
|
||||||
@@ -8,7 +13,24 @@ def read_root():
|
|||||||
return {"Hello": "World"}
|
return {"Hello": "World"}
|
||||||
|
|
||||||
|
|
||||||
|
# Upload files and parse them; parsing is meant to run as an asynchronous task.
@app.post("/upload_files_and_parse")
async def create_upload_files(files: List[UploadFile] = File(...)):
    """Register the uploaded files under a new task id and store them.

    Returns a dict with ``result`` and ``message``. When no task id could be
    obtained, the dict also carries ``"code": 500``; this is returned as a
    normal response body, not raised as an ``HTTPException``.
    """
    task_dir_id = check_and_create_directory(files)
    if task_dir_id:
        flag, message = await upload_and_save_file(task_dir_id, files)
        # TODO: when ``flag`` is true, trigger the asynchronous parsing task.
        return {"result": flag, "message": message}
    return {"result": False, "code": 500, "message": "create directory failed"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/export_task_data_to_excel")
def export_task_data_to_excel(task_id: str):
    """Run the Excel export for ``task_id`` and relay the service's message."""
    return {"message": excel_service.export_task_data_to_excel(task_id)}
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
uvicorn.run(app, host="127.0.0.1", port=3006)
|
uvicorn.run(app, host="127.0.0.1", port=3006)
|
||||||
|
|||||||
@@ -2,6 +2,12 @@ python-docx
|
|||||||
fastapi
|
fastapi
|
||||||
uvicorn
|
uvicorn
|
||||||
docxtpl
|
docxtpl
|
||||||
|
SQLAlchemy
|
||||||
|
python-decouple
|
||||||
|
python-multipart
|
||||||
|
pandas
|
||||||
|
openpyxl
|
||||||
|
|
||||||
python-multipart
|
python-multipart
|
||||||
PyMuPDF>=1.23.0
|
PyMuPDF>=1.23.0
|
||||||
paddlepaddle>=2.5.0
|
paddlepaddle>=2.5.0
|
||||||
|
|||||||
30
service/excel_service.py
Normal file
30
service/excel_service.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy
|
||||||
|
import json
|
||||||
|
import pandas as pd
|
||||||
|
import pathlib
|
||||||
|
from decouple import config
|
||||||
|
|
||||||
|
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
|
||||||
|
|
||||||
|
|
||||||
|
def export_to_excel(task_id):
    """Export the successfully processed resumes of a task to an Excel file.

    Loads every ``DBRESUME`` row with the given ``task_id`` and ``status == 1``,
    decodes each row's ``data_info`` JSON into one record, and writes the
    records to ``<BASE_PATH>/<task_id>/<task_id>.xlsx`` without an index column.

    Args:
        task_id: Id of the task whose resume rows are exported.

    Raises:
        json.JSONDecodeError: If a non-empty ``data_info`` is not valid JSON.
    """
    session = SqliteSqlalchemy().session
    try:
        rows = session.query(DBRESUME).filter_by(task_id=task_id, status=1).all()
        # ``data_info`` is nullable; json.loads(None) would raise TypeError,
        # so rows without a payload are skipped.
        records = [json.loads(row.data_info) for row in rows if row.data_info]
    finally:
        # Always release the session; it was previously left open.
        session.close()

    # Write the workbook into the task's own folder, creating it if needed.
    target_dir = pathlib.Path(BASE_PATH).joinpath(task_id)
    target_dir.mkdir(parents=True, exist_ok=True)
    pd.DataFrame(records).to_excel(target_dir.joinpath(f"{task_id}.xlsx"), index=False)
|
||||||
|
|
||||||
|
|
||||||
|
def export_task_data_to_excel(task_id):
    """Export a task's data to Excel if the task exists and is exportable.

    Args:
        task_id: Id of the ``DBTASK`` row to export.

    Returns:
        A user-facing message: a refusal when the task is missing, pending
        (status 0) or failed (status 2); otherwise a success message after
        ``export_to_excel`` has run.
    """
    session = SqliteSqlalchemy().session
    try:
        task = session.query(DBTASK).filter_by(id=task_id).first()
        # Read the status while the session is still open.
        exportable = task is not None and task.status not in (0, 2)
    finally:
        # Always release the session; it was previously left open.
        session.close()

    if not exportable:
        return "任务未完成或者失败"
    export_to_excel(task_id)
    return "导出成功"
|
||||||
59
service/file_service.py
Normal file
59
service/file_service.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime
|
||||||
|
from decouple import config
|
||||||
|
import pathlib
|
||||||
|
from fastapi import File, UploadFile
|
||||||
|
from typing import List
|
||||||
|
import os
|
||||||
|
|
||||||
|
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
|
||||||
|
|
||||||
|
|
||||||
|
def check_and_create_directory(files):
    """Create a pending ``DBTASK`` row for an upload batch and return its id.

    Despite the name, no directory is created here; the function only
    persists the task row.

    Args:
        files: The uploaded files; only their count is used (``total_num``).

    Returns:
        The new task id (a UUID4 string), or ``None`` when ``files`` is empty
        or the row could not be saved.
    """
    # Nothing to track for an empty upload.
    if not files:
        return None

    # Named ``task_id`` so the builtin ``id`` is not shadowed.
    task_id = str(uuid.uuid4())
    task = DBTASK(
        id=task_id,
        create_time=datetime.now(),
        status=0,
        success_num=0,
        total_num=len(files),
        fail_num=0,
    )

    session = SqliteSqlalchemy().session
    try:
        session.add(task)
        session.commit()
    except Exception as e:
        print(f"Failed to save DBTASK info error {e}")
        session.rollback()
        return None
    finally:
        session.close()
    return task_id
|
||||||
|
|
||||||
|
|
||||||
|
async def upload_and_save_file(dir_id, files: List[UploadFile]) -> (bool, str):
    """Store the uploaded Word documents on disk and register them in the DB.

    Files whose extension is not ``.doc`` / ``.docx`` (compared
    case-insensitively) are skipped. Accepted files are written to
    ``<BASE_PATH>/<dir_id>/`` as ``1.<ext>``, ``2.<ext>``, ... in upload order,
    and one pending ``DBRESUME`` row (status 0) is saved per stored file.

    Args:
        dir_id: Task id; used both as the folder name and as ``task_id``.
        files: The uploaded files.

    Returns:
        ``(True, "success")`` when the rows were committed, otherwise
        ``(False, <error message>)``.
    """
    target_dir = pathlib.Path(BASE_PATH).joinpath(dir_id)
    target_dir.mkdir(parents=True, exist_ok=True)

    records = []
    for file in files:
        # An upload may carry no filename; treat it as having no extension
        # instead of crashing in os.path.splitext(None).
        suffix = os.path.splitext(file.filename or "")[1].lower()
        if suffix not in ('.doc', '.docx'):
            continue
        # Files are numbered by their position among the accepted uploads.
        stored_name = f"{len(records) + 1}{suffix}"
        # Read before opening the target so a failed read leaves no empty file.
        file_content = await file.read()
        with open(target_dir.joinpath(stored_name), 'wb') as f:
            f.write(file_content)
        records.append(DBRESUME(id=str(uuid.uuid4()), task_id=dir_id, status=0, file_name=stored_name))

    session = SqliteSqlalchemy().session
    try:
        session.bulk_save_objects(records)
        session.commit()
    except Exception as e:
        print(f"Failed to save DBRESUME error {e}")
        session.rollback()
        return False, f"Failed to save DBRESUME error {e}"
    finally:
        session.close()
    return True, "success"
|
||||||
|
|
||||||
Reference in New Issue
Block a user