# Conflicts:
#	requirements.txt
This commit is contained in:
yujj128
2025-12-06 11:19:38 +08:00
6 changed files with 165 additions and 2 deletions

0
db/__init__.py Normal file
View File

46
db/sql_db.py Normal file
View File

@@ -0,0 +1,46 @@
from sqlalchemy import Column, DateTime, Integer, BigInteger, String, create_engine, Boolean, Text
from sqlalchemy.orm import declarative_base, sessionmaker
# Declare the declarative base class that the ORM models below inherit from.
Base = declarative_base()
from decouple import config
# Location of the SQLite database file, looked up through decouple's config()
# under the key 'DB_PATH'; the default below is used when the key is not set.
DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')
class DBTASK(Base):
    """ORM mapping for the ``db_task`` table: one row per task with its counters."""

    __tablename__ = 'db_task'

    # String primary key of the task.
    id = Column(String(100), primary_key=True)
    # Creation timestamp; required and has no default, so the caller must supply it.
    create_time = Column(DateTime, nullable=False)
    # Status code: 0 = pending, 1 = succeeded, 2 = failed.
    status = Column(Integer, default=0, nullable=False)
    # Counters, all starting at zero.
    success_num = Column(Integer, default=0, nullable=False)
    total_num = Column(Integer, default=0, nullable=False)
    fail_num = Column(Integer, default=0, nullable=False)
    # Optional free-form message.
    message = Column(Text, nullable=True)
class DBRESUME(Base):
    """ORM mapping for the ``db_resume`` table: one row per resume file of a task."""

    __tablename__ = 'db_resume'

    # String primary key of the resume record.
    id = Column(String(100), primary_key=True)
    # Owning task; each task corresponds to one folder ID.
    task_id = Column(String(100), nullable=False)
    # Status code: 0 = pending, 1 = succeeded, 2 = failed.
    status = Column(Integer, default=0, nullable=False)
    # Optional name of the file.
    file_name = Column(String(100), nullable=True)
    # Extracted data; may be stored as a JSON string.
    data_info = Column(Text, nullable=True)
    # Error information and similar notes.
    message = Column(Text, nullable=True)
class SqliteSqlalchemy(object):
    """Hand out a fresh SQLAlchemy session bound to the SQLite database at ``DB_PATH``.

    Usage is unchanged: ``SqliteSqlalchemy().session`` returns a new session
    that the caller is responsible for closing.

    The engine is created only once and shared by every instance. Building a
    new engine (and re-running ``create_all``) on each instantiation would
    allocate a separate connection pool per call that is never disposed.
    """

    # Process-wide engine, created lazily by the first instantiation.
    _engine = None

    def __init__(self):
        if SqliteSqlalchemy._engine is None:
            # Create the SQLite engine.
            engine = create_engine(f'sqlite:///{DB_PATH}', echo=True)
            # Create any missing tables; existing ones are left untouched.
            Base.metadata.create_all(engine, checkfirst=True)
            SqliteSqlalchemy._engine = engine
        # Create a new session bound to the shared engine.
        self.session = sessionmaker(bind=SqliteSqlalchemy._engine)()

26
main.py
View File

@@ -1,5 +1,10 @@
from fastapi import FastAPI
import uvicorn
import uvicorn
from fastapi import FastAPI, File, UploadFile, HTTPException
from typing import List
from service.file_service import check_and_create_directory, upload_and_save_file
from service import excel_service
# FastAPI application instance; the route decorators below register on it.
app = FastAPI()
@@ -8,7 +13,24 @@ def read_root():
return {"Hello": "World"}
# Upload files and parse them; parsing is intended to run asynchronously (not implemented yet).
@app.post("/upload_files_and_parse")
async def create_upload_files(files: List[UploadFile] = File(...)):
    """Register an upload via ``check_and_create_directory`` and store its files.

    Returns a failure payload when no directory/task id could be obtained;
    otherwise returns the outcome reported by ``upload_and_save_file``.
    """
    task_dir_id = check_and_create_directory(files)
    if not task_dir_id:
        return {"result": False, "code": 500, "message": "create directory failed"}

    saved, detail = await upload_and_save_file(task_dir_id, files)
    # TODO: when ``saved`` is true, trigger the asynchronous task that parses the files.
    return {"result": saved, "message": detail}
@app.get("/export_task_data_to_excel")
def export_task_data_to_excel(task_id: str):
    """Run the Excel export for ``task_id`` and wrap the service's message in a dict."""
    return {"message": excel_service.export_task_data_to_excel(task_id)}
if __name__ == '__main__':
    # Start the server exactly once. A second uvicorn.run() call would only
    # execute after the first server stopped and would then start it again.
    uvicorn.run(app, host="127.0.0.1", port=3006)

View File

@@ -2,6 +2,12 @@ python-docx
fastapi
uvicorn
docxtpl
SQLAlchemy
python-decouple
python-multipart
pandas
openpyxl
python-multipart
PyMuPDF>=1.23.0
paddlepaddle>=2.5.0

30
service/excel_service.py Normal file
View File

@@ -0,0 +1,30 @@
from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy
import json
import pandas as pd
import pathlib
from decouple import config
# Root directory that holds one sub-folder per task; looked up through
# decouple's config() under the key 'BASE_PATH', with the default below as fallback.
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
# Export data to Excel
def export_to_excel(task_id):
    """Write the extracted data of a task's successful resumes to an Excel file.

    Rows of ``DBRESUME`` with the given ``task_id`` and ``status == 1`` are
    loaded, their ``data_info`` JSON is decoded, and the result is written to
    ``<BASE_PATH>/<task_id>/<task_id>.xlsx`` without the index column.

    Args:
        task_id: Identifier of the task whose resumes are exported.

    Raises:
        json.JSONDecodeError: If a non-empty ``data_info`` is not valid JSON.
    """
    session = SqliteSqlalchemy().session
    try:
        # Fetch every successfully processed resume of the task.
        resumes = session.query(DBRESUME).filter_by(task_id=task_id, status=1).all()
        # data_info is nullable; json.loads(None) would raise TypeError, so
        # rows without extracted data are skipped.
        records = [json.loads(resume.data_info) for resume in resumes if resume.data_info]
    finally:
        # Always release the session, even when the query or decoding fails.
        session.close()

    # Make sure the task folder exists before writing into it.
    task_dir = pathlib.Path(BASE_PATH).joinpath(task_id)
    task_dir.mkdir(parents=True, exist_ok=True)
    # Export to Excel.
    pd.DataFrame(records).to_excel(task_dir.joinpath(f"{task_id}.xlsx"), index=False)
def export_task_data_to_excel(task_id):
    """Export a task's data to Excel if the task exists and is not pending or failed.

    Args:
        task_id: Identifier of the task to export.

    Returns:
        A user-facing status message: the "not finished or failed" text when
        the task is missing or has status 0 or 2, otherwise the success text
        after ``export_to_excel`` has run.
    """
    session = SqliteSqlalchemy().session
    try:
        task = session.query(DBTASK).filter_by(id=task_id).first()
        # Read the status while the session is still open.
        exportable = task is not None and task.status not in (0, 2)
    finally:
        # Always release the session, even when the query fails.
        session.close()

    if not exportable:
        return "任务未完成或者失败"
    export_to_excel(task_id)
    return "导出成功"

59
service/file_service.py Normal file
View File

@@ -0,0 +1,59 @@
from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy
import uuid
from datetime import datetime
from decouple import config
import pathlib
from fastapi import File, UploadFile
from typing import List
import os
# Root directory under which a folder per upload is created; looked up through
# decouple's config() under the key 'BASE_PATH', with the default below as fallback.
BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')
def check_and_create_directory(files):
    """Create a pending ``DBTASK`` row for an upload and return its id.

    Args:
        files: The uploaded files; only their count is used here.

    Returns:
        The new task id (a UUID4 string), or ``None`` when ``files`` is empty
        or the row could not be saved.
    """
    # Nothing uploaded: no task is created.
    if not files:
        return None

    # Create the task record first.
    task_id = str(uuid.uuid4())
    new_task = DBTASK(
        id=task_id,
        create_time=datetime.now(),
        status=0,
        success_num=0,
        total_num=len(files),
        fail_num=0,
    )
    db_session = SqliteSqlalchemy().session
    try:
        db_session.add(new_task)
        db_session.commit()
    except Exception as e:
        print(f"Failed to save DBTASK info error {e}")
        db_session.rollback()
        return None
    finally:
        db_session.close()
    return task_id
async def upload_and_save_file(dir_id, files: List[UploadFile]) -> "tuple[bool, str]":
    """Store the uploaded Word documents of a task and record them in the database.

    Files are written to ``<BASE_PATH>/<dir_id>/`` as ``1<ext>``, ``2<ext>``, ...
    in upload order, and one pending ``DBRESUME`` row is saved per stored file.
    Uploads whose extension is not ``.doc``/``.docx`` (compared
    case-insensitively) or that carry no filename are skipped.

    Args:
        dir_id: Task id; also the name of the target folder.
        files: The uploaded files.

    Returns:
        ``(True, "success")`` when the rows were committed, otherwise
        ``(False, <error text>)``.
    """
    target_dir = pathlib.Path(BASE_PATH).joinpath(dir_id)
    target_dir.mkdir(parents=True, exist_ok=True)

    records = []
    for upload in files:
        # The filename can be missing; treat that as "no extension". Lower-casing
        # lets files such as "CV.DOCX" through instead of dropping them silently.
        suffix = os.path.splitext(upload.filename or "")[1].lower()
        if suffix not in ('.doc', '.docx'):
            continue
        # Read before opening the target so a failed read leaves no empty file behind.
        content = await upload.read()
        stored_name = f"{len(records) + 1}{suffix}"
        with open(target_dir.joinpath(stored_name), 'wb') as f:
            f.write(content)
        records.append(DBRESUME(id=str(uuid.uuid4()), task_id=dir_id, status=0, file_name=stored_name))

    session = SqliteSqlalchemy().session
    try:
        session.bulk_save_objects(records)
        session.commit()
    except Exception as e:
        print(f"Failed to save DBRESUME error {e}")
        session.rollback()
        return False, f"Failed to save DBRESUME error {e}"
    finally:
        session.close()
    return True, "success"