# NOTE: this listing is the content of a multi-file patch, laid out per target
# file. Each banner names the file the code below it belongs to.

# ---------------------------------------------------------------------------
# db/__init__.py  (new file, empty package marker)
# ---------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# db/sql_db.py  (new file)
# ---------------------------------------------------------------------------
import functools

from decouple import config
from sqlalchemy import Column, DateTime, Integer, BigInteger, String, create_engine, Boolean, Text
from sqlalchemy.orm import declarative_base, sessionmaker

# Declarative base class shared by every ORM model in this module.
Base = declarative_base()
DB_PATH = config('DB_PATH', default='E://pyptoject//yj_resume//main.sqlite3')


class DBTASK(Base):
    """One upload batch; a row is inserted by check_and_create_directory()."""

    __tablename__ = 'db_task'
    id = Column(String(100), primary_key=True)
    create_time = Column(DateTime, nullable=False)
    # 0 = pending, 1 = succeeded, 2 = failed
    status = Column(Integer, nullable=False, default=0)
    success_num = Column(Integer, nullable=False, default=0)
    total_num = Column(Integer, nullable=False, default=0)
    fail_num = Column(Integer, nullable=False, default=0)
    message = Column(Text, nullable=True)


class DBRESUME(Base):
    """One stored resume file belonging to a task."""

    __tablename__ = 'db_resume'
    id = Column(String(100), primary_key=True)
    # Each task maps to one folder; this is that task/folder id.
    task_id = Column(String(100), nullable=False)
    # 0 = pending, 1 = succeeded, 2 = failed
    status = Column(Integer, nullable=False, default=0)
    file_name = Column(String(100), nullable=True)
    # Extracted data, stored as a JSON string.
    data_info = Column(Text, nullable=True)
    # Error details and similar diagnostics.
    message = Column(Text, nullable=True)


@functools.lru_cache(maxsize=1)
def _get_session_factory():
    """Create the engine and tables once and return the shared sessionmaker.

    An Engine owns a connection pool, so building one per request (as a
    per-instance create_engine() call would) leaks pools and repeats the
    CREATE TABLE check on every call.
    """
    engine = create_engine(f'sqlite:///{DB_PATH}', echo=True)
    # Create any missing tables; existing ones are left untouched.
    Base.metadata.create_all(engine, checkfirst=True)
    return sessionmaker(bind=engine)


class SqliteSqlalchemy(object):
    """Hand out a fresh SQLAlchemy session bound to the shared SQLite engine.

    Attributes:
        session: a new Session; the caller is responsible for closing it.
    """

    def __init__(self):
        self.session = _get_session_factory()()


# ---------------------------------------------------------------------------
# main.py  (modified file; only the lines visible in the patch are listed)
# ---------------------------------------------------------------------------
import uvicorn
from fastapi import FastAPI, File, UploadFile, HTTPException
from typing import List
from service.file_service import check_and_create_directory, upload_and_save_file
from service import excel_service

app = FastAPI()

# (unchanged context, only partly visible in the patch: read_root(), which
#  returns {"Hello": "World"})


# Upload files and parse them; parsing is meant to run asynchronously
# (the trigger is still a TODO below).
@app.post("/upload_files_and_parse")
async def create_upload_files(files: List[UploadFile] = File(...)):
    """Create a task for the uploaded files and store them on disk.

    Returns a dict with ``result`` and ``message``; when the task row cannot
    be created the dict also carries ``code: 500``.
    """
    dir_id = check_and_create_directory(files)
    if not dir_id:
        return {"result": False, "code": 500, "message": "create directory failed"}
    flag, message = await upload_and_save_file(dir_id, files)
    if flag:
        # TODO: trigger the asynchronous parsing job for the stored files.
        pass
    return {"result": flag, "message": message}


@app.get("/export_task_data_to_excel")
def export_task_data_to_excel(task_id: str):
    """Export the extracted data of a finished task and report the outcome."""
    message = excel_service.export_task_data_to_excel(task_id)
    return {"message": message}


if __name__ == '__main__':
    uvicorn.run(app, host="127.0.0.1", port=3006)


# ---------------------------------------------------------------------------
# requirements.txt  (modified file) -- lines added by the patch:
#     SQLAlchemy
#     python-decouple
#     pandas
#     openpyxl
# python-multipart is already listed in requirements.txt (it appears as an
# unchanged context line of the hunk), so it is not added a second time.
# ---------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# service/excel_service.py  (new file)
# ---------------------------------------------------------------------------
from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy
import json
import pandas as pd
import pathlib
from decouple import config

BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')


# Export data to Excel.
def export_to_excel(task_id):
    """Write every successfully parsed resume of *task_id* to an .xlsx file.

    The workbook is written to ``BASE_PATH/<task_id>/<task_id>.xlsx``.

    Raises:
        json.JSONDecodeError: if a stored ``data_info`` value is not valid JSON.
    """
    session = SqliteSqlalchemy().session
    try:
        # Only rows with status 1 (succeeded) are exported.
        rows = session.query(DBRESUME).filter_by(task_id=task_id, status=1).all()
        # data_info is nullable; rows without extracted data are skipped
        # instead of crashing json.loads() with a TypeError.
        records = [json.loads(row.data_info) for row in rows if row.data_info]
    finally:
        session.close()

    out_dir = pathlib.Path(BASE_PATH).joinpath(task_id)
    # The folder is normally created at upload time; make sure it exists.
    out_dir.mkdir(parents=True, exist_ok=True)
    pd.DataFrame(records).to_excel(out_dir.joinpath(f"{task_id}.xlsx"), index=False)


def export_task_data_to_excel(task_id):
    """Export a task's data if the task exists and has succeeded.

    Returns:
        A user-facing status message (export done, or task unfinished/failed).
    """
    session = SqliteSqlalchemy().session
    try:
        task = session.query(DBTASK).filter_by(id=task_id).first()
        # Read the status while the session is still open.
        exportable = task is not None and task.status not in (0, 2)
    finally:
        session.close()

    if not exportable:
        return "任务未完成或者失败"
    export_to_excel(task_id)
    return "导出成功"


# ---------------------------------------------------------------------------
# service/file_service.py  (new file)
# ---------------------------------------------------------------------------
from db.sql_db import DBTASK, DBRESUME, SqliteSqlalchemy
import logging
import uuid
from datetime import datetime
from decouple import config
import pathlib
from fastapi import File, UploadFile
from typing import List, Tuple
import os

logger = logging.getLogger(__name__)

BASE_PATH = config('BASE_PATH', default='E://pyptoject//yj_resume//')

# File extensions accepted for upload (compared case-insensitively).
_ALLOWED_SUFFIXES = ('.doc', '.docx')


def check_and_create_directory(files):
    """Create the DBTASK row for an upload batch.

    Args:
        files: the uploaded files of the request.

    Returns:
        The new task id (a UUID4 string), or None when *files* is empty or
        the row could not be saved.
    """
    if not files:
        return None

    task_id = str(uuid.uuid4())
    # NOTE(review): total_num counts every uploaded file, including ones that
    # upload_and_save_file() later skips for having an unsupported extension.
    task = DBTASK(id=task_id, create_time=datetime.now(), status=0, success_num=0,
                  total_num=len(files), fail_num=0)

    session = SqliteSqlalchemy().session
    try:
        session.add(task)
        session.commit()
    except Exception as e:
        logger.exception("Failed to save DBTASK info error %s", e)
        session.rollback()
        return None
    finally:
        session.close()
    return task_id


async def upload_and_save_file(dir_id, files: List[UploadFile]) -> Tuple[bool, str]:
    """Store the supported uploads under BASE_PATH/<dir_id> and register them.

    Files whose extension is not .doc/.docx (any letter case) are skipped.
    Stored files are renamed to a running number plus the lower-cased
    extension (``1.docx``, ``2.doc``, ...).

    Returns:
        ``(True, "success")`` when the DBRESUME rows were committed,
        otherwise ``(False, <error message>)``.
    """
    target_dir = pathlib.Path(BASE_PATH).joinpath(dir_id)
    target_dir.mkdir(parents=True, exist_ok=True)

    records = []
    for file in files:
        # filename can be missing on an upload; treat that as "no extension".
        suffix = os.path.splitext(file.filename or '')[1].lower()
        if suffix not in _ALLOWED_SUFFIXES:
            continue
        stored_name = f"{len(records) + 1}{suffix}"
        # Read first so a failed read does not leave an empty file on disk.
        file_content = await file.read()
        with open(target_dir.joinpath(stored_name), 'wb') as f:
            f.write(file_content)
        records.append(DBRESUME(id=str(uuid.uuid4()), task_id=dir_id, status=0, file_name=stored_name))

    session = SqliteSqlalchemy().session
    try:
        session.bulk_save_objects(records)
        session.commit()
    except Exception as e:
        logger.exception("Failed to save DBRESUME error %s", e)
        session.rollback()
        return False, f"Failed to save DBRESUME error {e}"
    finally:
        session.close()
    return True, "success"