Files
sqlbot_agent/main_service.py

508 lines
19 KiB
Python
Raw Normal View History

2025-10-13 18:18:58 +08:00
import copy
2025-11-28 15:26:51 +08:00
import json
2025-09-24 14:39:42 +08:00
import logging
import uuid
2025-11-06 16:23:51 +08:00
import time
2025-10-13 18:18:58 +08:00
from functools import wraps
2025-09-25 16:49:25 +08:00
import util.utils
2025-09-24 14:39:42 +08:00
from logging_config import LOGGING_CONFIG
from service.cus_vanna_srevice import CustomVanna, QdrantClient, TTLCacheWrapper
2025-11-07 18:03:59 +08:00
from service.question_feedback_service import save_save_question_async, query_predefined_question_list, \
update_user_feedBack, query_feedBack_question_list
2025-11-28 15:26:51 +08:00
from service.conversation_service import (save_conversation, update_conversation,
get_qa_by_id, get_latest_question,get_all_conversations_by_user)
2025-09-23 14:49:00 +08:00
from decouple import config
import flask
from util import load_ddl_doc
2025-10-15 10:28:34 +08:00
from flask import Flask, Response, jsonify, request
2025-11-06 12:23:07 +08:00
from graph_chat.gen_sql_chart_agent import SqlAgentState, sql_chart_agent
from graph_chat.gen_data_report_agent import result_report_agent, DateReportAgentState
2025-11-06 12:23:07 +08:00
import traceback
2025-09-24 14:39:42 +08:00
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
2025-11-28 15:26:51 +08:00
# Set the 'sqlalchemy.engine' logger to DEBUG.
# NOTE(review): at this level SQLAlchemy presumably logs statements and result
# rows; confirm this verbosity is intended outside development.
logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG)
2025-11-06 16:23:51 +08:00
def generate_timestamp_id():
    """Build an ID from the current wall-clock time.

    Returns:
        The letter ``Q`` followed by the current Unix time in whole
        milliseconds.
    """
    # time.time() is in seconds; scale to milliseconds before truncating.
    millis = int(time.time() * 1000)
    return "Q" + str(millis)
2025-09-23 14:49:00 +08:00
def connect_database(vn):
    """Connect *vn* to the data source selected by ``DATA_SOURCE_TYPE``.

    Connection settings are read through ``config``. Recognised values are
    ``sqlite`` (the default), ``mysql`` and ``dameng``. Any other value leaves
    *vn* unconnected; a warning is logged so the misconfiguration is visible.

    Args:
        vn: Object exposing ``connect_to_sqlite``, ``connect_to_mysql`` and
            ``connect_to_dameng``.
    """
    db_type = config('DATA_SOURCE_TYPE', default='sqlite')
    if db_type == 'sqlite':
        vn.connect_to_sqlite(config('SQLITE_DATABASE_URL', default=''))
    elif db_type == 'mysql':
        vn.connect_to_mysql(
            host=config('MYSQL_DATABASE_HOST', default=''),
            port=int(config('MYSQL_DATABASE_PORT', default=3306)),
            user=config('MYSQL_DATABASE_USER', default=''),
            password=config('MYSQL_DATABASE_PASSWORD', default=''),
            database=config('MYSQL_DATABASE_DBNAME', default=''),
        )
    elif db_type == 'dameng':
        # TODO: Dameng support is still to be completed.
        # NOTE(review): 3306 is the MySQL port; confirm the intended default
        # for Dameng.
        vn.connect_to_dameng(
            host=config('DAMENG_DATABASE_HOST', default=''),
            # Cast so the port has the same type whether it comes from the
            # environment (str) or from the default (int).
            port=int(config('DAMENG_DATABASE_PORT', default=3306)),
            user=config('DAMENG_DATABASE_USER', default=''),
            password=config('DAMENG_DATABASE_PASSWORD', default=''),
        )
    else:
        # Still a no-op for unknown types, but no longer a silent one.
        logger.warning(
            "Unsupported DATA_SOURCE_TYPE %r; no database connection was made",
            db_type,
        )
def load_train_data_ddl(vn: CustomVanna):
    """Trigger training on *vn* by calling its ``train()`` method with no arguments."""
    vn.train()
2025-09-23 14:49:00 +08:00
def create_vana():
    """Create the ``CustomVanna`` instance used by this service.

    The Qdrant client is in-memory when ``QDRANT_TYPE`` is ``memory`` (the
    default); otherwise it is built from ``QDRANT_DB_HOST`` and
    ``QDRANT_DB_PORT``. Chat-model settings are read from the
    ``CHAT_MODEL_*`` configuration keys.

    Returns:
        A new ``CustomVanna`` configured with the vector store client and the
        LLM settings.
    """
    logger.info("----------------create vana ---------")
    if config('QDRANT_TYPE', default='memory') == 'memory':
        q_client = QdrantClient(":memory:")
    else:
        q_client = QdrantClient(
            url=config('QDRANT_DB_HOST', default=''),
            # Values read from the environment are strings; cast so the port
            # is an int regardless of where it came from.
            port=config('QDRANT_DB_PORT', default=6333, cast=int),
        )

    return CustomVanna(
        vector_store_config={"client": q_client},
        llm_config={
            "api_key": config('CHAT_MODEL_API_KEY', default=''),
            "api_base": config('CHAT_MODEL_BASE_URL', default=''),
            "model": config('CHAT_MODEL_NAME', default=''),
            'temperature': config('CHAT_MODEL_TEMPERATURE', default=0.7, cast=float),
            # Same reasoning as the port: keep max_tokens numeric.
            'max_tokens': config('CHAT_MODEL_MAX_TOKEN', default=5000, cast=int),
        },
    )
def init_vn(vn):
    """Connect *vn* to its data source and load its training data.

    Args:
        vn: The Vanna instance to initialise; it is passed to
            ``connect_database``.

    Returns:
        The same *vn* object.
    """
    logger.info("--------------init vana-----connect to datasouce db----")
    connect_database(vn)
    # DDL and documentation are loaded only when IS_FIRST_LOAD is truthy.
    if config('IS_FIRST_LOAD', default=False, cast=bool):
        load_ddl_doc.add_ddl(vn)
        load_ddl_doc.add_documentation(vn)
    # NOTE(review): the extracted source lost its indentation, so it is
    # unclear whether this call belongs inside the IS_FIRST_LOAD branch;
    # confirm against the repository.
    load_train_data_ddl(vn)
    return vn
from vanna.flask import VannaFlaskApp
2025-09-23 14:49:00 +08:00
# Build the Vanna instance and wrap it in the Vanna Flask application
# (chart disabled).
vn = create_vana()
app = VannaFlaskApp(vn, chart=False)
# Wrap the app's cache in TTLCacheWrapper; TTL_CACHE defaults to 60 * 60.
# NOTE(review): the unit is presumably seconds; confirm in TTLCacheWrapper.
app.cache = TTLCacheWrapper(app.cache, ttl=config('TTL_CACHE', cast=int, default=60 * 60))
2025-09-23 14:49:00 +08:00
# Connect the data source and load training data at import time.
init_vn(vn)
# Module-level alias for the wrapped cache.
cache = app.cache
2025-09-29 11:22:56 +08:00
@app.flask_app.route("/yj_sqlbot/api/v0/generate_sql_2", methods=["GET"])
def generate_sql_2():
    """
    Generate SQL from a question
    ---
    parameters:
      - name: user_id
        in: query
        required: true
      - name: cvs_id
        in: query
        required: true
      - name: question
        in: query
        required: true
      - name: need_context
        in: query
    responses:
      200:
        schema:
          type: object
          properties:
            type:
              type: string
              default: success
            id:
              type: string
            resp:
              type: object
    """
    # Flow: validate the query parameters, save the conversation under a
    # fresh timestamp ID, ask ``vn`` for SQL, persist the SQL, and return
    # the generator's payload with ``id`` and ``type`` added. Any failure is
    # returned as ``{"type": "error", "error": ...}``.
    logger.info("Start to generate sql in main")
    question = request.args.get("question")
    if question is None:
        return jsonify({"type": "error", "error": "No question provided"})

    user_id = request.args.get("user_id")
    cvs_id = request.args.get("cvs_id")
    if user_id is None or cvs_id is None:
        return jsonify({"type": "error", "error": "No user_id or cvs_id provided"})

    # bool() of any non-empty string is True, so "false"/"0" must be
    # recognised explicitly. Every other non-empty value still enables
    # context, as before.
    raw_need_context = request.args.get("need_context") or ""
    need_context = raw_need_context.strip().lower() not in {"", "0", "false", "no", "off"}

    question_id = generate_timestamp_id()
    logger.info(f"question_id: {question_id} user_id: {user_id} cvs_id: {cvs_id} question: {question}")
    try:
        # Inside the try so a persistence failure is reported as a JSON
        # error like every other failure of this endpoint.
        save_conversation(question_id, user_id, cvs_id, question)
        logger.info(f"Generate sql for {question}")
        data = vn.generate_sql_2(user_id, cvs_id, question, question_id, need_context)
        logger.info("Generate sql result is {0}".format(data))
        data['id'] = question_id
        sql = data["resp"]["sql"]
        logger.info("generate sql is : " + sql)
        update_conversation(question_id, sql)
        save_save_question_async(question_id, user_id, question, sql)
        data["type"] = "success"
        return jsonify(data)
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"generate sql failed:{e}")
        return jsonify({"type": "error", "error": str(e)})
2025-11-06 16:23:51 +08:00
# def requires_cache_2(required_keys):
# def decorator(f):
# @wraps(f)
# def decorated(*args, **kwargs):
# id = request.args.get("id")
# user_id = request.args.get("user_id")
# if user_id is None:
# user_id = request.json.get("user_id")
# if user_id is None:
# return jsonify({"type": "error", "error": "No user_id provided"})
# if id is None:
# id = request.json.get("id")
# if id is None:
# return jsonify({"type": "error", "error": "No id provided"})
# all_v = cache.items()
# logger.info(f"all values {all_v}")
# logger.info(f"user {user_id} id {id}")
# qa_list = cache.get(id=user_id, field="qa_list")
# if qa_list is None:
# return jsonify({"type": "error", "error": f"No qa_list found"})
# logger.info(f"qa_list {qa_list}")
# q_a = list(filter(lambda x: x["id"] == id, qa_list))
# logger.info(f"q_a {q_a}")
# for key in required_keys:
# if q_a[0][key] is None:
# return jsonify({"type": "error", "error": f"No {key} found for id:{id}"})
# values = {key:q_a[0][key] for key in required_keys}
# values["id"] = id
# logger.info("cache values {0}".format(values))
#
# return f(*args, **values, **kwargs)
#
# return decorated
#
# return decorator
# def session_save(func):
# @wraps(func)
# def wrapper(*args, **kwargs):
# id = request.args.get("id")
# user_id = request.args.get("user_id")
# logger.info(f" id: {id},user_id: {user_id}")
# result = func(*args, **kwargs)
#
# datas = []
# session_len = int(config("SESSION_LENGTH", default=2))
# if cache.exists(id=user_id, field="qa_list"):
# datas = copy.deepcopy(cache.get(id=user_id, field="qa_list"))
# logger.info("datas is {0}".format(datas))
# if len(datas) > session_len and session_len > 0:
# logger.info(f"开始裁剪-------------------------------------")
# datas=datas[-session_len:]
# # 删除id对应的所有缓存值,因为已经run_sql完毕改用user_id保存为上下文
# # cache.delete(id=id, field="question")
# print("datas---------------------{0}".format(datas))
# cache.set(id=user_id, field="qa_list", value=copy.deepcopy(datas))
# logger.info(f" user data {cache.get(user_id, field='qa_list')}")
# return result
#
# return wrapper
2025-10-13 18:18:58 +08:00
2025-09-24 14:39:42 +08:00
2025-09-29 11:22:56 +08:00
@app.flask_app.route("/yj_sqlbot/api/v0/run_sql_2", methods=["GET"])
# @session_save
# @requires_cache_2(required_keys=["sql"])
def run_sql_2():
    """
    Run SQL
    ---
    parameters:
      - name: user_id
        in: query
        required: true
      - name: id
        in: query|body
        type: string
        required: true
    responses:
      200:
        schema:
          type: object
          properties:
            type:
              type: string
              default: success
            id:
              type: string
            df:
              type: array
    """
    # Flow: look up the stored SQL for ``id``, run it through ``vn`` and
    # return the rows as a list of records. Only ``id`` is read from the
    # query string by this handler.
    logger.info("Start to run sql in main")
    try:
        question_id = request.args.get("id")
        if not question_id:
            return jsonify({"type": "error", "error": "No id provided"})

        qa = get_qa_by_id(question_id)
        # Without this guard an unknown id fails on ``qa["sql"]`` and is
        # reported as a misleading "sql_error".
        if not qa:
            return jsonify({"type": "error", "error": f'获取对话失败:{question_id}'})

        sql = qa["sql"]
        logger.info(f"sql is {sql}")
        if not vn.run_sql_is_set:
            return jsonify(
                {
                    "type": "error",
                    "error": "Please connect to a database using vn.connect_to_... in order to run SQL queries.",
                }
            )

        df = vn.run_sql_2(sql=sql)
        result = df.to_dict(orient='records')
        logger.info("df ---------------{0} {1}".format(result, type(result)))
        return jsonify(
            {
                "type": "success",
                "id": question_id,
                "df": result,
            }
        )
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"run sql failed:{e}")
        return jsonify({"type": "sql_error", "error": str(e)})
2025-09-23 14:49:00 +08:00
2025-10-14 10:30:17 +08:00
@app.flask_app.route("/yj_sqlbot/api/v0/verify", methods=["GET"])
def verify_user():
    """Report whether the ``user_id`` query parameter is an allowed user.

    ``ALLOWED_USERS`` is read through ``config`` as a comma-separated list.
    Surrounding whitespace is ignored and empty entries are dropped, so an
    unset or empty setting authorises nobody (including an empty
    ``user_id``).

    Returns:
        JSON ``{"type": "success", "verify": <bool>}``, or
        ``{"type": "error", "error": ...}`` if anything raises.
    """
    try:
        user_id = request.args.get("user_id")
        raw_users = config('ALLOWED_USERS', default='')
        # ''.split(',') yields [''], which would otherwise match an empty
        # user_id; strip and drop blanks before comparing.
        users = [name.strip() for name in raw_users.split(',') if name.strip()]
        logger.info(f"allowed users {users}")
        # Membership test checks every entry, not just the first one.
        return jsonify({"type": "success", "verify": user_id in users})
    except Exception as e:
        logger.exception(f"verify user failed:{e}")
        return jsonify({"type": "error", "error": str(e)})
@app.flask_app.route("/yj_sqlbot/api/v0/query_present_question", methods=["GET"])
def query_present_question():
    """Return the predefined question list.

    Returns:
        JSON ``{"type": "success", "data": ...}`` with the result of
        ``query_predefined_question_list()``, or
        ``{"type": "error", "error": ...}`` if anything raises.
    """
    try:
        payload = {"type": "success", "data": query_predefined_question_list()}
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"查询预制问题失败 failed:{exc}")
        failure = {"type": "error", "error": f'查询预制问题失败:{str(exc)}'}
        return jsonify(failure)
@app.flask_app.route("/yj_sqlbot/api/v0/query_feedback_question", methods=["POST"])
def query_feedback_question():
    """Return feedback records for the question IDs in the JSON body.

    Body:
        ``{"id_list": [...]}``; ``id_list`` defaults to an empty list.

    Returns:
        JSON ``{"type": "success", "data": ...}`` with the result of
        ``query_feedBack_question_list(id_list)``, or
        ``{"type": "error", "error": ...}`` if anything raises, including a
        missing or malformed JSON body.
    """
    try:
        # Parsed inside the try so a bad body follows the same JSON error
        # convention as every other failure.
        id_list = request.json.get("id_list", [])
        data = query_feedBack_question_list(id_list)
        return jsonify({"type": "success", "data": data})
    except Exception as e:
        logger.exception(f"查询用户反馈问题失败 failed:{e}")
        return jsonify({"type": "error", "error": f'查询用户反馈问题失败:{str(e)}'})
2025-11-07 18:03:59 +08:00
@app.flask_app.route("/yj_sqlbot/api/v0/question_feed_back", methods=["PUT"])
def update_question_feed_back():
    """Store the user's feedback for one question.

    Body:
        ``{"id": ..., "user_feedback": ...}``; both are required and must be
        truthy.

    Returns:
        JSON ``{"type": "success"}`` after ``update_user_feedBack`` returns,
        or ``{"type": "error", "error": ...}`` when a field is missing or
        anything raises (including a missing or malformed JSON body).
    """
    try:
        # Parsed inside the try so a bad body is reported as a JSON error.
        payload = request.json
        feedback_id = payload.get("id")
        user_feedback = payload.get("user_feedback")
        if not feedback_id or not user_feedback:
            return jsonify({"type": "error", "error": "id 或者用户反馈为空"})

        update_user_feedBack(feedback_id, '', user_feedback)
        return jsonify({"type": "success"})
    except Exception as e:
        # The previous message was copied from the predefined-question
        # endpoint; report the operation that actually failed.
        logger.exception(f"更新用户反馈失败 failed:{e}")
        return jsonify({"type": "error", "error": f'更新用户反馈失败:{str(e)}'})
2025-11-06 12:23:07 +08:00
@app.flask_app.route("/yj_sqlbot/api/v0/gen_graph_question", methods=["GET"])
def gen_graph_question():
    """Generate SQL and a chart configuration for a question via ``sql_chart_agent``.

    Query parameters:
        user_id, cvs_id, question: all required.

    Flow:
        1. Fetch up to two recent questions with ``get_latest_question`` and
           turn them into the agent's ``history`` (index 0 is flagged
           ``is_latest``).
        2. Save the conversation under a fresh UUID.
        3. Invoke the agent, then persist the rewritten question, the SQL,
           the chart configuration and, when SQL generation did not
           succeed, the error message.

    Returns:
        JSON with ``id``, ``sql``, ``chart``, ``gen_sql_error`` and
        ``gen_chart_error``; or ``{"type": "error", "error": ...}`` when a
        parameter is missing or anything raises.
    """
    try:
        user_id = request.args.get("user_id")
        logger.info(f"Into gen_graph_question => user {user_id}")
        cvs_id = request.args.get("cvs_id")
        question = request.args.get("question")
        if question is None:
            return jsonify({"type": "error", "error": "No question provided"})
        if user_id is None or cvs_id is None:
            return jsonify({"type": "error", "error": "No user_id or cvs_id provided"})

        # Named run_config so the module-level decouple ``config`` is not
        # shadowed inside this function.
        run_config = {"configurable": {"thread_id": str(uuid.uuid4())}}

        logger.info("Start to get question context")
        question_context = get_latest_question(cvs_id, user_id, limit_count=2) or []
        history = [
            {"role": "user", 'content': q, 'order': idx, 'is_latest': idx == 0}
            for idx, q in enumerate(question_context)
        ]
        logger.info(f"question context is {history}")

        initial_state: SqlAgentState = {
            "user_id": user_id,
            "user_question": question,
            "history": history,
            "sql_retry_count": 0,
            "chart_retry_count": 0,
        }
        conversation_id = str(uuid.uuid4())
        logger.info("Start to save conversation info (id,user_id,cvs_id,question)")
        save_conversation(conversation_id, user_id, cvs_id, question)

        logger.info("Enter the graph node=>gen_sql, gen_chart")
        result = sql_chart_agent.invoke(initial_state, config=run_config)
        logger.info("End====>gen_sql, gen_chart")

        # Fall back to the original question when the agent returns no
        # rewrite (key missing or explicitly None/empty).
        new_question = result.get('rewritten_user_question') or question
        if new_question:
            logger.info(f"new_question is {new_question}")
            update_conversation(id=conversation_id, meta={'new_question': new_question})

        # ``or {}`` also covers keys that are present but None.
        sql_result = result.get("gen_sql_result") or {}
        chart_cfg = result.get("gen_chart_result") or {}
        logger.info("gen_sql_result => {0}".format(sql_result))
        data = {
            'id': conversation_id,
            'sql': sql_result,
            'chart': chart_cfg,
            'gen_sql_error': result.get("gen_sql_error", None),
            'gen_chart_error': result.get("gen_chart_error", None),
        }

        sql = sql_result.get('sql', '')
        if not sql_result.get('success', False):
            logger.info("SQL generation failed.save error info to table")
            error_msg = sql_result.get('message', '')
            update_conversation(id=conversation_id, answer={'type_error': 'error', 'error': error_msg})

        update_conversation(id=conversation_id, sql=sql, chart_cfg=chart_cfg)
        save_save_question_async(conversation_id, user_id, new_question, sql)
        return jsonify(data)
    except Exception as e:
        # The previous message was copied from the predefined-question
        # endpoint; logger.exception also records the traceback.
        logger.exception(f"生成SQL和图表失败 failed:{e}")
        return jsonify({"type": "error", "error": f'生成SQL和图表失败:{str(e)}'})
2025-10-14 10:30:17 +08:00
@app.flask_app.route("/yj_sqlbot/api/v0/run_sql_3", methods=["GET"])
def run_sql_3():
    """Run the stored SQL for a conversation through ``result_report_agent``.

    Query parameters:
        id: conversation ID used with ``get_qa_by_id``.
        user_id, cvs_id: used to fetch recent questions as agent history.

    Returns:
        JSON with ``data``, ``summary`` and ``run_sql_error`` (a fixed
        message when the agent reported an error, otherwise ``None``).
        ``{"type": "error", ...}`` is returned when the conversation is not
        found, has no SQL, or no database is connected, and
        ``{"type": "sql_error", ...}`` when the run itself raises.
    """
    conversation_id = request.args.get("id")
    qa = get_qa_by_id(conversation_id)
    if not qa:
        return jsonify({"type": "error", "error": f'获取对话失败:{conversation_id}'})
    logger.info(f"Start to run_sql_3 => {qa}")

    sql = qa["sql"]
    if not sql:
        # No SQL was stored; surface the stored error if there is one.
        error_info = qa["answer"]
        if error_info:
            return jsonify({"type": "error", "error": json.loads(error_info).get("error", "")})
        return jsonify({"type": "error", "error": f'sql 生成失败,请联系管理员'})

    question = qa["question"]
    logger.info(f"in main sql {sql} question {question}")
    logger.info("Start to run sql in main")
    try:
        user_id = request.args.get("user_id")
        cvs_id = request.args.get("cvs_id")
        if not vn.run_sql_is_set:
            return jsonify(
                {
                    "type": "error",
                    "error": "Please connect to a database using "
                             "vn.connect_to_... in order to run SQL queries.",
                }
            )

        # ``or []`` mirrors the guard in gen_graph_question: a lookup that
        # returns nothing must not break the iteration below.
        question_context = get_latest_question(cvs_id, user_id, limit_count=2) or []
        history = [
            {"role": "user", 'content': q, 'order': idx, 'is_latest': idx == 0}
            for idx, q in enumerate(question_context)
        ]

        initial_state: DateReportAgentState = {
            "id": conversation_id,
            "user_id": user_id,
            "sql": sql,
            "question": question,
            "retry_count": 0,
            "history": history,
        }
        # Named run_config so the module-level decouple ``config`` is not
        # shadowed inside this function.
        run_config = {"configurable": {"thread_id": str(uuid.uuid4())}}
        rr = result_report_agent.invoke(initial_state, run_config)

        data = rr.get('data', {})
        summary = rr.get('summary', '')
        run_sql_error = rr.get('run_sql_error', '')
        logger.debug(f"data type is {type(data)} data is {data} summary is {summary}")
        if data and not run_sql_error:
            update_conversation(id=conversation_id, answer={'data': data, 'summary': summary})
        elif run_sql_error:
            update_conversation(id=conversation_id, answer={'type_error': 'error', 'error': "sql执行失败"})
        logger.info(f"run_Sql finish run_sql_error => {run_sql_error}")
        return jsonify(
            {
                'data': data,
                'summary': summary,
                # The raw error is only logged; clients get a fixed message.
                'run_sql_error': "sql执行失败" if run_sql_error else None,
            }
        )
    except Exception as e:
        logger.exception(f"run sql failed:{e}")
        return jsonify({"type": "sql_error", "error": str(e)})
2025-11-28 15:26:51 +08:00
@app.flask_app.route("/yj_sqlbot/api/v0/get_history", methods=["GET"])
def get_history():
    """Return one page of a user's conversation history.

    Query parameters:
        user_id: passed to ``get_all_conversations_by_user``.
        page_num: 1-based page number; missing, invalid or < 1 becomes 1.
        page_size: rows per page; missing, invalid, < 1 or > 100 becomes 10.

    Returns:
        JSON with ``total_pages``, ``total_count`` and ``history``. Each
        history entry has ``id``, ``question``, ``answer`` and ``chart_cfg``
        (both JSON-decoded, or ``None`` when empty) and ``user_praise``
        (``None`` when no feedback record exists for that conversation).
        On failure: ``{"type": "get history error", "error": ...}``.
    """
    logger.info(f"in main get history {request.args}")
    try:
        user_id = request.args.get("user_id")
        # type=int falls back to the default when the value is absent or
        # not an integer, instead of raising on int(None).
        page_num = request.args.get("page_num", default=1, type=int)
        page_size = request.args.get("page_size", default=10, type=int)
        if page_num < 1:
            page_num = 1
        if page_size < 1 or page_size > 100:
            page_size = 10
        offset = (page_num - 1) * page_size

        result = get_all_conversations_by_user(user_id, offset, page_size)
        rows = result.get('data') or []
        total_count = result.get('total_count', 0)
        # Ceiling division without floats.
        total_pages = (total_count + page_size - 1) // page_size
        logger.info(f"offset {offset} pagesize {page_size} total_count {total_count} total_pages {total_pages}")

        feedback_questions = query_feedBack_question_list([row.id for row in rows]) or []
        praise_by_id = {q["id"]: q["user_praise"] for q in feedback_questions}
        logger.info(f"map_list {praise_by_id}")

        history = [
            {
                'id': row.id,
                'question': row.question,
                'answer': json.loads(row.answer) if row.answer else None,
                'chart_cfg': json.loads(row.chart_cfg) if row.chart_cfg else None,
                # .get(): a conversation without feedback must not fail the
                # whole page with a KeyError.
                'user_praise': praise_by_id.get(row.id),
            }
            for row in rows
        ]
        return jsonify(
            {
                "total_pages": total_pages,
                "total_count": total_count,
                "history": history,
            }
        )
    except Exception as e:
        logger.exception(f"get history failed:{e}")
        return jsonify({"type": "get history error", "error": str(e)})
2025-09-23 14:49:00 +08:00
# Start the application only when this file is executed as a script.
if __name__ == '__main__':
    # Listens on all interfaces, port 8084, with debug disabled.
    app.run(host='0.0.0.0', port=8084, debug=False)