diff --git a/.env b/.env
new file mode 100644
index 0000000..a17a301
--- /dev/null
+++ b/.env
@@ -0,0 +1,22 @@
+IS_FIRST_LOAD=True
+
+CHAT_MODEL_BASE_URL=https://api.siliconflow.cn
+CHAT_MODEL_API_KEY=<your-chat-model-api-key>
+CHAT_MODEL_NAME=Qwen/Qwen3-Next-80B-A3B-Instruct
+
+EMBEDDING_MODEL_BASE_URL=https://api.siliconflow.cn
+EMBEDDING_MODEL_API_KEY=<your-embedding-model-api-key>
+EMBEDDING_MODEL_NAME=Qwen/Qwen3-Embedding-8B
+
+# Database type: mysql, sqlite, pg, etc.
+DATA_SOURCE_TYPE=sqlite
+
+# sqlite connection info
+SQLITE_DATABASE_URL=E://db/db_flights.sqlite
+
+# mysql connection info
+MYSQL_DATABASE_HOST=
+MYSQL_DATABASE_PORT=
+MYSQL_DATABASE_PASSWORD=
+MYSQL_DATABASE_USER=
+MYSQL_DATABASE_DBNAME=
diff --git a/main_service.py b/main_service.py
new file mode 100644
index 0000000..42eb408
--- /dev/null
+++ b/main_service.py
@@ -0,0 +1,115 @@
+from email.policy import default
+
+from service.cus_vanna_srevice import CustomVanna, QdrantClient
+from decouple import config
+import flask
+
+from flask import Flask, Response, jsonify, request, send_from_directory
+
+def connect_database(vn):
+    db_type = config('DATA_SOURCE_TYPE', default='sqlite')
+    if db_type == 'sqlite':
+        vn.connect_to_sqlite(config('SQLITE_DATABASE_URL', default=''))
+    elif db_type == 'mysql':
+        vn.connect_to_mysql(host=config('MYSQL_DATABASE_HOST', default=''),
+                            port=config('MYSQL_DATABASE_PORT', default=3306),
+                            user=config('MYSQL_DATABASE_USER', default=''),
+                            password=config('MYSQL_DATABASE_PASSWORD', default=''),
+                            database=config('MYSQL_DATABASE_DBNAME', default=''))
+    elif db_type == 'postgresql':
+        # TODO: PostgreSQL support not yet implemented
+        pass
+    else:
+        pass
+
+
+def load_train_data_ddl(vn: CustomVanna):
+    vn.train(ddl="""
+                 create table db_user
+                 (
+                     id        integer not null
+                         constraint db_user_pk
+                             primary key autoincrement,
+                     user_name TEXT    not null,
+                     age       integer not null,
+                     address   TEXT,
+                     gender    integer not null,
+                     email     TEXT
+                 )
+
+
+                 """)
+    vn.train(documentation='''
+                gender 字段 0代表女性,1代表男性;
+                查询address时,尽量使用like查询,如:select * from db_user where address like '%北京%';
+                语法为sqlite语法;
+        ''')
+
+
+def create_vana():
+    print("----------------create---------")
+    vn = CustomVanna(
+        vector_store_config={"client": QdrantClient(":memory:")},
+        llm_config={
+            "api_key": config('CHAT_MODEL_API_KEY', default=''),
+            "api_base": config('CHAT_MODEL_BASE_URL', default=''),
+            "model": config('CHAT_MODEL_NAME', default=''),
+        },
+    )
+    return vn
+
+
+def init_vn(vn):
+    print("--------------init vn-----connect----")
+    connect_database(vn)
+    if config('IS_FIRST_LOAD', default=False, cast=bool):
+        load_train_data_ddl(vn)
+    return vn
+
+
+from vanna.flask import VannaFlaskApp
+vn = create_vana()
+app = VannaFlaskApp(vn,chart=False)
+init_vn(vn)
+
+
+@app.flask_app.route("/api/v0/generate_sql_2", methods=["GET"])
+def generate_sql_2():
+    """
+    Generate SQL from a question
+    ---
+    parameters:
+      - name: user
+        in: query
+      - name: question
+        in: query
+        type: string
+        required: true
+    responses:
+      200:
+        schema:
+          type: object
+          properties:
+            type:
+              type: string
+              default: sql
+            id:
+              type: string
+            text:
+              type: string
+    """
+    question = flask.request.args.get("question")
+
+    if question is None:
+        return jsonify({"type": "error", "error": "No question provided"})
+
+    #id = self.cache.generate_id(question=question)
+    data = vn.generate_sql_2(question=question)
+
+
+    return jsonify(data)
+
+
+if __name__ == '__main__':
+
+    app.run(host='0.0.0.0', port=8084, debug=False)
diff --git a/requirement.txt b/requirement.txt
new file mode 100644
index 0000000..d6b6282
--- /dev/null
+++ b/requirement.txt
@@ -0,0 +1,4 @@
+vanna==0.7.9
+vanna[openai]
+vanna[qdrant]
+python-decouple==3.8
\ No newline at end of file
diff --git a/service/__init__.py b/service/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/service/cus_vanna_srevice.py b/service/cus_vanna_srevice.py
new file mode 100644
index 0000000..e104585
--- /dev/null
+++ b/service/cus_vanna_srevice.py
@@ -0,0 +1,217 @@
+from email.policy import default
+from typing import List
+
+import orjson
+from vanna.base import VannaBase
+from vanna.qdrant import Qdrant_VectorStore
+from qdrant_client import QdrantClient
+from openai import OpenAI
+import requests
+from decouple import config
+from util.utils import extract_nested_json, check_and_get_sql, get_chart_type_from_sql_answer
+import json
+from template.template import get_base_template
+from datetime import datetime
+
+
+class OpenAICompatibleLLM(VannaBase):
+    def __init__(self, client=None, config_file=None):
+        VannaBase.__init__(self, config=config_file)
+
+        # default parameters - can be overridden using config
+        self.temperature = 0.5
+        self.max_tokens = 5000
+
+        if "temperature" in config_file:
+            self.temperature = config_file["temperature"]
+
+        if "max_tokens" in config_file:
+            self.max_tokens = config_file["max_tokens"]
+
+        if "api_type" in config_file:
+            raise Exception(
+                "Passing api_type is now deprecated. Please pass an OpenAI client instead."
+            )
+
+        if "api_version" in config_file:
+            raise Exception(
+                "Passing api_version is now deprecated. Please pass an OpenAI client instead."
+            )
+
+        if client is not None:
+            self.client = client
+            return
+
+        if "api_base" not in config_file:
+            raise Exception("Please passing api_base")
+
+        if "api_key" not in config_file:
+            raise Exception("Please passing api_key")
+
+        self.client = OpenAI(api_key=config_file["api_key"], base_url=config_file["api_base"])
+
+    def system_message(self, message: str) -> any:
+        return {"role": "system", "content": message}
+
+    def user_message(self, message: str) -> any:
+        return {"role": "user", "content": message}
+
+    def assistant_message(self, message: str) -> any:
+        return {"role": "assistant", "content": message}
+
+    def submit_prompt(self, prompt, **kwargs) -> str:
+        if prompt is None:
+            raise Exception("Prompt is None")
+
+        if len(prompt) == 0:
+            raise Exception("Prompt is empty")
+        print(prompt)
+
+        num_tokens = 0
+        for message in prompt:
+            num_tokens += len(message["content"]) / 4
+
+        if kwargs.get("model", None) is not None:
+            model = kwargs.get("model", None)
+            print(
+                f"Using model {model} for {num_tokens} tokens (approx)"
+            )
+            response = self.client.chat.completions.create(
+                model=model,
+                messages=prompt,
+                max_tokens=self.max_tokens,
+                stop=None,
+                temperature=self.temperature,
+            )
+        elif kwargs.get("engine", None) is not None:
+            engine = kwargs.get("engine", None)
+            print(
+                f"Using model {engine} for {num_tokens} tokens (approx)"
+            )
+            response = self.client.chat.completions.create(
+                engine=engine,
+                messages=prompt,
+                max_tokens=self.max_tokens,
+                stop=None,
+                temperature=self.temperature,
+            )
+        elif self.config is not None and "engine" in self.config:
+            print(
+                f"Using engine {self.config['engine']} for {num_tokens} tokens (approx)"
+            )
+            response = self.client.chat.completions.create(
+                engine=self.config["engine"],
+                messages=prompt,
+                max_tokens=self.max_tokens,
+                stop=None,
+                temperature=self.temperature,
+            )
+        elif self.config is not None and "model" in self.config:
+            print(
+                f"Using model {self.config['model']} for {num_tokens} tokens (approx)"
+            )
+            response = self.client.chat.completions.create(
+                model=self.config["model"],
+                messages=prompt,
+                max_tokens=self.max_tokens,
+                stop=None,
+                temperature=self.temperature,
+            )
+        else:
+            if num_tokens > 3500:
+                model = "kimi"
+            else:
+                model = "doubao"
+
+            print(f"Using model {model} for {num_tokens} tokens (approx)")
+
+            response = self.client.chat.completions.create(
+                model=model,
+                messages=prompt,
+                max_tokens=self.max_tokens,
+                stop=None,
+                temperature=self.temperature,
+            )
+
+        for choice in response.choices:
+            if "text" in choice:
+                return choice.text
+
+        return response.choices[0].message.content
+
+    def generate_sql_2(self, question: str, allow_llm_to_see_data=False, **kwargs) -> dict:
+        question_sql_list = self.get_similar_question_sql(question, **kwargs)
+        ddl_list = self.get_related_ddl(question, **kwargs)
+        doc_list = self.get_related_documentation(question, **kwargs)
+        template = get_base_template()
+        sql_temp = template['template']['sql']
+        char_temp = template['template']['chart']
+        # -------- Generate the SQL and chart type from the prompt templates
+        sys_temp = sql_temp['system'].format(engine='sqlite', lang='中文', schema=ddl_list, documentation=doc_list,
+                                             data_training=question_sql_list)
+        user_temp = sql_temp['user'].format(question=question,
+                                            current_time=datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
+        llm_response = self.submit_prompt(
+            [{'role': 'system', 'content': sys_temp}, {'role': 'user', 'content': user_temp}], **kwargs)
+        print(llm_response)
+        result = {"resp": orjson.loads(extract_nested_json(llm_response))}
+
+        sql = check_and_get_sql(llm_response)
+        # --------------- Generate the chart configuration
+        char_type = get_chart_type_from_sql_answer(llm_response)
+        if char_type:
+            sys_char_temp = char_temp['system'].format(engine='sqlite', lang='中文', sql=sql, chart_type=char_type)
+            user_char_temp = char_temp['user'].format(sql=sql, chart_type=char_type, question=question)
+            llm_response2 = self.submit_prompt(
+                [{'role': 'system', 'content': sys_char_temp}, {'role': 'user', 'content': user_char_temp}], **kwargs)
+            print(llm_response2)
+            result['chart'] = orjson.loads(extract_nested_json(llm_response2))
+        return result
+
+
+class CustomQdrant_VectorStore(Qdrant_VectorStore):
+    def __init__(
+            self,
+            config_file={}
+    ):
+        self.embedding_model_name = config('EMBEDDING_MODEL_NAME', default='')
+        self.embedding_api_base = config('EMBEDDING_MODEL_BASE_URL', default='')
+        self.embedding_api_key = config('EMBEDDING_MODEL_API_KEY', default='')
+        super().__init__(config_file)
+
+    def generate_embedding(self, data: str, **kwargs) -> List[float]:
+        def _get_error_string(response: requests.Response) -> str:
+            try:
+                if response.content:
+                    return response.json()["detail"]
+            except Exception:
+                pass
+            try:
+                response.raise_for_status()
+            except requests.HTTPError as e:
+                return str(e)
+            return "Unknown error"
+
+        request_body = {
+            "model": self.embedding_model_name,
+            "input": data,
+        }
+        request_body.update(kwargs)
+
+        response = requests.post(
+            url=f"{self.embedding_api_base}/v1/embeddings",
+            json=request_body,
+            headers={"Authorization": f"Bearer {self.embedding_api_key}"},
+        )
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Failed to create the embeddings, detail: {_get_error_string(response)}"
+            )
+        result = response.json()
+        embeddings = [d["embedding"] for d in result["data"]]
+        return embeddings[0]
+
+class CustomVanna(CustomQdrant_VectorStore, OpenAICompatibleLLM):
+    def __init__(self, llm_config=None, vector_store_config=None):
+        CustomQdrant_VectorStore.__init__(self, config_file=vector_store_config)
+        OpenAICompatibleLLM.__init__(self, config_file=llm_config)
\ No newline at end of file
diff --git a/template.yaml b/template.yaml
new file mode 100644
index 0000000..0e00b95
--- /dev/null
+++ b/template.yaml
@@ -0,0 +1,500 @@
+template:
+  terminology: |
+    
+    {terminologies}
+  data_training: |
+    
+    {data_training}
+  sql:
+    system: |
+      
+        你是"SQLBOT",智能问数小助手,可以根据用户提问,专业生成SQL与可视化图表。
+        你当前的任务是根据给定的表结构和用户问题生成SQL语句、可能适合展示的图表类型以及该SQL中所用到的表名。
+        我们会在块内提供给你信息,帮助你生成SQL:
+          内有等信息;
+          其中,:提供数据库引擎及版本信息;
+          :以 M-Schema 格式提供数据库表结构信息;
+          :提供一组术语,块内每一个就是术语,其中同一个内的多个代表术语的多种叫法,也就是术语与它的同义词,即该术语对应的描述,其中也可能是能够用来参考的计算公式,或者是一些其他的查询条件
+          :提供一组SQL示例,你可以参考这些示例来生成你的回答,其中内是提问,内是对于该提问的解释或者对应应该回答的SQL示例
+        用户的提问在内,内则会提供上次执行你提供的SQL时会出现的错误信息,内的会告诉你用户当前提问的时间
+      
+      
+      你必须遵守以下规则:
+      
+        
+          请使用语言:{lang} 回答,若有深度思考过程,则思考过程也需要使用 {lang} 输出
+        
+        
+          你只能生成查询用的SQL语句,不得生成增删改相关或操作数据库以及操作数据库数据的SQL
+        
+        
+          不要编造内没有提供给你的表结构
+        
+        
+          生成的SQL必须符合内提供数据库引擎的规范
+        
+        
+          若用户提问中提供了参考SQL,你需要判断该SQL是否是查询语句
+        
+        
+          请使用JSON格式返回你的回答:
+          若能生成,则返回格式如:{{"success":true,"sql":"你生成的SQL语句","tables":["该SQL用到的表名1","该SQL用到的表名2",...],"chart-type":"table"}}
+          若不能生成,则返回格式如:{{"success":false,"message":"说明无法生成SQL的原因"}}
+        
+        
+          如果问题是图表展示相关,可参考的图表类型为表格(table)、柱状图(column)、条形图(bar)、折线图(line)或饼图(pie), 返回的JSON内chart-type值则为 table/column/bar/line/pie 中的一个
+          图表类型选择原则推荐:趋势 over time 用 line,分类对比用 column/bar,占比用 pie,原始数据查看用 table
+        
+        
+          如果问题是图表展示相关且与生成SQL查询无关时,请参考上一次回答的SQL来生成SQL
+        
+        
+          返回的JSON字段中,tables字段为你回答的SQL中所用到的表名,不要包含schema和database,用数组返回
+        
+        
+          提问中如果有涉及数据源名称或数据源描述的内容,则忽略数据源的信息,直接根据剩余内容生成SQL
+        
+        
+          根据表结构生成SQL语句,需给每个表名生成一个别名(不要加AS)
+        
+        
+          SQL查询中不能使用星号(*),必须明确指定字段名
+        
+        
+          SQL查询的字段名不要自动翻译,别名必须为英文
+        
+        
+          SQL查询的字段若是函数字段,如 COUNT(),CAST() 等,必须加上别名
+        
+        
+          计算占比,百分比类型字段,保留两位小数,以%结尾
+        
+        
+          生成SQL时,必须避免与数据库关键字冲突
+        
+        
+          如数据库引擎是 PostgreSQL、Oracle、ClickHouse、达梦(DM)、AWS Redshift、Elasticsearch,则在schema、表名、字段名、别名外层加双引号;
+          如数据库引擎是 MySQL、Doris,则在表名、字段名、别名外层加反引号;
+          如数据库引擎是 Microsoft SQL Server,则在schema、表名、字段名、别名外层加方括号。
+          
+          以PostgreSQL为例,查询Schema为TEST表TABLE下前1000条id字段,则生成的SQL为:
+            SELECT "id" FROM "TEST"."TABLE" LIMIT 1000
+            - 注意在表名外双引号的位置,千万不要生成为:
+              SELECT "id" FROM "TEST.TABLE" LIMIT 1000
+          以Microsoft SQL Server为例,查询Schema为TEST表TABLE下前1000条id字段,则生成的SQL为:
+            SELECT TOP 1000 [id] FROM [TEST].[TABLE]
+            - 注意在表名外方括号的位置,千万不要生成为:
+              SELECT TOP 1000 [id] FROM [TEST.TABLE]
+          
+        
+        
+          如果生成SQL的字段内有时间格式的字段:
+          - 若提问中没有指定查询顺序,则默认按时间升序排序
+          - 若提问是时间,且没有指定具体格式,则格式化为yyyy-MM-dd HH:mm:ss的格式
+          - 若提问是日期,且没有指定具体格式,则格式化为yyyy-MM-dd的格式
+          - 若提问是年月,且没有指定具体格式,则格式化为yyyy-MM的格式
+          - 若提问是年,且没有指定具体格式,则格式化为yyyy的格式
+          - 生成的格式化语法需要适配对应的数据库引擎。
+        
+        
+          生成的SQL查询结果可以用来进行图表展示,需要注意排序字段的排序优先级,例如:
+            - 柱状图或折线图:适合展示在横轴的字段优先排序,若SQL包含分类字段,则分类字段次一级排序
+        
+        
+          如果用户没有指定数据条数的限制,输出的查询SQL必须加上1000条的数据条数限制
+          如果用户指定的限制大于1000,则按1000处理
+          
+          以PostgreSQL为例,查询Schema为TEST表TABLE下id字段,则生成的SQL为:
+            SELECT "id" FROM "TEST"."TABLE" LIMIT 1000
+          以Microsoft SQL Server为例,查询Schema为TEST表TABLE下id字段,则生成的SQL为:
+            SELECT TOP 1000 [id] FROM [TEST].[TABLE]
+          
+        
+        
+          若需关联多表,优先使用中标记为"Primary key"/"ID"/"主键"的字段作为关联条件。
+        
+        
+          我们目前的情况适用于单指标、多分类的场景(展示table除外)
+        
+      
+      
+      ### 以下帮助你理解问题及返回格式的例子,不要将内的表结构用来回答用户的问题,内的为后续用户提问传入的内容,