# -*- coding : utf-8 -*-


import re
import io
import os
import tqdm
import json
import copy
import random
import openai
import pymysql
import requests
import threadpool
import uuid


# from .util_tool import utils
from treelib import Tree
from openpyxl import load_workbook, Workbook
from concurrent.futures import ThreadPoolExecutor


# Instruction template for the script-selection ("dialogue") task.
# The three positional `{}` slots are, in order: the task requirement, the
# task's operating logic, and the '|'-joined candidate scripts. They are
# filled by `prompt.format(...)` inside `path_to_dialogue_data`.
# NOTE(review): rule 3 of the template asks for the literal "任务完成。" (with a
# full stop) while `path_to_dialogue_data` labels finished tasks as '任务完成'
# (no full stop) -- confirm which form the trained model should emit.
dialogue_prompt = '''任务的要求为：
{}
任务的运转逻辑是：
{}
任务的可选择话术为：
{}
你需要记住以下几点：
1.当任务的可选择话术为空时，判断任务已完成。
2.当任务的可选择话术不为空，根据对话记录，任务的要求判断任务是否完成。
3.当判断任务已完成，输出"任务完成。"。
4.当判断任务未完成，必须只能在可选择话术中选择一句话，选择的逻辑参考"任务的运转逻辑"和"任务的要求"。输出可选择话术中选择的那句话。
输入：'''

# Instruction template for the intent-labelling (NLU) task.
# It has only two `{}` slots: the task requirement and the task's operating
# logic. `str.format` ignores surplus positional arguments, so it can be passed
# to `path_to_dialogue_data` even though that function supplies three values.
nlu_prompt = '''任务的要求为：
{}
任务的运转逻辑是：
{}
你需要记住以下几点：
1.根据输入的对话记录，分析最后一次用户的表述的含义，给用户表述的含义打一个标签并给出理由。
输入：'''

def generate_uuid():
    """Return a freshly generated random (version 4) UUID as a string."""
    new_id = uuid.uuid4()
    return str(new_id)

def _make_w_io_base(f, mode: str):
    """Return a writable file object for ``f``.

    Args:
        f: An already open ``io.IOBase`` object (returned untouched) or a
            path to open.
        mode: Mode passed to ``open`` when ``f`` is a path.

    Returns:
        The open file object. Missing parent directories of a path are
        created first.
    """
    if isinstance(f, io.IOBase):
        return f
    f_dirname = os.path.dirname(f)
    if f_dirname != "":
        os.makedirs(f_dirname, exist_ok=True)
    # The data written here contains non-ASCII text (``ensure_ascii=False``),
    # so do not rely on the platform default encoding. Binary modes reject an
    # ``encoding`` argument, hence the conditional.
    return open(f, mode=mode, encoding=None if "b" in mode else "utf-8")


def _make_r_io_base(f, mode: str):
    """Return a readable file object for ``f``.

    Args:
        f: An already open ``io.IOBase`` object (returned untouched) or a
            path to open.
        mode: Mode passed to ``open`` when ``f`` is a path.

    Returns:
        The open file object; text modes are decoded as UTF-8 so that files
        produced by ``jdump`` round-trip on every platform.
    """
    if isinstance(f, io.IOBase):
        return f
    return open(f, mode=mode, encoding=None if "b" in mode else "utf-8")


def jdump(obj, f, mode="w", indent=4, default=str):
    """Dump a str or dictionary to a file in json format.

    Args:
        obj: An object to be written (``dict``/``list`` are serialised as
            JSON, ``str`` is written verbatim).
        f: A string path to the location on disk, or an open file object.
        mode: Mode for opening the file.
        indent: Indent for storing json dictionaries.
        default: A function to handle non-serializable entries; defaults to `str`.

    Raises:
        ValueError: If ``obj`` is not a ``dict``, ``list`` or ``str``.
    """
    # Reject unsupported payloads before opening, so an existing file is not
    # truncated by a call that cannot succeed.
    if not isinstance(obj, (dict, list, str)):
        raise ValueError(f"Unexpected type: {type(obj)}")

    f = _make_w_io_base(f, mode)
    try:
        if isinstance(obj, str):
            f.write(obj)
        else:
            json.dump(obj, f, indent=indent, default=default, ensure_ascii=False)
    finally:
        # Always release the handle, even when serialisation fails.
        f.close()


def jload(f, mode="r"):
    """Load a .json file into a dictionary.

    Args:
        f: A string path to the file, or an open file object.
        mode: Mode for opening the file.

    Returns:
        The decoded JSON document.
    """
    f = _make_r_io_base(f, mode)
    try:
        return json.load(f)
    finally:
        # Close the handle even when the content is not valid JSON.
        f.close()

# Script-selection training / test data generation
def bot_document_to_dialogue(file_path):
    """Build script-selection training samples from a bot flow workbook.

    The first sheet of the workbook at ``file_path`` is read row by row until
    a row whose task name is '流程文档结束'; rows whose stage is '整体兜底' are
    skipped. For every task the function collects entry conditions, reply
    scripts and a ``treelib.Tree`` of dialogue stages, then converts windows
    of every root-to-leaf path into samples via ``path_to_dialogue_data``.

    Side effects: prints the two sample counts and writes
    './v2_train_data_360UJD首贷.json' and './v2_nlu_train_data_360UJD首贷.json'.
    The NLU list is always empty here because the code that would fill it is
    disabled (kept only as a string literal).

    Args:
        file_path: Path of the workbook; also forwarded to
            ``bot_document_to_faq`` which reads the second sheet.

    Returns:
        None.
    """

    wb = load_workbook(file_path)
    ws = wb[wb.sheetnames[0]]
    FAQ_off_list, FAQ_name_list, FAQ_answer_dict = bot_document_to_faq(file_path)

    # Matches a bracketed marker such as 【...】 (greedy, single line).
    task_transfer_pattern = re.compile(r'【.*】')

    task_transfer = dict()
    task_dict = dict()
    task_name = ''
    slot_id = 1
    for i, row in enumerate(ws.values):
        # An empty first cell means the row belongs to the previous task.
        task_name = row[0] if row[0] else task_name
        if task_name == '流程文档结束':
            break

        task_finish_logic = row[1]
        task_run_logic = row[2]

        task_stage = str(row[3])
        task_condition = row[4]
        task_wav_no = row[5]
        task_seat_response = row[6]
        # NOTE(review): raises AttributeError when the cell in column 8 is empty.
        task_simple_seat_response = row[7].replace('【faq答案】+', 'faq')
        task_label = '['+row[9]+']' if row[9] else ''
        task_response_label = '[' + row[8] + ']' if row[8] else ''
        task_break_label = row[13].replace('开场支持打断的标签：\n', '') if row[13] else ''
        # print(task_name, task_stage, task_condition)
        if task_stage == '整体兜底':
            continue


        # Initialise the task entry (a positional list, indices noted below)
        if task_name not in task_dict:
            task_dict[task_name] = [
                # [0] entry conditions
                dict(),
                # [1] selectable simplified reply scripts
                list(),
                # [2] real, complete reply scripts
                list(),
                # [3] logic for judging whether the task is finished
                task_finish_logic,
                # [4] task operating logic
                task_run_logic,
                # [5] flow routes stored as trees
                list(),
                # [6] label for each script (may be empty)
                list()
            ]

        # Build the task dictionary and the task-transfer dictionary
        if '.' not in task_stage:
            # Top-level stage: one entry per line of the condition cell.
            task_conditions = task_condition.split('\n') if '\n' in task_condition else [task_condition, ]
            for condition in task_conditions:

                task_dict[task_name][0][condition] = [task_simple_seat_response,
                                                      '《槽位id：{}》'.format(slot_id)+'@#'+task_wav_no+'||'+task_seat_response+'#@']
                task_dict[task_name][1].append(task_simple_seat_response)
                # NOTE(review): this literal uses an ASCII ':' whereas every other
                # slot-id literal in this function uses the full-width '：' -- confirm.
                task_dict[task_name][2].append(
                    '《槽位id:{}》'.format(slot_id) + '@#' + task_wav_no + '||' + task_seat_response + '#@')
                task_dict[task_name][6].append(task_response_label)
                slot_id += 1

        elif '【' in task_simple_seat_response and '】' in task_simple_seat_response:
            result = task_transfer_pattern.search(task_simple_seat_response)
            if result.group() == '【faq答案】' and \
                    task_simple_seat_response.replace('【faq答案】+', 'faq') not in task_dict[task_name][1]:
                task_dict[task_name][1].append(task_simple_seat_response.replace('【faq答案】+', 'faq'))
                task_dict[task_name][2].append('《槽位id：{}》'.format(slot_id)+task_label+'@#'+task_wav_no+'||'+task_seat_response+'#@')
                task_dict[task_name][6].append(task_response_label)
                slot_id += 1
            else:
                # Any other bracketed marker is recorded as a transfer target,
                # keyed by "<task name>+<condition>".
                result = result.group().replace('【', '').replace('】', '')
                if '\n' in task_condition:
                    for condition in task_condition.split('\n'):
                        task_transfer[task_name+'+'+condition] = result
                elif '|' in task_condition:
                    task_transfer[task_name+'+'+task_condition.split('|')[0]] = result
                else:
                    task_transfer[task_name+'+'+task_condition] = result
        elif task_simple_seat_response not in task_dict[task_name][1]:
            task_dict[task_name][1].append(task_simple_seat_response)
            task_dict[task_name][2].append('《槽位id：{}》'.format(slot_id)+task_label+'@#'+task_wav_no+'||'+task_seat_response+'#@')
            task_dict[task_name][6].append(task_response_label)
            slot_id += 1

        # Build the bot tree
        first_stage = list()
        # When the stage id contains placeholders (x / y / z), expand them:
        # the first line of the cell is the id pattern and the following lines
        # carry "x=..", "y=..", "z=.." comma-separated value lists.
        if 'x' in task_stage and 'y' in task_stage and 'z' in task_stage:
            for x_num in task_stage.split('\n')[1].split('x=')[-1].split(','):
                for y_num in task_stage.split('\n')[2].split('y=')[-1].split(','):
                    for z_num in task_stage.split('\n')[3].split('z=')[-1].split(','):
                        for tree in task_dict[task_name][5]:
                            tree.create_node(
                                task_condition,
                                task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num).replace('z', z_num),
                                '.'.join(task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num).replace('z', z_num).split('.')[:-1]),
                                task_simple_seat_response.replace('【faq答案】+', 'faq'),
                            )
        elif 'x' in task_stage and 'y' in task_stage:
            for x_num in task_stage.split('\n')[1].split('x=')[-1].split(','):
                for y_num in task_stage.split('\n')[2].split('y=')[-1].split(','):
                    for tree in task_dict[task_name][5]:
                        tree.create_node(
                            task_condition, task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num),
                            '.'.join(task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num).split('.')[:-1]),
                            task_simple_seat_response.replace('【faq答案】+', 'faq'),
                        )
        elif 'x' in task_stage:
            for num in task_stage.split('\n')[1].split('x=')[-1].split(','):
            # for num in task_stage.replace(')', '').split('x=')[-1].split(','):
                for tree in task_dict[task_name][5]:
                    # print(task_stage.split('(')[0].replace('x', num))
                    tree.create_node(
                        task_condition, task_stage.split('\n')[0].replace('x', num),
                        '.'.join(task_stage.split('\n')[0].replace('x', num).split('.')[:-1]),
                        task_simple_seat_response.replace('【faq答案】+', 'faq'))
        else:
            stage_level = task_stage.count('.')
            if stage_level == 0:
                tree = Tree()
                first_stage.append(task_simple_seat_response.replace('【faq答案】+', 'faq'))
                # tag = entry condition, identifier = node id, parent = id of the
                # previous node, data = action (reply script or finished task)
                tree.create_node(task_condition, task_stage, None, task_simple_seat_response.replace('【faq答案】+', 'faq'))
                # task_dict[task_name][5].append(tree)
                # A new root replaces any tree previously stored for this task.
                task_dict[task_name][5] = [tree, ]

            else:
                # The parent id is the stage id without its last dotted component.
                for tree in task_dict[task_name][5]:
                    tree.create_node(task_condition, task_stage, '.'.join(task_stage.split('.')[:-1]),
                                     task_simple_seat_response.replace('【faq答案】+', 'faq'))

    # (Original note: "try to output the tree structure".) The code below
    # actually appends five fixed child nodes, each with data '【促成】', under
    # every leaf of the '收集资金用途' task tree; it raises KeyError when the
    # workbook has no such task.
    for tree in task_dict['收集资金用途'][5]:
        # print(len(tree.leaves()))
        # print(tree.leaves())
        for leaf in tree.leaves():

            for i, condition in enumerate(['金额', '肯定', '否定', '其他', 'FAQ']):
                tree.create_node(
                    condition, leaf.identifier+'.'+str(i+1), leaf.identifier,
                    '【促成】'
                )
        # print(len(tree.leaves()))
        # print(tree.leaves())


    # Try to output paths (debug code, disabled by keeping it in a string literal)
    '''tree = task_dict['核身'][5][0]
    tree = Tree()
    node_id_list = ['1', ]
    print(tree.get_node('1'))
    print(tree.children('1')[0] if tree.children('1') else '')
    print(tree.children(tree.children('1')[0].identifier))
    # 获取所有叶子节点的路径
    print(len(tree.paths_to_leaves()))
    for paths in tree.paths_to_leaves():
        print(paths)
        for path_id in paths:
            print( '用户：{}\n销售员：{}'.format(tree.get_node(path_id).tag, tree.get_node(path_id).data) )
    
    print(task_dict)
    print(task_transfer)'''

    # Build the script-selection training data
    input_list = list()
    insert_data_list = list()
    nlu_data_list = list()
    task_paths = dict()
    for task_name in tqdm.tqdm(task_dict):
        for tree in task_dict[task_name][5]:
            # Windows already converted for this task (reset for each tree).
            task_paths[task_name] = list()
            for paths in tree.paths_to_leaves():
                # if task_name == '收集资金用途' or task_name == '收集用户当前用款场景':
                #     print(tree.get_node(paths[-1]).data)

                # Script-selection training data
                if len(paths) >= 3:
                    # The first two nodes plus every sliding window of three
                    # consecutive nodes along the path.
                    paths_list = [paths[:2], ]
                    # paths_list = list()
                    for i in range(len(paths) - 2):
                        sublist = paths[i:i + 3]
                        paths_list.append(sublist)

                    for paths in paths_list:
                        if paths not in task_paths[task_name]:
                            task_paths[task_name].append(paths)
                            # if paths == ['1', '1.5', '1.5.4']:
                            #     print(paths)
                            data_list, input_list = path_to_dialogue_data(
                                paths, task_dict, task_name, tree, input_list, FAQ_name_list, dialogue_prompt)
                            insert_data_list.extend(data_list)

                else:
                    # Short paths (fewer than three nodes) are used as they are.
                    if paths not in task_paths[task_name]:
                        task_paths[task_name].append(paths)
                        # if paths == ['1', '1.5', '1.5.4']:
                        #     print(paths)
                        data_list, input_list = path_to_dialogue_data(
                            paths, task_dict, task_name, tree, input_list, FAQ_name_list, dialogue_prompt)
                        insert_data_list.extend(data_list)

                # Labelling (NLU) training data -- disabled, kept as a string literal
                '''paths_list = list()
                for i in range(len(paths) - 1):
                    sublist = paths[i:i + 2]
                    paths_list.append(sublist)

                for paths in paths_list:
                    data_list, input_list = path_to_dialogue_data(
                        paths, task_dict, task_name, tree, input_list, FAQ_name_list, nlu_prompt)
                    nlu_data_list.extend(data_list)'''


    random.shuffle(insert_data_list)

    print(len(insert_data_list))
    print(len(nlu_data_list))
    jdump(insert_data_list, './v2_train_data_360UJD首贷.json')
    jdump(nlu_data_list, './v2_nlu_train_data_360UJD首贷.json')


def path_to_dialogue_data(paths, task_dict, task_name, tree, input_list, FAQ_name_list, prompt):
    """Turn one tree path into instruction / input / output samples.

    Every node on ``paths`` contributes one "user / seller" exchange. A node
    whose tag contains 'FAQ' (root excluded) fans out over every name in
    ``FAQ_name_list`` that the tag does not list; a node whose tag spans
    several lines (root excluded) fans out over those lines.

    Args:
        paths: Node identifiers ordered from the first to the last turn.
        task_dict: Mapping task name -> positional task record (index 0 entry
            conditions, 1 candidate scripts, 3 finish logic, 4 run logic).
        task_name: Key of the task being processed.
        tree: Object whose ``get_node(id)`` yields nodes with ``tag``/``data``.
        input_list: Inputs emitted so far; extended in place for de-duplication.
        FAQ_name_list: FAQ names used to expand 'FAQ' nodes.
        prompt: Template formatted with finish logic, run logic and scripts.

    Returns:
        ``(data_list, input_list)`` -- the new samples and the same
        ``input_list`` object that was passed in.
    """
    turn_template = '用户：{}\n销售员：{}\n'
    transfer_re = re.compile(r'【.*】')

    # Grow every partial conversation by one exchange per path node.
    histories = ['']
    for depth, node_id in enumerate(paths):
        grown = list()
        for history in histories:
            node = tree.get_node(node_id)
            tag = node.tag
            if 'FAQ' in tag and depth != 0:
                skipped = tag.replace('FAQ', '').split('|')
                user_turns = [name for name in FAQ_name_list if name not in skipped]
            elif '\n' in tag and depth != 0:
                user_turns = tag.split('\n')
            else:
                user_turns = [tag]
            for user_turn in user_turns:
                grown.append(history + turn_template.format(user_turn, node.data))
        histories = grown

    # Paths that start at node '1' get one variant per entry condition.
    if paths[0] == '1':
        variants = list()
        for history in histories:
            for entry_condition in task_dict[task_name][0]:
                if entry_condition in history:
                    variants.append(history)
                else:
                    variants.append(
                        '用户：卡卡卡\n销售员：' + entry_condition + '用户：'.join(history.split('用户：')[1:]))
        histories = variants

    # Split each conversation into model input (all but the last seller turn)
    # and expected output (the last seller turn), skipping inputs already seen.
    data_list = list()
    for history in histories:
        seller_parts = history.split('销售员：')
        model_input = '销售员：' + '销售员：'.join(seller_parts[1:-1])

        candidate_scripts = list(task_dict[task_name][1])
        instruction = prompt.format(
            task_dict[task_name][3], task_dict[task_name][4], '|'.join(candidate_scripts))

        expected = seller_parts[-1].replace('\n', '')
        marker = transfer_re.search(expected)
        # A bracketed marker other than the FAQ placeholder means the task is done.
        if marker and marker.group() != '【faq答案】':
            expected = '任务完成'

        if model_input in input_list:
            continue
        input_list.append(model_input)
        data_list.append({
            'instruction': instruction,
            'input': model_input,
            'output': expected,
        })

    return data_list, input_list


def bot_document_to_faq(file_path):
    """Read the FAQ sheet (second sheet) of the bot workbook.

    The first row is skipped as a header, as is any row with an empty first
    cell. Columns are read positionally: name, wav number, label, seat
    response and, from the sixth column, a sign that marks hang-up FAQs.

    Args:
        file_path: Path of the workbook to open.

    Returns:
        A tuple ``(FAQ_off_list, FAQ_name_list, FAQ_answer_dict)``: names
        whose sign is '挂机'; names usable as user turns (polling and a fixed
        set of special names excluded, three fixed intents always included);
        and a mapping name -> ``[seat_response, wav_no, label]``.
    """
    workbook = load_workbook(file_path)
    faq_sheet = workbook[workbook.sheetnames[1]]

    # Intents that must be offered even when the sheet does not list them.
    always_offered = ['不需要-无原因', '不需要-会考虑', '不需要-不缺钱']
    # Names that are never offered as a user turn.
    never_offered = ['投诉', '别给我打电话了', '你怎么有我号码', '强烈拒绝', '语音信箱',
                     '第一次静音', '第二次静音', '第三次静音', '多次不需要', '多次在忙']

    answers = dict()
    offered_names = list()
    hangup_names = list()
    for index, row in tqdm.tqdm(enumerate(faq_sheet.values)):
        if index == 0 or not row[0]:
            continue

        name = row[0]
        wav_no = row[1]
        label = row[2]
        seat_response = row[3]
        sign = row[5]

        answers[name] = [seat_response, wav_no, label]

        if '轮询' not in name and name not in never_offered:
            offered_names.append(name)
        if sign == '挂机':
            hangup_names.append(name)

    for intent in always_offered:
        if intent not in offered_names:
            offered_names.append(intent)

    return hangup_names, offered_names, answers


def bot_document_to_tree(file_path):
    """Generate end-to-end test dialogues from a bot flow workbook.

    The first sheet of the workbook at ``file_path`` is read row by row until
    a row whose task name is '流程文档结束'; rows whose stage is '整体兜底' are
    skipped. One ``treelib.Tree`` is built per entry condition of each task,
    root-to-leaf paths are flattened to ``[tag, data, tag, data, ...]`` lists,
    and paths are chained across tasks starting from the task '核身' by
    following the bracketed 【...】 markers found in leaf data.

    Side effects: prints progress/debug information and writes
    './test_dialogue_data.xlsx', one row per test case with the ','-joined
    user turns in the third column and the expected bot reply in the fourth.

    Args:
        file_path: Path of the workbook; also forwarded to
            ``bot_document_to_faq``.

    Returns:
        None.
    """

    wb = load_workbook(file_path)
    ws = wb[wb.sheetnames[0]]
    FAQ_off_list, FAQ_name_list, FAQ_answer_dict = bot_document_to_faq(file_path)
    print(FAQ_name_list)
    # Matches a bracketed marker such as 【...】 (greedy, single line).
    task_transfer_pattern = re.compile(r'【.*】')

    task_name = ''
    task_dict = dict()
    for i, row in enumerate(ws.values):

        # An empty first cell means the row belongs to the previous task.
        task_name = row[0] if row[0] else task_name
        if task_name == '流程文档结束':
            break
        task_finish_logic = row[1]
        task_run_logic = row[2]

        task_stage = str(row[3])
        task_condition = row[4]
        task_wav_no = row[5]
        task_seat_response = row[6]
        # NOTE(review): raises AttributeError when the cell in column 8 is empty.
        task_simple_seat_response = row[7].replace('【faq答案】+', 'faq')
        task_label = '[' + row[9] + ']' if row[9] else ''
        task_response_label = '[' + row[8] + ']' if row[8] else ''
        task_break_label = row[13].replace('开场支持打断的标签：\n', '') if row[13] else ''
        if task_stage == '整体兜底':
            continue

        if task_name not in task_dict:
            task_dict[task_name] = {
                'tree': dict(), # flow routes stored as trees, keyed by entry condition
                'tree_path': list(),
                'enter_simple_response': dict(),
                'simple_response': list(),
                'simple_response_to_response': dict(),
                'next_task': dict()
            }

        # Register the simplified script the first time it is seen
        if task_simple_seat_response not in task_dict[task_name]['simple_response']:
            task_dict[task_name]['simple_response'].append(task_simple_seat_response)
            task_dict[task_name]['simple_response_to_response'][task_simple_seat_response] = {
                'wav_no': task_wav_no,
                'response': task_seat_response,
                'label': task_label,
                'response_label': task_response_label,
            }


        # Build the bot tree
        # When the stage id contains placeholders (x / y / z), expand them:
        # the first line of the cell is the id pattern and the following lines
        # carry "x=..", "y=..", "z=.." comma-separated value lists.
        # Node data is the wav number when present, else the simplified script.
        if 'x' in task_stage and 'y' in task_stage and 'z' in task_stage:
            for x_num in task_stage.split('\n')[1].split('x=')[-1].split(','):
                for y_num in task_stage.split('\n')[2].split('y=')[-1].split(','):
                    for z_num in task_stage.split('\n')[3].split('z=')[-1].split(','):
                        for condition in task_dict[task_name]['tree']:
                            tree = task_dict[task_name]['tree'][condition]
                            tree.create_node(
                                task_condition,
                                task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num).replace('z', z_num),
                                '.'.join(task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num).replace('z', z_num).split('.')[:-1]),
                                # task_simple_seat_response,
                                task_wav_no if task_wav_no else task_simple_seat_response,
                            )
        elif 'x' in task_stage and 'y' in task_stage:
            for x_num in task_stage.split('\n')[1].split('x=')[-1].split(','):
                for y_num in task_stage.split('\n')[2].split('y=')[-1].split(','):
                    for condition in task_dict[task_name]['tree']:
                        tree = task_dict[task_name]['tree'][condition]
                        tree.create_node(
                            task_condition, task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num),
                            '.'.join(task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num).split('.')[:-1]),
                            # task_simple_seat_response,
                            task_wav_no if task_wav_no else task_simple_seat_response,
                        )
        elif 'x' in task_stage:
            for num in task_stage.split('\n')[1].split('x=')[-1].split(','):
                # for num in task_stage.replace(')', '').split('x=')[-1].split(','):
                for condition in task_dict[task_name]['tree']:
                    tree = task_dict[task_name]['tree'][condition]
                    tree.create_node(
                        task_condition, task_stage.split('\n')[0].replace('x', num),
                        '.'.join(task_stage.split('\n')[0].replace('x', num).split('.')[:-1]),
                        # task_simple_seat_response,
                        task_wav_no if task_wav_no else task_simple_seat_response,
                    )
        else:
            stage_level = task_stage.count('.')
            if stage_level == 0:
                # Top-level stage: one tree per line of the condition cell.
                task_condition = task_condition.split('\n') if '\n' in task_condition else [task_condition, ]
                for condition in task_condition:

                    tree = Tree()
                    # tag = entry condition, identifier = node id, parent = id of the
                    # previous node, data = action (reply script or finished task)
                    tree.create_node(condition, task_stage, None,
                                     # task_simple_seat_response,
                                     task_wav_no if task_wav_no else task_simple_seat_response,)
                    task_dict[task_name]['tree'][condition] = tree

                    task_dict[task_name]['enter_simple_response'][condition] = task_simple_seat_response

            else:
                # The parent id is the stage id without its last dotted component.
                for condition in task_dict[task_name]['tree']:
                    tree = task_dict[task_name]['tree'][condition]
                    tree.create_node(task_condition, task_stage, '.'.join(task_stage.split('.')[:-1]),
                                     # task_simple_seat_response,
                                     task_wav_no if task_wav_no else task_simple_seat_response,)

    # (Original note: "try to output the tree structure".) The code below
    # actually appends five fixed child nodes, each with data '【促成】', under
    # every leaf of each '收集资金用途' tree; it raises KeyError when the
    # workbook has no such task. The inner loop reuses the name `condition`,
    # which is harmless because `tree` has already been looked up.
    for condition in task_dict['收集资金用途']['tree']:
        tree = task_dict['收集资金用途']['tree'][condition]

        for leaf in tree.leaves():

            for i, condition in enumerate(['金额', '肯定', '否定', '其他', 'FAQ']):
                tree.create_node(
                    condition, leaf.identifier + '.' + str(i + 1), leaf.identifier,
                    '【促成】'
                )

    # Try to output the tree structure (debug code, disabled by keeping it in a string literal)
    '''for condition in task_dict['收集资金用途']['tree']:
        tree = task_dict['收集资金用途']['tree'][condition]
        tree.show(idhidden=False)'''

    all_num = 0
    # For every sub-tree, collect all root-to-leaf paths
    for task_name in task_dict:
        # Stops at this task: it and every task after it (in insertion order)
        # get no 'tree_path' / 'next_task' entries.
        if task_name == '收集用户当前用款场景':
            break

        for condition in task_dict[task_name]['tree']:
            for paths in task_dict[task_name]['tree'][condition].paths_to_leaves():
                # Flatten the path to [tag, data, tag, data, ...].
                path_list = list()
                for path_id in paths:
                    path_list.append(task_dict[task_name]['tree'][condition].get_node(path_id).tag)
                    path_list.append(task_dict[task_name]['tree'][condition].get_node(path_id).data)

                # Record the path
                task_dict[task_name]['tree_path'].append(path_list)
                # if path_list[-2] == '建材':
                #     print(paths)

                # Record the path that leads to another task; for each target
                # only the shortest path list is kept.
                leave_data = task_transfer_pattern.search(task_dict[task_name]['tree'][condition].get_node(paths[-1]).data)
                if leave_data:
                    leave_data = leave_data.group().replace('【', '').replace('】', '')
                    if leave_data not in task_dict[task_name]['next_task'] or \
                        (leave_data in task_dict[task_name]['next_task'] and len(task_dict[task_name]['next_task'][leave_data]) > len(path_list)):

                        task_dict[task_name]['next_task'][leave_data] = path_list


        all_num += len(task_dict[task_name]['tree_path'])
        print(task_name)
        print(task_dict[task_name]['tree_path'])
        print(task_dict[task_name]['next_task'])

    print(all_num)

    # Build the data starting from the opening of the '核身' task
    finish_paths_list = list()
    unfinish_paths_list = list()

    finish_paths_list.extend(task_dict['核身']['tree_path'])
    # print(task_dict['核身']['tree_path'])
    for next_task in task_dict['核身']['next_task']:
        unfinish_paths_list.append(
            task_dict['核身']['next_task'][next_task]
        )
    # print(finish_paths_list)
    # print(unfinish_paths_list)
    # Repeatedly extend every unfinished path (one ending in a 【task】 marker)
    # with the paths of the task it points to, until none is left.
    # NOTE(review): terminates only if the task-transfer graph eventually runs
    # out of 'next_task' entries -- confirm there are no cycles.
    while unfinish_paths_list:
        paths_list = list()
        for path in tqdm.tqdm(unfinish_paths_list):
            task_name = task_transfer_pattern.search(path[-1]).group().replace('【', '').replace('】', '')
            for next_task_path in task_dict[task_name]['tree_path']:
                # Drop the trailing marker, then append the next task's path
                # without its first element (the entry tag).
                finish_path = copy.deepcopy(path[:-1])
                finish_path.extend(next_task_path[1:])
                if finish_path not in finish_paths_list:
                    finish_paths_list.append(finish_path)
                    # print(finish_path)

            for next_task in task_dict[task_name]['next_task']:
                finish_path = copy.deepcopy(path[:-1])
                finish_path.extend(task_dict[task_name]['next_task'][next_task][1:])
                paths_list.append(finish_path)

        unfinish_paths_list = paths_list

    # print(finish_paths_list)
    # print(unfinish_paths_list)
    # print(len(finish_paths_list))


    path_list = list()
    test_path_list= list()
    for path in finish_paths_list:
        # Replace a trailing 【task】 marker with the wav number of that task's entry script
        if '【' in path[-1] and '】' in path[-1]:

            # Text after the closing bracket selects the entry condition.
            enter_condition = path[-1].split('】')[-1]
            # enter_condition = path[-2]
            print(path)
            task_name = task_transfer_pattern.search(path[-1]).group().replace('【', '').replace('】', '')
            print(enter_condition, task_name)
            print(task_dict[task_name]['enter_simple_response'])
            if enter_condition and enter_condition in task_dict[task_name]['enter_simple_response']:
                simple_response = task_dict[task_name]['enter_simple_response'][enter_condition]
            else:
                # Falls back to the '首句' entry; KeyError if the task has none.
                simple_response = task_dict[task_name]['enter_simple_response']['首句']

            wav_no = task_dict[task_name]['simple_response_to_response'][simple_response]['wav_no']
            print(wav_no)
            path[-1] = wav_no

        # Generate the test data
        user_response = list()
        # After skipping the first tag/data pair, even positions hold user
        # tags and odd positions hold the bot data that answers them.
        for i, row in enumerate(path[2:]):
            # Normalise the cell into a concrete test utterance / value
            if 'FAQ' in row:
                row = 'FAQ'
            if '不需要-无原因' in row:
                row = '不需要'
            if '在忙-无原因' in row:
                row = '在忙'
            if row == '静音':
                row = '@@quiet@@'
            if '\n' in row:
                row = row.split('\n')[0]
            if row in ['平台银行用款方案', '平台产品']:
                row = '花呗'
            if row == '金额':
                row = '三万'
            if row == '其他':
                row = '不明'
            if row in ['资金周转', '生活消费']:
                row = '肯定'

            if i % 2 != 0:
                # if ','.join(user_response) == '什么平台,不需要-使用其他平台,肯定,建材':
                #     print(','.join(user_response)+'+'+row)

                # Emit "<user turns>+<expected reply>" once per distinct
                # sequence of user turns.
                if ','.join(user_response) not in path_list:
                    test_path_list.append(','.join(user_response)+'+'+row)
                    path_list.append(','.join(user_response))
                    # print(path)
                    # print(','.join(user_response), row)
            else:
                user_response.append(row)


    wb = Workbook()
    ws = wb.active

    # NOTE(review): splitting on '+' assumes neither the user turns nor the
    # expected reply contain a '+' character -- confirm.
    for paths in tqdm.tqdm(test_path_list):
        ws.append(['', '', paths.split('+')[0], paths.split('+')[1]])

    wb.save('./test_dialogue_data.xlsx')


def get_wav_online_content(company_id, tts_model):
    """Fetch the recording list of a company / TTS model from the admin API.

    Args:
        company_id: Value sent as the ``companyId`` query parameter.
        tts_model: Value sent as the ``ttsModel`` query parameter.

    Returns:
        ``{"<company_id>_<tts_model>": {file_name: content, ...}}`` built from
        the ``list`` entry of the first element of the response's ``data``.

    Raises:
        requests.RequestException: On connection failure, timeout or a
            non-2xx HTTP status.
    """
    url = 'http://work.xi-ai.com/admin/soundRecording/getRecordManageList?companyId={}&ttsModel={}'.format(
        company_id, tts_model)
    wav_key = '{}_{}'.format(company_id, tts_model)

    # Without a timeout `requests` may block forever on an unresponsive server.
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    payload = response.json()

    recordings = dict()
    for wav_data in payload['data'][0]['list']:
        file_name = wav_data['fileName']
        content = wav_data['content']
        print(file_name, content)
        recordings[file_name] = content

    return {wav_key: recordings}


# Intent-labelling (NLU) training / test data generation
def bot_document_to_nlu(file_path):
    """Build intent-labelling training data and dump it to JSON.

    One sample is produced for every query returned by ``FAQ_file()``; the
    intent name is the expected output. The flow workbook is only scanned to
    report how many distinct simplified seat responses it contains (rows
    before the first '整体兜底' stage, ignoring scripts with a bracketed marker
    or containing '再见').

    Side effects: prints the two counts and writes './nlu_train_data.json'.

    Args:
        file_path: Path of the bot flow workbook. It is now actually used;
            previously a hard-coded './[360-UJD-首贷].xlsx' was opened and this
            argument was ignored.

    Returns:
        None.
    """
    intent_dict = FAQ_file()
    wb = load_workbook(file_path)
    ws = wb[wb.sheetnames[0]]

    simple_list = list()
    for row in ws.values:
        if str(row[3]) == '整体兜底':
            break
        # Skip rows without a simplified script instead of crashing on None.
        if not row[7]:
            continue

        task_simple_seat_response = row[7].replace('【faq答案】+', 'faq')
        if '【' in task_simple_seat_response or '】' in task_simple_seat_response:
            continue
        if '再见' in task_simple_seat_response:
            continue
        if task_simple_seat_response not in simple_list:
            simple_list.append(task_simple_seat_response)

    print(len(simple_list))

    instruction = '''接下来会有一个用户的表述，你需要做的是用一个词代表用户的表述含义。'''
    data_list = list()
    for intent_name in intent_dict:
        # Index 0 of each entry holds the list of example queries.
        for query in intent_dict[intent_name][0]:
            data_list.append({
                'instruction': instruction,
                'input': '用户：' + query + '\n',
                'output': intent_name
            })

    print(len(data_list))
    random.shuffle(data_list)
    jdump(data_list, './nlu_train_data.json')


def bot_document_seat_response(file_path):
    """Collect the distinct simplified seat responses of a flow workbook.

    Reads the eighth column of every row on the first sheet. Empty cells and
    scripts containing '再见' are ignored. A script with a bracketed 【...】
    marker is kept only when the marker contains '+', in which case the text
    before the first '+' (brackets removed) is used; other bracketed scripts
    are dropped.

    Args:
        file_path: Path of the workbook to open.

    Returns:
        The distinct responses as a list (built from a set, so the order is
        not defined). The set is also printed.
    """
    workbook = load_workbook(file_path)
    sheet = workbook[workbook.sheetnames[0]]

    bracket_re = re.compile(r'【.*】')
    collected = set()
    for row in sheet.values:
        cell = row[7]
        if not cell:
            continue

        script = cell.replace('【faq答案】+', 'faq')
        if '再见' in script:
            continue

        hit = bracket_re.search(script)
        if hit:
            marker = hit.group()
            if '+' not in marker:
                continue
            script = marker.split('+')[0].replace('【', '').replace('】', '')

        collected.add(script)

    print(collected)
    return list(collected)


def request_chatgpt(content):
    """Send ``content`` as a single system message to the Azure OpenAI chat API.

    Args:
        content: Text placed in the one ``system`` message of the request.

    Returns:
        The message content of the first choice in the response.

    Raises:
        Whatever ``openai.ChatCompletion.create`` raises (the request uses a
        10 second timeout).
    """
    # SECURITY NOTE(review): the API key below is committed to source control
    # and should be rotated and removed. Until then it remains as a fallback so
    # existing deployments keep working; set LINGXI_OPENAI_API_KEY to override.
    api_key = os.environ.get("LINGXI_OPENAI_API_KEY", "45a5ee249f364e208dd950f87ab5aba7")

    prompt = [{'role': 'system', 'content': content}, ]
    response = openai.ChatCompletion.create(
        api_type="azure",
        api_version="2023-03-15-preview",
        api_base="https://lingxi-openai.openai.azure.com",
        api_key=api_key,
        engine="gpt-35",
        messages=prompt,
        temperature=0.8,
        max_tokens=2048,
        request_timeout=10,
    )
    result = response["choices"][0]['message']['content']

    return result


def FAQ_file():
    """Build an intent dictionary from ``./FAQ.xlsx``.

    The first worksheet supplies example queries: every non-header row whose
    column index 5 equals ``1`` contributes its query (column 2) to the intent
    named in column 1. Rows labelled ``'NOINTENT'`` take their intent name
    from column 6 instead, and a handful of intent names are renamed through
    a fixed table.

    The second worksheet supplies meanings: column 0 is an intent name and
    column 1 its meaning, stored only for intents already collected.

    Returns:
        dict mapping intent name -> ``[list_of_queries, meaning]`` where
        ``meaning`` stays ``''`` if the second sheet has no entry for it.
    """
    # Intent names that are folded into another name before grouping.
    renamed_intents = {
        '在操作': '正在操作',
        '在忙-无原因': '没时间',
        '在忙-有原因': '没时间',
        '在忙-快点说': '快点说',
        '在忙-主动邀约': '主动邀约',
    }

    workbook = load_workbook('./FAQ.xlsx')

    intent_dict = dict()
    query_sheet = workbook[workbook.sheetnames[0]]
    for row_index, row in enumerate(query_sheet.values):
        if row_index == 0 or row[5] != 1:
            continue

        name = row[1]
        example = row[2]
        if name == 'NOINTENT':
            name = row[6]
        name = renamed_intents.get(name, name)

        intent_dict.setdefault(name, [list(), ''])[0].append(example)

    meaning_sheet = workbook[workbook.sheetnames[1]]
    for row in meaning_sheet.values:
        name, meaning = row[0], row[1]
        if name in intent_dict:
            intent_dict[name][1] = meaning

    return intent_dict


def _resolve_nlu_label(intent, attitude):
    """Collapse an (intent, attitude) pair into the single label used for comparison."""
    if intent and intent not in ('NOINTENT', '肯定态度', '否定态度', '无态度'):
        return intent
    if attitude == 1:
        return '肯定态度'
    if attitude == -1:
        return '否定态度'
    return '无态度'


def check_360_nlu():
    """Compare the labels stored for past calls with a fresh NLU call.

    Steps:
      1. Query ``outbound_call_result`` for calls of company 2186 that started
         in the first ten minutes of hours 11, 14, 16 and 18 on 2023-08-24.
      2. For each call, fetch its dialogue detail and, for every USER turn
         that carries ``idlResultJson``, derive the stored ("online") label.
      3. Send the same query text to the NLU endpoint and derive its label.
      4. Print every mismatch and write it to ``./360_1.0对比结果.xlsx``
         together with the preceding IVR utterance.

    Returns:
        None. The result is the saved workbook.
    """
    nlu_url = 'http://8.142.8.47:8681/nlu'

    dm_url = 'https://work.xi-ai.com/dataCenter/dm/detail?sessionId={}'

    sql = '''
        select dm_session_id, customer_id 
        from ods_outbound_data_platform.outbound_call_result 
        where call_start_time >= "2023-08-24 {}:00:00" 
        and call_start_time <= "2023-08-24 {}:10:00" 
        and company_id = 2186 
        and call_status in ("normalConnection", "transferFail") 
        '''

    # NOTE(review): database credentials are hard-coded here; consider moving
    # them to configuration.
    connection = pymysql.connect(
        host="39.103.215.119",
        # host="am-8vbwn20384jdq3vq185480.zhangbei.ads.aliyuncs.com",
        port=3308,
        # port=3306,
        user="ds_user",
        passwd="Moxi123#",
        # db="data_center_temp",
        charset='utf8mb4',
        cursorclass=pymysql.cursors.DictCursor
    )
    all_case = list()
    # One connection serves all four hourly queries and is always closed,
    # even if a query fails.
    try:
        with connection.cursor() as cursor:
            for hour in ['11', '14', '16', '18']:
                cursor.execute(sql.format(hour, hour))
                all_case.extend(cursor.fetchall())
    finally:
        connection.close()

    wb = Workbook()
    ws = wb.active
    ws.append([
        '上文', '语句', '线上标签', '对比标签'
    ])

    for case in tqdm.tqdm(all_case):

        session_id = case['dm_session_id']
        dialogue = json.loads(requests.get(dm_url.format(session_id)).text)['data']
        last_seat_query = ''
        for turn in dialogue:
            if turn['speakerType'] == 'USER' and turn['idlResultJson']:

                result_json = json.loads(turn['idlResultJson'])
                query = result_json['query']
                online_label = _resolve_nlu_label(
                    result_json['standardQuery'], result_json['originalAttitude']
                )

                # Pass the query through ``params`` so characters such as
                # '&', '#', '+' or spaces are URL-encoded instead of
                # corrupting the query string.
                nlu_response = json.loads(requests.get(
                    nlu_url,
                    params={
                        'session_id': '-1',
                        'workspace': '222',
                        'current_query': query,
                    },
                ).text)
                label = _resolve_nlu_label(
                    nlu_response['standard_query'], nlu_response['original_attitude']
                )

                if label != online_label:
                    print(last_seat_query, )
                    print(query, online_label, label)
                    ws.append([
                        last_seat_query, query, online_label, label
                    ])
            elif turn['speakerType'] == 'IVR':
                # Remember the latest IVR utterance as context for the next
                # user turn.
                last_seat_query = turn['msgContent']

    wb.save('./360_1.0对比结果.xlsx')


def dialogue_data():
    """Export distinct user queries from recent calls to ``./标注测试.xlsx``.

    Selects sessions from ``outbound_call_result`` (2023-09-07 00:00-12:00,
    robot answer duration > 30, one specific ``dm_version``), fetches each
    session's detailed record and, for every USER turn carrying
    ``idlResultJson``:

    * writes the first occurrence of each query text to the first sheet as
      ``[customer_id, session_id, query, intent, attitude]``;
    * counts how often each query text occurs overall.

    A second sheet ``语料出现次数`` lists the queries seen more than once with
    their counts.

    Returns:
        None. The result is the saved workbook.
    """
    sql = '''
select dm_session_id, customer_id 
from ods_outbound_data_platform.outbound_call_result 
where call_start_time >= "2023-09-07 00:00:00" 
and call_start_time <= "2023-09-07 12:00:00" 
and robot_answer_duration > 30 
and dm_version in ("JT-实时-实分转-淑婷-20230221新版A")
and call_status in ("normalConnection", "transferFail") 
'''

    # NOTE(review): database credentials are hard-coded here; consider moving
    # them to configuration.
    connection = pymysql.connect(
        host="39.103.215.119",
        # host="am-8vbwn20384jdq3vq185480.zhangbei.ads.aliyuncs.com",
        port=3308,
        # port=3306,
        user="ds_user",
        passwd="Moxi123#",
        # db="data_center_temp",
        charset='utf8mb4',
        cursorclass=pymysql.cursors.DictCursor
    )
    # Always release the connection, even if the query fails.
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql)
            results = cursor.fetchall()
    finally:
        connection.close()

    # dm_url = 'https://work.xi-ai.com/dataCenter/dm/detail?sessionId={}'
    dm_url = 'http://172.26.2.56:8630/report/getDetailedRecord/?sessionId={}'
    print(len(results))

    wb = Workbook()
    ws = wb.active
    # The header matches the five values written per row below; only USER
    # turns are exported, so there is no role column.
    ws.append([
        '客户ID', 'session_id', '语句', '意图', '态度'
    ])

    # query text -> number of occurrences. It also serves as the "already
    # written" check, replacing a linear scan over a parallel list.
    query_dict = dict()
    for result in tqdm.tqdm(results):
        session_id = result['dm_session_id']
        customer_id = result['customer_id']

        response = json.loads(requests.get(dm_url.format(session_id)).text)['result']

        for res in response:
            if res['speakerType'] == 'USER' and res['idlResultJson']:
                result_json = json.loads(res['idlResultJson'])
                intent = result_json['standardQuery'] if result_json['standardQuery'] else "NOINTENT"
                attitude = result_json['originalAttitude']
                query = result_json['query']

                if query not in query_dict:
                    # First time this text is seen: export it once.
                    ws.append([
                        customer_id, session_id, query, intent, attitude
                    ])
                    query_dict[query] = 1
                else:
                    query_dict[query] += 1

    ws_2 = wb.create_sheet('语料出现次数')
    ws_2.append([
        '语句', '次数'
    ])

    for query in tqdm.tqdm(query_dict):
        if query_dict[query] > 1:
            ws_2.append([
                query, query_dict[query]
            ])

    wb.save('./标注测试.xlsx')


def process_queries(current_query):
    """Clean up stutter-like repetitions and redundant punctuation in a query.

    Three passes are applied:

    1. Immediate repetitions of a unit of 1 to 5 characters are collapsed.
       A unit that compares within ``'\\u4e00'..'\\u9fff'`` is reduced to a
       single copy and the pass is run again; any other unit is reduced to
       exactly two copies.
    2. When a punctuation mark (``，。？！、``) is surrounded by the same text on
       both sides (up to 4 characters), the copy before the mark is removed.
    3. Selected two-character punctuation pairs are replaced by ``'，'`` and a
       leading ``'，'`` or ``'。'`` is stripped.

    Args:
        current_query: The text to clean.

    Returns:
        The cleaned text. An empty input yields an empty string.
    """
    # Pass 1: collapse immediately repeated units.
    collapsed = True
    while collapsed:
        collapsed = False
        pieces = list()
        i = 0

        while i < len(current_query):
            repeat_num = 0
            for j in range(1, 6):
                target = current_query[i:i + j]
                # Count how many times ``target`` directly follows itself.
                while target == current_query[i + j * (repeat_num + 1):i + j * (repeat_num + 2)]:
                    repeat_num += 1
                if repeat_num:
                    break
            if repeat_num:
                if not (u'\u4e00' <= target <= u'\u9fff'):
                    # Units outside the range keep two copies and do not
                    # trigger another pass.
                    pieces.append(target * 2)
                else:
                    pieces.append(target)
                    collapsed = True
                # Skip the unit and all of its repetitions.
                i = i + j * (repeat_num + 1)
                continue
            pieces.append(current_query[i])
            i += 1
        current_query = ''.join(pieces)

    # Pass 2: merge identical text separated only by a punctuation mark.
    current_query_result = ''
    for i, char in enumerate(current_query):
        if char in ['，', '。', '？', '！', '、']:
            for j in range(1, 5):
                if current_query[max(i - j, 0):i] == current_query[i + 1:i + j + 1]:
                    current_query_result = current_query_result[:-1 * j]
                    break
        current_query_result += char
    current_query = current_query_result

    # Pass 3: normalise doubled punctuation.
    for point_sign in ['？。', '？，', '，。', '，，', '。，', '。。']:
        current_query = current_query.replace(point_sign, '，')

    # Guard against empty text: indexing [0] on '' would raise IndexError.
    if current_query and current_query[0] in ['，', '。']:
        current_query = current_query[1:]

    return current_query


def process_data_to_llama2():
    """Turn ``./标注测试.xlsx`` into a list of multi-turn conversation records.

    Every non-header row of the first worksheet with a non-empty column 2 is
    one utterance of the session named in column 1:

    * text starting with ``'《'`` is a ``gpt`` turn; its value is rebuilt by
      taking each ``'@#'``-separated segment after the first, cutting it at
      ``'#@'`` and keeping the part after the last ``'||'``;
    * any other text is a ``human`` turn; only the part after the last
      ``']'`` is kept.

    Consecutive utterances by the same speaker within a session are
    concatenated. Each session becomes one record whose conversation starts
    with a fixed human greeting; human values are passed through
    ``process_queries``.

    Returns:
        list of dicts with keys ``id``, ``model`` and ``conversations``.
    """
    workbook = load_workbook('./标注测试.xlsx')
    sheet = workbook[workbook.sheetnames[0]]

    # session id -> list of [speaker, text] in sheet order
    turns_by_session = dict()
    for row_index, row in tqdm.tqdm(enumerate(sheet.values)):
        if row_index == 0 or not row[2]:
            continue

        text = row[2]
        session_turns = turns_by_session.setdefault(row[1], list())

        if text.startswith('《'):
            speaker = 'gpt'
            text = ''.join(
                segment.split('#@')[0].split('||')[-1]
                for segment in text.split('@#')[1:]
            )
        else:
            speaker = 'human'
            text = text.split(']')[-1]

        session_turns.append([speaker, text])

    data_list = list()
    for session_turns in turns_by_session.values():
        # Glue together back-to-back utterances from the same speaker.
        merged_turns = list()
        for speaker, text in session_turns:
            if merged_turns and merged_turns[-1][0] == speaker:
                merged_turns[-1][1] += text
            else:
                merged_turns.append([speaker, text])

        record = {
            'id': generate_uuid(),
            'model': '',
            'conversations': [{
                'from': 'human',
                'value': '你好。'
            }],
        }
        for speaker, text in merged_turns:
            if speaker == 'human':
                value = process_queries(text)
            else:
                value = text
            record['conversations'].append({
                'from': speaker,
                'value': value
            })

        data_list.append(record)

    # random.shuffle(data_list)
    # jdump(data_list, './2244_dialogue.json')

    return data_list


# Script entry point: the individual pipeline steps are toggled by
# (un)commenting the calls below; only dialogue_data() is currently active.
if __name__ == '__main__':
    # Prompt text: generate occupations and their categories.
    # NOTE: `content` is not referenced anywhere else in this block.
    content = '''
    你扮演一个用户，有人询问“您的职业是什么”，给出50个职业名称，可以是自己做生意的，也可以是上班的，并将每个职业名称在“建材生意，工程生意，开店做生意，养殖生意，其他生意，上班”中选择一个进行归类，给出的生意不用受归类的影响。
    输出格式以json格式输出，key为归类名称，value为同归类的列表集合。
    '''

    # bot_document_to_dialogue('./[360-UJD-首贷].xlsx')
    # bot_document_to_tree('./[360-UJD-首贷].xlsx')
    # bot_document_to_nlu('./credit_intent.xlsx')

    # FAQ_file()
    # check_360_nlu()

    dialogue_data()

    # The triple-quoted string below is a bare expression, so nothing in it
    # runs. It holds a disabled step that mixes process_data_to_llama2()
    # output with two JSON datasets and dumps them to ./2244_dialogue.json.
    '''data_list = process_data_to_llama2()

    out_data_list = list()
    cot_data = jload('./cot_2023-08-25.json')
    for data in cot_data:
        data['model'] = ''
        out_data_list.append(data)


    cot_data = jload('./counterfactural_correction_multi_round_chat.json')
    for data in cot_data:
        out_data_list.append({
            'id': generate_uuid(),
            'model': '',
            'conversations': [{
                'from': 'human',
                'value': data['instruction'],
            },
            {
                'from': 'gpt',
                'value': data['output'],
            }]
        })

    random.shuffle(out_data_list)
    out_data_list = out_data_list[:len(data_list)//2]
    data_list.extend(out_data_list)
    jdump(data_list, './2244_dialogue.json')'''