# -*- coding: utf-8 -*-


import time
import tqdm
import json
import pymysql
import requests


from openpyxl import load_workbook, Workbook


def nlu_result(workspace, query):
    """Query the NLU service and normalise its reply into a five-item row.

    Args:
        workspace: Workspace id sent as the ``workspace`` request parameter.
            The string ``'210'`` additionally enables a slot-only answer.
        query: Utterance sent as ``current_query``.

    Returns:
        A list ``[q_type, intent, query, answer, update_time]``:

        * intent hit -- the fields of the first intention's ``value`` dict
          (``qtype``, ``standard_query``, ``original_query``, ``a``);
        * workspace ``'210'`` with no intention but with slots --
          ``['默认类型', <space-joined first slot values>, query, '', ...]``;
        * otherwise -- the FAQ fallback row whose second item is
          ``'<standard_query>_<original_query>'`` and whose answer is
          ``'NOINTENT'``. Callers detect the fallback by the ``'_'``.
    """
    update_time = time.strftime('%Y-%m-%d %H:%M:%S')

    # Let requests build the query string so that characters such as '&',
    # '#' or '+' inside the utterance are percent-encoded instead of
    # corrupting the URL.
    # NOTE(review): the previous revision kept a commented-out variant of this
    # URL on port 8670 -- presumably another deployment; confirm before reuse.
    response = requests.get(
        'http://8.142.85.77:8680/nlu',
        params={'session_id': -1, 'workspace': workspace, 'current_query': query})
    response = json.loads(response.text)

    intentions = response.get('intention', '')
    value = intentions[0].get('value', '') if intentions else ''
    if value:
        return [value['qtype'], value['standard_query'],
                value['original_query'], value['a'], update_time]

    # Only workspace '210' answers from slots, and only when the service
    # returned no intention list at all.
    if workspace == '210' and not intentions and response.get('slot', ''):
        slot_values = [item['slot_value'][0] for item in response['slot']]
        return ['默认类型', ' '.join(slot_values), query, '', update_time]

    # Every remaining case falls back to the FAQ search. This now includes
    # workspace '210' with neither intention nor slot, which previously fell
    # off the end of the function and returned None.
    matched_query, matched_intent, _semantic = faq_result(workspace, query)
    return ['默认分类', '{}_{}'.format(matched_intent, matched_query),
            query, 'NOINTENT', update_time]


def faq_result(workspace, query):
    """Look up the best FAQ match for *query* via the level-search service.

    Args:
        workspace: Sent as the ``systemId`` request parameter.
        query: Utterance sent as the ``query`` request parameter.

    Returns:
        A tuple ``(original_query, standard_query, semantic)`` taken from the
        first entry of the response's ``'ch'`` list, or of its ``'h'`` list
        when ``'ch'`` is empty or missing; ``('', '', '')`` when both are
        empty or missing.
    """
    # Passing params lets requests percent-encode the utterance.
    # NOTE(review): the previous revision kept a commented-out variant of this
    # URL on port 8454 -- presumably another deployment; confirm before reuse.
    response = requests.get(
        'http://8.142.85.77:8456/level_search',
        params={'systemId': workspace, 'query': query})
    response = json.loads(response.text)

    # 'ch' takes precedence over 'h'; .get() tolerates a response that omits
    # either bucket instead of raising KeyError.
    for bucket in ('ch', 'h'):
        hits = response.get(bucket)
        if hits:
            top = hits[0]
            return top['original_query'], top['standard_query'], top['semantic']
    return '', '', ''


# Units that are kept doubled (not collapsed to a single copy) when repeated.
_PASS_WORDS = (
    '一点', '考虑', '谢',
    '在', '看', '问', '想', '天', '刚', '试', '拜',
)
_ACK_CHARS = ('好', '行', '对')
_MERGE_PUNCTUATION = ('，', '。', '？', '！')
_PUNCTUATION_PAIRS = ('？。', '？，', '，。', '，，', '。，', '。。', ',，')


def _find_repeat(text, start, max_width=6):
    """Return ``(unit, repeats)`` for the shortest unit repeated at *start*, else ``None``."""
    for width in range(1, max_width + 1):
        unit = text[start:start + width]
        repeats = 0
        while unit == text[start + width * (repeats + 1):start + width * (repeats + 2)]:
            repeats += 1
        if repeats:
            return unit, repeats
    return None


def _collapse_repeats(text):
    """Collapse immediately repeated units of 1-6 characters until a pass changes nothing."""
    changed = True
    while changed:
        changed = False
        pieces = []
        pos = 0
        while pos < len(text):
            found = _find_repeat(text, pos)
            if found is None:
                pieces.append(text[pos])
                pos += 1
                continue
            unit, repeats = found
            # Pass-words, and units that do not compare (as strings) inside
            # '\u4e00'..'\u9fff', are reduced to exactly two copies; any other
            # unit is reduced to one copy, which triggers another pass.
            if unit in _PASS_WORDS or not ('\u4e00' <= unit <= '\u9fff'):
                pieces.append(unit * 2)
            else:
                pieces.append(unit)
                changed = True
            pos += len(unit) * (repeats + 1)
        text = ''.join(pieces)
    return text


def _strip_edge_acknowledgement(text):
    """Drop a leading '好'/'行'/'对' followed by '，', or a trailing one preceded by '，'."""
    last = len(text) - 1
    kept = []
    for idx, char in enumerate(text):
        if char in _ACK_CHARS:
            # Slicing (text[1:2]) cannot raise on a one-character string,
            # which the previous revision handled with a blanket try/except.
            if idx == 0:
                if text[1:2] == '，':
                    continue
            elif idx == last and text[idx - 1] == '，':
                continue
        kept.append(char)
    # The stripped form is only adopted when more than two characters remain.
    return ''.join(kept) if len(kept) > 2 else text


def _merge_across_punctuation(text):
    """Remove the text before a punctuation mark when the same text follows it."""
    merged = ''
    for idx, char in enumerate(text):
        if char in _MERGE_PUNCTUATION:
            for width in range(1, 5):
                if text[max(idx - width, 0):idx] == text[idx + 1:idx + width + 1]:
                    merged = merged[:-width]
                    break
        merged += char
    return merged


def process_queries(current_query):
    """Clean stutters and redundant acknowledgements out of an utterance.

    Steps, in order:

    1. collapse immediately repeated units of up to six characters;
    2. drop an acknowledgement character ('好', '行', '对') at either end when
       it is separated from the rest by '，';
    3. drop text that is repeated on both sides of '，', '。', '？' or '！';
    4. replace doubled punctuation pairs with a single '，';
    5. strip one leading '，' or '。'.

    Args:
        current_query: The utterance to clean.

    Returns:
        The cleaned string. An empty input returns an empty string (the
        previous revision raised IndexError on it).
    """
    current_query = _collapse_repeats(current_query)
    current_query = _strip_edge_acknowledgement(current_query)
    current_query = _merge_across_punctuation(current_query)

    for pair in _PUNCTUATION_PAIRS:
        current_query = current_query.replace(pair, '，')

    # startswith() is safe on an empty string, unlike current_query[0].
    if current_query.startswith(('，', '。')):
        current_query = current_query[1:]

    return current_query


def rm_stop_word(query):
    """Strip filler words and leading acknowledgements from *query*.

    The sentinel ``"@@quiet@@"`` is returned untouched. Each filler is removed
    only if something other than the punctuation '，', ',', '。', '？' would
    remain afterwards. A leading '好'/'行'/'对' plus '，' or '。' is then cut
    when at least two non-punctuation characters follow it.
    """
    if query == "@@quiet@@":
        return query

    # Order matters: longer fillers are tried before the single characters
    # they contain.
    fillers = (
        'quiet', '@',
        '不好意思', '对不起', '好谢谢', '谢谢您', '谢谢你',
        '拜拜', '谢谢', '好吧', '好嘞', '你好', '您好', '然后', '抱歉', '再见',
        '再见', '受累', '姐夫', '嫂子', '大哥', '老妹', '兄弟', '美女',
        '谢', '哦', '啊', '嘞', '喂', '啦', '唉', '哎', '哥', '姐',
        '哈', '呐', '呃', '噢', '诶', '噢', '唔', '呢', '呀',
        '嗯，', '嗯。', '嗯',
        '，好，', '。好，', '。好。',
        '，行，', '，行。', '。行。',
        '，对，', '，对。', '。对。',
    )

    def _has_content(text):
        # True when anything besides the four punctuation marks is left.
        for mark in ('，', ',', '。', '？'):
            text = text.replace(mark, '')
        return len(text) > 0

    for filler in fillers:
        without_filler = query.replace(filler, '')
        if _has_content(without_filler):
            query = without_filler

    for opener in ('好，', '好。', '行，', '行。', '对，', '对。'):
        if not query.startswith(opener):
            continue
        remainder = query[2:]
        core = remainder.replace('，', '').replace('。', '').replace('？', '')
        if len(core) >= 2:
            query = remainder

    return query


def nlu_task(content, num, ws_w, workspace='246'):
    """Run NLU over sliding windows of an utterance and log every result.

    The utterance is cleaned, split on '，' / '。', and segments following a
    segment shorter than four characters are glued onto it. Every window of
    *num* consecutive segments (or the whole utterance when it has at most
    *num* segments) is sent to ``nlu_result``.

    Args:
        content: Raw utterance text.
        num: Number of consecutive segments per window.
        ws_w: Worksheet-like object; one row is appended per window via
            ``ws_w.append``.
        workspace: Workspace id passed to ``nlu_result``. Defaults to
            ``'246'``, the value that was previously hard-coded.

    Returns:
        The set of recognised intents, i.e. every ``result[1]`` that does not
        contain ``'_'`` (the FAQ fallback marker).
    """
    # Best-effort cleaning: on failure keep whatever stage was reached, but
    # report it instead of hiding it.
    try:
        content = rm_stop_word(content)
        content = process_queries(content)
        content = rm_stop_word(content)
    except Exception as exc:
        print('nlu_task: preprocessing failed, continuing with partial result: {!r}'.format(exc))

    segments = []
    for piece in content.replace('。', '，').split('，'):
        if segments and len(segments[-1]) < 4:
            segments[-1] += piece
        else:
            segments.append(piece)

    intent_set = set()
    # With at most `num` segments this yields a single window holding all of
    # them, which is what the former separate branch did.
    window_count = max(len(segments) - num, 0) + 1
    for start in range(window_count):
        window = '，'.join(segments[start:start + num])

        print(window)
        result = nlu_result(workspace, window)
        print(result)
        if '_' not in result[1]:
            print(result[1])
            ws_w.append([result[0], result[1], window, result[2], result[3]])
            intent_set.add(result[1])
        else:
            ws_w.append([result[0], 'NOINTENT', window, result[2], result[3]])

    print(intent_set)
    return intent_set


def task():
    """Measure how often NLU reproduces the labelled intent of agent turns.

    Reads sheets 2 and 3 of ``./还呗人人对话3.24.xlsx``, runs ``nlu_task`` on
    every row whose second column is ``'坐席'`` and whose label passes the
    filter below, and writes ``./金融准确率.xlsx``. The output's first sheet
    holds the per-window NLU rows and its ``'出现标签准确率'`` sheet holds
    the per-intent and overall accuracy.
    """
    wb = load_workbook('./还呗人人对话3.24.xlsx')

    wb_w = Workbook()
    ws_w = wb_w.active

    # Labels listed together count as correct when any of them is recognised.
    equivalent_groups = (
        ('是否打开APP', '是否同意在线协助操作'),
        ('产品介绍', '是否同意注册查看额度'),
    )

    intent_dict = dict()  # label -> [occurrences, correct]
    for sheet_index in [2, 3]:
        ws = wb[wb.sheetnames[sheet_index]]
        for row_index, row in tqdm.tqdm(enumerate(ws.values)):
            if row_index == 0 or row[1] != '坐席':
                continue

            query = row[2]
            # Column 4, when filled, overrides the label in column 3.
            intent = row[4] if row[4] else row[3]

            # Currently restricted to one label; the previous revision kept
            # "intent and intent != 'NOINTENT'" as a commented-out alternative.
            if intent not in ['操作流程:银行卡信息', ]:
                continue

            print(query)
            stats = intent_dict.setdefault(intent, [0, 0])

            # nlu_task requires the worksheet to log into; the previous call
            # omitted it and raised TypeError.
            intent_list = nlu_task(query, 5, ws_w)

            accepted = (intent,)
            for group in equivalent_groups:
                if intent in group:
                    accepted = group
                    break
            if any(label in intent_list for label in accepted):
                stats[1] += 1
            stats[0] += 1

    all_num = 0
    all_true = 0
    # The summary must live in the workbook that gets saved (wb_w); it used to
    # be created on the input workbook and was therefore never written.
    ws_summary = wb_w.create_sheet('出现标签准确率')
    ws_summary.append(['意图名称', '是否出现/正确率', '出现次数', '正确数'])
    for name, (seen, correct) in intent_dict.items():
        all_num += seen
        all_true += correct
        rate = correct / seen * 100  # seen >= 1 for every recorded label
        ws_summary.append([name, rate, seen, correct])
        print('{}，{}%，出现总数:{}，正确数:{}'.format(name, rate, seen, correct))

    # Avoid ZeroDivisionError when no row passed the filter.
    overall = all_true / all_num * 100 if all_num else 0.0
    print('整体准确率：{}'.format(overall))
    ws_summary.append(['整体准确率', overall, all_num, all_true])

    wb_w.save('./金融准确率.xlsx')


def get_data_from_datebase():
    """Export human dialogue turns from MySQL to ``./轻舟纯人数据.xlsx``.

    Fetches every ``human_dialogue`` row of company 2141 whose
    ``call_start_time`` lies between 2022-03-15 09:00:00 and
    2022-03-18 23:59:59, runs ``nlu_result('304', ...)`` on rows whose
    ``identity`` equals 1, and writes one spreadsheet row per turn. The file
    is saved every 2000 rows and once more at the end.
    """
    # NOTE(review): host and credentials are hard-coded in source; they should
    # be moved to configuration or environment variables and rotated.
    connection = pymysql.connect(host="47.92.193.147",
                                 port=3306,
                                 user="root",
                                 passwd="Moxi123#",
                                 db="task_dialogue_config",
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    # Values are bound by the driver instead of being formatted into the SQL.
    sql_id = ('select call_id, content_text, identity, workspace_id, company_id, call_start_time '
              'from human_dialogue '
              'where company_id = %s '
              'and call_start_time > %s '
              'and call_start_time < %s;')

    # Release the cursor and the connection as soon as the rows are in memory,
    # before the slow per-row NLU calls start.
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql_id, (2141, '2022-03-15 09:00:00', '2022-03-18 23:59:59'))
            results = cursor.fetchall()
    finally:
        connection.close()

    wb = Workbook()
    ws = wb.active
    # NOTE(review): the header names three columns while each data row below
    # carries five (intent and matched query come last).
    ws.append(['session_id', '角色', '内容'])

    print(len(results))
    for num, result in enumerate(tqdm.tqdm(results), start=1):
        call_id = result['call_id']
        content = result['content_text']
        identity = result['identity']

        intent = ''
        intent_query = ''
        if identity == 1:
            res = nlu_result('304', content)
            # A '_' marks the FAQ fallback, i.e. no recognised intent.
            if '_' not in res[1]:
                intent = res[1]
                intent_query = res[2]

        ws.append([call_id, identity, content, intent, intent_query])
        # Periodic checkpoint so a crash does not lose the whole export.
        if num % 2000 == 0:
            wb.save('./轻舟纯人数据.xlsx')

    wb.save('./轻舟纯人数据.xlsx')


def total_dialogue_test():
    """Run windowed NLU over the flagged rows of the quality-check workbook.

    Reads the first sheet of ``./2022-5-6_金条质检纯人违规.xlsx``; for every
    non-header row with a truthy second column and a third column equal to 1,
    the second column is passed to ``nlu_task`` with a window of 3. The
    per-window rows are saved to ``./质检标注.xlsx``.
    """
    output_book = Workbook()
    output_sheet = output_book.active

    source_book = load_workbook('./2022-5-6_金条质检纯人违规.xlsx')
    source_sheet = source_book[source_book.sheetnames[0]]

    for index, record in tqdm.tqdm(enumerate(source_sheet.values)):
        is_header = index == 0
        if is_header or not record[1] or record[2] != 1:
            continue

        print(record)
        # Column 0 is used as the customer id, column 1 as the utterance and
        # column 2 as the label/flag that was filtered on above.
        nlu_task(record[1], 3, output_sheet)
        print(record[0], record[2])

    output_book.save('./质检标注.xlsx')


if __name__ == '__main__':
    # Manual smoke run: push one sample utterance through nlu_task with a
    # window of three segments. The scratch workbook only satisfies the
    # worksheet argument and is not saved.
    scratch_book = Workbook()
    sample_utterance = '嗯，先生咱们这个首页激活呢，是只需要。嗯几块钱的线生也是非常划算的，然后六百万的保障先上，然后先打开网址看一下你这个保单好吧先生。'

    nlu_task(sample_utterance, 3, scratch_book.active)

    # Other entry points, enabled by hand when needed:
    #   total_dialogue_test()
    #   task()
    #   get_data_from_datebase()
    #
    # A disabled recall-report snippet used to follow here as a bare string
    # literal; it referenced names not defined at this level and never ran.