# -*- coding: utf-8 -*-


import os
import re
import glob
import json
import logging
import datetime
import requests

from openpyxl import Workbook

# Compiled patterns for course / product names (children's English, coding,
# reading, handwriting courses, ...). In this file they are only referenced
# from disabled code inside `recognition`.
project_list = [
    re.compile(source)
    for source in (
        r'.*(少儿英语|开心鼠|(猿|圆|缘|元)(编程|变成|边城|边上)|英语.{0,10}(体验|启蒙|课程|课)|(网易|有道)|河小象|写字课|叫叫阅读).*',
        r'.*(阅读.{0,10}(兴趣|课程|启蒙|课)).*',
    )
]

# Rules that `recognition` looks up individually by key (first entry of each list):
#   '销售禁行' - an utterance naming the company together with "staff" /
#               "customer service", and not mentioning "app" or "下载";
#   '服务专业' - an utterance mentioning enterprise WeChat, service
#               notifications, "cannot find / cannot add", another phone
#               number or WeChat id, etc.
re_dict1 = {}
re_dict1['销售禁行'] = [
    r'^((?!app|下载).)*(高途|高图|高徒).*(工作人员|客服)((?!app|下载).)*$',
]
re_dict1['服务专业'] = [
    r'(企微|企业微信|管方微信|服务通知|找不到|加不到|隐藏功能|另.{0,10}(手机号|微信号))',
]

# Violation rules, keyed by violation category. `recognition` runs every
# pattern of every category against each utterance it checks and records a
# "<category>错误" row for each hit.
re_dict2 = {
    '客户冲突': [
        r'^((?!动).)*滚((?!动).)*$',  # does not apply to WeChat nicknames | all kinds
        r'^((?!昵称|微信名).)*(垃圾|我操|傻逼|叫声|神经病|有病|卧槽|他妈的|贱人|智商|卑鄙无耻|衣冠禽兽|我丢你老母|屎|装疯卖傻|装傻充愣|疯子|王八|疯狗|乱咬人|听不懂人话|不说人话|猪狗不如|混蛋|没脑子|傻屌|去死吧|TM|人渣|怂|妈逼|操你妈|妈逼|傻子|SB|sb)((?!昵称|微信名).)*$',
        r'^((?!昵称|微信名).)*(有人生没人养|不要脸|叫声妈|叫声爹|装纯洁|在东莞挣钱|有脸活|枉为人|噎着|臭|臭嘴|坑蒙拐骗|生下你|镜子|照照|三观不正|吃软饭|脑残|丧样|人话|个泡|装逼|牛逼|婊子|不是人[^工]|贱|废话|丧失|嘴硬|三观不正|出息|没教养|上门女婿|装什么装|没素质|找小三|吃软饭|拿镜子照照|撒泡尿照照|几斤几两|蠢|不着调|屎|脑残|人渣|是不是人|吸毒|溜冰|贩毒|瘾君子|赢钱|赌场|毒品|大波|犯贱|痴呆|败类|丢脸|脑残|人渣|天堂|脸皮厚)((?!昵称|微信名).)*$',
        r'(我是你.{1,5}(爸|妈|爷|奶))',
        # NOTE(review): `(?!.).` can never match a character, so this rule only
        # fires when the whole utterance is exactly "妈的" - confirm that is intended.
        r'^((?!.).)*妈的((?!.).)*$',
        r'(东莞.*母亲|东莞.*女儿|母亲.*东莞|女儿.*东莞|你母亲)',
        r'(穷|就缺.{0,5}(钱|报名费)|都不舍得|报名.{0,5}怎么了)'],
    '弄虚作假': [r'^((?!不支持|不能|特价课).)*退费((?!不支持|不能|特价课).)*$',
             # Fixed: the leading exclusion group was missing its `*`, so the rule
             # only matched when "买" was the second character of the utterance.
             r'^((?!不合适).)*买.{0,5}(((高|低)一年级)|下学期)((?!不合适).)*$',
             r'帮.{0,5}(亲戚|朋友|同事).{0,5}买|多买点|(亲戚|朋友|同事).{0,5}有需要吗|帮完成个任务'],
    # NOTE(review): the "？" below is a full-width question mark and is matched
    # literally; if an optional group (ASCII "?") was meant, the rule needs changing.
    '不当获利': [r'(红包|现金)？.*返利|(发|给).{0,5}(红包|现金)', ],
    '虚假承诺': [
        r'非凡.{0,10}(均|也).{0,10}(能|可)使用', r'不限.{0,10}樊登.{0,10}使用',
        r'^((?!(2年|两年|赠送一年|买一年送一年)).)*年卡.{0,50}(有效期|能使用|可用)((?!(2年|两年|赠送一年|买一年送一年)).)*$', ],
        # NOTE: a rule about several devices being logged in at the same time
        # used to follow here; it is disabled (comment dated 2021-10-25).
    # Agent says the account cannot be found / user says it is not this phone
    # number but another one.
    '销售禁行': [r'(工资|你的.{0,5}待遇|几口人|身体好吗|吃饭了吗)', r'(您|你|我).{0,5}(微信|号码|地址|住址|住的地方).{0,5}(是多少|在哪)', ],
    '服务态度': [],
    # Afterwards the user refuses and hangs up | cannot add | hidden feature |
    # another phone number.
    # Fixed: `[^企业]微信` was a negated character class that had to consume one
    # character, so the plain phrase "加我微信" never matched. A look-behind keeps
    # the "not directly preceded by 企 or 业" exclusion without consuming
    # anything; the gap is widened from 5 to 6 so every previous match still matches.
    '服务专业': [r'(加.{0,5}我.{0,6}(?<![企业])微信)', ]
}

# Phrases treated by `recognition` (channel 1) as the customer asking which
# platform / organisation is calling.
re_dict3 = {}
re_dict3['询问平台'] = [
    r'(这个是|哪里|哪个|哪的|怎么办|什么(平台|东西|玩意))',
]


def _clean_utterance(content):
    """Return ``process_queries(content)``, or *content* unchanged if cleaning raises."""
    try:
        return process_queries(content)
    except Exception:
        # Deliberately best effort: a cleaning failure on one utterance must
        # not abort the analysis of the whole call.
        logging.getLogger(__name__).debug(
            'process_queries failed; using the raw utterance', exc_info=True)
        return content


def recognition(conversation, repeat_time, call_id, customer_id, who, call_date, wav_path, ip, time, channel, phone, stop_sign):
    """Run the rule checks over one call and save the result as an .xlsx file.

    A workbook is created whose first row is
    ``[call_id, customer_id, who, call_date, wav_path, phone]``. It is saved to
    ``./<ip>_data_dir/<date>_<customer_id>_<sign_num>_<time>.xlsx``.

    Args:
        conversation: sequence of turns. With ``channel == 1`` every turn is a
            2-item ``(content, speaker)``; with ``channel == 2`` every turn is a
            4-item ``(content, speaker, start_time, end_time)`` and turns whose
            speaker is ``1`` get the rule checks (presumably the agent side -
            confirm with the caller); all other turns go to the intent model.
        repeat_time: forwarded to ``all_recognition``.
        call_id, customer_id, who, call_date, wav_path, phone: identifiers
            copied into the output rows / header row.
        ip: used to build the output directory name and forwarded to
            ``intent_judge``.
        time: forwarded to ``all_recognition`` and used in the output file name
            (presumably the call duration in seconds - confirm).
        channel: ``1`` or ``2``; any other value only writes the header row.
        phone: written into the header row.
        stop_sign: unused.

    Returns:
        ``1`` if a violation was recorded, otherwise ``0``. In channel 2 the
        value is derived from the final rows; a call containing "留言" or
        "正在通话中" on the non-agent side returns ``0`` immediately.
    """
    wb = Workbook()
    ws = wb.active
    ws.append([call_id, customer_id, who, call_date, wav_path, phone])

    data_dir = './{}_data_dir/'.format(ip)

    # Compile every rule once per call instead of once per turn per rule.
    identity_rule = re_dict1['销售禁行'][0]
    identity_re = re.compile(identity_rule)
    violation_rules = [
        (false_type, re_rule, re.compile(re_rule))
        for false_type, rules in re_dict2.items()
        for re_rule in rules
    ]

    sign_num = 0
    sign_guanfangweixin = 0
    sign_ask = False
    dialogue_list = []
    type_dict = {'客户冲突': 0, '弄虚作假': 0, '不当获利': 0, '虚假承诺': 0,
                 '销售禁行': 0, '服务态度': 0, '服务专业': 0, '询问平台': 0}

    if channel == 1:
        # NOTE(review): the rows collected in this branch are never appended to
        # the worksheet, so the saved file only holds the header row; only the
        # returned flag / file name reflect the findings. Confirm whether the
        # rows should be written as in channel 2.
        ask_platform_re = re.compile(re_dict3['询问平台'][0])
        complaint_re = re.compile(r'(投诉|领导在哪)')

        for content, _speaker in conversation:
            content = _clean_utterance(content)

            # Customer asks which platform is calling.
            if ask_platform_re.search(content):
                sign_ask = True
                dialogue_list.append([call_id, customer_id, '', '', '', '', '1_' + content])
                continue

            # Customer signals a complaint; such turns skip the rule checks.
            if complaint_re.search(content):
                dialogue_list.append([call_id, customer_id, '', '', '', '', '1_' + content])
                continue

            flagged = False

            # Agent identifies as company staff although nobody asked yet.
            match = identity_re.search(content)
            if match and not sign_ask:
                dialogue_list.append([call_id, customer_id, '', '{}错误'.format('销售禁行'), identity_rule, match.group(),
                                      '1_' + content])
                sign_num = 1
                flagged = True

            for false_type, re_rule, pattern in violation_rules:
                match = pattern.search(content)
                if not match or false_type == '服务专业':
                    # '服务专业' hits are ignored in this channel.
                    continue
                dialogue_list.append([call_id, customer_id, '', '{}错误'.format(false_type), re_rule, match.group(),
                                      '1_' + content])
                sign_num = 1
                flagged = True

            if not flagged:
                dialogue_list.append([call_id, customer_id, '', '', '', '', '1_' + content])

    elif channel == 2:
        official_wechat_re = re.compile(re_dict1['服务专业'][0])
        ask_platform_re = re.compile(r'(哪的|什么(平台|东西|玩意))')

        conversation_1 = []
        conversation_2 = []

        for turn in conversation:
            if turn[1] == 1:
                conversation_1.append(turn)
                continue

            if any(word in turn[0] for word in ('留言', '正在通话中')):
                # Voicemail / busy prompt: stop analysing this call.
                # NOTE(review): this summary row is only added to the local
                # list and never to the worksheet, so the saved file contains
                # just the header row - confirm whether it should be written.
                conver_list = [call_id, customer_id, who, call_date, '0', '']
                conver_list.extend(str(item[1]) + '_' + item[0] for item in conversation)
                dialogue_list.append(conver_list)

                os.makedirs(data_dir, exist_ok=True)
                wb.save('./{}_data_dir/{}.xlsx'.format(ip, call_id))
                return 0
            conversation_2.append(turn)

        sign_num = all_recognition('开场', call_id, customer_id, conversation_1, conversation_2, repeat_time, dialogue_list, type_dict, sign_num, time)

        for content, speaker, _start_time, _end_time in conversation:
            # Same best-effort cleaning as channel 1, so one utterance that
            # cannot be cleaned (e.g. an empty one) does not abort the call.
            content = _clean_utterance(content)

            if speaker == 1:
                flagged = False

                # Regular rule checks.
                for false_type, re_rule, pattern in violation_rules:
                    match = pattern.search(content)
                    if match:
                        dialogue_list.append([call_id, customer_id, '', '{}错误'.format(false_type), re_rule, match.group(),
                                              '1_' + content])
                        type_dict[false_type] = 1
                        sign_num = 1
                        flagged = True

                # Company-staff statement made before the customer asked who is calling.
                match = identity_re.search(content)
                if match and not sign_ask:
                    dialogue_list.append([call_id, customer_id, '', '{}错误'.format('销售禁行'), identity_rule, match.group(),
                                          '1_' + content])
                    sign_num = 1
                    flagged = True

                # Remember whether the official (enterprise) WeChat was mentioned.
                if official_wechat_re.search(content):
                    sign_guanfangweixin = 1

                if not flagged:
                    dialogue_list.append([call_id, customer_id, '', '', '', '', '1_' + content])

            else:
                # Ask the online intent model. The original also tracked a
                # complaint flag and an "add WeChat" flag here; the former was
                # never read and the latter was never set, so both were dropped.
                intent = intent_judge(content, customer_id, ip, 'nlu获取意图错误')
                if intent == '什么平台':
                    sign_ask = True

                # Customer asks which platform is calling.
                if ask_platform_re.search(content):
                    sign_ask = True

                dialogue_list.append([call_id, customer_id, '', '', '', '', '2_' + content])

        sign_num = all_recognition('结束', call_id, customer_id, conversation_1, conversation_2, repeat_time, dialogue_list, type_dict, sign_num, time)

        # One summary row per violation category that fired.
        for key, hit in type_dict.items():
            if hit:
                dialogue_list.append([call_id, customer_id, '', '{}错误'.format(key), '', '', ''])

        # A '服务专业错误' row is blanked when the official WeChat was mentioned
        # anywhere in the call, or when the row is among the last four rows.
        # The returned flag is recomputed from the rows that remain.
        sign_num = 0
        total_rows = len(dialogue_list)
        for index, row in enumerate(dialogue_list):
            if row[3] == '服务专业错误' and \
                    (sign_guanfangweixin or (total_rows - index) < 5):
                row[3] = ''
                row[4] = ''
                row[5] = ''
                row[6] = ''
            elif row[3]:
                sign_num = 1

            ws.append(row)

    # makedirs(exist_ok=True) avoids the check-then-create race of exists()+mkdir().
    os.makedirs(data_dir, exist_ok=True)

    # Read the clock once so the three date parts cannot straddle midnight.
    now = datetime.datetime.now()
    date = '{}-{}-{}'.format(now.year, now.month, now.day)

    # If the last listed workbook is not from today, wipe the directory first.
    # NOTE(review): glob returns files in arbitrary order, so "last" is not
    # necessarily the newest file.
    existing = glob.glob('./{}_data_dir/*.xlsx'.format(ip))
    # basename() instead of split('/') so the check also works with Windows separators.
    if existing and not os.path.basename(existing[-1]).startswith(date):
        for file_path in existing:
            os.remove(file_path)
    wb.save(data_dir + '{}_{}_{}_{}.xlsx'.format(date, customer_id, sign_num, time))

    return sign_num


def all_recognition(type, call_id, customer_id, conversation_1, conversation_2, repeat_time, finnal_list, type_dict, sign_num, time):
    """Append silence / repetition findings for one stage of the call to ``finnal_list``.

    Args:
        type: stage name; only '开场' (opening) and '结束' (ending) do anything.
        call_id, customer_id: copied into every appended row.
        conversation_1: turns whose item ``[2]`` and item ``[3]`` are numbers
            (start and end time as unpacked by the caller).
        conversation_2: unused.
        repeat_time: iterable of durations, checked only for the ending stage.
        finnal_list: list that is extended in place with the finding rows.
        type_dict: unused.
        sign_num: returned unchanged.
        time: value the last turn's item ``[3]`` is subtracted from.

    Returns:
        ``sign_num`` exactly as passed in.
    """
    stage = type

    if stage == '开场':
        # Opening: the first turn starts more than 5 after the call began.
        if conversation_1:
            first_turn = conversation_1[0]
            if first_turn[2] > 5:
                finnal_list.append(
                    [call_id, customer_id, '', '销售禁行', '开场空挂时长超过5秒', first_turn[2], first_turn[3]])
        return sign_num

    if stage != '结束':
        return sign_num

    # Ending: more than 5 between the end of the last turn and `time`.
    if conversation_1:
        trailing_gap = time - conversation_1[-1][3]
        if trailing_gap > 5:
            finnal_list.append([call_id, customer_id, '', '销售禁行', '结束空挂时长超过5秒', '', ''])

    # One row per repeated segment longer than 5.
    # NOTE(review): these rows have 6 columns and carry the message in the
    # category column, unlike the 7-column rows above - confirm the layout.
    finnal_list.extend(
        [call_id, customer_id, '', '第{}段交互重复时长超过5秒'.format(index), '', '']
        for index, duration in enumerate(repeat_time)
        if duration > 5
    )
    return sign_num


def intent_judge(query, customer_id, ip, text, timeout=10):
    """Ask the NLU HTTP service for the intent of *query*.

    Args:
        query: utterance text; sent as the ``current_query`` parameter.
        customer_id: only used in the failure log message.
        ip: only used in the failure log message.
        text: caller-supplied description of the failure, used as the log message.
        timeout: seconds to wait for the service before giving up.

    Returns:
        ``intention[0]['value']['standard_query']`` from the JSON response, or
        ``''`` when the response has no usable intention or the request,
        decoding or lookup fails.
    """
    try:
        # Passing the query through `params` lets requests URL-encode it, so
        # characters such as '&', '#' or '+' cannot corrupt the query string.
        # The timeout stops an unresponsive service from blocking the caller forever.
        response = requests.get(
            'http://8.142.85.77:8679/nlu',
            params={'session_id': -1, 'workspace': 222, 'current_query': str(query)},
            timeout=timeout,
        )
        result = json.loads(response.text)

        intention = result['intention']
        if intention and intention[0].get('value', ''):
            return intention[0]['value']['standard_query']
        return ''

    except Exception:
        # Still best effort (the caller treats '' as "no intent"), but record
        # the failure instead of hiding it.
        logging.getLogger(__name__).warning(
            '%s (customer_id=%s, ip=%s)', text, customer_id, ip, exc_info=True)
    return ''


def process_queries(current_query):
    """Clean one transcribed utterance before rule matching.

    Steps, in order:
      1. Remove filler words / courtesies, each only if more than two
         characters remain afterwards.
      2. Collapse immediately repeated fragments (up to 19 characters long) to
         one copy, repeating until nothing changes. Fragments listed in
         ``pass_word`` and fragments that do not compare as CJK text are
         reduced to exactly two copies instead.
      3. Drop a fragment of up to four characters that is repeated right after
         a punctuation mark ("对，对的" -> "，对的").
      4. Replace doubled punctuation with a single "，" and strip one leading
         "，" or "。".

    Args:
        current_query: the utterance text.

    Returns:
        The cleaned text; an empty input returns an empty string.
    """
    pass_word = ('不需要', '不用了', '不用', '一点', '考虑',
                 '谢', '在', '看', '问', '想', '天', '刚', '试')
    # Fixed: a comma was missing after '拜', which silently concatenated it
    # with '嗯，' into the single entry '拜嗯，'.
    remove_word = (
        '不好意思', '对不起', '谢谢你', '哎呀', '你好', '您好', '再见', '谢谢', '好吧', '感谢',
        '呃', '啊', '哦', '嘞', '喂', '哎', '哈', '哟', '哇', '呦', '拜',
        '嗯，', '嗯。', '嗯', '，好，', '。好，', '，行，', '，行。', '，对，', '，对。',
    )

    for remove in remove_word:
        stripped = current_query.replace(remove, '')
        # Never strip an utterance down to two characters or fewer.
        if len(stripped) > 2:
            current_query = stripped

    # Remove adjacent repeated fragments.
    changed = True
    while changed:
        changed = False
        pieces = []
        i = 0

        while i < len(current_query):
            repeat_num = 0
            # Try fragment lengths from short to long and stop at the first
            # one that is immediately repeated.
            for j in range(1, 20):
                target = current_query[i:i + j]
                while target == current_query[i + j * (repeat_num + 1):i + j * (repeat_num + 2)]:
                    repeat_num += 1
                if repeat_num:
                    break

            if not repeat_num:
                pieces.append(current_query[i])
                i += 1
                continue

            if target in pass_word or not (u'\u4e00' <= target <= u'\u9fff'):
                # Keep two copies; this does not trigger another pass.
                pieces.append(target * 2)
            else:
                pieces.append(target)
                changed = True
            i += j * (repeat_num + 1)

        current_query = ''.join(pieces)

    # Merge identical fragments that are separated by a punctuation mark.
    current_query_result = ''
    for i in range(len(current_query)):
        if current_query[i] in ('，', '。', '？', '！'):
            for j in range(1, 5):
                if current_query[max(i - j, 0):i] == current_query[i + 1:i + j + 1]:
                    current_query_result = current_query_result[:-j]
                    break
        current_query_result += current_query[i]
    current_query = current_query_result

    for point_sign in ('？。', '？，', '，。', '，，', '。，', '。。'):
        current_query = current_query.replace(point_sign, '，')

    # Fixed: guard against an empty string, which used to raise IndexError.
    if current_query and current_query[0] in ('，', '。'):
        current_query = current_query[1:]

    return current_query


# Running this file directly is currently a no-op.
if __name__ == '__main__':
    pass