# -*- coding: utf-8 -*-


import os
import re
import glob
import datetime

from openpyxl import Workbook


# Compiled patterns for names of other products/courses (children's English,
# programming, reading, handwriting classes, ...). They are searched against
# utterances with `.search()`; the two entries are kept in this order.
project_list = list(map(re.compile, (
    r'.*(少儿英语|开心鼠|(猿|圆|缘|元)(编程|变成|边城|边上)|英语.{0,10}(体验|启蒙|课程|课)|(网易|有道)|河小象|写字课|叫叫阅读).*',
    r'.*(阅读.{0,10}(兴趣|课程|启蒙|课)).*',
)))


# Rule table keyed by violation category; each value is a list of regex
# sources. The single rule matches an utterance that names 高途 (or a
# homophone spelling) followed later by 工作人员/客服, as long as no character
# before the name or after the role word starts "app" or "下载".
re_dict1 = dict()
re_dict1['销售禁行'] = [
    '^((?!app|下载).)*(高途|高图|高徒).*(工作人员|客服)((?!app|下载).)*$',
]


# Rule table keyed by violation category. Each value is a list of regex
# sources that are applied with `re.search` to a single utterance. The key
# order and the order of rules inside each list are significant: rules are
# evaluated in this order and one report row is written per matching rule.
re_dict2 = {
    # Customer conflict: insults, abusive language, belittling the customer.
    '客户冲突': [
        r'(垃圾|我操|傻逼|叫声|神经病|有病|卧槽|他妈的|贱人|智商|卑鄙无耻|衣冠禽兽|我丢你老母|屎|装疯卖傻|装傻充愣|疯子|王八|疯狗|乱咬人|听不懂人话|不说人话|猪狗不如|混蛋|没脑子|傻屌|去死吧|滚|TM|人渣|怂|妈逼|操你妈|妈逼|傻子|SB|sb)',
        r'(有人生没人养|不要脸|叫声妈|叫声爹|装纯洁|在东莞挣钱|有脸活|枉为人|噎着|臭|臭嘴|坑蒙拐骗|生下你|镜子|照照|三观不正|吃软饭|脑残|丧样|人话|个泡|装逼|牛逼|婊子|滚|不是人|贱|废话|丧失|嘴硬|三观不正|出息|没教养|上门女婿|装什么装|没素质|找小三|吃软饭|拿镜子照照|撒泡尿照照|几斤几两|蠢|不着调|屎|脑残|人渣|是不是人|吸毒|溜冰|贩毒|瘾君子|赢钱|赌场|毒品|大波|有种|犯贱|痴呆|败类|丢脸|脑残|人渣|天堂|脸皮厚)',
        r'(我是你.{1,5}(爸|妈|爷|奶))',
        # NOTE(review): `(?!.)` only succeeds where no character follows, so
        # `((?!.).)*` never consumes anything and this rule matches only when
        # the whole utterance is exactly "妈的". The excluded word inside the
        # lookahead looks like it was lost - confirm the intended exclusion.
        r'^((?!.).)*妈的((?!.).)*$',
        r'(东莞.*母亲|东莞.*女儿|母亲.*东莞|女儿.*东莞|你母亲)',
        r'(穷|就缺.{0,5}(钱|报名费)|都不舍得)',
    ],
    # Deception / gaming the process.
    '弄虚作假': [
        r'^((?!不支持|不能|特价课).)*退费((?!不支持|不能|特价课).)*$',
        # The leading tempered group now carries `*` like its trailing twin;
        # without it the rule only fired when "买" was the second character
        # of the utterance.
        r'^((?!不合适).)*买.{0,5}(((高|低)一年级)|下学期)((?!不合适).)*$',
        r'帮.{0,5}(亲戚|朋友|同事).{0,5}买|多买点|(亲戚|朋友|同事).{0,5}有需要吗|帮完成个任务',
    ],
    # Improper benefit (red envelopes / cash / rebates).
    # NOTE(review): the "？" after (红包|现金) is a literal full-width question
    # mark, not the regex quantifier - confirm this is intended.
    '不当获利': [r'(红包|现金)？.*返利|(发|给).{0,5}(红包|现金)', ],
    # False promises.
    '虚假承诺': [
        # Disabled rule, kept for reference:
        # r'^((?!不).)*(初三|高一|高二|高三|高中)?.*(再|可以).{0,5}(修改|更改|更换|调整|调课)((?!不).)*$',
        r'^((?!课后|辅导|检查).)*一对一.{0,5}(上课|教学|学习)((?!课后|辅导|检查).)*$',
        r'(增加).{0,5}课时',
        r'手机[^号].{0,5}[^不](可以|支持).{0,5}(上课|观看|回放|操作)',
    ],
    # Prohibited sales conduct (personal small talk such as salary, family).
    '销售禁行': [r'(工资|待遇|几口人|身体好吗|吃饭了吗)', ],
    # Service attitude: no rules defined.
    '服务态度': [],
    # Service professionalism: mentions that a computer/tablet is needed.
    '服务专业': [r'[^不](需要|用).{0,10}(电脑|平板)', r'(电脑|平板).*(可以上课)'],
}


# Rule table for customer questions about who/what is calling
# ("which one", "where from", "what platform", ...). Same layout as the other
# rule tables: category -> list of regex sources.
re_dict3 = dict()
re_dict3['询问平台'] = [
    r'(这个是|哪里|哪个|哪的|怎么办|什么(平台|东西|玩意))',
]


def recognition(conversation, repeat_time, call_id, customer_id, who, call_date, wav_path, ip, time, channel, phone, stop_sign):
    """Check one call transcript against the rule tables and save an .xlsx report.

    The first worksheet row is ``[call_id, customer_id, who, call_date,
    wav_path, phone]``. After it, every utterance produces either one plain
    row (utterance text in the 7th column, prefixed with ``'1_'`` or ``'2_'``)
    or one row per matched rule (category + ``'错误'``, rule source, matched
    text, utterance).

    Args:
        conversation: Sequence of turns. With ``channel == 1`` each turn is
            indexed as ``(content, type)`` and every turn is checked both as a
            customer question/complaint and against the agent rules. With
            ``channel == 2`` each turn is ``(content, type, start_time,
            end_time)``; ``type == 1`` turns get the agent rules, all other
            turns are treated as customer speech.
        repeat_time: Not used by this function.
        call_id, customer_id, who, call_date, wav_path, phone: Written
            verbatim into the report rows.
        ip: Used to build the output directory ``./<ip>_data_dir/``.
        time: Used as the last component of the report file name.
        channel: ``1`` or ``2``; any other value produces a report that only
            contains the header row.
        stop_sign: Not used by this function.

    Returns:
        ``1`` if at least one rule-violation row was written, otherwise ``0``.
        Also ``0`` when a channel-2 call is recognised as voicemail/busy.
    """
    wb = Workbook()
    ws = wb.active
    ws.append([call_id, customer_id, who, call_date, wav_path, phone])

    sign_num = 0
    sign_ask = False
    # Categories still truthy after a channel-2 scan are reported as errors;
    # '服务专业' starts at 1 and is cleared once one of its rules matches.
    type_dict = {'客户冲突': 0, '弄虚作假': 0, '不当获利': 0, '虚假承诺': 0,
                 '销售禁行': 0, '服务态度': 0, '服务专业': 1, '询问平台': 0}

    # Compile every rule once per call instead of once per utterance.
    ban_rule = re_dict1['销售禁行'][0]
    ban_pattern = re.compile(ban_rule)
    compiled_rules = [(false_type, re_rule, re.compile(re_rule))
                      for false_type, rules in re_dict2.items()
                      for re_rule in rules]
    phone_rule = r'手机[^号].{0,5}[^不](可以|支持).{0,5}(上课|观看|回放|操作)'

    def plain_row(prefix, content):
        return [call_id, customer_id, '', '', '', '', prefix + content]

    def error_row(false_type, re_rule, result, content):
        return [call_id, customer_id, '', '{}错误'.format(false_type), re_rule,
                result.group(), '1_' + content]

    def flag_agent_turn(content, asked, skip_phone_rule):
        """Write one row per violated rule; return True if any row was written."""
        hit = False
        result = ban_pattern.search(content)
        # The brand/staff rule is waived once the customer has asked who is calling.
        if result and not asked:
            ws.append(error_row('销售禁行', ban_rule, result, content))
            hit = True
        for false_type, re_rule, pattern in compiled_rules:
            result = pattern.search(content)
            if not result:
                continue
            if false_type == '服务专业':
                # A match here is the desired behaviour: clear the default flag.
                type_dict[false_type] = 0
            elif skip_phone_rule and re_rule == phone_rule:
                continue
            else:
                ws.append(error_row(false_type, re_rule, result, content))
                hit = True
        return hit

    if channel == 1:
        ask_pattern = re.compile(re_dict3['询问平台'][0])
        complaint_pattern = re.compile(r'(投诉|领导在哪)')
        for turn in conversation:
            content = _clean_query(turn[0])

            # Customer speech: asking who is calling.
            if ask_pattern.search(content):
                sign_ask = True
                ws.append(plain_row('1_', content))
                continue

            # Customer speech: intent to complain.
            if complaint_pattern.search(content):
                ws.append(plain_row('1_', content))
                continue

            # Agent speech.
            if flag_agent_turn(content, sign_ask, False):
                sign_num = 1
            else:
                ws.append(plain_row('1_', content))

    elif channel == 2:
        # Voicemail / busy prompts on the non-agent side end the analysis early.
        # The speaker type is element 1 of the turn (see the unpacking below);
        # the last element is the end time.
        for turn in conversation:
            if turn[1] == 1:
                continue
            if any(word in turn[0] for word in ('留言', '正在通话中')):
                conver_list = [call_id, customer_id, who, call_date, '0', '']
                conver_list.extend(str(t[1]) + '_' + t[0] for t in conversation)
                ws.append(conver_list)
                # Saved next to the regular reports; the previous hard-coded
                # path was built with an argument-less format() and raised.
                _save_report(wb, ip, customer_id, 0, time)
                return 0

        customer_ask_pattern = re.compile(r'(哪的|什么(平台|东西|玩意))')
        other_project_mentioned = False
        for turn in conversation:
            content, speaker, _start_time, _end_time = turn
            content = _clean_query(content)

            if speaker == 1:
                # Sticky for the rest of the call once another product is named.
                if any(project.search(content) for project in project_list):
                    other_project_mentioned = True
                if flag_agent_turn(content, sign_ask, other_project_mentioned):
                    sign_num = 1
                else:
                    ws.append(plain_row('1_', content))
            else:
                # Customer asking who is calling.
                if customer_ask_pattern.search(content):
                    sign_ask = True
                ws.append(plain_row('2_', content))

        for key in type_dict:
            if type_dict[key]:
                ws.append([call_id, customer_id, '', '{}错误'.format(key), '', '', ''])

    _save_report(wb, ip, customer_id, sign_num, time)
    return sign_num


def _clean_query(content):
    """Normalise an utterance; fall back to the raw text if normalisation fails."""
    try:
        return process_queries(content)
    except Exception:
        # Deliberate best effort: a turn that cannot be normalised is still
        # scanned and reported as-is.
        return content


def _save_report(wb, ip, customer_id, sign_num, time):
    """Save *wb* into ``./<ip>_data_dir/``, first purging reports from another day."""
    out_dir = './{}_data_dir/'.format(ip)
    os.makedirs(out_dir, exist_ok=True)

    # Read the clock once so the three date parts cannot straddle midnight.
    now = datetime.datetime.now()
    date = '{}-{}-{}'.format(now.year, now.month, now.day)

    existing = glob.glob(out_dir + '*.xlsx')
    if existing and not os.path.basename(existing[-1]).startswith(date):
        for file_path in existing:
            os.remove(file_path)
    wb.save(out_dir + '{}_{}_{}_{}.xlsx'.format(date, customer_id, sign_num, time))


def process_queries(current_query):
    """Normalise one transcribed utterance before rule matching.

    Steps, in order:
      1. Strip filler words/phrases, but only while more than two characters
         would remain.
      2. Collapse immediately repeated substrings (1-19 characters long).
         Repeats of a ``pass_word`` entry, or of text whose first character
         is outside U+4E00..U+9FFF, are reduced to exactly two copies;
         everything else is reduced to one copy.
      3. Merge a short phrase repeated on both sides of a punctuation mark.
      4. Collapse doubled punctuation to a single '，' and drop a leading
         '，' or '。'.

    Args:
        current_query: The utterance text.

    Returns:
        The normalised text. An empty input yields an empty string.
    """
    pass_word = ['不需要', '不用了', '不用', '一点', '考虑',
                 '谢', '在', '看', '问', '想', '天', '刚', '试']
    # '拜' and '嗯，' are separate fillers; a missing comma used to fuse them
    # into the single entry '拜嗯，'.
    remove_word = [
        '不好意思', '对不起', '谢谢你', '哎呀', '你好', '您好', '再见', '谢谢', '好吧', '感谢',
        '呃', '啊', '哦', '嘞', '喂', '哎', '哈', '哟', '哇', '呦', '拜',
        '嗯，', '嗯。', '嗯', '，好，', '。好，', '，行，', '，行。', '，对，', '，对。']

    for remove in remove_word:
        stripped = current_query.replace(remove, '')
        # Never strip an utterance down to two characters or fewer.
        if len(stripped) > 2:
            current_query = stripped

    # Collapse adjacent repeated substrings; rescan until a pass collapses
    # nothing down to a single copy.
    changed = True
    while changed:
        changed = False
        pieces = []
        i = 0
        while i < len(current_query):
            repeat_num = 0
            # Try the shortest repeating unit first.
            for j in range(1, 20):
                target = current_query[i:i + j]
                while target == current_query[i + j * (repeat_num + 1):i + j * (repeat_num + 2)]:
                    repeat_num += 1
                if repeat_num:
                    break
            if repeat_num:
                if target in pass_word or not (u'\u4e00' <= target <= u'\u9fff'):
                    # Keep natural doublings such as '谢谢' and repeated non-CJK text.
                    pieces.append(target * 2)
                else:
                    pieces.append(target)
                    changed = True
                i += j * (repeat_num + 1)
            else:
                pieces.append(current_query[i])
                i += 1
        current_query = ''.join(pieces)

    # Merge identical text separated only by a punctuation mark: the copy in
    # front of the mark is dropped, the mark and the copy after it are kept.
    merged = ''
    for i, char in enumerate(current_query):
        if char in ['，', '。', '？', '！']:
            for j in range(1, 5):
                if current_query[max(i - j, 0):i] == current_query[i + 1:i + j + 1]:
                    merged = merged[:-j]
                    break
        merged += char
    current_query = merged

    for point_sign in ['？。', '？，', '，。', '，，', '。，', '。。']:
        current_query = current_query.replace(point_sign, '，')

    # startswith is safe on an empty string, unlike indexing [0].
    if current_query.startswith(('，', '。')):
        current_query = current_query[1:]

    return current_query


# Script entry point: running this file directly does nothing.
if __name__ == '__main__':

    pass