# -*- coding: utf-8 -*-
"""Rule-based quality inspection of call transcripts.

A conversation is a sequence of ``[content, speaker, start_time, end_time]``
items where ``speaker == 1`` marks an agent utterance and any other value is
treated as a customer utterance.  Agent utterances are matched against the
regular expressions in ``re_dict1`` / ``re_dict2`` and the findings are written
to an Excel workbook.
"""
import re
import os
import json
import glob
import pymysql
import datetime
import requests
import traceback
from openpyxl import Workbook, load_workbook
# from server import send_wechat_warning

# Rules that are looked up individually by category and index.
re_dict1 = {
    '开场类': [
        r'^((?!先生|女士|本人).)*(京东金融|京东|金融).{0,10}工号((?!先生|女士|本人).)*$',
        r'^((?!先生|女士|本人).)*(京东金融|京东|金融).{0,10}(一|二|三|四|五|六|七|八|九|零)((?!先生|女士|本人).)*$',
    ],
    '敏感话术': [
        r'^((?!((抱歉|不好意思|对不起|打扰)|回访|工号|95118|九五.{0,5}八)).)*$',
    ],
    '结束类': [
        r'^((?!感谢.*祝您).)*再见((?!感谢.*祝您).)*$',
    ],
    '防诈骗话术': [
        r'^((?!(防范.*诈骗|诈骗.*防范)).)*$',
    ],
    '业务类': [
        r'^((?!以.{0,4}(页面|app|ap|显示|系统).{0,4}为准|您说|您是说).)*((您的|你的).{0,4}额度.{0,4}(万|千|元|块)|利息.*[0-9])((?!以.{0,4}(页面|app|ap|显示|系统).{0,4}为准|您说|您是说).)*$',
    ],
}

# Rules that are applied to every agent utterance, grouped by error category.
re_dict2 = {
    '开场类': [],  # r'.*我是.{0,5}京东.{0,5}客服.*'],
    '业务类': [
        # The question-mark alternative is written as a character class: a bare
        # `?` after `|` is a quantifier with nothing to repeat and makes
        # re.compile() raise.  Both ASCII and full-width forms are accepted.
        r'^((?!已经|可以|吗|吧|好|通过|能不能|要不要|方便|需不需要|有.{0,5}需求|能否|没.{0,5}(收到|接受到)|没看到|没同意|[?？]|之前).)*(发.{0,5}短信|发送.{0,20}查收|短信.{0,10}的(形|方)式|加.{0,5}微信)((?!已经|可以|吗|吧|好|通过|能不能|要不要|方便|需不需要|有.{0,5}需求|能否|没.{0,5}(收到|接受到)|没看到|没同意|[?？]|之前).)*$',
        # r'随便.{0,5}(写|填)',
        # NOTE(review): this rule uses positive lookahead `(?=...)` where the
        # sibling rules use negative `(?!...)`; as written the wrapped groups
        # can effectively match only zero characters.  Confirm intent.
        r'^((?=不是).)*给(你|您).*红包((?=不是).)*$',
        r'^((?!(页面|app|ap|显示|系统).{0,5}(主|准|标准)|(是不是|看到|页面|并不是|(帮|给|帮到|给到)(您|你))).)*(额度.{0,5}最高|利息.{0,5}最低)((?!(页面|app|ap|显示|系统).{0,5}(主|准|标准)|(是不是|看到|页面|并不是|(帮|给|帮到|给到)(您|你))).)*$',
    ],
    '职业操守': [
        r'(您|你|我).{0,5}((微信|电话|号码).{0,5}是多少|(地址|住址|住的地方|身份证).{0,5}在哪)',
        r'待遇|几口人|身体好吗|吃饭了吗',
        r'看到.{0,10}系统',
        r'免费的',
        r'我这.{0,5}显示.{0,10}(已通过|已激活)|(看到|看见|显示|您的)?.{0,10}(额度)(还行|是.{0,5}[0-9](万|千|元|块))|(看到|看见|显示|您的)?.{0,10}(利息.{0,5}(分之))',
        # NOTE(review): this rule has nothing after the guarded prefix and no
        # `$`, so `search` succeeds on every string (possibly with an empty
        # match) and every agent utterance is reported under this category.
        # It looks truncated -- confirm the intended pattern.
        r'^((?!以.{0,4}(页面|app|ap|显示|系统).{0,4}为准|您说|您是说).)*',
        r'^((?!到账|花).)*(是|在).{0,5}([0-9]|(一|两|三|四|五|六|七|八|九|十))(点|分|秒).{0,5}激活.{0,5}额度((?!到账|花).)*$',  # author's note: "relatively low"
        r'(几|一|二|三|四|五|六|七|八|九|十|1|2|3|4|5|6|7|8|9|10).{0,5}(点|分|秒).{0,10}注册',  # author's notes: "may be restated after the customer said so"; "relatively low"
        # Must stay last: `recognition` skips this rule (looked up with [-1])
        # once the customer has mentioned an amount.
        r'^((?!最高|优惠券|免息券|圈|(举|打).{0,5}(例子|比方)|比如|比方|假设|假如|的话|有的客户).)*(激活.{0,5}(万|千|百).{0,5}额度|激活.{0,5}额度.{0,5}(万|千|百))((?!最高|优惠券|免息券|券|圈|(举|打).{0,5}(例子|比方)|比如|比方|假设|假如|的话|有的客户).)*$',
    ],
    '风险类': [
        # r'^((?!有助于|有帮助|的机会|一定的|输入).)*((额度|利息)?(肯定|一定).{0,10}(升满|降低|降息|降到|提升|提额))((?!有助于|有帮助|的机会|一定的|输入).)*$',
        r'帮你.{1,4}查.{1,4}额度',
        r'(您的|你的).{0,5}额度.{0,5}(最高|最少|大概|大约|一般).{0,5}(万|千|元|块)',
        r'(您的|你的).{0,5}利息.*[0-9]',
        r'把.{0,4}(万|千|元|块).{0,4}(提|取|拿)出来',
        r'^((?!不要|步推荐|不推荐|不建议|不太好|免息|优惠券|想用|比如|比方|假设|假如|的话|打.{0,5}比方).)*(只用|就用).{0,5}(几|一|二|三|四|五|六|七|八|九|十|1|2|3|4|5|6|7|8|9|10).{0,2}天((?!不要|步推荐|不推荐|不建议|不太好|免息|优惠券|想用|比如说|比方说|假设|假如|的话|打.{0,5}比方).)*$',
        r'(用.{0,5}天.{0,5}算.{0,5}天利息)',
        r'审核.{0,4}(都能|全部).{0,4}通过.*(100%|百分之百)?|(100%|百分之百)?.{0,4}审核.{0,4}(都能|全部).{0,4}通过',
        # r'^((?!(良好.{0,3}(记录|习惯)|(有助于|有可能|有机会|不定期)|具体.{0,3}为准)).)*(提额|降息)((?!(良好.{0,3}(记录|习惯)|(有助于|有可能|有机会|不定期)|具体.{0,3}为准)).)*$'
    ],
    '过渡营销': [],
    '服务类': [
        r'^((?!(不可能|不允许)).)*((不用|不办).{0,4}就.{0,4}一直.{0,4}打)|(天天.{0,4}打)((?!(不可能|不允许)).)*$',
        # NOTE(review): positive lookahead here as well; see the note on the
        # 红包 rule above.
        '^((?=拦截).)*骚扰((?=拦截).)*$',
        r'(不要插嘴|不要打断我|你看着办|听不懂我说话)',
        r'^((?!动).)*滚((?!动).)*$',  # author's sample utterance: "再听我说话吗,您开车听我说就行"
        r'笨|妈的|傻逼|神经病|^((?!没).)*有病((?!没).)*$|卧槽|他妈的|贱人|智商|卑鄙无耻|衣冠禽兽|我丢你老母|装疯卖傻|装傻充愣|疯子|王八|疯狗|乱咬人|听不懂人话|猪狗不如|混蛋|没脑子|傻屌|去死吧|滚|TM|人渣|怂|装逼|操你妈|妈逼|[^不是]傻子|SB|sb',
        r'.*(有人生没人养|不要脸|叫声妈|叫声爹|装纯洁|在东莞挣钱|有脸活|枉为人|噎着|臭嘴|坑蒙拐骗|生下你|镜子|照照|三观不正|吃软饭|是不是人[^工]|脑残|丧样|人话|个泡|装逼|牛逼|婊子|滚|贱|废话|丧失|嘴硬|三观不正|出息|没教养|上门女婿|装什么装|没素质|找小三|吃软饭|拿镜子照照|撒泡尿照照|几斤几两|蠢|不着调|脑残|人渣|吸毒|溜冰|贩毒|瘾君子|赢钱|赌场|毒品|大波|有种|犯贱|痴呆|败类|丢脸|脑残|人渣|天堂|脸皮厚|羞|脸红).*',
    ],
    '甲方禁词': [],
}

# Disabled rules, kept for reference only (never compiled or applied):
#   r'^((?!降|较).)*低利息((?!降|较).)*$',
#   r'(高额无息|无手续费|没有任何费用|100%审核通过|征信没有影响|套现|包审核通过|^((?!点).)*立即开通((?!点).)*$|保过|实时到账|肯定可以降息|肯定可以提额|最低利息非常安全|绝对安全|非常方便|非常可靠)',
#   r'(借呗|花呗|微粒贷|360借条|有钱花|美团).{0,10}(不好|不行|差劲|垃圾|缺点)',
#   r'不.{0,3}(收取|产生).{0,3}任何.{0,3}(费用|利息)',
#   r'(没费用|没有管理费|不收任何费用|不需要任何费用|不会扣任何费用|不收您任何费用|没有任何的费|没有额外的费用|不需要您承担任何费用|不需要任何的一个费用|不需要任何的费用|没有其他的费用|不会亏本)',
#   r'(向您承诺|为您承诺|承诺您|不会亏损|没有任何风险|无风险|没有风险|风险为零|没风险|无息贷|无息|贷款没利息|没有初始费|无费用|没有费用|无初始费)',
#   r'(先听我说|听我说|不要打断我|你看着办|听不懂我说话|又不是傻子)',
#   r'(提额降息的绿色通道)'
#   ],

# Speaker id that marks an agent utterance; every other value is a customer.
_AGENT = 1
# A customer utterance containing one of these is treated as voicemail.
_VOICEMAIL_MARKERS = ('留言', '正在通话中')
# Customer intents that count as declining / asking about the quota.
_DECLINE_INTENTS = ['不需要', '额度多少', '额度问题']
# Customer intents that mark the call as sensitive.
_SENSITIVE_INTENTS = ('投诉', '别给我打电话了', '怀疑平台')
_INTENT_URL = 'http://8.142.85.77:8683/predict'

# Fixed patterns, compiled once at import time.
_AGENT_FREE_INTEREST = re.compile(r'(免息|(不收|不计|不要).{0,5}任何(利息|费用))')
_CUSTOMER_FREE_INTEREST = re.compile(r'(免息|没有.{0,5}利息|免除.{0,5}利息)')
_HOLIDAY_MARKETING = re.compile(r'(双(十一|11|十二|12)|元旦|年底|过年|开学|孩子上学|618|五一|十一|寒假|暑假).*(购物|满减|囤货|打折|吃饭|旅游|教育)')
_BRAND_RULE = r'(京东|金融)'
_PRODUCT_INTRO = re.compile(r'15天免息券|免费用15天|提额|降息|更高额度|更低利率')
_QUOTA_CLEARING = re.compile(r'(全额|全部).{0,5}提|提.{0,5}(全额|全部)|清空.{0,5}(账户|余额|额度)')
_CUSTOMER_AGREES = re.compile(r'可以|好吧|好的|手机号是|^((?!.).)*(,|。)?(好|行|嗯)+(,|。)?((?!.).)*$')
_HOW_MUCH = re.compile(r'多少')
_AMOUNT_MENTIONED = re.compile(r'[0-9]|(一|二|三|四|五|六|七|八|九|零|十).{0,5}万')


def _split_speakers(conversation):
    """Split turns by speaker and detect voicemail.

    Returns ``(agent_turns, customer_turns, is_voicemail)``.  Scanning stops at
    the first customer turn that contains a voicemail marker.
    """
    agent_turns = []
    customer_turns = []
    for turn in conversation:
        if turn[1] == _AGENT:
            agent_turns.append(turn)
        elif any(marker in turn[0] for marker in _VOICEMAIL_MARKERS):
            return agent_turns, customer_turns, True
        else:
            customer_turns.append(turn)
    return agent_turns, customer_turns, False


def _save_voicemail(wb, ws, conversation, call_id, customer_id, who, call_date):
    """Append the one-row voicemail record and save it under ./data_dir/."""
    record = [call_id, customer_id, who, call_date, '0', '']
    record.extend(str(turn[-1]) + '_' + turn[0] for turn in conversation)
    ws.append(record)
    # The directory may not exist yet on a fresh deployment.
    os.makedirs('./data_dir', exist_ok=True)
    wb.save('./data_dir/{}.xlsx'.format(call_id))


def _normalize(content):
    """Return ``process_queries(content)``, or ``content`` unchanged on failure."""
    try:
        return process_queries(content)
    except Exception:
        # Best effort: inspection continues on the raw text.
        return content


def _append_rows(ws, rows):
    """Append rows to the sheet; a row that cannot be written is reported and skipped."""
    for row in rows:
        try:
            ws.append(row)
        except Exception:
            traceback.print_exc()


def _today_stamp():
    """Return today's date as ``year-month-day`` without zero padding."""
    now = datetime.datetime.now()  # read once so the parts cannot straddle midnight
    return '{}-{}-{}'.format(now.year, now.month, now.day)


def _output_dir(ip):
    """Return the per-``ip`` output directory, creating it if needed."""
    path = './{}_data_dir/'.format(ip)
    os.makedirs(path, exist_ok=True)
    return path


def recognition_new(
        conversation, repeat_time, call_id, customer_id, who, call_date,
        wav_path, ip, time, channel, stop_sign
):
    """Flag agent "interest-free" claims and write the transcript to Excel.

    An agent utterance matching the free-interest pattern is marked
    '质检命中' unless a customer utterance has already matched the customer
    free-interest pattern earlier in the call.

    :param conversation: iterable of ``[content, speaker, start_time, end_time]``.
    :param call_id: used in rows and in the output file name.
    :param customer_id: written to every row.
    :param who, call_date, wav_path: written to the header row.
    :param ip: selects the output directory ``./{ip}_data_dir/``.
    :param time: used in the output file name.
    :param repeat_time, channel, stop_sign: accepted but not used here.
    :return: 0 for a voicemail call, otherwise 1 if anything was flagged else 0.
    """
    wb = Workbook()
    ws = wb.active

    # Voicemail detection.
    _, _, is_voicemail = _split_speakers(conversation)
    if is_voicemail:
        _save_voicemail(wb, ws, conversation, call_id, customer_id, who, call_date)
        return 0

    ws.append([call_id, customer_id, who, call_date, wav_path])

    sign_num = 0
    customer_raised_it = False
    rows = []
    for content, speaker, _start_time, _end_time in conversation:
        content = _normalize(content)
        if speaker == _AGENT:
            hit = _AGENT_FREE_INTEREST.search(content)
            if hit and not customer_raised_it:
                sign_num = 1
                rows.append([call_id, customer_id, '', '', '质检命中', hit.group(), '1_' + content])
            else:
                rows.append([call_id, customer_id, '', '', '', '', '1_' + content])
        else:
            rows.append([call_id, customer_id, '', '', '', '', '2_' + content])
            if _CUSTOMER_FREE_INTEREST.search(content):
                customer_raised_it = True

    # Write the final conversation rows.
    _append_rows(ws, rows)

    wb.save(_output_dir(ip) + '{}_{}_{}_{}.xlsx'.format(_today_stamp(), call_id, sign_num, time))
    # Return the flag like `recognition` does (this path used to return None).
    return sign_num


def recognition(
        conversation, repeat_time, call_id, customer_id, who, call_date,
        wav_path, ip, time, channel, stop_sign
):
    """Run the full rule set over a call and write the findings to Excel.

    Consecutive turns by the same speaker are merged in place (the items of
    ``conversation`` are mutated, so they must be lists).  Each agent turn is
    checked against ``re_dict1`` / ``re_dict2``; each customer turn is sent to
    ``intent_judge`` and updates the state that steers later agent checks.

    :param conversation: list of ``[content, speaker, start_time, end_time]``.
    :param call_id: used in rows and in the output file name.
    :param customer_id: written to every row.
    :param who, call_date, wav_path: written to the header row.
    :param ip: selects the output directory ``./{ip}_data_dir/``.
    :param time: used in the output file name.
    :param stop_sign: forwarded to ``all_recognition``.
    :param repeat_time, channel: accepted but not used here.
    :return: 0 for a voicemail call, otherwise 1 if anything was flagged else 0.
    """
    wb = Workbook()
    ws = wb.active
    sign_no = False             # customer just asked "how much"
    sign_yes = False            # customer has agreed at least once
    sign_tousu = False          # customer showed a sensitive intent
    sign_tousu_fix = False      # sensitive-intent reply already reported
    sign_erduduoshao = False    # customer mentioned an amount
    sign_chanjie = 0
    sign_jieriyingxiao = False  # holiday-marketing wording was seen
    sign_huashuzhixing = False  # quota-clearing wording was seen
    sign_false = 0              # current agent turn already has a finding row
    sign_num = 0
    false_num = 0               # number of customer refusals
    finnal_list = []
    type_dict = {'开场类': 0, '结束类': 0, '业务类': 0, '职业操守': 0, '风险类': 0,
                 '过渡营销': 0, '服务类': 0, '防诈骗话术': 0}
    intent_dict = {}

    def row(label='', rule='', hit='', text=''):
        return [call_id, customer_id, '', label, rule, hit, text]

    ws.append([call_id, customer_id, who, call_date, wav_path])

    # Voicemail detection.
    conversation_1, conversation_2, is_voicemail = _split_speakers(conversation)
    if is_voicemail:
        _save_voicemail(wb, ws, conversation, call_id, customer_id, who, call_date)
        return 0

    # Product-specific (金条): merge consecutive turns by the same speaker.
    # The first two characters of the appended turn are dropped
    # (presumably a prefix -- TODO confirm).
    conversation_toget = []
    for turn in conversation:
        if conversation_toget and turn[1] == conversation_toget[-1][1]:
            conversation_toget[-1][0] = conversation_toget[-1][0] + turn[0][2:]
        else:
            conversation_toget.append(turn)
    conversation = conversation_toget

    try:
        finnal_list, sign_num = all_recognition(
            '开场', call_id, customer_id, conversation_1, conversation_2,
            finnal_list, type_dict, stop_sign, sign_tousu, false_num, sign_num)
    except Exception:
        traceback.print_exc()

    for i, (content, speaker, _start_time, _end_time) in enumerate(conversation):
        content = _normalize(content)

        if speaker == _AGENT:
            agent_text = '1_' + content

            # Holiday marketing only counts after a decline / quota intent.
            if search_intent_dict(_DECLINE_INTENTS, intent_dict) and _HOLIDAY_MARKETING.search(content):
                sign_jieriyingxiao = True

            # Reply right after the customer asked "how much".
            if sign_no:
                sign_no = False
                rule = re_dict1['业务类'][0]
                hit = re.search(rule, content)
                if hit:
                    finnal_list.append(row('业务类错误', rule, hit.group(), agent_text))
                    sign_num = 1
                    continue

            # First agent reply after a sensitive intent.
            if sign_tousu and not sign_tousu_fix:
                rule = re_dict1['敏感话术'][0]
                hit = re.search(rule, content)
                if hit:
                    sign_num = 1
                    sign_tousu_fix = True
                    # Record the rule that was actually applied.
                    finnal_list.append(row('投诉意向未按规定回复', rule, hit.group(), agent_text))

            # Customer has explicitly declined three times.
            if false_num > 2:
                for rule in re_dict1['结束类']:
                    hit = re.search(rule, content)
                    if hit and not type_dict['过渡营销']:
                        finnal_list.append(row('过渡营销错误', rule, hit.group(), agent_text))
                        sign_num = 1
                        sign_false = 1
                        type_dict['过渡营销'] = 1

            for false_type in re_dict2:
                if sign_yes and false_type == '业务类':
                    # After the customer agreed only the last 业务类 rule applies.
                    rule = re_dict2[false_type][-1]
                    hit = re.search(rule, content)
                    if hit:
                        finnal_list.append(row('{}错误'.format(false_type), rule, hit.group(), agent_text))
                        sign_num = 1
                        sign_false = 1
                elif false_type == '开场类' and type_dict['开场类'] and i < 4:
                    hit = re.search(_BRAND_RULE, content)
                    if hit:
                        finnal_list.append(row('开场类错误', _BRAND_RULE, hit.group(), agent_text))
                        sign_num = 1
                        sign_false = 1
                else:
                    for rule in re_dict2[false_type]:
                        hit = re.search(rule, content)
                        if not hit:
                            continue
                        # The amount rule is waived once the customer named an amount.
                        if rule == re_dict2['职业操守'][-1] and sign_erduduoshao:
                            continue
                        finnal_list.append(row('{}错误'.format(false_type), rule, hit.group(), agent_text))
                        sign_num = 1
                        sign_false = 1

            if _PRODUCT_INTRO.search(content):
                sign_chanjie = 1
            if sign_chanjie == 1:
                if not sign_tousu and _QUOTA_CLEARING.search(content):
                    sign_huashuzhixing = True
                    finnal_list.append(row('额度清零话术提及', '', '', agent_text))
                    sign_false = 1

            if sign_false:
                # A finding row already carries this utterance.
                sign_false = 0
                continue
            finnal_list.append(row(text=agent_text))
        else:
            intent = intent_judge(content)
            if intent:
                if '-' in intent:
                    intent = intent.split('-')[0]
                intent_dict[intent] = intent_dict.get(intent, 0) + 1

            # Attitude: has the customer agreed?
            if _CUSTOMER_AGREES.search(content):
                sign_yes = True
            if _HOW_MUCH.search(content) or intent == '额度多少':
                sign_no = True
            # Numbers / amounts.
            if _AMOUNT_MENTIONED.search(content):
                sign_erduduoshao = True
            # Intent reported by the online intent model.
            if intent and intent.startswith('不需要'):
                false_num += 1
                sign_chanjie += 1
            elif intent in _SENSITIVE_INTENTS:
                sign_tousu = True
            finnal_list.append(row(text='2_' + content))

    try:
        finnal_list, sign_num = all_recognition(
            '结束', call_id, customer_id, conversation_1, conversation_2,
            finnal_list, type_dict, stop_sign, sign_tousu, false_num, sign_num)
    except Exception:
        traceback.print_exc()

    # Write the final conversation rows.
    _append_rows(ws, finnal_list)

    # Summary of the script-execution checks.
    if sign_tousu:
        ws.append([call_id, customer_id, '', '敏感客户不进行话术执行质检', ''])
    else:
        if not sign_huashuzhixing:
            ws.append([call_id, customer_id, '', '未进行额度清零话术', ''])
        # "Not performed" is reported when the wording was NOT seen, parallel
        # to the quota-clearing check above.
        if search_intent_dict(_DECLINE_INTENTS, intent_dict) and not sign_jieriyingxiao:
            ws.append([call_id, customer_id, '', '未进行节日营销话术', ''])

    out_dir = _output_dir(ip)
    date = _today_stamp()
    # Drop earlier workbooks when the last one listed is not from today.  The
    # trailing '_' keeps e.g. '2024-1-1' from matching '2024-1-15_...'.
    existing = glob.glob('./{}_data_dir/*.xlsx'.format(ip))
    if existing and not os.path.basename(existing[-1]).startswith(date + '_'):
        for file_path in existing:
            os.remove(file_path)
    wb.save(out_dir + '{}_{}_{}_{}.xlsx'.format(date, call_id, sign_num, time))
    return sign_num


def all_recognition(type, call_id, customer_id, conversation_1, conversation_2, finnal_list, type_dict, stop_sign,
                    sign_tousu, false_num, sign_num):
    """Segment-level checks for the opening ('开场') and closing ('结束') parts.

    The opening check joins the first three agent turns, the closing checks
    join the last three agent turns; both require more than three turns from
    each speaker.

    NOTE(review): this function only ever resets ``type_dict`` flags to 0, and
    `recognition` passes them in as 0, so no row is added unless a caller
    pre-sets a flag.  Confirm whether the checks are meant to be disabled.

    :param type: '开场' or '结束'; any other value is a no-op.
    :param conversation_1: agent turns.
    :param conversation_2: customer turns.
    :param finnal_list: result rows, extended in place.
    :param type_dict: per-category flags, updated in place.
    :return: ``(finnal_list, sign_num)``.
    """
    if type == '开场':
        if len(conversation_1) > 3 and len(conversation_2) > 3:
            conversation_1 = '{},{},{}'.format(conversation_1[0][0], conversation_1[1][0], conversation_1[2][0])
            for rule in re_dict1['开场类']:
                if re.search(rule, conversation_1):
                    type_dict['开场类'] = 0
                    break
        else:
            type_dict['开场类'] = 0
        if type_dict['开场类']:
            finnal_list.append([call_id, customer_id, '', '开场类错误', '', '', conversation_1])
            sign_num = 1
    elif type == '结束':
        if len(conversation_1) > 3 and len(conversation_2) > 3 and \
                (not stop_sign or not sign_tousu or false_num < 3):
            conversation_2 = '{},{},{}'.format(conversation_1[-3][0], conversation_1[-2][0], conversation_1[-1][0])
            for rule in re_dict1['结束类']:
                if not re.search(rule, conversation_2):
                    type_dict['结束类'] = 0
                    break
            for rule in re_dict1['防诈骗话术']:
                if not re.search(rule, conversation_2):
                    type_dict['防诈骗话术'] = 0
        else:
            type_dict['结束类'] = 0
        if type_dict['结束类']:
            finnal_list.append([call_id, customer_id, '', '结束类错误', '', '', conversation_2])
            sign_num = 1
        if type_dict['防诈骗话术']:
            finnal_list.append([call_id, customer_id, '', '防诈骗话术错误', '', '', conversation_2])
            sign_num = 1
    return finnal_list, sign_num


def slipt_recognition():
    """Placeholder, not implemented.

    Intended to transform the whole conversation, including analysing the
    intents expressed by the customer and the agent and the scope they affect.

    :return: None
    """
    pass


def intent_judge(query, timeout=5):
    """Ask the intent service for the label of ``query``.

    :param query: utterance text; sent as the ``query`` URL parameter.
    :param timeout: seconds to wait for the service before giving up.
    :return: the ``label`` field of the JSON response, or ``''`` when the
        request fails or the response cannot be read.
    """
    # `params` lets requests URL-encode the text, so characters such as
    # '&' or '#' in an utterance cannot corrupt the query string.
    params = {'type': 222, 'sessionId': -1, 'query': str(query)}
    try:
        response = requests.get(_INTENT_URL, params=params, timeout=timeout)
        return json.loads(response.text)['label']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Best effort: an unavailable service means "no intent".
        return ''


def search_intent_dict(intent_list, intent_dict):
    """Return 1 if any intent in ``intent_list`` is a key of ``intent_dict``, else 0."""
    return int(any(intent in intent_dict for intent in intent_list))


def process_queries(current_query):
    """Normalise a transcript utterance.

    Steps: drop filler words (only while more than two characters remain),
    collapse immediately repeated fragments, merge identical text separated by
    punctuation, squash doubled punctuation and strip a leading ',' or '。'.

    :param current_query: utterance text.
    :return: the normalised text (``''`` for empty input).
    """
    # Fragments that stay doubled when repeated (e.g. reduplicated words).
    pass_word = ['不需要', '不用了', '不用', '一点', '考虑', '谢', '在', '看', '问', '想', '天', '刚', '试']
    remove_word = [
        '不好意思', '对不起', '谢谢你', '哎呀', '你好', '您好', '再见', '谢谢', '好吧', '感谢',
        '呃', '啊', '哦', '嘞', '喂', '哎', '哈', '哟', '哇', '呦', '拜',
        '嗯,', '嗯。', '嗯', ',好,', '。好,', ',行,', ',行。', ',对,', ',对。']
    for remove in remove_word:
        stripped = current_query.replace(remove, '')
        if len(stripped) > 2:
            current_query = stripped

    # Remove adjacent repeated fragments; rescan until a pass collapses nothing.
    collapsed = True
    while collapsed:
        collapsed = False
        pieces = []
        i = 0
        while i < len(current_query):
            repeat_num = 0
            for j in range(1, 20):
                target = current_query[i:i + j]
                while target == current_query[i + j * (repeat_num + 1):i + j * (repeat_num + 2)]:
                    repeat_num += 1
                if repeat_num:
                    break
            if repeat_num:
                if target in pass_word or not (u'\u4e00' <= target <= u'\u9fff'):
                    # Kept as exactly two copies; does not trigger a rescan.
                    pieces.append(target * 2)
                else:
                    pieces.append(target)
                    collapsed = True
                i += j * (repeat_num + 1)
                continue
            pieces.append(current_query[i])
            i += 1
        current_query = ''.join(pieces)

    # Merge identical text that is separated only by a punctuation mark.
    merged = ''
    for i, char in enumerate(current_query):
        if char in [',', '。', '?', '!']:
            for j in range(1, 5):
                if current_query[i - j if i - j >= 0 else 0:i] == current_query[i + 1:i + j + 1]:
                    merged = merged[:-1 * j]
                    break
        merged += char
    current_query = merged

    for point_sign in ['?。', '?,', ',。', ',,', '。,', '。。']:
        current_query = current_query.replace(point_sign, ',')
    # Guard against an empty string before looking at the first character.
    if current_query and current_query[0] in [',', '。']:
        current_query = current_query[1:]
    return current_query


if __name__ == '__main__':
    pattern = re.compile(r'.*([^用]不用|不需要|没兴趣).*')
    text = '我用不用'
    result = pattern.search(text)
    print(result)