# -*- coding: utf-8 -*-
"""Rule-based quality inspection of transcribed call conversations.

``recognition`` is the entry point: it scans every sentence of a call against
the regular-expression rule tables imported from ``re_dict_setting``, collects
the findings as spreadsheet rows and saves them to an ``.xlsx`` workbook.
"""
import datetime
import glob
import json
import os
import re
import traceback

import pymysql
import requests
from openpyxl import Workbook, load_workbook

from re_dict_setting import re_dict, re_1_dict, re_2_dict
from server import send_wechat_warning

sign_daduan = 0  # Customer sentence hit the "interrupt" intent.
# Keywords used to decide whether an agent sentence was a marketing pitch.
work_word = ['备用金', '提现', '额度', '活跃账户', '权益', '信用卡', '银行', '利息', '信用卡', '西安']

# Endpoint of the NLU service queried by intent_judge().
_NLU_URL = 'http://47.92.230.239:8679/nlu'
# Upper bound (seconds) for one NLU request, so a hung service cannot block
# the inspection of a call forever.
_NLU_TIMEOUT_SECONDS = 10


def _append_rule_hits(rows, rule_dict, content, prefix, call_id, customer_id):
    """Append one error row to *rows* for every rule in *rule_dict* matching *content*.

    *rule_dict* maps an error category to an iterable of regex strings.
    *prefix* (``'1_'`` or ``'2_'``) is prepended to the sentence in the last
    column. Returns True when at least one rule matched.
    """
    hit = False
    for false_type in rule_dict:
        for re_rule in rule_dict[false_type]:
            result = re.search(re_rule, content)
            if result:
                rows.append(
                    [call_id, customer_id, '', '{}错误'.format(false_type), re_rule, result.group(),
                     prefix + content])
                hit = True
    return hit


def recognition(conversation, repeat_time, call_id, customer_id, who, call_date, wav_path, ip, time, channel,
                stop_sign, call_answer, project, ):
    """Inspect one call and write the findings to an Excel workbook.

    Args:
        conversation: sequence of ``(content, speaker, start_time, end_time)``
            items; ``speaker == 1`` is handled as channel 1 (rows prefixed
            ``'1_'``), anything else as channel 2 (rows prefixed ``'2_'``).
        repeat_time: per-segment repeat durations, forwarded to
            ``all_recognition``.
        call_id, customer_id, who, call_date: identifiers copied into the rows.
        ip: used for the output directory name and for NLU failure warnings.
        time: total call duration, compared against sentence timestamps.
        stop_sign: forwarded to ``all_recognition``.
        project: key into ``re_dict`` selecting the project-specific rules.
        wav_path, channel, call_answer: accepted for interface compatibility;
            not used by this function.

    Returns:
        ``0`` when the call is recognised as voicemail, otherwise ``sign_num``
        (``1`` if any rule was violated, else ``0``).
    """
    wb = Workbook()
    ws = wb.active
    sign_num = 0
    finnal_list = list()
    conversation_1 = list()
    conversation_2 = list()
    last_1_content = ''
    type_1_dict = {'开场类': 1, '结束类': 1, '业务类': 0, '职业操守': 0, '风险类': 0, '过渡营销': 0, '服务态度': 0,
                   '防诈骗话术': 0}
    # '过渡营销' counts customer refusals; it was read and incremented below
    # without ever being created, which raised KeyError.
    type_2_dict = {'不需要': 0, '投诉': 0, '小助手': '', '过渡营销': 0}
    # NOTE(review): these three were passed to all_recognition() without ever
    # being defined, so both calls died with a swallowed NameError. Nothing in
    # this module computes them; neutral defaults are used - confirm the
    # intended source of these flags.
    sign_tousu = 0
    sign_xiaozuhshou = 0
    false_num = 0

    # Split the sentences per channel and detect voicemail.
    for sentence in conversation:
        if sentence[1] == 1:
            conversation_1.append(sentence)
            continue
        if any(word in sentence[0] for word in ('留言', '正在通话中')):
            conver_list = [call_id, customer_id, who, call_date, '0', '']
            conver_list.extend(str(s[-1]) + '_' + s[0] for s in conversation)
            ws.append(conver_list)
            # The directory may not exist yet on a fresh deployment.
            os.makedirs('./data_dir', exist_ok=True)
            wb.save('./data_dir/{}.xlsx'.format(call_id))
            return 0
        conversation_2.append(sentence)

    try:
        finnal_list, sign_num = all_recognition('开场', call_id, customer_id, conversation_1, conversation_2,
                                                repeat_time, finnal_list, type_1_dict, stop_sign, sign_tousu,
                                                sign_xiaozuhshou, false_num, sign_num, time)
    except Exception:
        traceback.print_exc()

    for sentence in conversation:
        content, speaker, _start_time, _end_time = sentence
        try:
            content = process_queries(content)
        except Exception:
            # Best effort: fall back to the raw sentence.
            traceback.print_exc()

        if speaker == 1:
            # The error flag is local to this sentence so that a hit on the
            # previous sentence cannot suppress this one's plain row.
            has_error = False
            # The customer has clearly refused three times.
            # NOTE(review): '== 3' stops matching once the counter passes 3 -
            # confirm whether '>= 3' was intended.
            if type_2_dict['过渡营销'] == 3 and not type_1_dict['过渡营销']:
                for re_rule in re_1_dict['固定流程_结束语']:
                    result = re.search(re_rule, content)
                    if result:
                        finnal_list.append(
                            [call_id, customer_id, '', '业务类_违背客户意愿错误', re_rule, result.group(),
                             '1_' + content]
                        )
                        type_1_dict['过渡营销'] = 1  # report this error only once per call
                        has_error = True
                        break
            # Generic rules, then the project-specific rules.
            hit_common = _append_rule_hits(finnal_list, re_1_dict, content, '1_', call_id, customer_id)
            hit_project = _append_rule_hits(finnal_list, re_dict[project], content, '1_', call_id,
                                            customer_id)
            if has_error or hit_common or hit_project:
                sign_num = 1
                continue
            finnal_list.append([call_id, customer_id, '', '', '', '', '1_' + content])
            last_1_content = content
        else:
            # Generic rules for this channel.
            if _append_rule_hits(finnal_list, re_2_dict, content, '2_', call_id, customer_id):
                sign_num = 1
            # Ask the NLU service for the intent of the sentence.
            intent = intent_judge(content, customer_id, ip, 'nlu获取意图错误')
            if intent and intent.startswith('不需要'):
                # Count the refusal only when the preceding channel-1 sentence
                # contained a marketing keyword. The original loop tested just
                # the first keyword because it broke out on the first miss.
                if any(word in last_1_content for word in work_word):
                    type_2_dict['过渡营销'] += 1
            finnal_list.append(
                [call_id, customer_id, '', '', '', '', '2_' + content]
            )

    try:
        # The original passed an undefined name 'type_dict' here; the state
        # dict shared with the opening pass is type_1_dict.
        finnal_list, sign_num = all_recognition('结束', call_id, customer_id, conversation_1, conversation_2,
                                                repeat_time, finnal_list, type_1_dict, stop_sign, sign_tousu,
                                                sign_xiaozuhshou, false_num, sign_num, time)
    except Exception:
        traceback.print_exc()

    # Write the collected rows into the sheet.
    for fina_content in finnal_list:
        try:
            ws.append(fina_content)
        except Exception:
            traceback.print_exc()

    out_dir = './{}_data_dir/'.format(ip)
    os.makedirs(out_dir, exist_ok=True)
    # Read the clock once so the three date parts cannot straddle midnight.
    now = datetime.datetime.now()
    date = '{}-{}-{}'.format(now.year, now.month, now.day)
    # Keep only today's workbooks. The trailing '_' prevents '2024-1-1' from
    # being treated as a prefix of '2024-1-15'.
    today_prefix = date + '_'
    for file_path in glob.glob(out_dir + '*.xlsx'):
        if not os.path.basename(file_path).startswith(today_prefix):
            os.remove(file_path)
    wb.save(out_dir + '{}_{}_{}_{}.xlsx'.format(date, call_id, sign_num, time))
    return sign_num


def all_recognition(type, call_id, customer_id, conversation_1, conversation_2, repeat_time, finnal_list, type_dict,
                    stop_sign, sign_tousu, sign_xiaozuhshou, false_num, sign_num, time):
    """Run the checks that look at a whole region of the call.

    Args:
        type: ``'开场'`` for the opening checks or ``'结束'`` for the closing
            checks; any other value performs no check.
        conversation_1, conversation_2: per-channel lists of
            ``(content, speaker, start_time, end_time)`` sentences.
        repeat_time: iterable of repeat durations, one per segment.
        finnal_list: list of result rows; extended in place.
        type_dict: mutable state dict with the keys ``'开场类'``, ``'结束类'``
            and ``'防诈骗话术'``; updated in place.
        stop_sign, sign_tousu, false_num: gate the closing-phrase check.
        sign_xiaozuhshou: truthy enables the "assistant over one minute" check.
        sign_num: current violation flag.
        time: total call duration.

    Returns:
        ``(finnal_list, sign_num)`` with ``sign_num`` set to ``1`` when an
        opening or closing error row was added.
    """
    if type == '开场':
        # Channel 2 completely silent on a call longer than 30.
        if len(conversation_2) == 0 and time > 30:
            finnal_list.append([call_id, customer_id, '', '客户声道全部无声', '', '', ''])
        # Dead air before the first channel-1 sentence. Guarded so that an
        # empty channel no longer aborts the remaining checks with IndexError.
        # NOTE(review): the message says 5 seconds but the threshold is 10.
        if conversation_1 and conversation_1[0][2] > 10:
            finnal_list.append([call_id, customer_id, '', '职业操守', '开场空挂时长超过5秒', '', ''])
        opening_text = ','.join(str(s[0]) for s in conversation_1[:3])
        if len(conversation_1) > 3 and len(conversation_2) > 3:
            # Any opening rule found in the first three sentences clears the flag.
            if any(re.search(r, opening_text) for r in re_1_dict['开场类']):
                type_dict['开场类'] = 0
        else:
            # Call too short to judge the opening.
            type_dict['开场类'] = 0
        if type_dict['开场类']:
            finnal_list.append([call_id, customer_id, '', '开场类错误', '', '', opening_text])
            sign_num = 1
    elif type == '结束':
        # More than one minute spent talking to the voice assistant.
        if sign_xiaozuhshou and time > 60:
            finnal_list.append([call_id, customer_id, '', '职业操守', '与智能语音助手互动1分钟以上', '', ''])
        for i, re_time in enumerate(repeat_time):
            if re_time > 5:
                # NOTE(review): this row has six columns, the others seven.
                finnal_list.append([call_id, customer_id, '', '第{}段交互重复时长超过5秒'.format(i), '', ''])
        # Dead air after the last channel-1 sentence (guarded as above).
        # NOTE(review): the message says 5 seconds but the threshold is 30.
        if conversation_1 and (time - conversation_1[-1][3]) > 30:
            finnal_list.append([call_id, customer_id, '', '职业操守', '结束空挂时长超过5秒', '', ''])
        closing_text = ','.join(str(s[0]) for s in conversation_1[-3:])
        if len(conversation_1) > 3 and len(conversation_2) > 3 and \
                (not stop_sign or not sign_tousu or false_num > 2):
            # NOTE(review): unlike the opening check, the flags below are
            # cleared when a rule does NOT match - confirm this is intended.
            for r in re_1_dict['结束类']:
                if not re.search(r, closing_text):
                    type_dict['结束类'] = 0
                    break
            for r in re_1_dict['防诈骗话术']:
                if not re.search(r, closing_text):
                    type_dict['防诈骗话术'] = 0
        else:
            type_dict['结束类'] = 0
        if type_dict['结束类']:
            finnal_list.append([call_id, customer_id, '', '结束类错误', '', '', closing_text])
            sign_num = 1
        if type_dict['防诈骗话术']:
            finnal_list.append([call_id, customer_id, '', '防诈骗话术错误', '', '', closing_text])
            sign_num = 1
    return finnal_list, sign_num


def process_queries(current_query):
    """Normalise one transcribed sentence before rule matching.

    Steps, in order:
      1. remove filler words, unless that would leave two characters or fewer;
      2. collapse immediately repeated substrings (up to 19 characters long);
         words in ``pass_word`` and targets outside the CJK range keep two
         copies, everything else keeps one;
      3. merge identical text separated by a punctuation mark;
      4. collapse doubled punctuation and drop a leading comma/full stop.

    Returns the cleaned string; an empty input yields an empty string.
    """
    pass_word = ['不需要', '不用了', '不用', '一点', '考虑', '谢', '在', '看', '问', '想', '天', '刚', '试']
    # A missing comma used to fuse '拜' and '嗯,' into the single entry '拜嗯,'.
    remove_word = [
        '不好意思', '对不起', '谢谢你', '哎呀', '你好', '您好', '再见', '谢谢', '好吧', '感谢',
        '呃', '啊', '哦', '嘞', '喂', '哎', '哈', '哟', '哇', '呦', '拜',
        '嗯,', '嗯。', '嗯', ',好,', '。好,', ',行,', ',行。', ',对,', ',对。']
    for remove in remove_word:
        stripped = current_query.replace(remove, '')
        if len(stripped) > 2:
            current_query = stripped

    # Collapse repeated substrings; run again until a pass collapses nothing.
    changed = True
    while changed:
        changed = False
        pieces = []
        i = 0
        while i < len(current_query):
            repeat_num = 0
            for j in range(1, 20):
                target = current_query[i:i + j]
                # Count how many times target repeats directly after itself.
                while target == current_query[i + j * (repeat_num + 1):i + j * (repeat_num + 2)]:
                    repeat_num += 1
                if repeat_num:
                    break
            if not repeat_num:
                pieces.append(current_query[i])
                i += 1
                continue
            if target in pass_word or not (u'\u4e00' <= target <= u'\u9fff'):
                # Keep two copies; does not trigger another pass.
                pieces.append(target * 2)
            else:
                pieces.append(target)
                changed = True
            i += j * (repeat_num + 1)
        current_query = ''.join(pieces)

    # Merge identical text that is only separated by a punctuation mark.
    current_query_result = ''
    for i, char in enumerate(current_query):
        if char in (',', '。', '?', '!'):
            for j in range(1, 5):
                before = current_query[max(i - j, 0):i]
                if before == current_query[i + 1:i + j + 1]:
                    current_query_result = current_query_result[:-1 * j]
                    break
        current_query_result += char
    current_query = current_query_result

    for point_sign in ['?。', '?,', ',。', ',,', '。,', '。。']:
        current_query = current_query.replace(point_sign, ',')
    # Slicing instead of indexing so an empty string does not raise IndexError.
    if current_query[:1] in (',', '。'):
        current_query = current_query[1:]
    return current_query


def intent_judge(query, customer_id, ip, text):
    """Ask the NLU service for the intent of *query*.

    Returns the ``standard_query`` of the first reported intention, or ``''``
    when the service reports none. On any failure (network error, timeout,
    unexpected response) ``send_wechat_warning(customer_id, ip, text)`` is
    called and ``''`` is returned.
    """
    # Passing the values through ``params`` URL-encodes the sentence. The
    # original literal also contained '¤t_query', an HTML-entity mangling of
    # '&current_query', so the parameter never arrived under its proper name.
    params = {'session_id': -1, 'workspace': 222, 'current_query': str(query)}
    try:
        response = requests.get(_NLU_URL, params=params, timeout=_NLU_TIMEOUT_SECONDS)
        result = json.loads(response.text)
        intention = result['intention']
        if intention and intention[0].get('value', ''):
            return intention[0]['value']['standard_query']
        return ''
    except Exception:
        # Deliberately best effort: log, alert, and treat as "no intent".
        traceback.print_exc()
        send_wechat_warning(customer_id, ip, text)
        return ''


if __name__ == '__main__':
    pass