# -*- coding : utf-8 -*-
"""Rule-based quality inspection (QC) of outbound call transcripts.

Each call is a list of sentences ``(text, speaker, start_time, end_time)``.
Speaker ``1`` is treated as the agent channel; every other value is treated
as the customer channel.  Agent sentences are matched against regex rule
tables, customer sentences drive a few state flags (agreement, refusals,
intent), and the annotated conversation is written to an ``.xlsx`` report.

NOTE(review): this file was recovered from a whitespace-collapsed copy.
Statement nesting was reconstructed from syntax; places where more than one
reading was possible are marked with ``NOTE(review)``.
"""
import datetime
import glob
import json
import logging
import os
import re
import traceback

import pymysql
import requests
from openpyxl import Workbook, load_workbook
# from server import send_wechat_warning

# Checks referenced by the original header comments: dead air / fully silent
# calls, and QC of the disclaimer script over the whole call.

project_list = ['平安银行', '西安银行', '北京银行']
# Keywords that mark an agent sentence as marketing-related.
work_word = ['备用金', '提现', '额度', '活跃账户', '权益', '信用卡', '银行', '利息', '信用卡', '西安']

# Seconds to wait for the HTTP services before giving up.
_HTTP_TIMEOUT_SECONDS = 30

re_dict1 = {
    '开场类': [
        r'.*(京东金融|京东|金融).*工号.*',
        r'.*(京东金融|京东|金融).*(一|二|三|四|五|六|七|八|九|零).*',
    ],
    '敏感话术': [
        r'^((?!((抱歉|不好意思|对不起|打扰)|回访|工号是|95118)).)*$',
    ],
    '结束类': [
        r'^((?!(感谢|祝您|不打扰|挂机)).)*再见((?!(感谢|祝您|不打扰|挂机)).)*$',
    ],
    '防诈骗话术': [
        r'(防范.*(诈骗|意识)|报警|反诈|110|电信.{0,10}诈骗)',
    ],
    '业务类': [
        r'^((?!以.{0,4}(系统|app|APP|页面).{0,4}为准).)*((您的|你的).{0,4}额度.{0,4}(万|元)|利息.*[0-9])((?!以.{0,4}(系统|app|APP|页面).{0,4}为准).)*$',
        # SMS / WeChat related.
        r'^((?!已经|可以|吗|吧|好|通过|能不能|要不要|方便|需不需要|有.{0,10}需求|银行|审核|结果|最快|还款|验证码).)*(发.{0,5}短信|发送.{0,20}查收|短信.{0,10}的(形|方)式|加.{0,5}微信)((?!已经|可以|吗|吧|好|通过|能不能|要不要|方便|需不需要|有.{0,10}需求|银行|审核|结果|最快|还款|验证码).)*$',
    ],
}

# --- Rule lists shared by re_dict2 and re_dict3 ------------------------------

_OPENING_RULES = [
    r'^((?!经理).)*我是.{0,5}京东.{0,5}客服((?!经理).)*$',
]

# Professional-conduct rules.  Index 2 (the "what my screen shows" rule) is
# the only entry that differs between the two tables and is looked up by
# index in recognition(), so the order must not change.
# The "勾选" exemption covers "tick whichever applies" instructions.
_CONDUCT_ASK_CONTACT = r'^((?!勾选).)*(您|你|我).{0,5}(微信|电话|号码|身份证号).{0,5}(是多少)((?!勾选).)*$'
_CONDUCT_SMALL_TALK = r'你的.{0,5}待遇|几口人|身体好吗|吃饭了吗'
# Which step, form, phone, number, address, submitted, success, review passed,
# failed, rejected.
_CONDUCT_SCREEN_WITH_NUMBER = r'(我).{0,10}(显示|看到).{0,10}(第(一|二|三|四)步|已经提交|提交(成功|失败)|审核(成功|失败|被拒|通过)|表单|电话|号码|住址)'
_CONDUCT_SCREEN = r'(我).{0,10}(显示|看到).{0,10}(第(一|二|三|四)步|已经提交|提交(成功|失败)|审核(成功|失败|被拒|通过)|表单|电话|住址)'
# Disabled rules:
# r'^((?!到账).)*([0-9]|(一|两|三|四|五|六|七|八|九|十))(点|分|秒).{0,10}激活.{0,10}额度((?!到账).)*$',
# r'(点|分|秒).{0,10}注册',
_CONDUCT_ACTIVATE_AMOUNT = r'^((?!最高).)*(激活.{0,5}(万|千|百).{0,5}额度|激活.{0,5}额度.{0,5}(万|千|百))((?!最高).)*$'

_RISK_RULES = [
    # Disabled:
    # r'^((?!有助于|有帮助|的机会|一定的|输入).)*((额度|利息)?(肯定|一定).{0,10}(升满|降低|降息|降到|提升|提额))((?!有助于|有帮助|的机会|一定的|输入).)*$',
    r'帮你.{1,4}查.{1,4}额度',
    r'(您的|你的).{0,5}额度.{0,5}(最少|大概|大约|一般).{0,5}(万|元)',
    # Disabled:
    # r'^((?!不推荐|不建议|不太好|免息|优惠券|想用).)*(只用|就用).{0,5}(几|一|二|三|四|五|六|七|八|九|十|1|2|3|4|5|6|7|8|9|10).{0,2}天((?!不推荐|不建议|不太好|免息|优惠券|想用).)*$',
    r'审核.{0,4}(都能|全部).{0,4}通过.*[^不能](100%|百分之百)?|[^不能](100%|百分之百)?.{0,4}审核.{0,4}(都能|全部).{0,4}通过',
    # Disabled:
    # r'^((?!(良好.{0,3}(记录|习惯)|(有助于|有可能|有机会|不定期)|具体.{0,3}为准)).)*(提额|降息)((?!(良好.{0,3}(记录|习惯)|(有助于|有可能|有机会|不定期)|具体.{0,3}为准)).)*$'
]

# Service-attitude rules: harassment-style calling and abusive language.
_SERVICE_RULES = [
    r'((不用|不办).{0,4}一直.{0,4}打)|(天天.{0,4}打)|^((?=拦截).)*骚扰((?=拦截).)*$',
    r'(不要打断我|你看着办|听不懂我说话)',
    r'^((?!动).)*滚((?!动).)*$',
    r'笨|傻|妈的|傻逼|神经病|^((?!没).)*有病((?!没).)*$|卧槽|他妈的|贱人|智商|卑鄙无耻|衣冠禽兽|我丢你老母|装疯卖傻|装傻充愣|疯子|王八|疯狗|乱咬人|听不懂人话|猪狗不如|混蛋|没脑子|傻屌|去死吧|TM|人渣|怂|装逼|操你妈|妈逼|傻子|SB|sb',
    r'(有人生没人养|不要脸|叫声妈|叫声爹|装纯洁|在东莞挣钱|有脸活|枉为人|噎着|臭嘴|坑蒙拐骗|生下你|镜子|照照|三观不正|吃软饭|是不是人[^工]|脑残|丧样|人话|个泡|装逼|牛逼|婊子|贱|废话|丧失|嘴硬|三观不正|出息|没教养|上门女婿|装什么装|没素质|找小三|吃软饭|拿镜子照照|撒泡尿照照|几斤几两|蠢|不着调|脑残|人渣|吸毒|溜冰|贩毒|瘾君子|赢钱|赌场|毒品|大波|有种|犯贱|痴呆|败类|丢脸|脑残|人渣|天堂|脸皮厚|羞|脸红)',
]

# Phrases forbidden by the client.
_CLIENT_BANNED_RULES = [
    r'不上征信',
    r'^((?!不能(保证|说|确定)?).)*(百分之百|100%|包)(审核)?通过*((?!不能(保证|说|确定)?).)*$',
    r'征信没有影响',
    # Disabled: r'(保过|包过)'
]

# Rule table selected by recognition() when ``call_answer`` is falsy.
# Key order is the order in which categories are checked and reported.
re_dict2 = {
    '开场类': list(_OPENING_RULES),
    # Original note: questions asking about progress / limit / bank review.
    '业务类': [
        # Disabled:
        # r'短信链接',
        # r'^((?!号码|电话|座机|几个数字|楼号).)*随便.{0,5}(写|填)((?!号码|电话|座机|几个数字|楼号).)*$',
        r'^((?=不是).)*给(你|您).*红包((?=不是).)*$',
        # Disabled:
        # r'^((?!为(准|标准)|银行.{0,10}(审核|资历|判定)).)*(额度.{0,5}最高.{0,5}(万|千|百)|利息.{0,5}最低)((?!为(准|标准)|银行.{0,10}(审核|资历|判定)).)*$'
    ],
    '职业操守': [
        _CONDUCT_ASK_CONTACT,
        _CONDUCT_SMALL_TALK,
        _CONDUCT_SCREEN_WITH_NUMBER,
        _CONDUCT_ACTIVATE_AMOUNT,
    ],
    '风险类': list(_RISK_RULES),
    '过渡营销': [],
    '服务类': list(_SERVICE_RULES),
    '甲方禁词': list(_CLIENT_BANNED_RULES),
}

# Rule table selected by recognition() when ``call_answer`` is truthy.
# It has no '业务类' entry.
re_dict3 = {
    '开场类': list(_OPENING_RULES),
    '职业操守': [
        _CONDUCT_ASK_CONTACT,
        _CONDUCT_SMALL_TALK,
        _CONDUCT_SCREEN,
        _CONDUCT_ACTIVATE_AMOUNT,
    ],
    '风险类': list(_RISK_RULES),
    '过渡营销': [],
    '服务类': list(_SERVICE_RULES),
    '甲方禁词': list(_CLIENT_BANNED_RULES),
}

# Customer-side patterns, compiled once instead of once per sentence.
_AGREE_RE = re.compile(r'可以|好吧|好的|手机号是|^((?!.).)*(,|。)?(好|行|嗯)+(,|。)?((?!.).)*$')
_DIGIT_RE = re.compile(r'[0-9]|(一|二|三|四|五|六|七|八|九|零|十)')

# Phrases in the customer channel that mark the call as voicemail / busy.
_VOICEMAIL_WORDS = ('留言', '正在通话中')

# SECURITY NOTE(review): a browser session cookie is embedded in source.  It
# is kept only as a fallback so existing callers behave as before; it should
# be moved to configuration / a secret store and rotated.
_DEFAULT_COOKIE = (
    '__jdu=1603541000594473648267; '
    'shshshfpa=44104b1b-f4ba-5d5e-b476-23e07498708c-1603541001; '
    'shshshfpb=e4milog6sAic202v9T7VA%2Fg%3D%3D; '
    'qid_uid=ba6bea63-03e0-4903-9db2-5cf665a9477a; '
    'qid_fs=1604545783382; '
    'shshshfp=dfeb941e71f86dbba908c3b17e107823; '
    '__jdc=122270672; '
    'pinId=qpAqqYTDrG-2BNNl9BUCbA; '
    'ceshi3.com=000; '
    'logining=1; '
    '__jdv=122270672|direct|-|none|-|1623913616358; '
    'pin=%E7%8E%8B123xxx_514; '
    'unick=%E7%8E%8B123xxx_514; '
    '_tp=XJbagNP9Yf6tXzGo1MMTQOx1DFKrRo%2BvM3XPEHCue4k%3D; '
    '_pst=%E7%8E%8B123xxx_514; '
    '__jda=122270672.1603541000594473648267.1603541001.1624937364.1624947248.17; '
    'wlfstk_smdl=3yvs7uzpmd4ki2hz73ur7prllpn5vfmz; '
    '3AB9D23F7A4B3C9B=5TCS5MAHWQ4G32QUXSXQDLK5YCVBHKNYG2DVOQYSJQ7GFGDHBJB2AL6XMQCM2IEWZQGDIXY25T4LTMYIRDS7OLNAMQ; '
    'TrackID=18t4hm6IZardFOfRPnVGFAr5o6U2EdhoGaMeU1cTMw3JLETVGMSubLEw4iaEBrOmCiorVh8bZoeub72MesMc3TKHUg1xZ6w4KoCXOwQ2QCsONK6-Uf7kflDgHOdPuonGm; '
    'thor=5874217193138EBA5BCF11578F160D2B87DF5BA7B128EF7E3C44EE5B459D7AEFD30E8610F18ABEDE0C506C8F7D6D019E884DF654356441A5FF75C87390772922B9C34328BA694C73897368DD47897BA8747EC60F6CF0C0939DFF681FF431D0F8F3481DD8CEA5CCBEE51A0E900B6F665DF5CCEEADAC4CBED768D8011AC7EBD7C7BC59D2B515904751B939C3971064BEBE; '
    'qid_ls=1624947245638; '
    'qid_ts=1625120885235; '
    'qid_sq=25; '
    'qid_sid=ba6bea63-03e0-4903-9db2-5cf665a9477a-25; '
    'qid_ad=jingyin.jd.com%7C-%7Cjd%7C-%7C0'
)


def get(agentName, cusNo, cookie):
    """Query the call-record service and return the first matching record.

    Args:
        agentName: Agent name sent as the ``agentName`` filter.
        cusNo: Customer number sent as the ``cusNo`` filter.
        cookie: Value for the ``Cookie`` request header.  When falsy, the
            built-in fallback cookie is used.  (The original code ignored
            this argument and always used the built-in value.)

    Returns:
        A 15-element list: ``[saveFile, date part of ringEndTime, '未成交',
        account, callNo, ringStartTime, ringEndTime, cusNo, startTime,
        endTime, callTimeStr, callResultStr, workspaceName, groupName,
        agentName]``, all read from the first entry of
        ``data.tsTaskInfoDtoList`` in the response.

    Raises:
        requests.RequestException: On network failure or timeout.
        KeyError, IndexError, TypeError: When the response does not have the
            expected shape or contains no record.
    """
    headers_jingka = {
        'Host': 'jingyin.jd.com',
        'Accept': 'application/json, text/plain, */*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
        'Origin': 'https://jingyin.jd.com',
        'Referer': 'https://jingyin.jd.com/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
        'Cookie': cookie or _DEFAULT_COOKIE,
    }
    data = {
        'cusNo': cusNo,
        'workspaceId': '2109',
        'agentName': agentName,
        'paginatorDTO': {'limit': '10', 'page': '1'},
    }
    response = requests.post(
        'https://jingyin.jd.com/web/tsgh/call/queryTsCallInfoListPageList',
        data=json.dumps(data),
        headers=headers_jingka,
        timeout=_HTTP_TIMEOUT_SECONDS,
    )
    result = json.loads(response.text)
    logging.info(result['data'])
    record = result['data']['tsTaskInfoDtoList'][0]
    ring_end_time = record['ringEndTime']
    return [
        record['saveFile'],
        ring_end_time.split(' ')[0],
        '未成交',
        record['account'],
        record['callNo'],
        record['ringStartTime'],
        ring_end_time,
        record['cusNo'],
        record['startTime'],
        record['endTime'],
        record['callTimeStr'],
        record['callResultStr'],
        record['workspaceName'],
        record['groupName'],
        record['agentName'],
    ]


def _report_row(call_id, customer_id, label, rule, matched, text):
    """Build one 7-column report row: ids, blank, label, rule, match, text."""
    return [call_id, customer_id, '', label, rule, matched, text]


def _save_report(wb, ip, call_id, sign_num, time):
    """Save the workbook under ``./<ip>_data_dir/``, purging older-day files."""
    out_dir = './{}_data_dir/'.format(ip)
    os.makedirs(out_dir, exist_ok=True)
    # Take the timestamp once so the three parts cannot straddle midnight.
    # The date is deliberately not zero-padded: existing file names use this
    # exact format and are compared by prefix below.
    now = datetime.datetime.now()
    date = '{}-{}-{}'.format(now.year, now.month, now.day)
    existing = glob.glob('./{}_data_dir/*.xlsx'.format(ip))
    # If the last listed report is not from today, drop all existing reports.
    if existing and not os.path.basename(existing[-1]).startswith(date):
        for file_path in existing:
            os.remove(file_path)
    wb.save(out_dir + '{}_{}_{}_{}.xlsx'.format(date, call_id, sign_num, time))


def recognition(
        conversation, repeat_time, call_id, customer_id, who, call_date,
        wav_path, ip, time, channel, stop_sign, call_answer
):
    """Run QC over one call and write the annotated transcript to ``.xlsx``.

    Args:
        conversation: Iterable of ``(text, speaker, start_time, end_time)``;
            speaker ``1`` is handled as the agent, anything else as the
            customer.
        repeat_time: Per-segment repeat durations, forwarded to
            ``all_recognition``.
        call_id: Call identifier; used in report rows and file names.
        customer_id: Customer identifier; used in rows and intent lookup.
        who: Written to the header row.
        call_date: Written to the header row.
        wav_path: Written to the header row.
        ip: Used for the output directory name and the intent lookup.
        time: Compared against second thresholds in ``all_recognition`` and
            embedded in the output file name (presumably the call duration
            in seconds; confirm with the caller).
        channel: Unused here; kept for interface compatibility.
        stop_sign: Forwarded to ``all_recognition``.
        call_answer: Truthy selects ``re_dict3``, falsy selects ``re_dict2``.

    Returns:
        ``0`` when the call is detected as voicemail/busy (a short report is
        saved to ``./data_dir/``); otherwise ``sign_num``, which is ``1``
        when at least one QC problem was flagged and ``0`` if none.
    """
    wb = Workbook()
    ws = wb.active
    sign_no = False              # customer asked "how much is the limit"
    sign_yes = False             # customer has agreed at least once
    sign_tousu = False           # complaint intent (never enabled at present)
    sign_tousu_fix = False
    sign_erduduoshao = False     # customer's last sentences contained digits
    sign_xiaozuhshou = False     # a voice assistant answered
    sign_num = 0
    false_num = 0                # number of counted customer refusals
    conversation_1 = list()      # agent sentences
    conversation_2 = list()      # customer sentences
    finnal_list = list()
    project_name = ''
    sign_other_project = 0
    re_dict = re_dict3 if call_answer else re_dict2
    type_dict = {'开场类': 1, '结束类': 1, '业务类': 0, '职业操守': 0, '风险类': 0,
                 '过渡营销': 0, '服务类': 0, '防诈骗话术': 1}
    ws.append([call_id, customer_id, who, call_date, wav_path])
    last_1_content = ''

    # Split the channels and bail out early on voicemail / busy prompts.
    for sentence in conversation:
        if sentence[1] == 1:
            conversation_1.append(sentence)
            continue
        if any(word in sentence[0] for word in _VOICEMAIL_WORDS):
            conver_list = [call_id, customer_id, who, call_date, '0', '']
            for item in conversation:
                conver_list.append(str(item[-1]) + '_' + item[0])
            ws.append(conver_list)
            # The target directory is not created anywhere else.
            os.makedirs('./data_dir', exist_ok=True)
            wb.save('./data_dir/{}.xlsx'.format(call_id))
            return 0
        conversation_2.append(sentence)

    try:
        finnal_list, sign_num = all_recognition(
            '开场', call_id, customer_id, conversation_1, conversation_2,
            repeat_time, finnal_list, type_dict, stop_sign, sign_tousu,
            sign_xiaozuhshou, false_num, sign_num, time)
    except Exception:
        traceback.print_exc()

    for i, sentence in enumerate(conversation):
        content, speaker, start_time, end_time = sentence
        try:
            content = process_queries(content)
        except Exception:
            # Best effort: fall back to the raw text.
            logging.exception('process_queries failed; using raw text')

        if speaker == 1:
            # ---------------- agent sentence ----------------
            # Track which bank project is mentioned.
            # NOTE(review): the results are not consumed anywhere yet.
            for project in project_list:
                if project in content:
                    if project_name and project_name != project:
                        sign_other_project = 1
                    project_name = project

            if sign_no:
                # The customer just asked for the limit: check the answer.
                sign_no = False
                rule = re_dict1['业务类'][0]
                hit = re.search(rule, content)
                if hit:
                    finnal_list.append(_report_row(
                        call_id, customer_id, '业务类错误', rule, hit.group(),
                        '1_' + content))
                    sign_num = 1
                    continue

            if sign_erduduoshao:
                # The customer just gave digits: a matching read-back is
                # skipped entirely (no row is written for it).
                if re.search(re_dict['职业操守'][2], content):
                    continue
                sign_erduduoshao = False

            if sign_tousu and not sign_tousu_fix:
                # Fixed: the rule was previously read from an unset variable.
                rule = re_dict1['敏感话术'][0]
                hit = re.search(rule, content)
                if hit:
                    sign_num = 1
                    sign_tousu_fix = True
                    finnal_list.append(_report_row(
                        call_id, customer_id, '投诉意向未按规定回复', rule,
                        hit.group(), '1_' + content))

            flagged = False

            # The customer clearly refused three times.
            # NOTE(review): the original had a ``continue`` at the end of this
            # branch whose nesting level was lost; it is read here as a no-op
            # inside the inner loop.
            if false_num == 3:
                for re_rule in re_dict1['结束类']:
                    hit = re.search(re_rule, content)
                    if hit and not type_dict['过渡营销']:
                        finnal_list.append(_report_row(
                            call_id, customer_id, '过渡营销错误', re_rule,
                            hit.group(), '1_' + content))
                        sign_num = 1
                        flagged = True
                        type_dict['过渡营销'] = 1

            for false_type in re_dict:
                if sign_yes and false_type == '业务类':
                    # After the customer agreed only the last rule applies.
                    rules = re_dict[false_type][-1:]
                elif false_type == '开场类' and type_dict['开场类'] and i < 4:
                    # Opening check for the first sentences is disabled.
                    rules = []
                else:
                    rules = re_dict[false_type]
                for re_rule in rules:
                    hit = re.search(re_rule, content)
                    if hit:
                        finnal_list.append(_report_row(
                            call_id, customer_id, '{}错误'.format(false_type),
                            re_rule, hit.group(), '1_' + content))
                        sign_num = 1
                        flagged = True

            if flagged:
                continue
            finnal_list.append(_report_row(
                call_id, customer_id, '', '', '', '1_' + content))
            last_1_content = content
        else:
            # ---------------- customer sentence ----------------
            # Detect agreement.
            if _AGREE_RE.search(content):
                sign_yes = True
            # Detect digits.
            if _DIGIT_RE.search(content):
                sign_erduduoshao = True

            # Classify intent with the online NLU model.
            intent = intent_judge(content, customer_id, ip, 'nlu获取意图错误')
            if intent and intent.startswith('不需要'):
                # Count the refusal only when the agent's previous sentence
                # was marketing (mentions any keyword).  Fixed: previously the
                # count was undone unless *every* keyword was present.
                if any(word in last_1_content for word in work_word):
                    false_num += 1
            elif intent == '额度多少':
                sign_no = True
            elif intent == '小助手':
                sign_xiaozuhshou = True
            # NOTE(review): for complaint-like intents ('投诉',
            # '别给我打电话了', ...) the original formatted the intent into an
            # empty template, so the row is identical to a plain one;
            # behaviour kept.
            finnal_list.append(_report_row(
                call_id, customer_id, '', '', '', '2_' + content))

    try:
        finnal_list, sign_num = all_recognition(
            '结束', call_id, customer_id, conversation_1, conversation_2,
            repeat_time, finnal_list, type_dict, stop_sign, sign_tousu,
            sign_xiaozuhshou, false_num, sign_num, time)
    except Exception:
        traceback.print_exc()

    # Write the final annotated conversation.
    for fina_content in finnal_list:
        try:
            ws.append(fina_content)
        except Exception:
            logging.exception('could not write report row: %r', fina_content)

    _save_report(wb, ip, call_id, sign_num, time)
    return sign_num


def all_recognition(type, call_id, customer_id, conversation_1, conversation_2,
                    repeat_time, finnal_list, type_dict, stop_sign, sign_tousu,
                    sign_xiaozuhshou, false_num, sign_num, time):
    """Run the section-level checks for the opening or the closing of a call.

    Args:
        type: ``'开场'`` for opening checks, ``'结束'`` for closing checks;
            any other value does nothing.
        call_id, customer_id: Copied into every report row.
        conversation_1: Agent sentences ``(text, speaker, start, end)``.
        conversation_2: Customer sentences, same layout.
        repeat_time: Iterable of repeat durations, one per segment.
        finnal_list: Report rows; appended to in place.
        type_dict: Per-category state; ``'开场类'``, ``'结束类'`` and
            ``'防诈骗话术'`` are updated in place.
        stop_sign, sign_tousu, false_num: Gate the closing-phrase check.
        sign_xiaozuhshou: True when a voice assistant answered.
        sign_num: Current problem flag.
        time: Compared against second thresholds (60 / 30).

    Returns:
        ``(finnal_list, sign_num)`` with ``sign_num`` set to ``1`` if a
        problem was flagged here.
    """
    if type == '开场':
        # Customer channel completely silent.
        if len(conversation_2) == 0 and time > 60:
            finnal_list.append(_report_row(
                call_id, customer_id, '客户声道全部无声', '', '', ''))
        if not conversation_1:
            # No agent speech: nothing more to check.  (This used to raise
            # IndexError, which the caller swallowed with the same outcome.)
            return finnal_list, sign_num
        # Dead air at the start: first agent sentence starts after 10 s.
        if conversation_1[0][2] > 10:
            finnal_list.append(_report_row(
                call_id, customer_id, '职业操守', '开场空挂时长超过10秒', '', ''))
        if len(conversation_1) > 3 and len(conversation_2) > 3:
            opening_text = '{},{},{}'.format(
                conversation_1[0][0], conversation_1[1][0], conversation_1[2][0])
            if any(re.search(rule, opening_text) for rule in re_dict1['开场类']):
                type_dict['开场类'] = 0
            if type_dict['开场类']:
                finnal_list.append(_report_row(
                    call_id, customer_id, '开场类错误', '', '', opening_text))
                sign_num = 1
        else:
            # Too short to judge the opening.
            type_dict['开场类'] = 0
    elif type == '结束':
        # Talked to a voice assistant for more than one minute.
        if sign_xiaozuhshou and time > 60:
            finnal_list.append(_report_row(
                call_id, customer_id, '职业操守', '与智能语音助手互动1分钟以上', '', ''))
        for i, re_time in enumerate(repeat_time):
            if re_time > 5:
                # Padded to 7 columns like every other row.
                finnal_list.append(_report_row(
                    call_id, customer_id,
                    '第{}段交互重复时长超过5秒'.format(i), '', '', ''))
        if not conversation_1:
            # No agent speech (previously an IndexError swallowed upstream).
            return finnal_list, sign_num
        # Dead air at the end: counted as a QC problem.
        if (time - conversation_1[-1][3]) > 30:
            finnal_list.append(_report_row(
                call_id, customer_id, '职业操守', '结束空挂时长超过30秒', '', ''))
            sign_num = 1

        agent_text = ''.join(con[0] for con in conversation_1)
        if len(conversation_1) > 6 and len(conversation_2) > 6 and \
                (not stop_sign or not sign_tousu or false_num > 2):
            # Closing phrases are looked for in the last six agent sentences.
            closing_text = ','.join(str(con[0]) for con in conversation_1[-6:])
            for rule in re_dict1['结束类']:
                if not re.search(rule, closing_text):
                    type_dict['结束类'] = 0
                    break
            # The anti-fraud script may appear anywhere in the agent channel.
            closing_text += agent_text
            for rule in re_dict1['防诈骗话术']:
                if re.search(rule, closing_text):
                    type_dict['防诈骗话术'] = 0
        else:
            # Too short to judge the closing.
            # NOTE(review): the anti-fraud flag is left set here, so short
            # calls are always reported; behaviour kept.  The row now carries
            # the agent text instead of the raw sentence list, which could
            # not be written to a cell and made the row vanish silently.
            type_dict['结束类'] = 0
            closing_text = agent_text

        if type_dict['结束类']:
            finnal_list.append(_report_row(
                call_id, customer_id, '结束类错误', '', '', closing_text))
            sign_num = 1
        if type_dict['防诈骗话术']:
            finnal_list.append(_report_row(
                call_id, customer_id, '防诈骗话术错误', '', '', closing_text))
            sign_num = 1
    return finnal_list, sign_num


def slipt_recognition():
    """Placeholder; not implemented."""
    pass


def intent_judge(query, customer_id, ip, text):
    """Ask the NLU service for the standard intent of a customer sentence.

    Args:
        query: Sentence text; sent as ``current_query``.
        customer_id, ip, text: Only used by the disabled alerting hook; kept
            for interface compatibility.

    Returns:
        ``standard_query`` of the first returned intention, or ``''`` when
        there is none or the request fails for any reason.
    """
    # Fixed: the URL used to contain a mangled ``&current_query=`` separator,
    # so the query was never sent as its own parameter.  ``params`` also
    # URL-encodes the text.
    params = {
        'session_id': '-1',
        'workspace': '222',
        'current_query': str(query),
    }
    try:
        response = requests.get('http://8.142.85.77:8679/nlu', params=params,
                                timeout=_HTTP_TIMEOUT_SECONDS)
        result = json.loads(response.text)
        intention = result['intention']
        if intention and intention[0].get('value', ''):
            return intention[0]['value']['standard_query']
    except Exception:
        # Best effort: a failed lookup must not abort the QC run.
        logging.exception('NLU intent lookup failed')
        # send_wechat_warning(customer_id, ip, text)
    return ''


def process_queries(current_query):
    """Normalise a transcript sentence before rule matching.

    Steps: strip filler words (only while more than two characters remain),
    collapse immediately repeated fragments, merge fragments repeated across
    a punctuation mark, normalise doubled punctuation and drop a leading
    comma / full stop.

    Args:
        current_query: Sentence text.

    Returns:
        The normalised sentence; ``''`` for empty input.
    """
    # Fragments that are kept doubled instead of being collapsed to one.
    pass_word = ['不需要', '不用了', '不用', '一点', '考虑', '谢', '在', '看', '问', '想',
                 '天', '刚', '试']
    # Fixed: '拜' and '嗯,' were fused into one token by a missing comma.
    remove_word = [
        '不好意思', '对不起', '谢谢你', '哎呀', '你好', '您好', '再见', '谢谢', '好吧',
        '感谢', '呃', '啊', '哦', '嘞', '喂', '哎', '哈', '哟', '哇', '呦', '拜',
        '嗯,', '嗯。', '嗯', ',好,', '。好,', ',行,', ',行。', ',对,', ',对。']
    for remove in remove_word:
        stripped = current_query.replace(remove, '')
        if len(stripped) > 2:
            current_query = stripped

    # Collapse identical adjacent fragments; repeat until stable.
    changed = True
    while changed:
        changed = False
        pieces = []
        i = 0
        while i < len(current_query):
            repeat_num = 0
            # Try fragment lengths 1..19 and stop at the first that repeats.
            for j in range(1, 20):
                target = current_query[i:i + j]
                while target == current_query[i + j * (repeat_num + 1):
                                              i + j * (repeat_num + 2)]:
                    repeat_num += 1
                if repeat_num:
                    break
            if not repeat_num:
                pieces.append(current_query[i])
                i += 1
                continue
            if target in pass_word or not (u'\u4e00' <= target <= u'\u9fff'):
                # Whitelisted or non-CJK fragments are kept exactly twice.
                pieces.append(target * 2)
            else:
                pieces.append(target)
                changed = True
            i += j * (repeat_num + 1)
        current_query = ''.join(pieces)

    # Merge identical fragments separated by a punctuation mark.
    merged = ''
    for i, char in enumerate(current_query):
        if char in [',', '。', '?', '!']:
            for j in range(1, 5):
                before = current_query[i - j if i - j >= 0 else 0:i]
                if before == current_query[i + 1:i + j + 1]:
                    # Drop the copy before the mark; the one after stays.
                    merged = merged[:-1 * j]
                    break
        merged += char
    current_query = merged

    for point_sign in ['?。', '?,', ',。', ',,', '。,', '。。']:
        current_query = current_query.replace(point_sign, ',')
    # Guarded: indexing an empty string used to raise IndexError.
    if current_query and current_query[0] in [',', '。']:
        current_query = current_query[1:]
    return current_query


if __name__ == '__main__':
    # NOTE(review): this script section refers to ``cur1``, ``sql``,
    # ``filepath1`` and ``test_sentence3``, none of which are defined in the
    # visible source, and it calls ``recognition`` with three arguments
    # although the function takes twelve.  It fails as written and is kept
    # only for reference.
    wb1 = Workbook()
    ws1 = wb1.active
    ws1['A1'] = 'Session_ID'
    ws1['B1'] = 'Case_ID'
    ws1['C1'] = 'Sales_full_name'
    ws1['D1'] = 'Content'
    ws1['E1'] = 'False_type'
    ws1['F1'] = 'False_relu'
    ws1['G1'] = '坐席是否错误'
    ws1['H1'] = '解释'
    ws1['I1'] = 'voice_path'

    conver_dict = dict()
    cur1.execute(sql)
    result = cur1.fetchall()
    print(len(result))
    for r in result:
        if r[1] not in [',', '正在呼叫请稍候,', '对对对对对对对,', '用户已挂机,']:
            if not conver_dict.get(r[0], ''):
                conver_dict[r[0]] = [r[1:]]
            else:
                conver_dict[r[0]].append(r[1:])

    for r in conver_dict:
        print(r)
        sign = recognition(r, conver_dict[r], ws1)
        conver_list = [r, '1' if sign else '0', '']
        for c in conver_dict[r]:
            conver_list.append(str(c[2]) + '_' + c[0])
        ws1.append(conver_list)
    wb1.save(filepath1)

    pattern = re.compile(re_dict1['开场类'][0])
    result = pattern.search(test_sentence3)
    print(result.group())