# -*- coding: utf-8 -*-
"""Ad-hoc batch jobs that look up call recordings and submit them for quality testing.

Each task reads call records from MySQL and/or Excel workbooks, builds one
argument list per recording and fans the work out to the ``quality_test`` HTTP
service through the module-level thread pool.

NOTE(review): database credentials and service addresses are hard-coded in
this file.  They should be moved to configuration or environment variables
and the exposed passwords rotated.

NOTE(review): disabled (commented-out) one-off workflows were removed while
restoring this file's formatting: the TTS transcript comparison, the NLU
intent labelling of ``2022-9-9_金条.xlsx``, the complaint-indicator statistics
and the silent-call detection.  Recover them from version control if needed.
"""
import datetime
import glob
import json
import os
import re
import time
from concurrent.futures import ThreadPoolExecutor
from contextlib import closing
from itertools import islice

# import ffmpeg
import librosa
import pymysql
import requests
import threadpool
import tqdm
from openpyxl import load_workbook, Workbook
from scipy.io import wavfile

user_sign_rule = r'(我是京东|工号)'
user_pattern = re.compile(user_sign_rule)

pool = threadpool.ThreadPool(30)
executor = ThreadPoolExecutor(max_workers=20)

dialogue_dict = dict()

# Connection settings, one entry per database the tasks read from.
_DIALOGUE_DB = dict(host="47.92.193.147", port=3306, user="root",
                    passwd="Moxi123#", db="task_dialogue_config")
_OUTBOUND_PLATFORM_DB = dict(host="47.92.76.236", port=13306, user="readonly",
                             passwd="Moxi123#", db="outbound_platform")
_PLATFORM_SALE_DB = dict(host="39.103.234.234", port=13306, user="root",
                         passwd="Moxi123#", db="outbound_call_platform_sale")
_DATA_CENTER_DB = dict(host="39.103.215.119", port=3308, user="zhangjian",
                       passwd="Lingxi@123", db="data_center_temp")
# No default schema: the query against this server uses a schema-qualified table.
_VOICE_RESULT_DB = dict(host="pc-8vb8g36dhvsk6pjp8.rwlb.zhangbei.rds.aliyuncs.com",
                        port=3306, user="ai_group", passwd="Moxi123#")

_QUALITY_TEST_URL = 'http://8.142.69.133:9503/quality_test'
_NLU_URL = 'http://8.142.85.77:8679/nlu'
_FAQ_URL = 'http://8.142.85.77:8455/level_search'
_SESSION_RECORD_URL = 'http://47.92.230.239:8630/report/getDetailedRecord'

# Company name -> the two ``company_id`` values queried for it.
_COMPANY_IDS = {
    '金条': (2132, 2144),
    '菁卡': (2114, 2117),
}

# The original row filter was ``i != 0 and i < 300`` with the header as row 0,
# i.e. only the first 299 data rows of a sheet are considered.
_MAX_DATA_ROWS = 299


def _connect(db_settings):
    """Open a pymysql connection with the charset and dict cursor used file-wide."""
    return pymysql.connect(charset='utf8mb4',
                           cursorclass=pymysql.cursors.DictCursor,
                           **db_settings)


def _fetch_all(db_settings, sql, args=None):
    """Run one query on a fresh connection, close it, and return all rows."""
    with closing(_connect(db_settings)) as connection:
        cursor = connection.cursor()
        cursor.execute(sql, args)
        return cursor.fetchall()


def _run_in_pool(func, args_list):
    """Queue ``func`` once per entry of ``args_list`` on the shared pool and wait."""
    for work_request in threadpool.makeRequests(func, args_list):
        pool.putRequest(work_request)
    pool.wait()


def _http_get(url, params, **kwargs):
    """GET ``url`` with ``params`` URL-encoded into the query string.

    Values are passed through ``str`` first so that ``None`` is still sent as
    the text ``None`` (as the former ``str.format`` URLs did) instead of being
    dropped by ``requests``.
    """
    query = {name: str(value) for name, value in params.items()}
    return requests.get(url, params=query, **kwargs)


def _iter_data_rows(file_path):
    """Yield the value rows of the first sheet of ``file_path``, skipping row 0."""
    workbook = load_workbook(file_path)
    sheet = workbook[workbook.sheetnames[0]]
    for index, row in enumerate(sheet.values):
        if index != 0:
            yield row


def _company_ids(company):
    """Return the company-id pair for ``company`` or raise ``ValueError``."""
    try:
        return _COMPANY_IDS[company]
    except KeyError:
        raise ValueError('unsupported company: {!r}'.format(company))


def check_data(call_id, call_start_time, cursor):
    """Return True when ``human_dialogue`` has no row for this call.

    :param call_id: value matched against ``human_dialogue.call_id``.
    :param call_start_time: value matched against ``call_start_time``.
    :param cursor: open cursor on the database holding ``human_dialogue``.
    :return: ``False`` if at least one matching row exists, else ``True``.
    """
    sql_info = (
        'select id, identity '
        'from human_dialogue '
        'where call_id = %s '
        'and call_start_time = %s; '
    )
    # str() keeps the values quoted as text, as the original '"{}"' template did.
    cursor.execute(sql_info, (str(call_id), str(call_start_time)))
    return not cursor.fetchall()


def new_platform_task(min_date, max_date, workspace_id, company_id):
    """Query transferred calls of ``company_id`` between two dates and print them.

    The window is ``min_date 09:00:00`` to ``max_date 21:00:00``.

    NOTE(review): the code that built ``platform_task`` arguments was disabled
    in the original (it referenced names that were never assigned), so nothing
    is queued and ``workspace_id`` is unused.  TODO: map the selected columns
    onto ``platform_task`` parameters.
    """
    sql = (
        'select ocr.dm_session_id, ocr.id, ocr.voice_path, ocr.call_start_time, '
        'ocr.answer_time, ocr.transfer_time '
        'from outbound_call_result ocr '
        'where ocr.call_start_time > %s '
        'and ocr.call_start_time < %s '
        'and ocr.company_id = %s '
        'and ocr.call_status = "normalConnection" '
        'and ocr.transfer_time is not null '
        'and ocr.human_answer_duration > 0; '
    )
    results = _fetch_all(
        _OUTBOUND_PLATFORM_DB, sql,
        ('{} 09:00:00'.format(min_date), '{} 21:00:00'.format(max_date), company_id))

    for data in tqdm.tqdm(results):
        print(data)

    args_list = []
    print(len(args_list))
    _run_in_pool(platform_task, args_list)


def old_platform_task(min_date, max_date, workspace_id, company_id):
    """Submit successful ``huadanrecord`` calls that are not yet in ``human_dialogue``.

    Call ids already stored for ``workspace_id`` in the date window are
    collected first; every remaining record with a positive talk time and a
    recording URL is queued for ``platform_task`` with the label ``纯人``.
    """
    sql_processed = (
        'select distinct call_id '
        'from human_dialogue '
        'where workspace_id = %s '
        'and call_start_time > %s '
        'and call_start_time < %s; '
    )
    processed_rows = _fetch_all(
        _DIALOGUE_DB, sql_processed,
        (workspace_id, '{} 09:00:00'.format(min_date), '{} 23:59:59'.format(max_date)))
    processed_call_ids = {row['call_id'] for row in processed_rows}

    sql = (
        'select ocr.call_id, ocr.customer_id, ocr.sound_record_url, ocr.call_start_time, '
        'ocr.call_end_time, ocr.answer_start_time, ocr.answer_end_time, ocr.talk_time '
        'from huadanrecord ocr '
        'where ocr.call_start_time > %s '
        'and ocr.call_start_time < %s '
        'and ocr.company_id = %s '
        'and ocr.call_status = "通话成功" '
        'and ocr.sound_record_url is not null; '
    )
    results = _fetch_all(
        _PLATFORM_SALE_DB, sql,
        ('{} 09:00:00'.format(min_date), '{} 21:00:00'.format(max_date), company_id))
    print(len(results))

    args_list = []
    for data in tqdm.tqdm(results):
        talk_time = data['talk_time']
        call_id = data['call_id']
        voice_path = data['sound_record_url']
        # talk_time is stored as text with a trailing '秒' that is stripped before int().
        if (int(talk_time.replace('秒', '')) > 0
                and call_id not in processed_call_ids
                and voice_path):
            # platform_task takes 13 positional arguments; fields this table
            # does not provide (session_id, agent_id, transfer_time) are sent
            # as empty strings, as data_platfrom_task does for its unknowns.
            args_list.append(([
                call_id, voice_path, workspace_id, company_id,
                '', '',
                data['call_start_time'], data['call_end_time'],
                data['answer_start_time'], '',
                data['answer_end_time'], talk_time, '纯人',
            ], None))

    print(len(args_list))
    _run_in_pool(platform_task, args_list)


def data_platfrom_task(min_date, max_date, workspace_id, company_id, IP):
    """Collect connected calls with human talk time and print summary counts.

    Rows with a truthy ``robot_answer_duration`` are labelled ``<IP>_实转``,
    all others ``<IP>_纯人``.  Printed, in order: the summed
    ``human_answer_duration``, the number of ``实转`` rows and the number of
    ``纯人`` rows.

    NOTE(review): submitting the two lists to ``platform_task`` was disabled
    in the original and remains disabled; ``workspace_id`` is unused.
    """
    sql_human = (
        'select ocr.customer_id, ocr.company_id, ocr.case_id, ocr.agent_id, ocr.voice_path, '
        'ocr.call_start_time, ocr.answer_time, ocr.transfer_time, ocr.call_end_time, '
        'ocr.robot_answer_duration, ocr.human_answer_duration '
        'from outbound_call_result ocr '
        'where ocr.call_start_time > %s '
        'and ocr.call_start_time < %s '
        'and ocr.company_id = %s '
        'and ocr.call_status = "normalConnection" '
        'and ocr.human_answer_duration > 1; '
    )
    results = _fetch_all(
        _DATA_CENTER_DB, sql_human,
        ('{} 09:00:00'.format(min_date), '{} 21:00:00'.format(max_date), company_id))

    talk_time_all = 0
    args_transferred = []
    args_human_only = []
    for result in results:
        talk_time = result['human_answer_duration']
        talk_time_all += int(talk_time)
        if result['robot_answer_duration']:
            bucket, label = args_transferred, '{}_实转'.format(IP)
        else:
            bucket, label = args_human_only, '{}_纯人'.format(IP)
        bucket.append(([
            result['customer_id'], result['voice_path'], '', '', '',
            result['agent_id'], '', '', '', '', '', talk_time, label,
        ], None))

    # NOTE(review): the summary prints are assumed to run once after the loop;
    # the collapsed source does not show their indentation.
    print(talk_time_all)
    print(len(args_transferred))
    print(len(args_human_only))


def platform_task(call_id, voice_path, workspace_id, company_id, session_id, agent_id,
                  call_start_time, call_end_time, answer_start_time, transfer_time,
                  answer_end_time, talk_time, IP):
    """Send one recording, with its call metadata, to the quality-test service.

    All thirteen arguments are forwarded as query parameters (``call_id`` as
    ``customerId``, ``voice_path`` as ``wavPath``, ``call_start_time`` as
    ``createTime``, ``call_end_time`` as ``endTime``, ``answer_start_time`` as
    ``answerTime``).

    :return: the ``requests`` response object.
    """
    return _http_get(_QUALITY_TEST_URL, {
        'customerId': call_id,
        'wavPath': voice_path,
        'workspaceId': workspace_id,
        'sessionId': session_id,
        'companyId': company_id,
        'agentId': agent_id,
        'createTime': call_start_time,
        'endTime': call_end_time,
        'answerTime': answer_start_time,
        'transferTime': transfer_time,
        'answerEndTime': answer_end_time,
        'talkTime': talk_time,
        'IP': IP,
    })


def nlu_result(workspace, query):
    """Classify ``query`` with the NLU service, falling back to FAQ search.

    :return: ``[q_type, intent, query, answer, update_time]``.  When the NLU
        response carries no intention value the list is
        ``['默认分类', '<faq intent>_<faq query>', query, 'NOINTENT', update_time]``.
    """
    update_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    # The original URL literal contained the mojibake '¤t_query' (a mangled
    # '&current_query'), so the query text never reached the service.
    response = _http_get(_NLU_URL, {
        'session_id': '-1',
        'workspace': workspace,
        'current_query': query,
    }).json()

    intentions = response.get('intention', '')
    if intentions and intentions[0].get('value', ''):
        value = intentions[0]['value']
        return [value['qtype'], value['standard_query'], value['original_query'],
                value['a'], update_time]

    faq_query, faq_intent, _semantic = faq_result(workspace, query)
    return ['默认分类', '{}_{}'.format(faq_intent, faq_query), query, 'NOINTENT', update_time]


def faq_result(workspace, query):
    """Return ``(original_query, standard_query, semantic)`` of the top FAQ hit.

    The ``ch`` list of the response is preferred over the ``h`` list; three
    empty strings are returned when both are empty.
    """
    response = _http_get(_FAQ_URL, {'systemId': workspace, 'query': query}).json()
    for level in ('ch', 'h'):
        hits = response[level]
        if hits:
            top = hits[0]
            return top['original_query'], top['standard_query'], top['semantic']
    return '', '', ''


def get_session_text(session_id, system_id, customer_id, ws_w):
    """Fetch a session's dialogue, append it to ``ws_w`` and return it.

    One ``[customer_id, content, role]`` row is appended per sentence, followed
    by one empty row.  ``role`` is 0 for ``speakerType == 'USER'`` and 1
    otherwise; ``IVR`` sentences take their text from ``botWordsText``, all
    others from ``msgContent``.

    :return: list of ``[content, role]`` pairs in response order.
    """
    result = _http_get(_SESSION_RECORD_URL,
                       {'sessionId': session_id, 'workSpaceId': system_id},
                       timeout=5).json()
    content_list = []
    for sentence in result["result"]:
        speaker = sentence['speakerType']
        role = 0 if speaker == 'USER' else 1
        content = sentence['botWordsText'] if speaker == 'IVR' else sentence['msgContent']
        ws_w.append([customer_id, content, role])
        content_list.append([content, role])
    ws_w.append([])
    return content_list


def transfer_wav(call_id, wav_path, ip):
    """Send one recording to the quality-test service and return the response."""
    return _http_get(_QUALITY_TEST_URL, {
        'customerId': call_id,
        'wavPath': wav_path,
        'IP': ip,
    })


def transfer_from_es(company, file_paths):
    """Export dialogue text of non-transferred calls for customers listed in workbooks.

    For each data row the customer id is read from column index 3, the last
    matching ``outbound_call_result`` row is looked up and its session text is
    written to ``./2022-5-20_机器人_人机记录.xlsx``.  Customer ids without a
    match are printed, as is the number of matches per input file.

    :raises ValueError: if ``company`` is not one of the configured companies.
    """
    company_ids = _company_ids(company)
    sql_robot = (
        'select ocr.dm_session_id, ocr.id, ocr.company_id, ocr.voice_path, '
        'ocr.call_start_time, ocr.answer_time, ocr.transfer_time, ocr.call_end_time, '
        'ocr.human_answer_duration '
        'from outbound_call_result ocr '
        'where ocr.call_start_time > "2022-04-01 09:00:00" '
        'and ocr.call_start_time < "2022-06-01 21:00:00" '
        'and ocr.customer_id = %s '
        'and ocr.company_id in (%s, %s) '
        'and ocr.call_status = "normalConnection" '
        'and ocr.transfer_time is null; '
    )

    wb_w = Workbook()
    ws_w = wb_w.active
    ws_w.append(['客户ID', '文本内容', '角色'])

    with closing(_connect(_OUTBOUND_PLATFORM_DB)) as connection_robot:
        cursor_robot = connection_robot.cursor()
        for file_path in file_paths:
            num = 0
            for row in tqdm.tqdm(_iter_data_rows(file_path)):
                customer_id = row[3]
                # str() keeps the id compared as text, like the original '"{}"'.
                cursor_robot.execute(sql_robot, (str(customer_id),) + company_ids)
                result = cursor_robot.fetchall()
                if result:
                    num += 1
                    get_session_text(result[-1]['dm_session_id'], '222', customer_id, ws_w)
                else:
                    print(customer_id)
            print(num)

    wb_w.save('./2022-5-20_机器人_人机记录.xlsx')


def transfer_from_file(company, file_paths, customer_list):
    """Submit recordings of transferred calls listed in workbooks for quality testing.

    Only the first 299 data rows of each sheet are considered.  A row is used
    when column index 5 contains ``转人工`` and its customer id (column index 7)
    is not in ``customer_list``.  For such a row the last matching
    ``huadanrecord`` recording is queued with label ``<company>质检纯人`` and
    the last matching ``outbound_call_result`` recording with label
    ``<company>质检实转``; ids without a robot-side match are printed.

    :raises ValueError: if ``company`` is not one of the configured companies.
    """
    company_ids = _company_ids(company)
    skip_ids = set(customer_list)
    sql_robot = (
        'select ocr.dm_session_id, ocr.id, ocr.company_id, ocr.voice_path, '
        'ocr.call_start_time, ocr.answer_time, ocr.transfer_time, ocr.call_end_time, '
        'ocr.human_answer_duration '
        'from outbound_call_result ocr '
        'where ocr.call_start_time > "2022-03-01 09:00:00" '
        'and ocr.call_start_time < "2022-06-01 21:00:00" '
        'and ocr.customer_id = %s '
        'and ocr.company_id in (%s, %s) '
        'and ocr.call_status = "normalConnection" '
        'and ocr.transfer_time is not null; '
    )
    sql_human = (
        'select ocr.call_id, ocr.customer_id, ocr.company_id, ocr.sound_record_url, '
        'ocr.call_start_time, ocr.call_end_time, ocr.answer_start_time, '
        'ocr.answer_end_time, ocr.talk_time '
        'from huadanrecord ocr '
        'where ocr.call_start_time > "2022-03-01 09:00:00" '
        'and ocr.call_start_time < "2022-06-01 21:00:00" '
        'and ocr.customer_id = %s '
        'and ocr.call_status = "通话成功" '
        'and ocr.sound_record_url is not null; '
    )
    human_label = '{}质检纯人'.format(company)
    robot_label = '{}质检实转'.format(company)

    with closing(_connect(_OUTBOUND_PLATFORM_DB)) as connection_robot, \
            closing(_connect(_PLATFORM_SALE_DB)) as connection_human:
        cursor_robot = connection_robot.cursor()
        cursor_human = connection_human.cursor()

        for file_path in file_paths:
            wav_robot = []
            wav_human = []
            rows = islice(_iter_data_rows(file_path), _MAX_DATA_ROWS)
            for row in tqdm.tqdm(rows):
                # An empty cell in column 5 is skipped instead of raising TypeError.
                if not row[5] or '转人工' not in row[5] or row[7] in skip_ids:
                    continue
                customer_id = row[7]
                customer_key = str(customer_id)

                cursor_human.execute(sql_human, (customer_key,))
                result = cursor_human.fetchall()
                if result:
                    wav_human.append((
                        [customer_id, result[-1]['sound_record_url'], human_label], None))

                # NOTE(review): the robot-side lookup is assumed to run for every
                # selected row, independent of the human-side result (the
                # original had a commented-out ``else:`` here and the collapsed
                # source does not show the nesting) - confirm.
                cursor_robot.execute(sql_robot, (customer_key,) + company_ids)
                result = cursor_robot.fetchall()
                if result:
                    wav_robot.append((
                        [customer_id, result[-1]['voice_path'], robot_label], None))
                else:
                    print(customer_id)

            _run_in_pool(transfer_wav, wav_robot)
            _run_in_pool(transfer_wav, wav_human)


def get_quality_detail_from_mysql(file_path='./', workspace_id=None):
    """Fetch stored ``human_dialogue`` rows for every call listed in a workbook.

    For each data row, column index 3 is used as the call id and the date part
    of column index 5 as the day to search (09:00:00 to 23:59:59).

    NOTE(review): the original body was an unfinished stub that could not run
    (``ws_w.append()`` without an argument, three values formatted into a
    four-placeholder query, a directory as workbook path).  The parameters and
    the return value were added to make it usable - confirm the intended output.

    :param file_path: workbook to read; the original hard-coded ``'./'``.
    :param workspace_id: value for the ``workspace_id`` filter (required).
    :return: list of all fetched rows, in workbook order.
    :raises ValueError: if ``workspace_id`` is not given.
    """
    if workspace_id is None:
        raise ValueError('workspace_id is required')

    sql_id = (
        'select call_id, content_text, identity, workspace_id, company_id, call_start_time '
        'from human_dialogue '
        'where workspace_id = %s '
        'and call_start_time > %s '
        'and call_start_time < %s '
        'and call_id = %s; '
    )
    details = []
    with closing(_connect(_DIALOGUE_DB)) as connection:
        cursor = connection.cursor()
        for row in _iter_data_rows(file_path):
            customer_id = row[3]
            call_date = str(row[5]).split(' ')[0]
            cursor.execute(sql_id, (
                workspace_id,
                '{} 09:00:00'.format(call_date),
                '{} 23:59:59'.format(call_date),
                str(customer_id),
            ))
            details.extend(cursor.fetchall())
    return details


def daily_task():
    """Run ``transfer_from_file`` for ``金条``, skipping already-listed customers.

    Customer ids are read from column index 0 of ``../../5-26-纯人-纯人.xlsx``
    and passed as the exclusion list for ``./金条录音跟听明细-AI.xlsx``.
    """
    file_paths = ['../../5-26-纯人-纯人.xlsx']
    customer_list = []
    seen = set()
    for file_path in file_paths:
        for row in _iter_data_rows(file_path):
            if row[0] not in seen:
                seen.add(row[0])
                customer_list.append(row[0])
    print(len(customer_list))

    company = '金条'
    file_paths = ['./金条录音跟听明细-AI.xlsx']
    transfer_from_file(company, file_paths, customer_list)


def task_cuishou():
    """Submit recordings with ``intent_id`` 10083 answered after 2022-07-01 09:00:00.

    Each recording is queued for ``transfer_wav`` with the label ``贷后``.
    """
    sql = (
        'select id, voice_path '
        'from outbound_call_platform.outbound_call_voice_result '
        'where intent_id = 10083 '
        'and answer_time > "2022-07-01 09:00:00" '
        'and call_number is not null; '
    )
    results = _fetch_all(_VOICE_RESULT_DB, sql)

    parameter = []
    for result in tqdm.tqdm(results):
        parameter.append(([result['id'], result['voice_path'], '贷后'], None))

    print(len(parameter))
    _run_in_pool(transfer_wav, parameter)


def main():
    """Submit every distinct recording listed in ``./voice_result_2114.xlsx``.

    Column index 0 is the id used for de-duplication, column index 1 the
    recording path; each entry is queued for ``transfer_wav`` with label ``2114``.

    Other entry points that were run ad hoc from here (disabled in the
    original): ``data_platfrom_task(min_date, max_date, '341', '2130', '水滴')``,
    ``data_platfrom_task(min_date, max_date, '341', '2199', '黑牛')`` and
    ``task_cuishou()``.
    """
    file_paths = ['./voice_result_2114.xlsx']
    parameter = []
    seen_ids = set()
    for file_path in file_paths:
        for row in tqdm.tqdm(_iter_data_rows(file_path)):
            if row[0] not in seen_ids:
                seen_ids.add(row[0])
                parameter.append(([row[0], row[1], '2114'], None))

    print(len(parameter))
    _run_in_pool(transfer_wav, parameter)


if __name__ == '__main__':
    main()