# -*- coding: utf-8 -*-
import datetime
import os
import re
import oss2
import uuid
import time
import json
import logging
import asyncio
import wave
import librosa
import operator
import traceback
import requests
import webrtcvad
import websocket
import collections
import contextlib
import numpy as np
from typing import List
from pydub import AudioSegment
from scipy.io import wavfile
from tornado.web import RequestHandler
# Restore (thaw) cold-archived OSS objects
from oss2.models import RestoreJobParameters
from oss2.models import RestoreConfiguration
from oss2.models import RESTORE_TIER_EXPEDITED
from oss2.models import RESTORE_TIER_STANDARD
from oss2.models import RESTORE_TIER_BULK

wav_dir = './wav_dir'
# ASR_URL = 'ws://8.142.222.140/asr/v0.6'  # ali asr
ASR_URL = 'ws://172.26.215.111/asr/v0.8'  # jd asr
# ASR_URL = 'ws://116.196.96.191/asr/v0.8'  # 172.29.200.5
INTENT_URL = 'http://8.142.69.133:8677/predict?type=222&sessionId=-1&query={}'

# Quality-check labels that are violations whenever they appear in the dialogue
rule_process = [
    '刷单',
    '虚假承诺',
    '辱骂客户',
]
# Labels that must appear at the start of a call
rule_start = [
    '开场核身',
    '自报家门',
]
# Labels that must appear at the end of a call
rule_end = [
    '防诈骗话术',
]

# The patterns below deliberately include common ASR homophone mistakes
# (e.g. 工号/工好, 先生/先证) so they still match noisy transcripts.
self_pattern = re.compile(r'.*((京东|金融).{0,10}(客户经理|我号|光号|工号|工作编号|工好)).*')
check_pattern = re.compile(r'.*((这边|这边的话|边的话|您是).{0,10}(先生|先证|女士|女是)|请问(您|你)?是|是.{0,5}(先生|女士)吗).*')
end_pattern = re.compile(r'.*(再见|生活愉快|感谢.{0,5}接听).*')
defeat_pattern = re.compile(r'.*((房|防).{0,5}诈骗|诈骗.{0,10}(较多|提发|频发)|(直接|第一时间|马上|立刻).{0,10}报警|(电信|电视|网络).{0,20}诈骗|违法分子|不法分子).*')
mianze_pattern = re.compile(r'(具体.{0,10}(页面|系统|显示|审核|审批))')
mianxi_pattern = re.compile(r'([一二两三四五六七八九十千百零]+天.{0,5}免息)')


class Chat_record(object):
    """A single ASR sentence with its speaker role and time span."""

    def __init__(self, content, start_time, end_time, role, result_id):
        self.content = content
        self.start_time = start_time
        self.end_time = end_time
        self.role = role
        self.result_id = result_id
        self.webhook = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=25ea61a4-35b8-4ca3-9e8a-527136a9a367'
        self.webhook_token = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=cfc9a7ec-fae4-4802-9844-caae493e5ab4'

    def add_legal(self, legal_type, detail):
        self.legal = False
        self.legal_type = legal_type
        self.detail = detail


class LxAsrError(Exception):
    pass


class LxAsrClient:
    StartTranscription = "StartTranscription"
    StopTranscription = "StopTranscription"
    TranscriptionStarted = "TranscriptionStarted"
    TranscriptionResultChanged = "TranscriptionResultChanged"
    SentenceEnd = "SentenceEnd"
    TranscriptionCompleted = "TranscriptionCompleted"

    def __init__(self, ws_addr: str):
        self._ws_addr = ws_addr

    def send_pcm(self, pcm: bytes, sample_rate: int = 8000) -> List[str]:
        assert sample_rate in (8000, 16000)
        sentences: List[str] = []
        payload = {
            "sample_rate": sample_rate,
            "enable_punctuation": True,
            "punctuation_threshold": 250,
        }
        task_id = str(uuid.uuid1())
        start_msg = {
            "header": {
                "event_name": self.StartTranscription,
                "task_id": task_id,
            },
            "payload": payload,
        }
        stop_msg = {
            "header": {
                "event_name": self.StopTranscription,
            }
        }
        ws = websocket.create_connection(self._ws_addr, suppress_origin=True)
        ws.send(json.dumps(start_msg))
        resp = ws.recv()
        resp_dict = json.loads(resp)
        if resp_dict["header"]["event_name"] != self.TranscriptionStarted:
            raise LxAsrError(resp_dict["header"]["status_message"])
        ws.send(pcm, opcode=websocket.ABNF.OPCODE_BINARY)
        ws.send(json.dumps(stop_msg))
        while True:
            resp = ws.recv()
            resp_dict = json.loads(resp)
            event_name = resp_dict["header"]["event_name"]
            if event_name == self.SentenceEnd:
                sentences.append(resp_dict["payload"]["result"])
            elif event_name == self.TranscriptionCompleted:
                break
            elif resp_dict["header"]["status"] != 200:
                raise LxAsrError(resp_dict["header"]["status_message"])
        try:
            ws.close()
        except Exception as e:
            logging.error("asr ws close error: %s" % str(e))
        return sentences
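

# --- Illustrative usage sketch (not called anywhere in this module) -----------
# Minimal example of driving LxAsrClient directly, assuming ASR_URL points at a
# reachable streaming-ASR websocket and `path` is a mono 16-bit PCM wav recorded
# at 8 kHz or 16 kHz. The helper name below is hypothetical.
def _example_transcribe_wav(path: str) -> List[str]:
    with contextlib.closing(wave.open(path, 'rb')) as wf:
        assert wf.getnchannels() == 1 and wf.getsampwidth() == 2
        sample_rate = wf.getframerate()
        pcm = wf.readframes(wf.getnframes())
    client = LxAsrClient(ASR_URL)
    # Returns one string per recognized sentence, e.g. ['您好,请问是王先生吗,']
    return client.send_pcm(pcm, sample_rate)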
class PlatformBase(RequestHandler):

    def asr_send(self, pcm: bytes, sample_rate: int = 8000) -> List[str]:
        now = datetime.datetime.now().time()
        # Daytime window during which the huoshan ASR endpoint is preferred
        start = datetime.time(9, 0, 0)
        end = datetime.time(21, 0, 0)
        # Route to huoshan ASR inside the window, otherwise fall back to ASR_URL
        if start <= now <= end:
            logging.info("----------huoshan mode\n")
            # client = LxAsrClient('ws://180.184.36.44/asr/v0.7')
            client = LxAsrClient('ws://180.184.36.57:8800/asr/v0.8')
        else:
            logging.info("----------ali mode\n")
            client = LxAsrClient(ASR_URL)
        return client.send_pcm(pcm, sample_rate)

    def read_wave(self, path):
        """Reads a .wav file.

        Takes the path, and returns (PCM audio data, sample rate).
        """
        with contextlib.closing(wave.open(path, 'rb')) as wf:
            num_channels = wf.getnchannels()
            assert num_channels == 1
            sample_width = wf.getsampwidth()
            assert sample_width == 2
            sample_rate = wf.getframerate()
            assert sample_rate in (8000, 16000, 32000, 48000)
            pcm_data = wf.readframes(wf.getnframes())
            return pcm_data, sample_rate

    class Frame(object):
        """Represents a "frame" of audio data."""

        def __init__(self, bytes, timestamp, duration):
            self.bytes = bytes
            self.timestamp = timestamp
            self.duration = duration

    def frame_generator(self, frame_duration_ms, audio, sample_rate):
        """Generates audio frames from PCM audio data.

        Takes the desired frame duration in milliseconds, the PCM data, and
        the sample rate.

        Yields Frames of the requested duration.
        """
        n = int(sample_rate * (frame_duration_ms / 1000.0) * 2)
        offset = 0
        timestamp = 0.0
        duration = (float(n) / sample_rate) / 2.0
        while offset + n < len(audio):
            yield self.Frame(audio[offset:offset + n], timestamp, duration)
            timestamp += duration
            offset += n

    def vad_collector(self, sample_rate, frame_duration_ms, padding_duration_ms, vad, frames):
        """Filters out non-voiced audio frames.

        Given a webrtcvad.Vad and a source of audio frames, yields only
        the voiced audio.

        Uses a padded, sliding window algorithm over the audio frames.
        When more than 90% of the frames in the window are voiced (as
        reported by the VAD), the collector triggers and begins yielding
        audio frames. Then the collector waits until 90% of the frames in
        the window are unvoiced to detrigger.

        The window is padded at the front and back to provide a small
        amount of silence or the beginnings/endings of speech around the
        voiced frames.

        Arguments:

        sample_rate - The audio sample rate, in Hz.
        frame_duration_ms - The frame duration in milliseconds.
        padding_duration_ms - The amount to pad the window, in milliseconds.
        vad - An instance of webrtcvad.Vad.
        frames - a source of audio frames (sequence or generator).

        Returns: A generator that yields PCM audio data.
        """
        num_padding_frames = int(padding_duration_ms / frame_duration_ms)
        # We use a deque for our sliding window/ring buffer.
        ring_buffer = collections.deque(maxlen=num_padding_frames)
        # We have two states: TRIGGERED and NOTTRIGGERED. We start in the
        # NOTTRIGGERED state.
        triggered = False

        time_list = []
        voiced_frames = []
        for frame in frames:
            is_speech = vad.is_speech(frame.bytes, sample_rate)
            if not triggered:
                ring_buffer.append((frame, is_speech))
                num_voiced = len([f for f, speech in ring_buffer if speech])
                # If we're NOTTRIGGERED and more than 90% of the frames in
                # the ring buffer are voiced frames, then enter the
                # TRIGGERED state.
                if num_voiced > 0.9 * ring_buffer.maxlen:
                    triggered = True
                    time_list.append(ring_buffer[0][0].timestamp)
                    # print('starttime', ring_buffer[0][0].timestamp)
                    # We want to yield all the audio we see from now until
                    # we are NOTTRIGGERED, but we have to start with the
                    # audio that's already in the ring buffer.
                    for f, s in ring_buffer:
                        voiced_frames.append(f)
                    ring_buffer.clear()
            else:
                # We're in the TRIGGERED state, so collect the audio data
                # and add it to the ring buffer.
                voiced_frames.append(frame)
                ring_buffer.append((frame, is_speech))
                num_unvoiced = len([f for f, speech in ring_buffer if not speech])
                # If more than 90% of the frames in the ring buffer are
                # unvoiced, then enter NOTTRIGGERED and yield whatever
                # audio we've collected.
                if num_unvoiced > 0.9 * ring_buffer.maxlen:
                    time_list.append(frame.timestamp + frame.duration)
                    # print('endtime =', frame.timestamp + frame.duration)
                    triggered = False
                    yield b''.join([f.bytes for f in voiced_frames]), time_list
                    voiced_frames = []
                    time_list.clear()
        if triggered:
            time_list.append(frame.timestamp + frame.duration)
        # If we have any leftover voiced audio when we run out of input,
        # yield it.
        if voiced_frames:
            yield b''.join([f.bytes for f in voiced_frames]), time_list

    def asr_audio(self, wav) -> List[dict]:
        # print('wav name =', wav)
        # Skip files that are effectively empty or unreadable as audio
        if librosa.load(wav, sr=8000)[0].size < 3:
            # print(wav, 'no wav data open error')
            return []
        set_vadmode = 3  # VAD aggressiveness: 0, 1, 2 or 3
        wav_result: List[dict] = []
        audio, sample_rate = self.read_wave(wav)
        vad = webrtcvad.Vad(set_vadmode)
        frames = self.frame_generator(30, audio, sample_rate)
        frames = list(frames)
        segments = self.vad_collector(sample_rate, 30, 300, vad, frames)
        for i, segment in enumerate(segments):
            part_result = {}
            part_result['start_time'] = segment[1][0]
            part_result['end_time'] = segment[1][1]
            part_result['wav_name'] = wav
            asr_result: List[str] = self.asr_send(segment[0], sample_rate)
            if asr_result != []:
                part_result['result'] = "".join(asr_result)
                wav_result.append(part_result)
        return wav_result

    def download_to_wav(self, url, id, ip):
        # response = request.urlopen(url)
        response = requests.get(url)
        try:
            if response.status_code != 200:
                # The recording may still be archived on OSS: request a
                # restore, wait, then retry the download once.
                voice_path = '/'.join(url.split('/')[3:])
                release_voice_file(voice_path)
                time.sleep(60)
                response = requests.get(url)
        except Exception:
            pass
        self.file_path = ''
        if url[-3:] == 'wav':
            self.file_path = '{}/{}_{}.wav'.format(wav_dir, ip, id)
            with open(self.file_path, 'wb') as fp:
                fp.write(response.content)
                # fp.write(response.read())
                fp.flush()
        elif url[-3:] == '.V3':
            self.file_path = '{}/{}_{}.wav'.format(wav_dir, ip, id)
            with open(self.file_path, 'wb') as fp:
                fp.write(response.content)
                # fp.write(response.read())
                fp.flush()
        elif url[-3:] == 'mp3':
            self.file_path = '{}/{}_{}.mp3'.format(wav_dir, ip, id)
            c = 'wget "{}" -c -T 10 -t 10 -O "{}"'.format(url, self.file_path)
            os.system(c)
            c = 'ffmpeg -i "{}" -f wav "{}"'.format(self.file_path, self.file_path[:-3] + 'wav')
            os.system(c)
            os.remove(self.file_path)
            self.file_path = '{}/{}_{}.wav'.format(wav_dir, ip, id)
            # song.export( self.file_path, format='wav' )
        elif url.split('?')[0].split('/')[-1][-3:] == 'mp3':
            self.file_path = '{}/{}_{}.mp3'.format(wav_dir, ip, id)
            c = 'wget "{}" -c -T 10 -t 10 -O "{}"'.format(url, self.file_path)
            os.system(c)
            c = 'ffmpeg -i "{}" -f wav "{}"'.format(self.file_path, self.file_path[:-3] + 'wav')
            os.system(c)
            os.remove(self.file_path)
            self.file_path = self.file_path[:-3] + 'wav'
        elif url.split('?')[0].split('/')[-1][-4:] == 'flac':
            self.file_path = '{}/{}_{}.flac'.format(wav_dir, ip, id)
            self.file_path_human = ''
            c = 'wget "{}" -c -T 10 -t 10 -O "{}"'.format(url, self.file_path)
            os.system(c)
            c = 'ffmpeg -i "{}" -f wav "{}"'.format(self.file_path, self.file_path[:-4] + 'wav')
            os.system(c)
            os.remove(self.file_path)
            self.file_path = self.file_path[:-4] + 'wav'
        elif url.split('?')[0].split('/')[-1][-3:] == 'wav':
            self.file_path = '{}/{}_{}.wav'.format(wav_dir, ip, id)
            with open(self.file_path, 'wb') as fp:
                fp.write(response.content)
                fp.flush()

    def get_human_port_from(self, human_answer_duration):
        song = AudioSegment.from_wav(self.file_path)
        try:
            # Keep only the tail of the recording where the human agent answered
            human_part = song[-1 * (int(human_answer_duration) + 3) * 1000:]
            human_part.export(self.file_path, format='wav')
        except Exception:
            pass

    def convert_to_record(self, results, role, result_id):
        records = []
        for result in results:
            # print( result )
            if result.get('result', ''):
                content = result.get('result', '')
                start_time = result['start_time']
                end_time = result['end_time']
                record = Chat_record(content, start_time, end_time, role, result_id)
                records.append(record)
        return records

    def sort_records(self, chat_records):
        cmpfun = operator.attrgetter('start_time')
        chat_records.sort(key=cmpfun)
        return chat_records

    def sample_from_file(self):
        try:
            samplerate, data = wavfile.read(self.file_path)
        except Exception:
            try:
                c = 'ffmpeg -i "{}" "{}"'.format(self.file_path, self.file_path[:-4] + '_fix.wav')
                os.system(c)
                os.remove(self.file_path)
                self.file_path = self.file_path[:-4] + '_fix.wav'
                samplerate, data = wavfile.read(self.file_path)
            except Exception:
                traceback.print_exc()
                os.remove(self.file_path)
                return 0, 0, 0
        if samplerate not in [8000, 16000]:
            c = 'sox "{}" -r 16000 "{}"'.format(self.file_path, self.file_path[:-4] + '_down.wav')
            os.system(c)
            os.remove(self.file_path)
            self.file_path = self.file_path[:-4] + '_down.wav'
            samplerate, data = wavfile.read(self.file_path)
        duration = data.shape[0] // samplerate
        return samplerate, data, duration

    def get_content(self, id):
        # self.file_path = './saleCallRecord_RPA_huadan_2021-12-16_a5062471-bf90-452c-95f4-a05b47986980_D638325469292063042148.wav'
        samplerate, data, duration = self.sample_from_file()
        if samplerate == 0 and data == 0:
            return 0, 0, 0, 0
        duration = data.shape[0] // samplerate
        left = []
        right = []
        if len(data.shape) == 2:
            # Stereo recording: left channel is the staff, right channel the user
            for item in data:
                left.append(item[0])
                right.append(item[1])
            self.file_path_left = self.file_path[:-4] + '_left.wav'
            self.file_path_right = self.file_path[:-4] + '_right.wav'
            wavfile.write(self.file_path_left, samplerate, np.array(left))
            wavfile.write(self.file_path_right, samplerate, np.array(right))
            left_result = self.asr_audio(self.file_path_left)
            right_result = self.asr_audio(self.file_path_right)
            left_record = self.convert_to_record(left_result, 'staff', id)
            right_record = self.convert_to_record(right_result, 'user', id)
            os.remove(self.file_path)
            os.remove(self.file_path_left)
            os.remove(self.file_path_right)
            right_record.extend(left_record)
            right_record = self.sort_records(right_record)
            return right_record, duration, 2, 1
        else:
            result = self.asr_audio(self.file_path)
            record = self.convert_to_record(result, 'staff', id)
            os.remove(self.file_path)
            return record, duration, 1, 1

    def optimize_record(self, chat_record):
        content_list = list()
        if chat_record:
            for c in chat_record:
                # Drop empty sentences and common ASR filler/noise transcriptions
                if c.content not in ['', ',', '正在呼叫请稍候,', '用户已挂机',
                                     '对对对对对对对,', '对对对对对对,', '对对对对对,', '对对对对,',
                                     '多多多多多多多,', '多多多多多多,', '多多多多多,', '多多多多,', '多多多',
                                     ]:
                    # c.content = c.content.replace(',', '')
                    content_list.append([c.content, 0 if c.role == 'staff' else 1, c.start_time, c.end_time])
        return content_list
def nlu_result(workspace, query):
    update_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(round(time.time() * 1000)) / 1000))
    response = requests.get(
        # 'http://8.142.85.77:8679/nlu?session_id=-1&workspace={}&current_query={}'.format(workspace, query))
        'http://8.142.85.77:8680/nlu?session_id=-1&workspace={}&current_query={}'.format(workspace, query))
    response = json.loads(response.text)
    # Attitude
    attitude = int(response.get('original_attitude', 0))
    # Slots (entities)
    slot_dict = dict()
    for slot in response.get('slot', []):
        if slot['slot_name'] not in slot_dict:
            slot_dict[slot['slot_name']] = list()
        slot_dict[slot['slot_name']].append(slot['slot_value'][0])
    # Intent
    intent = ''
    if response.get('intention', '') and response['intention'][0].get('value', ''):
        intent = response.get('intention', '')[0].get('value', '')
        # print( query, intent, type(intent) )
        q_type = intent['qtype']
        answer = intent['a']
        query = intent['original_query']
        intent = intent['standard_query']
        # print( [q_type, intent, query, answer, update_time] )
        # print( intent )
        # ws.append([customer_id, intent, query])
        # ws.append([customer_id, intent])
        # return [q_type, intent, query, answer, update_time]
    else:
        q, i, s = faq_result(workspace, query)
        intent = ''
        # print( ['默认分类', '{}_{}'.format(i, q), query, 'NOINTENT', update_time] )
        # return ['默认分类', '{}_{}'.format(i, q), query, 'NOINTENT', update_time]
        # ws.append([customer_id, '{}_{}'.format(i, q), query,])
    return attitude, slot_dict, intent


def faq_result(workspace, query):
    response = requests.get(
        # 'http://8.142.85.77:8454/level_search?systemId={}&query={}'.format(workspace, query))
        'http://8.142.85.77:8456/level_search?systemId={}&query={}'.format(workspace, query))
    response = json.loads(response.text)
    if response['ch']:
        query = response['ch'][0]['original_query']
        intent = response['ch'][0]['standard_query']
        semantic = response['ch'][0]['semantic']
        return query, intent, semantic
    elif response['h']:
        query = response['h'][0]['original_query']
        intent = response['h'][0]['standard_query']
        semantic = response['h'][0]['semantic']
        return query, intent, semantic
    return '', '', ''
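

# --- Illustrative response shape (inferred from the parsing in nlu_result) ----
# The NLU service is external; the sample below only mirrors the fields that
# nlu_result() actually reads. Concrete values are invented for illustration.
_EXAMPLE_NLU_RESPONSE = {
    "original_attitude": "0",
    "slot": [
        {"slot_name": "权益介绍", "slot_value": ["免息券"]},
    ],
    "intention": [
        {"value": {"qtype": "faq",
                   "a": "answer text",
                   "original_query": "给您发放一张免息券",
                   "standard_query": "介绍产品权益-权益"}},
    ],
}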
def process_queries(current_query):
    # Single characters that may legitimately repeat and are kept doubled
    pass_word = [
        # '不需要了', '不需要', '不用了', '不用', '不要', '不要了', '一点', '考虑',
        '谢', '在', '看', '问', '想', '天', '刚', '试', '拜']
    '''
    remove_word = ['嗯,', '嗯。', '嗯',
                   ',好,', '。好,', '。好。',
                   ',行,', ',行。', '。行。',
                   ',对,', ',对。', '。对。']
    for remove in remove_word:
        if len( current_query.replace(remove, '') ) > 2:
            current_query = current_query.replace(remove, '')
    '''
    # Collapse immediately repeated substrings ("对对对对" -> "对")
    sign = True
    while sign:
        i = 0
        sign = False
        record_list = list()
        final_str = list()
        while i < len(current_query):
            repeat_num = 0
            for j in range(1, 7):
                target = current_query[i:i + j]
                while True:
                    if target == current_query[i + j * (repeat_num + 1):i + j * (repeat_num + 2)]:
                        repeat_num += 1
                    else:
                        break
                if repeat_num:
                    record_append = (i, j, repeat_num)
                    break
            if repeat_num:
                if target in pass_word or not (u'\u4e00' <= target <= u'\u9fff'):
                    final_str.append(target * 2)
                else:
                    final_str.append(target)
                sign = True
                record_list.append(target)
                i = i + j * (repeat_num + 1)
                continue
            final_str.append(current_query[i])
            i += 1
        current_query = ''.join(final_str)
    try:
        # Drop standalone "好" / "行" / "对" fillers at the start or end
        current_query_list = list()
        for i in range(len(current_query)):
            if current_query[i] in ['好', '行', '对']:
                if i == 0:
                    if current_query[i + 1] == ',':
                        continue
                elif i == len(current_query) - 1:
                    if current_query[i - 1] == ',':
                        continue
            current_query_list.append(current_query[i])
        if len(current_query_list) > 2:
            current_query = ''.join(current_query_list)
    except Exception:
        pass
    # Merge identical characters that are only separated by punctuation
    current_query_result = ''
    for i in range(len(current_query)):
        if current_query[i] in [',', '。', '?', '!']:
            for j in range(1, 5):
                if current_query[i - j if i - j >= 0 else 0:i] == current_query[i + 1:i + j + 1]:
                    current_query_result = current_query_result[:-1 * j]
                    break
        current_query_result += current_query[i]
    current_query = current_query_result
    # Normalize consecutive punctuation marks
    for point_sign in ['?。', '?,', ',。', ',,', '。,', '。。', ',,']:
        current_query = current_query.replace(point_sign, ',')
    if current_query and current_query[0] in [',', '。']:
        current_query = current_query[1:]
    return current_query


def rm_stop_word(query):
    stop_word = ['quiet', '@', '不好意思', '对不起', '好谢谢', '谢谢您', '谢谢你', '拜拜', '谢谢',
                 '好吧', '好嘞', '你好', '您好', '然后', '抱歉', '再见', '再见', '受累',
                 '姐夫', '嫂子', '大哥', '老妹', '兄弟', '美女',
                 '谢', '哦', '啊', '嘞', '喂', '啦', '唉', '哎', '哥', '姐', '哈', '呐',
                 '呃', '噢', '诶', '噢', '唔', '呢', '呀',
                 '嗯,', '嗯。', '嗯',
                 ',好,', '。好,', '。好。',
                 ',行,', ',行。', '。行。',
                 ',对,', ',对。', '。对。',
                 ]
    if query == "@@quiet@@":
        return query
    for stop in stop_word:
        # Only strip the stop word if something meaningful is left afterwards
        if len(query.replace(stop, '').replace(',', '').replace(',', '').replace('。', '').replace('?', '')) > 0:
            query = query.replace(stop, '')
            # query = query.replace(stop, '')
    for start_word in ['好,', '好。', '行,', '行。', '对,', '对。']:
        if query.startswith(start_word) and len(query[2:].replace(',', '').replace('。', '').replace('?', '')) >= 2:
            query = query[2:]
    return query


def nlu_fix(content, intent_set, slots_dict):
    # Opening: self-introduction and identity verification
    result = self_pattern.search(content)
    if result:
        intent_set.add('自报家门')
    result = check_pattern.search(content)
    if result:
        intent_set.add('开场核身')
    # Ending: closing words and anti-fraud reminder
    result = end_pattern.search(content)
    if result:
        intent_set.add('结束语')
    result = defeat_pattern.search(content)
    if result:
        intent_set.add('防诈骗话术')
    # Disclaimer entity
    result = mianze_pattern.search(content)
    if result:
        if '免责话术' not in slots_dict:
            slots_dict['免责话术'] = set()
        slots_dict['免责话术'].add(result.group())
    # Interest-free period entity
    result = mianxi_pattern.search(content)
    if result:
        if '免息券期限' not in slots_dict:
            slots_dict['免息券期限'] = set()
        slots_dict['免息券期限'].add(result.group())


def nlu_task(content, num):
    content = rm_stop_word(content)
    content = process_queries(content)
    content = rm_stop_word(content)
    content_list = content.replace('。', ',').split(',')
    # print(content_list)
    # Merge fragments shorter than 4 characters into the previous clause
    final_list = list()
    for i, content in enumerate(content_list):
        if i != 0:
            if len(final_list[-1]) < 4:
                final_list[-1] = final_list[-1] + content
            else:
                final_list.append(content)
        else:
            final_list.append(content)
    slots_dict = dict()
    intent_set = set()
    if len(final_list) > num:
        # Slide a window of `num` clauses over the utterance
        for i in range(len(final_list) - num + 1):
            content = ','.join(final_list[i:i + num])
            # print(content)
            # ws.append([customer_id, content])
            # result = nlu_result('250', content)
            attitude, slot_dict, intent = nlu_result('250', content)
            nlu_fix(content, intent_set, slots_dict)
            # Intent
            if intent:
                intent_set.add(intent)
            # Slots
            for key in slot_dict:
                if key not in slots_dict:
                    slots_dict[key] = set()
                slots_dict[key] = set.union(slots_dict[key], slot_dict[key])
                # slots_dict[key].extend(slot_dict[key])
    else:
        content = ','.join(final_list)
        # print( content )
        # result = nlu_result('250', content)
        attitude, slot_dict, intent = nlu_result('250', content)
        nlu_fix(content, intent_set, slots_dict)
        # Intent
        if intent:
            intent_set.add(intent)
        # Slots
        for key in slot_dict:
            if key not in slots_dict:
                slots_dict[key] = set()
            slots_dict[key] = set.union(slots_dict[key], slot_dict[key])
            # slots_dict[key].extend(slot_dict[key])
    # print( intent_set, slots_dict )
    slot_list = list()
    for slot in slots_dict:
        slot_list.append(slot)
        slot_list.append('_'.join(list(slots_dict[slot])))
    return intent_set, slots_dict
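

# --- Illustrative sketch of the text-cleanup step (hypothetical input) --------
# rm_stop_word() and process_queries() are pure string transforms, so they can
# be exercised locally without the external ASR/NLU services. The sample
# transcript below is invented; run the function to see the cleaned result.
def _example_clean_query():
    raw = '嗯,好好好,我我我想问一下,这个额度度度是多少,谢谢谢谢,'
    cleaned = rm_stop_word(raw)
    cleaned = process_queries(cleaned)
    cleaned = rm_stop_word(cleaned)
    print('before:', raw)
    print('after :', cleaned)
    return cleaned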
def normalize_number(enty, param='ch'):
    """Normalize a number: convert Chinese numerals to Arabic digits."""
    enty = re.search(u'([一二两三四五六七八九十千百零]+)', enty).group(1)
    if param == 'num':
        return enty
    else:
        level_list = [1, 10, 100, 1000, 10000]
        cn_unit = {u'十': 1, u'百': 2, u'千': 3, u'万': 4}
        cn_num = {u'一': 1, u'二': 2, u'两': 2,
                  u'三': 3, u'四': 4, u'五': 5,
                  u'六': 6, u'七': 7, u'八': 8,
                  u'九': 9, u'零': 0}
        unit_keys = cn_unit.keys()
        cn = list(enty)
        inter = set(cn).intersection(set(unit_keys))
        # No unit character found: fall back to the last digit character
        if not inter:
            digit = str(cn_num[cn[-1]])
            # digit = "".join([str(cn_num[term]) for term in cn if term in cn_num])
            return int(digit)
        # If the string ends with a unit and has no earlier unit (e.g. "三五百"),
        # keep only the last digit+unit pair ("五百")
        if cn[-1] in unit_keys and len(cn) > 1:
            inter_pre = set(cn[: len(cn) - 1]).intersection(set(unit_keys))
            if not inter_pre:
                cn = [cn[-2], cn[-1]]
        # Units present: accumulate digit * unit pairs
        digit = 0
        if cn[0] in unit_keys:
            if len(cn) == 1:
                digit = level_list[cn_unit[cn[0]]]
            else:
                digit = level_list[cn_unit[cn[0]]] + cn_num[cn[1]]
        else:
            i = 0
            while i < len(cn):
                if (i + 1) < len(cn) and cn[i + 1] in unit_keys:
                    digit += cn_num[cn[i]] * level_list[cn_unit[cn[i + 1]]]
                    i += 1
                else:
                    if cn[i - 1] == u'零':
                        digit += cn_num[cn[i]]
                    else:
                        digit += cn_num[cn[i]] * level_list[cn_unit[cn[i - 1]] - 1]
                i += 1
        return int(digit)


def judge_process(intent_set, slots_dict):
    # Whether any in-call quality rule is violated
    sign = False
    name_list = list()
    # Rules based on label combinations
    if '免息券使用举例:不合理' in intent_set and \
            '免息券使用举例:合理' not in intent_set:
        if '权益介绍' in slots_dict:
            sign = True
            name_list.append('刷单')
    # elif ''
    # Introducing benefits / adjustable limits and rates without the disclaimer
    if ('有利于获取更优权益' in intent_set or '介绍产品权益-权益' in intent_set) and \
            '免责话术' not in slots_dict:
        sign = True
        name_list.append('缺少免责话术')
    # Coupon / interest-free coupon combined with "the limit and rate will satisfy you"
    if ('刷单嫌疑' in intent_set) and ('优惠券' in slots_dict) and ('免责话术' not in slots_dict):
        sign = True
        name_list.append('刷单')
    # "Repay within N days" examples with a suspiciously small N
    if ('刷单嫌疑-快速还款' in intent_set) and ('举例时间' in slots_dict):
        for day in slots_dict['举例时间']:
            print(day)
            day = normalize_number(day[:-1])
            if day <= 15:
                sign = True
                name_list.append('刷单')
    # Rules triggered by a single label
    for key in rule_process:
        if key in intent_set:
            if key == '虚假承诺' and '免责话术' in slots_dict:
                continue
            sign = True
            name_list.append(key)
    return sign, name_list


def judge_end(intent_set):
    # Whether any opening/closing quality rule is violated
    sign = False
    name_list = list()
    # Required opening labels that were never hit
    for key in rule_start:
        if key not in intent_set:
            sign = True
            name_list.append(key)
    # Required closing labels that were never hit (only checked once the call reached a closing)
    for key in rule_end:
        if key not in intent_set and '结束语' in intent_set:
            sign = True
            name_list.append(key)
    return sign, name_list


def intent_judge(query):
    url = INTENT_URL.format(str(query))
    try:
        response = requests.get(url)
        result = json.loads(response.text)
        intent = result['label']
        return intent
    except Exception:
        pass
    return ''
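

# --- Illustrative sketch of the rule evaluation (made-up labels) --------------
# Shows how the label set produced by nlu_task()/nlu_fix() feeds into the rule
# checks above. The intent/slot values are invented for illustration; with the
# rules as written this flags '刷单' (a 15-day repayment example) plus a missing
# '开场核身' opening label.
def _example_judge_rules():
    intent_set = {'自报家门', '刷单嫌疑-快速还款', '结束语', '防诈骗话术'}
    slots_dict = {'举例时间': {'十五天'}}
    process_sign, process_hits = judge_process(intent_set, slots_dict)
    end_sign, end_hits = judge_end(intent_set)
    print(process_sign, process_hits)   # True ['刷单']
    print(end_sign, end_hits)           # True ['开场核身']
    return process_hits + end_hits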
def release_voice_file(voice_path):
    """
    :param voice_path: the object key WITHOUT the Endpoint and BucketName prefix
    :return response.status: HTTP status code of the restore request
    :tips never include the Endpoint and BucketName in voice_path
    """
    auth = oss2.Auth("LTAI4FwnosPHJVTfRU7FJEir", "qoehBZsJkDSrkaXAC5s8DhKXlDfuUh")
    # The endpoint must match the bucket's region, e.g. for East China 1 (Hangzhou)
    # it would be https://oss-cn-hangzhou.aliyuncs.com; the bucket name follows.
    bucket = oss2.Bucket(auth, 'oss-cn-zhangjiakou.aliyuncs.com', "lingxi-ai")
    response = bucket.restore_object(voice_path)
    # Leave ~500 ms between successive restore requests
    time.sleep(0.5)
    # After the request succeeds, the object becomes readable after a 5-30 s delay
    return response.status


def replease_cold_voice_file(voice_path):
    auth = oss2.Auth("LTAI4FwnosPHJVTfRU7FJEir", "qoehBZsJkDSrkaXAC5s8DhKXlDfuUh")
    # The endpoint must match the bucket's region, e.g. for East China 1 (Hangzhou)
    # it would be https://oss-cn-hangzhou.aliyuncs.com; the bucket name follows.
    bucket = oss2.Bucket(auth, 'oss-cn-zhangjiakou.aliyuncs.com', "lingxi-ai")
    # To upload an object directly as ColdArchive storage class, see:
    # bucket.put_object(object_name, 'yourContent', headers={"x-oss-storage-class": oss2.BUCKET_STORAGE_CLASS_COLD_ARCHIVE})
    # Restore priority for ColdArchive objects:
    # RESTORE_TIER_EXPEDITED: restored within 1 hour.
    # RESTORE_TIER_STANDARD: restored within 2-5 hours.
    # RESTORE_TIER_BULK: restored within 5-12 hours.
    job_parameters = RestoreJobParameters(RESTORE_TIER_STANDARD)
    # Restore configuration: here the object is restored within 5 hours and stays
    # restored for 2 days.
    # `days` is how long the object stays restored (default 1 day); it applies to
    # both Archive and ColdArchive objects.
    # `job_parameters` sets the restore priority and only applies to ColdArchive objects.
    restore_config = RestoreConfiguration(days=2, job_parameters=job_parameters)
    # Issue the restore request.
    response = bucket.restore_object(voice_path, input=restore_config)
    return response.status


if __name__ == '__main__':
    path = "http://lingxi-ai.oss-cn-zhangjiakou.aliyuncs.com/no_sense/shuidi/dianxiao/0331ca36-e890-4e8c-80d1-26756b094f52.wav"
    # Strip the Endpoint and BucketName from the full URL to get the object key
    voice_path = "no_sense/shuidi/dianxiao/0331ca36-e890-4e8c-80d1-26756b094f52.wav"
    statusCode = release_voice_file(voice_path=voice_path)
    print(statusCode)