# -*- coding: utf-8 -*-
import datetime
import os
import re
import oss2
import uuid
import time
import json
import logging
import asyncio
import wave
import librosa
import operator
import traceback
import requests
import webrtcvad
import websocket
import collections
import contextlib
import numpy as np
from typing import List
from pydub import AudioSegment
from scipy.io import wavfile
from tornado.web import RequestHandler
# Restore (thaw) cold-archived OSS objects
from oss2.models import RestoreJobParameters
from oss2.models import RestoreConfiguration
from oss2.models import RESTORE_TIER_EXPEDITED
from oss2.models import RESTORE_TIER_STANDARD
from oss2.models import RESTORE_TIER_BULK

wav_dir = './wav_dir'
# ASR_URL = 'ws://8.142.222.140/asr/v0.6'  # ali asr
ASR_URL = 'ws://172.26.215.111/asr/v0.8'  # jd asr
# ASR_URL = 'ws://116.196.96.191/asr/v0.8'  # 172.29.200.5
INTENT_URL = 'http://8.142.69.133:8677/predict?type=222&sessionId=-1&query={}'

# Quality-check labels that are violations whenever they appear in the dialogue
rule_process = [
    '刷单',
    '虚假承诺',
    '辱骂客户',
]
# Labels that must appear at the start of a call
rule_start = [
    '开场核身',
    '自报家门',
]
# Labels that must appear at the end of a call
rule_end = [
    '防诈骗话术',
]

# The patterns below deliberately include common ASR homophone mistakes
# (e.g. 工号/工好, 先生/先证) so they still match noisy transcripts.
self_pattern = re.compile(r'.*((京东|金融).{0,10}(客户经理|我号|光号|工号|工作编号|工好)).*')
check_pattern = re.compile(r'.*((这边|这边的话|边的话|您是).{0,10}(先生|先证|女士|女是)|请问(您|你)?是|是.{0,5}(先生|女士)吗).*')
end_pattern = re.compile(r'.*(再见|生活愉快|感谢.{0,5}接听).*')
defeat_pattern = re.compile(r'.*((房|防).{0,5}诈骗|诈骗.{0,10}(较多|提发|频发)|(直接|第一时间|马上|立刻).{0,10}报警|(电信|电视|网络).{0,20}诈骗|违法分子|不法分子).*')
mianze_pattern = re.compile(r'(具体.{0,10}(页面|系统|显示|审核|审批))')
mianxi_pattern = re.compile(r'([一二两三四五六七八九十千百零]+天.{0,5}免息)')


class Chat_record(object):
    """A single ASR sentence with its speaker role and time span."""

    def __init__(self, content, start_time, end_time, role, result_id):
        self.content = content
        self.start_time = start_time
        self.end_time = end_time
        self.role = role
        self.result_id = result_id
        self.webhook = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=25ea61a4-35b8-4ca3-9e8a-527136a9a367'
        self.webhook_token = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=cfc9a7ec-fae4-4802-9844-caae493e5ab4'

    def add_legal(self, legal_type, detail):
        self.legal = False
        self.legal_type = legal_type
        self.detail = detail


class LxAsrError(Exception):
    pass


class LxAsrClient:
    StartTranscription = "StartTranscription"
    StopTranscription = "StopTranscription"
    TranscriptionStarted = "TranscriptionStarted"
    TranscriptionResultChanged = "TranscriptionResultChanged"
    SentenceEnd = "SentenceEnd"
    TranscriptionCompleted = "TranscriptionCompleted"

    def __init__(self, ws_addr: str):
        self._ws_addr = ws_addr

    def send_pcm(self, pcm: bytes, sample_rate: int = 8000) -> List[str]:
        assert sample_rate in (8000, 16000)
        sentences: List[str] = []
        payload = {
            "sample_rate": sample_rate,
            "enable_punctuation": True,
            "punctuation_threshold": 250,
        }
        task_id = str(uuid.uuid1())
        start_msg = {
            "header": {
                "event_name": self.StartTranscription,
                "task_id": task_id,
            },
            "payload": payload,
        }
        stop_msg = {
            "header": {
                "event_name": self.StopTranscription,
            }
        }
        ws = websocket.create_connection(self._ws_addr, suppress_origin=True)
        ws.send(json.dumps(start_msg))
        resp = ws.recv()
        resp_dict = json.loads(resp)
        if resp_dict["header"]["event_name"] != self.TranscriptionStarted:
            raise LxAsrError(resp_dict["header"]["status_message"])
        ws.send(pcm, opcode=websocket.ABNF.OPCODE_BINARY)
        ws.send(json.dumps(stop_msg))
        while True:
            resp = ws.recv()
            resp_dict = json.loads(resp)
            event_name = resp_dict["header"]["event_name"]
            if event_name == self.SentenceEnd:
                sentences.append(resp_dict["payload"]["result"])
            elif event_name == self.TranscriptionCompleted:
                break
            elif resp_dict["header"]["status"] != 200:
                raise LxAsrError(resp_dict["header"]["status_message"])
        try:
            ws.close()
        except Exception as e:
            logging.error("asr ws close error: %s" % str(e))
        return sentences
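

# --- Illustrative usage sketch (not called anywhere in this module) -----------
# Minimal example of driving LxAsrClient directly, assuming ASR_URL points at a
# reachable streaming-ASR websocket and `path` is a mono 16-bit PCM wav recorded
# at 8 kHz or 16 kHz. The helper name below is hypothetical.
def _example_transcribe_wav(path: str) -> List[str]:
    with contextlib.closing(wave.open(path, 'rb')) as wf:
        assert wf.getnchannels() == 1 and wf.getsampwidth() == 2
        sample_rate = wf.getframerate()
        pcm = wf.readframes(wf.getnframes())
    client = LxAsrClient(ASR_URL)
    # Returns one string per recognized sentence, e.g. ['您好,请问是王先生吗,']
    return client.send_pcm(pcm, sample_rate)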
class PlatformBase(RequestHandler):

    def asr_send(self, pcm: bytes, sample_rate: int = 8000) -> List[str]:
        now = datetime.datetime.now().time()
        # Daytime window during which the huoshan ASR endpoint is preferred
        start = datetime.time(9, 0, 0)
        end = datetime.time(21, 0, 0)
        # Route to huoshan ASR inside the window, otherwise fall back to ASR_URL
        if start <= now <= end:
            logging.info("----------huoshan mode\n")
            # client = LxAsrClient('ws://180.184.36.44/asr/v0.7')
            client = LxAsrClient('ws://180.184.36.57:8800/asr/v0.8')
        else:
            logging.info("----------ali mode\n")
            client = LxAsrClient(ASR_URL)
        return client.send_pcm(pcm, sample_rate)

    def read_wave(self, path):
        """Reads a .wav file.

        Takes the path, and returns (PCM audio data, sample rate).
        """
        with contextlib.closing(wave.open(path, 'rb')) as wf:
            num_channels = wf.getnchannels()
            assert num_channels == 1
            sample_width = wf.getsampwidth()
            assert sample_width == 2
            sample_rate = wf.getframerate()
            assert sample_rate in (8000, 16000, 32000, 48000)
            pcm_data = wf.readframes(wf.getnframes())
            return pcm_data, sample_rate

    class Frame(object):
        """Represents a "frame" of audio data."""

        def __init__(self, bytes, timestamp, duration):
            self.bytes = bytes
            self.timestamp = timestamp
            self.duration = duration

    def frame_generator(self, frame_duration_ms, audio, sample_rate):
        """Generates audio frames from PCM audio data.

        Takes the desired frame duration in milliseconds, the PCM data, and
        the sample rate.

        Yields Frames of the requested duration.
        """
        n = int(sample_rate * (frame_duration_ms / 1000.0) * 2)
        offset = 0
        timestamp = 0.0
        duration = (float(n) / sample_rate) / 2.0
        while offset + n < len(audio):
            yield self.Frame(audio[offset:offset + n], timestamp, duration)
            timestamp += duration
            offset += n

    def vad_collector(self, sample_rate, frame_duration_ms, padding_duration_ms, vad, frames):
        """Filters out non-voiced audio frames.

        Given a webrtcvad.Vad and a source of audio frames, yields only
        the voiced audio.

        Uses a padded, sliding window algorithm over the audio frames.
        When more than 90% of the frames in the window are voiced (as
        reported by the VAD), the collector triggers and begins yielding
        audio frames. Then the collector waits until 90% of the frames in
        the window are unvoiced to detrigger.

        The window is padded at the front and back to provide a small
        amount of silence or the beginnings/endings of speech around the
        voiced frames.

        Arguments:

        sample_rate - The audio sample rate, in Hz.
        frame_duration_ms - The frame duration in milliseconds.
        padding_duration_ms - The amount to pad the window, in milliseconds.
        vad - An instance of webrtcvad.Vad.
        frames - a source of audio frames (sequence or generator).

        Returns: A generator that yields PCM audio data.
        """
        num_padding_frames = int(padding_duration_ms / frame_duration_ms)
        # We use a deque for our sliding window/ring buffer.
        ring_buffer = collections.deque(maxlen=num_padding_frames)
        # We have two states: TRIGGERED and NOTTRIGGERED. We start in the
        # NOTTRIGGERED state.
        triggered = False

        time_list = []
        voiced_frames = []
        for frame in frames:
            is_speech = vad.is_speech(frame.bytes, sample_rate)
            if not triggered:
                ring_buffer.append((frame, is_speech))
                num_voiced = len([f for f, speech in ring_buffer if speech])
                # If we're NOTTRIGGERED and more than 90% of the frames in
                # the ring buffer are voiced frames, then enter the
                # TRIGGERED state.
                if num_voiced > 0.9 * ring_buffer.maxlen:
                    triggered = True
                    time_list.append(ring_buffer[0][0].timestamp)
                    # print('starttime', ring_buffer[0][0].timestamp)
                    # We want to yield all the audio we see from now until
                    # we are NOTTRIGGERED, but we have to start with the
                    # audio that's already in the ring buffer.
                    for f, s in ring_buffer:
                        voiced_frames.append(f)
                    ring_buffer.clear()
            else:
                # We're in the TRIGGERED state, so collect the audio data
                # and add it to the ring buffer.
                voiced_frames.append(frame)
                ring_buffer.append((frame, is_speech))
                num_unvoiced = len([f for f, speech in ring_buffer if not speech])
                # If more than 90% of the frames in the ring buffer are
                # unvoiced, then enter NOTTRIGGERED and yield whatever
                # audio we've collected.
                if num_unvoiced > 0.9 * ring_buffer.maxlen:
                    time_list.append(frame.timestamp + frame.duration)
                    # print('endtime =', frame.timestamp + frame.duration)
                    triggered = False
                    yield b''.join([f.bytes for f in voiced_frames]), time_list
                    voiced_frames = []
                    time_list.clear()
        if triggered:
            time_list.append(frame.timestamp + frame.duration)
        # If we have any leftover voiced audio when we run out of input,
        # yield it.
        if voiced_frames:
            yield b''.join([f.bytes for f in voiced_frames]), time_list

    def asr_audio(self, wav) -> List[dict]:
        # print('wav name =', wav)
        # Skip files that are effectively empty or unreadable as audio
        if librosa.load(wav, sr=8000)[0].size < 3:
            # print(wav, 'no wav data open error')
            return []
        set_vadmode = 3  # VAD aggressiveness: 0, 1, 2 or 3
        wav_result: List[dict] = []
        audio, sample_rate = self.read_wave(wav)
        vad = webrtcvad.Vad(set_vadmode)
        frames = self.frame_generator(30, audio, sample_rate)
        frames = list(frames)
        segments = self.vad_collector(sample_rate, 30, 300, vad, frames)
        for i, segment in enumerate(segments):
            part_result = {}
            part_result['start_time'] = segment[1][0]
            part_result['end_time'] = segment[1][1]
            part_result['wav_name'] = wav
            asr_result: List[str] = self.asr_send(segment[0], sample_rate)
            if asr_result != []:
                part_result['result'] = "".join(asr_result)
                wav_result.append(part_result)
        return wav_result

    def download_to_wav(self, url, id, ip):
        # response = request.urlopen(url)
        response = requests.get(url)
        try:
            if response.status_code != 200:
                # The recording may still be archived on OSS: request a
                # restore, wait, then retry the download once.
                voice_path = '/'.join(url.split('/')[3:])
                release_voice_file(voice_path)
                time.sleep(60)
                response = requests.get(url)
        except Exception:
            pass
        self.file_path = ''
        if url[-3:] == 'wav':
            self.file_path = '{}/{}_{}.wav'.format(wav_dir, ip, id)
            with open(self.file_path, 'wb') as fp:
                fp.write(response.content)
                # fp.write(response.read())
                fp.flush()
        elif url[-3:] == '.V3':
            self.file_path = '{}/{}_{}.wav'.format(wav_dir, ip, id)
            with open(self.file_path, 'wb') as fp:
                fp.write(response.content)
                # fp.write(response.read())
                fp.flush()
        elif url[-3:] == 'mp3':
            self.file_path = '{}/{}_{}.mp3'.format(wav_dir, ip, id)
            c = 'wget "{}" -c -T 10 -t 10 -O "{}"'.format(url, self.file_path)
            os.system(c)
            c = 'ffmpeg -i "{}" -f wav "{}"'.format(self.file_path, self.file_path[:-3] + 'wav')
            os.system(c)
            os.remove(self.file_path)
            self.file_path = '{}/{}_{}.wav'.format(wav_dir, ip, id)
            # song.export( self.file_path, format='wav' )
        elif url.split('?')[0].split('/')[-1][-3:] == 'mp3':
            self.file_path = '{}/{}_{}.mp3'.format(wav_dir, ip, id)
            c = 'wget "{}" -c -T 10 -t 10 -O "{}"'.format(url, self.file_path)
            os.system(c)
            c = 'ffmpeg -i "{}" -f wav "{}"'.format(self.file_path, self.file_path[:-3] + 'wav')
            os.system(c)
            os.remove(self.file_path)
            self.file_path = self.file_path[:-3] + 'wav'
        elif url.split('?')[0].split('/')[-1][-4:] == 'flac':
            self.file_path = '{}/{}_{}.flac'.format(wav_dir, ip, id)
            self.file_path_human = ''
            c = 'wget "{}" -c -T 10 -t 10 -O "{}"'.format(url, self.file_path)
            os.system(c)
            c = 'ffmpeg -i "{}" -f wav "{}"'.format(self.file_path, self.file_path[:-4] + 'wav')
            os.system(c)
            os.remove(self.file_path)
            self.file_path = self.file_path[:-4] + 'wav'
        elif url.split('?')[0].split('/')[-1][-3:] == 'wav':
            self.file_path = '{}/{}_{}.wav'.format(wav_dir, ip, id)
            with open(self.file_path, 'wb') as fp:
                fp.write(response.content)
                fp.flush()

    def get_human_port_from(self, human_answer_duration):
        song = AudioSegment.from_wav(self.file_path)
        try:
            # Keep only the tail of the recording where the human agent answered
            human_part = song[-1 * (int(human_answer_duration) + 3) * 1000:]
            human_part.export(self.file_path, format='wav')
        except Exception:
            pass

    def convert_to_record(self, results, role, result_id):
        records = []
        for result in results:
            # print( result )
            if result.get('result', ''):
                content = result.get('result', '')
                start_time = result['start_time']
                end_time = result['end_time']
                record = Chat_record(content, start_time, end_time, role, result_id)
                records.append(record)
        return records

    def sort_records(self, chat_records):
        cmpfun = operator.attrgetter('start_time')
        chat_records.sort(key=cmpfun)
        return chat_records

    def sample_from_file(self):
        try:
            samplerate, data = wavfile.read(self.file_path)
        except Exception:
            try:
                c = 'ffmpeg -i "{}" "{}"'.format(self.file_path, self.file_path[:-4] + '_fix.wav')
                os.system(c)
                os.remove(self.file_path)
                self.file_path = self.file_path[:-4] + '_fix.wav'
                samplerate, data = wavfile.read(self.file_path)
            except Exception:
                traceback.print_exc()
                os.remove(self.file_path)
                return 0, 0, 0
        if samplerate not in [8000, 16000]:
            c = 'sox "{}" -r 16000 "{}"'.format(self.file_path, self.file_path[:-4] + '_down.wav')
            os.system(c)
            os.remove(self.file_path)
            self.file_path = self.file_path[:-4] + '_down.wav'
            samplerate, data = wavfile.read(self.file_path)
        duration = data.shape[0] // samplerate
        return samplerate, data, duration

    def get_content(self, id):
        # self.file_path = './saleCallRecord_RPA_huadan_2021-12-16_a5062471-bf90-452c-95f4-a05b47986980_D638325469292063042148.wav'
        samplerate, data, duration = self.sample_from_file()
        if samplerate == 0 and data == 0:
            return 0, 0, 0, 0
        duration = data.shape[0] // samplerate
        left = []
        right = []
        if len(data.shape) == 2:
            # Stereo recording: left channel is the staff, right channel the user
            for item in data:
                left.append(item[0])
                right.append(item[1])
            self.file_path_left = self.file_path[:-4] + '_left.wav'
            self.file_path_right = self.file_path[:-4] + '_right.wav'
            wavfile.write(self.file_path_left, samplerate, np.array(left))
            wavfile.write(self.file_path_right, samplerate, np.array(right))
            left_result = self.asr_audio(self.file_path_left)
            right_result = self.asr_audio(self.file_path_right)
            left_record = self.convert_to_record(left_result, 'staff', id)
            right_record = self.convert_to_record(right_result, 'user', id)
            os.remove(self.file_path)
            os.remove(self.file_path_left)
            os.remove(self.file_path_right)
            right_record.extend(left_record)
            right_record = self.sort_records(right_record)
            return right_record, duration, 2, 1
        else:
            result = self.asr_audio(self.file_path)
            record = self.convert_to_record(result, 'staff', id)
            os.remove(self.file_path)
            return record, duration, 1, 1

    def optimize_record(self, chat_record):
        content_list = list()
        if chat_record:
            for c in chat_record:
                # Drop empty sentences and common ASR filler/noise transcriptions
                if c.content not in ['', ',', '正在呼叫请稍候,', '用户已挂机',
                                     '对对对对对对对,', '对对对对对对,', '对对对对对,', '对对对对,',
                                     '多多多多多多多,', '多多多多多多,', '多多多多多,', '多多多多,', '多多多',
                                     ]:
                    # c.content = c.content.replace(',', '')
                    content_list.append([c.content, 0 if c.role == 'staff' else 1, c.start_time, c.end_time])
        return content_list
def nlu_result(workspace, query):
    update_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(round(time.time() * 1000)) / 1000))
    response = requests.get(
        # 'http://8.142.85.77:8679/nlu?session_id=-1&workspace={}&current_query={}'.format(workspace, query))
        'http://8.142.85.77:8680/nlu?session_id=-1&workspace={}&current_query={}'.format(workspace, query))
    response = json.loads(response.text)
    # Attitude
    attitude = int(response.get('original_attitude', 0))
    # Slots (entities)
    slot_dict = dict()
    for slot in response.get('slot', []):
        if slot['slot_name'] not in slot_dict:
            slot_dict[slot['slot_name']] = list()
        slot_dict[slot['slot_name']].append(slot['slot_value'][0])
    # Intent
    intent = ''
    if response.get('intention', '') and response['intention'][0].get('value', ''):
        intent = response.get('intention', '')[0].get('value', '')
        # print( query, intent, type(intent) )
        q_type = intent['qtype']
        answer = intent['a']
        query = intent['original_query']
        intent = intent['standard_query']
        # print( [q_type, intent, query, answer, update_time] )
        # print( intent )
        # ws.append([customer_id, intent, query])
        # ws.append([customer_id, intent])
        # return [q_type, intent, query, answer, update_time]
    else:
        q, i, s = faq_result(workspace, query)
        intent = ''
        # print( ['默认分类', '{}_{}'.format(i, q), query, 'NOINTENT', update_time] )
        # return ['默认分类', '{}_{}'.format(i, q), query, 'NOINTENT', update_time]
        # ws.append([customer_id, '{}_{}'.format(i, q), query,])
    return attitude, slot_dict, intent


def faq_result(workspace, query):
    response = requests.get(
        # 'http://8.142.85.77:8454/level_search?systemId={}&query={}'.format(workspace, query))
        'http://8.142.85.77:8456/level_search?systemId={}&query={}'.format(workspace, query))
    response = json.loads(response.text)
    if response['ch']:
        query = response['ch'][0]['original_query']
        intent = response['ch'][0]['standard_query']
        semantic = response['ch'][0]['semantic']
        return query, intent, semantic
    elif response['h']:
        query = response['h'][0]['original_query']
        intent = response['h'][0]['standard_query']
        semantic = response['h'][0]['semantic']
        return query, intent, semantic
    return '', '', ''
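

# --- Illustrative response shape (inferred from the parsing in nlu_result) ----
# The NLU service is external; the sample below only mirrors the fields that
# nlu_result() actually reads. Concrete values are invented for illustration.
_EXAMPLE_NLU_RESPONSE = {
    "original_attitude": "0",
    "slot": [
        {"slot_name": "权益介绍", "slot_value": ["免息券"]},
    ],
    "intention": [
        {"value": {"qtype": "faq",
                   "a": "answer text",
                   "original_query": "给您发放一张免息券",
                   "standard_query": "介绍产品权益-权益"}},
    ],
}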
def process_queries(current_query):
    # Single characters that may legitimately repeat and are kept doubled
    pass_word = [
        # '不需要了', '不需要', '不用了', '不用', '不要', '不要了', '一点', '考虑',
        '谢', '在', '看', '问', '想', '天', '刚', '试', '拜']
    '''
    remove_word = ['嗯,', '嗯。', '嗯',
                   ',好,', '。好,', '。好。',
                   ',行,', ',行。', '。行。',
                   ',对,', ',对。', '。对。']
    for remove in remove_word:
        if len( current_query.replace(remove, '') ) > 2:
            current_query = current_query.replace(remove, '')
    '''
    # Collapse immediately repeated substrings ("对对对对" -> "对")
    sign = True
    while sign:
        i = 0
        sign = False
        record_list = list()
        final_str = list()
        while i < len(current_query):
            repeat_num = 0
            for j in range(1, 7):
                target = current_query[i:i + j]
                while True:
                    if target == current_query[i + j * (repeat_num + 1):i + j * (repeat_num + 2)]:
                        repeat_num += 1
                    else:
                        break
                if repeat_num:
                    record_append = (i, j, repeat_num)
                    break
            if repeat_num:
                if target in pass_word or not (u'\u4e00' <= target <= u'\u9fff'):
                    final_str.append(target * 2)
                else:
                    final_str.append(target)
                sign = True
                record_list.append(target)
                i = i + j * (repeat_num + 1)
                continue
            final_str.append(current_query[i])
            i += 1
        current_query = ''.join(final_str)
    try:
        # Drop standalone "好" / "行" / "对" fillers at the start or end
        current_query_list = list()
        for i in range(len(current_query)):
            if current_query[i] in ['好', '行', '对']:
                if i == 0:
                    if current_query[i + 1] == ',':
                        continue
                elif i == len(current_query) - 1:
                    if current_query[i - 1] == ',':
                        continue
            current_query_list.append(current_query[i])
        if len(current_query_list) > 2:
            current_query = ''.join(current_query_list)
    except Exception:
        pass
    # Merge identical characters that are only separated by punctuation
    current_query_result = ''
    for i in range(len(current_query)):
        if current_query[i] in [',', '。', '?', '!']:
            for j in range(1, 5):
                if current_query[i - j if i - j >= 0 else 0:i] == current_query[i + 1:i + j + 1]:
                    current_query_result = current_query_result[:-1 * j]
                    break
        current_query_result += current_query[i]
    current_query = current_query_result
    # Normalize consecutive punctuation marks
    for point_sign in ['?。', '?,', ',。', ',,', '。,', '。。', ',,']:
        current_query = current_query.replace(point_sign, ',')
    if current_query and current_query[0] in [',', '。']:
        current_query = current_query[1:]
    return current_query


def rm_stop_word(query):
    stop_word = ['quiet', '@', '不好意思', '对不起', '好谢谢', '谢谢您', '谢谢你', '拜拜', '谢谢',
                 '好吧', '好嘞', '你好', '您好', '然后', '抱歉', '再见', '再见', '受累',
                 '姐夫', '嫂子', '大哥', '老妹', '兄弟', '美女',
                 '谢', '哦', '啊', '嘞', '喂', '啦', '唉', '哎', '哥', '姐', '哈', '呐',
                 '呃', '噢', '诶', '噢', '唔', '呢', '呀',
                 '嗯,', '嗯。', '嗯',
                 ',好,', '。好,', '。好。',
                 ',行,', ',行。', '。行。',
                 ',对,', ',对。', '。对。',
                 ]
    if query == "@@quiet@@":
        return query
    for stop in stop_word:
        # Only strip the stop word if something meaningful is left afterwards
        if len(query.replace(stop, '').replace(',', '').replace(',', '').replace('。', '').replace('?', '')) > 0:
            query = query.replace(stop, '')
            # query = query.replace(stop, '')
    for start_word in ['好,', '好。', '行,', '行。', '对,', '对。']:
        if query.startswith(start_word) and len(query[2:].replace(',', '').replace('。', '').replace('?', '')) >= 2:
            query = query[2:]
    return query


def nlu_fix(content, intent_set, slots_dict):
    # Opening: self-introduction and identity verification
    result = self_pattern.search(content)
    if result:
        intent_set.add('自报家门')
    result = check_pattern.search(content)
    if result:
        intent_set.add('开场核身')
    # Ending: closing words and anti-fraud reminder
    result = end_pattern.search(content)
    if result:
        intent_set.add('结束语')
    result = defeat_pattern.search(content)
    if result:
        intent_set.add('防诈骗话术')
    # Disclaimer entity
    result = mianze_pattern.search(content)
    if result:
        if '免责话术' not in slots_dict:
            slots_dict['免责话术'] = set()
        slots_dict['免责话术'].add(result.group())
    # Interest-free period entity
    result = mianxi_pattern.search(content)
    if result:
        if '免息券期限' not in slots_dict:
            slots_dict['免息券期限'] = set()
        slots_dict['免息券期限'].add(result.group())


def nlu_task(content, num):
    content = rm_stop_word(content)
    content = process_queries(content)
    content = rm_stop_word(content)
    content_list = content.replace('。', ',').split(',')
    # print(content_list)
    # Merge fragments shorter than 4 characters into the previous clause
    final_list = list()
    for i, content in enumerate(content_list):
        if i != 0:
            if len(final_list[-1]) < 4:
                final_list[-1] = final_list[-1] + content
            else:
                final_list.append(content)
        else:
            final_list.append(content)
    slots_dict = dict()
    intent_set = set()
    if len(final_list) > num:
        # Slide a window of `num` clauses over the utterance
        for i in range(len(final_list) - num + 1):
            content = ','.join(final_list[i:i + num])
            # print(content)
            # ws.append([customer_id, content])
            # result = nlu_result('250', content)
            attitude, slot_dict, intent = nlu_result('250', content)
            nlu_fix(content, intent_set, slots_dict)
            # Intent
            if intent:
                intent_set.add(intent)
            # Slots
            for key in slot_dict:
                if key not in slots_dict:
                    slots_dict[key] = set()
                slots_dict[key] = set.union(slots_dict[key], slot_dict[key])
                # slots_dict[key].extend(slot_dict[key])
    else:
        content = ','.join(final_list)
        # print( content )
        # result = nlu_result('250', content)
        attitude, slot_dict, intent = nlu_result('250', content)
        nlu_fix(content, intent_set, slots_dict)
        # Intent
        if intent:
            intent_set.add(intent)
        # Slots
        for key in slot_dict:
            if key not in slots_dict:
                slots_dict[key] = set()
            slots_dict[key] = set.union(slots_dict[key], slot_dict[key])
            # slots_dict[key].extend(slot_dict[key])
    # print( intent_set, slots_dict )
    slot_list = list()
    for slot in slots_dict:
        slot_list.append(slot)
        slot_list.append('_'.join(list(slots_dict[slot])))
    return intent_set, slots_dict
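

# --- Illustrative sketch of the text-cleanup step (hypothetical input) --------
# rm_stop_word() and process_queries() are pure string transforms, so they can
# be exercised locally without the external ASR/NLU services. The sample
# transcript below is invented; run the function to see the cleaned result.
def _example_clean_query():
    raw = '嗯,好好好,我我我想问一下,这个额度度度是多少,谢谢谢谢,'
    cleaned = rm_stop_word(raw)
    cleaned = process_queries(cleaned)
    cleaned = rm_stop_word(cleaned)
    print('before:', raw)
    print('after :', cleaned)
    return cleaned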
def normalize_number(enty, param='ch'):
    """Normalize a number: convert Chinese numerals to Arabic digits."""
    enty = re.search(u'([一二两三四五六七八九十千百零]+)', enty).group(1)
    if param == 'num':
        return enty
    else:
        level_list = [1, 10, 100, 1000, 10000]
        cn_unit = {u'十': 1, u'百': 2, u'千': 3, u'万': 4}
        cn_num = {u'一': 1, u'二': 2, u'两': 2,
                  u'三': 3, u'四': 4, u'五': 5,
                  u'六': 6, u'七': 7, u'八': 8,
                  u'九': 9, u'零': 0}
        unit_keys = cn_unit.keys()
        cn = list(enty)
        inter = set(cn).intersection(set(unit_keys))
        # No unit character found: fall back to the last digit character
        if not inter:
            digit = str(cn_num[cn[-1]])
            # digit = "".join([str(cn_num[term]) for term in cn if term in cn_num])
            return int(digit)
        # If the string ends with a unit and has no earlier unit (e.g. "三五百"),
        # keep only the last digit+unit pair ("五百")
        if cn[-1] in unit_keys and len(cn) > 1:
            inter_pre = set(cn[: len(cn) - 1]).intersection(set(unit_keys))
            if not inter_pre:
                cn = [cn[-2], cn[-1]]
        # Units present: accumulate digit * unit pairs
        digit = 0
        if cn[0] in unit_keys:
            if len(cn) == 1:
                digit = level_list[cn_unit[cn[0]]]
            else:
                digit = level_list[cn_unit[cn[0]]] + cn_num[cn[1]]
        else:
            i = 0
            while i < len(cn):
                if (i + 1) < len(cn) and cn[i + 1] in unit_keys:
                    digit += cn_num[cn[i]] * level_list[cn_unit[cn[i + 1]]]
                    i += 1
                else:
                    if cn[i - 1] == u'零':
                        digit += cn_num[cn[i]]
                    else:
                        digit += cn_num[cn[i]] * level_list[cn_unit[cn[i - 1]] - 1]
                i += 1
        return int(digit)


def judge_process(intent_set, slots_dict):
    # Whether any in-call quality rule is violated
    sign = False
    name_list = list()
    # Rules based on label combinations
    if '免息券使用举例:不合理' in intent_set and \
            '免息券使用举例:合理' not in intent_set:
        if '权益介绍' in slots_dict:
            sign = True
            name_list.append('刷单')
    # elif ''
    # Introducing benefits / adjustable limits and rates without the disclaimer
    if ('有利于获取更优权益' in intent_set or '介绍产品权益-权益' in intent_set) and \
            '免责话术' not in slots_dict:
        sign = True
        name_list.append('缺少免责话术')
    # Coupon / interest-free coupon combined with "the limit and rate will satisfy you"
    if ('刷单嫌疑' in intent_set) and ('优惠券' in slots_dict) and ('免责话术' not in slots_dict):
        sign = True
        name_list.append('刷单')
    # "Repay within N days" examples with a suspiciously small N
    if ('刷单嫌疑-快速还款' in intent_set) and ('举例时间' in slots_dict):
        for day in slots_dict['举例时间']:
            print(day)
            day = normalize_number(day[:-1])
            if day <= 15:
                sign = True
                name_list.append('刷单')
    # Rules triggered by a single label
    for key in rule_process:
        if key in intent_set:
            if key == '虚假承诺' and '免责话术' in slots_dict:
                continue
            sign = True
            name_list.append(key)
    return sign, name_list


def judge_end(intent_set):
    # Whether any opening/closing quality rule is violated
    sign = False
    name_list = list()
    # Required opening labels that were never hit
    for key in rule_start:
        if key not in intent_set:
            sign = True
            name_list.append(key)
    # Required closing labels that were never hit (only checked once the call reached a closing)
    for key in rule_end:
        if key not in intent_set and '结束语' in intent_set:
            sign = True
            name_list.append(key)
    return sign, name_list


def intent_judge(query):
    url = INTENT_URL.format(str(query))
    try:
        response = requests.get(url)
        result = json.loads(response.text)
        intent = result['label']
        return intent
    except Exception:
        pass
    return ''
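

# --- Illustrative sketch of the rule evaluation (made-up labels) --------------
# Shows how the label set produced by nlu_task()/nlu_fix() feeds into the rule
# checks above. The intent/slot values are invented for illustration; with the
# rules as written this flags '刷单' (a 15-day repayment example) plus a missing
# '开场核身' opening label.
def _example_judge_rules():
    intent_set = {'自报家门', '刷单嫌疑-快速还款', '结束语', '防诈骗话术'}
    slots_dict = {'举例时间': {'十五天'}}
    process_sign, process_hits = judge_process(intent_set, slots_dict)
    end_sign, end_hits = judge_end(intent_set)
    print(process_sign, process_hits)   # True ['刷单']
    print(end_sign, end_hits)           # True ['开场核身']
    return process_hits + end_hits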
def release_voice_file(voice_path):
    """
    :param voice_path: the object key WITHOUT the Endpoint and BucketName prefix
    :return response.status: HTTP status code of the restore request
    :tips never include the Endpoint and BucketName in voice_path
    """
    auth = oss2.Auth("LTAI4FwnosPHJVTfRU7FJEir", "qoehBZsJkDSrkaXAC5s8DhKXlDfuUh")
    # The endpoint must match the bucket's region, e.g. for East China 1 (Hangzhou)
    # it would be https://oss-cn-hangzhou.aliyuncs.com; the bucket name follows.
    bucket = oss2.Bucket(auth, 'oss-cn-zhangjiakou.aliyuncs.com', "lingxi-ai")
    response = bucket.restore_object(voice_path)
    # Leave ~500 ms between successive restore requests
    time.sleep(0.5)
    # After the request succeeds, the object becomes readable after a 5-30 s delay
    return response.status


def replease_cold_voice_file(voice_path):
    auth = oss2.Auth("LTAI4FwnosPHJVTfRU7FJEir", "qoehBZsJkDSrkaXAC5s8DhKXlDfuUh")
    # The endpoint must match the bucket's region, e.g. for East China 1 (Hangzhou)
    # it would be https://oss-cn-hangzhou.aliyuncs.com; the bucket name follows.
    bucket = oss2.Bucket(auth, 'oss-cn-zhangjiakou.aliyuncs.com', "lingxi-ai")
    # To upload an object directly as ColdArchive storage class, see:
    # bucket.put_object(object_name, 'yourContent', headers={"x-oss-storage-class": oss2.BUCKET_STORAGE_CLASS_COLD_ARCHIVE})
    # Restore priority for ColdArchive objects:
    # RESTORE_TIER_EXPEDITED: restored within 1 hour.
    # RESTORE_TIER_STANDARD: restored within 2-5 hours.
    # RESTORE_TIER_BULK: restored within 5-12 hours.
    job_parameters = RestoreJobParameters(RESTORE_TIER_STANDARD)
    # Restore configuration: here the object is restored within 5 hours and stays
    # restored for 2 days.
    # `days` is how long the object stays restored (default 1 day); it applies to
    # both Archive and ColdArchive objects.
    # `job_parameters` sets the restore priority and only applies to ColdArchive objects.
    restore_config = RestoreConfiguration(days=2, job_parameters=job_parameters)
    # Issue the restore request.
    response = bucket.restore_object(voice_path, input=restore_config)
    return response.status


if __name__ == '__main__':
    path = "http://lingxi-ai.oss-cn-zhangjiakou.aliyuncs.com/no_sense/shuidi/dianxiao/0331ca36-e890-4e8c-80d1-26756b094f52.wav"
    # Strip the Endpoint and BucketName from the full URL to get the object key
    voice_path = "no_sense/shuidi/dianxiao/0331ca36-e890-4e8c-80d1-26756b094f52.wav"
    statusCode = release_voice_file(voice_path=voice_path)
    print(statusCode)