import shutil
import sys
import os
import time
import requests
from requests.exceptions import ProxyError, ConnectionError, ConnectTimeout, ReadTimeout
import json
import zipfile
import subprocess
import torch
import shlex
import torchaudio
import numpy as np
import pysrt
from silero_vad import get_speech_timestamps, read_audio, save_audio
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from celery_tasks.celery import app
from config import TELEGRAM_API,SUBSOURCE_BASE_URL,SUBSOURCE_API_KEY,SUBTITLE_DIR,DOWNLOAD_DIR,SOFTSUB_DIR,THUMB_DIR,STREAMS_DIR
from base.utils.funcs import identify_seasons_from_irregular_filename,identify_episodes_from_irregular_filename,identify_release,run_command
from base.utils.subtitle_editor import *

def send_request(method, data):
    """POST a JSON payload to a Telegram Bot API method and return the response.

    Args:
        method: Bot API method name appended to ``TELEGRAM_API``.
        data: JSON-serialisable payload sent as the request body.

    Returns:
        The ``requests.Response`` object, unmodified (status is not checked).
    """
    # Only 'getFile' gets an explicit 600 s timeout; every other method keeps
    # requests' default of ``None`` (no timeout).
    timeout = 600 if method == 'getFile' else None
    return requests.post(f'{TELEGRAM_API}{method}', json=data, timeout=timeout)

def upload_video(method, data, files):
    """POST a multipart form (fields plus file handles) to a Telegram Bot API method.

    Args:
        method: Bot API method name appended to ``TELEGRAM_API``.
        data: Form fields sent alongside the files.
        files: Mapping passed to ``requests`` as the multipart ``files`` argument.

    Returns:
        The ``requests.Response`` object, unmodified.
    """
    url = f'{TELEGRAM_API}{method}'
    return requests.post(url, data=data, files=files)

def request_to_subsource(endpoint: str, query: str, headers: dict, max_retries: int = 100):
    """GET a SubSource endpoint through the proxy, retrying until a usable reply arrives.

    A reply is considered usable when its status is 200 and it does not look
    like a Cloudflare challenge page. Connection-level errors and non-200
    statuses are retried after a 5 second pause; detected challenges are
    retried immediately.

    Retries are performed in a loop rather than by recursion, so a long outage
    can no longer exhaust the interpreter's recursion limit.

    Args:
        endpoint: Path appended to ``SUBSOURCE_BASE_URL``.
        query: Query string or sub-path appended after ``endpoint``.
        headers: HTTP headers sent with the request.
        max_retries: Number of retries allowed after the first attempt.

    Returns:
        The ``requests.Response`` of the first usable reply.

    Raises:
        RuntimeError: If no usable reply was obtained within ``max_retries``
            retries.
    """
    # NOTE(review): proxy credentials are committed to source control. They
    # should be rotated and supplied via configuration; the SUBSOURCE_PROXY
    # environment variable overrides the built-in value when set.
    proxy = os.environ.get(
        "SUBSOURCE_PROXY",
        "http://apiec803a20e08d2684_c_US:driS9IZ2@res.proxy-seller.com:10000",
    )
    proxies = {"http": proxy, "https": proxy}
    url = f'{SUBSOURCE_BASE_URL}{endpoint}{query}'

    for _ in range(max_retries + 1):
        try:
            resp = requests.get(url, headers=headers, proxies=proxies, timeout=20)
        except (ProxyError, ConnectionError, ConnectTimeout, ReadTimeout) as e:
            print(f"Proxy/Connection error ({e}) — retrying request with new IP...")
            time.sleep(5)
            continue

        if resp.status_code != 200:
            print(f"{resp.status_code} error detected — retrying request with new IP...")
            time.sleep(5)
            continue

        # Header names and values are lower-cased so the checks below are
        # case-insensitive.
        resp_headers = {k.lower(): v.lower() for k, v in resp.headers.items()}
        cf_mitigated = resp_headers.get("cf-mitigated")
        connection_type = resp_headers.get("connection")
        cache_control = resp_headers.get("cache-control", "")
        is_challenge = (
            cf_mitigated == "challenge" or
            ("no-cache" in cache_control and "private" in cache_control and connection_type == "close")
        )
        if is_challenge:
            print("Cloudflare challenge detected — retrying request with new IP...")
            continue

        return resp

    raise RuntimeError(
        f"SubSource request to {endpoint}{query} did not succeed after {max_retries} retries"
    )

def download_and_preparing_subtitles(subtitle_items: list, file_id: str, file_episode: int, title_type: int):
    """Download each candidate subtitle archive and turn it into a prepared ``.srt``.

    For every ``(subtitle_id, language)`` item the zip is fetched from
    SubSource, one member is extracted, converted to SRT if needed, renamed to
    ``{position}_{file_id}.srt`` (position is the 1-based index of the item),
    converted to UTF-8, ad-stripped for Persian subtitles, and moved into
    ``SUBTITLE_DIR`` unless a file of that name already exists there.

    Processing is best-effort: a failure on one item is logged and the item is
    skipped; the remaining items are still processed.

    Args:
        subtitle_items: Sequence of items whose first two elements are the
            SubSource subtitle id and its language.
        file_id: Identifier used to name the prepared subtitle files.
        file_episode: Episode of the target video; used to pick the matching
            member from multi-file archives.
        title_type: Forwarded to ``extract_info`` when inspecting member names.

    Returns:
        Dict mapping prepared subtitle file name to its language.
    """
    subtitles = {}
    for position, subtitle_item in enumerate(subtitle_items, start=1):
        subtitle_id = subtitle_item[0]
        subtitle_lang = subtitle_item[1]
        headers = {
            "X-API-Key": SUBSOURCE_API_KEY,
            "contentType": "application/zip",
            "contentDisposition": f"attachment; filename=\"{subtitle_id}.zip\"",
            "body": "ZIP file stream"
        }
        res = request_to_subsource('subtitles', f'/{subtitle_id}/download', headers)
        if res.status_code != 200:
            continue

        content_type = res.headers.get("Content-Type", "")
        if "text/html" in content_type.lower():
            # An HTML body instead of a zip aborts the whole run, as before.
            sys.exit()

        zip_path = f'{DOWNLOAD_DIR}{subtitle_id}.zip'
        try:
            try:
                with open(zip_path, "wb") as f:
                    for chunk in res.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
                with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                    subtitle_names = zip_ref.namelist()
                    # Reset per archive so a name chosen for a previous item
                    # can never leak into this one.
                    subtitle = None
                    if len(subtitle_names) > 1:
                        for sub_name in subtitle_names:
                            _, s_episode, _ = extract_info(sub_name, title_type)
                            if file_episode == s_episode:
                                subtitle = sub_name
                                break
                    elif subtitle_names:
                        subtitle = subtitle_names[0]
                    if subtitle is None:
                        print(
                            f'\n**********************\n--- no matching file in subtitle {subtitle_id}; skipped.\n**********************')
                        continue
                    print(f'\n**********************\n--- chosen subtitle: {subtitle}\n**********************')
                    zip_ref.extract(subtitle, DOWNLOAD_DIR)
            finally:
                # The archive is only a transport container; remove it whether
                # or not extraction succeeded.
                if os.path.exists(zip_path):
                    os.remove(zip_path)

            _, subtitle_format = os.path.splitext(f'{subtitle}')
            if subtitle_format != '.srt':
                print(
                    f'\n**********************\n--- subtitle format is {subtitle_format}. we have to change it to srt. \n**********************')
                convert_to_srt(f'{DOWNLOAD_DIR}{subtitle}', subtitle_format)
                # NOTE(review): assumes convert_to_srt writes its output under
                # the name produced by this same replace — confirm in
                # base.utils.subtitle_editor. An empty extension would make
                # replace() insert '.srt' between every character.
                subtitle = subtitle.replace(subtitle_format, '.srt')
            os.rename(f'{DOWNLOAD_DIR}{subtitle}', f'{DOWNLOAD_DIR}{position}_{file_id}.srt')
            print(
                f'\n**********************\n--- cleaning and preparation of the subtitles began.\n**********************')
            subtitle = f'{position}_{file_id}.srt'
            convert_to_utf8(subtitle)
            if subtitle_lang == 'farsi_persian':
                find_ads_in_subtitle(subtitle)
            if os.path.exists(f'{SUBTITLE_DIR}{subtitle}'):
                # A prepared copy already exists; keep it and drop the new one.
                os.remove(f'{DOWNLOAD_DIR}{subtitle}')
            else:
                os.rename(f'{DOWNLOAD_DIR}{subtitle}', f'{SUBTITLE_DIR}{subtitle}')
            subtitles[subtitle] = subtitle_lang
        except Exception as e:
            # Best-effort: one broken subtitle must not stop the others, but
            # the reason is logged instead of being silently discarded.
            print(
                f'\n**********************\n--- preparing subtitle {subtitle_id} failed: {e}\n**********************')
    return subtitles

def extract_info(release_info, title_type):
    """Return ``(season, episode, release_type)`` parsed from a release name.

    Season and episode are only looked up when ``title_type`` is 2; for any
    other value both are ``None``. The release type is always identified.
    """
    season = None
    episode = None
    if title_type == 2:
        season = identify_seasons_from_irregular_filename(release_info)
        episode = identify_episodes_from_irregular_filename(release_info)
    return season, episode, identify_release(release_info)

def get_sub_times(sub_path):
    """Return ``(starts, ends)`` for every non-blank cue of an SRT file.

    Each cue's ``ordinal`` is divided by 1000.0; cues whose text is empty or
    whitespace-only are ignored. The file is read as UTF-8.
    """
    starts = []
    ends = []
    for cue in pysrt.open(sub_path, encoding='utf-8'):
        if not cue.text.strip():
            continue
        starts.append(cue.start.ordinal / 1000.0)
        ends.append(cue.end.ordinal / 1000.0)
    return starts, ends

def compare_subs_timing(sub_times_list):
    """Build a symmetric matrix of mean start-time differences between subtitles.

    Each entry of ``sub_times_list`` is indexed at ``[0]`` for its list of
    start times. For every pair, the two lists are truncated to the shorter
    length and the mean absolute difference is stored at ``[i, j]`` and
    ``[j, i]``. A pair where either list is empty gets the sentinel 9999.
    The diagonal stays 0.
    """
    count = len(sub_times_list)
    matrix = np.zeros((count, count))
    for row in range(count - 1):
        row_starts = sub_times_list[row][0]
        for col in range(row + 1, count):
            col_starts = sub_times_list[col][0]
            overlap = min(len(row_starts), len(col_starts))
            if overlap:
                deltas = np.array(row_starts[:overlap]) - np.array(col_starts[:overlap])
                gap = np.mean(np.abs(deltas))
            else:
                gap = 9999
            matrix[row, col] = gap
            matrix[col, row] = gap
    return matrix

def find_outliers(similarity_matrix, threshold=2.0):
    """Select the subtitles whose average difference to the others is not an outlier.

    A row is kept when its mean is at most ``mean + threshold * std`` of all
    row means. The comparison is inclusive so that rows are still kept when
    every row mean is identical (standard deviation 0), e.g. with one or two
    subtitles; a strict comparison would reject all of them.

    Args:
        similarity_matrix: 2-D NumPy array of pairwise differences.
        threshold: Number of standard deviations above the mean tolerated.

    Returns:
        ``(keep, mean_diffs)`` where ``keep`` is the list of kept row indices
        and ``mean_diffs`` is the array of per-row means.
    """
    if similarity_matrix.shape[0] == 0:
        # Nothing to compare; avoid taking the mean of an empty array.
        return [], np.zeros(0)
    mean_diffs = similarity_matrix.mean(axis=1)
    cutoff = np.mean(mean_diffs) + threshold * np.std(mean_diffs)
    keep = [i for i, m in enumerate(mean_diffs) if m <= cutoff]
    return keep, mean_diffs

def rank_central_subs(mean_diffs):
    """Rank indices from most to least "central" average difference.

    For each index the mean absolute distance between its value and every
    other value is computed; indices are returned in ascending order of that
    distance. Ties keep their original order (stable sort).

    Args:
        mean_diffs: Sequence of per-subtitle average differences.

    Returns:
        List of indices, most central first. With zero or one entry there is
        nothing to compare, so the indices are returned as-is.
    """
    n = len(mean_diffs)
    if n <= 1:
        # A single entry has no peers; averaging an empty list would yield NaN.
        return list(range(n))

    distances = np.zeros(n)
    for i in range(n):
        distances[i] = np.mean([abs(mean_diffs[i] - mean_diffs[j]) for j in range(n) if j != i])

    return np.argsort(distances, kind='stable').tolist()


@app.task(bind=True, queue='softsubs')
def soft_sub(self, request_type: str, user_id: int, imdb_id: int, file_name: str, file_id: str, title_type: int):
    """Create a soft-subbed copy of a Telegram video and send it back to the user.

    Steps, in order:
      1. Search SubSource by IMDb id and collect English and Persian subtitle
         candidates whose release info matches the file (or looks like a
         complete pack).
      2. Download and prepare them via ``download_and_preparing_subtitles``.
      3. Resolve the video through the Bot API ``getFile`` method and move it
         into ``DOWNLOAD_DIR`` as ``{file_id}{extension}``.
      4. Extract mono 16 kHz audio, run Silero VAD and write a WAV that keeps
         only the detected speech.
      5. Align every subtitle to that WAV with ``alass``, round cue times to
         100 ms, compare timings and pick the most central subtitle.
      6. If the picked subtitle's language differs from the requested one,
         align a subtitle of the requested language against the picked one.
      7. Mux the result into an MKV as the default subtitle track, write
         ``.srt``/``.vtt``/``.mov`` copies to ``STREAMS_DIR`` and upload the
         video with ``sendVideo``, retrying every 15 s until Telegram accepts.

    Args:
        request_type: ``'Persian SoftSub'`` or ``'English SoftSub'``.
        user_id: Telegram chat id that receives the result.
        imdb_id: IMDb identifier used for the SubSource search.
        file_name: Original file name; used to detect season/episode/release
            and as the uploaded file's name. Its extension is user-controlled.
        file_id: Telegram file id; also used to name all intermediate files.
        title_type: ``2`` enables season/episode parsing from file names.

    Returns:
        None. The task returns early when no subtitle candidate is found, when
        ``getFile`` never succeeds, or when the final duration probe fails. It
        calls ``sys.exit()`` when an external tool or the VAD model fails.
    """
    quote = shlex.quote

    if title_type == 2:
        file_season = identify_seasons_from_irregular_filename(file_name)
        file_episode = identify_episodes_from_irregular_filename(file_name)
        query = f'?searchType=imdb&imdb={imdb_id}&season={file_season}'
    else:
        file_season = file_episode = None
        query = f'?searchType=imdb&imdb={imdb_id}'
    file_release_type = identify_release(file_name)

    print(f'\n**********************\n--- imdb id: {imdb_id}\n--- title type: {title_type}\n--- file release type: {file_release_type}\n--- file season: {file_season}\n--- file episode: {file_episode}\n**********************\n')

    # --- 1. collect subtitle candidates -----------------------------------
    headers = {"X-API-Key": SUBSOURCE_API_KEY}
    movie = request_to_subsource('movies/search', query, headers).json()
    subsource_id = movie['data'][0]['movieId']
    subtitles = request_to_subsource('subtitles', f'?movieId={subsource_id}&language=english', headers).json()['data'][:5]
    subtitles += request_to_subsource('subtitles', f'?movieId={subsource_id}&language=farsi_persian', headers).json()['data'][:5]

    subtitle_items = []
    for sub in subtitles:
        for release in sub['releaseInfo']:
            print(release)
            s_season, s_episode, _ = extract_info(release, title_type)
            files_count = sub['files'] or 0
            same_season = file_season == s_season
            if (
                (same_season and file_episode == s_episode)
                or (same_season and s_episode is None)
                or 'complete' in release.lower()
                or 'all episodes' in release.replace('.', ' ').lower()
                or files_count > 2
            ):
                print(f'\n**********************\n--- subtitle {release} added.\n**********************')
                subtitle_items.append((sub['subtitleId'], sub['language']))
                break
        if len(subtitle_items) > 10:
            break

    if not subtitle_items:
        return

    # --- 2. download and prepare subtitles --------------------------------
    print(subtitle_items)
    print(f'\n**********************\n--- subtitles download and extraction has begun. \n**********************')
    subtitles = download_and_preparing_subtitles(subtitle_items, file_id, file_episode, title_type)
    print(subtitles)

    # --- 3. fetch the video ------------------------------------------------
    print(f'\n**********************\n--- download {file_name} has begun. \n**********************')
    tries = 0
    while True:
        data = {
            'file_id': file_id,
        }
        response = send_request('getFile', data)
        if response.status_code == 200:
            break
        if tries > 20:
            break
        tries += 1
    if response.status_code != 200:
        return

    json_data = json.loads(response.content)
    save_path = json_data['result']['file_path']
    _, file_format = os.path.splitext(f'{file_name}')
    server_file_name = f'{file_id}{file_format}'
    video_path = f'{DOWNLOAD_DIR}{server_file_name}'
    original_audio_wav = f'{DOWNLOAD_DIR}{file_id}_original.wav'
    speech_audio_wav = f"{DOWNLOAD_DIR}speech_{file_id}.wav"
    shutil.move(save_path, video_path)

    # --- 4. audio extraction and voice activity detection ------------------
    # file_format comes from the user-supplied file name, so every path built
    # from it is passed either as a separate argv element (no shell) or
    # through shlex.quote when a command string is required.
    print(f'\n**********************\n--- extracting audio from video file... \n**********************')
    audio_start_output = subprocess.check_output([
        'ffprobe', '-i', video_path, '-show_entries', 'stream=start_time',
        '-select_streams', 'a', '-v', 'quiet', '-of', 'csv=p=0',
    ]).decode().strip()
    # Only the first reported audio stream's start time is used.
    audio_start = float(audio_start_output.split('\n')[0])
    video_duration_output = subprocess.check_output([
        'ffprobe', '-i', video_path, '-show_entries', 'format=duration',
        '-v', 'quiet', '-of', 'csv=p=0',
    ]).decode().strip()
    video_duration = float(video_duration_output)
    # Pad the audio by its start offset so its timeline matches the video's.
    delay_ms = int(audio_start * 1000)
    command_convert = (
        f'ffmpeg -i {quote(video_path)} -vn '
        f'-af "adelay={delay_ms}|{delay_ms},aresample=16000" -ac 1 '
        f'-t {video_duration} {quote(original_audio_wav)} -y'
    )
    if not run_command(command_convert, "Initial conversion to WAV failed."):
        sys.exit()

    print(f'\n**********************\n--- speech recognition (VAD) \n**********************')
    sr = 16000
    try:
        model, utils = torch.hub.load(
            repo_or_dir='snakers4/silero-vad',
            model='silero_vad',
            force_reload=False
        )
        # These names shadow the module-level silero_vad imports inside this
        # function; the hub-provided helpers are the ones actually used.
        (get_speech_timestamps, save_audio, read_audio, VADIterator, collect_chunks) = utils

        wav = read_audio(original_audio_wav, sampling_rate=16000)
        speech_timestamps = get_speech_timestamps(
            wav,
            model,
            sampling_rate=sr,
            threshold=0.45,
            min_speech_duration_ms=150,
            min_silence_duration_ms=300,
            speech_pad_ms=150
        )
    except Exception as e:
        print(f'\n**********************\n--- error loading or running Silero VAD model: {e} \n**********************')
        sys.exit()

    # Merge speech segments separated by at most two seconds of silence.
    merge_threshold_samples = sr * 2
    merged_segments = []
    if speech_timestamps:
        current_segment = speech_timestamps[0].copy()
        for next_segment in speech_timestamps[1:]:
            if next_segment['start'] - current_segment['end'] <= merge_threshold_samples:
                current_segment['end'] = next_segment['end']
            else:
                merged_segments.append(current_segment)
                current_segment = next_segment.copy()
        merged_segments.append(current_segment)
    # Same length as the input, silent everywhere except the speech segments.
    output_wav = torch.zeros_like(wav)
    for segment in merged_segments:
        output_wav[segment['start']:segment['end']] = wav[segment['start']:segment['end']]
    save_audio(speech_audio_wav, output_wav, sampling_rate=sr)

    # --- 5. align every subtitle and pick the most central one -------------
    def round_time_to_100ms(time_obj):
        total_ms = time_obj.ordinal
        rounded_ms = round(total_ms / 100) * 100
        return pysrt.SubRipTime.from_ordinal(rounded_ms)

    print(f'\n**********************\n--- syncing subtitles with Alass \n**********************')
    sub_times_list = []
    for subtitle in subtitles:
        fixed_name = f'fixed_{subtitle}'
        temp_path = f'{SUBTITLE_DIR}temp_{subtitle}'
        command_alass = f"alass {quote(speech_audio_wav)} {quote(f'{SUBTITLE_DIR}{subtitle}')} {quote(fixed_name)}"
        if not run_command(command_alass, "Alass process was unsuccessful."):
            sys.exit()
        try:
            subs = pysrt.open(fixed_name, encoding='utf-8')
            for s in subs:
                s.start = round_time_to_100ms(s.start)
                s.end = round_time_to_100ms(s.end)
            subs.save(f'smoothed_{subtitle}', encoding='utf-8')
            os.rename(f'smoothed_{subtitle}', temp_path)
        except Exception:
            # Smoothing is optional; fall back to the raw alass output.
            os.rename(fixed_name, temp_path)
        sub_times_list.append(get_sub_times(temp_path))

    sim_matrix = compare_subs_timing(sub_times_list)
    keep, mean_diffs = find_outliers(sim_matrix)
    print(f"\n**********************\n--- Subtitle Similarity Report\n**********************")
    for i, sub in enumerate(subtitles):
        print(f"\n**********************\n--- {os.path.basename(sub)} → avg diff = {mean_diffs[i]:.3f} sec\n**********************")
    indexes = rank_central_subs(mean_diffs)
    unknown_subtitle = list(subtitles)[indexes[0]]
    chosen_language = subtitles.get(unknown_subtitle)
    reference_path = f'{SUBTITLE_DIR}temp_{unknown_subtitle}'
    final_subtitle_path = f'{SUBTITLE_DIR}{file_id}.srt'

    # --- 6. make sure the final subtitle is in the requested language ------
    if request_type == 'Persian SoftSub' and chosen_language == 'english':
        print(f"\n**********************\n--- user request is Persian SoftSub but selected subtitle language is English.\n**********************")
        print(f"\n**********************\n--- trying to sync Persian subtitle with selected English subtitle...\n**********************")
        # NOTE(review): if no Persian subtitle was prepared, the loop variable
        # is left on the last entry and that subtitle is used instead.
        for persian_subtitle in subtitles:
            if subtitles.get(persian_subtitle) == 'farsi_persian':
                break
        command_alass = f"alass {quote(reference_path)} {quote(f'{SUBTITLE_DIR}{persian_subtitle}')} {quote(final_subtitle_path)}"
        if not run_command(command_alass, "Alass process was unsuccessful."):
            sys.exit()
    elif request_type == 'English SoftSub' and chosen_language == 'farsi_persian':
        print(f"\n**********************\n--- user request is English SoftSub but selected subtitle language is Persian.\n**********************")
        print(f"\n**********************\n--- trying to sync English subtitle with selected Persian subtitle...\n**********************")
        # NOTE(review): same fallback as above when no English subtitle exists.
        for english_subtitle in subtitles:
            if subtitles.get(english_subtitle) == 'english':
                break
        command_alass = f"alass {quote(reference_path)} {quote(f'{SUBTITLE_DIR}{english_subtitle}')} {quote(final_subtitle_path)}"
        if not run_command(command_alass, "Alass process was unsuccessful."):
            sys.exit()
    else:
        # The picked subtitle already has the requested language (or the
        # request type is not one of the two above): use it directly. The
        # previous code additionally re-ran the last alignment command left
        # over from the loop above, which served no purpose here.
        os.rename(reference_path, final_subtitle_path)

    subtitle = f'{file_id}.srt'
    add_intro(subtitle, 1)
    add_outro(subtitle)
    os.remove(original_audio_wav)
    os.remove(speech_audio_wav)

    # --- 7. mux, export stream copies and upload ---------------------------
    if file_format != '.mkv':
        print(f'\n**********************\n--- file format is {file_format}. we have to change it to mkv. \n**********************')
        # Built from file_id rather than str.replace so an empty or repeated
        # extension cannot corrupt the name.
        mkv_name = f'{file_id}.mkv'
        subprocess.run(
            ['ffmpeg', '-i', video_path, '-n', '-c', 'copy', f'{DOWNLOAD_DIR}{mkv_name}'],
            capture_output=True, text=True,
        )
        os.remove(video_path)
        server_file_name = mkv_name
        video_path = f'{DOWNLOAD_DIR}{server_file_name}'

    softsub_path = f'{SOFTSUB_DIR}{server_file_name}'
    stream_subtitle_path = f'{STREAMS_DIR}{subtitle}'
    print(f'\n**********************\n--- trying to convert the file to soft sub... \n**********************')
    subprocess.run(
        [
            'ffmpeg', '-i', video_path, '-n',
            '-sub_charenc', 'UTF-8', '-f', 'srt', '-i', f'{SUBTITLE_DIR}{subtitle}',
            '-map', '0:0', '-map', '0:1', '-map', '1:0',
            '-disposition:s:0', 'default',
            '-c:v', 'copy', '-c:a', 'copy', '-c:s', 'srt',
            softsub_path,
        ],
        capture_output=True, text=True,
    )
    # The muxed file now carries the subtitle, so the standalone copy can go.
    # (The source video is removed after a successful upload, below.)
    try:
        os.remove(f"{SUBTITLE_DIR}{subtitle}")
    except OSError:
        pass

    # Prefer the Persian-tagged subtitle stream; fall back to the first one.
    subprocess.run(
        ['ffmpeg', '-y', '-i', softsub_path, '-map', '0:s:m:language:per', stream_subtitle_path],
        capture_output=True, text=True,
    )
    if not os.path.exists(stream_subtitle_path):
        subprocess.run(
            ['ffmpeg', '-y', '-i', softsub_path, '-map', '0:s:0', stream_subtitle_path],
            capture_output=True, text=True,
        )
    subprocess.run(
        ['ffmpeg', '-y', '-i', stream_subtitle_path, f'{STREAMS_DIR}{subtitle[:-3]}vtt'],
        capture_output=True, text=True,
    )
    subprocess.run(
        ['ffmpeg', '-y', '-i', softsub_path, '-codec', 'copy', f'{STREAMS_DIR}{server_file_name[:-3]}mov'],
        capture_output=True, text=True,
    )
    res = subprocess.run(
        [
            'ffprobe', '-i', softsub_path, '-v', 'quiet',
            '-show_entries', 'format=duration', '-hide_banner',
            '-of', 'default=noprint_wrappers=1:nokey=1', '-sexagesimal',
        ],
        capture_output=True, text=True,
    )
    if res.returncode != 0:
        return

    # Sexagesimal output is H:MM:SS.micro; drop the fraction and total it up.
    duration = res.stdout.split('.')[0].split(':')
    sec = int(duration[0]) * 3600 + int(duration[1]) * 60 + int(duration[2])

    print(f'\n**********************\n--- uploading soft sub file to the telegram... \n**********************')

    def upload():
        with open(softsub_path, 'rb') as video_file, open(THUMB_DIR, 'rb') as thumb:
            files = {'video': (file_name, video_file), 'thumb': thumb}
            data = {'chat_id': user_id, 'duration': sec, 'caption': f'{file_name}\n\nhttp://157.180.58.225/player.php?file_name={server_file_name[:-3]}mov&subtitle={subtitle[:-3]}vtt', 'supports_streaming': True}
            return upload_video('sendVideo', data, files)

    # NOTE(review): retries without an upper bound until Telegram reports ok.
    res = upload().json()
    while not res['ok']:
        print('Something went wrong while uploading the video. retrying after 15 seconds...')
        time.sleep(15)
        res = upload().json()
    os.remove(softsub_path)
    os.remove(video_path)