"""
/***************************************************************************
Name       :  climate_data_download_worker.py
Description:  Climate Data Download worker class for FEWSTools plugin,
                        updated from QGIS2
copyright  :  (C) 2019-2023 by FEWS
email      :  minxuansun@contractor.usgs.gov
Created    :  3/30/2020 - Minxuan Sun
Modified   :  06/05/2020 - msun - add support for ftp and https remote hosts
              07/13/2020 - msun - continue downloading even if a file fails
                                  to be downloaded
              07/15/2020 - cholen - Adjust error
              12/28/2020 - cholen - Adjust for summed downloads
              03/10/2021 - cholen - Fix for no new files
              01/20/2022 - cholen - New gdal utilities
              03/25/2022 - cholen - Add tiff support
              07/13/2022 - cholen - Add FEWS daily format support
              09/16/2022 - jhowton - Added download missing data alert
              07/18/2023 - aeben - Added error message for FileNotFoundError
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ****
"""
import datetime
from ftplib import FTP
import os
import shutil
from urllib3.exceptions import MaxRetryError

from PyQt5 import QtCore

from qgis.core import QgsMessageLog, Qgis

from fews_tools import fews_tools_config as config

from fews_tools.models.workspace_setup_model import WorkspaceSetupModel

from fews_tools.utilities import date_utilities as d_util
from fews_tools.utilities import download_utilities as dwnld_util
from fews_tools.utilities import geoclim_utilities as util
from fews_tools.utilities import geoclim_gdal_utilities as g_util
from fews_tools.utilities import geoclim_qgs_utilities as qgs_util


class ClimateDataDownloadWorker(QtCore.QObject):
    """
    Worker for data download.

    Downloads the remote files for a dataset (ftp or http/https), unpacks
    or copies them into place and, when the final data date format differs
    from the downloaded data date format, sums the downloaded daily files
    into the final period files.

    Results are reported through the finished, error, missing_file and
    progress signals declared at the bottom of the class.
    """

    # used for summing daily files
    # NOTE(review): not referenced anywhere inside this class
    MAX_DAILY_VALUE = 500  # used for summing daily files
    # sub-folder of the workspace temp data path that receives the downloads
    TEMP_DWNLD_FOLDER = 'downloads'

    def __init__(self, final_data_file_list, ds_dic):
        """
        Set up the worker state for one download run.

        params(list) - final_data_file_list - Names of the final data files
                       wanted (no path, no suffix).  May be empty or None.
        params(dict) - ds_dic - Dataset definition.  Keys read here:
                       FILEDATEFORMAT, FILEPREFIX, FILESUFFIX,
                       DATADATEFORMAT, UNCOMPRESSEDDATADATEFORMAT.
                       May be empty or None.

        When either argument is empty/None the worker is created in the
        killed state instead of raising, so the caller can still connect
        the signals and call run().
        """
        QtCore.QObject.__init__(self)
        self.wrksp_setup = WorkspaceSetupModel()
        # nothing useful can be done without both a file list and a dataset
        self.killed = not final_data_file_list or not ds_dic

        self.ds_dic = ds_dic
        # "or []" keeps sorted() from failing on None
        self.final_data_file_list = sorted(final_data_file_list or [])
        self.download_file_list = None

        # error message
        self.err_msg = "Unspecified error in Climate Data Download"

        if ds_dic:
            # remote file is uncompressed when no compressed-file naming
            # information is defined at all
            self.uncompressed = not (ds_dic['FILEDATEFORMAT'] or
                                     ds_dic['FILEPREFIX'] or
                                     ds_dic['FILESUFFIX'])
            # need to handle cases when the downloaded files need to be
            # summed to get the final files
            self.flag_sum = (ds_dic['DATADATEFORMAT'] !=
                             ds_dic['UNCOMPRESSEDDATADATEFORMAT'])
        else:
            # no dataset definition: the worker is already flagged as killed
            self.uncompressed = False
            self.flag_sum = False

        # progress increment per final file; the last 5% is left for the
        # end of run().  Guard against an empty list (division by zero).
        file_count = len(self.final_data_file_list)
        self.step = 95.0 / file_count if file_count else 0.0
        self.progress_val = 0
        self.progress.emit(int(self.progress_val))

    def __download_http_file__(self, file_url, file_name, dst_dir):
        '''
        Download one file over http(s), retrying when an attempt raises.
        params(string) - file_url - Full url of the remote file
        params(string) - file_name - Remote file name, used for logging
        params(string) - dst_dir - Local folder receiving the download
        returns(tuple) - The (downloaded file, response code) pair returned
                         by dwnld_util.download_http_file
        raises - The exception of the last attempt when every attempt
                 failed, so the caller can react to its real type
        '''
        # always try at least once, even if the configured count is 0
        attempts = max(1, config.DOWNLOAD_RETRIES)
        for attempt in range(1, attempts + 1):
            try:
                dwnld_file, resp = dwnld_util.download_http_file(
                    file_url, dst_dir)
                return dwnld_file, resp
            except Exception as err:
                QgsMessageLog.logMessage(
                    f"Downloading file {file_name} Attempt {attempt} failed: {err}", level=Qgis.Info)
                if attempt < attempts:
                    QgsMessageLog.logMessage(
                        f"Retrying... (Attempt {attempt} of {attempts})", level=Qgis.Info)
                else:
                    QgsMessageLog.logMessage(
                        "All retries failed.", level=Qgis.Info)
                    # re-raise the original error (not a bare Exception)
                    # so its type and message are preserved
                    raise

    def __download_files__(self):
        '''
        Helper for the download.

        Downloads every entry of self.download_file_list into the temporary
        download folder, then untars/gunzips it (compressed remote files) or
        copies it (uncompressed remote files) into the final folder.  The
        final folder is the temporary folder when the files will be summed,
        otherwise the dataset DATAFOLDER.

        A response other than 200 is reported through the missing_file
        signal and the loop continues with the next file.  An exception
        stops the loop and is reported through the error signal.  Errors
        raised while opening the ftp connection are not caught here.
        '''
        dwnld_dst_dir = os.path.join(
            self.wrksp_setup.get_temp_data_path(), self.TEMP_DWNLD_FOLDER)
        if self.flag_sum is True:
            final_path = dwnld_dst_dir
        else:
            final_path = self.ds_dic['DATAFOLDER']
        os.makedirs(dwnld_dst_dir, exist_ok=True)
        base_url = self.ds_dic['REMOTEHOST'] + self.ds_dic['REMOTEDIRECTORY']

        ftp = None
        if self.ds_dic['REMOTEHOST'][0:3].lower() == 'ftp':
            # take the ftp:// off the front
            remote_host = self.ds_dic['REMOTEHOST'].replace("ftp://", "")
            ftp = FTP(remote_host)
            try:
                ftp.login(self.ds_dic['USERNAME'], self.ds_dic['PASSWORD'])
            except BaseException:
                # don't leak the connection when the login is refused
                ftp.close()
                raise

        # pre-set so the error handlers below can always format a message
        dwnld_file = ''
        try:
            # daily files are split up by years on the remote site
            daily_by_year = self.ds_dic["DATADATEFORMAT"] == "YYYY.MM.DD"
            for entry in self.download_file_list:
                if daily_by_year:
                    # add the year folder to the download path
                    yyyy = entry.split(".")[2]
                    url = base_url + yyyy + "/"
                else:
                    url = base_url

                if self.uncompressed:
                    file_names = [entry + self.ds_dic['UNCOMPRESSEDDATASUFFIX']]
                    # Need to also get the .hdr if uncompressed .bil file
                    if self.ds_dic["UNCOMPRESSEDDATASUFFIX"] == config.BIL_SUFFIX:
                        file_names.append(entry + config.HDR_SUFFIX)
                else:
                    file_names = [entry + self.ds_dic['FILESUFFIX']]

                for file_name in file_names:
                    # Set this so that if download fails this is set for
                    # error handling
                    dwnld_file = file_name
                    # download the file to temp(for consistency)
                    if ftp is not None:
                        dwnld_file = dwnld_util.download_ftp_file(
                            ftp, self.ds_dic['REMOTEDIRECTORY'],
                            file_name, dwnld_dst_dir)
                        # ftp gives no status code, so derive one from the
                        # presence of the downloaded file
                        resp = 200 if os.path.isfile(dwnld_file) else -1
                    else:
                        dwnld_file, resp = self.__download_http_file__(
                            url + file_name, file_name, dwnld_dst_dir)

                    if resp != 200:
                        # report the missing file and keep going
                        self.missing_file.emit(file_name, resp)
                        continue

                    if self.uncompressed:
                        shutil.copy2(dwnld_file, final_path)
                    elif "tar.gz" in file_name:
                        dwnld_util.untar_download(dwnld_file, final_path)
                    elif "tif.gz" in file_name:
                        # extracted name is the download name without .gz
                        ext_file = os.path.join(
                            final_path,
                            os.path.splitext(os.path.basename(dwnld_file))[0])
                        dwnld_util.gunzip(dwnld_file, ext_file)
                    QgsMessageLog.logMessage(
                        'Downloaded: {}'.format(file_name),
                        level=Qgis.Info)

                # set progress bar if not summing
                if self.flag_sum is False:
                    self.progress_val += self.step
                    self.progress.emit(int(self.progress_val))
                if self.killed:
                    break
        except MaxRetryError as err:
            self.error.emit(
                err, 'Remote site isn\'t reachable for now. Please try back later.')
        except FileNotFoundError as err:
            self.error.emit(
                err, 'Destination folder \'{}\' does not exist. Please check the dataset definition.'.format(final_path))
        except Exception as err:
            self.error.emit(
                err, 'Failed to download: {} - {}'.format(dwnld_file, err))
        finally:
            # single place that releases the ftp connection on every path
            if ftp is not None:
                ftp.close()

    def __get_daily_download_list__(self, period_date, d_format):
        '''
        Get the daily files needed for the specified period and date format
        params(string) - period_date - Period date of the final file
        params(string) - d_format - Date format for the downloaded files
        returns(list) - download_list - Daily file names (uncompressed data
                        prefix + formatted date), without suffix
        raises(ValueError) - The dataset PERIODICITY is neither 'Dekadal'
                             nor 'Pentadal'
        '''
        periodicity = self.ds_dic['PERIODICITY']
        if periodicity == 'Dekadal':
            sum_ct = 10
            s_day, e_day = d_util.get_dekad_start_and_end_days(
                self.ds_dic, period_date)
        elif periodicity == 'Pentadal':
            sum_ct = 5
            s_day, e_day = d_util.get_pentad_start_and_end_days(
                self.ds_dic, period_date)
        else:
            # fail with a clear message instead of an UnboundLocalError on
            # the period variables further down
            raise ValueError(
                'Daily files can only be summed for Dekadal or Pentadal '
                'datasets, not: {}'.format(periodicity))

        # formats containing 'special' are handed to
        # d_util.convert_jdate_format instead of strftime
        flag_convert = 'special' in d_format

        def format_day(day):
            # format one day according to d_format
            if flag_convert:
                return d_util.convert_jdate_format(day, d_format)
            return day.strftime(d_format)

        prefix = self.ds_dic['UNCOMPRESSEDDATAPREFIX']
        download_list = []
        # we want a daily file name
        for count in range(sum_ct):
            next_day = s_day + datetime.timedelta(days=count)
            # make sure we didn't cross a month (February)
            if next_day.strftime('%m') != s_day.strftime('%m'):
                break
            download_list.append(prefix + format_day(next_day))

        # the period's end day is added when the loop above did not reach it
        e_day_file = prefix + format_day(e_day)
        if e_day_file not in download_list:
            download_list.append(e_day_file)
        return download_list

    def __sum_files__(self, period_date):
        '''
        Used when uncompressed data is day and final data is either pentad
        or dekad.  Sums the files of self.download_file_list found in the
        temporary download folder into one final file in DATAFOLDER.
        params(string) - period_date - Period date of the final file

        Nothing is written when any of the daily files is missing; every
        missing file is logged.  Failures while summing are logged, not
        raised.
        '''
        suffix = self.ds_dic['DATASUFFIX']
        dwnld_dir = os.path.join(self.wrksp_setup.get_temp_data_path(),
                                 self.TEMP_DWNLD_FOLDER)
        files_exist = True
        raster_calculator_file_list = []
        for entry in self.download_file_list:
            filename_l = os.path.join(dwnld_dir, entry + suffix)
            if os.path.exists(filename_l):
                raster_calculator_file_list.append(filename_l)
            else:
                # keep looping so every missing file gets logged
                files_exist = False
                QgsMessageLog.logMessage(
                    "Missing a daily file: " + filename_l,
                    level=Qgis.Critical)

        if not files_exist:  # only sum if all needed files exist
            return

        os.makedirs(self.ds_dic['DATAFOLDER'], exist_ok=True)

        base_name = os.path.join(
            self.ds_dic['DATAFOLDER'],
            self.ds_dic['DATAPREFIX'] + period_date)
        dst_filename = base_name + suffix
        # The timestamped temp name is built directly rather than with
        # str.replace on the full path, which would also alter any other
        # occurrence of the suffix text in the path.
        temp_filename = (
            base_name + datetime.datetime.now().strftime('%Y%m%d%H%M%S') +
            suffix)

        # not sure if this is a good idea, but VB does this
        if 'PET' in self.ds_dic['DATASETNAME']:
            dst_data_type = g_util.GDAL_BYTE
        else:
            dst_data_type = g_util.GDAL_INT16

        msg = 'Sum files'
        if self.ds_dic['DATASCALEFACTOR'] != 1:
            msg += (', and apply scale factor of: 1/' +
                    str(self.ds_dic['DATASCALEFACTOR']))
        QgsMessageLog.logMessage(msg, level=Qgis.Info)
        try:
            result = qgs_util.rstr_calc_sum_files_raw(
                raster_calculator_file_list,
                self.ds_dic['DATAMISSINGVALUE'],
                self.ds_dic['DATASCALEFACTOR'],
                temp_filename)
            # translate from RasterCalculator float result to desired type
            if result == 0:
                g_util.translate_datatype(
                    temp_filename, dst_filename, dst_data_type)
                util.remove_raster_file(temp_filename)
                QgsMessageLog.logMessage(
                    "Created file: " + dst_filename, level=Qgis.Info)
            else:
                # a non-zero result used to be dropped without any trace
                QgsMessageLog.logMessage(
                    "Failed to create file: " + dst_filename +
                    " - raster calculation returned: " + str(result),
                    level=Qgis.Critical)
        except Exception as err:
            QgsMessageLog.logMessage(
                f"Failed to create file: {dst_filename} - {err}",
                level=Qgis.Critical)

    def get_download_date_format(self):
        """
        Translate the dataset's UNCOMPRESSEDDATADATEFORMAT into the date
        format used to build the names of the files to download.

        returns(string) - A strftime pattern, or a 'special_...' marker for
                          the julian formats strftime cannot produce
        """
        source_format = self.ds_dic['UNCOMPRESSEDDATADATEFORMAT']
        # python datetime doesn't handle a 1 to 3 digit julian date, so the
        # single-J formats map to 'special_' markers instead of a pattern
        julian_formats = {
            'YYYYJJJ': '%Y%j',
            'YYYYJ': 'special_YYYYJ',
            'YYJJJ': '%y%j',
            'YYJ': 'special_YYJ',
        }
        if source_format in julian_formats:
            return julian_formats[source_format]
        # everything else is year/month/day with a 4 or 2 digit year
        return '%Y%m%d' if 'YYYY' in source_format else '%y%m%d'

    def run(self):
        """
        Download data function.

        When the files must be summed, each final file is handled in turn:
        its daily files are listed, downloaded and summed.  Otherwise the
        final file list is downloaded as is.

        Always ends by emitting the finished signal, with
        (0, 'Data download completed') on success and None when the worker
        was killed or an exception was raised.  Exceptions are forwarded
        through the error signal together with self.err_msg.
        """
        return_value = None

        try:
            if self.flag_sum is True:
                d_format = self.get_download_date_format()
                prefix = self.ds_dic['DATAPREFIX']
                for entry in self.final_data_file_list:
                    # entry is not a path; strip the leading prefix to get
                    # the period date.  Slicing also works for an empty
                    # prefix or one whose text recurs inside the date.
                    if entry.startswith(prefix):
                        period_date = entry[len(prefix):]
                    else:
                        # entry does not start with the prefix: keep the
                        # former split-based extraction
                        period_date = entry.split(prefix)[1]
                    self.download_file_list = self.__get_daily_download_list__(
                        period_date, d_format)
                    self.__download_files__()
                    self.__sum_files__(period_date)
                    self.progress_val += self.step
                    # sometimes the step isn't small enough and we
                    # go past 100
                    if self.progress_val > 99.0:
                        self.progress_val = 99.0
                    self.progress.emit(int(self.progress_val))
                    if self.killed:
                        break
            else:
                # if we are not summing files,
                # then the final file is constrained to use the
                # downloaded file prefix and date format
                self.download_file_list = self.final_data_file_list
                QgsMessageLog.logMessage(
                    'Final list to download:  ' + str(self.download_file_list),
                    level=Qgis.Info)
                # nothing to download: don't set up folders or connections
                if self.download_file_list:
                    self.__download_files__()

            # set progress bar
            self.progress.emit(100)
            if not self.killed:
                return_value = (0, 'Data download completed')
        except BaseException as err:
            # worker boundary: forward the exception upstream so that the
            # finished signal below is still emitted
            self.error.emit(err, self.err_msg)
            return_value = None

        # remove the temporary downloads on success and on failure alike;
        # a cleanup problem is logged and must not block the finished signal
        try:
            temp_dwld_dir = os.path.join(
                self.wrksp_setup.get_temp_data_path(),
                self.TEMP_DWNLD_FOLDER)
            if os.path.exists(temp_dwld_dir):
                shutil.rmtree(temp_dwld_dir)
        except Exception as err:
            QgsMessageLog.logMessage(
                f"Could not remove temporary download folder: {err}",
                level=Qgis.Warning)

        self.finished.emit(return_value)

    def kill(self):
        """
        Ask the worker to stop: record the user-abort message and raise the
        kill flag that the download and run loops check after each entry.
        """
        self.err_msg, self.killed = (
            "Climate Data Download aborted by user", True)

    # emitted once at the end of run(): (0, 'Data download completed') on
    # success, None when the worker was killed or an exception was raised
    finished = QtCore.pyqtSignal(object)

    # emitted with the caught exception and a message describing the failure
    error = QtCore.pyqtSignal(Exception, str)

    # emitted with (file name, response code) when a download does not
    # return a 200 response; -1 is used for a failed ftp download
    missing_file = QtCore.pyqtSignal(str, int)

    # emitted with the current progress value as an int; run() emits 100
    # when it is done
    progress = QtCore.pyqtSignal(int)
