Source code for seppy.loader.psp

# Licensed under a 3-clause BSD style license - see LICENSE.rst

import copy
import os
# import warnings

import cdflib
import numpy as np
import pandas as pd
import sunpy

from packaging.version import Version
from sunpy.net import Fido
from sunpy.net import attrs as a
from sunpy.timeseries import TimeSeries

from seppy.util import custom_warning, resample_df

# Not needed atm as units are skipped in the modified read_cdf
# if hasattr(sunpy, "__version__") and Version(sunpy.__version__) >= Version("5.0.0"):
#     from sunpy.io._cdf import read_cdf, _known_units
# else:
#     from sunpy.io.cdf import read_cdf, _known_units


def _fillval_nan(data, fillval):
    try:
        data[data == fillval] = np.nan
    except ValueError:
        # This happens if we try and assign a NaN to an int type
        pass
    return data


def _get_cdf_vars(cdf):
    # Get list of all the variables in an open CDF file
    var_list = []
    cdf_info = cdf.cdf_info()
    for attr in list(cdf_info.keys()):
        if 'variable' in attr.lower() and len(cdf_info[attr]) > 0:
            for var in cdf_info[attr]:
                var_list += [var]

    return var_list



[docs]
def psp_isois_load(dataset, startdate, enddate, epilo_channel='F', epilo_threshold=None, path=None, resample=None, all_columns=False):
    """
    Downloads CDF files via SunPy/Fido from CDAWeb for ISOIS onboard PSP

    Parameters
    ----------
    dataset : str
        Name of PSP dataset: \n
            - 'PSP_ISOIS-EPIHI_L2-HET-RATES60' \n
            - 'PSP_ISOIS-EPIHI_L2-HET-RATES3600' (higher coverage than 'RATES60' before mid-2021) \n
            - 'PSP_ISOIS-EPIHI_L2-LET1-RATES60' \n
            - 'PSP_ISOIS-EPIHI_L2-LET2-RATES60' \n
            - 'PSP_ISOIS-EPILO_L2-PE' \n
            - 'PSP_ISOIS-EPILO_L2-IC'
    startdate, enddate : datetime or str
        Datetime object (e.g., dt.date(2021,12,31) or dt.datetime(2021,4,15)) or "standard"
        datetime string (e.g., "2021/04/15") (enddate must always be later than startdate)
    epilo_channel : string
        'E', 'F', 'G' (for 'EPILO PE'), or 'C', 'D', 'P', 'R', 'T' (for 'EPILO IC').
        EPILO chan, by default 'F'
    epilo_threshold : int or float, optional
        Replace ALL flux/countrate values above 'epilo_threshold' with np.nan, by default None.
        Only works for Electron count rates in 'PSP_ISOIS-EPILO_L2-PE' dataset
    path : str, optional
        Local path for storing downloaded data, by default None
    resample : str, optional
        resample frequency in format understandable by Pandas, e.g. '1min', by default None.
        Note that this is just a simple wrapper around thepandas
        resample function that is calculating the mean of the data in the new
        time bins. This is not necessarily the correct way to resample data,
        depending on the data type (for example for errors)!
    all_columns : boolean, optional
        Whether to return all columns of the datafile for EPILO (or skip
        usually unneeded columns for better performance), by default False
    Returns
    -------
    df : Pandas dataframe
        See links above for the different datasets for a description of the dataframe columns
    energies_dict : dictionary
        Dictionary containing energy information. \n
        NOTE: For EPIHI energy values are only loaded from the first day of the interval!
        For EPILO energy values are the mean of the whole loaded interval.
    """
    trange = a.Time(startdate, enddate)
    cda_dataset = a.cdaweb.Dataset(dataset)
    try:
        result = Fido.search(trange, cda_dataset)
        filelist = [i[0].split('/')[-1] for i in result.show('URL')[0]]
        filelist.sort()
        if path is None:
            filelist = [sunpy.config.get('downloads', 'download_dir') + os.sep + file for file in filelist]
        elif type(path) is str:
            filelist = [path + os.sep + f for f in filelist]
        downloaded_files = filelist

        for i, f in enumerate(filelist):
            if os.path.exists(f) and os.path.getsize(f) == 0:
                os.remove(f)
            if not os.path.exists(f):
                _downloaded_file = Fido.fetch(result[0][i], path=path, max_conn=1)

        # loading for EPIHI
        if dataset.split('-')[1] == 'EPIHI_L2':
            # downloaded_files = Fido.fetch(result, path=path, max_conn=1)
            # downloaded_files.sort()
            data = TimeSeries(downloaded_files, concatenate=True)
            df = data.to_dataframe()
            # df = read_cdf(downloaded_files[0])

            # # reduce data frame to only H_Flux, H_Uncertainty, Electron_Counts, and Electron_Rate.
            # # There is no Electron_Uncertainty, maybe one could use at least the Poission error from Electron_Counts for that.
            # # df = df.filter(like='H_Flux') + df.filter(like='H_Uncertainty') + df.filter(like='Electrons')
            # if dataset.split('-')[2].upper() == 'HET':
            #     if dataset.split('-')[3] == 'RATES60':
            #         selected_cols = ["A_H_Flux", "B_H_Flux", "A_H_Uncertainty", "B_H_Uncertainty", "A_Electrons", "B_Electrons"]
            #     if dataset.split('-')[3] == 'RATES3600':
            #         selected_cols = ["A_H_Flux", "B_H_Flux", "A_H_Uncertainty", "B_H_Uncertainty", "A_Electrons", "B_Electrons"]
            # if dataset.split('-')[2].upper() == 'LET1':
            #     selected_cols = ["A_H_Flux", "B_H_Flux", "A_H_Uncertainty", "B_H_Uncertainty", "A_Electrons", "B_Electrons"]
            # if dataset.split('-')[2].upper() == 'LET2':
            #     selected_cols = ["A_H_Flux", "B_H_Flux", "A_H_Uncertainty", "B_H_Uncertainty", "A_Electrons", "B_Electrons"]
            # df = df[df.columns[df.columns.str.startswith(tuple(selected_cols))]]
            # raise Warning(f"{dataset} is not fully suppported, only proton and electron data will be processed!")

            cdf = cdflib.CDF(downloaded_files[0])

            # remove this (i.e. following line) when sunpy's read_cdf is updated,
            # and FILLVAL will be replaced directly, see
            # https://github.com/sunpy/sunpy/issues/5908
            # df = df.replace(cdf.varattsget('A_H_Flux')['FILLVAL'], np.nan)
            # 4 Apr 2023: previous 1 lines removed because they are taken care of with sunpy
            # 4.1.0:
            # https://docs.sunpy.org/en/stable/whatsnew/changelog.html#id7
            # https://github.com/sunpy/sunpy/pull/5956

            # get info on energies and units
            energies_dict = {"H_ENERGY":
                             cdf['H_ENERGY'],
                             "H_ENERGY_DELTAPLUS":
                             cdf['H_ENERGY_DELTAPLUS'],
                             "H_ENERGY_DELTAMINUS":
                             cdf['H_ENERGY_DELTAMINUS'],
                             "H_ENERGY_LABL":
                             cdf['H_ENERGY_LABL'],
                             "Electrons_ENERGY":
                             cdf['Electrons_ENERGY'],
                             "Electrons_ENERGY_DELTAPLUS":
                             cdf['Electrons_ENERGY_DELTAPLUS'],
                             "Electrons_ENERGY_DELTAMINUS":
                             cdf['Electrons_ENERGY_DELTAMINUS'],
                             "Electrons_ENERGY_LABL":
                             cdf['Electrons_ENERGY_LABL']
                             }
            try:
                energies_dict["H_FLUX_UNITS"] = cdf.varattsget('A_H_Flux')['UNITS']
                energies_dict["Electrons_Rate_UNITS"] = cdf.varattsget('A_Electrons_Rate')['UNITS']
            except ValueError:
                try:
                    energies_dict["H_FLUX_UNITS"] = cdf.varattsget('C_H_Flux')['UNITS']
                    energies_dict["Electrons_Rate_UNITS"] = cdf.varattsget('C_Electrons_Rate')['UNITS']
                except ValueError:
                    raise Warning("Can't obtain UNITS from metadata. Possibly an unsupported dataset is loaded!")

        # loading for EPILO
        if dataset.split('-')[1] == 'EPILO_L2':
            if dataset[-2:] == 'PE':
                species_str = 'Electron'
            elif dataset[-2:] == 'IC':
                species_str = 'H'

            if len(downloaded_files) > 0:
                if all_columns:
                    ignore = []
                else:
                    ignore = [f'Epoch_Chan{epilo_channel}_DELTA', f'HCI_Chan{epilo_channel}', f'HCI_Lat_Chan{epilo_channel}', f'HCI_Lon_Chan{epilo_channel}',
                              f'HCI_R_Chan{epilo_channel}', f'HGC_Lat_Chan{epilo_channel}', f'HGC_Lon_Chan{epilo_channel}', f'HGC_R_Chan{epilo_channel}',
                              f'{species_str}_Chan{epilo_channel}_Energy_LABL', f'{species_str}_Counts_Chan{epilo_channel}', f'RTN_Chan{epilo_channel}']
                    # ignore = ['Epoch_ChanP_DELTA', 'HCI_ChanP', 'HCI_Lat_ChanP', 'HCI_Lon_ChanP', 'HCI_R_ChanP', 'HGC_Lat_ChanP', 'HGC_Lon_ChanP', 'HGC_R_ChanP', 'H_ChanP_Energy', 'H_ChanP_Energy_DELTAMINUS', 'H_ChanP_Energy_DELTAPLUS', 'H_ChanP_Energy_LABL', 'H_CountRate_ChanP', 'H_Counts_ChanP', 'H_Flux_ChanP', 'H_Flux_ChanP_DELTA', 'PA_ChanP', 'Quality_Flag_ChanP', 'RTN_ChanP', 'SA_ChanP

                # read 0th cdf file
                # # cdf = cdflib.CDF(downloaded_files[0])
                # # df = _cdf2df_3d_psp(cdf, f"Epoch_Chan{epilo_channel.upper()}", ignore=ignore)
                df = _read_cdf_psp(downloaded_files[0], f"Epoch_Chan{epilo_channel.upper()}", ignore_vars=ignore)

                # read additional cdf files
                if len(downloaded_files) > 1:
                    for f in downloaded_files[1:]:
                        # # cdf = cdflib.CDF(f)
                        # # t_df = _cdf2df_3d_psp(cdf, f"Epoch_Chan{epilo_channel.upper()}", ignore=ignore)
                        t_df = _read_cdf_psp(f, f"Epoch_Chan{epilo_channel.upper()}", ignore_vars=ignore)
                        df = pd.concat([df, t_df])

                # columns of returned df for EPILO PE
                # -----------------------------------
                # PA_ChanF_0 to PA_ChanF_7
                # SA_ChanF_0 to SA_ChanF_7
                # Electron_ChanF_Energy_E0_P0 to Electron_ChanF_Energy_E47_P7
                # Electron_ChanF_Energy_DELTAMINUS_E0_P0 to Electron_ChanF_Energy_DELTAMINUS_E47_P7
                # Electron_ChanF_Energy_DELTAPLUS_E0_P0 to Electron_ChanF_Energy_DELTAPLUS_E47_P7
                # Electron_CountRate_ChanF_E0_P0 to Electron_CountRate_ChanF_E47_P7
                energies_dict = {}
                for k in [f'{species_str}_Chan{epilo_channel.upper()}_Energy_E',
                          f'{species_str}_Chan{epilo_channel.upper()}_Energy_DELTAMINUS',
                          f'{species_str}_Chan{epilo_channel.upper()}_Energy_DELTAPLUS']:
                    energies_dict[k] = df[df.columns[df.columns.str.startswith(k)]].mean()
                    df.drop(df.columns[df.columns.str.startswith(k)], axis=1, inplace=True)
                # rename energy column (removing trailing '_E')
                energies_dict[f'{species_str}_Chan{epilo_channel.upper()}_Energy'] = energies_dict.pop(f'{species_str}_Chan{epilo_channel.upper()}_Energy_E')

                # replace outlier data points above given threshold with np.nan
                # note: df.where(cond, np.nan) replaces all values where the cond is NOT fullfilled with np.nan
                # following Pandas Dataframe work is not too elegant, but works...
                if epilo_threshold:
                    # create new dataframe of FLUX columns only with removed outliers
                    df2 = df.filter(like='Electron_CountRate_').where(df.filter(like='Electron_CountRate_') <= epilo_threshold, np.nan)
                    # drop these FLUX columns from original dataframe
                    flux_cols = df.filter(like='Electron_CountRate_').columns
                    df.drop(labels=flux_cols, axis=1, inplace=True)
                    # add cleaned new FLUX columns to original dataframe
                    df = pd.concat([df2, df], axis=1)
            else:
                df = ''
                energies_dict = ''

        if isinstance(resample, str):
            if dataset.upper() in ['PSP_ISOIS-EPILO_L2-PE']:
                cols_unc = 'auto'
                keywords_unc = ['_DELTA_E']  # uncertainty columns for EPILO PE have '_DELTA_E' in their name, e.g. 'Electron_Flux_ChanE_DELTA_E47_P6'. There is also 'Epoch_ChanE_DELTA' column for the time uncertainty, but this should not be resampled with the same method as the flux uncertainties, so it is not included here.
            elif dataset.upper() in ['PSP_ISOIS-EPIHI_L2-HET-RATES60', 'PSP_ISOIS-EPIHI_L2-LET1-RATES60', 'PSP_ISOIS-EPIHI_L2-LET2-RATES60', 'PSP_ISOIS-EPILO_L2-IC']:
                cols_unc = 'auto'
                keywords_unc=['unc', 'err', 'sigma', '_DELTA_']  # 'PSP_ISOIS-EPILO_L2-IC' has 'H_Flux_ChanP_DELTA_Exx_Pxx' uncertainty columns
            df = resample_df(df=df, resample=resample, pos_timestamp="center", origin="start", cols_unc=cols_unc, verbose=False, keywords_unc=keywords_unc)

    except (RuntimeError, IndexError):
        print(f'Unable to obtain "{dataset}" data!')
        downloaded_files = []
        df = pd.DataFrame()
        energies_dict = []
    return df, energies_dict




[docs]
def calc_av_en_flux_PSP_EPIHI(df, energies, en_channel, species, instrument, viewing):
    """
    This function averages the flux of several energy channels into a combined energy channel
    channel numbers counted from 0

    So far only works for EPIHI-HET

    Parameters
    ----------
    df : pd.DataFrame DataFrame containing HET data
        DataFrame containing PSP data
    energies : dict
        Energy dict returned from psp_load
    en_channel : int or list
        energy channel number(s) to be used
    species : string
        'e', 'electrons', 'p', 'i', 'protons', 'ions'
    instrument : string
        'het'
    viewing : string
        'A', 'B'

    Returns
    -------
    pd.DataFrame
        flux_out: contains channel-averaged flux
    """
    if instrument.lower() == 'het':
        if species.lower() in ['e', 'electrons']:
            species_str = 'Electrons'
            flux_key = 'Electrons_Rate'
        if species.lower() in ['p', 'protons', 'i', 'ions', 'h']:
            species_str = 'H'
            flux_key = 'H_Flux'
    en_str = energies[f'{species_str}_ENERGY_LABL']
    if type(en_channel) is list:
        energy_low = en_str[en_channel[0]].flat[0].split('-')[0]
        energy_up = en_str[en_channel[-1]].flat[0].split('-')[-1]
        en_channel_string = energy_low + '-' + energy_up

        DE = energies[f'{species_str}_ENERGY_DELTAPLUS']+energies[f'{species_str}_ENERGY_DELTAMINUS']

        if len(en_channel) > 2:
            raise Exception('en_channel must have len 2 or less!')
        if len(en_channel) == 2:
            try:
                df = df[df.columns[df.columns.str.startswith(f'{viewing.upper()}_{flux_key}')]]
            except (AttributeError, KeyError):
                None
            for bins in np.arange(en_channel[0], en_channel[-1]+1):
                if bins == en_channel[0]:
                    I_all = df[f'{viewing.upper()}_{flux_key}_{bins}'] * DE[bins]
                else:
                    I_all = I_all.add(df[f'{viewing.upper()}_{flux_key}_{bins}'] * DE[bins], fill_value=0)  # to handle possible NaN values
            DE_total = np.sum(DE[(en_channel[0]):(en_channel[-1]+1)])
            flux_out = pd.DataFrame({'flux': I_all/DE_total}, index=df.index)
        else:
            en_channel = en_channel[0]
            flux_out = pd.DataFrame({'flux': df[f'{viewing.upper()}_{flux_key}_{en_channel}']}, index=df.index)
    else:
        flux_out = pd.DataFrame({'flux': df[f'{viewing.upper()}_{flux_key}_{en_channel}']}, index=df.index)
        en_channel_string = en_str[en_channel].flat[0]
    # replace multiple whitespaces with single ones
    en_channel_string = ' '.join(en_channel_string.split())
    return flux_out, en_channel_string




[docs]
def calc_av_en_flux_PSP_EPILO(df, en_dict, en_channel, species, mode, chan, viewing):
    """
    This function averages the flux of several energy channels (and viewing directions) into a combined energy channel.
    channel numbers counted from 0

    So far only works for EPILO PE chanF electrons

    Parameters
    ----------
    df : pd.DataFrame DataFrame containing HET data
        DataFrame containing PSP data
    energies : dict
        Energy dict returned from psp_load
    en_channel : int or list
        energy channel number(s) to be used
    species : string
        'e', 'electrons'
    mode : string
        'pe' or 'ic'. EPILO mode
    chan : string
        'E', 'F', 'G', 'P', 'T'. EPILO chan
    viewing : int or list
        EPILO viewing. 0 to 7 for electrons; 0 to 79 for ions
        (ions 70-79 correspond to electrons 7, i.e., the electron wedges are
        split up into 10 viewings for ions)

    Returns
    -------
    pd.DataFrame
        flux_out: contains channel-averaged flux
    """

    if mode.lower() == 'pe':
        if species.lower() in ['e', 'electrons']:
            species_str = 'Electron'
            flux_key = 'Electron_CountRate'

            # TODO: the following is for introducing electron fluxes instead of countates
            # # check if electron flux columns are in the dataframe, because they are (so far) only available for chanE!
            # # fall back to count rates if not available
            # if 'Electron_Flux_Chan{chan}_E0_P0' in df.keys():
            #     flux_key = 'Electron_Flux'
            # else:
            #     flux_key = 'Electron_CountRate'

        # if species.lower() in ['p', 'protons', 'i', 'ions', 'h']:
        #     species_str = 'H'
        #     flux_key = 'H_Flux'
    elif mode.lower() == 'ic':
        # if species.lower() in ['e', 'electrons']:
        #     species_str = 'Electrons'
        #     flux_key = 'Electrons_Rate'
        if species.lower() in ['p', 'protons', 'i', 'ions', 'h']:
            species_str = 'H'
            flux_key = 'H_Flux'
    if type(en_channel) is int:
        en_channel = [en_channel]
    if type(viewing) is int:
        viewing = [viewing]

    df_out = pd.DataFrame()
    # flux_out_all = {}
    en_channel_string_all = []
    for view in viewing:
        if type(en_channel) is list:
            # energy = en_dict[f'{species_str}_Chan{chan}_Energy'].filter(like=f'_P{view}').values
            energy = en_dict[f'{species_str}_Chan{chan}_Energy'][en_dict[f'{species_str}_Chan{chan}_Energy'].keys().str.endswith(f'_P{view}')].values
            # energy_low = energy - en_dict[f'{species_str}_Chan{chan}_Energy_DELTAMINUS'].filter(like=f'_P{view}').values
            energy_low = energy - en_dict[f'{species_str}_Chan{chan}_Energy_DELTAMINUS'][en_dict[f'{species_str}_Chan{chan}_Energy_DELTAMINUS'].keys().str.endswith(f'_P{view}')].values
            # energy_high = energy + en_dict[f'{species_str}_Chan{chan}_Energy_DELTAPLUS'].filter(like=f'_P{view}').values
            energy_high = energy + en_dict[f'{species_str}_Chan{chan}_Energy_DELTAPLUS'][en_dict[f'{species_str}_Chan{chan}_Energy_DELTAPLUS'].keys().str.endswith(f'_P{view}')].values
            DE = en_dict[f'{species_str}_Chan{chan}_Energy_DELTAMINUS'].filter(like=f'_P{view}').values + en_dict[f'{species_str}_Chan{chan}_Energy_DELTAPLUS'].filter(like=f'_P{view}').values

            # build energy string of combined channel
            en_channel_string = np.round(energy_low[en_channel[0]], 1).astype(str) + ' - ' + np.round(energy_high[en_channel[-1]], 1).astype(str) + ' keV'

            # select view direction
            # df = df.filter(like=f'_P{view}')

            if len(en_channel) > 2:
                raise Exception("en_channel must have length 2 or less! Define first and last channel to use (don't list all of them)")
            if len(en_channel) == 2:
                # try:
                #     df = df[df.columns[df.columns.str.startswith(f'{view.upper()}_{flux_key}')]]
                #     # df = df[df.columns[df.columns.str.startswith(f'{flux_key}_Chan{chan}_')]]
                # except (AttributeError, KeyError):
                #     None
                for bins in np.arange(en_channel[0], en_channel[-1]+1):
                    if bins == en_channel[0]:
                        I_all = df[f"{flux_key}_Chan{chan}_E{bins}_P{view}"] * DE[bins]
                    else:
                        I_all = I_all.add(df[f"{flux_key}_Chan{chan}_E{bins}_P{view}"] * DE[bins], fill_value=0)  # to handle possible NaN values
                DE_total = np.sum(DE[(en_channel[0]):(en_channel[-1]+1)])
                flux_out = pd.DataFrame({f'viewing_{view}': I_all/DE_total}, index=df.index)
            if len(en_channel) == 1:
                en_channel = en_channel[0]
                flux_out = pd.DataFrame({f'viewing_{view}': df[f"{flux_key}_Chan{chan}_E{en_channel}_P{view}"]}, index=df.index)

            df_out = pd.concat([df_out, flux_out], axis=1)

        # calculate mean of all viewings:
        df_out2 = pd.DataFrame({'flux': df_out.mean(axis=1, skipna=True)}, index=df_out.index)
        # df_out2 = pd.DataFrame({flux_key: df_out.mean(axis=1, skipna=True)}, index=df_out.index)  # TODO: introduce flux_key into column name to distinguish between electron countrate and flux
        en_channel_string_all.append(en_channel_string)

    # check if not all elements of en_channel_string_all are the same:
    if len(en_channel_string_all) != en_channel_string_all.count(en_channel_string_all[0]):
        custom_warning(f"PSP/EPI-Lo {mode.upper()}: You are combining viewing directions that have different energies. This is strongly advised against!")
        print(en_channel_string_all)
    return df_out2, en_channel_string_all[0]



psp_load = copy.copy(psp_isois_load)


"""
Modification of sunpy's read_cdf function to allow skipping of reading variables from a cdf file.
This function is copied from sunpy under the terms of the BSD 2-Clause licence. See licenses/SUNPY_LICENSE.rst
"""


def _read_cdf_psp(fname, index_key, ignore_vars=[]):
    """
    Read a CDF file that follows the ISTP/IACG guidelines.

    Parameters
    ----------
    fname : path-like
        Location of single CDF file to read.
    index_key : str
        The CDF key to use as the index in the output DataFrame.
        For example, index_key='Epoch_ChanP'
    ignore_vars : list
        In case a CDF file has columns that are unused / not required, then
        the column names can be passed as a list into the function.

    Returns
    -------
    DataFrame
        A Pandas DataFrame for the time index defined by index_key.

    References
    ----------
    Space Physics Guidelines for CDF https://spdf.gsfc.nasa.gov/sp_use_of_cdf.html
    """
    # import astropy.units as u
    from cdflib.epochs import CDFepoch
    from sunpy import log
    # from sunpy.timeseries import GenericTimeSeries
    from sunpy.util.exceptions import warn_user
    cdf = cdflib.CDF(str(fname))
    # Extract the time varying variables
    cdf_info = cdf.cdf_info()
    # meta = cdf.globalattsget()
    if hasattr(cdflib, "__version__") and Version(cdflib.__version__) >= Version("1.0.0"):
        all_var_keys = cdf_info.rVariables + cdf_info.zVariables
    else:
        all_var_keys = cdf_info['rVariables'] + cdf_info['zVariables']
    var_attrs = {key: cdf.varattsget(key) for key in all_var_keys}
    # Get keys that depend on time
    var_keys = [var for var in var_attrs if 'DEPEND_0' in var_attrs[var] and var_attrs[var]['DEPEND_0'] is not None]

    # # Get unique time index keys
    # time_index_keys = sorted(set([var_attrs[var]['DEPEND_0'] for var in var_keys]))

    # all_ts = []
    # # For each time index, construct a GenericTimeSeries
    # for index_key in time_index_keys:
    #     try:
    #         index = cdf.varget(index_key)
    #     except ValueError:
    #         # Empty index for cdflib >= 0.3.20
    #         continue

    # Only for selected index_key:
    index = cdf.varget(index_key)

    # use to_astropy_time() instead here when we drop pandas in timeseries
    index = CDFepoch.to_datetime(index)
    # df = pd.DataFrame(index=pd.DatetimeIndex(name=index_key, data=index))
    # units = {}
    df_dict = {}

    for var_key in var_keys:
        if var_key in ignore_vars:
            continue  # leave for-loop, skipping var_key

        attrs = var_attrs[var_key]
        # If this variable doesn't depend on this index, continue
        if attrs['DEPEND_0'] != index_key:
            continue

        # Get data
        if hasattr(cdflib, "__version__") and Version(cdflib.__version__) >= Version("1.0.0"):
            var_last_rec = cdf.varinq(var_key).Last_Rec
        else:
            var_last_rec = cdf.varinq(var_key)['Last_Rec']
        if var_last_rec == -1:
            log.debug(f'Skipping {var_key} in {fname} as it has zero elements')
            continue

        data = cdf.varget(var_key)

        # Set fillval values to NaN
        # It would be nice to properley mask these values to work with
        # non-floating point (ie. int) dtypes, but this is not possible with pandas
        if np.issubdtype(data.dtype, np.floating):
            data[data == attrs['FILLVAL']] = np.nan

        # Skip all units :-(
        # # Get units
        # if 'UNITS' in attrs:
        #     unit_str = attrs['UNITS']
        #     try:
        #         unit = u.Unit(unit_str)
        #     except ValueError:
        #         if unit_str in _known_units:
        #             unit = _known_units[unit_str]
        #         else:
        #             warn_user(f'astropy did not recognize units of "{unit_str}". '
        #                         'Assigning dimensionless units. '
        #                         'If you think this unit should not be dimensionless, '
        #                         'please raise an issue at https://github.com/sunpy/sunpy/issues')
        #             unit = u.dimensionless_unscaled
        # else:
        #     warn_user(f'No units provided for variable "{var_key}". '
        #                 'Assigning dimensionless units.')
        #     unit = u.dimensionless_unscaled

        if data.ndim > 3:
            # Skip data with dimensions >= 3 and give user warning
            warn_user(f'The variable "{var_key}" has been skipped because it has more than 3 dimensions, which is unsupported.')
        elif data.ndim == 3:
            # Multiple columns, give each column a unique label.
            for j in range(data.T.shape[0]):
                for i, col in enumerate(data.T[j, :, :]):
                    # var_key_mod = var_key+'_E'+str(j).rjust(2, '0')
                    var_key_mod = var_key+f'_E{j}'
                    # df[var_key_mod + '_P'+str(i).rjust(2, '0')] = col
                    df_dict[var_key_mod + f'_P{i}'] = col
                    # units[var_key_mod + f'_{i}'] = unit
        elif data.ndim == 2:
            # Multiple columns, give each column a unique label
            for i, col in enumerate(data.T):
                df_dict[var_key + f'_{i}'] = col
                # units[var_key + f'_{i}'] = unit
        else:
            # Single column
            df_dict[var_key] = data
            # units[var_key] = unit

        df = pd.DataFrame(df_dict, index=pd.DatetimeIndex(name=index_key, data=index))

    # all_ts.append(GenericTimeSeries(data=df, units=units, meta=meta))

    # if not len(all_ts):
    if not len(df):
        log.debug(f'No data found in file {fname}')
    return df  # all_ts