Source code for seppy.loader.juice

import os

import cdflib
import pandas as pd
import pooch
import requests
import sunpy
from bs4 import BeautifulSoup
from packaging.version import Version
from seppy.util import resample_df
from sunpy.timeseries import TimeSeries

# Raise the threshold of pooch's logger to WARNING so that its lower-severity
# messages are not emitted while files are being retrieved.
logger = pooch.get_logger()
logger.setLevel("WARNING")


def juice_radem_download(date, path=None):
    """
    Download JUICE/RADEM cruise science data file from ESA's PSA to local path

    The monthly directory listing on the PSA server is fetched and searched
    for the first ``.cdf`` link whose name contains the requested day; that
    file is then retrieved with pooch.

    Parameters
    ----------
    date : datetime object
        datetime of data to retrieve (only year, month and day are used)
    path : str or path-like
        local path where the files will be stored. If not provided, the
        download directory from the sunpy configuration is used.

    Returns
    -------
    downloaded_file : str or None
        full local path to downloaded file. None if the directory listing
        could not be fetched (non-200 status), if no matching file is listed,
        or if the file download fails with an HTTP error.

    Raises
    ------
    requests.exceptions.Timeout
        if the server does not answer the directory-listing request in time
    """
    # use sunpy download directory if no path is provided
    if not path:
        path = sunpy.config.get('downloads', 'download_dir')

    # add an OS-specific separator to the end of 'path' (an empty path is
    # left alone so that it never turns into the filesystem root)
    if path:
        path = os.fspath(path)
        if not path.endswith(os.sep):
            path = f'{path}{os.sep}'

    month_tag = date.strftime('%Y%m')
    day_tag = date.strftime('%Y%m%d')

    # URL of the webpage containing the downloadable files of that month
    base_url = f"https://archives.esac.esa.int/psa/ftp/Juice/juice_radem/data_raw/cruise/sc/{month_tag}/"

    # A timeout keeps a stalled server from blocking the caller indefinitely.
    response = requests.get(base_url, timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch the webpage: {response.status_code}")
        return None

    # Take the first link that points to the CDF file of the requested day
    soup = BeautifulSoup(response.content, 'html.parser')
    file_tag = f"radem_raw_sc_{day_tag}__"
    fname = None
    for link in soup.find_all('a'):
        href = link.get('href')
        if href and file_tag in href and href.endswith('.cdf'):
            fname = href
            break

    if not fname:
        print("No suitable file found online.")
        return None

    url = base_url + fname
    try:
        try:
            return pooch.retrieve(url=url, known_hash=None, fname=fname, path=path, progressbar=True)
        except (ModuleNotFoundError, ValueError):
            # The progress bar needs an optional dependency; pooch reports
            # its absence with ValueError (ModuleNotFoundError is kept from
            # the previous implementation). Retry without a progress bar.
            return pooch.retrieve(url=url, known_hash=None, fname=fname, path=path, progressbar=False)
    except requests.HTTPError:
        # The outer try also covers the retry above, so an HTTP error is
        # reported the same way with or without a progress bar.
        print(f'No corresponding JUICE/RADEM data found at {url}')
        return None
def juice_radem_load(startdate, enddate, resample=None, path=None, pos_timestamp='center'):
    """
    Download & load JUICE/RADEM cruise science data and returns it as Pandas
    DataFrame (and metadata dictionaries).

    Note that the data is provided in counts and not converted to physical
    units (as of Nov 2025); also the instrument configuration changes over
    time.

    One file per calendar day from startdate's day through enddate's day
    (both inclusive) is requested. The complete content of those files is
    returned; the DataFrame is not cut to the exact start and end times. The
    energy and metadata dictionaries are read from the first loaded file only.

    Parameters
    ----------
    startdate : datetime object
        start datetime of data to retrieve
    enddate : datetime object
        end datetime of data to retrieve
    resample : str
        resampling frequency (e.g. '1min', '10min', '1H', etc.). If None, no
        resampling is applied.
    path : str
        local path where the files are stored / will be downloaded to
    pos_timestamp : str
        position of the timestamp when resampling ('start', 'center', 'end')

    Returns
    -------
    df : Pandas DataFrame
        DataFrame containing the JUICE/RADEM data (empty if no file could be
        downloaded)
    energies_dict : dict
        Dictionary containing the JUICE/RADEM data energy and label
        information (empty if no file could be downloaded)
    metadata_dict : dict
        Dictionary containing the JUICE/RADEM data metadata (empty if no file
        could be downloaded)
    """
    # Generate one entry per calendar day. Both bounds are set to midnight
    # first: date_range steps in whole days from the start value, so an end
    # time-of-day earlier than the start time-of-day would otherwise drop the
    # last day.
    first_day = pd.Timestamp(startdate).normalize()
    last_day = pd.Timestamp(enddate).normalize()
    dates = pd.date_range(start=first_day, end=last_day, freq='D')

    downloaded_files = []
    for date in dates:
        fname = juice_radem_download(date, path=path)
        # days without an available file are skipped
        if fname:
            downloaded_files.append(fname)

    if not downloaded_files:
        print("No data files were downloaded.")
        return pd.DataFrame(), {}, {}

    # Load the data using SunPy TimeSeries
    data = TimeSeries(downloaded_files, concatenate=True)
    df = data.to_dataframe()

    # drop string columns
    df = df.drop(columns=['TIME_OBT'])

    # convert TIME_UTC column from string to datetime
    df['TIME_UTC'] = pd.to_datetime(df['TIME_UTC'])

    if resample:
        df = resample_df(df, resample, pos_timestamp=pos_timestamp, cols_unc=[], verbose=False)

    # metadata is taken from the first file of the requested period
    energies_dict, metadata_dict = juice_radem_load_metadata(filename=downloaded_files[0])

    return df, energies_dict, metadata_dict
def juice_radem_load_metadata(filename):
    """
    Load JUICE/RADEM cruise science data metadata and return it as a dictionary

    Parameters
    ----------
    filename : str
        path to the CDF file to read the metadata from

    Returns
    -------
    energies_dict : dict
        Dictionary containing the JUICE/RADEM data energy and label
        information: the values of every variable whose 'VAR_TYPE' attribute
        is 'metadata', keyed by variable name
    metadata_dict : dict
        Dictionary containing the JUICE/RADEM data metadata: the global
        attributes under 'Global_Attributes' plus the attributes of every
        variable, keyed by variable name
    """
    # open cdf file with cdflib to access metadata
    cdf = cdflib.CDF(filename)

    # dict with all metadata info
    metadata_dict = {"Global_Attributes": cdf.globalattsget()}

    # dict with energy/label infos
    energies_dict = {}

    # cdf_info() is read through attributes for cdflib >= 1.0.0 and through
    # dictionary keys for older versions
    cdf_info = cdf.cdf_info()
    if hasattr(cdflib, "__version__") and Version(cdflib.__version__) >= Version("1.0.0"):
        all_var_keys = cdf_info.rVariables + cdf_info.zVariables
    else:
        all_var_keys = cdf_info['rVariables'] + cdf_info['zVariables']

    for key in all_var_keys:
        # read the attributes once per variable and reuse them below
        var_attrs = cdf.varattsget(key)
        metadata_dict[key] = var_attrs
        # variables without a VAR_TYPE attribute are simply not metadata
        if var_attrs.get('VAR_TYPE') == 'metadata':
            energies_dict[key] = cdf.varget(key)

    return energies_dict, metadata_dict