pace module¶
This module contains functions to read and process PACE data.
extract_pace(dataset, latitude, longitude, delta=0.01, return_plot=False, **kwargs)
¶
Extracts data from a PACE dataset for a given latitude and longitude range and calculates the mean over these dimensions.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
Union[xr.Dataset, str] |
The PACE dataset or path to the dataset file. |
required |
latitude |
Union[float, Tuple[float, float]] |
The latitude or range of latitudes to extract data for. |
required |
longitude |
Union[float, Tuple[float, float]] |
The longitude or range of longitudes to extract data for. |
required |
delta |
float |
The range to add/subtract to the latitude and longitude if they are not ranges. Defaults to 0.01. |
0.01 |
return_plot |
bool |
Whether to return a plot of the data. Defaults to False. |
False |
**kwargs |
Additional keyword arguments to pass to the plot function. |
{} |
Returns:
Type | Description |
---|---|
Union[xr.DataArray, plt.figure.Figure] |
The mean data over the latitude and longitude dimensions, or a plot of this data if return_plot is True. |
Source code in hypercoast/pace.py
def extract_pace(
    dataset: Union[xr.Dataset, str],
    latitude: Union[float, Tuple[float, float]],
    longitude: Union[float, Tuple[float, float]],
    delta: float = 0.01,
    return_plot: bool = False,
    **kwargs,
) -> Union[xr.DataArray, plt.Figure]:
    """
    Extracts data from a PACE dataset for a given latitude and longitude range
    and calculates the mean over these dimensions.

    Args:
        dataset (Union[xr.Dataset, str]): The PACE dataset, or the path to a
            PACE file, which is opened with `read_pace`.
        latitude (Union[float, Tuple[float, float]]): A single latitude, or a
            (min, max) list/tuple. The bounds are exclusive because
            `filter_pace` compares with strict inequalities.
        longitude (Union[float, Tuple[float, float]]): A single longitude, or a
            (min, max) list/tuple, handled like `latitude`.
        delta (float, optional): Half-width of the window built around a
            scalar latitude/longitude. Ignored for values that are already
            ranges. Defaults to 0.01.
        return_plot (bool, optional): Whether to plot the averaged data instead
            of returning it. Defaults to False.
        **kwargs: Additional keyword arguments to pass to `data.plot.line`.

    Returns:
        Union[xr.DataArray, plt.figure.Figure]: The mean of the filtered "Rrs"
            data over the latitude and longitude dimensions, or the result of
            `data.plot.line(**kwargs)` if return_plot is True.
    """
    # filter_pace indexes the dataset directly, so a file path has to be
    # opened first; without this a str input fails on dataset["latitude"].
    if isinstance(dataset, str):
        dataset = read_pace(dataset)

    # Scalars are widened into a (value - delta, value + delta) window.
    if not isinstance(latitude, (list, tuple)):
        latitude = (latitude - delta, latitude + delta)
    if not isinstance(longitude, (list, tuple)):
        longitude = (longitude - delta, longitude + delta)

    ds = filter_pace(dataset, latitude, longitude, return_plot=False)
    data = ds.mean(dim=["latitude", "longitude"])

    if return_plot:
        return data.plot.line(**kwargs)
    return data
filter_pace(dataset, latitude, longitude, drop=True, return_plot=False, **kwargs)
¶
Filters a PACE dataset based on latitude and longitude.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
xr.Dataset |
The PACE dataset to filter. |
required |
latitude |
float or tuple |
The latitude to filter by. If a tuple or list, it represents a range. |
required |
longitude |
float or tuple |
The longitude to filter by. If a tuple or list, it represents a range. |
required |
drop |
bool |
Whether to drop the filtered out data. Defaults to True. |
True |
Returns:
Type | Description |
---|---|
xr.DataArray |
The filtered PACE data. |
Source code in hypercoast/pace.py
def filter_pace(dataset, latitude, longitude, drop=True, return_plot=False, **kwargs):
    """
    Selects the "Rrs" values of a PACE dataset that fall inside a
    latitude/longitude window.

    Args:
        dataset (xr.Dataset): The PACE dataset to filter. It is indexed with
            the keys "latitude", "longitude" and "Rrs".
        latitude (float or tuple): A list/tuple is treated as an exclusive
            (min, max) range; any other value is matched with `==`.
        longitude (float or tuple): Same convention as `latitude`.
        drop (bool, optional): Forwarded to `DataArray.where`. Defaults to True.
        return_plot (bool, optional): If True, the filtered pixels are drawn as
            line plots (one hue per pixel) and nothing is returned.
            Defaults to False.
        **kwargs: Additional keyword arguments forwarded to `DataArray.where`.

    Returns:
        xr.DataArray: The filtered PACE data with all-NaN latitude and
            longitude slices removed, or None when return_plot is True.
    """
    range_types = (list, tuple)

    lat_values = dataset["latitude"]
    if isinstance(latitude, range_types):
        lat_mask = (lat_values > latitude[0]) & (lat_values < latitude[1])
    else:
        lat_mask = lat_values == latitude

    lon_values = dataset["longitude"]
    if isinstance(longitude, range_types):
        lon_mask = (lon_values > longitude[0]) & (lon_values < longitude[1])
    else:
        lon_mask = lon_values == longitude

    selected = dataset["Rrs"].where(lat_mask & lon_mask, drop=drop, **kwargs)
    # Trim rows, then columns, that contain no valid value at all.
    selected = selected.dropna(dim="latitude", how="all")
    selected = selected.dropna(dim="longitude", how="all")

    if not return_plot:
        return selected

    # One curve per (latitude, longitude) pixel.
    per_pixel = selected.stack(
        {"pixel": ["latitude", "longitude"]},
        create_index=False,
    )
    per_pixel.plot.line(hue="pixel")
grid_pace(dataset, wavelengths=None, method='nearest', **kwargs)
¶
Grids a PACE dataset based on latitude and longitude.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
xr.Dataset |
The PACE dataset to grid. |
required |
wavelengths |
float or int |
The wavelength to select. |
None |
method |
str |
The method to use for griddata interpolation. Defaults to "nearest". |
'nearest' |
**kwargs |
Additional keyword arguments to pass to the xr.Dataset constructor. |
{} |
Returns:
Type | Description |
---|---|
xr.DataArray |
The gridded PACE data. |
Source code in hypercoast/pace.py
def grid_pace(dataset, wavelengths=None, method="nearest", **kwargs):
    """
    Grids a PACE dataset based on latitude and longitude.

    The swath values of "Rrs" are interpolated with `scipy.interpolate.griddata`
    onto a regular latitude/longitude grid that spans the swath's extent and
    has the same number of rows and columns as the input.

    Args:
        dataset (xr.Dataset): The PACE dataset to grid. Its `latitude` and
            `longitude` coordinates are expected to be 2-D.
        wavelengths (float, int or array-like, optional): The wavelength(s) to
            grid. A scalar, list, tuple or numpy array is accepted. If None,
            only the first wavelength of the dataset is gridded.
        method (str, optional): The method to use for griddata interpolation.
            Defaults to "nearest".
        **kwargs: Additional keyword arguments to pass to the xr.Dataset constructor.

    Returns:
        xr.Dataset: A dataset holding the gridded "Rrs" variable with dimensions
            (latitude, longitude, wavelength).
    """
    # Imported for its side effect: it registers the ``.rio`` accessor that is
    # used at the end of this function.
    import rioxarray  # noqa: F401
    from scipy.interpolate import griddata

    if wavelengths is None:
        wavelengths = dataset.coords["wavelength"].values[0]
    # Normalise scalars, tuples and arrays to a flat list. Wrapping a tuple or
    # array as a single list element would select several bands at once and
    # hand griddata a value vector longer than the point vector.
    wavelengths = np.atleast_1d(wavelengths).tolist()

    lat = dataset.latitude
    lon = dataset.longitude

    grid_lat = np.linspace(lat.min(), lat.max(), lat.shape[0])
    grid_lon = np.linspace(lon.min(), lon.max(), lon.shape[1])
    grid_lon_2d, grid_lat_2d = np.meshgrid(grid_lon, grid_lat)

    # The sample locations are the same for every band, so build them once.
    points = (lat.data.flatten(), lon.data.flatten())

    gridded_data_dict = {}
    for wavelength in wavelengths:
        data = dataset.sel(wavelength=wavelength, method="nearest")["Rrs"]
        gridded_data_dict[wavelength] = griddata(
            points,
            data.data.flatten(),
            (grid_lat_2d, grid_lon_2d),
            method=method,
        )

    # Create a 3D array with dimensions latitude, longitude, and wavelength
    gridded_data_3d = np.dstack(list(gridded_data_dict.values()))

    dataset2 = xr.Dataset(
        {"Rrs": (("latitude", "longitude", "wavelength"), gridded_data_3d)},
        coords={
            "latitude": ("latitude", grid_lat),
            "longitude": ("longitude", grid_lon),
            "wavelength": ("wavelength", list(gridded_data_dict.keys())),
        },
        **kwargs,
    )
    dataset2["Rrs"].rio.write_crs("EPSG:4326", inplace=True)
    return dataset2
grid_pace_bgc(dataset, variable='chlor_a', method='nearest', **kwargs)
¶
Grids PACE BGC data using specified interpolation method.
This function takes an xarray Dataset containing PACE BGC data, interpolates it onto a regular grid using the specified method, and returns the gridded data as an xarray DataArray with the specified variable.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
xr.Dataset |
The input dataset containing PACE BGC data with latitude and longitude coordinates. |
required |
variable |
str |
The variable within the dataset to grid. Can be one of chlor_a, carbon_phyto, poc, chlor_a_unc, carbon_phyto_unc, and l2_flags. Defaults to "chlor_a". |
'chlor_a' |
method |
str |
The interpolation method to use. Options include "nearest", "linear", and "cubic". Defaults to "nearest". |
'nearest' |
**kwargs |
Any |
Additional keyword arguments to pass to the xr.Dataset creation. |
{} |
Returns:
Type | Description |
---|---|
xr.DataArray |
The gridded data as an xarray DataArray, with the specified variable and EPSG:4326 CRS. |
Examples:
>>> dataset = hypercoast.read_pace_bgc("path_to_your_dataset.nc")
>>> gridded_data = grid_pace_bgc(dataset, variable="chlor_a", method="nearest")
>>> print(gridded_data)
Source code in hypercoast/pace.py
def grid_pace_bgc(
    dataset: xr.Dataset,
    variable: str = "chlor_a",
    method: str = "nearest",
    **kwargs: Any,
) -> xr.DataArray:
    """
    Interpolates one PACE BGC variable onto a regular latitude/longitude grid.

    The target grid spans the minimum-to-maximum latitude and longitude of the
    input and keeps the input's number of rows and columns. Values are
    resampled with `scipy.interpolate.griddata`.

    Args:
        dataset (xr.Dataset): The input dataset containing PACE BGC data with
            latitude and longitude coordinates.
        variable (str, optional): The variable within the dataset to grid. Can be
            one of chlor_a, carbon_phyto, poc, chlor_a_unc, carbon_phyto_unc, and
            l2_flags. Defaults to "chlor_a".
        method (str, optional): The interpolation method to use. Options include
            "nearest", "linear", and "cubic". Defaults to "nearest".
        **kwargs (Any): Additional keyword arguments to pass to the xr.Dataset creation.

    Returns:
        xr.DataArray: The gridded variable with EPSG:4326 written as its CRS.

    Example:
        >>> dataset = hypercoast.read_pace_bgc("path_to_your_dataset.nc")
        >>> gridded_data = grid_pace_bgc(dataset, variable="chlor_a", method="nearest")
        >>> print(gridded_data)
    """
    import rioxarray
    from scipy.interpolate import griddata

    src_lat = dataset.latitude
    src_lon = dataset.longitude

    # Regular 1-D axes covering the swath extent.
    target_lat = np.linspace(src_lat.min(), src_lat.max(), src_lat.shape[0])
    target_lon = np.linspace(src_lon.min(), src_lon.max(), src_lon.shape[1])
    mesh_lon, mesh_lat = np.meshgrid(target_lon, target_lat)

    values = dataset[variable]
    interpolated = griddata(
        (src_lat.data.flatten(), src_lon.data.flatten()),
        values.data.flatten(),
        (mesh_lat, mesh_lon),
        method=method,
    )

    data_vars = {variable: (("latitude", "longitude"), interpolated)}
    grid_coords = {
        "latitude": ("latitude", target_lat),
        "longitude": ("longitude", target_lon),
    }
    gridded = xr.Dataset(data_vars, coords=grid_coords, **kwargs)

    return gridded[variable].rio.write_crs("EPSG:4326")
pace_chla_to_image(data, output=None, **kwargs)
¶
Converts PACE chlorophyll-a data to an image.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
xr.DataArray or str |
The chlorophyll-a data or the file path to the data. |
required |
output |
str |
The file path where the image will be saved. If None, the image will be returned as a PIL Image object. Defaults to None. |
None |
**kwargs |
Additional keyword arguments to be passed to `leafmap.array_to_image`. |
{} |
Returns:
Type | Description |
---|---|
rasterio.Dataset or None |
The image converted from the data. If `output` is provided, the image is also saved to the specified file. |
Source code in hypercoast/pace.py
def pace_chla_to_image(data, output=None, **kwargs):
    """
    Converts PACE chlorophyll-a data to an image.

    Args:
        data (xr.DataArray or str): The chlorophyll-a data, or a file path that
            is loaded with `read_pace_chla`.
        output (str, optional): If given, the image is additionally written to
            this path as a float32 GeoTIFF. Defaults to None.
        **kwargs: Additional keyword arguments to be passed to `leafmap.array_to_image`.

    Returns:
        The image produced by `leafmap.array_to_image`. It is returned whether
            or not `output` is provided.

    Raises:
        ValueError: If `data` is neither a str nor an xarray DataArray.
    """
    from leafmap import array_to_image, image_to_geotiff

    is_path = isinstance(data, str)
    if not is_path and not isinstance(data, xr.DataArray):
        raise ValueError("data must be an xarray DataArray")
    if is_path:
        data = read_pace_chla(data)

    image = array_to_image(data, transpose=False, output=None, **kwargs)
    if output is None:
        return image

    image_to_geotiff(image, output, dtype="float32")
    return image
pace_to_image(dataset, wavelengths=None, method='nearest', gridded=False, output=None, **kwargs)
¶
Converts a PACE dataset to an image.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
xarray.Dataset or str |
The dataset containing the PACE data or the file path to the dataset. |
required |
wavelengths |
array-like |
The specific wavelengths to select. If None, all wavelengths are selected. Defaults to None. |
None |
method |
str |
The method to use for data interpolation. Defaults to "nearest". |
'nearest' |
gridded |
bool |
Whether the dataset is a gridded dataset. Defaults to False. |
False |
output |
str |
The file path where the image will be saved. If None, the image will be returned as a PIL Image object. Defaults to None. |
None |
**kwargs |
Additional keyword arguments to be passed to `leafmap.array_to_image`. |
{} |
Returns:
Type | Description |
---|---|
rasterio.Dataset or None |
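The image converted from the dataset. If `output` is provided, the image will be saved to the specified file and the function will return None. |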
Source code in hypercoast/pace.py
def pace_to_image(
    dataset, wavelengths=None, method="nearest", gridded=False, output=None, **kwargs
):
    """
    Converts a PACE dataset to an image.

    Args:
        dataset (xarray.Dataset or str): The dataset containing the PACE data or
            the file path to the dataset (opened with `read_pace`).
        wavelengths (array-like, optional): The specific wavelengths to select
            by nearest match. The value is also forwarded to `grid_pace` when
            the data still has to be gridded. Defaults to None.
        method (str, optional): The interpolation method handed to `grid_pace`.
            Defaults to "nearest".
        gridded (bool, optional): Whether the dataset is already gridded. When
            False the dataset is first passed through `grid_pace`. Defaults to False.
        output (str, optional): Output file path, forwarded to
            `leafmap.array_to_image`. Defaults to None.
        **kwargs: Additional keyword arguments to be passed to `leafmap.array_to_image`.

    Returns:
        Whatever `leafmap.array_to_image` returns for the "Rrs" data.
    """
    from leafmap import array_to_image

    if isinstance(dataset, str):
        dataset = read_pace(dataset, wavelengths=wavelengths, method="nearest")

    if wavelengths is not None:
        dataset = dataset.sel(wavelength=wavelengths, method="nearest")

    # Swath data must be resampled to a regular grid before it can be imaged.
    grid = (
        dataset
        if gridded
        else grid_pace(dataset, wavelengths=wavelengths, method=method)
    )

    rrs = grid["Rrs"]
    rrs.rio.write_crs("EPSG:4326", inplace=True)
    return array_to_image(rrs, transpose=False, output=output, **kwargs)
read_pace(filepath, wavelengths=None, method='nearest', engine='h5netcdf', **kwargs)
¶
Reads PACE data from a given file and returns an xarray Dataset.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
filepath |
str |
Path to the file to read. |
required |
wavelengths |
array-like |
Specific wavelengths to select. If None, all wavelengths are selected. |
None |
method |
str |
Method to use for selection when wavelengths is not None. Defaults to "nearest". |
'nearest' |
**kwargs |
Additional keyword arguments to pass to the `sel` method when wavelengths is not None. |
{} |
Returns:
Type | Description |
---|---|
xr.Dataset |
An xarray Dataset containing the PACE data. |
Source code in hypercoast/pace.py
def read_pace(
    filepath, wavelengths=None, method="nearest", engine="h5netcdf", **kwargs
):
    """
    Reads PACE data from a given file and returns an xarray Dataset.

    "Rrs" is taken from the `geophysical_data` group, longitude/latitude from
    `navigation_data`, and the wavelengths from `sensor_band_parameters`. The
    dimensions are then renamed to `latitude`, `longitude` and `wavelength`.

    Args:
        filepath (str): Path to the file to read.
        wavelengths (array-like, optional): Specific wavelengths to select. If
            None, all wavelengths are selected.
        method (str, optional): Method to use for selection when wavelengths is
            not None. Defaults to "nearest".
        engine (str, optional): Backend engine handed to `xr.open_dataset`.
            Defaults to "h5netcdf".
        **kwargs: Additional keyword arguments to pass to the `sel` method when
            wavelengths is not None.

    Returns:
        xr.Dataset: An xarray Dataset containing the PACE data.
    """

    def _open_group(group_name):
        return xr.open_dataset(filepath, engine=engine, group=group_name)

    rrs = _open_group("geophysical_data")["Rrs"]
    band_params = _open_group("sensor_band_parameters")

    nav = _open_group("navigation_data")
    nav = nav.set_coords(("longitude", "latitude"))
    # Bring the navigation dimension name in line with the one used by Rrs.
    if "pixel_control_points" in nav.dims:
        nav = nav.rename({"pixel_control_points": "pixels_per_line"})

    merged = xr.merge([rrs, nav.coords.to_dataset()])
    merged.coords["wavelength_3d"] = band_params.coords["wavelength_3d"]

    new_names = {
        "number_of_lines": "latitude",
        "pixels_per_line": "longitude",
        "wavelength_3d": "wavelength",
    }
    merged = merged.rename(new_names)

    if wavelengths is None:
        return merged
    return merged.sel(wavelength=wavelengths, method=method, **kwargs)
read_pace_aop(filepath, engine='h5netcdf', **kwargs)
¶
Reads PACE data from a given file and returns an xarray Dataset.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
filepath |
str |
Path to the file to read. |
required |
wavelengths |
array-like |
Specific wavelengths to select. If None, all wavelengths are selected. |
required |
method |
str |
Method to use for selection when wavelengths is not None. Defaults to "nearest". |
required |
**kwargs |
Additional keyword arguments to pass to `xr.open_dataset`. |
{} |
Returns:
Type | Description |
---|---|
xr.Dataset |
An xarray Dataset containing the PACE data. |
Source code in hypercoast/pace.py
def read_pace_aop(filepath, engine="h5netcdf", **kwargs):
    """
    Reads PACE AOP data from a given file and returns an xarray Dataset.

    "Rrs" is taken from the `geophysical_data` group and combined with the
    longitude/latitude coordinates from `navigation_data` and the
    `wavelength_3d` coordinate from `sensor_band_parameters`. The original
    dimension names are kept.

    Args:
        filepath (str): Path to the file to read.
        engine (str, optional): Backend engine handed to `xr.open_dataset`.
            Defaults to "h5netcdf".
        **kwargs: Additional keyword arguments to pass to every
            `xr.open_dataset` call.

    Returns:
        xr.Dataset: An xarray Dataset containing the PACE data.
    """

    def _open_group(group_name):
        return xr.open_dataset(filepath, engine=engine, group=group_name, **kwargs)

    rrs = _open_group("geophysical_data")["Rrs"]
    band_params = _open_group("sensor_band_parameters")

    nav = _open_group("navigation_data")
    nav = nav.set_coords(("longitude", "latitude"))
    # Bring the navigation dimension name in line with the one used by Rrs.
    if "pixel_control_points" in nav.dims:
        nav = nav.rename({"pixel_control_points": "pixels_per_line"})

    combined = xr.merge([rrs, nav.coords.to_dataset()])
    combined.coords["wavelength_3d"] = band_params.coords["wavelength_3d"]
    return combined
read_pace_bgc(filepath, variable=None, engine='h5netcdf', **kwargs)
¶
Reads PACE BGC data from a specified file and returns an xarray Dataset.
This function opens a dataset from a file using the specified engine, optionally selects a single variable, merges geophysical and navigation data, sets appropriate coordinates, and renames dimensions for easier use.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
filepath |
str |
The path to the file containing the PACE BGC data. |
required |
variable |
Optional[str] |
The specific variable to extract from the geophysical_data group. If None, all variables are read. Defaults to None. |
None |
engine |
str |
The engine to use for reading the file. Defaults to "h5netcdf". |
'h5netcdf' |
**kwargs |
Any |
Additional keyword arguments to pass to `xr.open_dataset`. |
{} |
Returns:
Type | Description |
---|---|
xr.Dataset |
An xarray Dataset containing the requested PACE BGC data, with merged geophysical and navigation data, set coordinates, and renamed dimensions. |
Examples:
>>> dataset = read_pace_bgc("path/to/your/datafile.h5", variable="chlor_a")
>>> print(dataset)
Source code in hypercoast/pace.py
def read_pace_bgc(
    filepath: str,
    variable: Optional[str] = None,
    engine: str = "h5netcdf",
    **kwargs: Any,
) -> xr.Dataset:
    """
    Reads PACE BGC data from a specified file and returns an xarray Dataset.

    The `geophysical_data` group (optionally narrowed to one variable) is merged
    with the longitude/latitude coordinates of the `navigation_data` group, the
    dimensions are renamed to `latitude` and `longitude`, and the file's
    root-level attributes are copied onto the result.

    Args:
        filepath (str): The path to the file containing the PACE BGC data.
        variable (Optional[str], optional): The specific variable to extract
            from the geophysical_data group. If None, all variables are read.
            Defaults to None.
        engine (str, optional): The engine to use for reading the file.
            Defaults to "h5netcdf".
        **kwargs (Any): Additional keyword arguments to pass to `xr.open_dataset`.

    Returns:
        xr.Dataset: An xarray Dataset containing the requested PACE BGC data,
            with merged geophysical and navigation data, set coordinates, and
            renamed dimensions.

    Example:
        >>> dataset = read_pace_bgc("path/to/your/datafile.h5", variable="chlor_a")
        >>> print(dataset)
    """
    geophysical = xr.open_dataset(
        filepath, engine=engine, group="geophysical_data", **kwargs
    )
    if variable is not None:
        geophysical = geophysical[variable]

    nav = xr.open_dataset(filepath, engine=engine, group="navigation_data", **kwargs)
    nav = nav.set_coords(("longitude", "latitude"))
    # Bring the navigation dimension name in line with the geophysical data.
    if "pixel_control_points" in nav.dims:
        nav = nav.rename({"pixel_control_points": "pixels_per_line"})

    merged = xr.merge([geophysical, nav.coords.to_dataset()])
    merged = merged.rename(
        {"number_of_lines": "latitude", "pixels_per_line": "longitude"}
    )

    # Global attributes live on the root group, not on the sub-groups.
    root_attrs = xr.open_dataset(filepath, engine=engine, **kwargs).attrs
    merged.attrs.update(root_attrs)
    return merged
read_pace_chla(filepaths, engine='h5netcdf', **kwargs)
¶
Reads chlorophyll-a data from PACE files and applies a logarithmic transformation.
This function supports reading from a single file or multiple files. For multiple files, it combines them into a single dataset. It then extracts the chlorophyll-a variable, applies a logarithmic transformation, and sets the coordinate reference system to EPSG:4326.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
filepaths |
Union[str, List[str]] |
A string or a list of strings containing the file path(s) to the PACE chlorophyll-a data files. |
required |
engine |
str |
The backend engine to use for reading files. Defaults to "h5netcdf". |
'h5netcdf' |
**kwargs |
Additional keyword arguments to pass to `xr.open_mfdataset`. |
{} |
Returns:
Type | Description |
---|---|
DataArray |
An xarray DataArray containing the logarithmically transformed chlorophyll-a data with updated attributes. |
Examples:
Read chlorophyll-a data from a single file:
>>> chla_data = read_pace_chla('path/to/single/file.nc')
Read and combine chlorophyll-a data from multiple files:
>>> chla_data = read_pace_chla(['path/to/file1.nc', 'path/to/file2.nc'], combine='by_coords')
Source code in hypercoast/pace.py
def read_pace_chla(
    filepaths: Union[str, List[str]], engine: str = "h5netcdf", **kwargs
) -> xr.DataArray:
    """
    Reads chlorophyll-a data from PACE files and applies a logarithmic transformation.

    This function supports reading from a single file, a glob pattern, or a
    sequence of files. The files are sorted by path, opened with
    `xr.open_mfdataset` and concatenated along a `date` dimension whose labels
    come from `extract_date_from_filename`. The `chlor_a` variable is then
    log10-transformed, transposed to (lat, lon, date) and tagged with EPSG:4326.

    Args:
        filepaths: A file path, a glob pattern, or a list of file paths to the
            PACE chlorophyll-a data files. The caller's list is not modified.
        engine: The backend engine to use for reading files. Defaults to "h5netcdf".
        **kwargs: Additional keyword arguments to pass to `xr.open_mfdataset`.
            `combine` defaults to "nested" and `concat_dim` to "date".

    Returns:
        An xarray DataArray containing the logarithmically transformed
        chlorophyll-a data. Its `units` attribute is rewritten as
        ``lg(<original units>)`` and a `date` attribute lists the dates.

    Raises:
        FileNotFoundError: If no file matches `filepaths`.

    Examples:
        Read chlorophyll-a data from a single file:
        >>> chla_data = read_pace_chla('path/to/single/file.nc')

        Read and combine chlorophyll-a data from multiple files:
        >>> chla_data = read_pace_chla(['path/to/file1.nc', 'path/to/file2.nc'])
    """
    import glob
    import os

    # Imported for its side effect: it registers the ``.rio`` accessor.
    import rioxarray  # noqa: F401

    # Resolve the input to an explicit, sorted list BEFORE opening anything.
    # With combine="nested" the files are concatenated in the order given, so
    # the date labels derived below must follow exactly the same order.
    if isinstance(filepaths, str):
        pattern = os.path.expanduser(filepaths)
        paths = [pattern] if os.path.isfile(pattern) else glob.glob(pattern)
    else:
        # Copy so that sorting never mutates the caller's sequence.
        paths = list(filepaths)
    paths.sort()

    if not paths:
        raise FileNotFoundError(f"No files found matching: {filepaths!r}")

    kwargs.setdefault("combine", "nested")
    kwargs.setdefault("concat_dim", "date")
    dataset = xr.open_mfdataset(paths, engine=engine, **kwargs)

    dates = [extract_date_from_filename(f).strftime("%Y-%m-%d") for f in paths]
    dataset = dataset.assign_coords(date=("date", dates))

    chla = np.log10(dataset["chlor_a"])
    chla.attrs.update(
        {
            "units": f'lg({dataset["chlor_a"].attrs["units"]})',
            "date": dates,
        }
    )

    chla = chla.transpose("lat", "lon", "date")
    chla.rio.write_crs("EPSG:4326", inplace=True)
    return chla
view_pace_pixel_locations(filepath, step=20, figsize=(8, 6), **kwargs)
¶
Visualizes a subset of PACE pixel locations on a scatter plot.
This function reads PACE AOP data from a specified file, subsamples the data according to a step size, and plots the longitude and latitude of the selected pixels using a scatter plot.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
filepath |
str |
The path to the file containing the PACE AOP data. |
required |
step |
int |
Controls the subsampling density: every (dimension size // step)-th line and pixel is plotted, so a larger value results in more data points being plotted. Defaults to 20. |
20 |
**kwargs |
Any |
Additional keyword arguments to pass to the scatter plot call (`ax.scatter`). |
{} |
Returns:
Type | Description |
---|---|
plt.Figure |
A matplotlib figure object containing the scatter plot. |
Examples:
>>> plot = view_pace_pixel_locations("path/to/your/datafile.h5", step=10)
>>> plt.show()
Source code in hypercoast/pace.py
def view_pace_pixel_locations(
    filepath: str, step: int = 20, figsize: Tuple[float, float] = (8, 6), **kwargs: Any
) -> plt.Figure:
    """
    Visualizes a subset of PACE pixel locations on a scatter plot.

    This function reads PACE AOP data from a specified file with
    `read_pace_aop`, subsamples both swath dimensions, and plots the longitude
    and latitude of the selected pixels using a scatter plot.

    Args:
        filepath (str): The path to the file containing the PACE AOP data.
        step (int, optional): Controls the subsampling density. Every
            ``size // step``-th line and pixel is kept, so roughly `step`
            samples remain per dimension and a larger value plots more
            points. Must be at least 1. Defaults to 20.
        figsize (Tuple[float, float], optional): Figure size passed to
            `plt.subplots`. Defaults to (8, 6).
        **kwargs (Any): Additional keyword arguments to pass to `ax.scatter`.

    Returns:
        plt.Figure: A matplotlib figure object containing the scatter plot.

    Raises:
        ValueError: If `step` is smaller than 1.

    Example:
        >>> plot = view_pace_pixel_locations("path/to/your/datafile.h5", step=10)
        >>> plt.show()
    """
    if step < 1:
        raise ValueError("step must be a positive integer")

    # Read first so that a failed read does not leave an empty figure behind.
    dataset = read_pace_aop(filepath)

    # A dimension shorter than `step` would give a stride of 0, which is not a
    # valid slice step; fall back to keeping every element in that case.
    line_stride = max(1, dataset.sizes["number_of_lines"] // step)
    pixel_stride = max(1, dataset.sizes["pixels_per_line"] // step)

    # Positional striding, computed once and shared by both axes of the plot.
    subset = dataset.isel(
        number_of_lines=slice(None, None, line_stride),
        pixels_per_line=slice(None, None, pixel_stride),
    )

    fig, ax = plt.subplots(figsize=figsize)
    ax.scatter(subset.longitude, subset.latitude, **kwargs)

    # Set labels and title
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    ax.set_title("PACE Pixel Locations")

    return fig
viz_pace(dataset, wavelengths=None, method='nearest', figsize=(6.4, 4.8), cmap='jet', vmin=0, vmax=0.02, ncols=1, crs=None, xlim=None, ylim=None, **kwargs)
¶
Plots PACE data from a given xarray Dataset.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dataset |
xr.Dataset |
An xarray Dataset containing the PACE data. |
required |
wavelengths |
array-like |
Specific wavelengths to select. If None, all wavelengths are selected. |
None |
method |
str |
Method to use for selection when wavelengths is not None. Defaults to "nearest". |
'nearest' |
figsize |
tuple |
Figure size. Defaults to (6.4, 4.8). |
(6.4, 4.8) |
cmap |
str |
Colormap to use. Defaults to "jet". |
'jet' |
vmin |
float |
Minimum value for the colormap. Defaults to 0. |
0 |
vmax |
float |
Maximum value for the colormap. Defaults to 0.02. |
0.02 |
ncols |
int |
Number of columns in the plot. Defaults to 1. |
1 |
crs |
str or cartopy.crs.CRS |
Coordinate reference system to use. If None, a simple plot is created. Defaults to None. See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html |
None |
xlim |
array-like |
Limits for the x-axis. Defaults to None. |
None |
ylim |
array-like |
Limits for the y-axis. Defaults to None. |
None |
**kwargs |
Additional keyword arguments to pass to the `plt.subplots` function. |
{} |
Source code in hypercoast/pace.py
def viz_pace(
    dataset: Union[xr.Dataset, str],
    wavelengths: Optional[Union[List[float], float]] = None,
    method: str = "nearest",
    figsize: Tuple[float, float] = (6.4, 4.8),
    cmap: str = "jet",
    vmin: float = 0,
    vmax: float = 0.02,
    ncols: int = 1,
    crs: Optional[str] = None,
    xlim: Optional[List[float]] = None,
    ylim: Optional[List[float]] = None,
    **kwargs,
):
    """
    Plots PACE data from a given xarray Dataset.

    One panel is drawn per wavelength, laid out row by row in a grid with
    `ncols` columns. The figure is shown with `plt.show()`; nothing is returned.

    Args:
        dataset (xr.Dataset or str): An xarray Dataset containing the PACE data,
            or the path to a PACE file (opened with `read_pace`).
        wavelengths (array-like, optional): Specific wavelength(s) to plot. A
            scalar, list, tuple or numpy array is accepted. If None, the first
            wavelength of the dataset is plotted.
        method (str, optional): Method to use for selection when wavelengths is not None. Defaults to "nearest".
        figsize (tuple, optional): Size of a single panel. The figure is scaled by the number of rows and columns. Defaults to (6.4, 4.8).
        cmap (str, optional): Colormap to use. Defaults to "jet".
        vmin (float, optional): Minimum value for the colormap. Defaults to 0.
        vmax (float, optional): Maximum value for the colormap. Defaults to 0.02.
        ncols (int, optional): Number of columns in the plot. Defaults to 1.
        crs (str or cartopy.crs.CRS, optional): Coordinate reference system used for the axis ticks. Pass "default" for
            PlateCarree. If None, a simple plot without map features is created. Defaults to None.
            See https://scitools.org.uk/cartopy/docs/latest/reference/projections.html
        xlim (array-like, optional): Range used to place the longitude ticks on the map plot. Defaults to the data extent.
        ylim (array-like, optional): Range used to place the latitude ticks on the map plot. Defaults to the data extent.
        **kwargs: Additional keyword arguments to pass to the `plt.subplots` function.
    """
    import math

    import matplotlib.pyplot as plt
    import numpy as np

    if isinstance(dataset, str):
        dataset = read_pace(dataset, wavelengths, method)

    if wavelengths is not None:
        # Accept scalars, tuples and arrays as well as lists.
        wavelengths = np.atleast_1d(wavelengths).tolist()
        dataset = dataset.sel(wavelength=wavelengths, method=method)
    else:
        # Must be a list: a bare float here breaks len() below.
        wavelengths = [dataset.coords["wavelength"].values[0].tolist()]

    lat = dataset.coords["latitude"]
    lon = dataset.coords["longitude"]

    nrows = math.ceil(len(wavelengths) / ncols)
    full_figsize = (figsize[0] * ncols, figsize[1] * nrows)
    use_map = crs is not None

    if use_map:
        import cartopy.crs as ccrs
        import cartopy.feature as cfeature
        from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter

        if isinstance(crs, str) and crs == "default":
            crs = ccrs.PlateCarree()

        if xlim is None:
            xlim = [math.floor(lon.min()), math.ceil(lon.max())]
        if ylim is None:
            ylim = [math.floor(lat.min()), math.ceil(lat.max())]

        fig, axes = plt.subplots(
            nrows=nrows,
            ncols=ncols,
            figsize=full_figsize,
            subplot_kw={"projection": ccrs.PlateCarree()},
            **kwargs,
        )
    else:
        fig, axes = plt.subplots(
            nrows=nrows,
            ncols=ncols,
            figsize=full_figsize,
            **kwargs,
        )

    # plt.subplots returns a bare Axes for a 1x1 layout, a 1-D array for a
    # single row/column and a 2-D array otherwise. Flattening row-major makes
    # all three indexable by the running panel index (i * ncols + j).
    axes = np.atleast_1d(axes).ravel()
    band_values = np.atleast_1d(dataset.coords["wavelength"].values)

    for index, wavelength in enumerate(wavelengths):
        ax = axes[index]
        data = dataset.sel(wavelength=wavelength, method=method)["Rrs"]

        # Honour the caller's colour settings in both the plain and map plots.
        im = ax.pcolormesh(
            lon, lat, np.squeeze(data), cmap=cmap, vmin=vmin, vmax=vmax
        )

        if use_map:
            ax.coastlines()
            ax.add_feature(cfeature.STATES, linewidth=0.5)
            ax.set_xticks(np.linspace(xlim[0], xlim[1], 5), crs=crs)
            ax.set_yticks(np.linspace(ylim[0], ylim[1], 5), crs=crs)
            ax.xaxis.set_major_formatter(
                LongitudeFormatter(zero_direction_label=True)
            )
            ax.yaxis.set_major_formatter(LatitudeFormatter())

        ax.set_xlabel("Longitude")
        ax.set_ylabel("Latitude")
        ax.set_title(f"wavelength = {band_values[index]} [nm]")
        fig.colorbar(im, ax=ax, label="Reflectance")

    plt.tight_layout()
    plt.show()
viz_pace_chla(data, date=None, aspect=2, cmap='jet', size=6, **kwargs)
¶
Visualizes PACE chlorophyll-a data using an xarray DataArray.
This function supports loading data from a file path (str) or directly using an xarray DataArray. It allows for selection of a specific date for visualization or averages over all dates if none is specified.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
Union[str, xr.DataArray] |
The chlorophyll-a data to visualize. Can be a file path or an xarray DataArray. |
required |
date |
Optional[str] |
Specific date to visualize. If None, averages over all dates. Defaults to None. |
None |
aspect |
float |
Aspect ratio of the plot. Defaults to 2. |
2 |
cmap |
str |
Colormap for the plot. Defaults to "jet". |
'jet' |
size |
int |
Size of the plot. Defaults to 6. |
6 |
**kwargs |
Any |
Additional keyword arguments to pass to `xarray.plot`. |
{} |
Returns:
Type | Description |
---|---|
xr.plot.facetgrid.FacetGrid |
The plot generated from the chlorophyll-a data. |
Exceptions:
Type | Description |
---|---|
ValueError |
If `data` is not a file path (str) or an xarray DataArray. |
Source code in hypercoast/pace.py
def viz_pace_chla(
    data: Union[str, xr.DataArray],
    date: Optional[str] = None,
    aspect: float = 2,
    cmap: str = "jet",
    size: int = 6,
    **kwargs: Any,
) -> xr.plot.facetgrid.FacetGrid:
    """
    Plots PACE chlorophyll-a data held in an xarray DataArray.

    The input may be a file path (loaded with `read_pace_chla`) or an existing
    DataArray. A single date can be picked for display; when no date is given
    and the data has a `date` coordinate, the mean over all dates is shown.

    Args:
        data (Union[str, xr.DataArray]): The chlorophyll-a data to visualize.
            Can be a file path or an xarray DataArray.
        date (Optional[str], optional): Specific date to visualize. If None,
            averages over all dates. Defaults to None.
        aspect (float, optional): Aspect ratio of the plot. Defaults to 2.
        cmap (str, optional): Colormap for the plot. Defaults to "jet".
        size (int, optional): Size of the plot. Defaults to 6.
        **kwargs (Any): Additional keyword arguments to pass to `xarray.plot`.

    Returns:
        xr.plot.facetgrid.FacetGrid: The plot generated from the chlorophyll-a data.

    Raises:
        ValueError: If `data` is not a file path (str) or an xarray DataArray.
    """
    if not isinstance(data, str):
        if not isinstance(data, xr.DataArray):
            raise ValueError("data must be an xarray DataArray")
    else:
        data = read_pace_chla(data)

    if date is None:
        # No date requested: collapse the time axis if there is one.
        if "date" in data.coords:
            data = data.mean(dim="date")
    else:
        data = data.sel(date=date)

    return data.plot(aspect=aspect, cmap=cmap, size=size, **kwargs)