making sense out of data and metadata
retrieve data from s3 bucket¶
import os
import urllib.request
from pathlib import Path
# Set the URL for the cloud
URL = "https://js2.jetstream-cloud.org:8001/"
!mkdir -p data
files = [
"pythia/radar/erad2024/baltrad/baltrad_short_course/201405190715_SUR.h5",
]
for file in files:
name = os.path.join("data", Path(file).name)
if not os.path.exists(name):
print(f"downloading, {name}")
urllib.request.urlretrieve(f"{URL}{file}", name)
Import the file I/O module along with the main RAVE module containing useful constants¶
%matplotlib inline
import _raveio, _rave
Read an input ODIM_H5 file¶
rio = _raveio.open("data/201405190715_SUR.h5")
What is the payload in the I/O container?¶
rio.objectType is _rave.Rave_ObjectType_PVOL
How many scans does this volume contain?¶
pvol = rio.object
print("%i scans in polar volume" % pvol.getNumberOfScans())
Ascending or descending scan strategy?¶
pvol.isAscendingScans()
Where is this site?¶
Note that all angles are represented internally in radians¶
from Proj import rd
print(
"Site is located at %2.3f° lon, %2.3f° lat and %3.1f masl"
% (pvol.longitude * rd, pvol.latitude * rd, pvol.height)
)
print("Site's ODIM source identifiers are: %s" % pvol.source)
Access lowest scan and query some characteristics¶
scan = pvol.getScan(0)
nrays, nbins = scan.nrays, scan.nbins
print("Elevation angle %2.1f°" % (scan.elangle * rd))
print("%i rays per sweep" % nrays)
print("%i bins per ray" % nbins)
print("%3.1f meter range bins" % scan.rscale)
print("First ray scanned is ray %i (indexing starts at 0)" % scan.a1gate)
print("Data acquisition started on %s:%sZ" % (scan.startdate, scan.starttime))
print("Data acquisition ended on %s:%sZ" % (scan.enddate, scan.endtime))
print(
"Scan contains %i quantities: %s"
% (len(scan.getParameterNames()), scan.getParameterNames())
)
Access horizontal reflectivity and query some characteristics¶
dbzh = scan.getParameter("DBZH")
print("Quantity is %s" % dbzh.quantity)
print("8-bit unsigned byte data? %s" % str(dbzh.datatype is _rave.RaveDataType_UCHAR))
print(
"Linear scaling coefficients from 0-255 to dBZ: gain=%2.1f, offset=%2.1f"
% (dbzh.gain, dbzh.offset)
)
print(
"Unradiated areas = %2.1f, radiated areas with no echo = %2.1f"
% (dbzh.nodata, dbzh.undetect)
)
dbzh_data = dbzh.getData() # Accesses the NumPy array containing the reflectivities
print(
"NumPy array's dimensions = %s and type = %s"
% (str(dbzh_data.shape), dbzh_data.dtype)
)
A primitive visualizer for plotting B-scans¶
# Convenience functionality. First convert a palette from GoogleMapsPlugin for use with matplotlib
import matplotlib
from GmapColorMap import dbzh as pal
colorlist = []
for i in range(0, len(pal), 3):
colorlist.append([pal[i] / 255.0, pal[i + 1] / 255.0, pal[i + 2] / 255.0])
# Then create a simple plotter
import matplotlib.pyplot as plt
def plot(data):
fig = plt.figure(figsize=(16, 12))
plt.title("B-scan")
plt.imshow(data, cmap=matplotlib.colors.ListedColormap(colorlist), clim=(0, 255))
plt.colorbar(shrink=float(nrays) / nbins)
plot(dbzh_data)
Management of optional metadata¶
While manadatory metadata are represented as object attributes in Python, optional metadata are not!¶
print("Polar volume has %i optional attributes" % len(pvol.getAttributeNames()))
print("Polar scan has %i optional attributes" % len(scan.getAttributeNames()))
print(
"Quantity %s has %i optional attributes"
% (dbzh.quantity, len(dbzh.getAttributeNames()))
)
print("Mandatory attribute: beamwidth is %2.1f°" % (pvol.beamwidth * rd))
print(
"Optional attributes: Radar is a %s running %s"
% (pvol.getAttribute("how/system"), pvol.getAttribute("how/software"))
)
Add a bogus attribute¶
dbzh.addAttribute("how/foo", "bar")
print(
"Quantity %s now has %i optional attributes"
% (dbzh.quantity, len(dbzh.getAttributeNames()))
)
Create an empty parameter and populate it¶
import _polarscanparam
param = _polarscanparam.new()
param.quantity = "DBZH"
param.nodata, param.undetect = 255.0, 0.0
param.gain, param.offset = 0.4, -30.0
import numpy
data = numpy.zeros((420, 500), numpy.uint8)
param.setData(data)
Create an empty scan and add the parameter to it¶
import _polarscan
from Proj import dr
newscan = _polarscan.new()
newscan.elangle = 25.0 * dr
newscan.addAttribute("how/simulated", "True")
newscan.addParameter(param)
print("%i rays per sweep" % newscan.nrays)
print("%i bins per ray" % newscan.nbins)
See how the parameter’s dimensions were passed along to the scan, so they don’t have to be set explicitly. Nevertheless, plenty of metadata must be handled explicitly or ODIM_H5 files risk being incomplete.¶
newscan.a1gate = 0
newscan.beamwidth = 1.0 * dr
newscan.rscale = 500.0
newscan.rstart = (
0.0 # Distance in meters to the start of the first range bin, unknown=0.0
)
newscan.startdate = "20140831"
newscan.starttime = "145005"
newscan.enddate = "20140831"
newscan.endtime = "145020"
# Top-level attributes
newscan.date = "20140831"
newscan.time = "145000"
newscan.source = "WMO:26232,RAD:EE41,PLC:Sürgavere,NOD:eesur"
newscan.longitude = 25.519 * dr
newscan.latitude = 58.482 * dr
newscan.height = 157.0
Now create a new I/O container and write the scan to ODIM_H5 file.¶
container = _raveio.new()
container.object = newscan
container.save("data/myscan.h5")
import os
print("ODIM_H5 file is %i bytes large" % os.path.getsize("data/myscan.h5"))
Remove compression. It makes file I/O faster. You can also tune HDF5 file-creation properties through the I/O container object.¶
container.compression_level = 0 # ZLIB compression levels 0-9
container.save("data/myscan.h5")
print("ODIM_H5 file is now %i bytes large" % os.path.getsize("data/myscan.h5"))