[1]:
from pixcdust.converters.zarr import Nc2ZarrConverter
from glob import glob

Create a Zarr database

[2]:
# Collect every file matching the pattern; glob gives no ordering guarantee,
# so sort the list before handing it to the converter.
nc_files = glob('/tmp/pixc' + '/*/*nc')
nc_files.sort()

# Build the converter for the selected variables, then write the Zarr store.
pixc = Nc2ZarrConverter(path_in=nc_files, variables=['height', 'sig0', 'classification'])
pixc.database_from_nc(path_out="/tmp/pixc_zarr")
/home/vschaffn/Documents/swot_pixc_study/pixc-env/lib/python3.10/site-packages/distributed/node.py:187: UserWarning: Port 8787 is already in use.
Perhaps you already have a cluster running?
Hosting the HTTP server on port 46393 instead
  warnings.warn(
/home/vschaffn/Documents/swot_pixc_study/pixc-env/lib/python3.10/site-packages/distributed/client.py:3361: UserWarning: Sending large graph of size 93.27 MiB.
This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.
  warnings.warn(
/home/vschaffn/Documents/swot_pixc_study/pixc-env/lib/python3.10/site-packages/distributed/client.py:3361: UserWarning: Sending large graph of size 93.27 MiB.
This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.
  warnings.warn(
/home/vschaffn/Documents/swot_pixc_study/pixc-env/lib/python3.10/site-packages/distributed/client.py:3361: UserWarning: Sending large graph of size 93.27 MiB.
This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.
  warnings.warn(

Read a Zarr database

[3]:
from pixcdust.readers.zarr import ZarrReader
import datetime
[4]:
# Open the Zarr store at /tmp/pixc_zarr and read it, passing a
# (start, end) datetime tuple of 2023-04-06 and 2023-04-08.
pixcr = ZarrReader("/tmp/pixc_zarr")
pixcr.read((datetime.datetime(2023,4,6), datetime.datetime(2023,4,8)))
# Display the reader that was just populated. The cell previously ended with
# `pixc`, which is the Nc2ZarrConverter instance and not the reader.
pixcr
/home/vschaffn/Documents/swot_pixc_study/pixc-env/lib/python3.10/site-packages/distributed/node.py:187: UserWarning: Port 8787 is already in use.
Perhaps you already have a cluster running?
Hosting the HTTP server on port 38089 instead
  warnings.warn(
[4]:
<pixcdust.converters.zarr.Nc2ZarrConverter at 0x73169ecba4a0>
[5]:
# Convert the reader's contents with `to_geodataframe()`.
gdf_pixc = pixcr.to_geodataframe()
# Bare last expression: the notebook renders the frame as the cell output.
gdf_pixc
/home/vschaffn/Documents/swot_pixc_study/pixcdust/readers/base_reader.py:115: UserWarning: No active geometry column to be set. The resulting object will be a pandas.DataFrame with geopandas.GeometryArray(s) containing geometry and CRS information. Use `.set_geometry()` to set an active geometry and upcast to the geopandas.GeoDataFrame manually.
  gdf = self.data.xvec.to_geodataframe()
[5]:
latitude time sig0 longitude tile_number cycle_number pass_number height classification
points
0 43.766094 2023-04-06 09:46:18 -0.393647 0.731600 78 482 16 265.322205 1.0
1 43.766003 2023-04-06 09:46:18 -0.049691 0.731114 78 482 16 264.606812 1.0
2 43.765961 2023-04-06 09:46:18 2.246204 0.730877 78 482 16 263.874756 1.0
3 43.765705 2023-04-06 09:46:18 -0.891803 0.729504 78 482 16 263.236237 1.0
4 43.765690 2023-04-06 09:46:18 2.372672 0.729421 78 482 16 262.494110 1.0
... ... ... ... ... ... ... ... ... ...
12225590 43.351582 2023-04-07 09:36:56 4.794077 1.719383 78 483 16 291.671906 1.0
12225591 43.351494 2023-04-07 09:36:56 3.340938 1.718887 78 483 16 287.349701 1.0
12225592 43.353043 2023-04-07 09:36:56 1.013320 1.727640 78 483 16 349.980957 1.0
12225593 43.353001 2023-04-07 09:36:56 2.926115 1.727400 78 483 16 347.478943 1.0
12225594 43.353046 2023-04-07 09:36:56 2.399815 1.727659 78 483 16 348.614990 1.0

12225595 rows × 9 columns