[1]:
from pixcdust.converters.zarr import Nc2ZarrConverter
from glob import glob
Create a Zarr database
[2]:
# Gather the input files matching the pattern; sorting makes the order deterministic.
nc_paths = sorted(glob('/tmp/pixc'+'/*/*nc'))

# Configure the converter with the input files and the variables passed via `variables`.
pixc = Nc2ZarrConverter(
    path_in=nc_paths,
    variables=['height', 'sig0', 'classification'],
)

# Build the zarr database at the output path from the input files.
pixc.database_from_nc(path_out="/tmp/pixc_zarr")
/home/vschaffn/Documents/swot_pixc_study/pixc-env/lib/python3.10/site-packages/distributed/node.py:187: UserWarning: Port 8787 is already in use.
Perhaps you already have a cluster running?
Hosting the HTTP server on port 46393 instead
warnings.warn(
/home/vschaffn/Documents/swot_pixc_study/pixc-env/lib/python3.10/site-packages/distributed/client.py:3361: UserWarning: Sending large graph of size 93.27 MiB.
This may cause some slowdown.
Consider loading the data with Dask directly
or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.
warnings.warn(
/home/vschaffn/Documents/swot_pixc_study/pixc-env/lib/python3.10/site-packages/distributed/client.py:3361: UserWarning: Sending large graph of size 93.27 MiB.
This may cause some slowdown.
Consider loading the data with Dask directly
or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.
warnings.warn(
/home/vschaffn/Documents/swot_pixc_study/pixc-env/lib/python3.10/site-packages/distributed/client.py:3361: UserWarning: Sending large graph of size 93.27 MiB.
This may cause some slowdown.
Consider loading the data with Dask directly
or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.
warnings.warn(
Read a Zarr database
[3]:
from pixcdust.readers.zarr import ZarrReader
import datetime
[4]:
# Open the zarr database stored at /tmp/pixc_zarr.
pixcr = ZarrReader("/tmp/pixc_zarr")

# Load the data for the given pair of datetimes.
# NOTE(review): presumably a (start, end) date range — confirm against ZarrReader.read.
pixcr.read((datetime.datetime(2023,4,6), datetime.datetime(2023,4,8)))

# Display what the reader loaded. The cell previously ended with `pixc`, which is
# the Nc2ZarrConverter from the conversion step, so it showed the converter's
# repr instead of anything that was read.
pixcr.data
/home/vschaffn/Documents/swot_pixc_study/pixc-env/lib/python3.10/site-packages/distributed/node.py:187: UserWarning: Port 8787 is already in use.
Perhaps you already have a cluster running?
Hosting the HTTP server on port 38089 instead
warnings.warn(
[4]:
<pixcdust.converters.zarr.Nc2ZarrConverter at 0x73169ecba4a0>
[5]:
# Convert the data held by the reader into a (geo)dataframe.
# NOTE(review): the recorded run warns that no active geometry column is set, so the
# result may be a plain pandas.DataFrame carrying geometry arrays — call
# `.set_geometry()` on it if a true GeoDataFrame is required.
gdf_pixc = pixcr.to_geodataframe()
# Bare last expression: the notebook renders the frame as the cell's output.
gdf_pixc
/home/vschaffn/Documents/swot_pixc_study/pixcdust/readers/base_reader.py:115: UserWarning: No active geometry column to be set. The resulting object will be a pandas.DataFrame with geopandas.GeometryArray(s) containing geometry and CRS information. Use `.set_geometry()` to set an active geometry and upcast to the geopandas.GeoDataFrame manually.
gdf = self.data.xvec.to_geodataframe()
[5]:
| latitude | time | sig0 | longitude | tile_number | cycle_number | pass_number | height | classification | |
|---|---|---|---|---|---|---|---|---|---|
| points | |||||||||
| 0 | 43.766094 | 2023-04-06 09:46:18 | -0.393647 | 0.731600 | 78 | 482 | 16 | 265.322205 | 1.0 |
| 1 | 43.766003 | 2023-04-06 09:46:18 | -0.049691 | 0.731114 | 78 | 482 | 16 | 264.606812 | 1.0 |
| 2 | 43.765961 | 2023-04-06 09:46:18 | 2.246204 | 0.730877 | 78 | 482 | 16 | 263.874756 | 1.0 |
| 3 | 43.765705 | 2023-04-06 09:46:18 | -0.891803 | 0.729504 | 78 | 482 | 16 | 263.236237 | 1.0 |
| 4 | 43.765690 | 2023-04-06 09:46:18 | 2.372672 | 0.729421 | 78 | 482 | 16 | 262.494110 | 1.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 12225590 | 43.351582 | 2023-04-07 09:36:56 | 4.794077 | 1.719383 | 78 | 483 | 16 | 291.671906 | 1.0 |
| 12225591 | 43.351494 | 2023-04-07 09:36:56 | 3.340938 | 1.718887 | 78 | 483 | 16 | 287.349701 | 1.0 |
| 12225592 | 43.353043 | 2023-04-07 09:36:56 | 1.013320 | 1.727640 | 78 | 483 | 16 | 349.980957 | 1.0 |
| 12225593 | 43.353001 | 2023-04-07 09:36:56 | 2.926115 | 1.727400 | 78 | 483 | 16 | 347.478943 | 1.0 |
| 12225594 | 43.353046 | 2023-04-07 09:36:56 | 2.399815 | 1.727659 | 78 | 483 | 16 | 348.614990 | 1.0 |
12225595 rows × 9 columns