hbase data

1. Set up the HBase client
Before running this cell, you need to create a .env file in the same directory as this notebook, and set the following environment variables in the .env file:
HBASE_FETCH_API =
HBASE_SEND_API =
HBASE_TOKEN =
[1]:
import logging
import os
import geopandas as gpd
import nest_asyncio
from dotenv import load_dotenv
from h3_toolkit.core import H3Toolkit
logging.basicConfig(level=logging.INFO)

# Load the variables defined in the .env file into the process environment.
load_dotenv()
nest_asyncio.apply()  # needed for jupyter notebook to run asyncio

# Connection settings for the HBase fetch/send endpoints. They are read from
# the environment so that no URL or token is hardcoded in the notebook.
FETCH_API = os.getenv("HBASE_FETCH_API")
SEND_API = os.getenv("HBASE_SEND_API")
TOKEN = os.getenv("HBASE_TOKEN")

# Fail fast: os.getenv returns None for an unset variable (and "" for an empty
# one), and that value would otherwise be handed to HBaseClient unchanged.
_missing_settings = [
    name
    for name, value in (
        ("HBASE_FETCH_API", FETCH_API),
        ("HBASE_SEND_API", SEND_API),
        ("HBASE_TOKEN", TOKEN),
    )
    if not value
]
if _missing_settings:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Set them in your .env file before running this notebook."
    )
[2]:
from h3_toolkit.hbase import HBaseClient
toolkit = H3Toolkit()

# Build the client as a named value so its settings are easy to read.
hbase_client = HBaseClient(
    fetch_url=FETCH_API,
    send_url=SEND_API,
    token=TOKEN,
    max_concurrent_requests=5,  # don't change this
    chunk_size=1000,  # don't change this
)

# Last expression of the cell: attach the client; the call's return value is
# what the notebook displays.
toolkit.set_hbase_client(hbase_client)
[2]:
<h3_toolkit.core.H3Toolkit at 0x1063b54b0>
[3]:
# Sanity check: display the client currently attached to the toolkit so the
# configured URLs, concurrency and chunk size can be confirmed by eye.
toolkit.hbase_client
[3]:
HBaseClient(
fetch_url = http://10.100.1.64:2891/api/hbase/v1/test/filterdata2,
send_url = http://10.100.1.64:2891/api/hbase/v1/test/putdata,
token = eyJhb*********************************************************************************************X-H5I,
max_concurrent_requests = 5,
chunk_size = 1000
)
2. Select boundary of the data
[4]:
# Load the test geometries from the GeoJSON file (path is relative to the notebook's working directory).
gdf = gpd.read_file('data/test_geom.geojson')
# Union every geometry in the file into a single geometry.
merged_geometry = gdf['geometry'].union_all()
# Wrap the merged geometry in a one-row GeoDataFrame that keeps the source CRS.
new_gdf = gpd.GeoDataFrame(geometry=[merged_geometry], crs= gdf.crs)
# Draw the outline only (red edge, no fill).
new_gdf.plot(edgecolor='red', facecolor='none')
[4]:
<Axes: >
[5]:
# Convert the boundary geometry into H3 cells at resolution 12.
result = toolkit.process_from_vector(
    data=new_gdf,
    resolution=12,
    geometry_col='geometry',
)

# Preview the first rows of the generated cells.
result.get_result().head()
INFO:h3_toolkit.core:2025-06-18 09:51:06 - `process_from_vector` - Start converting data to h3 cells in resolution 12
INFO:h3_toolkit.core:2025-06-18 09:51:06 - `process_from_vector` - Finish converting data to h3 cells in resolution 12 with shape (42080, 1)
[5]:
shape: (5, 1)
| hex_id |
|---|
| str |
| "8c4ba0a406403ff" |
| "8c4ba0a406411ff" |
| "8c4ba0a406413ff" |
| "8c4ba0a406415ff" |
| "8c4ba0a406417ff" |
[6]:
import polars as pl
def _utf8_columns_to_float(df):
    """Cast every string column except ``hex_id`` to Float64, rounded to 3 decimals."""
    as_rounded_float = pl.col(pl.Utf8).exclude('hex_id').cast(pl.Float64).round(3)
    return df.with_columns([as_rounded_float])


# Fetch three qualifiers of the 'demographic' family, then convert the
# string values to rounded floats.
result = toolkit.fetch_from_hbase(
    table_name='res12_pre_data',
    column_family='demographic',
    column_qualifier=['p_cnt', 'f_cnt', 'm_cnt'],
).apply(_utf8_columns_to_float)

# Preview the fetched values together with each cell's geometry.
result.get_result(return_geometry=True).head()
INFO:h3_toolkit.hbase:2025-06-18 09:51:06 - `fetch_from_hbase` - Start fetching data from HBase
Fetching data from Hbase ... : 100%|██████████| 43/43 [00:00<00:00, 7413.78chunk/s]
INFO:h3_toolkit.hbase:2025-06-18 09:51:08 - `fetch_from_hbase` - Finish fetching data from HBase
[6]:
| hex_id | f_cnt | m_cnt | p_cnt | geometry | |
|---|---|---|---|---|---|
| 0 | 8c4ba0a406403ff | 0.567 | 0.567 | 1.133 | POLYGON ((121.52403 25.03519, 121.52401 25.035... |
| 1 | 8c4ba0a406411ff | 0.567 | 0.567 | 1.133 | POLYGON ((121.52431 25.03532, 121.5243 25.0352... |
| 2 | 8c4ba0a406413ff | 0.567 | 0.567 | 1.133 | POLYGON ((121.52449 25.03531, 121.52447 25.035... |
| 3 | 8c4ba0a406415ff | 0.567 | 0.567 | 1.133 | POLYGON ((121.5242 25.03518, 121.52419 25.0350... |
| 4 | 8c4ba0a406417ff | 0.567 | 0.567 | 1.133 | POLYGON ((121.52438 25.03518, 121.52436 25.035... |
3. Chain all steps together
[7]:
from h3_toolkit import H3Toolkit
from h3_toolkit.hbase import HBaseClient
# Same steps as sections 1-2, expressed as one method chain on a fresh toolkit.
toolkit = H3Toolkit()
result = (
    toolkit
    # Convert the boundary geometry into H3 cells at resolution 12.
    .process_from_vector(
        data=new_gdf,
        resolution=12,
        geometry_col='geometry',
    )
    # Attach the HBase client before any fetch is issued.
    .set_hbase_client(
        HBaseClient(
            fetch_url=FETCH_API,
            send_url=SEND_API,
            token=TOKEN,
            max_concurrent_requests=5,  # don't change this
            # Kept at 1000, the value used when the client is first configured
            # in section 1; this setting is marked as not to be changed.
            chunk_size=1000,  # don't change this
        )
    )
    # Fetch only the `p_cnt` qualifier of the 'demographic' family.
    .fetch_from_hbase(
        table_name='res12_pre_data',
        column_family='demographic',
        column_qualifier=['p_cnt'],
    )
    # Convert str to float and round to 3 decimal places (hex_id stays a string).
    .apply(lambda df: df.with_columns([pl.col(pl.Utf8).exclude('hex_id').cast(pl.Float64).round(3)]))
    .get_result(return_geometry=True)
)
result.head()
INFO:h3_toolkit.core:2025-06-18 09:51:08 - `process_from_vector` - Start converting data to h3 cells in resolution 12
INFO:h3_toolkit.core:2025-06-18 09:51:08 - `process_from_vector` - Finish converting data to h3 cells in resolution 12 with shape (42080, 1)
INFO:h3_toolkit.hbase:2025-06-18 09:51:08 - `fetch_from_hbase` - Start fetching data from HBase
Fetching data from Hbase ... : 100%|██████████| 43/43 [00:00<00:00, 9300.01chunk/s]
INFO:h3_toolkit.hbase:2025-06-18 09:51:09 - `fetch_from_hbase` - Finish fetching data from HBase
[7]:
| hex_id | p_cnt | geometry | |
|---|---|---|---|
| 0 | 8c4ba0a406403ff | 1.133 | POLYGON ((121.52403 25.03519, 121.52401 25.035... |
| 1 | 8c4ba0a406411ff | 1.133 | POLYGON ((121.52431 25.03532, 121.5243 25.0352... |
| 2 | 8c4ba0a406413ff | 1.133 | POLYGON ((121.52449 25.03531, 121.52447 25.035... |
| 3 | 8c4ba0a406415ff | 1.133 | POLYGON ((121.5242 25.03518, 121.52419 25.0350... |
| 4 | 8c4ba0a406417ff | 1.133 | POLYGON ((121.52438 25.03518, 121.52436 25.035... |
4. Fetch data from HBase, aggregate it, and send the aggregated data back to HBase
[8]:
from h3_toolkit import H3Toolkit
from h3_toolkit.aggregation import Mean, SumUp
from h3_toolkit.hbase import HBaseClient
def _round_float_columns(df):
    """Round every Float64 column to 3 decimal places."""
    return df.with_columns([pl.col(pl.Float64).round(3)])


toolkit = H3Toolkit()

# Client built with only the endpoints and token; the other settings are left
# at the library defaults.
hbase_client = HBaseClient(
    fetch_url=FETCH_API,
    send_url=SEND_API,
    token=TOKEN,
)

# Column name(s) -> aggregation strategy object. A tuple key assigns one
# strategy to several columns (presumably applied by `process_from_h3`).
aggregation_strategy = {
    ('f_cnt', 'p_cnt'): SumUp(),
    'm_cnt': Mean(),
}

result = (
    toolkit
    .process_from_vector(data=new_gdf, resolution=12, geometry_col='geometry')
    .set_hbase_client(hbase_client)
    .fetch_from_hbase(
        table_name='res12_pre_data',
        column_family='demographic',
        column_qualifier=['p_cnt', 'f_cnt', 'm_cnt'],
    )
    .set_aggregation_strategy(aggregation_strategy)
    .process_from_h3(target_resolution=10)
    .apply(_round_float_columns)
    # Only the `p_cnt` qualifier is written to the res-10 test table.
    .send_to_hbase(
        table_name='res10_test_data',
        column_family='demographic',
        column_qualifier=['p_cnt'],
    )
    .get_result()
)
result.head()
INFO:h3_toolkit.core:2025-06-18 09:51:09 - `process_from_vector` - Start converting data to h3 cells in resolution 12
INFO:h3_toolkit.core:2025-06-18 09:51:09 - `process_from_vector` - Finish converting data to h3 cells in resolution 12 with shape (42080, 1)
INFO:h3_toolkit.hbase:2025-06-18 09:51:09 - `fetch_from_hbase` - Start fetching data from HBase
Fetching data from Hbase ... : 100%|██████████| 43/43 [00:00<00:00, 9797.11chunk/s]
INFO:h3_toolkit.hbase:2025-06-18 09:51:10 - `fetch_from_hbase` - Finish fetching data from HBase
INFO:h3_toolkit.core:2025-06-18 09:51:10 - `process_from_h3` - Start converting data to h3 cells in resolution 10
INFO:h3_toolkit.core:2025-06-18 09:51:10 - `process_from_h3` - Finish converting data to h3 cells in resolution 10 with shape (937, 4)
INFO:h3_toolkit.hbase:2025-06-18 09:51:10 - `send_to_hbase` - Start sending data from HBase
Sending data to Hbase ... : 100%|██████████| 1/1 [00:00<00:00, 587.85chunk/s]
INFO:h3_toolkit.hbase:2025-06-18 09:51:10 - `send_to_hbase` - Finish sending data from HBase
[8]:
shape: (5, 4)
| hex_id | f_cnt | p_cnt | m_cnt |
|---|---|---|---|
| str | f64 | f64 | f64 |
| "8a4ba0a4330ffff" | 464.757 | 898.606 | 8.854 |
| "8a4ba0a412d7fff" | 271.692 | 484.174 | 4.336 |
| "8a4ba0a4ecf7fff" | 283.727 | 542.568 | 5.282 |
| "8a4ba0a4e0e7fff" | 329.451 | 626.595 | 6.064 |
| "8a4ba0a4155ffff" | 0.0 | 0.0 | 0.0 |
5. Visualization
Visualize the fetched H3 data using pydeck with automatic map boundary calculation.
[ ]:
from h3_toolkit.visualization import show_h3
# Visualize the `p_cnt` column of `result`.
# NOTE: `result` is whatever the most recently executed cell bound to that name;
# with the notebook run top to bottom that is the resolution-10 aggregated frame
# from section 4, not the raw resolution-12 fetch.
# Using Quantiles classifier with k=5 and viridis colormap
show_h3(
result,
'p_cnt',
classifier='Quantiles',
k=5,
cmap='viridis'
)