BigQuery partitioning and clustering

Bigquery partioning and clustering

This page outlines how to use partitioning and clustering features in Y42's virtual build system for BigQuery tables. Learn how to optimize your tables for faster query runtimes and reduced compute costs by setting models to be partitioned and clustered directly within the Y42 platform.

Table partioning

Specify the partitioning options in a similar dictionary format:

my_first_model.sql

_13
{
_13
partition_by={
_13
"field": "<field name>",
_13
"data_type": "<timestamp | date | datetime | int64>",
_13
"granularity": "<hour | day | month | year>",
_13
_13
# Only required if data_type is "int64"
_13
"range": {
_13
"start": <int>,
_13
"end": <int>,
_13
"interval": <int>
_13
}
_13
}

Table clustering

Specify a single column or set of columns by which to cluster your BigQuery table for optimized performance.

my_first_model.sql

_10
{
_10
cluster_by = ["<field_name>", "<field_name>"]
_10
}

Example

Partioning by date and clustering:

orders.sql

_22
{{ config(
_22
materialized='table',
_22
partition_by={
_22
"field": "orderdate",
_22
"data_type": "date",
_22
"granularity": "month"
_22
},
_22
_22
cluster_by = ['customerid', 'orderid'],
_22
)
_22
}}
_22
_22
with orders AS (
_22
select
_22
orderid,
_22
customerid,
_22
employeeid,
_22
orderdate,
_22
price
_22
from {{ source('mdm-prod', 'orders') }}
_22
)
_22
select * from orders

Partioning by int64 column example:

orders.sql

_24
{{ config(
_24
materialized='table',
_24
partition_by={
_24
"field": "orderid",
_24
"data_type": "int64",
_24
"range": {
_24
"start": 0,
_24
"end": 100,
_24
"interval": 10
_24
}
_24
}
_24
)
_24
}}
_24
_24
with orders AS (
_24
select
_24
orderid,
_24
customerid,
_24
employeeid,
_24
orderdate,
_24
price
_24
from {{ source('mdm-prod', 'orders') }}
_24
)
_24
select * from orders