可以使用 BigQuery 的表修饰符(table decorator)按分区分别导出数据,从而控制输出分区。
示例代码:
from google.cloud import bigquery
# Export a time-partitioned BigQuery table to Cloud Storage as CSV, running
# one extract job per partition so every partition lands in its own file set.
client = bigquery.Client()
dataset_reference = bigquery.DatasetReference(client.project, 'my_dataset')
table_reference = dataset_reference.table('my_table')
destination_uri = 'gs://my_bucket/my_file*.csv'

# Control the output partitions.
table = client.get_table(table_reference)
if table.time_partitioning is None:
    raise ValueError(
        f"{table_reference.dataset_id}.{table_reference.table_id} is not "
        "time-partitioned; there are no partitions to export separately."
    )

num_of_partitions = 10  # maximum number of (most recent) partitions to export

# An extract job can only address a single partition, written as
# "<table>$<partition_id>", so the real partition ids are listed instead of
# computing date ranges by hand.
# NOTE(review): the listing may include special ids such as "__NULL__" or
# "__UNPARTITIONED__" -- confirm; only purely numeric (date-like) ids are kept.
partition_ids = [
    partition_id
    for partition_id in client.list_partitions(table)
    if partition_id.isdigit()
]
# Ids of one table share a fixed width, so a reverse string sort is
# newest-first; the slice then keeps the newest `num_of_partitions`.
time_partitions = sorted(partition_ids, reverse=True)[:num_of_partitions]

job_config = bigquery.ExtractJobConfig()
job_config.destination_format = bigquery.DestinationFormat.CSV
job_config.field_delimiter = ','
job_config.print_header = True

for partition in time_partitions:
    # The "$<partition_id>" decorator on the table id restricts the extract
    # job to that partition.
    partition_reference = dataset_reference.table(
        f"{table_reference.table_id}${partition}"
    )
    # Drop the trailing ".csv", tag the name with the partition id, then put
    # the extension back; the "*" wildcard from destination_uri is preserved.
    destination_uri_with_partition = f"{destination_uri[:-4]}_{partition}.csv"
    extract_job = client.extract_table(
        partition_reference,
        destination_uri_with_partition,
        location='US',
        job_config=job_config,
    )
    # Block until this partition has been exported; raises if the job failed.
    extract_job.result()