若要在BigQuery数据传输中使用带通配符的Cloud Storage对象名称,需要先使用Cloud Storage客户端库的list_blobs()
方法获取与指定模式匹配的对象列表,再将该列表传递给BigQuery数据传输。以下是一个Python示例:
from fnmatch import fnmatchcase

from google.cloud.storage import Client
from google.cloud.bigquery_datatransfer import (
    DataTransferServiceClient, types as transfer_types
)
# Initialize the Cloud Storage and BigQuery Data Transfer clients.
storage_client = Client()
transfer_client = DataTransferServiceClient()

# Wildcard URI describing the objects to hand to the transfer.
wildcard_string = "gs://my-bucket/logs/*"

# Split the URI into the bucket name and the object-name pattern.
# partition() also copes with a bucket-only URI ("gs://my-bucket"), where
# the pattern part is simply empty.
if not wildcard_string.startswith("gs://"):
    raise ValueError(f"Expected a gs:// URI, got: {wildcard_string!r}")
bucket, _, object_pattern = wildcard_string[len("gs://"):].partition("/")
# A bucket-only URI means "every object in the bucket".
object_pattern = object_pattern or "*"

# The ``prefix`` argument of list_blobs() is matched literally, so wildcard
# characters must not be part of it. Use the literal text that precedes the
# first wildcard character as the listing prefix.
prefix = object_pattern
for wildcard_char in "*?[":
    prefix = prefix.split(wildcard_char, 1)[0]

# List the candidates by prefix, then keep only the names that match the full
# pattern. fnmatchcase() is case-sensitive and lets "*" match "/" as well, so
# "logs/*" selects everything below "logs/", including nested names.
blobs = [
    blob
    for blob in storage_client.list_blobs(bucket, prefix=prefix)
    if fnmatchcase(blob.name, object_pattern)
]

# Pass the matching objects to the BigQuery Data Transfer configuration.
# NOTE(review): "my-dataset" is a placeholder; BigQuery dataset IDs presumably
# allow only letters, digits and underscores -- confirm before running.
# NOTE(review): verify the "bucket_name"/"paths" keys against the parameter
# list of the "google_cloud_storage" data source; its documented parameters
# appear to be "data_path_template" (which accepts wildcards itself),
# "destination_table_name_template" and "file_format" -- TODO confirm.
transfer_config = transfer_types.TransferConfig(
    destination_dataset_id="my-dataset",
    display_name="My Transfer Config",
    data_source_id="google_cloud_storage",
    params={
        "bucket_name": bucket,
        "paths": [blob.name for blob in blobs],
    },
)

# Create the transfer configuration in the target project.
response = transfer_client.create_transfer_config(
    parent="projects/my-project-id",
    transfer_config=transfer_config,
)
print(f"Created transfer config with name: {response.name}")