访问 AWS Glue 服务时，需要提供访问权限。可以使用以下代码创建 IAM 角色并为其授予 AWS Glue 权限：
import json

import boto3
# Create an IAM role that the AWS Glue service can assume, attach the
# AWS-managed Glue service policy to it, and look up a Glue database.

# Clients for the two AWS services used below.
glue_client = boto3.client('glue')
iam = boto3.client('iam')

role_name = 'GlueAccessRole'

# Trust policy: allows the Glue service principal to assume this role.
assume_role_policy_document = {
    'Version': '2012-10-17',
    'Statement': [
        {
            'Effect': 'Allow',
            'Principal': {'Service': 'glue.amazonaws.com'},
            'Action': 'sts:AssumeRole',
        }
    ],
}

response = iam.create_role(
    RoleName=role_name,
    AssumeRolePolicyDocument=json.dumps(assume_role_policy_document),
)
# Capture the ARN here, before `response` is reassigned: the role details are
# read from the create_role response, not from the attach_role_policy one.
role_arn = response['Role']['Arn']

policy_arn = 'arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole'
response = iam.attach_role_policy(
    RoleName=role_name,
    PolicyArn=policy_arn,
)

database_name = 'my_database'
response = glue_client.get_database(
    Name=database_name,
)

# Principal/permissions descriptor built around the new role's ARN.
# NOTE(review): nothing in this section passes this dict to an API call;
# confirm where it is meant to be consumed.
create_table_writer_permission = {
    'Principal': {
        'DataLakePrincipalIdentifier': role_arn,
    },
    'Permissions': ['DATA_LOCATION_ACCESS'],
}
response = glue_client.batch_create_partition( DatabaseName=database_name, TableName='my_table', PartitionInput=[ { 'Values': [ '2020-01-01' ], 'StorageDescriptor': { 'Location': 's3://my_bucket/year=2020/month=01/day=01', 'InputFormat': 'org.apache.hadoop.mapred.TextInputFormat', 'OutputFormat': 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', } } ], CatalogId='123456789012', CreateTableWriter=True, CreateTableWithLocation=True, TableLocation='s3://my_bucket', Configuration={'jsonProperty': 'value'}, Options={'skip.header.line.count': '1'}, PartitionWriter='myotling', TableInput={