为 AWS Glue 作业创建 IAM 角色，并为其配置正确的 S3 存储桶访问权限。
示例代码:
import boto3

# Step 1: create the IAM role that AWS Glue assumes when it runs the job.
iam = boto3.client('iam')
glue_role = iam.create_role(
    RoleName='GlueRole',
    Description='AWS Glue Service Role',
    # Trust policy: allows the Glue service principal to assume this role.
    AssumeRolePolicyDocument='{"Version": "2012-10-17", "Statement": [{ "Effect": "Allow", "Principal": {"Service": "glue.amazonaws.com"}, "Action": "sts:AssumeRole"}] }'
)

# A trust policy only controls who may assume the role; it grants no
# permissions by itself. Attach the AWS managed Glue service policy so the
# role can actually be used by Glue jobs.
iam.attach_role_policy(
    RoleName='GlueRole',
    PolicyArn='arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole'
)
import json

import boto3

# Step 2: attach a bucket policy to 'my-bucket' that allows listing the
# bucket and reading its objects.
s3 = boto3.resource('s3')
bucket = s3.Bucket('my-bucket')

# The boto3 Bucket resource does not provide an `arn` attribute, so the ARN
# is derived from the bucket name (S3 bucket ARNs carry no region/account).
bucket_arn = 'arn:aws:s3:::' + bucket.name

policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": ["s3:GetObject", "s3:ListBucket"],
            # s3:ListBucket applies to the bucket ARN itself,
            # s3:GetObject applies to the objects under it ("/*").
            "Resource": [bucket_arn, bucket_arn + "/*"],
            # NOTE(review): a Glue job accesses S3 with the credentials of
            # its IAM role, so this principal may need to be the role ARN
            # ({"AWS": "arn:aws:iam::<account-id>:role/GlueRole"}) rather
            # than the Glue service principal -- confirm before relying on it.
            "Principal": {
                "Service": "glue.amazonaws.com"
            }
        }
    ]
}

# Despite its name, `bucket_policy` is the low-level S3 client used to
# upload the policy document; the policy must be passed as a JSON string.
bucket_policy = boto3.client('s3')
bucket_policy.put_bucket_policy(
    Bucket='my-bucket',
    Policy=json.dumps(policy)
)
import sys
import time

import boto3

# Step 3: create the Glue job, start a run, and wait for it to finish.
JOB_NAME = 'my_glue_job'

# Seconds to wait between status checks so the Glue API is not hammered.
POLL_INTERVAL_SECONDS = 30

# Every JobRunState after which the run will not change state again.
# Besides FAILED/SUCCEEDED/STOPPED, a run can also end as TIMEOUT, ERROR or
# EXPIRED; without these the wait loop would never terminate.
TERMINAL_STATES = ('FAILED', 'SUCCEEDED', 'STOPPED', 'TIMEOUT', 'ERROR', 'EXPIRED')

glue = boto3.client('glue')
response = glue.create_job(
    Name=JOB_NAME,
    # `Role` accepts either the role name or its full ARN. The name is used
    # here because a valid ARN must contain the 12-digit account ID
    # (arn:aws:iam::<account-id>:role/GlueRole).
    Role='GlueRole',
    Command={
        # 'glueetl' (Spark ETL) matches the 10-DPU capacity and the job
        # bookmark option below; 'pythonshell' jobs accept at most 1 DPU.
        'Name': 'glueetl',
        'ScriptLocation': 's3://my-bucket/my_script.py'
    },
    DefaultArguments={
        '--job-language': 'python',
        '--job-bookmark-option': 'job-bookmark-enable'
    },
    # MaxCapacity replaces the deprecated AllocatedCapacity parameter.
    MaxCapacity=10.0,
    # Job timeout in minutes.
    Timeout=180
)

# Run the job
response = glue.start_job_run(
    JobName=JOB_NAME,
    Arguments={}
)
job_run_id = response['JobRunId']

# Poll the run until it reaches a terminal state.
while True:
    status = glue.get_job_run(JobName=JOB_NAME, RunId=job_run_id)['JobRun']['JobRunState']
    print("Job Status: {}".format(status))
    if status in TERMINAL_STATES:
        break
    time.sleep(POLL_INTERVAL_SECONDS)
以上代码示例中,S3 存储桶名为 my-bucket,IAM 角色名为 GlueRole,作业名为 my_glue_job,