可以改用规格更高的 AWS Glue 工作节点类型(Worker Type,例如 G.2X),为作业提供更多的内存和计算资源;也可以通过优化代码来减少内存占用。下面是一个使用更高规格工作节点类型的代码示例:
import sys
import time

import boto3
# ---------------------------------------------------------------------------
# Job configuration
# ---------------------------------------------------------------------------
job_name = 'my-glue-job'
job_command = {
    'Name': 'glueetl',
    'ScriptLocation': 's3://my-bucket/glue-etl-script.py',
    'PythonVersion': '3',
}
job_execution_role = 'my-glue-job-execution-role'

# Capacity is expressed as WorkerType + NumberOfWorkers.  The older
# AllocatedCapacity parameter is deprecated, and MaxCapacity must not be set
# when WorkerType/NumberOfWorkers are used, so neither is passed to create_job.
# G.2X gives each worker twice the memory and vCPUs of G.1X, which is the
# usual remedy for out-of-memory failures in a Glue ETL job.
worker_type = 'G.2X'
number_of_workers = 10

# Set explicitly: Python 3 scripts and G-series workers are not available on
# the legacy default Glue version.  Adjust to the version your script targets.
glue_version = '4.0'

# Seconds to sleep between two job-run status checks.
poll_interval_seconds = 30

# JobRunState values after which the run will not change state any more.
TERMINAL_JOB_RUN_STATES = frozenset({
    'SUCCEEDED',
    'FAILED',
    'STOPPED',
    'TIMEOUT',
    'ERROR',
    'EXPIRED',
})


def wait_for_job_run(glue, job_name, run_id,
                     poll_interval=poll_interval_seconds, sleep=time.sleep):
    """Block until a Glue job run reaches a terminal state and return it.

    The Glue client provides no built-in waiter, so the run is polled with
    ``get_job_run`` until its ``JobRunState`` is one of
    ``TERMINAL_JOB_RUN_STATES``.

    Args:
        glue: A Glue client (anything exposing ``get_job_run``).
        job_name: Name of the Glue job the run belongs to.
        run_id: ID of the run, as returned by ``start_job_run``.
        poll_interval: Seconds to wait between two status checks.
        sleep: Callable used to wait; injectable so callers can avoid
            real sleeping.

    Returns:
        The final ``JobRunState`` string, e.g. ``'SUCCEEDED'`` or ``'FAILED'``.
    """
    while True:
        response = glue.get_job_run(JobName=job_name, RunId=run_id)
        state = response['JobRun']['JobRunState']
        if state in TERMINAL_JOB_RUN_STATES:
            return state
        sleep(poll_interval)


def main():
    """Create the Glue job on larger workers, run it once, report the result.

    Exits the process with status 1 when the run does not end in
    ``SUCCEEDED``.
    """
    glue = boto3.client('glue')

    # Create the job definition.  create_job only returns the job name; a run
    # ID does not exist until the job is actually started.
    glue.create_job(
        Name=job_name,
        Role=job_execution_role,
        Command=job_command,
        GlueVersion=glue_version,
        WorkerType=worker_type,
        NumberOfWorkers=number_of_workers,
    )

    # Start a run.  JobRunId is *returned* here; passing one in is only meant
    # for retrying a previous run.
    job_run_id = glue.start_job_run(JobName=job_name)['JobRunId']

    # Wait for the run to finish, then check the job status.
    status = wait_for_job_run(glue, job_name, job_run_id)
    if status == 'SUCCEEDED':
        print('Glue job succeeded!')
    else:
        print('Glue job failed! Status: ', status)
        sys.exit(1)


if __name__ == '__main__':
    main()