import pandas as pd
import pyarrow as pa
from awsglue.dataframe_implicit_serializer import GlueSerializer
# DataFrame to S3: write the frame as Snappy-compressed Parquet, omitting
# the index.
# NOTE(review): `DataFrame` is not defined by the imports visible in this
# file; it presumably stands for the frame instance to write -- confirm and
# bind it before this call.
# NOTE(review): pandas forwards extra keyword arguments such as `serializer`
# to the underlying Parquet engine; verify that the engine in use accepts it.
DataFrame.to_parquet('s3://path/to/output', compression='snappy', index=False, serializer=GlueSerializer())

# S3 to DataFrame: read every Parquet file listed in `files` and concatenate
# them into a single frame.
# `files` is expected to be an iterable of Parquet paths defined elsewhere
# (presumably S3 URIs -- confirm against the caller).
# pd.concat already returns a new object, so its result is bound directly
# instead of going through an intermediate frame plus .copy(), which would
# briefly hold two full copies of the data in memory.
df = pd.concat([pd.read_parquet(pf) for pf in files])
这些调整应该会显著减少写入时间。