可以使用以下代码示例在写入后保持分区列作为行值:
在您的脚本中添加以下行:
# Glue job script: writes the catalog table to S3 partitioned by
# 'partition_column', reads the result back, and rewrites it with
# 'partition_column' carried as an ordinary row value.
# NOTE(review): glueContext, spark, Job, argparse and lit are not defined in
# this excerpt; presumably they are imported/created earlier in the script —
# confirm.
job = Job(glueContext)
job.init('myjob')

logger = glueContext.get_logger()

# Command-line arguments for the job.
glue_parser = argparse.ArgumentParser(description='Arguments for my glue job')
glue_parser.add_argument(
    '--s3_destination_path',
    type=str,
    required=True,
    help='S3 destination path where the data should be written',
)
# parse_known_args() ignores arguments this parser does not declare (for
# example the ones the Glue runtime adds on its own), whereas parse_args()
# would exit with "unrecognized arguments".
args, _unrecognized = glue_parser.parse_known_args()
s3_destination_path = args.s3_destination_path

# Load the source table from the Glue Data Catalog and convert it to a
# Spark DataFrame so the DataFrameWriter API can be used.
input_dyf = glueContext.create_dynamic_frame.from_catalog(
    database="mydb",
    table_name="mytable",
)
input_df = input_dyf.toDF()

# First pass: write the data partitioned by 'partition_column'.
# The chain is parenthesized so it can span several lines.
(
    input_df.write
    .partitionBy('partition_column')
    .parquet(s3_destination_path)
)

# Second pass: read the partitioned output back.
output_df = spark.read.parquet(s3_destination_path)

# Replace the partition-derived column with a literal column.
# NOTE(review): 'value' looks like a placeholder — every row receives the
# same constant; confirm the intended value.
output_df = output_df.drop('partition_column')
output_df = output_df.withColumn('partition_column', lit('value'))

# NOTE(review): this writes to the same path that was just written and read,
# without an explicit save mode. Spark's default mode presumably raises when
# the target already exists — confirm the intended destination and mode.
output_df.write.parquet(s3_destination_path)