要从AWS Glue中的SQL表中删除行,可以使用以下解决方案:
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.dynamicframe import DynamicFrame
from pyspark.sql import SparkSession

# Initialise the AWS Glue context.
args = getResolvedOptions(sys.argv, ['JOB_NAME'])
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session

# Read the table through the Glue Data Catalog and convert it to a Spark
# DataFrame so that column expressions can be used for filtering.
database = "your_database"
table_name = "your_table"
df = glueContext.create_dynamic_frame.from_catalog(database=database, table_name=table_name).toDF()

# Keep every row except those whose column equals the unwanted value.
# The explicit isNull() branch matters: in SQL semantics NULL != 'x' evaluates
# to NULL, so a plain inequality filter would silently drop NULL rows as well.
df = df.filter(df["column_name"].isNull() | (df["column_name"] != "value_to_delete"))

# Write the surviving rows to SQL Server.
# NOTE: the Glue JDBC sink appends to the target table; it never removes
# existing rows. Writing back into the source table would therefore duplicate
# the surviving rows, so the result goes to a separate (empty or not yet
# existing) table that can replace the original on the database side.
# NOTE: do not hard-code real credentials; load them from a Glue connection
# or a secrets store.
glueContext.write_dynamic_frame.from_options(
    # DynamicFrame.fromDF expects (dataframe, glue_ctx, name).
    frame=DynamicFrame.fromDF(df, glueContext, "filtered_rows"),
    connection_type="sqlserver",
    connection_options={
        "url": "jdbc:sqlserver://your_sqlserver_endpoint;databaseName=your_database",
        "dbtable": "your_table_filtered",
        "user": "your_username",
        "password": "your_password",
    },
)
请确保替换以下变量:
- your_database - 数据库名称
- your_table - 表名称
- column_name - 要删除行的列名称
- value_to_delete - 要删除的值
- your_sqlserver_endpoint - SQL服务器的终端节点

import sys
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.utils import getResolvedOptions
from pyspark.sql import SparkSession

# Initialise the AWS Glue context.
args = getResolvedOptions(sys.argv, ['JOB_NAME'])
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session

# Read the table through the Glue Data Catalog as a Spark DataFrame.
database = "your_database"
table_name = "your_table"
df = glueContext.create_dynamic_frame.from_catalog(database=database, table_name=table_name).toDF()

# Keep every row except those whose column equals the unwanted value.
# The explicit isNull() branch matters: in SQL semantics NULL != 'x' evaluates
# to NULL, so a plain inequality filter would silently drop NULL rows as well.
df = df.filter(df["column_name"].isNull() | (df["column_name"] != "value_to_delete"))

# Shared JDBC settings.
# NOTE: do not hard-code real credentials; load them from a Glue connection
# or a secrets store.
jdbc_options = {
    "url": "jdbc:sqlserver://your_sqlserver_endpoint;databaseName=your_database",
    "user": "your_username",
    "password": "your_password",
}
target_table = "your_table"
staging_table = "your_table_staging"

# Step 1: persist the surviving rows in a staging table.
# Spark evaluates lazily, so overwriting the table that `df` is still reading
# from would empty the target BEFORE its rows are fetched and lose all data.
df.write.format("jdbc").options(
    dbtable=staging_table, **jdbc_options
).mode("overwrite").save()

# Step 2: reload the staged rows and replace the contents of the target table.
# truncate="true" makes overwrite issue TRUNCATE TABLE instead of DROP/CREATE,
# which preserves the original column types, indexes and constraints.
staged_df = spark.read.format("jdbc").options(
    dbtable=staging_table, **jdbc_options
).load()
staged_df.write.format("jdbc").options(
    dbtable=target_table, truncate="true", **jdbc_options
).mode("overwrite").save()
# The staging table is left in place and can be dropped once the result is verified.
请确保替换以下变量:
- your_database - 数据库名称
- your_table - 表名称
- column_name - 要删除行的列名称
- value_to_delete - 要删除的值
- your_sqlserver_endpoint - SQL服务器的终端节点
- your_username - SQL服务器的用户名
- your_password - SQL服务器的密码

以上两个示例均为在 AWS Glue 中运行的 PySpark(Spark ETL)作业,用于删除 SQL 表中的行。您可以根据自己的要求和环境进行适当的修改。