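Amazon Kendra 支持硬删除和软删除。删除文档时,可以通过设置 documentMetadataConfiguration 中的 softDeleteMarker 和 idFieldName 来执行软删除。默认情况下,这些字段涵盖 contentAccessControl 等文档元数据。因此,软删除是指将文档的元数据值设置为 NULL,而不删除文档本身。以下是一个使用 AWS SDK for Python(boto3)的示例,展示了如何在 Kendra 中执行软删除(注意:boto3 公开文档中 batch_delete_document 仅列出 IndexId、DocumentIdList 和 DataSourceSyncJobMetricTarget 三个参数,使用前请对照官方 API 参考核实本文提到的软删除字段):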
import boto3
from botocore.config import Config
import json
# Create a Kendra client.
# NOTE: the original snippet left the region and endpoint blank
# (region_name='' and endpoint_url='https://'); botocore rejects a
# scheme-only endpoint URL with "Invalid endpoint", so both are spelled out
# here. Replace AWS_REGION with the region that hosts your Kendra index.
AWS_REGION = 'us-east-1'
client = boto3.client(
    'kendra',
    config=Config(
        connect_timeout=50,
        read_timeout=70,
        retries={'max_attempts': 10},
        region_name=AWS_REGION,
    ),
    # Regional Kendra endpoint, derived from the region chosen above.
    endpoint_url=f'https://kendra.{AWS_REGION}.amazonaws.com',
)
# Sample document record and the custom metadata that accompanies it.
# Both are built with keyword arguments; key order matches insertion order.
document = dict(
    Id='123456',
    Type='TXT',
    Title='Sample Document',
    Content='This is a sample content of a text document',
)
metadata = dict(key1='val1', key2='val2')
# Index the sample document with BatchPutDocument.
# Fixes relative to the original call:
#   * 'Metadata' is not a member of the Kendra Document structure, so
#     botocore would fail parameter validation; custom fields are passed in
#     'Attributes' as a list of {'Key', 'Value'} pairs instead.
#   * 'Blob' now carries the document text itself as UTF-8 bytes (the
#     original sent a JSON dump of the whole record), and 'ContentType'
#     states explicitly that it is plain text.
# NOTE: IndexId is still the blank placeholder from the original snippet;
# fill in your index ID before running.
# NOTE(review): key1/key2 presumably have to be defined as custom fields on
# the index before they can be written — confirm against your index config.
response = client.batch_put_document(
    IndexId='',
    Documents=[
        {
            'Id': document['Id'],
            'Title': document['Title'],
            'Blob': document['Content'].encode('utf-8'),
            'ContentType': 'PLAIN_TEXT',
            'Attributes': [
                {'Key': key, 'Value': {'StringValue': value}}
                for key, value in metadata.items()
            ],
        }
    ],
)
# Delete the document with softDeleteMarker
response = client.batch_delete_document(
IndexId='',
DocumentIdList=[
'123456'
],
DataSourceSyncJobMetricTarget={
'DataSourceId': 'datasource-id-1',
'MetricName': 'DOCUMENTS_DELETED'
},
DocumentMetadataConfiguration={
'S3Prefix': 's3:///',
'Name': 'CustomData',
'Type': 'STRING_VALUE',
'Relevance': {
'Freshness': False
},
'Search': {
'Searchable': True,
'SearcherMetadata': {
'DocumentIdFieldName': 'Id',
'