下面是一个示例查询,它查找每行相邻单元格之间的唯一性。
WITH
input AS (
SELECT
1 AS id,
[1, 2, 2, 3, 4] AS array UNION ALL
SELECT
2 AS id,
[1, 2, 3, 4, 4] AS array UNION ALL
SELECT
3 AS id,
[1, 1, 2, 3, 4] AS array UNION ALL
SELECT
4 AS id,
[1, 1, 2, 2, 3, 4] AS array),
distinct_pairs AS (
SELECT
id,
OFFSET,
value,
LEAD(value, 1) OVER (PARTITION BY id ORDER BY OFFSET) lead_value
FROM
input
CROSS JOIN
UNNEST(array) WITH OFFSET AS OFFSET,
value
WHERE
LEAD(value, 1) OVER (PARTITION BY id ORDER BY OFFSET) IS NOT NULL),
distinct_adjacent_pairs AS (
SELECT
id,
value
FROM (
SELECT
id,
value,
COUNTIF(value = lead_value) OVER (PARTITION BY id, value ORDER BY OFFSET) run_count,
COUNT(*) OVER (PARTITION BY id, value) count
FROM
distinct_pairs)
WHERE
run_count = 0
AND count = 2)
SELECT
id,
ARRAY(SELECT DISTINCT value FROM distinct_adjacent_pairs d WHERE d.id = i.id) distinct_values
FROM (
SELECT DISTINCT
id
FROM
distinct_adjacent_pairs) i
该查询在BigQuery中运行。
该查询具有三个步骤:
input:输入包含数组。
distinct_pairs:此步骤展开数组并找到每行相邻的唯一值对。
distinct_adjacent_pairs:此步骤筛选出仅在相邻单元格中出现