为了获得所需的结果,我会使用数据的关系表示而不是数组。
arrayJoin 可以将数组转换为关系。
SELECT avg(duration)
FROM
(
SELECT max(time) - min(time) AS duration
FROM
(
SELECT
data.1 AS id,
toDateTime(data.2) AS time
FROM
(
SELECT arrayJoin([('aa', '2020-11-08 01:00:01'), ('aa', '2020-11-08 01:00:03'), ('aa', '2020-11-08 01:00:05'), ('ab', '2020-11-09 01:00:00'), ('ab', '2020-11-09 01:00:05'), ('ab', '2020-11-09 01:00:15')]) AS data
)
)
GROUP BY id
)
基于数组的决策。请注意,它可能比基于关系的决策慢得多(在选择最佳方法之前,请检查两者)。通过使用arrayReduceInRanges函数可以改进此实现。
SELECT
arraySort(x -> (x.1), data) AS sorted_array,
arraySplit((x, y) -> y, sorted_array, arrayMap((x, i) -> if(i = 1, 1, if((x.1) = ((sorted_array[i - 1]).1), 0, 1)), sorted_array, arrayEnumerate(sorted_array))) AS session_arrays,
arrayMap(arr -> arrayReduce('min', arrayMap(x -> (x.2), arr)), session_arrays) AS min_session_times,
arrayMap(arr -> arrayReduce('max', arrayMap(x -> (x.2), arr)), session_arrays) AS max_session_times,
arrayReduce('avg', arrayMap((x, y) -> (y - x), min_session_times, max_session_times)) AS avg
FROM
(
SELECT [('aa', toDateTime('2020-11-08 01:00:01')), ('aa', toDateTime('2020-11-08 01:00:03')), ('aa', toDateTime('2020-11-08 01:00:05')), ('ab', toDateTime('2020-11-09 01:00:00')), ('ab', toDateTime('2020-11-09 01:00:05')), ('ab', toDateTime('2020-11-09 01:00:15'))] AS data
)