diff options
author | Maxim Yurchuk <maxim-yurchuk@ydb.tech> | 2025-04-01 10:57:33 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-04-01 13:57:33 +0300 |
commit | a1278fe78caf7b05a927f9802e2b267b51c2b1c4 (patch) | |
tree | 4b62817c7ea079e7f35ca4288427d8478c46bf77 | |
parent | 39496b7b4704128dfd0c4138943c3b891c5072b5 (diff) | |
download | ydb-a1278fe78caf7b05a927f9802e2b267b51c2b1c4.tar.gz |
Return of aggregations in workload log (#16519)
-rw-r--r-- | ydb/library/workload/log/select_queries.sql | 410 | ||||
-rw-r--r-- | ydb/library/workload/log/select_queries_original.sql | 452 | ||||
-rw-r--r-- | ydb/library/workload/log/ya.make | 3 |
3 files changed, 410 insertions, 455 deletions
diff --git a/ydb/library/workload/log/select_queries.sql b/ydb/library/workload/log/select_queries.sql index 92d58e4aad..f8d3c66aff 100644 --- a/ydb/library/workload/log/select_queries.sql +++ b/ydb/library/workload/log/select_queries.sql @@ -6,8 +6,97 @@ SELECT COUNT(*) FROM `{table}` WHERE level <> 0; SELECT SUM(level), COUNT(*), AVG(CAST(JSON_VALUE(metadata, "$.response_time") AS DOUBLE)) FROM `{table}`; -- Q3: Средние значения SELECT AVG(CAST(request_id AS Int64)) FROM `{table}`; +-- Q4: Уникальные значения +SELECT COUNT(DISTINCT request_id) FROM `{table}`; +-- Q5: Уникальные сообщения об ошибках +SELECT COUNT(DISTINCT message) FROM `{table}`; -- Q6: Временной диапазон логов SELECT MIN(timestamp), MAX(timestamp) FROM `{table}`; +-- Q7: Группировка по уровню важности +SELECT level, COUNT(*) as count +FROM `{table}` +WHERE level <> 0 +GROUP BY level +ORDER BY count DESC; +-- Q8: Топ-10 сервисов по уникальным request_id +SELECT service_name, COUNT(DISTINCT request_id) AS u +FROM `{table}` +GROUP BY service_name +ORDER BY u DESC +LIMIT 10; +-- Q9: Комплексная агрегация по сервисам +SELECT + service_name, + SUM(level), + COUNT(*) AS c, + AVG(CAST(JSON_VALUE(metadata, "$.response_time") AS DOUBLE)), + COUNT(DISTINCT request_id) +FROM `{table}` +GROUP BY service_name +ORDER BY c DESC +LIMIT 10; +-- Q10: Группировка по компонентам с подсчетом уникальных request_id +SELECT component, COUNT(DISTINCT request_id) AS u +FROM `{table}` +WHERE component <> '' +GROUP BY component +ORDER BY u DESC +LIMIT 10; +-- Q11: Группировка по компоненту и сервису +SELECT component, service_name, COUNT(DISTINCT request_id) AS u +FROM `{table}` +WHERE component <> '' +GROUP BY component, service_name +ORDER BY u DESC +LIMIT 10; +-- Q12: Топ сообщений об ошибках +SELECT message, COUNT(*) AS c +FROM `{table}` +WHERE message <> '' +GROUP BY message +ORDER BY c DESC +LIMIT 10; +-- Q13: Уникальные request_id по сообщениям +SELECT message, COUNT(DISTINCT request_id) AS u +FROM `{table}` +WHERE message <> '' +GROUP BY message +ORDER BY u DESC +LIMIT 10; +-- Q14: Группировка по уровню и сообщению +SELECT level, message, COUNT(*) AS c +FROM `{table}` +WHERE message <> '' +GROUP BY level, message +ORDER BY c DESC +LIMIT 10; +-- Q15: Топ по request_id +SELECT request_id, COUNT(*) as count +FROM `{table}` +GROUP BY request_id +ORDER BY count DESC +LIMIT 10; +-- Q16: Группировка по request_id и сообщению +SELECT request_id, message, COUNT(*) as count +FROM `{table}` +GROUP BY request_id, message +ORDER BY count DESC +LIMIT 10; +-- Q17: Простая группировка без сортировки +SELECT request_id, message, COUNT(*) +FROM `{table}` +GROUP BY request_id, message +LIMIT 10; +-- Q18: Группировка с извлечением минут +SELECT + request_id, + m, + message, + COUNT(*) as count +FROM `{table}` +GROUP BY request_id, CAST(CAST(timestamp AS Uint64) / 60000000 * 60000000 AS DateTime) AS m, message +ORDER BY count DESC +LIMIT 10; -- Q19: Поиск по конкретному request_id SELECT request_id FROM `{table}` @@ -16,6 +105,31 @@ WHERE request_id = '435090932899640449'; SELECT COUNT(*) FROM `{table}` WHERE message LIKE '%error%'; +-- Q21: Анализ ошибок с URL +SELECT + message, + MIN(JSON_VALUE(metadata, "$.url")), + COUNT(*) AS c +FROM `{table}` +WHERE JSON_VALUE(metadata, "$.url") LIKE '%api%' + AND message <> '' +GROUP BY message +ORDER BY c DESC +LIMIT 10; +-- Q22: Сложный поиск с несколькими условиями +SELECT + message, + MIN(JSON_VALUE(metadata, "$.url")), + MIN(JSON_VALUE(metadata, "$.title")), + COUNT(*) AS c, + COUNT(DISTINCT request_id) +FROM `{table}` +WHERE JSON_VALUE(metadata, "$.title") LIKE '%Error%' + AND JSON_VALUE(metadata, "$.url") NOT LIKE '%api%' + AND message <> '' +GROUP BY message +ORDER BY c DESC +LIMIT 10; -- Q23: Выборка с сортировкой по времени SELECT * FROM `{table}` @@ -40,3 +154,299 @@ FROM `{table}` WHERE message <> '' ORDER BY timestamp, message LIMIT 10; +-- Q27: Анализ длины сообщений +SELECT + service_name, + AVG(Unicode::GetLength(message)) AS l, + COUNT(*) AS c +FROM `{table}` +WHERE message <> '' +GROUP BY service_name +HAVING COUNT(*) > 1000 +ORDER BY l DESC +LIMIT 25; +-- Q28: Анализ источников ошибок +SELECT + component, + AVG(Unicode::GetLength(message)) AS l, + COUNT(*) AS c, + MIN(message) +FROM `{table}` +WHERE message <> '' +GROUP BY component +HAVING COUNT(*) > 1000 +ORDER BY l DESC +LIMIT 25; +-- Q29: Множественные суммы +SELECT + SUM(level), + SUM(level + 1), + SUM(level + 2), + SUM(level + 3), + SUM(level + 4), + SUM(level + 5), + SUM(level + 6), + SUM(level + 7), + SUM(level + 8), + SUM(level + 9), + SUM(level + 10), + SUM(level + 11), + SUM(level + 12), + SUM(level + 13), + SUM(level + 14), + SUM(level + 15), + SUM(level + 16), + SUM(level + 17), + SUM(level + 18), + SUM(level + 19), + SUM(level + 20), + SUM(level + 21), + SUM(level + 22), + SUM(level + 23), + SUM(level + 24), + SUM(level + 25), + SUM(level + 26), + SUM(level + 27), + SUM(level + 28), + SUM(level + 29), + SUM(level + 30), + SUM(level + 31), + SUM(level + 32), + SUM(level + 33), + SUM(level + 34), + SUM(level + 35), + SUM(level + 36), + SUM(level + 37), + SUM(level + 38), + SUM(level + 39), + SUM(level + 40), + SUM(level + 41), + SUM(level + 42), + SUM(level + 43), + SUM(level + 44), + SUM(level + 45), + SUM(level + 46), + SUM(level + 47), + SUM(level + 48), + SUM(level + 49), + SUM(level + 50), + SUM(level + 51), + SUM(level + 52), + SUM(level + 53), + SUM(level + 54), + SUM(level + 55), + SUM(level + 56), + SUM(level + 57), + SUM(level + 58), + SUM(level + 59), + SUM(level + 60), + SUM(level + 61), + SUM(level + 62), + SUM(level + 63), + SUM(level + 64), + SUM(level + 65), + SUM(level + 66), + SUM(level + 67), + SUM(level + 68), + SUM(level + 69), + SUM(level + 70), + SUM(level + 71), + SUM(level + 72), + SUM(level + 73), + SUM(level + 74), + SUM(level + 75), + SUM(level + 76), + SUM(level + 77), + SUM(level + 78), + SUM(level + 79), + SUM(level + 80), + SUM(level + 81), + SUM(level + 82), + SUM(level + 83), + SUM(level + 84), + SUM(level + 85), + SUM(level + 86), + SUM(level + 87), + SUM(level + 88), + SUM(level + 89) +FROM `{table}`; +-- Q30: Группировка по нескольким полям +SELECT + level, + client_ip, + COUNT(*) AS c, + SUM(CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32)), + AVG(CAST(JSON_VALUE(metadata, "$.response_time") AS DOUBLE)) +FROM `{table}` +WHERE message <> '' +GROUP BY level, JSON_VALUE(metadata, "$.client_ip") AS client_ip +ORDER BY c DESC +LIMIT 10; +-- Q31: Аналогичная группировка с другими полями +SELECT + request_id, + client_ip, +COUNT(*) AS c, + SUM(CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32)), + AVG(CAST(JSON_VALUE(metadata, "$.response_time") AS DOUBLE)) +FROM `{table}` +WHERE message <> '' +GROUP BY request_id, JSON_VALUE(metadata, "$.client_ip") AS client_ip +ORDER BY c DESC +LIMIT 10; +-- Q32: Похожая группировка без фильтра +SELECT + request_id, + client_ip, + COUNT(*) AS c, + SUM(CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32)), + AVG(CAST(JSON_VALUE(metadata, "$.response_time") AS DOUBLE)) +FROM `{table}` +GROUP BY request_id, JSON_VALUE(metadata, "$.client_ip") AS client_ip +ORDER BY c DESC +LIMIT 10; +-- Q33: Группировка по URL +SELECT + url, + COUNT(*) AS c +FROM `{table}` +GROUP BY JSON_VALUE(metadata, "$.url") AS url +ORDER BY c DESC +LIMIT 10; +-- Q34: Группировка с константой +SELECT + 1, + url, + COUNT(*) AS c +FROM `{table}` +GROUP BY JSON_VALUE(metadata, "$.url") AS url +ORDER BY c DESC +LIMIT 10; +-- Q35: Группировка по IP с вычислениями +SELECT + client_ip + client_ip_1, + client_ip_2, + client_ip_3, + COUNT(*) AS c +FROM `{table}` +GROUP BY + JSON_VALUE(metadata, "$.client_ip") AS client_ip, + CAST(JSON_VALUE(metadata, "$.client_ip") AS Int64) - 1 AS client_ip_1, + CAST(JSON_VALUE(metadata, "$.client_ip") AS Int64) - 2 AS client_ip_2, + CAST(JSON_VALUE(metadata, "$.client_ip") AS Int64) - 3 AS client_ip_3 +ORDER BY c DESC +LIMIT 10; +-- Q36: Сложная фильтрация по датам +SELECT + url, + COUNT(*) AS views +FROM `{table}` +WHERE service_name = 'service1' + AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() + AND CAST(JSON_VALUE(metadata, "$.dont_count") AS Int32) = 0 + AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 + AND JSON_VALUE(metadata, "$.url") <> '' +GROUP BY JSON_VALUE(metadata, "$.url") as url +ORDER BY views DESC +LIMIT 10; +-- Q37: Аналогичный запрос для заголовков +SELECT + title, + COUNT(*) AS views +FROM `{table}` +WHERE service_name = 'service1' + AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() + AND CAST(JSON_VALUE(metadata, "$.dont_count") AS Int32) = 0 + AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 + AND JSON_VALUE(metadata, "$.title") <> '' +GROUP BY JSON_VALUE(metadata, "$.title") as title +ORDER BY views DESC +LIMIT 10; +-- Q38: Сложная фильтрация с отступом +SELECT + url, + COUNT(*) AS views +FROM `{table}` +WHERE service_name = 'service1' + AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() + AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 + AND CAST(JSON_VALUE(metadata, "$.is_link") AS Int32) <> 0 + AND CAST(JSON_VALUE(metadata, "$.is_download") AS Int32) = 0 +GROUP BY JSON_VALUE(metadata, "$.url") AS url +ORDER BY views DESC +LIMIT 10 +OFFSET 1000; +-- Q39: Сложная группировка с множеством условий +SELECT + traffic_source_id, + search_engine_id, + adv_engine_id, + src, + dst, + COUNT(*) AS views +FROM `{table}` +WHERE service_name = 'service1' + AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() + AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 +GROUP BY + JSON_VALUE(metadata, "$.traffic_source_id") AS traffic_source_id, + JSON_VALUE(metadata, "$.search_engine_id") AS search_engine_id, + JSON_VALUE(metadata, "$.adv_engine_id") AS adv_engine_id, + CASE + WHEN JSON_VALUE(metadata, "$.search_engine_id") = '0' + AND JSON_VALUE(metadata, "$.adv_engine_id") = '0' + THEN JSON_VALUE(metadata, "$.referer") + ELSE '' + END AS src, + JSON_VALUE(metadata, "$.url") AS dst +ORDER BY views DESC +LIMIT 10 +OFFSET 1000; +-- Q40: Группировка по хешу URL +SELECT + url_hash, + date, + COUNT(*) AS views +FROM `{table}` +WHERE service_name = 'service1' + AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() + AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 + AND JSON_VALUE(metadata, "$.traffic_source_id") IN ('-1', '6') + AND JSON_VALUE(metadata, "$.referer_hash") = '3594120000172545465' +GROUP BY + JSON_VALUE(metadata, "$.url_hash") AS url_hash, + CAST(timestamp AS Date) AS date +ORDER BY views DESC +LIMIT 10 +OFFSET 100; +-- Q41: Сложная группировка по размерам окна +SELECT + window_client_width, + window_client_height, + COUNT(*) AS views +FROM `{table}` +WHERE service_name = 'service1' + AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() + AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 + AND CAST(JSON_VALUE(metadata, "$.dont_count") AS Int32) = 0 + AND JSON_VALUE(metadata, "$.url_hash") = '2868770270353813622' +GROUP BY + JSON_VALUE(metadata, "$.window_client_width") AS window_client_width, + JSON_VALUE(metadata, "$.window_client_height") AS window_client_height +ORDER BY views DESC +LIMIT 10 +OFFSET 10000; +-- Q42: Группировка по минутам +SELECT + M, + COUNT(*) AS views +FROM `{table}` +WHERE service_name = 'service1' + AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() + AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 + AND CAST(JSON_VALUE(metadata, "$.dont_count") AS Int32) = 0 +GROUP BY CAST(CAST(timestamp AS Uint64) / 60000000 * 60000000 AS DateTime) AS M +ORDER BY M +LIMIT 10 +OFFSET 1000;
\ No newline at end of file diff --git a/ydb/library/workload/log/select_queries_original.sql b/ydb/library/workload/log/select_queries_original.sql deleted file mode 100644 index f8d3c66aff..0000000000 --- a/ydb/library/workload/log/select_queries_original.sql +++ /dev/null @@ -1,452 +0,0 @@ --- Q0: Базовый подсчет всех записей -SELECT COUNT(*) FROM `{table}`; --- Q1: Подсчет записей с определенным уровнем (аналог AdvEngineID <> 0) -SELECT COUNT(*) FROM `{table}` WHERE level <> 0; --- Q2: Агрегация нескольких метрик -SELECT SUM(level), COUNT(*), AVG(CAST(JSON_VALUE(metadata, "$.response_time") AS DOUBLE)) FROM `{table}`; --- Q3: Средние значения -SELECT AVG(CAST(request_id AS Int64)) FROM `{table}`; --- Q4: Уникальные значения -SELECT COUNT(DISTINCT request_id) FROM `{table}`; --- Q5: Уникальные сообщения об ошибках -SELECT COUNT(DISTINCT message) FROM `{table}`; --- Q6: Временной диапазон логов -SELECT MIN(timestamp), MAX(timestamp) FROM `{table}`; --- Q7: Группировка по уровню важности -SELECT level, COUNT(*) as count -FROM `{table}` -WHERE level <> 0 -GROUP BY level -ORDER BY count DESC; --- Q8: Топ-10 сервисов по уникальным request_id -SELECT service_name, COUNT(DISTINCT request_id) AS u -FROM `{table}` -GROUP BY service_name -ORDER BY u DESC -LIMIT 10; --- Q9: Комплексная агрегация по сервисам -SELECT - service_name, - SUM(level), - COUNT(*) AS c, - AVG(CAST(JSON_VALUE(metadata, "$.response_time") AS DOUBLE)), - COUNT(DISTINCT request_id) -FROM `{table}` -GROUP BY service_name -ORDER BY c DESC -LIMIT 10; --- Q10: Группировка по компонентам с подсчетом уникальных request_id -SELECT component, COUNT(DISTINCT request_id) AS u -FROM `{table}` -WHERE component <> '' -GROUP BY component -ORDER BY u DESC -LIMIT 10; --- Q11: Группировка по компоненту и сервису -SELECT component, service_name, COUNT(DISTINCT request_id) AS u -FROM `{table}` -WHERE component <> '' -GROUP BY component, service_name -ORDER BY u DESC -LIMIT 10; --- Q12: Топ сообщений об ошибках -SELECT message, COUNT(*) AS c -FROM `{table}` -WHERE message <> '' -GROUP BY message -ORDER BY c DESC -LIMIT 10; --- Q13: Уникальные request_id по сообщениям -SELECT message, COUNT(DISTINCT request_id) AS u -FROM `{table}` -WHERE message <> '' -GROUP BY message -ORDER BY u DESC -LIMIT 10; --- Q14: Группировка по уровню и сообщению -SELECT level, message, COUNT(*) AS c -FROM `{table}` -WHERE message <> '' -GROUP BY level, message -ORDER BY c DESC -LIMIT 10; --- Q15: Топ по request_id -SELECT request_id, COUNT(*) as count -FROM `{table}` -GROUP BY request_id -ORDER BY count DESC -LIMIT 10; --- Q16: Группировка по request_id и сообщению -SELECT request_id, message, COUNT(*) as count -FROM `{table}` -GROUP BY request_id, message -ORDER BY count DESC -LIMIT 10; --- Q17: Простая группировка без сортировки -SELECT request_id, message, COUNT(*) -FROM `{table}` -GROUP BY request_id, message -LIMIT 10; --- Q18: Группировка с извлечением минут -SELECT - request_id, - m, - message, - COUNT(*) as count -FROM `{table}` -GROUP BY request_id, CAST(CAST(timestamp AS Uint64) / 60000000 * 60000000 AS DateTime) AS m, message -ORDER BY count DESC -LIMIT 10; --- Q19: Поиск по конкретному request_id -SELECT request_id -FROM `{table}` -WHERE request_id = '435090932899640449'; --- Q20: Поиск по подстроке в message -SELECT COUNT(*) -FROM `{table}` -WHERE message LIKE '%error%'; --- Q21: Анализ ошибок с URL -SELECT - message, - MIN(JSON_VALUE(metadata, "$.url")), - COUNT(*) AS c -FROM `{table}` -WHERE JSON_VALUE(metadata, "$.url") LIKE '%api%' - AND message <> '' -GROUP BY message -ORDER BY c DESC -LIMIT 10; --- Q22: Сложный поиск с несколькими условиями -SELECT - message, - MIN(JSON_VALUE(metadata, "$.url")), - MIN(JSON_VALUE(metadata, "$.title")), - COUNT(*) AS c, - COUNT(DISTINCT request_id) -FROM `{table}` -WHERE JSON_VALUE(metadata, "$.title") LIKE '%Error%' - AND JSON_VALUE(metadata, "$.url") NOT LIKE '%api%' - AND message <> '' -GROUP BY message -ORDER BY c DESC -LIMIT 10; --- Q23: Выборка с сортировкой по времени -SELECT * -FROM `{table}` -WHERE message LIKE '%error%' -ORDER BY timestamp -LIMIT 10; --- Q24: Простая выборка сообщений -SELECT message -FROM `{table}` -WHERE message <> '' -ORDER BY timestamp -LIMIT 10; --- Q25: Сортировка по сообщению -SELECT message -FROM `{table}` -WHERE message <> '' -ORDER BY message -LIMIT 10; --- Q26: Сортировка по времени и сообщению -SELECT message -FROM `{table}` -WHERE message <> '' -ORDER BY timestamp, message -LIMIT 10; --- Q27: Анализ длины сообщений -SELECT - service_name, - AVG(Unicode::GetLength(message)) AS l, - COUNT(*) AS c -FROM `{table}` -WHERE message <> '' -GROUP BY service_name -HAVING COUNT(*) > 1000 -ORDER BY l DESC -LIMIT 25; --- Q28: Анализ источников ошибок -SELECT - component, - AVG(Unicode::GetLength(message)) AS l, - COUNT(*) AS c, - MIN(message) -FROM `{table}` -WHERE message <> '' -GROUP BY component -HAVING COUNT(*) > 1000 -ORDER BY l DESC -LIMIT 25; --- Q29: Множественные суммы -SELECT - SUM(level), - SUM(level + 1), - SUM(level + 2), - SUM(level + 3), - SUM(level + 4), - SUM(level + 5), - SUM(level + 6), - SUM(level + 7), - SUM(level + 8), - SUM(level + 9), - SUM(level + 10), - SUM(level + 11), - SUM(level + 12), - SUM(level + 13), - SUM(level + 14), - SUM(level + 15), - SUM(level + 16), - SUM(level + 17), - SUM(level + 18), - SUM(level + 19), - SUM(level + 20), - SUM(level + 21), - SUM(level + 22), - SUM(level + 23), - SUM(level + 24), - SUM(level + 25), - SUM(level + 26), - SUM(level + 27), - SUM(level + 28), - SUM(level + 29), - SUM(level + 30), - SUM(level + 31), - SUM(level + 32), - SUM(level + 33), - SUM(level + 34), - SUM(level + 35), - SUM(level + 36), - SUM(level + 37), - SUM(level + 38), - SUM(level + 39), - SUM(level + 40), - SUM(level + 41), - SUM(level + 42), - SUM(level + 43), - SUM(level + 44), - SUM(level + 45), - SUM(level + 46), - SUM(level + 47), - SUM(level + 48), - SUM(level + 49), - SUM(level + 50), - SUM(level + 51), - SUM(level + 52), - SUM(level + 53), - SUM(level + 54), - SUM(level + 55), - SUM(level + 56), - SUM(level + 57), - SUM(level + 58), - SUM(level + 59), - SUM(level + 60), - SUM(level + 61), - SUM(level + 62), - SUM(level + 63), - SUM(level + 64), - SUM(level + 65), - SUM(level + 66), - SUM(level + 67), - SUM(level + 68), - SUM(level + 69), - SUM(level + 70), - SUM(level + 71), - SUM(level + 72), - SUM(level + 73), - SUM(level + 74), - SUM(level + 75), - SUM(level + 76), - SUM(level + 77), - SUM(level + 78), - SUM(level + 79), - SUM(level + 80), - SUM(level + 81), - SUM(level + 82), - SUM(level + 83), - SUM(level + 84), - SUM(level + 85), - SUM(level + 86), - SUM(level + 87), - SUM(level + 88), - SUM(level + 89) -FROM `{table}`; --- Q30: Группировка по нескольким полям -SELECT - level, - client_ip, - COUNT(*) AS c, - SUM(CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32)), - AVG(CAST(JSON_VALUE(metadata, "$.response_time") AS DOUBLE)) -FROM `{table}` -WHERE message <> '' -GROUP BY level, JSON_VALUE(metadata, "$.client_ip") AS client_ip -ORDER BY c DESC -LIMIT 10; --- Q31: Аналогичная группировка с другими полями -SELECT - request_id, - client_ip, -COUNT(*) AS c, - SUM(CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32)), - AVG(CAST(JSON_VALUE(metadata, "$.response_time") AS DOUBLE)) -FROM `{table}` -WHERE message <> '' -GROUP BY request_id, JSON_VALUE(metadata, "$.client_ip") AS client_ip -ORDER BY c DESC -LIMIT 10; --- Q32: Похожая группировка без фильтра -SELECT - request_id, - client_ip, - COUNT(*) AS c, - SUM(CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32)), - AVG(CAST(JSON_VALUE(metadata, "$.response_time") AS DOUBLE)) -FROM `{table}` -GROUP BY request_id, JSON_VALUE(metadata, "$.client_ip") AS client_ip -ORDER BY c DESC -LIMIT 10; --- Q33: Группировка по URL -SELECT - url, - COUNT(*) AS c -FROM `{table}` -GROUP BY JSON_VALUE(metadata, "$.url") AS url -ORDER BY c DESC -LIMIT 10; --- Q34: Группировка с константой -SELECT - 1, - url, - COUNT(*) AS c -FROM `{table}` -GROUP BY JSON_VALUE(metadata, "$.url") AS url -ORDER BY c DESC -LIMIT 10; --- Q35: Группировка по IP с вычислениями -SELECT - client_ip - client_ip_1, - client_ip_2, - client_ip_3, - COUNT(*) AS c -FROM `{table}` -GROUP BY - JSON_VALUE(metadata, "$.client_ip") AS client_ip, - CAST(JSON_VALUE(metadata, "$.client_ip") AS Int64) - 1 AS client_ip_1, - CAST(JSON_VALUE(metadata, "$.client_ip") AS Int64) - 2 AS client_ip_2, - CAST(JSON_VALUE(metadata, "$.client_ip") AS Int64) - 3 AS client_ip_3 -ORDER BY c DESC -LIMIT 10; --- Q36: Сложная фильтрация по датам -SELECT - url, - COUNT(*) AS views -FROM `{table}` -WHERE service_name = 'service1' - AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() - AND CAST(JSON_VALUE(metadata, "$.dont_count") AS Int32) = 0 - AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 - AND JSON_VALUE(metadata, "$.url") <> '' -GROUP BY JSON_VALUE(metadata, "$.url") as url -ORDER BY views DESC -LIMIT 10; --- Q37: Аналогичный запрос для заголовков -SELECT - title, - COUNT(*) AS views -FROM `{table}` -WHERE service_name = 'service1' - AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() - AND CAST(JSON_VALUE(metadata, "$.dont_count") AS Int32) = 0 - AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 - AND JSON_VALUE(metadata, "$.title") <> '' -GROUP BY JSON_VALUE(metadata, "$.title") as title -ORDER BY views DESC -LIMIT 10; --- Q38: Сложная фильтрация с отступом -SELECT - url, - COUNT(*) AS views -FROM `{table}` -WHERE service_name = 'service1' - AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() - AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 - AND CAST(JSON_VALUE(metadata, "$.is_link") AS Int32) <> 0 - AND CAST(JSON_VALUE(metadata, "$.is_download") AS Int32) = 0 -GROUP BY JSON_VALUE(metadata, "$.url") AS url -ORDER BY views DESC -LIMIT 10 -OFFSET 1000; --- Q39: Сложная группировка с множеством условий -SELECT - traffic_source_id, - search_engine_id, - adv_engine_id, - src, - dst, - COUNT(*) AS views -FROM `{table}` -WHERE service_name = 'service1' - AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() - AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 -GROUP BY - JSON_VALUE(metadata, "$.traffic_source_id") AS traffic_source_id, - JSON_VALUE(metadata, "$.search_engine_id") AS search_engine_id, - JSON_VALUE(metadata, "$.adv_engine_id") AS adv_engine_id, - CASE - WHEN JSON_VALUE(metadata, "$.search_engine_id") = '0' - AND JSON_VALUE(metadata, "$.adv_engine_id") = '0' - THEN JSON_VALUE(metadata, "$.referer") - ELSE '' - END AS src, - JSON_VALUE(metadata, "$.url") AS dst -ORDER BY views DESC -LIMIT 10 -OFFSET 1000; --- Q40: Группировка по хешу URL -SELECT - url_hash, - date, - COUNT(*) AS views -FROM `{table}` -WHERE service_name = 'service1' - AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() - AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 - AND JSON_VALUE(metadata, "$.traffic_source_id") IN ('-1', '6') - AND JSON_VALUE(metadata, "$.referer_hash") = '3594120000172545465' -GROUP BY - JSON_VALUE(metadata, "$.url_hash") AS url_hash, - CAST(timestamp AS Date) AS date -ORDER BY views DESC -LIMIT 10 -OFFSET 100; --- Q41: Сложная группировка по размерам окна -SELECT - window_client_width, - window_client_height, - COUNT(*) AS views -FROM `{table}` -WHERE service_name = 'service1' - AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() - AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 - AND CAST(JSON_VALUE(metadata, "$.dont_count") AS Int32) = 0 - AND JSON_VALUE(metadata, "$.url_hash") = '2868770270353813622' -GROUP BY - JSON_VALUE(metadata, "$.window_client_width") AS window_client_width, - JSON_VALUE(metadata, "$.window_client_height") AS window_client_height -ORDER BY views DESC -LIMIT 10 -OFFSET 10000; --- Q42: Группировка по минутам -SELECT - M, - COUNT(*) AS views -FROM `{table}` -WHERE service_name = 'service1' - AND timestamp BETWEEN (CurrentUtcTimestamp() - Interval("P7D")) AND CurrentUtcTimestamp() - AND CAST(JSON_VALUE(metadata, "$.is_refresh") AS Int32) = 0 - AND CAST(JSON_VALUE(metadata, "$.dont_count") AS Int32) = 0 -GROUP BY CAST(CAST(timestamp AS Uint64) / 60000000 * 60000000 AS DateTime) AS M -ORDER BY M -LIMIT 10 -OFFSET 1000;
\ No newline at end of file diff --git a/ydb/library/workload/log/ya.make b/ydb/library/workload/log/ya.make index 8f93fcec40..d02dca1851 100644 --- a/ydb/library/workload/log/ya.make +++ b/ydb/library/workload/log/ya.make @@ -14,9 +14,6 @@ PEERDIR( RESOURCE( select_queries.sql workload_logs_select_queries.sql - - # Temporary disabled GROUP BY and DISNINCT queries - # select_queries_original.sql workload_logs_select_queries.sql ) GENERATE_ENUM_SERIALIZATION(log.h) |