aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/tests/sql/suites/csee/yql-7237.sql
blob: 52a0a68ea7ed013ec12f6e8e5683ba1e367d4999 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
/* syntax version 1 */
USE plato;

$push_final_data = AsList(
    AsStruct("manufacturer" AS manufacturer, "state" AS state));

INSERT INTO @push_final
SELECT * FROM AS_TABLE($push_final_data);
COMMIT;

$manufacturer_name_fix = ($manufacturer) -> {
    $lowered_manufacturer = CAST(Unicode::ToLower(CAST(String::Strip($manufacturer) AS Utf8)) AS String);
    $in = AsList(
        "oysters", -- bullshit in naming
        "qumo", -- bullshit in naming
        "texet", -- bullshit in naming
        "alcatel", -- bullshit in naming
        "dexp", -- bullshit in naming
        "haier", -- bullshit in naming
        "dexp", -- bullshit in naming
        "asus", -- ASUSTek Computer Inc & ASUS both usable
        "yota", -- Yota Devices & Yota Devices Limited ...
        "ark" -- "ark" & "ark electronic technology" & "ark_electronic_technology"
    );
    $lambda = ($substring) -> {
        RETURN FIND($lowered_manufacturer, $substring) IS NULL;
    };
    $list = ListSkipWhile($in, $lambda);
    RETURN IF(ListHasItems($list), $list[0], $lowered_manufacturer);
};

$manufacturers_whitelist = (
    SELECT
        man as manufacturer
    FROM
    (
        SELECT
            man, COUNT(*) as cnt
        FROM
            @push_final
        GROUP BY
            $manufacturer_name_fix(manufacturer) as man
    )
    WHERE
        cnt > 1000
);

$push_final_preprocessing = (
    SELECT
        $manufacturer_name_fix(manufacturer) as manufacturer,
        state
    FROM
        @push_final
);

SELECT
    COALESCE(fixed_manufacturer, "other") as manufacturer,
    L.* WITHOUT L.manufacturer
FROM
    $push_final_preprocessing as L
    LEFT JOIN
    (
        SELECT
            manufacturer as fixed_manufacturer
        FROM
            $manufacturers_whitelist
    ) as R
    ON(L.manufacturer = R.fixed_manufacturer);