1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
/* syntax version 1 */
USE plato;
$push_final_data = AsList(
AsStruct("manufacturer" AS manufacturer, "state" AS state));
INSERT INTO @push_final
SELECT * FROM AS_TABLE($push_final_data);
COMMIT;
$manufacturer_name_fix = ($manufacturer) -> {
$lowered_manufacturer = CAST(Unicode::ToLower(CAST(String::Strip($manufacturer) AS Utf8)) AS String);
$in = AsList(
"oysters", -- bullshit in naming
"qumo", -- bullshit in naming
"texet", -- bullshit in naming
"alcatel", -- bullshit in naming
"dexp", -- bullshit in naming
"haier", -- bullshit in naming
"dexp", -- bullshit in naming
"asus", -- ASUSTek Computer Inc & ASUS both usable
"yota", -- Yota Devices & Yota Devices Limited ...
"ark" -- "ark" & "ark electronic technology" & "ark_electronic_technology"
);
$lambda = ($substring) -> {
RETURN FIND($lowered_manufacturer, $substring) IS NULL;
};
$list = ListSkipWhile($in, $lambda);
RETURN IF(ListHasItems($list), $list[0], $lowered_manufacturer);
};
$manufacturers_whitelist = (
SELECT
man as manufacturer
FROM
(
SELECT
man, COUNT(*) as cnt
FROM
@push_final
GROUP BY
$manufacturer_name_fix(manufacturer) as man
)
WHERE
cnt > 1000
);
$push_final_preprocessing = (
SELECT
$manufacturer_name_fix(manufacturer) as manufacturer,
state
FROM
@push_final
);
SELECT
COALESCE(fixed_manufacturer, "other") as manufacturer,
L.* WITHOUT L.manufacturer
FROM
$push_final_preprocessing as L
LEFT JOIN
(
SELECT
manufacturer as fixed_manufacturer
FROM
$manufacturers_whitelist
) as R
ON(L.manufacturer = R.fixed_manufacturer);
|