/// Integration tests for the graphic-walker DSL -> DuckDB SQL translation.
///
/// Each test builds a JSON workflow (`query`), translates it with
/// `kanaries_parser::parse_dsl` against a [`Dataset`], and compares the
/// generated SQL text with a hand-written expected string. The helper
/// `test_parser` also runs the expected SQL through `sqlparser`'s DuckDB
/// dialect so a malformed expectation shows up in the `dbg!` output.
#[cfg(test)]
mod tests {
    use gw_dsl_parser::parser;
    use gw_dsl_parser::parser::model::{Dataset, DatasetType};
    use parser as kanaries_parser;
    use sqlparser::dialect::DuckDbDialect;
    use sqlparser::parser::Parser;
    use std::fmt::Error;

    /// Builds a table-backed [`Dataset`] for the given source name.
    /// (Extracted: this struct literal was duplicated in every test.)
    fn table_dataset(source: &str) -> Dataset {
        Dataset {
            source: source.to_string(),
            r#type: DatasetType::Table,
        }
    }

    /// Translates `query` against `dataset` and compares the generated SQL
    /// with the expected `sql` string.
    ///
    /// Returns `None` on success, `Some(Error)` on mismatch; the `dbg!`
    /// calls leave diagnostics in the captured test output on failure.
    ///
    /// Fix: the return type was a bare `Option` (missing its type
    /// argument), which does not compile; it is now `Option<Error>`.
    fn test_parser(query: &str, sql: &str, dataset: &Dataset) -> Option<Error> {
        // Sanity check: the expected SQL must itself be parseable DuckDB SQL.
        let res_ast = Parser::parse_sql(&DuckDbDialect {}, sql);
        dbg!(&res_ast);
        let parsed_sql = kanaries_parser::parse_dsl(
            dataset,
            query,
            parser::ParserParams {
                quote_style: Some('"'),
                as_quote_style: Some('"'),
            },
        );
        if parsed_sql != sql {
            dbg!(&parsed_sql);
            return Some(Error {});
        }
        None
    }

    /// `bin` transform with an explicit bucket count (`"num": 5`).
    #[test]
    fn test_custom_bin() {
        let query = r#"{ "workflow": [ { "type": "transform", "transform": [ { "fid": "gw_Kr7j", "expression": { "op": "bin", "as": "gw_Kr7j", "params": [ { "type": "field", "value": "col_1" } ], "num" : 5 } } ] }, { "type": "view", "query": [ { "op": "raw", "fields": [ "gw_Kr7j", "col_13" ] } ] } ] }"#;
        let sql = "SELECT (\"min_gw_Kr7j\" + (least(floor(((\"col_1\" - \"min_gw_Kr7j\") / ((\"max_gw_Kr7j\" - \"min_gw_Kr7j\") / 5.0))), 4) * ((\"max_gw_Kr7j\" - \"min_gw_Kr7j\") / 5.0))) AS \"gw_Kr7j\", \"col_13\" FROM (SELECT min(\"col_1\") OVER () AS \"min_gw_Kr7j\", max(\"col_1\") OVER () AS \"max_gw_Kr7j\", * FROM \"table_1\") AS kanaries_sub_query";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `binCount` transform with an explicit bucket count (`"num": 5`).
    #[test]
    fn test_custom_bin_count() {
        let query = r#"{ "workflow": [ { "type": "transform", "transform": [ { "fid": "gw_ZM8H", "expression": { "op": "binCount", "as": "gw_ZM8H", "params": [ { "type": "field", "value": "col_3" } ], "num" : 5 } } ] }, { "type": "view", "query": [ { "op": "raw", "fields": [ "gw_ZM8H" ] } ] } ] }"#;
        let sql = "SELECT least(((\"col_3\" - \"min_gw_ZM8H\") / ((\"max_gw_ZM8H\" - \"min_gw_ZM8H\") / 5)), 4) + 1 AS \"gw_ZM8H\" FROM (SELECT min(\"col_3\") OVER () AS \"min_gw_ZM8H\", max(\"col_3\") OVER () AS \"max_gw_ZM8H\", * FROM \"table_1\") AS kanaries_sub_query";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `dateTimeDrill` by week with the default datetime format.
    #[test]
    fn test_time_drill() {
        let query = r#"{ "workflow": [{ "type": "transform", "transform": [{ "key": "gw_MRzB", "expression": { "op": "dateTimeDrill", "as": "gw_MRzB", 
"params": [{ "type": "field", "value": "c_0" }, { "type": "value", "value": "week" }] } }] }, { "type": "view", "query": [{ "op": "aggregate", "groupBy": ["gw_MRzB"], "measures": [{ "field": "c_11", "agg": "sum", "asFieldKey": "c_11_sum" }] }] }] }"#;
        let sql = "SELECT STRFTIME(DATE_TRUNC('week', IFNULL(TRY_CAST(CAST(\"c_0\" AS TEXT) AS TIMESTAMP), STRPTIME(CAST(\"c_0\" AS TEXT), '%Y-%m-%d %H:%M:%S'))), '%Y-%m-%d %H:%M:%S') AS \"gw_MRzB\", sum(\"c_11\") AS \"c_11_sum\" FROM \"table_1\" GROUP BY \"gw_MRzB\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `median` aggregation maps straight to DuckDB's `median()`.
    #[test]
    fn test_median() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": "aggregate", "groupBy": [ "col_0" ], "measures": [ { "field": "col_3", "agg": "median", "asFieldKey": "col_3_median" } ] } ] } ] }"#;
        let sql = "SELECT \"col_0\", median(\"col_3\") AS \"col_3_median\" FROM \"table_1\" GROUP BY \"col_0\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `binCount` without `"num"` defaults to 10 buckets.
    #[test]
    fn test_bin_count() {
        let query = r#"{ "workflow": [ { "type": "transform", "transform": [ { "fid": "gw_ZM8H", "expression": { "op": "binCount", "as": "gw_ZM8H", "params": [ { "type": "field", "value": "col_3" } ] } } ] }, { "type": "view", "query": [ { "op": "raw", "fields": [ "gw_ZM8H" ] } ] } ] }"#;
        let sql = "SELECT least(((\"col_3\" - \"min_gw_ZM8H\") / ((\"max_gw_ZM8H\" - \"min_gw_ZM8H\") / 10)), 9) + 1 AS \"gw_ZM8H\" FROM (SELECT min(\"col_3\") OVER () AS \"min_gw_ZM8H\", max(\"col_3\") OVER () AS \"max_gw_ZM8H\", * FROM \"table_1\") AS kanaries_sub_query";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `binCount` combined with a plain field in the same raw view.
    #[test]
    fn test_agg_bin_count() {
        let query = r#"{ "workflow": [ { "type": "transform", "transform": 
[ { "fid": "gw_ZM8H", "expression": { "op": "binCount", "as": "gw_ZM8H", "params": [ { "type": "field", "value": "col_3" } ] } } ] }, { "type": "view", "query": [ { "op": "raw", "fields": [ "col_0", "gw_ZM8H" ] } ] } ] }"#;
        let sql = "SELECT \"col_0\", least(((\"col_3\" - \"min_gw_ZM8H\") / ((\"max_gw_ZM8H\" - \"min_gw_ZM8H\") / 10)), 9) + 1 AS \"gw_ZM8H\" FROM (SELECT min(\"col_3\") OVER () AS \"min_gw_ZM8H\", max(\"col_3\") OVER () AS \"max_gw_ZM8H\", * FROM \"table_1\") AS kanaries_sub_query";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `bin` without `"num"` defaults to 10 buckets.
    #[test]
    fn test_bin() {
        let query = r#"{ "workflow": [ { "type": "transform", "transform": [ { "fid": "gw_Kr7j", "expression": { "op": "bin", "as": "gw_Kr7j", "params": [ { "type": "field", "value": "col_1" } ] } } ] }, { "type": "view", "query": [ { "op": "raw", "fields": [ "gw_Kr7j", "col_13" ] } ] } ] }"#;
        let sql = "SELECT (\"min_gw_Kr7j\" + (least(floor(((\"col_1\" - \"min_gw_Kr7j\") / ((\"max_gw_Kr7j\" - \"min_gw_Kr7j\") / 10.0))), 9) * ((\"max_gw_Kr7j\" - \"min_gw_Kr7j\") / 10.0))) AS \"gw_Kr7j\", \"col_13\" FROM (SELECT min(\"col_1\") OVER () AS \"min_gw_Kr7j\", max(\"col_1\") OVER () AS \"max_gw_Kr7j\", * FROM \"table_1\") AS kanaries_sub_query";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `bin` group-by combined with `sum` over a `log2` transform.
    #[test]
    fn test_sum_log2() {
        let query = r#"{ "workflow": [{ "type": "transform", "transform": [{ "key": "gw_62jy", "expression": { "op": "bin", "as": "gw_62jy", "params": [{ "type": "field", "value": "col_29" }] } }, { "key": "gw_MMjF", "expression": { "op": "log2", "as": "gw_MMjF", "params": [{ "type": "field", "value": "col_21" }] } }] }, { "type": "view", "query": [{ "op": "aggregate", "groupBy": ["gw_62jy"], "measures": [{ "field": "gw_MMjF", "agg": "sum", "asFieldKey": "gw_MMjF_sum" }] }] }] }"#;
        let sql = "SELECT (\"min_gw_62jy\" + (least(floor(((\"col_29\" - \"min_gw_62jy\") / ((\"max_gw_62jy\" - \"min_gw_62jy\") / 10.0))), 9) * ((\"max_gw_62jy\" - \"min_gw_62jy\") / 10.0))) AS \"gw_62jy\", sum(log(\"col_21\") / log(2)) AS \"gw_MMjF_sum\" FROM (SELECT min(\"col_29\") OVER () AS \"min_gw_62jy\", max(\"col_29\") OVER () AS \"max_gw_62jy\", * FROM \"table_1\") AS kanaries_sub_query GROUP BY \"gw_62jy\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// "one of" filter with a long value list; also checks that a duplicated
    /// group-by key is passed through unchanged.
    #[test]
    fn test_filter() {
        let query = r#"{ "workflow": [ { "type": "filter", "filters": [ { "key": "col_4", "rule": { "type": "one of", "value": [ "0.1.6.1", "0.1.7", "0.1.6.0", "0.1.6.2-alpha.0", "0.1.7a1", "0.1.7-alpha.0", "0.1.7a4", "0.1.7a5", "0.1.9.1", "0.1.6.2", "0.1.6.1a6", "0.1.8", "0.1.7a3", "0.1.9", "0.1.10", "0.1.11", "0.1.8.dev.1", "0.1.8.dev.0", "0.2.0a1" ] } } ] }, { "type": "transform", "transform": [ { "key": "gw_count_fid", "expression": { "op": "one", "params": [], "as": "gw_count_fid" } } ] }, { "type": "view", "query": [ { "op": "aggregate", "groupBy": [ "col_14", "col_4", "col_4" ], "measures": [ { "field": "gw_count_fid", "agg": "sum", "asFieldKey": "gw_count_fid_sum" } ] } ] } ]}"#;
        let sql = "SELECT \"col_14\", \"col_4\", \"col_4\", sum(1) AS \"gw_count_fid_sum\" FROM \"table_1\" WHERE \"col_4\" IN ('0.1.6.1', '0.1.7', '0.1.6.0', '0.1.6.2-alpha.0', '0.1.7a1', '0.1.7-alpha.0', '0.1.7a4', '0.1.7a5', '0.1.9.1', '0.1.6.2', '0.1.6.1a6', '0.1.8', '0.1.7a3', '0.1.9', '0.1.10', '0.1.11', '0.1.8.dev.1', '0.1.8.dev.0', '0.2.0a1') GROUP BY \"col_14\", \"col_4\", \"col_4\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// Same shape as `test_sum_log2` with different field aliases.
    #[test]
    fn test_alias() {
        let query = r#"{ "workflow": [{ "type": "transform", "transform": [{ "key": "gw_W4Tf", "expression": { "op": "bin", "as": "gw_W4Tf", "params": [{ 
"type": "field", "value": "col_6" }] } }, { "key": "gw_vH37", "expression": { "op": "log2", "as": "gw_vH37", "params": [{ "type": "field", "value": "col_11" }] } }] }, { "type": "view", "query": [{ "op": "aggregate", "groupBy": ["gw_W4Tf"], "measures": [{ "field": "gw_vH37", "agg": "sum", "asFieldKey": "gw_vH37_sum" }] }] }] }"#;
        let sql = "SELECT (\"min_gw_W4Tf\" + (least(floor(((\"col_6\" - \"min_gw_W4Tf\") / ((\"max_gw_W4Tf\" - \"min_gw_W4Tf\") / 10.0))), 9) * ((\"max_gw_W4Tf\" - \"min_gw_W4Tf\") / 10.0))) AS \"gw_W4Tf\", sum(log(\"col_11\") / log(2)) AS \"gw_vH37_sum\" FROM (SELECT min(\"col_6\") OVER () AS \"min_gw_W4Tf\", max(\"col_6\") OVER () AS \"max_gw_W4Tf\", * FROM \"table_1\") AS kanaries_sub_query GROUP BY \"gw_W4Tf\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `log2` transform emits `log(x) / log(2)`.
    #[test]
    fn test_log2() {
        let query = r#"{ "workflow": [ { "type": "transform", "transform": [ { "fid": "gw_f23i", "expression": { "op": "log2", "as": "gw_f23i", "params": [ { "type": "field", "value": "col_1" } ] } } ] }, { "type": "view", "query": [ { "op": "raw", "fields": [ "gw_f23i", "col_1" ] } ] }]}"#;
        let sql = "SELECT log(\"col_1\") / log(2) AS \"gw_f23i\", \"col_1\" FROM \"table_1\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    // Disabled test kept for reference: group-by on a base64-like fid.
    // NOTE(review): the expected SQL here is unquoted and would not match the
    // parser's quoting rules — confirm intent before re-enabling.
    // #[test]
    // fn test_special() {
    //     let query = r#"{
    //         "workflow": [{
    //             "type": "view",
    //             "query": [{
    //                 "op": "aggregate",
    //                 "groupBy": ["c2Vhc29uXzI="],
    //                 "measures": []}
    //             ]
    //         }]
    //     }"#;
    //     let sql = "SELECT c2Vhc29uXzI= FROM table_1 GROUP BY c2Vhc29uXzI=";
    //     let dataset = table_dataset("table_1");
    //     let err = test_parser(query, sql, &dataset);
    //     assert!(err.is_none());
    // }

    /// `mean` aggregation maps to DuckDB `avg()`.
    #[test]
    fn test_mean() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": 
"aggregate", "groupBy": [ "col_0" ], "measures": [ { "field": "col_3", "agg": "mean", "asFieldKey": "col_3_mean" } ] } ] } ] }"#;
        let sql = "SELECT \"col_0\", avg(\"col_3\") AS \"col_3_mean\" FROM \"table_1\" GROUP BY \"col_0\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `stdev` aggregation maps to DuckDB `stddev()`.
    #[test]
    fn test_std_dev() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": "aggregate", "groupBy": [ "col_0" ], "measures": [ { "field": "col_3", "agg": "stdev", "asFieldKey": "col_3_stdev" } ] } ] } ] }"#;
        let sql = "SELECT \"col_0\", stddev(\"col_3\") AS \"col_3_stdev\" FROM \"table_1\" GROUP BY \"col_0\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `variance` aggregation passes through unchanged.
    #[test]
    fn test_variance() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": "aggregate", "groupBy": [ "col_0" ], "measures": [ { "field": "col_3", "agg": "variance", "asFieldKey": "col_3_variance" } ] } ] } ] }"#;
        let sql = "SELECT \"col_0\", variance(\"col_3\") AS \"col_3_variance\" FROM \"table_1\" GROUP BY \"col_0\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `count` aggregation on a single column.
    #[test]
    fn test_count() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": "aggregate", "groupBy": [ "col_0" ], "measures": [ { "field": "col_3", "agg": "count", "asFieldKey": "col_3_count" } ] } ] } ] }"#;
        let sql = "SELECT \"col_0\", count(\"col_3\") AS \"col_3_count\" FROM \"table_1\" GROUP BY \"col_0\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `log` with an explicit base (`"num": 3`) emits `log(x) / log(3)`.
    #[test]
    fn test_log() {
        let query = r#"{ "workflow": [ { "type": "transform", "transform": [ { "fid": "gw_f23i", "expression": { "op": 
"log", "as": "gw_f23i", "params": [ { "type": "field", "value": "col_1" } ], "num": 3 } } ] }, { "type": "view", "query": [ { "op": "raw", "fields": [ "gw_f23i", "col_1" ] } ] } ] }"#;
        let sql = "SELECT log(\"col_1\") / log(3) AS \"gw_f23i\", \"col_1\" FROM \"table_1\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `count` over `*` with an empty group-by produces a bare `count(*)`.
    #[test]
    fn test_all_count() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": "aggregate", "groupBy": [], "measures": [ { "field": "*", "agg": "count", "asFieldKey": "count" } ] } ] } ] }"#;
        let sql = "SELECT count(*) AS \"count\" FROM \"table_1\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// Plain `sum` aggregation with one group-by column.
    #[test]
    fn test_sum_func() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": "aggregate", "groupBy": [ "col_0" ], "measures": [ { "field": "col_3", "agg": "sum", "asFieldKey": "col_3_sum" } ] } ] } ] }"#;
        let sql = "SELECT \"col_0\", sum(\"col_3\") AS \"col_3_sum\" FROM \"table_1\" GROUP BY \"col_0\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// Plain `min` aggregation with one group-by column.
    #[test]
    fn test_min() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": "aggregate", "groupBy": [ "col_0" ], "measures": [ { "field": "col_3", "agg": "min", "asFieldKey": "col_3_min" } ] } ] } ] }"#;
        let sql = "SELECT \"col_0\", min(\"col_3\") AS \"col_3_min\" FROM \"table_1\" GROUP BY \"col_0\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// Single-value "one of" filter renders as `IN ('charge')`.
    #[test]
    fn test_in() {
        let query = r#"{ "workflow": [{ "type": "filter", "filters": [{ "key": "col_10", "rule": { "type": "one of", "value": ["charge"] } }] }, { "type": "transform", 
"transform": [{ "key": "gw_count_fid", "expression": { "op": "one", "params": [], "as": "gw_count_fid" } }] }, { "type": "view", "query": [{ "op": "aggregate", "groupBy": ["col_14", "col_10"], "measures": [{ "field": "gw_count_fid", "agg": "sum", "asFieldKey": "gw_count_fid_sum" }] }] }] }"#;
        let sql = "SELECT \"col_14\", \"col_10\", sum(1) AS \"gw_count_fid_sum\" FROM \"table_1\" WHERE \"col_10\" IN ('charge') GROUP BY \"col_14\", \"col_10\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// Numeric "range" filter renders as `BETWEEN a AND b`.
    #[test]
    fn test_range() {
        let query = r#"{ "workflow": [ { "type": "filter", "filters": [ { "fid": "col_1", "rule": { "type": "range", "value": [ 4.1387, 12.1 ] } } ] }, { "type": "view", "query": [ { "op": "raw", "fields": [ "col_1", "col_1" ] } ] } ] }"#;
        let sql = "SELECT \"col_1\", \"col_1\" FROM \"table_1\" WHERE \"col_1\" BETWEEN 4.1387 AND 12.1";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// Top-level `"limit"` renders as a `LIMIT` clause.
    #[test]
    fn test_limit() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": "raw", "fields": [ "col_2" ] } ] } ], "limit": 10 }"#;
        let sql = "SELECT \"col_2\" FROM \"table_1\" LIMIT 10";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// Raw view over `*` selects everything unquoted.
    #[test]
    fn test_find_all() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": "raw", "groupBy": [], "fields" : ["*"] } ] } ] }"#;
        let sql = "SELECT * FROM \"table_1\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `dateTimeDrill` with a custom input format: the custom format feeds
    /// STRPTIME while the output STRFTIME format stays the default.
    #[test]
    fn test_custom_time_drill() {
        let query = r#"{ "workflow": [{ "type": "transform", "transform": [{ "key": "gw_MRzB", "expression": { "op": "dateTimeDrill", "as": 
"gw_MRzB", "params": [{ "type": "field", "value": "c_0" }, { "type": "value", "value": "week" },{ "type": "format", "value": "%Y-%m-%d" }] } }] }, { "type": "view", "query": [{ "op": "aggregate", "groupBy": ["gw_MRzB"], "measures": [{ "field": "c_11", "agg": "sum", "asFieldKey": "c_11_sum" }] }] }] }"#;
        let sql = "SELECT STRFTIME(DATE_TRUNC('week', IFNULL(TRY_CAST(CAST(\"c_0\" AS TEXT) AS TIMESTAMP), STRPTIME(CAST(\"c_0\" AS TEXT), '%Y-%m-%d'))), '%Y-%m-%d %H:%M:%S') AS \"gw_MRzB\", sum(\"c_11\") AS \"c_11_sum\" FROM \"table_1\" GROUP BY \"gw_MRzB\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `sort` step with ascending order renders `ORDER BY ... ASC`.
    #[test]
    fn test_order_by() {
        let query = r#" { "workflow": [ { "type": "view", "query": [ { "op": "raw", "fields": [ "col_2" ] } ] }, { "type": "sort", "sort": "ascending", "by": ["col_2"] } ] }"#;
        let sql = "SELECT \"col_2\" FROM \"table_1\" ORDER BY \"col_2\" ASC";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// "temporal range" filter wraps the bounds in `epoch_ms(...)`.
    /// NOTE(review): the name looks like a typo for `test_temporal_range`;
    /// kept as-is to avoid breaking any test-name filters.
    #[test]
    fn test_temporal_terange() {
        let query = r#"{ "workflow": [ { "type": "filter", "filters": [ { "fid": "col_1", "rule": { "type": "temporal range", "value": [ 1667810584123456, 1667810584123498 ] } } ] }, { "type": "view", "query": [ { "op": "raw", "fields": [ "col_1" ] } ] } ] }"#;
        let sql = "SELECT \"col_1\" FROM \"table_1\" WHERE \"col_1\" BETWEEN epoch_ms(1667810584123456) AND epoch_ms(1667810584123498)";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `dateTimeFeature` with a custom input format uses DATE_PART.
    #[test]
    fn test_custom_time_feature() {
        let query = r#"{ "workflow": [{ "type": "transform", "transform": [{ "key": "gw_MRzB", "expression": { "op": "dateTimeFeature", "as": "gw_MRzB", "params": [{ "type": "field", "value": "c_0" }, { "type": "value", "value": "week" },{ "type": 
"format", "value": "%Y-%m-%d" }] } }] }, { "type": "view", "query": [{ "op": "aggregate", "groupBy": ["gw_MRzB"], "measures": [{ "field": "c_11", "agg": "sum", "asFieldKey": "c_11_sum" }] }] }] }"#;
        let sql = "SELECT DATE_PART('week', IFNULL(TRY_CAST(CAST(\"c_0\" AS TEXT) AS TIMESTAMP), STRPTIME(CAST(\"c_0\" AS TEXT), '%Y-%m-%d'))) AS \"gw_MRzB\", sum(\"c_11\") AS \"c_11_sum\" FROM \"table_1\" GROUP BY \"gw_MRzB\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `dateTimeFeature` with the `"timestamp"` format converts via epoch_ms.
    #[test]
    fn test_custom_time_feature_timestamp() {
        let query = r#"{ "workflow": [{ "type": "transform", "transform": [{ "key": "gw_MRzB", "expression": { "op": "dateTimeFeature", "as": "gw_MRzB", "params": [{ "type": "field", "value": "c_0" }, { "type": "value", "value": "week" },{ "type": "format", "value": "timestamp" }] } }] }, { "type": "view", "query": [{ "op": "aggregate", "groupBy": ["gw_MRzB"], "measures": [{ "field": "c_11", "agg": "sum", "asFieldKey": "c_11_sum" }] }] }] }"#;
        let sql = "SELECT DATE_PART('week', epoch_ms(IFNULL(TRY_CAST(\"c_0\" AS BIGINT), EXTRACT(EPOCH FROM CAST(\"c_0\" AS TIMESTAMP)) * 1000))) AS \"gw_MRzB\", sum(\"c_11\") AS \"c_11_sum\" FROM \"table_1\" GROUP BY \"gw_MRzB\"";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    // Disabled test kept for reference: SQL-injection-style group-by key.
    // NOTE(review): the expected value is a placeholder error string;
    // confirm the intended rejection behavior before re-enabling.
    // #[test]
    // fn test_check_sql() {
    //     let query = r#"{"workflow":[{"type":"view","query":[{"op":"aggregate","groupBy":["';DELETE TABLE A"],"measures":[]}]}]}"#;
    //     let dataset = table_dataset("table_1");
    //     let sql = "check sql error";
    //     let err = test_parser(query, sql, &dataset);
    //     assert!(err.is_none());
    // }

    /// Raw SQL expression transform (`"op": "expr"`) is inlined verbatim.
    /// NOTE(review): "epxr" looks like a typo for "expr"; name kept as-is.
    #[test]
    fn test_sql_epxr() {
        let query = r#" { "workflow": [ { "type": "transform", "transform": [{ "expression": { "op": "expr", "as": "gw_f23i", "params": [{ "type": "sql", "value":"log(10)" }] } }] },{ "type": "view", "query": [{ "op": "raw", "fields": [ "col_2", "gw_f23i" ] }] }], "limit": 10 }"#;
        let sql = "SELECT \"col_2\", log(10) AS \"gw_f23i\" FROM \"table_1\" LIMIT 10";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `min` over a `"timestamp"`-formatted column goes through epoch_ms.
    #[test]
    fn test_min_date_timestamp() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": "aggregate", "measures": [ { "field": "col_3", "agg": "min", "asFieldKey" : "col_3_min", "format" : "timestamp" } ] } ] } ] }"#;
        let sql = "SELECT min(epoch_ms(IFNULL(TRY_CAST(\"col_3\" AS BIGINT), EXTRACT(EPOCH FROM CAST(\"col_3\" AS TIMESTAMP)) * 1000))) AS \"col_3_min\" FROM \"table1\"";
        let dataset = table_dataset("table1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `min` over a date column with an explicit STRPTIME format.
    #[test]
    fn test_min_date() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": "aggregate", "measures": [ { "field": "col_3", "agg": "min", "asFieldKey" : "col_3_min", "format" : "YYYY-MM-DD" } ] } ] } ] }"#;
        let sql = "SELECT min(IFNULL(TRY_CAST(CAST(\"col_3\" AS TEXT) AS TIMESTAMP), STRPTIME(CAST(\"col_3\" AS TEXT), 'YYYY-MM-DD'))) AS \"col_3_min\" FROM \"table1\"";
        let dataset = table_dataset("table1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// SQL data view with a fid map: the view SQL is nested around the
    /// renamed source columns.
    #[test]
    fn test_sql_data_view() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": "aggregate", "groupBy": [ "id" ] } ] } ], "dataview": [{ 
"type": "sql", "query": [{ "sql" : "select id, name1, name2 from (select 1 as id, * from default)", "fidMap": { "name1": "timestamp_s_col", "name2": "bigint_col" } }] }] }"#;
        let sql = "SELECT \"id\" FROM (SELECT id, name1, name2 FROM (SELECT 1 AS id, * FROM (SELECT \"timestamp_s_col\" AS \"name1\", \"bigint_col\" AS \"name2\" FROM \"table_name\") AS \"k_gw_write_view\") AS \"k_gw_review_default\") AS \"k_gw_temp_view\" GROUP BY \"id\"";
        let dataset = table_dataset("table_name");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// Field ids containing spaces survive quoting end-to-end.
    #[test]
    fn test_fid_space() {
        let query = r#"{ "workflow": [ { "type": "view", "query": [ { "op": "aggregate", "groupBy": [ "i d" ] } ] } ], "dataview": [{ "type": "sql", "query": [{ "sql" : "select \"i d\", name1, name2 from (select 1 as \"i d\", * from default)", "fidMap": { "name1": "timestamp_s_col", "name2": "bigint_col" } }] }] }"#;
        let sql = "SELECT \"i d\" FROM (SELECT \"i d\", name1, name2 FROM (SELECT 1 AS \"i d\", * FROM (SELECT \"timestamp_s_col\" AS \"name1\", \"bigint_col\" AS \"name2\" FROM \"table_name\") AS \"k_gw_write_view\") AS \"k_gw_review_default\") AS \"k_gw_temp_view\" GROUP BY \"i d\"";
        let dataset = table_dataset("table_name");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// `distinctCount` maps to `count(DISTINCT ...)`; source name contains a dash.
    #[test]
    fn test_distinct_count() {
        let query = r#"{ "workflow": [{ "type": "view", "query": [{ "op": "aggregate", "measures": [{ "field": "gw_f23i", "agg": "distinctCount", "asFieldKey": "gw_f23i_count" }] }] } ] }"#;
        let sql = "SELECT count(DISTINCT \"gw_f23i\") AS \"gw_f23i_count\" FROM \"table-name\"";
        let dataset = table_dataset("table-name");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// Multiple filter steps are AND-ed into a single WHERE clause, even
    /// when they appear after the view step.
    #[test]
    fn test_multi_filter() {
        let query = r#"{ "workflow": [ { "type": "filter", "filters": [ { "fid": "col_1", "rule": { "type": "range", "value": 
[ 4.1387, 12.1 ] } } ] }, { "type": "view", "query": [ { "op": "raw", "fields": [ "col_1", "col_1" ] } ] }, { "type": "filter", "filters": [ { "fid": "col_2", "rule": { "type": "range", "value": [ 4.1387, 12.1 ] } } ] }, { "type": "filter", "filters": [ { "fid": "col_3", "rule": { "type": "range", "value": [ 4.1387, 12.1 ] } } ] } ] }"#;
        let sql = "SELECT \"col_1\", \"col_1\" FROM \"table_1\" WHERE \"col_1\" BETWEEN 4.1387 AND 12.1 AND \"col_2\" BETWEEN 4.1387 AND 12.1 AND \"col_3\" BETWEEN 4.1387 AND 12.1";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }

    /// A bin transform that is only referenced by a filter (not the view)
    /// still forces the min/max window sub-query.
    #[test]
    fn test_unuse_transform() {
        let query = r#" { "workflow": [ { "type": "filter", "filters": [ { "fid": "lunch", "rule": { "type": "one of", "value": [ "standard" ] } } ] }, { "type": "transform", "transform": [ { "key": "gw_2a3l", "expression": { "op": "bin", "as": "gw_2a3l", "params": [ { "type": "field", "value": "math score" } ], "num": 10 } } ] }, { "type": "filter", "filters": [ { "fid": "gw_2a3l", "rule": { "type": "range", "value": [ 67.44462674323215, 90 ] } } ] }, { "type": "view", "query": [ { "op": "raw", "fields": [ "reading score", "writing score" ] } ] } ] }"#;
        let sql = "SELECT \"reading score\", \"writing score\" FROM (SELECT min(\"math score\") OVER () AS \"min_gw_2a3l\", max(\"math score\") OVER () AS \"max_gw_2a3l\", * FROM \"table_1\") AS kanaries_sub_query WHERE \"lunch\" IN ('standard') AND (\"min_gw_2a3l\" + (least(floor(((\"math score\" - \"min_gw_2a3l\") / ((\"max_gw_2a3l\" - \"min_gw_2a3l\") / 10.0))), 9) * ((\"max_gw_2a3l\" - \"min_gw_2a3l\") / 10.0))) BETWEEN 67.44462674323215 AND 90";
        let dataset = table_dataset("table_1");
        let err = test_parser(query, sql, &dataset);
        assert!(err.is_none());
    }
}