From 04d15ed21e3f9468f13963e9e1e8ec725862cd5f Mon Sep 17 00:00:00 2001 From: "l0ginp@gmail.com" <l0ginp@gmail.com> Date: Thu, 14 Oct 2021 23:49:16 +0700 Subject: [PATCH 1/2] initial SQL Parser --- AutomationScripts/SQL Parser/README.md | 49 +++++++++++++++++++ AutomationScripts/SQL Parser/requirements.txt | 1 + AutomationScripts/SQL Parser/sql_parser.py | 32 ++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 AutomationScripts/SQL Parser/README.md create mode 100644 AutomationScripts/SQL Parser/requirements.txt create mode 100644 AutomationScripts/SQL Parser/sql_parser.py diff --git a/AutomationScripts/SQL Parser/README.md b/AutomationScripts/SQL Parser/README.md new file mode 100644 index 000000000..8f1617182 --- /dev/null +++ b/AutomationScripts/SQL Parser/README.md @@ -0,0 +1,49 @@ +## SQL Parser + +## Short description of package/script: +Extracts the column names and tables used by a SQL query. Automatically resolves column aliases, subquery aliases, and table aliases. + +Also provides a helper for normalizing SQL queries. 
+ +## List out the libraries imported: +```` +pip install sql-metadata +```` + +## Example extracting raw sql +## Input + +```sql +select id, name, sum(amount) as total_amt from schema.foo a +left join ( select id, name from schema.bar limit 10 ) b on a.id = b.id +-- left join schema_b.bars c on b.id = c.id +left join schema_b.foos c on b.id = c.id +group by id, name +limit 1000 +``` + +## Output +### sql_parser.columns +```` + +```` + +### sql_parser.columns +```` +['id', 'name', 'amount', 'schema.foo.id', 'schema_b.foos.id'] +```` + +### sql_parser.tables +```` +['schema.foo', 'schema.bar', 'schema_b.foos'] +```` + +### sql_parser.columns_aliases +```` +{'total_amt': 'amount'} +```` + +### sql_parser.subqueries +```` +{'b': 'select id, name from schema.bar limit 10'} +```` diff --git a/AutomationScripts/SQL Parser/requirements.txt b/AutomationScripts/SQL Parser/requirements.txt new file mode 100644 index 000000000..507311cfc --- /dev/null +++ b/AutomationScripts/SQL Parser/requirements.txt @@ -0,0 +1 @@ +sql-metadata \ No newline at end of file diff --git a/AutomationScripts/SQL Parser/sql_parser.py b/AutomationScripts/SQL Parser/sql_parser.py new file mode 100644 index 000000000..c836c33e6 --- /dev/null +++ b/AutomationScripts/SQL Parser/sql_parser.py @@ -0,0 +1,32 @@ +from sql_metadata import Parser + +rawsql = """ + select id, name, sum(amount) as total_amt from schema.foo a + left join ( select id, name from schema.bar limit 10 ) b on a.id = b.id + -- left join schema_b.bars c on b.id = c.id + left join schema_b.foos c on b.id = c.id + group by id, name + limit 1000 + """ + +# initial Parser +sql_parser = Parser(rawsql) + +# example sql_parser +sql_parser_columns = sql_parser.columns +print("## exact columns form sql") +print(sql_parser_columns) + +sql_parser_tables = sql_parser.tables +print("## exact schema and table form sql") +print(sql_parser_tables) + +sql_parser_columns_aliases = sql_parser.columns_aliases +print("## exact columns_aliases form 
sql") +print(sql_parser_columns_aliases) + +sql_parser_subqueries = sql_parser.subqueries +print("## exact subqueries form sql") +print(sql_parser_subqueries) + + From 1a4fc0124dd0907d58365502995fe8ca8e5ff172 Mon Sep 17 00:00:00 2001 From: "l0ginp@gmail.com" <l0ginp@gmail.com> Date: Thu, 14 Oct 2021 23:50:15 +0700 Subject: [PATCH 2/2] initial SQL Parser --- AutomationScripts/SQL Parser/README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/AutomationScripts/SQL Parser/README.md b/AutomationScripts/SQL Parser/README.md index 8f1617182..b7083361f 100644 --- a/AutomationScripts/SQL Parser/README.md +++ b/AutomationScripts/SQL Parser/README.md @@ -23,10 +23,6 @@ limit 1000 ``` ## Output -### sql_parser.columns -```` - -```` ### sql_parser.columns ````