insert into in dbt prehook

Solutions on MaxInterview for "insert into in dbt prehook" by the best coders in the world

Showing results for "insert into in dbt prehook"
Leni
22 Jun 2020
{#-
  profile_schema(table_schema)

  Renders a BigQuery Standard SQL query that profiles every column of every
  relation returned by dbt_utils.get_relations_by_prefix(table_schema, '')
  and left-joins the result to that dataset's INFORMATION_SCHEMA.COLUMNS.

  Args:
    table_schema: dataset name. It is passed to get_relations_by_prefix with
                  an empty prefix and is also used to qualify
                  INFORMATION_SCHEMA.COLUMNS.

  Output: one row per (table, column) with row counts, null / not-null counts,
  distinct counts, uniqueness and not-null ratios, min / max / avg values,
  length statistics, the most frequent value, the column metadata, and two
  recommendation flags derived from the thresholds set below.

  How it works: each source row is serialised with TO_JSON_STRING and then
  split into key/value fragments with plain string functions.

  Known limitations of that string-based parsing:
    * a string value containing the two characters comma + double-quote is
      split into spurious fragments;
    * nested STRUCT / ARRAY columns are not unpacked correctly;
    * values are profiled as their JSON text, so string values keep their
      surrounding double quotes and min / max compare lexicographically.
-#}
{%- macro profile_schema(table_schema) -%}

{{ config(schema='profiles') }}

{#- Thresholds are rendered verbatim into the SQL as numeric literals. -#}
{% set not_null_profile_threshold_pct = ".9" %}
{% set unique_profile_threshold_pct = ".9" %}

{% set tables = dbt_utils.get_relations_by_prefix(table_schema, '') %}

{#-
  With no relations the loop below would render "FROM ( ) column_stats",
  which is a SQL syntax error. Fail with a readable message instead. The check
  is guarded by execute because the relation list is always empty while dbt
  is only parsing the project.
-#}
{% if execute and tables | length == 0 %}
  {{ exceptions.raise_compiler_error("profile_schema: no relations found in schema '" ~ table_schema ~ "'") }}
{% endif %}

SELECT
    column_stats.table_catalog,
    column_stats.table_schema,
    column_stats.table_name,
    column_stats.column_name,
    CASE WHEN column_metadata.is_nullable = 'YES' THEN FALSE ELSE TRUE END AS is_not_nullable_column,
    -- Strictly greater than the threshold here; the uniqueness flag below uses >=.
    CASE WHEN column_stats.pct_not_null > {{ not_null_profile_threshold_pct }} THEN TRUE ELSE FALSE END AS is_recommended_not_nullable_column,

    column_stats._nulls AS count_nulls,
    column_stats._non_nulls AS count_not_nulls,
    column_stats.pct_not_null AS pct_not_null,
    column_stats.table_rows,
    column_stats.count_distinct_values,
    column_stats.pct_unique,
    CASE WHEN column_stats.pct_unique >= {{ unique_profile_threshold_pct }} THEN TRUE ELSE FALSE END AS is_recommended_unique_column,

    -- Remaining metadata / statistics columns, minus the ones already projected above.
    column_metadata.* EXCEPT (
        table_catalog,
        table_schema,
        table_name,
        column_name,
        is_nullable
    ),
    column_stats.* EXCEPT (
        table_catalog,
        table_schema,
        table_name,
        column_name,
        _nulls,
        _non_nulls,
        pct_not_null,
        table_rows,
        pct_unique,
        count_distinct_values
    )
FROM
(
{% for table in tables %}
  SELECT *
  FROM
  (
    WITH
      source_rows AS (
        SELECT * FROM {{ table }}
      ),
      -- One JSON object per source row, with the outer braces stripped.
      table_as_json AS (
        SELECT REGEXP_REPLACE(TO_JSON_STRING(t), r'^{|}$', '') AS row_json
        FROM source_rows AS t
      ),
      -- One row per key/value fragment of each source row.
      pairs AS (
        SELECT
          REPLACE(SPLIT(z, ':')[SAFE_OFFSET(0)], '"', '') AS column_name,
          -- Take everything after the FIRST colon, so values that themselves
          -- contain colons (timestamps, times, URLs) are not truncated.
          -- The JSON literal null (unquoted) becomes SQL NULL.
          NULLIF(REGEXP_EXTRACT(z, r'^[^:]*:(.*)$'), 'null') AS column_value
        FROM table_as_json
        CROSS JOIN UNNEST(SPLIT(row_json, ',"')) AS z
      ),
      profile AS (
        SELECT
          SPLIT(REPLACE('{{ table }}', '`', ''), '.')[SAFE_OFFSET(0)] AS table_catalog,
          SPLIT(REPLACE('{{ table }}', '`', ''), '.')[SAFE_OFFSET(1)] AS table_schema,
          SPLIT(REPLACE('{{ table }}', '`', ''), '.')[SAFE_OFFSET(2)] AS table_name,
          column_name,
          COUNT(*) AS table_rows,
          COUNT(DISTINCT column_value) AS count_distinct_values,
          SAFE_DIVIDE(COUNT(DISTINCT column_value), COUNT(*)) AS pct_unique,
          COUNTIF(column_value IS NULL) AS _nulls,
          COUNTIF(column_value IS NOT NULL) AS _non_nulls,
          SAFE_DIVIDE(COUNTIF(column_value IS NOT NULL), COUNT(*)) AS pct_not_null,
          -- column_value is JSON text, so MIN / MAX compare as strings.
          MIN(column_value) AS _min_value,
          MAX(column_value) AS _max_value,
          AVG(SAFE_CAST(column_value AS NUMERIC)) AS _avg_value,
          APPROX_TOP_COUNT(column_value, 1)[OFFSET(0)] AS _most_frequent_value,
          MIN(LENGTH(SAFE_CAST(column_value AS STRING))) AS _min_length,
          MAX(LENGTH(SAFE_CAST(column_value AS STRING))) AS _max_length,
          ROUND(AVG(LENGTH(SAFE_CAST(column_value AS STRING)))) AS _avr_length
        FROM
          pairs
        WHERE
          column_name <> ''
          AND column_name NOT LIKE '%-%'
        GROUP BY
          column_name
      )
    SELECT
      *
    FROM
      profile
  )
{%- if not loop.last %}
    UNION ALL
{%- endif %}
{% endfor %}
) AS column_stats
LEFT JOIN
(
  SELECT
    * EXCEPT (
      is_generated,
      generation_expression,
      is_stored,
      is_updatable
    )
  FROM
    {{ table_schema }}.INFORMATION_SCHEMA.COLUMNS
) AS column_metadata
  ON  column_stats.table_catalog = column_metadata.table_catalog
  AND column_stats.table_schema = column_metadata.table_schema
  AND column_stats.table_name = column_metadata.table_name
  AND column_stats.column_name = column_metadata.column_name

{%- endmacro -%}