#!/bin/bash # globals TARGETFOLDER=/development/etl/source/ampy; ENVIRONMENT=dev_ LPAREN="("; RPAREN=")"; # https://stackoverflow.com/questions/16487258/how-to-declare-2d-array-in-bash/16487733 # Engine to twist multidimensional dataset into a BASH one-dimensional array mapfile -t INCOMINGDATASET < <(cat thetablesplusfields.csv); TWODARRAY=() ONEDARRAY=() for COMMADELIMITEDROW in ${INCOMINGDATASET[@]} do TWODARRAY+=(`echo "$COMMADELIMITEDROW" | sed "s|,|' '|g"`) done col_size=3 col_count=0 for ELEMENT in ${TWODARRAY[@]}; do if [ ${col_count} -eq ${col_size} ]; then col_count=0 fi ONEDARRAY+=(`echo -e "$ELEMENT"`) ((col_count++)) done # main processing r="${#ONEDARRAY[@]}" i=0 while [ $i -lt $r ] do # dynamic variable assignment from the dataset. # BASH introduces single ticks that need to be removed, # first step is to dereference the loop var, second step removes single ticks TABLENAMETICK=${ONEDARRAY[${i}]} TABLENAME=`echo "$TABLENAMETICK" | sed -e "s/'//g"` PARTITIONFIELDDTTICK=${ONEDARRAY[$i+1]} PARTITIONFIELDDT=`echo "$PARTITIONFIELDDTTICK" | sed -e "s/'//g"` PRIMARYKEYTICK=${ONEDARRAY[$i+2]} PRIMARYKEY=`echo "$PRIMARYKEYTICK" | sed -e "s/'//g"` # target load script to update LOADHQLFILE=$TARGETFOLDER/$TABLENAME/load_table_ampy-${TABLENAME}_impala.hql; # Comments LOADHQLTEXT=`hdfs dfs -cat $LOADHQLFILE`; # find the first blank line in the file OLDCOMMENTLINES=`echo "$LOADHQLTEXT" | sed -n -e '/./p;/./!q'` MYCOMMENTLINE="-- | 2020-07-16 | JBBONIFA | Logic change to accommodate record edits" echo -e "$OLDCOMMENTLINES""\n""$MYCOMMENTLINE""\n" # Refresh Lines REFRESHLINES="REFRESH ${ENVIRONMENT}raw.ampy_$TABLENAME;""\n""REFRESH ${ENVIRONMENT}ampy.$TABLENAME;""\n" echo -e "$REFRESHLINES" # With Block WITHBLOCK=" with old as\n \ (\n \ select *\n \ from ${ENVIRONMENT}ampy.$TABLENAME\n \ where dt in\n \ (\n \ select distinct to_date($PARTITIONFIELDDT) as dt\n \ from ${ENVIRONMENT}raw.ampy_$TABLENAME\n \ )\n \ )\n \ INSERT OVERWRITE TABLE ${ENVIRONMENT}ampy.$TABLENAME PARTITION(dt)\n\n \ -- New records to add and old records to update\n \ SELECT\n" echo -e "$WITHBLOCK"; # top select list UNIONALIAS="new"; RESULTS=`impala-shell -k -i srphdw010 --quiet -q "describe ${ENVIRONMENT}ampy.$TABLENAME;" -B --output_delimiter="@" | \ sed ':a;N;$!ba;s|\n||g'` IFS=$'@'; FIELDNAMES=($RESULTS); IFS=$'\n'; for ((i=0;i< ${#FIELDNAMES[@]} ;i+=2)); do if [ ${FIELDNAMES[i]} != "dt" ]; then echo "cast"$LPAREN"$UNIONALIAS.${FIELDNAMES[i]} AS ${FIELDNAMES[i+1]}"$RPAREN" as ${FIELDNAMES[i]}"","; fi done echo -e "\n"; # Top Join JOINTOP=" to_date(new.$PARTITIONFIELDDT) AS dt\n \ FROM ${ENVIRONMENT}raw.ampy_$TABLENAME as new\n \ LEFT JOIN old\n \ ON nvl(cast(new.$PRIMARYKEY AS int),0) = nvl(old.$PRIMARYKEY,0)\n" echo -e "$JOINTOP"; # Union All Statements echo -e "UNION ALL\n\n--Old records to retain (no update)\nSELECT\n"; # bottom select list UNIONALIAS="old"; RESULTS=`impala-shell -k -i srphdw010 --quiet -q "describe ${ENVIRONMENT}ampy.$TABLENAME;" -B --output_delimiter="@" | \ sed ':a;N;$!ba;s|\n||g'` IFS=$'@'; FIELDNAMES=($RESULTS); IFS=$'\n'; for ((i=0;i< ${#FIELDNAMES[@]} ;i+=2)); do if [ ${FIELDNAMES[i]} != "dt" ]; then echo "cast"$LPAREN"$UNIONALIAS.${FIELDNAMES[i]} AS ${FIELDNAMES[i+1]}"$RPAREN" as ${FIELDNAMES[i]}"","; fi done echo -e "\n"; # Bottom Join JOINBOTTOM=" to_date(old.$PARTITIONFIELDDT) AS dt\n\n \ FROM old\n\n \ LEFT JOIN ${ENVIRONMENT}raw.ampy_$TABLENAME as new\n \ ON nvl(cast(new.$PRIMARYKEY AS int),0) = nvl(old.$PRIMARYKEY,0)\n" echo -e "$JOINBOTTOM"; # final clause echo "WHERE new.$PRIMARYKEY IS NULL;" i=`expr $i + 3` done