#!/bin/bash
#
# AMPY: find varchar(8000) data types and ${environment} variables.
# John Bonifas
# last updated 05-11-2020
#
# For every entry listed under the target HDFS folder, this script checks the
# entry's create / load / Impala-load .hql files and appends the HDFS path of
# each matching file to varchar8000files.csv (written to the current
# directory). stderr from the `hdfs dfs -cat` calls is appended to
# varchar8000files_errors.txt.
#
# Usage: <this script> [TARGETFOLDER]
#   TARGETFOLDER  HDFS folder to scan. Defaults to /development/etl/source/ampy
#                 (for a test run pass e.g. /user/jbbonifa/test).
#
# Exit status: non-zero when the target folder cannot be listed.
#
# Shell notes:
#   |     pipe command output to another command
#   $( )  run command first, then hand its output to the enclosing statement
#   >>    add stdout to a file
#   2>>   add stderr to a file

# Only -u is enabled. -e / pipefail are deliberately left off: `grep -q` may
# stop reading as soon as it finds a match, which can make the upstream
# `hdfs dfs -cat` exit non-zero and would turn a real match into a failure.
set -u

TARGETFOLDER=/development/etl/source/ampy
readonly RESULTS_FILE=varchar8000files.csv
readonly ERRORS_FILE=varchar8000files_errors.txt

#######################################
# Extract the path column from `hdfs dfs -ls` output.
# https://stackoverflow.com/questions/21569172/how-to-list-only-the-file-names-in-hdfs
# Inputs:  listing text on stdin.
# Outputs: one path per line on stdout.
#######################################
extract_table_paths() {
  local path
  # The first line of the listing is dropped unconditionally.
  read -r _
  # The path is the 8th whitespace-separated column; reading into exactly
  # eight names leaves any spaces inside the path intact.
  while read -r _ _ _ _ _ _ _ path; do
    if [[ -n "$path" ]]; then
      printf '%s\n' "$path"
    fi
  done
  return 0
}

#######################################
# Record an HDFS file in the results file when it contains any given string.
# Globals:   RESULTS_FILE (appended), ERRORS_FILE (appended)
# Arguments: $1 - HDFS path of the file to inspect
#            $2 - message printed to stdout on a hit
#            $3.. - fixed strings to search for (a hit on any one counts)
#######################################
report_if_found() {
  local file=$1 message=$2
  shift 2
  local -a grep_args=()
  local needle
  # -e once per string lets grep look for several strings in one pass.
  for needle in "$@"; do
    grep_args+=(-e "$needle")
  done
  # -F: the strings are literals, so "$" and "(" need no escaping.
  if hdfs dfs -cat "$file" 2>>"$ERRORS_FILE" | grep -qF "${grep_args[@]}"; then
    printf '%s\n' "$file" >>"$RESULTS_FILE" && printf '%s\n' "$message"
  fi
}

#######################################
# Check the create, load and Impala-load scripts of one table folder.
# Arguments: $1 - HDFS path of the table folder
#            $2 - running counter shown in the progress line
#######################################
scan_table() {
  local dir=$1 count=$2
  local table impala_found
  table=$(basename -- "$dir")
  printf 'working on table: %s ... %s\n' "$table" "$count"

  report_if_found "$dir/create_table_ampy-$table.hql" "Found create." \
    'varchar(8000)'
  report_if_found "$dir/load_table_ampy-$table.hql" "Found load." \
    'varchar(8000)'

  # The Impala script is only inspected when the folder holds at least one
  # *impala.hql file. The output is captured into a variable so that several
  # matches cannot break the test.
  impala_found=$(hdfs dfs -find "$dir" -name '*impala.hql')
  if [[ -n "$impala_found" ]]; then
    # here the table variable has to be forced to expand: ${table}_impala
    report_if_found "$dir/load_table_ampy-${table}_impala.hql" \
      "Found Impala load." '${environment}' 'varchar(8000)'
  fi
}

#######################################
# Entry point: reset the output files, list the target folder, scan each entry.
# Globals:   TARGETFOLDER (read), RESULTS_FILE / ERRORS_FILE (overwritten)
# Arguments: $1 - optional HDFS folder, defaults to TARGETFOLDER
# Returns:   1 when the folder cannot be listed
#######################################
main() {
  local target=${1:-$TARGETFOLDER}
  local listing paths dir
  local count=0

  printf '%s\n' withvarchar8000 >"$RESULTS_FILE"
  # Start with an empty errors file.
  : >"$ERRORS_FILE"

  if ! listing=$(hdfs dfs -ls "$target"); then
    printf 'error: cannot list HDFS folder: %s\n' "$target" >&2
    return 1
  fi
  paths=$(extract_table_paths <<<"$listing")

  # The list is read on fd 3 so commands inside the loop cannot consume it
  # through stdin.
  while IFS= read -r dir <&3; do
    [[ -n "$dir" ]] || continue
    count=$((count + 1))
    scan_table "$dir" "$count"
  done 3<<<"$paths"
  return 0
}

main "$@"