#!/bin/bash # AMPY script to retrieve table rowcounts from hive # John Bonifas # last updated 05-29-2020 # https://docs.cloudera.com/documentation/enterprise/5-9-x/topics/impala_shell_options.html IFS=$'\n' ENVIRONMENT="dev" mapfile -t OUTPUT < <(impala-shell -k -i srphdw010 -B --quiet -q "use ${ENVIRONMENT}_ampy; show tables;") let "var=0" echo tablename,rowcount,rawcount > datalake_raw.csv echo '' > datalake_raw_errors.txt for TABLENAME in ${OUTPUT[@]} do let "var++"; echo working on table: $TABLENAME ... $var; # don't include compute stats. Generates INFO crap into the flat file. impala-shell -k -i srphdw010 --quiet -q \ "select ampy.tablename, ampy.rowcount, raw.rawcount \ from \ ( \ select '$TABLENAME' as tablename, count(*) as rowcount \ from ${ENVIRONMENT}_ampy.$TABLENAME \ ) as ampy \ cross join \ ( \ select '$TABLENAME' as tablename, count(*) as rawcount \ from ${ENVIRONMENT}_raw.ampy_$TABLENAME \ ) as raw \ where ampy.tablename = raw.tablename" \ -B --output_delimiter="," >> datalake_raw.csv 2>>datalake_raw_errors.txt; done