#!/bin/bash
# AMPY get import statement parms for each table
# John Bonifas
# last updated 11-05-2020
#
# Walks every entry directly under $AMPY in HDFS. For each entry <table> that
# has a file named import_argument_ampy-<table>.txt, the values following the
# --split-by, -m / --num-mappers and --fetch-size options are extracted and
# appended as one row to ${PARAMETER}.csv.
#
# Files written in the current directory:
#   ${PARAMETER}.csv         tablename,splitby,numberofmappers,fetchsize
#   ${PARAMETER}_log.txt     one progress line per entry visited
#   ${PARAMETER}_errors.txt  stderr of the hdfs commands
#
# Exit status: non-zero when $AMPY cannot be listed, zero otherwise.

AMPY=/development/etl/source/ampy
PARAMETER=importstatementparms

#######################################
# Reduce `hdfs dfs -ls` output to bare paths.
# Inputs:  stdin - raw listing (a header line followed by one line per entry
#          with eight whitespace-separated columns, the last being the path)
# Outputs: stdout - one path per line
#######################################
extract_listing_paths() {
  # The path is the remainder of the line after the first seven columns, so a
  # path containing spaces is kept intact. The first line (the "Found N items"
  # header) and any line with fewer than eight columns are skipped.
  awk '
    NR > 1 && NF >= 8 {
      line = $0
      for (i = 1; i < 8; i++) {
        sub(/^[ \t]*[^ \t]+/, "", line)
      }
      sub(/^[ \t]+/, "", line)
      print line
    }
  '
}

#######################################
# Pull the three tuning parameters out of an import-argument file.
# The file is expected to hold one token per line, with an option's value on
# the line that follows the option name.
# Inputs:  stdin - contents of the argument file (LF or CRLF line endings)
# Outputs: stdout - a single line "splitby,numberofmappers,fetchsize"; a
#          field is empty when its option is absent, and holds every value
#          separated by a space when the option appears more than once
#######################################
parse_import_parms() {
  awk '
    {
      sub(/\r$/, "")   # tolerate Windows line endings
      if (pending != "") {
        # This line is the value of the option seen on the previous line.
        out[pending] = (out[pending] == "" ? $0 : out[pending] " " $0)
        pending = ""
        next
      }
      # Compare the whole token: a substring match on "-m" would also hit
      # options such as --map-column-java or --merge-key.
      if ($1 == "--split-by") {
        pending = "split"
      } else if ($1 == "-m" || $1 == "--num-mappers") {
        pending = "mappers"
      } else if ($1 == "--fetch-size") {
        pending = "fetch"
      }
    }
    END { printf "%s,%s,%s\n", out["split"], out["mappers"], out["fetch"] }
  '
}

#######################################
# Build the CSV, log and errors files for every table under $AMPY.
# Globals:  AMPY (read), PARAMETER (read)
# Returns:  0 on success, 1 when the HDFS listing fails
#######################################
main() {
  local csv_file=${PARAMETER}.csv
  local errors_file=${PARAMETER}_errors.txt
  local log_file=${PARAMETER}_log.txt
  local listing table_dir table args_file parms
  local count=0   # record counter shown in the progress line

  # initialize files (log and errors start with a single blank line)
  printf '%s\n' 'tablename,splitby,numberofmappers,fetchsize' > "$csv_file"
  printf '\n' > "$errors_file"
  printf '\n' > "$log_file"

  # stderr is redirected on each hdfs command itself so that its diagnostics
  # actually end up in the errors file.
  if ! listing=$(hdfs dfs -ls "$AMPY" 2>>"$errors_file"); then
    printf 'cannot list %s; see %s\n' "$AMPY" "$errors_file" >&2
    return 1
  fi

  # loop through the ampy script directories; the list arrives on fd 3 so
  # that commands inside the loop cannot swallow it through stdin
  while IFS= read -r table_dir <&3; do
    [[ -n "$table_dir" ]] || continue

    # removes path to get just the tablename
    table=${table_dir##*/}

    # progress bar
    count=$((count + 1))
    printf 'working on table: %s ... %s\n' "$table" "$count" | tee -a "$log_file"

    args_file=$table_dir/import_argument_ampy-$table.txt
    if hdfs dfs -test -e "$args_file" 2>>"$errors_file"; then
      # One read of the file yields all three values.
      parms=$(hdfs dfs -cat "$args_file" 2>>"$errors_file" | parse_import_parms)
      printf '%s,%s\n' "$table" "$parms" >> "$csv_file"
    fi
  done 3< <(extract_listing_paths <<<"$listing")
}

# Kept as the last command so its status becomes the script's exit status.
main "$@"