#!/bin/bash
#
# AMPY: get the number of mappers configured for each table.
# John Bonifas
# last updated 10-14-2020
#
# For every entry listed under $AMPY in HDFS, reads
#   <entry>/import_argument_ampy-<tablename>.txt
# and records the value found on the line that follows the -m option.
# NOTE(review): the argument files look like Sqoop options files (one
# option or value per line) - confirm.
#
# Files written to the current directory:
#   numberofmappers_raw.csv     CSV: tablename,NumberOfMappersConfigured
#   numberofmappers_errors.txt  stderr of the hdfs commands + per-table notes
#   numberofmappers_log.txt     progress log (also echoed to stdout)
#
# Exit status: non-zero when the HDFS listing itself fails. Failures on
# individual tables are logged and the table still gets a CSV row with an
# empty value, so one bad table does not stop the run.

# No `set -e` on purpose: per-table failures are handled explicitly below.
set -u
set -o pipefail

readonly AMPY=/development/etl/source/ampy
readonly RAW_CSV=numberofmappers_raw.csv
readonly ERRORS_FILE=numberofmappers_errors.txt
readonly LOG_FILE=numberofmappers_log.txt

#######################################
# Print the HDFS path of every entry under $AMPY, one per line.
# https://stackoverflow.com/questions/21569172/how-to-list-only-the-file-names-in-hdfs
# Globals:   AMPY (read)
# Outputs:   paths on stdout; hdfs diagnostics on stderr
# Returns:   non-zero if the hdfs/perl pipeline fails (pipefail)
#######################################
list_table_dirs() {
  # The path is the 8th whitespace-separated field of each listing line;
  # the split limit of 8 keeps any spaces inside the path intact. Lines
  # with fewer than 8 fields (the leading summary line) are skipped.
  hdfs dfs -ls "$AMPY" \
    | perl -lne 'my @f = split " ", $_, 8; print $f[7] if defined $f[7]'
}

#######################################
# Read an argument file on stdin and print the line that follows the
# first line consisting solely of -m (or its long form --num-mappers).
# Prints nothing when no such option is present.
# Outputs:   the value (without any trailing carriage return) on stdout
#######################################
extract_num_mappers() {
  # CRs are stripped before matching so CRLF files still hit the anchored
  # pattern. The whole input is consumed (no early `exit`) so the upstream
  # `hdfs dfs -cat` never gets SIGPIPE, which pipefail would report as a
  # failure.
  awk '
    { sub(/\r$/, "") }
    got  { next }
    grab { print; got = 1; next }
    /^[ \t]*(-m|--num-mappers)[ \t]*$/ { grab = 1 }
  '
}

#######################################
# Entry point: initialise the output files, then emit one CSV row per
# entry found under $AMPY.
# Globals:   AMPY, RAW_CSV, ERRORS_FILE, LOG_FILE (read)
# Returns:   0 on success, 1 if the HDFS listing fails
#######################################
main() {
  local listing dir table mappers
  local -a table_dirs=()
  local count=0 # record counter

  # initialize files (log/error files start truly empty so `test -s` works)
  printf '%s\n' 'tablename,NumberOfMappersConfigured' > "$RAW_CSV"
  : > "$ERRORS_FILE"
  : > "$LOG_FILE"

  if ! listing=$(list_table_dirs 2>> "$ERRORS_FILE"); then
    printf 'ERROR: could not list %s (see %s)\n' "$AMPY" "$ERRORS_FILE" >&2
    return 1
  fi

  # Nothing listed: the CSV keeps just its header.
  if [[ -z "$listing" ]]; then
    return 0
  fi
  mapfile -t table_dirs <<< "$listing"

  # loop through the ampy script directories
  for dir in "${table_dirs[@]}"; do
    # removes path to get just the tablename
    table=$(basename -- "$dir")

    # progress bar
    count=$((count + 1))
    printf 'working on table: %s ... %s\n' "$table" "$count" \
      | tee -a "$LOG_FILE"

    if ! mappers=$(hdfs dfs -cat "${dir}/import_argument_ampy-${table}.txt" \
      2>> "$ERRORS_FILE" | extract_num_mappers); then
      printf 'could not read argument file for table %s\n' "$table" \
        >> "$ERRORS_FILE"
    elif [[ -z "$mappers" ]]; then
      printf 'no -m option found for table %s\n' "$table" >> "$ERRORS_FILE"
    fi

    # A row is written even on failure (empty value), as before.
    printf '%s,%s\n' "$table" "$mappers" >> "$RAW_CSV"
  done
}

main "$@"