#!/bin/bash
#
# This script produces a Hive script that generates DDL on every table.
# The DDLs could then be run on another HDInsight cluster.
# This method of copying table metadata is supported only for non-transactional tables.
#
# Output:
#   /tmp/hdi_hive_ddls.hql (any previous copy is removed before writing).
# Exit status:
#   Non-zero when a helper script cannot be downloaded, when a required helper
#   command is missing after sourcing, when no Beeline command is obtained, or
#   when the database list cannot be read.
#

# Print an error message to stderr and terminate the script.
ddl_export_die() {
    printf 'ERROR: %s\n' "$*" >&2
    exit 1
}

HDINSIGHT_UTILITIES_FNAME='HDInsightUtilities-v01.sh'
HDINSIGHT_UTILITIES="/tmp/$HDINSIGHT_UTILITIES_FNAME"
wget -O "$HDINSIGHT_UTILITIES" -q \
    "https://hdiconfigactions.blob.core.windows.net/linuxconfigactionmodulev01/${HDINSIGHT_UTILITIES_FNAME}" \
    || ddl_export_die "Failed to download ${HDINSIGHT_UTILITIES_FNAME}."
# The exit status of 'source' is deliberately not checked: it only reflects the
# last command inside the sourced file. Required commands are verified below.
# shellcheck source=/dev/null
source "$HDINSIGHT_UTILITIES"

HIVE_MIGRATION_UTILS_FNAME='hive-migration-utils-v01.sh'
HIVE_MIGRATION_UTILS="/tmp/$HIVE_MIGRATION_UTILS_FNAME"
wget -O "$HIVE_MIGRATION_UTILS" -q \
    "https://hdiconfigactions.blob.core.windows.net/linuxhivemigrationv01/${HIVE_MIGRATION_UTILS_FNAME}" \
    || ddl_export_die "Failed to download ${HIVE_MIGRATION_UTILS_FNAME}."
# shellcheck source=hive-migration-utils-v01.sh
source "$HIVE_MIGRATION_UTILS"

# These commands are not defined in this script; presumably they come from the
# two helper files sourced above. Fail early instead of hitting
# "command not found" halfway through the export.
for required_cmd in get_primary_headnode get_beeline_command_and_kinit \
    is_transactional_table to_upper; do
    command -v "$required_cmd" > /dev/null \
        || ddl_export_die "Required helper '${required_cmd}' is not available."
done

HIVE_DDL_SCRIPT='/tmp/hdi_hive_ddls.hql'

PRIMARY_HEADNODE=$(get_primary_headnode)

echo "Getting Beeline command."
BEE_CMD=$(get_beeline_command_and_kinit "$PRIMARY_HEADNODE")
[[ -n "$BEE_CMD" ]] || ddl_export_die "Could not determine the Beeline command."

# BEE_CMD is a command line held in a string, hence the eval.
# NOTE(review): database and table names are interpolated into the eval'd
# string below; a name containing a single quote would break the quoting.
dbs=$(eval "$BEE_CMD -e 'show databases;'") \
    || ddl_export_die "Failed to list Hive databases."

rm -f "${HIVE_DDL_SCRIPT}"

# Word splitting of $dbs / $tables is intentional: one identifier per word.
for d in $dbs; do
    [[ "$d" == "sys" ]] && continue

    echo "-- database: ${d}" >> "${HIVE_DDL_SCRIPT}"
    echo "CREATE DATABASE IF NOT EXISTS $d; USE $d;" >> "${HIVE_DDL_SCRIPT}"

    # No newline-to-space translation is needed; the unquoted loop below
    # already splits on any whitespace, and dropping the pipeline keeps the
    # query's exit status visible.
    if ! tables=$(eval "$BEE_CMD -e 'USE $d; SHOW TABLES;'"); then
        echo "WARNING: could not list tables of database \`$d\`." >&2
    fi

    for t in $tables; do
        # The built-in sample table of the default database is not exported.
        [[ "$t" == "hivesampletable" && "$d" == "default" ]] && continue

        # An empty DDL would put a bare ';' into the generated script.
        if ! ddl=$(eval "$BEE_CMD -e 'USE $d; SHOW CREATE TABLE $t;'") || [[ -z "$ddl" ]]; then
            echo "WARNING: could not get DDL for \`$d\`.\`$t\`; skipping." >&2
            continue
        fi

        if is_transactional_table "${ddl}"; then
            echo "Skipping transactional table \`$d\`.\`$t\`."
            continue
        fi

        echo "Adding table \`$d\`.\`$t\` to Hive script."
        echo "-- table: ${d}.${t}" >> "${HIVE_DDL_SCRIPT}"
        printf '%s;\n' "$ddl" >> "${HIVE_DDL_SCRIPT}"

        # Partitioned tables additionally get a repair statement emitted.
        if [[ "$(to_upper "$ddl")" == *"PARTITIONED BY"* ]]; then
            echo "MSCK REPAIR TABLE \`$d\`.\`$t\`;" >> "${HIVE_DDL_SCRIPT}"
        fi
    done
done

echo "Hive DDL script written to '${HIVE_DDL_SCRIPT}'."