如何使用hive获得1000个表的描述?

siv3szwd  于 2021-05-29  发布在  Hadoop
关注(0)|答案(2)|浏览(357)

我有1000张table,需要检查一下table describe <table name>; 一个接一个。不要一个接一个地运行,你能不能给我一个命令,一次取n个表。

iecba09b

iecba09b1#

您可以创建一个shell脚本并用参数调用它。例如,下面的脚本接收schema,准备schema中的表列表,调用describe extended命令,提取位置,打印schema中按名称排序的前1000个表的表位置。可以将其作为单个命令进行修改和使用:


# !/bin/bash

# Create table list for a schema (script parameter)

HIVE_SCHEMA=$1
echo Processing Hive schema $HIVE_SCHEMA...
tablelist=tables_$HIVE_SCHEMA

 hive -e " set hive.cli.print.header=false; use $HIVE_SCHEMA; show tables;" 1>  $tablelist

# number of tables

tableNum_limit=1000

# For each table do:

for table in $(cat $tablelist|sort|head -n "$tableNum_limit") #add proper sorting
 do 

 echo Processing table $table ...

     #Call DESCRIBE
     out=$(hive client -S -e "use $HIVE_SCHEMA; DESCRIBE EXTENDED $table")

     #Get location for example
     table_location=$(echo "${out}" | egrep -o 'location:[^,]+' | sed 's/location://')
     echo Table location: $table_location
     #Do something else here

done
0s0u357o

0s0u357o2#

查询元存储

演示

Hive

create database my_db_1;
create database my_db_2;
create database my_db_3;

create table my_db_1.my_tbl_1 (i int);
create table my_db_2.my_tbl_2 (c1 string,c2 date,c3 decimal(12,2));
create table my_db_3.my_tbl_3 (x array<int>,y struct<i:int,j:int,k:int>);

mysql(元存储)

use metastore
;

select      d.name              as db_name
           ,t.tbl_name      
           ,c.integer_idx + 1   as col_position
           ,c.column_name
           ,c.type_name

from                DBS         as d

            join    TBLS        as t

            on      t.db_id =
                    d.db_id

            join    SDS         as s

            on      s.sd_id =
                    t.sd_id        

            join    COLUMNS_V2  as c

            on      c.cd_id =
                    s.cd_id

where       d.name like 'my\_db\_%'

order by    d.name         
           ,t.tbl_name  
           ,c.integer_idx
;
+---------+----------+--------------+-------------+---------------------------+
| db_name | tbl_name | col_position | column_name |         type_name         |
+---------+----------+--------------+-------------+---------------------------+
| my_db_1 | my_tbl_1 |            1 | i           | int                       |
| my_db_2 | my_tbl_2 |            1 | c1          | string                    |
| my_db_2 | my_tbl_2 |            2 | c2          | date                      |
| my_db_2 | my_tbl_2 |            3 | c3          | decimal(12,2)             |
| my_db_3 | my_tbl_3 |            1 | x           | array<int>                |
| my_db_3 | my_tbl_3 |            2 | y           | struct<i:int,j:int,k:int> |
+---------+----------+--------------+-------------+---------------------------+

相关问题