Hive: how to retrieve the columns of all tables in a database?


I want to write the equivalent of this SQL query in Hive:

select * from information_schema.columns where table_schema='database_name'

How can I access Hive's metastore and retrieve all columns of all tables in a specific database? I know this can be done one table at a time with DESCRIBE [table_name], but is there a way to get all columns of all tables in a database in a single request?
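For reference, the per-table approach mentioned above looks like this in HiveQL (database and table names are placeholders):

DESCRIBE database_name.table_name;
-- or equivalently:
SHOW COLUMNS IN table_name IN database_name;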


See this related question: http://stackoverflow.com/questions/19633000/hive-tables-meta-details-needed?rq=1 - Turophile
I know this is an old question, but to keep it relevant I'll add... HiveMetaStoreClient can help in this situation. - Ram Ghadiyaram
Possible duplicate of just get column names from hive table. - JJFord3
2 Answers

How can I access Hive's metastore and retrieve all columns of all tables in a specific database?

Here is a way to connect with HiveMetaStoreClient; use the method getTableColumnsInformation to get the columns. Besides columns, the same class can extract all the other information, such as partitions. See the example client and the sample methods.
import org.apache.hadoop.hive.conf.HiveConf;

// test program
public class Test {
    public static void main(String[] args){

        HiveConf hiveConf = new HiveConf();
        hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
        // point the client at your metastore's Thrift endpoint
        hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://host:port");

        HiveMetaStoreConnector hiveMetaStoreConnector = new HiveMetaStoreConnector(hiveConf);
        // note: getAllPartitionInfo takes a database name, not a table name
        System.out.print(hiveMetaStoreConnector.getAllPartitionInfo("database_name"));
    }
}


// define a class like this

import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.thrift.TException;
import org.joda.time.DateTime;

import java.util.Arrays;
import java.util.List;

public class HiveMetaStoreConnector {
    private HiveConf hiveConf;
    HiveMetaStoreClient hiveMetaStoreClient;

    // msAddr should include the thrift:// scheme, e.g. "thrift://host"
    public HiveMetaStoreConnector(String msAddr, String msPort){
        try {
            hiveConf = new HiveConf();
            hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, msAddr + ":" + msPort);
            hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf);
        } catch (MetaException e) {
            e.printStackTrace();
            System.err.println("Constructor error");
            System.err.println(e.toString());
            System.exit(-100);
        }
    }

    public HiveMetaStoreConnector(HiveConf hiveConf){
        try {
            this.hiveConf = hiveConf;
            hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf);
        } catch (MetaException e) {
            e.printStackTrace();
            System.err.println("Constructor error");
            System.err.println(e.toString());
            System.exit(-100);
        }
    }

    // concatenates partition information for every table in the given database
    public String getAllPartitionInfo(String dbName){
        List<String> res = Lists.newArrayList();
        try {
            List<String> tableList = hiveMetaStoreClient.getAllTables(dbName);
            for(String tableName:tableList){
                res.addAll(getTablePartitionInformation(dbName,tableName));
            }
        } catch (MetaException e) {
            e.printStackTrace();
            System.out.println("getAllTableStatistic error");
            System.out.println(e.toString());
            System.exit(-100);
        }

        return Joiner.on("\n").join(res);
    }

    // returns one line per partition: table name, up to four partition values, creation time
    public List<String> getTablePartitionInformation(String dbName, String tableName){
        List<String> partitionsInfo = Lists.newArrayList();
        try {
            List<Partition> partitions = hiveMetaStoreClient.listPartitions(dbName, tableName, (short) 10000);
            for(Partition partition : partitions){
                StringBuilder sb = new StringBuilder();
                sb.append(tableName);
                sb.append("\t");
                // pad to four values so every output row has the same number of fields
                List<String> partitionValues = partition.getValues();
                if(partitionValues.size() < 4){
                    int size = partitionValues.size();
                    for(int j = 0; j < 4 - size; j++){
                        partitionValues.add("null");
                    }
                }
                sb.append(Joiner.on("\t").join(partitionValues));
                sb.append("\t");
                // getCreateTime() is in seconds; DateTime expects milliseconds
                DateTime createDate = new DateTime((long) partition.getCreateTime() * 1000);
                sb.append(createDate.toString("yyyy-MM-dd HH:mm:ss"));
                partitionsInfo.add(sb.toString());
            }

        } catch (TException e) {
            e.printStackTrace();
            return Arrays.asList("error for request on " + tableName);
        }

        return partitionsInfo;
    }

    // despite the name, this concatenates the column information for every table in the database
    public String getAllTableStatistic(String dbName){
        List<String> res = Lists.newArrayList();
        try {
            List<String> tableList = hiveMetaStoreClient.getAllTables(dbName);
            for(String tableName:tableList){
                res.addAll(getTableColumnsInformation(dbName,tableName));
            }
        } catch (MetaException e) {
            e.printStackTrace();
            System.out.println("getAllTableStatistic error");
            System.out.println(e.toString());
            System.exit(-100);
        }

        return Joiner.on("\n").join(res);
    }

    // returns one line per column: table name, column index, column name, type, comment.
    // note: getFields returns only regular columns, not partition keys
    public List<String> getTableColumnsInformation(String dbName, String tableName){
        try {
            List<FieldSchema> fields = hiveMetaStoreClient.getFields(dbName, tableName);
            List<String> infs = Lists.newArrayList();
            int cnt = 0;
            for(FieldSchema fs : fields){
                StringBuilder sb = new StringBuilder();
                sb.append(tableName);
                sb.append("\t");
                sb.append(cnt);
                sb.append("\t");
                cnt++;
                sb.append(fs.getName());
                sb.append("\t");
                sb.append(fs.getType());
                sb.append("\t");
                sb.append(fs.getComment());
                infs.add(sb.toString());
            }

            return infs;

        } catch (TException e) {
            e.printStackTrace();
            System.out.println("getTableColumnsInformation error");
            System.out.println(e.toString());
            System.exit(-100);
            return null;
        }
    }
}
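A minimal usage sketch for the column listing (host, port, and database name are placeholders; 9083 is the usual metastore Thrift port):

HiveMetaStoreConnector connector = new HiveMetaStoreConnector("thrift://host", "9083");
// prints one line per column: table name, index, column name, type, comment
System.out.println(connector.getAllTableStatistic("database_name"));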

Hi @Ram Ghadiyaram, can the code above handle complex column types? As far as I know it cannot. From the link below I read that if we have complex column types, we have to recurse into the column. https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-DescribeTable/View/Column - soMuchToLearnAndShare
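A minimal sketch of the recursion the comment describes, assuming the TypeInfo utilities from hive-serde are on the classpath; the class and method names here are hypothetical, and only struct types are expanded (maps, arrays, and unions would need similar handling):

import com.google.common.collect.Lists;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

import java.util.List;

public class ComplexTypeHelper {
    // expands a column into "parent.child" paths, one line per leaf field
    public static List<String> flattenColumn(FieldSchema fs) {
        List<String> out = Lists.newArrayList();
        flatten(fs.getName(), TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()), out);
        return out;
    }

    private static void flatten(String path, TypeInfo type, List<String> out) {
        if (type instanceof StructTypeInfo) {
            StructTypeInfo struct = (StructTypeInfo) type;
            List<String> names = struct.getAllStructFieldNames();
            List<TypeInfo> types = struct.getAllStructFieldTypeInfos();
            for (int i = 0; i < names.size(); i++) {
                flatten(path + "." + names.get(i), types.get(i), out);
            }
        } else {
            out.add(path + "\t" + type.getTypeName());
        }
    }
}

Inside the column loop of getTableColumnsInformation, each FieldSchema could then be passed through flattenColumn instead of appending fs.getName() and fs.getType() directly.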

If you want to run queries that return Hive metadata, you can set up the Hive metastore on MySQL; the metadata Hive uses is then stored in a dedicated MySQL account. You need to create a MySQL user for Hive by executing "CREATE USER 'hive'@'metastorehost' IDENTIFIED BY 'mypassword'".

You will then find tables such as COLUMNS_V2 that contain the information you are looking for. An example query retrieving all columns of all tables could be: "SELECT COLUMN_NAME, TBL_NAME FROM COLUMNS_V2 c JOIN TBLS a ON c.CD_ID = a.TBL_ID".
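The exact join path depends on the metastore schema version: in recent schemas COLUMNS_V2 links to TBLS through SDS rather than directly, so a sketch like the following may be needed (verify the table and column names against your schema; the database name is a placeholder):

SELECT d.NAME AS db_name, t.TBL_NAME, c.COLUMN_NAME, c.TYPE_NAME
FROM COLUMNS_V2 c
JOIN SDS s ON c.CD_ID = s.CD_ID
JOIN TBLS t ON s.SD_ID = t.SD_ID
JOIN DBS d ON t.DB_ID = d.DB_ID
WHERE d.NAME = 'database_name';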
Alternatively, you can access this information through REST calls to WebHCat; see the wiki for more information.
