Search in sources :

Example 1 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class MetaStoreDirectSql method getPartitionStats.

/**
 * Loads partition-level column statistics with direct SQL against "PART_COL_STATS".
 *
 * <p>The lookup is batched on two levels: the outer batch iterates over {@code colNames} and,
 * for every column batch, an inner batch iterates over {@code partNames}. Each query returns
 * rows ordered by partition name; consecutive rows that share a partition name are folded into
 * one {@link ColumnStatistics} entry.
 *
 * <p>All queries registered on the outer batch are closed before this method returns, whether
 * it completes normally or exits with an exception.
 *
 * @param dbName database name bound to the "DB_NAME" predicate
 * @param tableName table name bound to the "TABLE_NAME" predicate
 * @param partNames partition names to look up
 * @param colNames column names to look up
 * @return one entry per run of consecutive rows with the same partition name; an empty list
 *         when either {@code colNames} or {@code partNames} is empty
 * @throws MetaException propagated from the batching, query and conversion helpers
 */
public List<ColumnStatistics> getPartitionStats(final String dbName, final String tableName, final List<String> partNames, List<String> colNames) throws MetaException {
    if (colNames.isEmpty() || partNames.isEmpty()) {
        return Lists.newArrayList();
    }
    final boolean doTrace = LOG.isDebugEnabled();
    final String queryText0 = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from " + " \"PART_COL_STATS\" where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\"" + "  in (%1$s) AND \"PARTITION_NAME\" in (%2$s) order by \"PARTITION_NAME\"";
    Batchable<String, Object[]> b = new Batchable<String, Object[]>() {

        public List<Object[]> run(final List<String> inputColNames) throws MetaException {
            Batchable<String, Object[]> b2 = new Batchable<String, Object[]>() {

                public List<Object[]> run(List<String> inputPartNames) throws MetaException {
                    // Expand the two IN (...) placeholders to match the current batch sizes.
                    String queryText = String.format(queryText0, makeParams(inputColNames.size()), makeParams(inputPartNames.size()));
                    long start = doTrace ? System.nanoTime() : 0;
                    Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
                    Object qResult = executeWithArray(query, prepareParams(dbName, tableName, inputPartNames, inputColNames), queryText);
                    timingTrace(doTrace, queryText0, start, (doTrace ? System.nanoTime() : 0));
                    if (qResult == null) {
                        // Nothing to read from this query, so release it right away.
                        query.closeAll();
                        return Lists.newArrayList();
                    }
                    // Keep the query registered so it can be closed once the rows are consumed.
                    addQueryAfterUse(query);
                    return ensureList(qResult);
                }
            };
            try {
                return runBatched(partNames, b2);
            } finally {
                // Hand the inner batch's queries to the outer batch, even on failure.
                addQueryAfterUse(b2);
            }
        }
    };
    try {
        List<Object[]> list = runBatched(colNames, b);
        List<ColumnStatistics> result = new ArrayList<ColumnStatistics>(Math.min(list.size(), partNames.size()));
        String lastPartName = null;
        int from = 0;
        // Iterate one step past the end so the final run of rows is flushed as well.
        for (int i = 0; i <= list.size(); ++i) {
            boolean isLast = i == list.size();
            String partName = isLast ? null : (String) list.get(i)[0];
            if (!isLast && partName.equals(lastPartName)) {
                continue;
            } else if (from != i) {
                // Rows [from, i) all belong to lastPartName.
                ColumnStatisticsDesc csd = new ColumnStatisticsDesc(false, dbName, tableName);
                csd.setPartName(lastPartName);
                result.add(makeColumnStats(list.subList(from, i), csd, 1));
            }
            lastPartName = partName;
            from = i;
            Deadline.checkTimeout();
        }
        return result;
    } finally {
        // Close the queries on every exit path; previously an exception above skipped this.
        b.closeAllQueries();
    }
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) MTableColumnStatistics(org.apache.hadoop.hive.metastore.model.MTableColumnStatistics) Query(javax.jdo.Query) ArrayList(java.util.ArrayList) MConstraint(org.apache.hadoop.hive.metastore.model.MConstraint) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) List(java.util.List)

Example 2 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class TestHiveMetaStoreStatsMerge method testStatsMerge.

/**
 * Checks that table-level string column statistics are merged when a second
 * {@code SetPartitionsStatsRequest} is sent with {@code needMerge} set.
 *
 * <p>The test creates the database and table (checking that a listener event is recorded for
 * each), stores an initial set of stats for column "a", sends a second set with merging
 * enabled, and asserts the values read back afterwards.
 *
 * @throws Exception if any metastore client call fails
 */
public void testStatsMerge() throws Exception {
    int listSize = 0;
    List<ListenerEvent> notifyList = DummyListener.notifyList;
    assertEquals(listSize, notifyList.size());
    msc.createDatabase(db);
    listSize++;
    assertEquals(listSize, notifyList.size());
    CreateDatabaseEvent dbEvent = (CreateDatabaseEvent) (notifyList.get(listSize - 1));
    // NOTE(review): a plain Java assert is only evaluated when the JVM runs with -ea.
    assert dbEvent.getStatus();
    msc.createTable(table);
    listSize++;
    assertEquals(listSize, notifyList.size());
    CreateTableEvent tblEvent = (CreateTableEvent) (notifyList.get(listSize - 1));
    assert tblEvent.getStatus();
    table = msc.getTable(dbName, tblName);

    // First round: initial table-level stats for string column "a".
    ColumnStatistics cs = new ColumnStatistics();
    ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, dbName, tblName);
    cs.setStatsDesc(desc);
    ColumnStatisticsObj obj = new ColumnStatisticsObj();
    obj.setColName("a");
    obj.setColType("string");
    ColumnStatisticsData data = new ColumnStatisticsData();
    StringColumnStatsData scsd = new StringColumnStatsData();
    scsd.setAvgColLen(10);
    scsd.setMaxColLen(20);
    scsd.setNumNulls(30);
    scsd.setNumDVs(123);
    scsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}");
    data.setStringStats(scsd);
    obj.setStatsData(data);
    cs.addToStatsObj(obj);
    List<ColumnStatistics> colStats = new ArrayList<>();
    colStats.add(cs);
    SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
    msc.setPartitionColumnStatistics(request);
    List<String> colNames = new ArrayList<>();
    colNames.add("a");
    StringColumnStatsData getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0).getStatsData().getStringStats();
    assertEquals(123, getScsd.getNumDVs());

    // Second round: new values for the same column, sent with merging enabled. The new
    // ColumnStatistics gets its own descriptor and request list, so the request no longer
    // depends on the first round's objects having been mutated in place.
    cs = new ColumnStatistics();
    cs.setStatsDesc(desc);
    scsd = new StringColumnStatsData();
    scsd.setAvgColLen(20);
    scsd.setMaxColLen(5);
    scsd.setNumNulls(70);
    scsd.setNumDVs(456);
    scsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}");
    data.setStringStats(scsd);
    obj.setStatsData(data);
    cs.addToStatsObj(obj);
    List<ColumnStatistics> mergeStats = new ArrayList<>();
    mergeStats.add(cs);
    request = new SetPartitionsStatsRequest(mergeStats);
    request.setNeedMerge(true);
    msc.setPartitionColumnStatistics(request);
    getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0).getStatsData().getStringStats();
    assertEquals(20.0, getScsd.getAvgColLen());
    assertEquals(20, getScsd.getMaxColLen());
    assertEquals(100, getScsd.getNumNulls());
    // since metastore is ObjectStore, we use the max function to merge.
    assertEquals(456, getScsd.getNumDVs());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) CreateDatabaseEvent(org.apache.hadoop.hive.metastore.events.CreateDatabaseEvent) ArrayList(java.util.ArrayList) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) SetPartitionsStatsRequest(org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest) ListenerEvent(org.apache.hadoop.hive.metastore.events.ListenerEvent) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) CreateTableEvent(org.apache.hadoop.hive.metastore.events.CreateTableEvent) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 3 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class HBaseReadWrite method buildColStats.

/**
 * Builds a {@link ColumnStatistics} whose descriptor is filled in from a serialized key.
 *
 * <p>The first two key components are used as the database and table name. For partition keys
 * the remaining components are turned into an external partition name, which requires looking
 * up the owning table.
 *
 * @param key serialized key to decode
 * @param fromTable {@code true} to decode {@code key} as a table key, {@code false} to decode
 *        it as a partition key
 * @return a new {@link ColumnStatistics} carrying only the populated descriptor
 * @throws IOException declared for the helpers this method calls
 * @throws RuntimeException if a partition key refers to a table that cannot be found
 */
private ColumnStatistics buildColStats(byte[] key, boolean fromTable) throws IOException {
    // Table keys and partition keys are decoded by different helpers.
    final List<String> keyParts = fromTable
        ? Arrays.asList(HBaseUtils.deserializeKey(key))
        : HBaseUtils.deserializePartitionKey(key, this);
    final String dbName = keyParts.get(0);
    final String tableName = keyParts.get(1);

    ColumnStatisticsDesc descriptor = new ColumnStatisticsDesc();
    descriptor.setIsTblLevel(fromTable);
    descriptor.setDbName(dbName);
    descriptor.setTableName(tableName);

    if (!fromTable) {
        // The partition name can only be built with the table definition at hand.
        Table owner = getTable(dbName, tableName);
        if (owner == null) {
            throw new RuntimeException("Unable to find table " + dbName + "." + tableName + " even though I have a partition for it!");
        }
        List<String> partitionValues = keyParts.subList(2, keyParts.size());
        descriptor.setPartName(HBaseStore.buildExternalPartName(owner, partitionValues));
    }

    ColumnStatistics colStats = new ColumnStatistics();
    colStats.setStatsDesc(descriptor);
    return colStats;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) Table(org.apache.hadoop.hive.metastore.api.Table) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)

Example 4 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class TestHBaseStoreBitVector method doubleTableStatistics.

/**
 * Stores table-level statistics for a double column and verifies that the descriptor and the
 * double-specific values read back from the store match what was written.
 *
 * @throws Exception if the store or the fixture helpers fail
 */
@Test
public void doubleTableStatistics() throws Exception {
    createMockTable(DOUBLE_COL, DOUBLE_TYPE);

    // Write: default table-level descriptor plus one of the pre-created stats objects.
    ColumnStatistics toStore = new ColumnStatistics();
    ColumnStatisticsDesc expectedDesc = getMockTblColStatsDesc();
    toStore.setStatsDesc(expectedDesc);
    ColumnStatisticsObj expectedObj = doubleColStatsObjs.get(0);
    DoubleColumnStatsData expected = expectedObj.getStatsData().getDoubleStats();
    toStore.addToStatsObj(expectedObj);
    store.updateTableColumnStatistics(toStore);

    // Read the stats back for the same column.
    ColumnStatistics fetched = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DOUBLE_COL));

    // Descriptor checks.
    ColumnStatisticsDesc fetchedDesc = fetched.getStatsDesc();
    Assert.assertEquals(expectedDesc.getLastAnalyzed(), fetchedDesc.getLastAnalyzed());
    Assert.assertEquals(DB, fetchedDesc.getDbName());
    Assert.assertEquals(TBL, fetchedDesc.getTableName());
    Assert.assertTrue(fetchedDesc.isIsTblLevel());

    // Exactly one stats object is expected, and it must hold double stats.
    Assert.assertEquals(1, fetched.getStatsObjSize());
    ColumnStatisticsData fetchedData = fetched.getStatsObj().get(0).getStatsData();
    Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, fetchedData.getSetField());

    // Value checks; the high/low bounds are compared with a 0.01 tolerance.
    DoubleColumnStatsData actual = fetchedData.getDoubleStats();
    Assert.assertEquals(expected.getHighValue(), actual.getHighValue(), 0.01);
    Assert.assertEquals(expected.getLowValue(), actual.getLowValue(), 0.01);
    Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
    Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
    Assert.assertEquals(expected.getBitVectors(), actual.getBitVectors());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 5 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class TestHBaseStoreBitVector method longTableStatistics.

/**
 * Stores table-level statistics for a long column and verifies that the descriptor and the
 * long-specific values read back from the store match what was written.
 *
 * @throws Exception if the store or the fixture helpers fail
 */
@Test
public void longTableStatistics() throws Exception {
    createMockTable(LONG_COL, LONG_TYPE);

    // Write: default table-level descriptor plus one of the pre-created stats objects.
    ColumnStatistics toStore = new ColumnStatistics();
    ColumnStatisticsDesc expectedDesc = getMockTblColStatsDesc();
    toStore.setStatsDesc(expectedDesc);
    ColumnStatisticsObj expectedObj = longColStatsObjs.get(0);
    LongColumnStatsData expected = expectedObj.getStatsData().getLongStats();
    toStore.addToStatsObj(expectedObj);
    store.updateTableColumnStatistics(toStore);

    // Read the stats back for the same column.
    ColumnStatistics fetched = store.getTableColumnStatistics(DB, TBL, Arrays.asList(LONG_COL));

    // Descriptor checks.
    ColumnStatisticsDesc fetchedDesc = fetched.getStatsDesc();
    Assert.assertEquals(expectedDesc.getLastAnalyzed(), fetchedDesc.getLastAnalyzed());
    Assert.assertEquals(DB, fetchedDesc.getDbName());
    Assert.assertEquals(TBL, fetchedDesc.getTableName());
    Assert.assertTrue(fetchedDesc.isIsTblLevel());

    // Exactly one stats object is expected, and it must hold long stats.
    Assert.assertEquals(1, fetched.getStatsObjSize());
    ColumnStatisticsData fetchedData = fetched.getStatsObj().get(0).getStatsData();
    Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, fetchedData.getSetField());

    // Value checks.
    LongColumnStatsData actual = fetchedData.getLongStats();
    Assert.assertEquals(expected.getHighValue(), actual.getHighValue());
    Assert.assertEquals(expected.getLowValue(), actual.getLowValue());
    Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
    Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
    Assert.assertEquals(expected.getBitVectors(), actual.getBitVectors());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Aggregations

ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)81 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)69 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)63 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)56 ArrayList (java.util.ArrayList)54 Test (org.junit.Test)53 Table (org.apache.hadoop.hive.metastore.api.Table)37 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)36 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)32 Partition (org.apache.hadoop.hive.metastore.api.Partition)31 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)31 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)28 List (java.util.List)22 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)18 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)12 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)11 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)11 Database (org.apache.hadoop.hive.metastore.api.Database)7 DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData)7 LongColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector)5