
Example 1 with ForwardQueryResult

use of org.datanucleus.store.rdbms.query.ForwardQueryResult in project hive by apache.

the class MetaStoreDirectSql method aggrStatsUseDB.

private List<ColumnStatisticsObj> aggrStatsUseDB(String dbName, String tableName, List<String> partNames, List<String> colNames, boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
    // TODO: all the extrapolation logic should be moved out of this class,
    // only mechanical data retrieval should remain here.
    String commonPrefix = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", "
        + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), "
        + "min(cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal)), max(cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)), "
        + "sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), "
        + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), "
        // and LowerBound (calculated by "max(\"NUM_DISTINCTS\")")
        + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal)),"
        + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\"),"
        + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\"),"
        + "sum(\"NUM_DISTINCTS\")"
        + " from " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? ";
    String queryText = null;
    long start = 0;
    long end = 0;
    Query query = null;
    boolean doTrace = LOG.isDebugEnabled();
    Object qResult = null;
    ForwardQueryResult<?> fqr = null;
    // Extrapolation is not needed.
    if (areAllPartsFound) {
        queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
        start = doTrace ? System.nanoTime() : 0;
        query = pm.newQuery("javax.jdo.query.SQL", queryText);
        qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames), queryText);
        if (qResult == null) {
            query.closeAll();
            return Collections.emptyList();
        }
        end = doTrace ? System.nanoTime() : 0;
        timingTrace(doTrace, queryText, start, end);
        List<Object[]> list = ensureList(qResult);
        List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(list.size());
        for (Object[] row : list) {
            colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation, ndvTuner));
            Deadline.checkTimeout();
        }
        query.closeAll();
        return colStats;
    } else {
        // Extrapolation is needed for some columns.
        // In this case, the stats for at least one column are missing from at
        // least one partition, and must be extrapolated from the partitions
        // that do have them.
        List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(colNames.size());
        queryText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", count(\"PARTITION_NAME\") " + " from " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
        start = doTrace ? System.nanoTime() : 0;
        query = pm.newQuery("javax.jdo.query.SQL", queryText);
        qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames), queryText);
        end = doTrace ? System.nanoTime() : 0;
        timingTrace(doTrace, queryText, start, end);
        if (qResult == null) {
            query.closeAll();
            return Collections.emptyList();
        }
        List<String> noExtraColumnNames = new ArrayList<String>();
        Map<String, String[]> extraColumnNameTypeParts = new HashMap<String, String[]>();
        List<Object[]> list = ensureList(qResult);
        for (Object[] row : list) {
            String colName = (String) row[0];
            String colType = (String) row[1];
            // Extrapolation is not needed for this column if
            // count(\"PARTITION_NAME\")==partNames.size()
            // Or, extrapolation is not possible for this column if
            // count(\"PARTITION_NAME\")<2
            Long count = extractSqlLong(row[2]);
            if (count == partNames.size() || count < 2) {
                noExtraColumnNames.add(colName);
            } else {
                extraColumnNameTypeParts.put(colName, new String[] { colType, String.valueOf(count) });
            }
            Deadline.checkTimeout();
        }
        query.closeAll();
        // Extrapolation is not needed for the columns in noExtraColumnNames
        if (noExtraColumnNames.size() != 0) {
            queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + makeParams(noExtraColumnNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
            start = doTrace ? System.nanoTime() : 0;
            query = pm.newQuery("javax.jdo.query.SQL", queryText);
            qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, noExtraColumnNames), queryText);
            if (qResult == null) {
                query.closeAll();
                return Collections.emptyList();
            }
            list = ensureList(qResult);
            for (Object[] row : list) {
                colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation, ndvTuner));
                Deadline.checkTimeout();
            }
            end = doTrace ? System.nanoTime() : 0;
            timingTrace(doTrace, queryText, start, end);
            query.closeAll();
        }
        // assign a sequence number to each partition
        if (extraColumnNameTypeParts.size() != 0) {
            Map<String, Integer> indexMap = new HashMap<String, Integer>();
            for (int index = 0; index < partNames.size(); index++) {
                indexMap.put(partNames.get(index), index);
            }
            // get sum for all columns to reduce the number of queries
            Map<String, Map<Integer, Object>> sumMap = new HashMap<String, Map<Integer, Object>>();
            queryText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), sum(\"NUM_DISTINCTS\")" + " from " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" + makeParams(extraColumnNameTypeParts.size()) + ") and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ") group by \"COLUMN_NAME\"";
            start = doTrace ? System.nanoTime() : 0;
            query = pm.newQuery("javax.jdo.query.SQL", queryText);
            List<String> extraColumnNames = new ArrayList<String>();
            extraColumnNames.addAll(extraColumnNameTypeParts.keySet());
            qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, extraColumnNames), queryText);
            if (qResult == null) {
                query.closeAll();
                return Collections.emptyList();
            }
            list = ensureList(qResult);
            // see the indexes for colstats in IExtrapolatePartStatus
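            // the four sums selected above (NUM_NULLS, NUM_TRUES, NUM_FALSES,
            // NUM_DISTINCTS) sit at row positions 1..4 and map to colStatNames
            // indexes 6, 10, 11, 15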
            Integer[] sumIndex = new Integer[] { 6, 10, 11, 15 };
            for (Object[] row : list) {
                Map<Integer, Object> indexToObject = new HashMap<Integer, Object>();
                for (int ind = 1; ind < row.length; ind++) {
                    indexToObject.put(sumIndex[ind - 1], row[ind]);
                }
                // row[0] is the column name
                sumMap.put((String) row[0], indexToObject);
                Deadline.checkTimeout();
            }
            end = doTrace ? System.nanoTime() : 0;
            timingTrace(doTrace, queryText, start, end);
            query.closeAll();
            for (Map.Entry<String, String[]> entry : extraColumnNameTypeParts.entrySet()) {
                Object[] row = new Object[IExtrapolatePartStatus.colStatNames.length + 2];
                String colName = entry.getKey();
                String colType = entry.getValue()[0];
                Long sumVal = Long.parseLong(entry.getValue()[1]);
                // fill in colname
                row[0] = colName;
                // fill in coltype
                row[1] = colType;
                // use linear extrapolation; more sophisticated methods can be
                // added in the future.
                IExtrapolatePartStatus extrapolateMethod = new LinearExtrapolatePartStatus();
                // fill in the column stats
                Integer[] index = null;
                boolean decimal = false;
                if (colType.toLowerCase().startsWith("decimal")) {
                    index = IExtrapolatePartStatus.indexMaps.get("decimal");
                    decimal = true;
                } else {
                    index = IExtrapolatePartStatus.indexMaps.get(colType.toLowerCase());
                }
                // if the column type has no entry in the index maps, fall back
                // to the default index set.
                if (index == null) {
                    index = IExtrapolatePartStatus.indexMaps.get("default");
                }
                for (int colStatIndex : index) {
                    String colStatName = IExtrapolatePartStatus.colStatNames[colStatIndex];
                    // if the aggregation type is sum, we do a scale-up
                    if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Sum) {
                        Object o = sumMap.get(colName).get(colStatIndex);
                        if (o == null) {
                            row[2 + colStatIndex] = null;
                        } else {
                            Long val = extractSqlLong(o);
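                            // val and sumVal unbox to long, so val / sumVal is
                            // integer division and truncates before the scale-up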
                            row[2 + colStatIndex] = val / sumVal * (partNames.size());
                        }
                    } else if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Min || IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Max) {
                        // left/right borders
                        if (!decimal) {
                            queryText = "select \"" + colStatName + "\",\"PARTITION_NAME\" from " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " order by \"" + colStatName + "\"";
                        } else {
                            queryText = "select \"" + colStatName + "\",\"PARTITION_NAME\" from " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " order by cast(\"" + colStatName + "\" as decimal)";
                        }
                        start = doTrace ? System.nanoTime() : 0;
                        query = pm.newQuery("javax.jdo.query.SQL", queryText);
                        qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText);
                        if (qResult == null) {
                            query.closeAll();
                            return Collections.emptyList();
                        }
                        fqr = (ForwardQueryResult<?>) qResult;
                        Object[] min = (Object[]) (fqr.get(0));
                        Object[] max = (Object[]) (fqr.get(fqr.size() - 1));
                        end = doTrace ? System.nanoTime() : 0;
                        timingTrace(doTrace, queryText, start, end);
                        query.closeAll();
                        if (min[0] == null || max[0] == null) {
                            row[2 + colStatIndex] = null;
                        } else {
                            row[2 + colStatIndex] = extrapolateMethod.extrapolate(min, max, colStatIndex, indexMap);
                        }
                    } else {
                        // if the aggregation type is avg, we use the average on the existing ones.
                        queryText = "select " + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal))," + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")" + " from " + PART_COL_STATS + "" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\"";
                        start = doTrace ? System.nanoTime() : 0;
                        query = pm.newQuery("javax.jdo.query.SQL", queryText);
                        qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText);
                        if (qResult == null) {
                            query.closeAll();
                            return Collections.emptyList();
                        }
                        fqr = (ForwardQueryResult<?>) qResult;
                        Object[] avg = (Object[]) (fqr.get(0));
                        // colStatIndex = 12, 13, 14 correspond to "AVG_LONG", "AVG_DOUBLE",
                        // "AVG_DECIMAL"
                        row[2 + colStatIndex] = avg[colStatIndex - 12];
                        end = doTrace ? System.nanoTime() : 0;
                        timingTrace(doTrace, queryText, start, end);
                        query.closeAll();
                    }
                }
                colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation, ndvTuner));
                Deadline.checkTimeout();
            }
        }
        return colStats;
    }
}
Also used: Query(javax.jdo.Query) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ForwardQueryResult(org.datanucleus.store.rdbms.query.ForwardQueryResult) MConstraint(org.apache.hadoop.hive.metastore.model.MConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)
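
The pattern at the heart of aggrStatsUseDB is: build the SQL text, run it as a raw JDO SQL query, and cast the DataNucleus result list to ForwardQueryResult for positional access. Below is a minimal, self-contained sketch of that pattern, assuming a JDO PersistenceManager is available; the class name and the firstRow helper are illustrative, not Hive code.

import javax.jdo.PersistenceManager;
import javax.jdo.Query;
import org.datanucleus.store.rdbms.query.ForwardQueryResult;

public final class ForwardQueryResultSketch {

    // Runs a raw SQL query through JDO and returns the first result row,
    // mirroring the cast-and-get access used in aggrStatsUseDB above.
    static Object[] firstRow(PersistenceManager pm, String sql, Object[] params) {
        Query query = pm.newQuery("javax.jdo.query.SQL", sql);
        try {
            Object qResult = query.executeWithArray(params);
            // With the RDBMS store, DataNucleus returns its query-result List;
            // the cast to ForwardQueryResult gives get()/size() over it.
            ForwardQueryResult<?> fqr = (ForwardQueryResult<?>) qResult;
            return (Object[]) fqr.get(0);
        } finally {
            // always release the underlying statement and result set
            query.closeAll();
        }
    }
}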

Example 2 with ForwardQueryResult

use of org.datanucleus.store.rdbms.query.ForwardQueryResult in project hive by apache.

the class MetaStoreDirectSql method partsFoundForPartitions.

private long partsFoundForPartitions(final String dbName, final String tableName, final List<String> partNames, List<String> colNames) throws MetaException {
    assert !colNames.isEmpty() && !partNames.isEmpty();
    final boolean doTrace = LOG.isDebugEnabled();
    final String queryText0 = "select count(\"COLUMN_NAME\") from " + PART_COL_STATS + "" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (%1$s) and \"PARTITION_NAME\" in (%2$s)" + " group by \"PARTITION_NAME\"";
    List<Long> allCounts = runBatched(colNames, new Batchable<String, Long>() {

        @Override
        public List<Long> run(final List<String> inputColName) throws MetaException {
            return runBatched(partNames, new Batchable<String, Long>() {

                @Override
                public List<Long> run(List<String> inputPartNames) throws MetaException {
                    long partsFound = 0;
                    String queryText = String.format(queryText0, makeParams(inputColName.size()), makeParams(inputPartNames.size()));
                    long start = doTrace ? System.nanoTime() : 0;
                    Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
                    try {
                        Object qResult = executeWithArray(query, prepareParams(dbName, tableName, inputPartNames, inputColName), queryText);
                        long end = doTrace ? System.nanoTime() : 0;
                        timingTrace(doTrace, queryText, start, end);
                        ForwardQueryResult<?> fqr = (ForwardQueryResult<?>) qResult;
                        Iterator<?> iter = fqr.iterator();
                        while (iter.hasNext()) {
                            if (extractSqlLong(iter.next()) == inputColName.size()) {
                                partsFound++;
                            }
                        }
                        return Lists.<Long>newArrayList(partsFound);
                    } finally {
                        query.closeAll();
                    }
                }
            });
        }
    });
    long partsFound = 0;
    for (Long val : allCounts) {
        partsFound += val;
    }
    return partsFound;
}
Also used: Query(javax.jdo.Query) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) ForwardQueryResult(org.datanucleus.store.rdbms.query.ForwardQueryResult) MetaException(org.apache.hadoop.hive.metastore.api.MetaException)
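
partsFoundForPartitions nests two runBatched calls so that neither the column list nor the partition list expands into an IN (...) clause with more parameters than the database accepts (Example 3's javadoc calls this "short enough to not trip up Oracle/etc."). A hedged sketch of that batching idea follows; Batchable here is a simplified stand-in for Hive's internal interface, and BatchRunner and batchSize are illustrative names.

import java.util.ArrayList;
import java.util.List;

// Simplified stand-in for Hive's internal Batchable callback.
interface Batchable<I, R> {
    List<R> run(List<I> input) throws Exception;
}

final class BatchRunner {

    // Splits the input into fixed-size chunks and concatenates the results,
    // so each chunk's "IN (?, ?, ...)" clause stays below parameter limits.
    static <I, R> List<R> runBatched(int batchSize, List<I> input,
                                     Batchable<I, R> batchable) throws Exception {
        List<R> results = new ArrayList<>();
        for (int from = 0; from < input.size(); from += batchSize) {
            int to = Math.min(from + batchSize, input.size());
            results.addAll(batchable.run(input.subList(from, to)));
        }
        return results;
    }
}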

Example 3 with ForwardQueryResult

use of org.datanucleus.store.rdbms.query.ForwardQueryResult in project hive by apache.

the class MetaStoreDirectSql method columnStatisticsObjForPartitionsBatch.

/** Should be called with the list short enough to not trip up Oracle/etc. */
private List<ColumnStatisticsObj> columnStatisticsObjForPartitionsBatch(String dbName, String tableName, List<String> partNames, List<String> colNames, boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation) throws MetaException {
    // TODO: all the extrapolation logic should be moved out of this class,
    // only mechanical data retrieval should remain here.
    String commonPrefix = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", "
        + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), "
        + "min(cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal)), max(cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)), "
        + "sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), "
        + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), "
        // and LowerBound (calculated by "max(\"NUM_DISTINCTS\")")
        + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal)),"
        + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\"),"
        + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\"),"
        + "sum(\"NUM_DISTINCTS\")"
        + " from \"PART_COL_STATS\" where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? ";
    String queryText = null;
    long start = 0;
    long end = 0;
    Query query = null;
    boolean doTrace = LOG.isDebugEnabled();
    Object qResult = null;
    ForwardQueryResult fqr = null;
    // Extrapolation is not needed.
    if (areAllPartsFound) {
        queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
        start = doTrace ? System.nanoTime() : 0;
        query = pm.newQuery("javax.jdo.query.SQL", queryText);
        qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames), queryText);
        if (qResult == null) {
            query.closeAll();
            return Lists.newArrayList();
        }
        end = doTrace ? System.nanoTime() : 0;
        timingTrace(doTrace, queryText, start, end);
        List<Object[]> list = ensureList(qResult);
        List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(list.size());
        for (Object[] row : list) {
            colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation));
            Deadline.checkTimeout();
        }
        query.closeAll();
        return colStats;
    } else {
        // Extrapolation is needed for some columns.
        // In this case, the stats for at least one column are missing from at
        // least one partition, and must be extrapolated from the partitions
        // that do have them.
        List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(colNames.size());
        queryText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", count(\"PARTITION_NAME\") " + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
        start = doTrace ? System.nanoTime() : 0;
        query = pm.newQuery("javax.jdo.query.SQL", queryText);
        qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames), queryText);
        end = doTrace ? System.nanoTime() : 0;
        timingTrace(doTrace, queryText, start, end);
        if (qResult == null) {
            query.closeAll();
            return Lists.newArrayList();
        }
        List<String> noExtraColumnNames = new ArrayList<String>();
        Map<String, String[]> extraColumnNameTypeParts = new HashMap<String, String[]>();
        List<Object[]> list = ensureList(qResult);
        for (Object[] row : list) {
            String colName = (String) row[0];
            String colType = (String) row[1];
            // Extrapolation is not needed for this column if
            // count(\"PARTITION_NAME\")==partNames.size()
            // Or, extrapolation is not possible for this column if
            // count(\"PARTITION_NAME\")<2
            Long count = extractSqlLong(row[2]);
            if (count == partNames.size() || count < 2) {
                noExtraColumnNames.add(colName);
            } else {
                extraColumnNameTypeParts.put(colName, new String[] { colType, String.valueOf(count) });
            }
            Deadline.checkTimeout();
        }
        query.closeAll();
        // Extrapolation is not needed for the columns in noExtraColumnNames
        if (noExtraColumnNames.size() != 0) {
            queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + makeParams(noExtraColumnNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
            start = doTrace ? System.nanoTime() : 0;
            query = pm.newQuery("javax.jdo.query.SQL", queryText);
            qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, noExtraColumnNames), queryText);
            if (qResult == null) {
                query.closeAll();
                return Lists.newArrayList();
            }
            list = ensureList(qResult);
            for (Object[] row : list) {
                colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation));
                Deadline.checkTimeout();
            }
            end = doTrace ? System.nanoTime() : 0;
            timingTrace(doTrace, queryText, start, end);
            query.closeAll();
        }
        // assign a sequence number to each partition
        if (extraColumnNameTypeParts.size() != 0) {
            Map<String, Integer> indexMap = new HashMap<String, Integer>();
            for (int index = 0; index < partNames.size(); index++) {
                indexMap.put(partNames.get(index), index);
            }
            // get sum for all columns to reduce the number of queries
            Map<String, Map<Integer, Object>> sumMap = new HashMap<String, Map<Integer, Object>>();
            queryText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), sum(\"NUM_DISTINCTS\")" + " from \"PART_COL_STATS\" where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" + makeParams(extraColumnNameTypeParts.size()) + ") and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ") group by \"COLUMN_NAME\"";
            start = doTrace ? System.nanoTime() : 0;
            query = pm.newQuery("javax.jdo.query.SQL", queryText);
            List<String> extraColumnNames = new ArrayList<String>();
            extraColumnNames.addAll(extraColumnNameTypeParts.keySet());
            qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, extraColumnNames), queryText);
            if (qResult == null) {
                query.closeAll();
                return Lists.newArrayList();
            }
            list = ensureList(qResult);
            // see the indexes for colstats in IExtrapolatePartStatus
            Integer[] sumIndex = new Integer[] { 6, 10, 11, 15 };
            for (Object[] row : list) {
                Map<Integer, Object> indexToObject = new HashMap<Integer, Object>();
                for (int ind = 1; ind < row.length; ind++) {
                    indexToObject.put(sumIndex[ind - 1], row[ind]);
                }
                // row[0] is the column name
                sumMap.put((String) row[0], indexToObject);
                Deadline.checkTimeout();
            }
            end = doTrace ? System.nanoTime() : 0;
            timingTrace(doTrace, queryText, start, end);
            query.closeAll();
            for (Map.Entry<String, String[]> entry : extraColumnNameTypeParts.entrySet()) {
                Object[] row = new Object[IExtrapolatePartStatus.colStatNames.length + 2];
                String colName = entry.getKey();
                String colType = entry.getValue()[0];
                Long sumVal = Long.parseLong(entry.getValue()[1]);
                // fill in colname
                row[0] = colName;
                // fill in coltype
                row[1] = colType;
                // use linear extrapolation; more sophisticated methods can be
                // added in the future.
                IExtrapolatePartStatus extrapolateMethod = new LinearExtrapolatePartStatus();
                // fill in the column stats
                Integer[] index = null;
                boolean decimal = false;
                if (colType.toLowerCase().startsWith("decimal")) {
                    index = IExtrapolatePartStatus.indexMaps.get("decimal");
                    decimal = true;
                } else {
                    index = IExtrapolatePartStatus.indexMaps.get(colType.toLowerCase());
                }
                // if the column type has no entry in the index maps, fall back
                // to the default index set.
                if (index == null) {
                    index = IExtrapolatePartStatus.indexMaps.get("default");
                }
                for (int colStatIndex : index) {
                    String colStatName = IExtrapolatePartStatus.colStatNames[colStatIndex];
                    // if the aggregation type is sum, we do a scale-up
                    if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Sum) {
                        Object o = sumMap.get(colName).get(colStatIndex);
                        if (o == null) {
                            row[2 + colStatIndex] = null;
                        } else {
                            Long val = extractSqlLong(o);
                            row[2 + colStatIndex] = (Long) (val / sumVal * (partNames.size()));
                        }
                    } else if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Min || IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Max) {
                        // left/right borders
                        if (!decimal) {
                            queryText = "select \"" + colStatName + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " order by \"" + colStatName + "\"";
                        } else {
                            queryText = "select \"" + colStatName + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " order by cast(\"" + colStatName + "\" as decimal)";
                        }
                        start = doTrace ? System.nanoTime() : 0;
                        query = pm.newQuery("javax.jdo.query.SQL", queryText);
                        qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText);
                        if (qResult == null) {
                            query.closeAll();
                            return Lists.newArrayList();
                        }
                        fqr = (ForwardQueryResult) qResult;
                        Object[] min = (Object[]) (fqr.get(0));
                        Object[] max = (Object[]) (fqr.get(fqr.size() - 1));
                        end = doTrace ? System.nanoTime() : 0;
                        timingTrace(doTrace, queryText, start, end);
                        query.closeAll();
                        if (min[0] == null || max[0] == null) {
                            row[2 + colStatIndex] = null;
                        } else {
                            row[2 + colStatIndex] = extrapolateMethod.extrapolate(min, max, colStatIndex, indexMap);
                        }
                    } else {
                        // if the aggregation type is avg, we use the average on the existing ones.
                        queryText = "select " + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal))," + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")" + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\"";
                        start = doTrace ? System.nanoTime() : 0;
                        query = pm.newQuery("javax.jdo.query.SQL", queryText);
                        qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText);
                        if (qResult == null) {
                            query.closeAll();
                            return Lists.newArrayList();
                        }
                        fqr = (ForwardQueryResult) qResult;
                        Object[] avg = (Object[]) (fqr.get(0));
                        // colStatIndex = 12, 13, 14 correspond to "AVG_LONG", "AVG_DOUBLE",
                        // "AVG_DECIMAL"
                        row[2 + colStatIndex] = avg[colStatIndex - 12];
                        end = doTrace ? System.nanoTime() : 0;
                        timingTrace(doTrace, queryText, start, end);
                        query.closeAll();
                    }
                }
                colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation));
                Deadline.checkTimeout();
            }
        }
        return colStats;
    }
}
Also used: Query(javax.jdo.Query) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ForwardQueryResult(org.datanucleus.store.rdbms.query.ForwardQueryResult) MConstraint(org.apache.hadoop.hive.metastore.model.MConstraint) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap)
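
For min/max statistics, the method fetches the border rows of the ordered result and hands them, together with the partition sequence numbers in indexMap, to LinearExtrapolatePartStatus. A hedged sketch of the linear extrapolation idea follows; extrapolateHigh and its signature are illustrative, and the real Hive class differs in detail.

public final class LinearExtrapolationSketch {

    // Extends the trend between the smallest and largest observed values out
    // to the last partition in the sequence. minIdx/maxIdx are the sequence
    // numbers of the partitions that produced the min and max values.
    static double extrapolateHigh(double minVal, int minIdx,
                                  double maxVal, int maxIdx, int lastIdx) {
        if (minIdx == maxIdx) {
            // both borders come from the same partition: nothing to extend
            return maxVal;
        }
        double slopePerPartition = (maxVal - minVal) / (maxIdx - minIdx);
        return maxVal + slopePerPartition * (lastIdx - maxIdx);
    }

    public static void main(String[] args) {
        // min 10 at partition 0, max 40 at partition 3, partitions 0..4:
        // extrapolated high = 40 + 10 * (4 - 3) = 50.0
        System.out.println(extrapolateHigh(10, 0, 40, 3, 4));
    }
}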

Aggregations

ArrayList (java.util.ArrayList): 3
Query (javax.jdo.Query): 3
ForwardQueryResult (org.datanucleus.store.rdbms.query.ForwardQueryResult): 3
HashMap (java.util.HashMap): 2
Map (java.util.Map): 2
TreeMap (java.util.TreeMap): 2
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 2
MConstraint (org.apache.hadoop.hive.metastore.model.MConstraint): 2
LinkedList (java.util.LinkedList): 1
List (java.util.List): 1
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 1
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 1
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 1
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 1
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 1