Usage of org.datanucleus.store.rdbms.query.ForwardQueryResult in the Apache Hive project:
class MetaStoreDirectSql, method aggrStatsUseDB.
private List<ColumnStatisticsObj> aggrStatsUseDB(String dbName, String tableName, List<String> partNames, List<String> colNames, boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
// TODO: all the extrapolation logic should be moved out of this class,
// only mechanical data retrieval should remain here.
String commonPrefix = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", " + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), " + "min(cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal)), max(cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)), " + "sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), " + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), " + // and LowerBound (calculated by "max(\"NUM_DISTINCTS\")")
"avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal))," + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")," + "sum(\"NUM_DISTINCTS\")" + " from " + PART_COL_STATS + "" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? ";
String queryText = null;
long start = 0;
long end = 0;
Query query = null;
boolean doTrace = LOG.isDebugEnabled();
Object qResult = null;
ForwardQueryResult<?> fqr = null;
// Extrapolation is not needed.
if (areAllPartsFound) {
queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
start = doTrace ? System.nanoTime() : 0;
query = pm.newQuery("javax.jdo.query.SQL", queryText);
qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames), queryText);
if (qResult == null) {
query.closeAll();
return Collections.emptyList();
}
end = doTrace ? System.nanoTime() : 0;
timingTrace(doTrace, queryText, start, end);
List<Object[]> list = ensureList(qResult);
List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(list.size());
for (Object[] row : list) {
colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation, ndvTuner));
Deadline.checkTimeout();
}
query.closeAll();
return colStats;
} else {
// Extrapolation is needed for some columns.
// In this case, at least a column status for a partition is missing.
// We need to extrapolate this partition based on the other partitions
List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(colNames.size());
queryText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", count(\"PARTITION_NAME\") " + " from " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
start = doTrace ? System.nanoTime() : 0;
query = pm.newQuery("javax.jdo.query.SQL", queryText);
qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames), queryText);
end = doTrace ? System.nanoTime() : 0;
timingTrace(doTrace, queryText, start, end);
if (qResult == null) {
query.closeAll();
return Collections.emptyList();
}
List<String> noExtraColumnNames = new ArrayList<String>();
Map<String, String[]> extraColumnNameTypeParts = new HashMap<String, String[]>();
List<Object[]> list = ensureList(qResult);
for (Object[] row : list) {
String colName = (String) row[0];
String colType = (String) row[1];
// Extrapolation is not needed for this column if
// count(\"PARTITION_NAME\")==partNames.size()
// Or, extrapolation is not possible for this column if
// count(\"PARTITION_NAME\")<2
Long count = extractSqlLong(row[2]);
if (count == partNames.size() || count < 2) {
noExtraColumnNames.add(colName);
} else {
extraColumnNameTypeParts.put(colName, new String[] { colType, String.valueOf(count) });
}
Deadline.checkTimeout();
}
query.closeAll();
// Extrapolation is not needed for columns noExtraColumnNames
if (noExtraColumnNames.size() != 0) {
queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + makeParams(noExtraColumnNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
start = doTrace ? System.nanoTime() : 0;
query = pm.newQuery("javax.jdo.query.SQL", queryText);
qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, noExtraColumnNames), queryText);
if (qResult == null) {
query.closeAll();
return Collections.emptyList();
}
list = ensureList(qResult);
for (Object[] row : list) {
colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation, ndvTuner));
Deadline.checkTimeout();
}
end = doTrace ? System.nanoTime() : 0;
timingTrace(doTrace, queryText, start, end);
query.closeAll();
}
// give a sequence number for all the partitions
if (extraColumnNameTypeParts.size() != 0) {
Map<String, Integer> indexMap = new HashMap<String, Integer>();
for (int index = 0; index < partNames.size(); index++) {
indexMap.put(partNames.get(index), index);
}
// get sum for all columns to reduce the number of queries
Map<String, Map<Integer, Object>> sumMap = new HashMap<String, Map<Integer, Object>>();
queryText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), sum(\"NUM_DISTINCTS\")" + " from " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" + makeParams(extraColumnNameTypeParts.size()) + ") and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ") group by \"COLUMN_NAME\"";
start = doTrace ? System.nanoTime() : 0;
query = pm.newQuery("javax.jdo.query.SQL", queryText);
List<String> extraColumnNames = new ArrayList<String>();
extraColumnNames.addAll(extraColumnNameTypeParts.keySet());
qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, extraColumnNames), queryText);
if (qResult == null) {
query.closeAll();
return Collections.emptyList();
}
list = ensureList(qResult);
// see the indexes for colstats in IExtrapolatePartStatus
Integer[] sumIndex = new Integer[] { 6, 10, 11, 15 };
for (Object[] row : list) {
Map<Integer, Object> indexToObject = new HashMap<Integer, Object>();
for (int ind = 1; ind < row.length; ind++) {
indexToObject.put(sumIndex[ind - 1], row[ind]);
}
// row[0] is the column name
sumMap.put((String) row[0], indexToObject);
Deadline.checkTimeout();
}
end = doTrace ? System.nanoTime() : 0;
timingTrace(doTrace, queryText, start, end);
query.closeAll();
for (Map.Entry<String, String[]> entry : extraColumnNameTypeParts.entrySet()) {
Object[] row = new Object[IExtrapolatePartStatus.colStatNames.length + 2];
String colName = entry.getKey();
String colType = entry.getValue()[0];
Long sumVal = Long.parseLong(entry.getValue()[1]);
// fill in colname
row[0] = colName;
// fill in coltype
row[1] = colType;
// use linear extrapolation. more complicated one can be added in the
// future.
IExtrapolatePartStatus extrapolateMethod = new LinearExtrapolatePartStatus();
// fill in colstatus
Integer[] index = null;
boolean decimal = false;
if (colType.toLowerCase().startsWith("decimal")) {
index = IExtrapolatePartStatus.indexMaps.get("decimal");
decimal = true;
} else {
index = IExtrapolatePartStatus.indexMaps.get(colType.toLowerCase());
}
// all index.
if (index == null) {
index = IExtrapolatePartStatus.indexMaps.get("default");
}
for (int colStatIndex : index) {
String colStatName = IExtrapolatePartStatus.colStatNames[colStatIndex];
// if the aggregation type is sum, we do a scale-up
if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Sum) {
Object o = sumMap.get(colName).get(colStatIndex);
if (o == null) {
row[2 + colStatIndex] = null;
} else {
Long val = extractSqlLong(o);
row[2 + colStatIndex] = val / sumVal * (partNames.size());
}
} else if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Min || IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Max) {
// left/right borders
if (!decimal) {
queryText = "select \"" + colStatName + "\",\"PARTITION_NAME\" from " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " order by \"" + colStatName + "\"";
} else {
queryText = "select \"" + colStatName + "\",\"PARTITION_NAME\" from " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " order by cast(\"" + colStatName + "\" as decimal)";
}
start = doTrace ? System.nanoTime() : 0;
query = pm.newQuery("javax.jdo.query.SQL", queryText);
qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText);
if (qResult == null) {
query.closeAll();
return Collections.emptyList();
}
fqr = (ForwardQueryResult<?>) qResult;
Object[] min = (Object[]) (fqr.get(0));
Object[] max = (Object[]) (fqr.get(fqr.size() - 1));
end = doTrace ? System.nanoTime() : 0;
timingTrace(doTrace, queryText, start, end);
query.closeAll();
if (min[0] == null || max[0] == null) {
row[2 + colStatIndex] = null;
} else {
row[2 + colStatIndex] = extrapolateMethod.extrapolate(min, max, colStatIndex, indexMap);
}
} else {
// if the aggregation type is avg, we use the average on the existing ones.
queryText = "select " + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal))," + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")" + " from " + PART_COL_STATS + "" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\"";
start = doTrace ? System.nanoTime() : 0;
query = pm.newQuery("javax.jdo.query.SQL", queryText);
qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText);
if (qResult == null) {
query.closeAll();
return Collections.emptyList();
}
fqr = (ForwardQueryResult<?>) qResult;
Object[] avg = (Object[]) (fqr.get(0));
// colStatIndex=12,13,14 respond to "AVG_LONG", "AVG_DOUBLE",
// "AVG_DECIMAL"
row[2 + colStatIndex] = avg[colStatIndex - 12];
end = doTrace ? System.nanoTime() : 0;
timingTrace(doTrace, queryText, start, end);
query.closeAll();
}
}
colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation, ndvTuner));
Deadline.checkTimeout();
}
}
return colStats;
}
}
Usage of org.datanucleus.store.rdbms.query.ForwardQueryResult in the Apache Hive project:
class MetaStoreDirectSql, method partsFoundForPartitions.
private long partsFoundForPartitions(final String dbName, final String tableName, final List<String> partNames, List<String> colNames) throws MetaException {
assert !colNames.isEmpty() && !partNames.isEmpty();
final boolean doTrace = LOG.isDebugEnabled();
final String queryText0 = "select count(\"COLUMN_NAME\") from " + PART_COL_STATS + "" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (%1$s) and \"PARTITION_NAME\" in (%2$s)" + " group by \"PARTITION_NAME\"";
List<Long> allCounts = runBatched(colNames, new Batchable<String, Long>() {
@Override
public List<Long> run(final List<String> inputColName) throws MetaException {
return runBatched(partNames, new Batchable<String, Long>() {
@Override
public List<Long> run(List<String> inputPartNames) throws MetaException {
long partsFound = 0;
String queryText = String.format(queryText0, makeParams(inputColName.size()), makeParams(inputPartNames.size()));
long start = doTrace ? System.nanoTime() : 0;
Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
try {
Object qResult = executeWithArray(query, prepareParams(dbName, tableName, inputPartNames, inputColName), queryText);
long end = doTrace ? System.nanoTime() : 0;
timingTrace(doTrace, queryText, start, end);
ForwardQueryResult<?> fqr = (ForwardQueryResult<?>) qResult;
Iterator<?> iter = fqr.iterator();
while (iter.hasNext()) {
if (extractSqlLong(iter.next()) == inputColName.size()) {
partsFound++;
}
}
return Lists.<Long>newArrayList(partsFound);
} finally {
query.closeAll();
}
}
});
}
});
long partsFound = 0;
for (Long val : allCounts) {
partsFound += val;
}
return partsFound;
}
Usage of org.datanucleus.store.rdbms.query.ForwardQueryResult in the Apache Hive project:
class MetaStoreDirectSql, method columnStatisticsObjForPartitionsBatch.
/** Should be called with the list short enough to not trip up Oracle/etc. */
private List<ColumnStatisticsObj> columnStatisticsObjForPartitionsBatch(String dbName, String tableName, List<String> partNames, List<String> colNames, boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation) throws MetaException {
// TODO: all the extrapolation logic should be moved out of this class,
// only mechanical data retrieval should remain here.
String commonPrefix = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", " + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), " + "min(cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal)), max(cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)), " + "sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), " + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), " + // and LowerBound (calculated by "max(\"NUM_DISTINCTS\")")
"avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal))," + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")," + "sum(\"NUM_DISTINCTS\")" + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? ";
String queryText = null;
long start = 0;
long end = 0;
Query query = null;
boolean doTrace = LOG.isDebugEnabled();
Object qResult = null;
ForwardQueryResult fqr = null;
// Extrapolation is not needed.
if (areAllPartsFound) {
queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
start = doTrace ? System.nanoTime() : 0;
query = pm.newQuery("javax.jdo.query.SQL", queryText);
qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames), queryText);
if (qResult == null) {
query.closeAll();
return Lists.newArrayList();
}
end = doTrace ? System.nanoTime() : 0;
timingTrace(doTrace, queryText, start, end);
List<Object[]> list = ensureList(qResult);
List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(list.size());
for (Object[] row : list) {
colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation));
Deadline.checkTimeout();
}
query.closeAll();
return colStats;
} else {
// Extrapolation is needed for some columns.
// In this case, at least a column status for a partition is missing.
// We need to extrapolate this partition based on the other partitions
List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(colNames.size());
queryText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", count(\"PARTITION_NAME\") " + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
start = doTrace ? System.nanoTime() : 0;
query = pm.newQuery("javax.jdo.query.SQL", queryText);
qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames), queryText);
end = doTrace ? System.nanoTime() : 0;
timingTrace(doTrace, queryText, start, end);
if (qResult == null) {
query.closeAll();
return Lists.newArrayList();
}
List<String> noExtraColumnNames = new ArrayList<String>();
Map<String, String[]> extraColumnNameTypeParts = new HashMap<String, String[]>();
List<Object[]> list = ensureList(qResult);
for (Object[] row : list) {
String colName = (String) row[0];
String colType = (String) row[1];
// Extrapolation is not needed for this column if
// count(\"PARTITION_NAME\")==partNames.size()
// Or, extrapolation is not possible for this column if
// count(\"PARTITION_NAME\")<2
Long count = extractSqlLong(row[2]);
if (count == partNames.size() || count < 2) {
noExtraColumnNames.add(colName);
} else {
extraColumnNameTypeParts.put(colName, new String[] { colType, String.valueOf(count) });
}
Deadline.checkTimeout();
}
query.closeAll();
// Extrapolation is not needed for columns noExtraColumnNames
if (noExtraColumnNames.size() != 0) {
queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + makeParams(noExtraColumnNames.size()) + ")" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
start = doTrace ? System.nanoTime() : 0;
query = pm.newQuery("javax.jdo.query.SQL", queryText);
qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, noExtraColumnNames), queryText);
if (qResult == null) {
query.closeAll();
return Lists.newArrayList();
}
list = ensureList(qResult);
for (Object[] row : list) {
colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation));
Deadline.checkTimeout();
}
end = doTrace ? System.nanoTime() : 0;
timingTrace(doTrace, queryText, start, end);
query.closeAll();
}
// give a sequence number for all the partitions
if (extraColumnNameTypeParts.size() != 0) {
Map<String, Integer> indexMap = new HashMap<String, Integer>();
for (int index = 0; index < partNames.size(); index++) {
indexMap.put(partNames.get(index), index);
}
// get sum for all columns to reduce the number of queries
Map<String, Map<Integer, Object>> sumMap = new HashMap<String, Map<Integer, Object>>();
queryText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), sum(\"NUM_DISTINCTS\")" + " from \"PART_COL_STATS\" where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"COLUMN_NAME\" in (" + makeParams(extraColumnNameTypeParts.size()) + ") and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ") group by \"COLUMN_NAME\"";
start = doTrace ? System.nanoTime() : 0;
query = pm.newQuery("javax.jdo.query.SQL", queryText);
List<String> extraColumnNames = new ArrayList<String>();
extraColumnNames.addAll(extraColumnNameTypeParts.keySet());
qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, extraColumnNames), queryText);
if (qResult == null) {
query.closeAll();
return Lists.newArrayList();
}
list = ensureList(qResult);
// see the indexes for colstats in IExtrapolatePartStatus
Integer[] sumIndex = new Integer[] { 6, 10, 11, 15 };
for (Object[] row : list) {
Map<Integer, Object> indexToObject = new HashMap<Integer, Object>();
for (int ind = 1; ind < row.length; ind++) {
indexToObject.put(sumIndex[ind - 1], row[ind]);
}
// row[0] is the column name
sumMap.put((String) row[0], indexToObject);
Deadline.checkTimeout();
}
end = doTrace ? System.nanoTime() : 0;
timingTrace(doTrace, queryText, start, end);
query.closeAll();
for (Map.Entry<String, String[]> entry : extraColumnNameTypeParts.entrySet()) {
Object[] row = new Object[IExtrapolatePartStatus.colStatNames.length + 2];
String colName = entry.getKey();
String colType = entry.getValue()[0];
Long sumVal = Long.parseLong(entry.getValue()[1]);
// fill in colname
row[0] = colName;
// fill in coltype
row[1] = colType;
// use linear extrapolation. more complicated one can be added in the
// future.
IExtrapolatePartStatus extrapolateMethod = new LinearExtrapolatePartStatus();
// fill in colstatus
Integer[] index = null;
boolean decimal = false;
if (colType.toLowerCase().startsWith("decimal")) {
index = IExtrapolatePartStatus.indexMaps.get("decimal");
decimal = true;
} else {
index = IExtrapolatePartStatus.indexMaps.get(colType.toLowerCase());
}
// all index.
if (index == null) {
index = IExtrapolatePartStatus.indexMaps.get("default");
}
for (int colStatIndex : index) {
String colStatName = IExtrapolatePartStatus.colStatNames[colStatIndex];
// if the aggregation type is sum, we do a scale-up
if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Sum) {
Object o = sumMap.get(colName).get(colStatIndex);
if (o == null) {
row[2 + colStatIndex] = null;
} else {
Long val = extractSqlLong(o);
row[2 + colStatIndex] = (Long) (val / sumVal * (partNames.size()));
}
} else if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Min || IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Max) {
// left/right borders
if (!decimal) {
queryText = "select \"" + colStatName + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " order by \"" + colStatName + "\"";
} else {
queryText = "select \"" + colStatName + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " order by cast(\"" + colStatName + "\" as decimal)";
}
start = doTrace ? System.nanoTime() : 0;
query = pm.newQuery("javax.jdo.query.SQL", queryText);
qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText);
if (qResult == null) {
query.closeAll();
return Lists.newArrayList();
}
fqr = (ForwardQueryResult) qResult;
Object[] min = (Object[]) (fqr.get(0));
Object[] max = (Object[]) (fqr.get(fqr.size() - 1));
end = doTrace ? System.nanoTime() : 0;
timingTrace(doTrace, queryText, start, end);
query.closeAll();
if (min[0] == null || max[0] == null) {
row[2 + colStatIndex] = null;
} else {
row[2 + colStatIndex] = extrapolateMethod.extrapolate(min, max, colStatIndex, indexMap);
}
} else {
// if the aggregation type is avg, we use the average on the existing ones.
queryText = "select " + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal))," + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\")," + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")" + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\"";
start = doTrace ? System.nanoTime() : 0;
query = pm.newQuery("javax.jdo.query.SQL", queryText);
qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText);
if (qResult == null) {
query.closeAll();
return Lists.newArrayList();
}
fqr = (ForwardQueryResult) qResult;
Object[] avg = (Object[]) (fqr.get(0));
// colStatIndex=12,13,14 respond to "AVG_LONG", "AVG_DOUBLE",
// "AVG_DECIMAL"
row[2 + colStatIndex] = avg[colStatIndex - 12];
end = doTrace ? System.nanoTime() : 0;
timingTrace(doTrace, queryText, start, end);
query.closeAll();
}
}
colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation));
Deadline.checkTimeout();
}
}
return colStats;
}
}
Aggregations: methods above aggregate per-partition column statistics via direct SQL, consuming results through DataNucleus ForwardQueryResult.