Use of org.apache.hadoop.hive.metastore.api.Timestamp in the Apache Hive project.
From the class TimestampColumnStatsAggregator, method extrapolate.
/**
 * Extrapolates timestamp column statistics from the entries of
 * {@code adjustedStatsMap} and stores the result in {@code extrapolateData}.
 *
 * <p>The low value, high value and (when the density function is not used) the
 * number of distinct values are each obtained by sorting the entries on that
 * attribute, taking the smallest and largest entry together with their indexes
 * from {@code adjustedIndexMap}, and linearly extending the line through those
 * two points to index {@code 0} or to index {@code numParts}. The number of
 * nulls is the sum over all entries scaled by
 * {@code numParts / numPartsWithStats}.
 *
 * @param extrapolateData receives the extrapolated timestamp statistics
 * @param numParts used as the right border index and as the numerator when
 *        scaling the null count
 * @param numPartsWithStats denominator when scaling the null count
 * @param adjustedIndexMap index for every key of {@code adjustedStatsMap}
 * @param adjustedStatsMap statistics to extrapolate from; the timestamp
 *        statistics of each value are used
 * @param densityAvg when density based estimation is enabled and this value is
 *        non-zero, the NDV is {@code (high - low) / densityAvg}, clamped between
 *        the largest single NDV and the sum of all NDVs
 */
@Override
public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, int numPartsWithStats, Map<String, Double> adjustedIndexMap, Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
  final int rightBorder = numParts;

  // Narrow every entry down to its timestamp-specific statistics.
  final Map<String, TimestampColumnStatsData> timestampStats = new HashMap<>();
  for (Map.Entry<String, ColumnStatisticsData> source : adjustedStatsMap.entrySet()) {
    timestampStats.put(source.getKey(), source.getValue().getTimestampStats());
  }
  final List<Map.Entry<String, TimestampColumnStatsData>> ordered = new LinkedList<>(timestampStats.entrySet());

  // ---- low value: order by low value, then extend towards the border holding the minimum ----
  Collections.sort(ordered, new Comparator<Map.Entry<String, TimestampColumnStatsData>>() {
    @Override
    public int compare(Map.Entry<String, TimestampColumnStatsData> a, Map.Entry<String, TimestampColumnStatsData> b) {
      return a.getValue().getLowValue().compareTo(b.getValue().getLowValue());
    }
  });
  Map.Entry<String, TimestampColumnStatsData> head = ordered.get(0);
  Map.Entry<String, TimestampColumnStatsData> tail = ordered.get(ordered.size() - 1);
  double headIdx = adjustedIndexMap.get(head.getKey());
  double tailIdx = adjustedIndexMap.get(tail.getKey());
  long smallest = head.getValue().getLowValue().getSecondsSinceEpoch();
  long largest = tail.getValue().getLowValue().getSecondsSinceEpoch();
  final long lowValue;
  if (headIdx < tailIdx) {
    // the minimum lies at the left border (index 0)
    lowValue = (long) (largest - (largest - smallest) * tailIdx / (tailIdx - headIdx));
  } else if (headIdx == tailIdx) {
    lowValue = smallest;
  } else {
    // the minimum lies at the right border
    lowValue = (long) (largest - (largest - smallest) * (rightBorder - tailIdx) / (headIdx - tailIdx));
  }

  // ---- high value: order by high value, then extend towards the border holding the maximum ----
  Collections.sort(ordered, new Comparator<Map.Entry<String, TimestampColumnStatsData>>() {
    @Override
    public int compare(Map.Entry<String, TimestampColumnStatsData> a, Map.Entry<String, TimestampColumnStatsData> b) {
      return a.getValue().getHighValue().compareTo(b.getValue().getHighValue());
    }
  });
  head = ordered.get(0);
  tail = ordered.get(ordered.size() - 1);
  headIdx = adjustedIndexMap.get(head.getKey());
  tailIdx = adjustedIndexMap.get(tail.getKey());
  smallest = head.getValue().getHighValue().getSecondsSinceEpoch();
  largest = tail.getValue().getHighValue().getSecondsSinceEpoch();
  final long highValue;
  if (headIdx < tailIdx) {
    // the maximum lies at the right border
    highValue = (long) (smallest + (largest - smallest) * (rightBorder - headIdx) / (tailIdx - headIdx));
  } else if (headIdx == tailIdx) {
    highValue = smallest;
  } else {
    // the maximum lies at the left border (index 0)
    highValue = (long) (smallest + (largest - smallest) * headIdx / (headIdx - tailIdx));
  }

  // ---- nulls: total over the known entries, scaled up by the partition ratio ----
  long nullTotal = 0;
  for (TimestampColumnStatsData stats : timestampStats.values()) {
    nullTotal += stats.getNumNulls();
  }
  final long numNulls = nullTotal * numParts / numPartsWithStats;

  // ---- number of distinct values ----
  Collections.sort(ordered, new Comparator<Map.Entry<String, TimestampColumnStatsData>>() {
    @Override
    public int compare(Map.Entry<String, TimestampColumnStatsData> a, Map.Entry<String, TimestampColumnStatsData> b) {
      return Long.compare(a.getValue().getNumDVs(), b.getValue().getNumDVs());
    }
  });
  // The largest single NDV is the floor, the sum of all NDVs is the ceiling.
  final long ndvFloor = ordered.get(ordered.size() - 1).getValue().getNumDVs();
  long ndvCeiling = 0;
  for (Map.Entry<String, TimestampColumnStatsData> item : ordered) {
    ndvCeiling += item.getValue().getNumDVs();
  }
  long ndv;
  if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) {
    final long byDensity = (long) ((highValue - lowValue) / densityAvg);
    if (byDensity < ndvFloor) {
      ndv = ndvFloor;
    } else if (byDensity > ndvCeiling) {
      ndv = ndvCeiling;
    } else {
      ndv = byDensity;
    }
  } else {
    head = ordered.get(0);
    tail = ordered.get(ordered.size() - 1);
    headIdx = adjustedIndexMap.get(head.getKey());
    tailIdx = adjustedIndexMap.get(tail.getKey());
    smallest = head.getValue().getNumDVs();
    largest = tail.getValue().getNumDVs();
    if (headIdx < tailIdx) {
      // the maximum lies at the right border
      ndv = (long) (smallest + (largest - smallest) * (rightBorder - headIdx) / (tailIdx - headIdx));
    } else if (headIdx == tailIdx) {
      ndv = smallest;
    } else {
      // the maximum lies at the left border (index 0)
      ndv = (long) (smallest + (largest - smallest) * headIdx / (headIdx - tailIdx));
    }
  }

  // Publish the extrapolated figures.
  final TimestampColumnStatsDataInspector result = new TimestampColumnStatsDataInspector();
  result.setLowValue(new Timestamp(lowValue));
  result.setHighValue(new Timestamp(highValue));
  result.setNumNulls(numNulls);
  result.setNumDVs(ndv);
  extrapolateData.setTimestampStats(result);
}
Use of org.apache.hadoop.hive.metastore.api.Timestamp in the Apache Hive project.
From the class TimestampColumnStatsMerger, method setLowValue.
/**
 * Merges the low value of {@code newData} into {@code aggregateData}.
 *
 * <ul>
 *   <li>If neither object has a low value set, {@code aggregateData} is left untouched.</li>
 *   <li>If both have one set, the smaller of the two is stored.</li>
 *   <li>Otherwise the first non-null of the aggregate's and the new value is stored.</li>
 * </ul>
 *
 * @param aggregateData statistics that receive the merged low value
 * @param newData statistics whose low value is merged in
 */
public void setLowValue(TimestampColumnStatsDataInspector aggregateData, TimestampColumnStatsDataInspector newData) {
  final Timestamp currentLow = aggregateData.getLowValue();
  final Timestamp incomingLow = newData.getLowValue();
  final boolean hasCurrent = aggregateData.isSetLowValue();
  final boolean hasIncoming = newData.isSetLowValue();

  // Nothing to merge when neither side carries a low value.
  if (!(hasCurrent || hasIncoming)) {
    return;
  }

  final Timestamp merged = (hasCurrent && hasIncoming)
      ? ObjectUtils.min(incomingLow, currentLow)
      : MoreObjects.firstNonNull(currentLow, incomingLow);
  aggregateData.setLowValue(merged);
}
Use of org.apache.hadoop.hive.metastore.api.Timestamp in the Apache Hive project.
From the class StatObjectConverter, method fillColumnStatisticsData.
// JAVA
/**
 * Populates {@code data} with the type-specific statistics object that matches
 * {@code colType}, converting the raw column values through
 * {@code MetastoreDirectSqlUtils}.
 *
 * <p>The column type is matched case-insensitively. Low and high bounds are only
 * set when the corresponding raw value is non-null. A column type that matches
 * none of the branches leaves {@code data} unchanged.
 *
 * @param colType column type name, e.g. {@code "bigint"}, {@code "varchar(10)"}, {@code "timestamp"}
 * @param data union object that receives the statistics
 * @param llow raw low value for integer, date and timestamp columns
 * @param lhigh raw high value for integer, date and timestamp columns
 * @param dlow raw low value for double/float columns
 * @param dhigh raw high value for double/float columns
 * @param declow raw low value for decimal columns; cast to {@code String}
 * @param dechigh raw high value for decimal columns; cast to {@code String}
 * @param nulls raw null count
 * @param dist raw number of distinct values
 * @param bitVector raw bit vector blob
 * @param avglen raw average column length (string and binary columns)
 * @param maxlen raw maximum column length (string and binary columns)
 * @param trues raw count of true values (boolean columns)
 * @param falses raw count of false values (boolean columns)
 * @throws MetaException declared by the signature; presumably raised by the
 *         conversion helpers - confirm against their definitions
 */
public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, Object nulls, Object dist, Object bitVector, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException {
  // Locale.ROOT keeps the ASCII type names stable: with the default locale,
  // e.g. Turkish, "INT" lower-cases to a dotless-i form and matches no branch.
  colType = colType.toLowerCase(java.util.Locale.ROOT);
  if (colType.equals("boolean")) {
    BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
    boolStats.setNumFalses(MetastoreDirectSqlUtils.extractSqlLong(falses));
    boolStats.setNumTrues(MetastoreDirectSqlUtils.extractSqlLong(trues));
    boolStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
    data.setBooleanStats(boolStats);
  } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) {
    StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
    stringStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
    stringStats.setAvgColLen(MetastoreDirectSqlUtils.extractSqlDouble(avglen));
    stringStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen));
    stringStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
    stringStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
    data.setStringStats(stringStats);
  } else if (colType.equals("binary")) {
    BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
    binaryStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
    binaryStats.setAvgColLen(MetastoreDirectSqlUtils.extractSqlDouble(avglen));
    binaryStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen));
    data.setBinaryStats(binaryStats);
  } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") || colType.equals("tinyint")) {
    LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
    longStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
    if (lhigh != null) {
      longStats.setHighValue(MetastoreDirectSqlUtils.extractSqlLong(lhigh));
    }
    if (llow != null) {
      longStats.setLowValue(MetastoreDirectSqlUtils.extractSqlLong(llow));
    }
    longStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
    longStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
    data.setLongStats(longStats);
  } else if (colType.equals("double") || colType.equals("float")) {
    DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
    doubleStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
    if (dhigh != null) {
      doubleStats.setHighValue(MetastoreDirectSqlUtils.extractSqlDouble(dhigh));
    }
    if (dlow != null) {
      doubleStats.setLowValue(MetastoreDirectSqlUtils.extractSqlDouble(dlow));
    }
    doubleStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
    doubleStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
    data.setDoubleStats(doubleStats);
  } else if (colType.startsWith("decimal")) {
    DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
    decimalStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
    if (dechigh != null) {
      decimalStats.setHighValue(DecimalUtils.createThriftDecimal((String) dechigh));
    }
    if (declow != null) {
      decimalStats.setLowValue(DecimalUtils.createThriftDecimal((String) declow));
    }
    decimalStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
    decimalStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
    data.setDecimalStats(decimalStats);
  } else if (colType.equals("date")) {
    // Date bounds arrive in the same long columns as the integer types.
    DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
    dateStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
    if (lhigh != null) {
      dateStats.setHighValue(new Date(MetastoreDirectSqlUtils.extractSqlLong(lhigh)));
    }
    if (llow != null) {
      dateStats.setLowValue(new Date(MetastoreDirectSqlUtils.extractSqlLong(llow)));
    }
    dateStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
    dateStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
    data.setDateStats(dateStats);
  } else if (colType.equals("timestamp")) {
    // Timestamp bounds arrive in the same long columns as the integer types.
    TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector();
    timestampStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
    if (lhigh != null) {
      timestampStats.setHighValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(lhigh)));
    }
    if (llow != null) {
      timestampStats.setLowValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(llow)));
    }
    timestampStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
    timestampStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
    data.setTimestampStats(timestampStats);
  }
}
Aggregations