Usage of org.apache.hadoop.hive.metastore.NumDistinctValueEstimator in the Apache Hive project.
The following is the getColumnStatsMerger method of the class ColumnStatsMergerFactory.
/**
 * Selects the {@link ColumnStatsMerger} implementation matching the statistics
 * type shared by the two {@link ColumnStatisticsObj} arguments, and initializes
 * its NDV estimator when both sides carry the same (non-zero) number of bit vectors.
 *
 * @param statsObjNew the incoming statistics object
 * @param statsObjOld the previously stored statistics object
 * @return a merger for the common statistics type, with {@code ndvEstimator}
 *         set when the bit-vector counts of both sides agree and are positive
 * @throws IllegalArgumentException if the two objects hold different statistics types
 * @throws RuntimeException if the common statistics type is not supported
 */
public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsObjNew, ColumnStatisticsObj statsObjOld) {
  ColumnStatsMerger agg;
  _Fields typeNew = statsObjNew.getStatsData().getSetField();
  _Fields typeOld = statsObjOld.getStatsData().getSetField();
  // Fail fast with a descriptive exception. The previous pattern
  // ("typeNew = typeNew == typeOld ? typeNew : null;" followed by switch(typeNew))
  // threw a bare NullPointerException on mismatched types, which hid the real cause.
  if (typeNew != typeOld) {
    throw new IllegalArgumentException(
        "Cannot merge column stats of type " + typeNew + " with type " + typeOld);
  }
  int numBitVectors = 0;
  switch (typeNew) {
  case BOOLEAN_STATS:
    agg = new BooleanColumnStatsMerger();
    break;
  case LONG_STATS:
    agg = new LongColumnStatsMerger();
    numBitVectors = matchingNumBitVectors(
        statsObjNew.getStatsData().getLongStats().getBitVectors(),
        statsObjOld.getStatsData().getLongStats().getBitVectors());
    break;
  case DOUBLE_STATS:
    agg = new DoubleColumnStatsMerger();
    numBitVectors = matchingNumBitVectors(
        statsObjNew.getStatsData().getDoubleStats().getBitVectors(),
        statsObjOld.getStatsData().getDoubleStats().getBitVectors());
    break;
  case STRING_STATS:
    agg = new StringColumnStatsMerger();
    numBitVectors = matchingNumBitVectors(
        statsObjNew.getStatsData().getStringStats().getBitVectors(),
        statsObjOld.getStatsData().getStringStats().getBitVectors());
    break;
  case BINARY_STATS:
    agg = new BinaryColumnStatsMerger();
    break;
  case DECIMAL_STATS:
    agg = new DecimalColumnStatsMerger();
    numBitVectors = matchingNumBitVectors(
        statsObjNew.getStatsData().getDecimalStats().getBitVectors(),
        statsObjOld.getStatsData().getDecimalStats().getBitVectors());
    break;
  default:
    throw new RuntimeException("Woh, bad. Unknown stats type " + typeNew.toString());
  }
  // A positive count means both sides have compatible bit vectors, so the
  // merger can combine NDV estimates instead of taking the max.
  if (numBitVectors > 0) {
    agg.ndvEstimator = new NumDistinctValueEstimator(numBitVectors);
  }
  return agg;
}

/**
 * Returns the bit-vector count shared by the two serialized bit-vector strings,
 * or 0 when the counts disagree (in which case bit-vector merging is disabled).
 */
private static int matchingNumBitVectors(String bitVectorsNew, String bitVectorsOld) {
  int nbvNew = countNumBitVectors(bitVectorsNew);
  int nbvOld = countNumBitVectors(bitVectorsOld);
  return nbvNew == nbvOld ? nbvNew : 0;
}
Usage of org.apache.hadoop.hive.metastore.NumDistinctValueEstimator in the Apache Hive project.
The following is the merge method of the class DoubleColumnStatsMerger.
/**
 * Folds {@code newColStats} into {@code aggregateColStats} for a double column:
 * widens the low/high range, sums null counts, and combines NDV estimates —
 * via bit-vector merging when an estimator and compatible bit vectors are
 * available, otherwise by keeping the larger of the two NDV values.
 */
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
  final DoubleColumnStatsData aggregateData = aggregateColStats.getStatsData().getDoubleStats();
  final DoubleColumnStatsData newData = newColStats.getStatsData().getDoubleStats();
  final double mergedLow = Math.min(aggregateData.getLowValue(), newData.getLowValue());
  final double mergedHigh = Math.max(aggregateData.getHighValue(), newData.getHighValue());
  aggregateData.setLowValue(mergedLow);
  aggregateData.setHighValue(mergedHigh);
  aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
  // Bit-vector merging is only possible when an estimator was configured and
  // the incoming stats actually carry a non-empty serialized bit vector.
  final boolean canMergeBitVectors =
      ndvEstimator != null && newData.isSetBitVectors() && newData.getBitVectors().length() != 0;
  if (canMergeBitVectors) {
    final int width = ndvEstimator.getnumBitVectors();
    ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), width));
    ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), width));
    final long ndv = ndvEstimator.estimateNumDistinctValues();
    LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
        + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
    aggregateData.setNumDVs(ndv);
    aggregateData.setBitVectors(ndvEstimator.serialize().toString());
  } else {
    // No usable bit vectors: the best distinct-count approximation is the max.
    aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
  }
}
Usage of org.apache.hadoop.hive.metastore.NumDistinctValueEstimator in the Apache Hive project.
The following is the merge method of the class LongColumnStatsMerger.
/**
 * Folds {@code newColStats} into {@code aggregateColStats} for a long column:
 * widens the low/high range, sums null counts, and combines NDV estimates —
 * via bit-vector merging when an estimator and compatible bit vectors are
 * available, otherwise by keeping the larger of the two NDV values.
 */
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
  final LongColumnStatsData aggregateData = aggregateColStats.getStatsData().getLongStats();
  final LongColumnStatsData newData = newColStats.getStatsData().getLongStats();
  final long mergedLow = Math.min(aggregateData.getLowValue(), newData.getLowValue());
  final long mergedHigh = Math.max(aggregateData.getHighValue(), newData.getHighValue());
  aggregateData.setLowValue(mergedLow);
  aggregateData.setHighValue(mergedHigh);
  aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
  // Bit-vector merging is only possible when an estimator was configured and
  // the incoming stats actually carry a non-empty serialized bit vector.
  final boolean canMergeBitVectors =
      ndvEstimator != null && newData.isSetBitVectors() && newData.getBitVectors().length() != 0;
  if (canMergeBitVectors) {
    final int width = ndvEstimator.getnumBitVectors();
    ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), width));
    ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), width));
    final long ndv = ndvEstimator.estimateNumDistinctValues();
    LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
        + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
    aggregateData.setNumDVs(ndv);
    aggregateData.setBitVectors(ndvEstimator.serialize().toString());
  } else {
    // No usable bit vectors: the best distinct-count approximation is the max.
    aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
  }
}
Usage of org.apache.hadoop.hive.metastore.NumDistinctValueEstimator in the Apache Hive project.
The following is the merge method of the class StringColumnStatsMerger.
/**
 * Folds {@code newColStats} into {@code aggregateColStats} for a string column:
 * takes the larger max/avg column lengths, sums null counts, and combines NDV
 * estimates — via bit-vector merging when an estimator and compatible bit
 * vectors are available, otherwise by keeping the larger of the two NDV values.
 */
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
  final StringColumnStatsData aggregateData = aggregateColStats.getStatsData().getStringStats();
  final StringColumnStatsData newData = newColStats.getStatsData().getStringStats();
  aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
  // NOTE(review): avgColLen is merged with max rather than a weighted average —
  // presumably a deliberate upper-bound approximation; confirm with callers.
  aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
  aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
  // Bit-vector merging is only possible when an estimator was configured and
  // the incoming stats actually carry a non-empty serialized bit vector.
  final boolean canMergeBitVectors =
      ndvEstimator != null && newData.isSetBitVectors() && newData.getBitVectors().length() != 0;
  if (canMergeBitVectors) {
    final int width = ndvEstimator.getnumBitVectors();
    ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), width));
    ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), width));
    final long ndv = ndvEstimator.estimateNumDistinctValues();
    LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
        + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
    aggregateData.setNumDVs(ndv);
    aggregateData.setBitVectors(ndvEstimator.serialize().toString());
  } else {
    // No usable bit vectors: the best distinct-count approximation is the max.
    aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
  }
}
Aggregations