Use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project hive by apache.
From the class MetaDataFormatUtils, method formatWithoutIndentation.
private static void formatWithoutIndentation(String name, String type, String comment,
    StringBuilder colBuffer, List<ColumnStatisticsObj> colStats) {
  colBuffer.append(name);
  colBuffer.append(FIELD_DELIM);
  colBuffer.append(type);
  colBuffer.append(FIELD_DELIM);
  if (colStats != null) {
    ColumnStatisticsObj cso = getColumnStatisticsObject(name, type, colStats);
    if (cso != null) {
      ColumnStatisticsData csd = cso.getStatsData();
      if (csd.isSetBinaryStats()) {
        BinaryColumnStatsData bcsd = csd.getBinaryStats();
        appendColumnStatsNoFormatting(colBuffer, "", "", bcsd.getNumNulls(), "",
            bcsd.getAvgColLen(), bcsd.getMaxColLen(), "", "");
      } else if (csd.isSetStringStats()) {
        StringColumnStatsData scsd = csd.getStringStats();
        appendColumnStatsNoFormatting(colBuffer, "", "", scsd.getNumNulls(), scsd.getNumDVs(),
            scsd.getAvgColLen(), scsd.getMaxColLen(), "", "");
      } else if (csd.isSetBooleanStats()) {
        BooleanColumnStatsData bcsd = csd.getBooleanStats();
        appendColumnStatsNoFormatting(colBuffer, "", "", bcsd.getNumNulls(), "", "", "",
            bcsd.getNumTrues(), bcsd.getNumFalses());
      } else if (csd.isSetDecimalStats()) {
        DecimalColumnStatsData dcsd = csd.getDecimalStats();
        appendColumnStatsNoFormatting(colBuffer, convertToString(dcsd.getLowValue()),
            convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(),
            "", "", "", "");
      } else if (csd.isSetDoubleStats()) {
        DoubleColumnStatsData dcsd = csd.getDoubleStats();
        appendColumnStatsNoFormatting(colBuffer, dcsd.getLowValue(), dcsd.getHighValue(),
            dcsd.getNumNulls(), dcsd.getNumDVs(), "", "", "", "");
      } else if (csd.isSetLongStats()) {
        LongColumnStatsData lcsd = csd.getLongStats();
        appendColumnStatsNoFormatting(colBuffer, lcsd.getLowValue(), lcsd.getHighValue(),
            lcsd.getNumNulls(), lcsd.getNumDVs(), "", "", "", "");
      } else if (csd.isSetDateStats()) {
        DateColumnStatsData dcsd = csd.getDateStats();
        appendColumnStatsNoFormatting(colBuffer, convertToString(dcsd.getLowValue()),
            convertToString(dcsd.getHighValue()), dcsd.getNumNulls(), dcsd.getNumDVs(),
            "", "", "", "");
      }
    } else {
      appendColumnStatsNoFormatting(colBuffer, "", "", "", "", "", "", "", "");
    }
  }
  colBuffer.append(comment == null ? "" : HiveStringUtils.escapeJava(comment));
  colBuffer.append(LINE_DELIM);
}
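For reference, here is a minimal sketch (not Hive code) of what one emitted row looks like. It assumes FIELD_DELIM is a tab and LINE_DELIM a newline (their actual values are defined in MetaDataFormatUtils), and that appendColumnStatsNoFormatting appends its eight stat arguments delimiter-separated in the order min, max, numNulls, numDVs, avgColLen, maxColLen, numTrues, numFalses, which is the order implied by the calls above:

public class RowSketch {
  // Hypothetical stand-in for appendColumnStatsNoFormatting: append each of the
  // eight stats followed by the field delimiter, with blanks where a stat does
  // not apply to the column's type.
  static void appendStats(StringBuilder buf, Object... stats) {
    for (Object stat : stats) {
      buf.append(stat).append('\t'); // assumes FIELD_DELIM == "\t"
    }
  }

  public static void main(String[] args) {
    // A long column with lowValue=-20, highValue=192, numNulls=30, numDVs=32.
    StringBuilder row = new StringBuilder();
    row.append("col1").append('\t').append("bigint").append('\t');
    appendStats(row, -20L, 192L, 30L, 32L, "", "", "", "");
    row.append("a comment").append('\n'); // assumes LINE_DELIM == "\n"
    // Prints one tab-delimited line: col1  bigint  -20  192  30  32  (blanks)  a comment
    System.out.print(row);
  }
}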
Use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project hive by apache.
From the class StatObjectConverter, method fillColumnStatisticsData.
public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data,
    Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh,
    Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses,
    Object avgLong, Object avgDouble, Object avgDecimal, Object sumDist,
    boolean useDensityFunctionForNDVEstimation) throws MetaException {
  colType = colType.toLowerCase();
  if (colType.equals("boolean")) {
    BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
    boolStats.setNumFalses(MetaStoreDirectSql.extractSqlLong(falses));
    boolStats.setNumTrues(MetaStoreDirectSql.extractSqlLong(trues));
    boolStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
    data.setBooleanStats(boolStats);
  } else if (colType.equals("string") || colType.startsWith("varchar")
      || colType.startsWith("char")) {
    StringColumnStatsData stringStats = new StringColumnStatsData();
    stringStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
    stringStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen));
    stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen));
    stringStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
    data.setStringStats(stringStats);
  } else if (colType.equals("binary")) {
    BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
    binaryStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
    binaryStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen));
    binaryStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen));
    data.setBinaryStats(binaryStats);
  } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint")
      || colType.equals("tinyint") || colType.equals("timestamp")) {
    LongColumnStatsData longStats = new LongColumnStatsData();
    longStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
    if (lhigh != null) {
      longStats.setHighValue(MetaStoreDirectSql.extractSqlLong(lhigh));
    }
    if (llow != null) {
      longStats.setLowValue(MetaStoreDirectSql.extractSqlLong(llow));
    }
    long lowerBound = MetaStoreDirectSql.extractSqlLong(dist);
    long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist);
    if (useDensityFunctionForNDVEstimation && lhigh != null && llow != null && avgLong != null
        && MetaStoreDirectSql.extractSqlDouble(avgLong) != 0.0) {
      // We have an estimation plus a lower and an upper bound; use the
      // estimation only if it falls between the two bounds.
      long estimation = MetaStoreDirectSql.extractSqlLong(
          (MetaStoreDirectSql.extractSqlLong(lhigh) - MetaStoreDirectSql.extractSqlLong(llow))
              / MetaStoreDirectSql.extractSqlDouble(avgLong));
      if (estimation < lowerBound) {
        longStats.setNumDVs(lowerBound);
      } else if (estimation > higherBound) {
        longStats.setNumDVs(higherBound);
      } else {
        longStats.setNumDVs(estimation);
      }
    } else {
      longStats.setNumDVs(lowerBound);
    }
    data.setLongStats(longStats);
  } else if (colType.equals("date")) {
    DateColumnStatsData dateStats = new DateColumnStatsData();
    dateStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
    if (lhigh != null) {
      dateStats.setHighValue(new Date(MetaStoreDirectSql.extractSqlLong(lhigh)));
    }
    if (llow != null) {
      dateStats.setLowValue(new Date(MetaStoreDirectSql.extractSqlLong(llow)));
    }
    long lowerBound = MetaStoreDirectSql.extractSqlLong(dist);
    long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist);
    if (useDensityFunctionForNDVEstimation && lhigh != null && llow != null && avgLong != null
        && MetaStoreDirectSql.extractSqlDouble(avgLong) != 0.0) {
      // We have an estimation plus a lower and an upper bound; use the
      // estimation only if it falls between the two bounds.
      long estimation = MetaStoreDirectSql.extractSqlLong(
          (MetaStoreDirectSql.extractSqlLong(lhigh) - MetaStoreDirectSql.extractSqlLong(llow))
              / MetaStoreDirectSql.extractSqlDouble(avgLong));
      if (estimation < lowerBound) {
        dateStats.setNumDVs(lowerBound);
      } else if (estimation > higherBound) {
        dateStats.setNumDVs(higherBound);
      } else {
        dateStats.setNumDVs(estimation);
      }
    } else {
      dateStats.setNumDVs(lowerBound);
    }
    data.setDateStats(dateStats);
  } else if (colType.equals("double") || colType.equals("float")) {
    DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
    doubleStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
    if (dhigh != null) {
      doubleStats.setHighValue(MetaStoreDirectSql.extractSqlDouble(dhigh));
    }
    if (dlow != null) {
      doubleStats.setLowValue(MetaStoreDirectSql.extractSqlDouble(dlow));
    }
    long lowerBound = MetaStoreDirectSql.extractSqlLong(dist);
    long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist);
    if (useDensityFunctionForNDVEstimation && dhigh != null && dlow != null && avgDouble != null
        && MetaStoreDirectSql.extractSqlDouble(avgDouble) != 0.0) {
      long estimation = MetaStoreDirectSql.extractSqlLong(
          (MetaStoreDirectSql.extractSqlLong(dhigh) - MetaStoreDirectSql.extractSqlLong(dlow))
              / MetaStoreDirectSql.extractSqlDouble(avgDouble));
      if (estimation < lowerBound) {
        doubleStats.setNumDVs(lowerBound);
      } else if (estimation > higherBound) {
        doubleStats.setNumDVs(higherBound);
      } else {
        doubleStats.setNumDVs(estimation);
      }
    } else {
      doubleStats.setNumDVs(lowerBound);
    }
    data.setDoubleStats(doubleStats);
  } else if (colType.startsWith("decimal")) {
    DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
    decimalStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
    Decimal low = null;
    Decimal high = null;
    BigDecimal blow = null;
    BigDecimal bhigh = null;
    if (dechigh instanceof BigDecimal) {
      bhigh = (BigDecimal) dechigh;
      high = new Decimal(ByteBuffer.wrap(bhigh.unscaledValue().toByteArray()),
          (short) bhigh.scale());
    } else if (dechigh instanceof String) {
      bhigh = new BigDecimal((String) dechigh);
      high = createThriftDecimal((String) dechigh);
    }
    decimalStats.setHighValue(high);
    if (declow instanceof BigDecimal) {
      blow = (BigDecimal) declow;
      low = new Decimal(ByteBuffer.wrap(blow.unscaledValue().toByteArray()),
          (short) blow.scale());
    } else if (declow instanceof String) {
      blow = new BigDecimal((String) declow);
      low = createThriftDecimal((String) declow);
    }
    decimalStats.setLowValue(low);
    long lowerBound = MetaStoreDirectSql.extractSqlLong(dist);
    long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist);
    if (useDensityFunctionForNDVEstimation && dechigh != null && declow != null
        && avgDecimal != null && MetaStoreDirectSql.extractSqlDouble(avgDecimal) != 0.0) {
      long estimation = MetaStoreDirectSql.extractSqlLong(MetaStoreDirectSql.extractSqlLong(
          bhigh.subtract(blow).floatValue() / MetaStoreDirectSql.extractSqlDouble(avgDecimal)));
      if (estimation < lowerBound) {
        decimalStats.setNumDVs(lowerBound);
      } else if (estimation > higherBound) {
        decimalStats.setNumDVs(higherBound);
      } else {
        decimalStats.setNumDVs(estimation);
      }
    } else {
      decimalStats.setNumDVs(lowerBound);
    }
    data.setDecimalStats(decimalStats);
  }
}
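The same NDV clamp appears in each numeric branch above, so it can be read in isolation: the density-function estimate (high - low) / avgDelta is trusted only when it falls between the bounds computed from the stored statistics (dist and sumDist). A standalone sketch of that logic, with a hypothetical helper name (estimateNumDVs is not a Hive method):

public class NdvClampSketch {
  // Hypothetical helper: clamp the density-function NDV estimate to the
  // [lowerBound, higherBound] range derived from the stored stats.
  static long estimateNumDVs(double high, double low, double avgDelta,
      long lowerBound, long higherBound) {
    long estimation = (long) ((high - low) / avgDelta);
    if (estimation < lowerBound) {
      return lowerBound;   // estimate implausibly small: fall back to the lower bound
    } else if (estimation > higherBound) {
      return higherBound;  // estimate cannot exceed the sum of per-partition NDVs
    }
    return estimation;     // estimate is plausible: use it directly
  }

  public static void main(String[] args) {
    // Range [-20, 192] with an average gap of 6.5 and bounds [32, 64]:
    System.out.println(estimateNumDVs(192, -20, 6.5, 32, 64)); // prints 32 (212 / 6.5 ~ 32)
  }
}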
Use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project hive by apache.
From the class TestHBaseAggrStatsCacheIntegration, method someWithStats.
@Test
public void someWithStats() throws Exception {
  String dbName = "default";
  String tableName = "psws";
  List<String> partVals1 = Arrays.asList("today");
  List<String> partVals2 = Arrays.asList("yesterday");
  long now = System.currentTimeMillis();
  List<FieldSchema> cols = new ArrayList<>();
  cols.add(new FieldSchema("col1", "long", "nocomment"));
  SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
  StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
      serde, null, null, Collections.<String, String>emptyMap());
  List<FieldSchema> partCols = new ArrayList<>();
  partCols.add(new FieldSchema("ds", "string", ""));
  Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols,
      Collections.<String, String>emptyMap(), null, null, null);
  store.createTable(table);
  boolean first = true;
  for (List<String> partVals : Arrays.asList(partVals1, partVals2)) {
    StorageDescriptor psd = new StorageDescriptor(sd);
    psd.setLocation("file:/tmp/default/psws/ds=" + partVals.get(0));
    Partition part = new Partition(partVals, dbName, tableName, (int) now, (int) now, psd,
        Collections.<String, String>emptyMap());
    store.addPartition(part);
    if (first) {
      // Only the first partition (ds=today) gets statistics.
      ColumnStatistics cs = new ColumnStatistics();
      ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName);
      desc.setLastAnalyzed(now);
      desc.setPartName("ds=" + partVals.get(0));
      cs.setStatsDesc(desc);
      ColumnStatisticsObj obj = new ColumnStatisticsObj();
      obj.setColName("col1");
      obj.setColType("long");
      ColumnStatisticsData data = new ColumnStatisticsData();
      LongColumnStatsData lcsd = new LongColumnStatsData();
      lcsd.setHighValue(192L);
      lcsd.setLowValue(-20L);
      lcsd.setNumNulls(30);
      lcsd.setNumDVs(32);
      data.setLongStats(lcsd);
      obj.setStatsData(data);
      cs.addToStatsObj(obj);
      store.updatePartitionColumnStatistics(cs, partVals);
      first = false;
    }
  }
  Checker statChecker = new Checker() {
    @Override
    public void checkStats(AggrStats aggrStats) throws Exception {
      // Only one of the two requested partitions has stats, so only one is found.
      Assert.assertEquals(1, aggrStats.getPartsFound());
      Assert.assertEquals(1, aggrStats.getColStatsSize());
      ColumnStatisticsObj cso = aggrStats.getColStats().get(0);
      Assert.assertEquals("col1", cso.getColName());
      Assert.assertEquals("long", cso.getColType());
      LongColumnStatsData lcsd = cso.getStatsData().getLongStats();
      Assert.assertEquals(192L, lcsd.getHighValue());
      Assert.assertEquals(-20L, lcsd.getLowValue());
      Assert.assertEquals(30, lcsd.getNumNulls());
      Assert.assertEquals(32, lcsd.getNumDVs());
    }
  };
  AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName,
      Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"));
  statChecker.checkStats(aggrStats);
  // Check that we had to build it from the stats
  Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt());
  Assert.assertEquals(1, store.backdoor().getStatsCache().totalGets.getCnt());
  Assert.assertEquals(1, store.backdoor().getStatsCache().misses.getCnt());
  // Call again, this time it should come from memory. Also, reverse the name order
  // this time to assure that we still hit.
  aggrStats = store.get_aggr_stats_for(dbName, tableName,
      Arrays.asList("ds=yesterday", "ds=today"), Arrays.asList("col1"));
  statChecker.checkStats(aggrStats);
  Assert.assertEquals(0, store.backdoor().getStatsCache().hbaseHits.getCnt());
  Assert.assertEquals(2, store.backdoor().getStatsCache().totalGets.getCnt());
  Assert.assertEquals(1, store.backdoor().getStatsCache().misses.getCnt());
  store.backdoor().getStatsCache().flushMemory();
  // Call again, this time it should come from hbase
  aggrStats = store.get_aggr_stats_for(dbName, tableName,
      Arrays.asList("ds=today", "ds=yesterday"), Arrays.asList("col1"));
  statChecker.checkStats(aggrStats);
  Assert.assertEquals(1, store.backdoor().getStatsCache().hbaseHits.getCnt());
  Assert.assertEquals(3, store.backdoor().getStatsCache().totalGets.getCnt());
  Assert.assertEquals(1, store.backdoor().getStatsCache().misses.getCnt());
}
Use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project hive by apache.
From the class TestHBaseStoreIntegration, method tableStatistics.
@Test
public void tableStatistics() throws Exception {
  long now = System.currentTimeMillis();
  String dbname = "default";
  String tableName = "statstable";
  String boolcol = "boolcol";
  String longcol = "longcol";
  String doublecol = "doublecol";
  String stringcol = "stringcol";
  String binarycol = "bincol";
  String decimalcol = "deccol";
  long trues = 37;
  long falses = 12;
  long booleanNulls = 2;
  long longHigh = 120938479124L;
  long longLow = -12341243213412124L;
  long longNulls = 23;
  long longDVs = 213L;
  double doubleHigh = 123423.23423;
  double doubleLow = 0.00001234233;
  long doubleNulls = 92;
  long doubleDVs = 1234123421L;
  long strMaxLen = 1234;
  double strAvgLen = 32.3;
  long strNulls = 987;
  long strDVs = 906;
  long binMaxLen = 123412987L;
  double binAvgLen = 76.98;
  long binNulls = 976998797L;
  Decimal decHigh = new Decimal();
  decHigh.setScale((short) 3);
  // I have no clue how this is translated, but it doesn't matter
  decHigh.setUnscaled("3876".getBytes());
  Decimal decLow = new Decimal();
  decLow.setScale((short) 3);
  decLow.setUnscaled("38".getBytes());
  long decNulls = 13;
  long decDVs = 923947293L;
  List<FieldSchema> cols = new ArrayList<FieldSchema>();
  cols.add(new FieldSchema(boolcol, "boolean", "nocomment"));
  cols.add(new FieldSchema(longcol, "long", "nocomment"));
  cols.add(new FieldSchema(doublecol, "double", "nocomment"));
  cols.add(new FieldSchema(stringcol, "varchar(32)", "nocomment"));
  cols.add(new FieldSchema(binarycol, "binary", "nocomment"));
  cols.add(new FieldSchema(decimalcol, "decimal(5, 3)", "nocomment"));
  SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
  StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
      serde, null, null, emptyParameters);
  Table table = new Table(tableName, dbname, "me", (int) (now / 1000), (int) (now / 1000), 0,
      sd, null, emptyParameters, null, null, null);
  store.createTable(table);
  ColumnStatistics stats = new ColumnStatistics();
  ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
  desc.setLastAnalyzed(now);
  desc.setDbName(dbname);
  desc.setTableName(tableName);
  desc.setIsTblLevel(true);
  stats.setStatsDesc(desc);
  // Do one column of each type
  ColumnStatisticsObj obj = new ColumnStatisticsObj();
  obj.setColName(boolcol);
  obj.setColType("boolean");
  ColumnStatisticsData data = new ColumnStatisticsData();
  BooleanColumnStatsData boolData = new BooleanColumnStatsData();
  boolData.setNumTrues(trues);
  boolData.setNumFalses(falses);
  boolData.setNumNulls(booleanNulls);
  data.setBooleanStats(boolData);
  obj.setStatsData(data);
  stats.addToStatsObj(obj);
  obj = new ColumnStatisticsObj();
  obj.setColName(longcol);
  obj.setColType("long");
  data = new ColumnStatisticsData();
  LongColumnStatsData longData = new LongColumnStatsData();
  longData.setHighValue(longHigh);
  longData.setLowValue(longLow);
  longData.setNumNulls(longNulls);
  longData.setNumDVs(longDVs);
  data.setLongStats(longData);
  obj.setStatsData(data);
  stats.addToStatsObj(obj);
  obj = new ColumnStatisticsObj();
  obj.setColName(doublecol);
  obj.setColType("double");
  data = new ColumnStatisticsData();
  DoubleColumnStatsData doubleData = new DoubleColumnStatsData();
  doubleData.setHighValue(doubleHigh);
  doubleData.setLowValue(doubleLow);
  doubleData.setNumNulls(doubleNulls);
  doubleData.setNumDVs(doubleDVs);
  data.setDoubleStats(doubleData);
  obj.setStatsData(data);
  stats.addToStatsObj(obj);
  store.updateTableColumnStatistics(stats);
  stats = store.getTableColumnStatistics(dbname, tableName,
      Arrays.asList(boolcol, longcol, doublecol));
  // We'll check all of the individual values later.
  Assert.assertEquals(3, stats.getStatsObjSize());
  // Check that we can fetch just some of the columns
  stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(boolcol));
  Assert.assertEquals(1, stats.getStatsObjSize());
  stats = new ColumnStatistics();
  stats.setStatsDesc(desc);
  obj = new ColumnStatisticsObj();
  obj.setColName(stringcol);
  obj.setColType("string");
  data = new ColumnStatisticsData();
  StringColumnStatsData strData = new StringColumnStatsData();
  strData.setMaxColLen(strMaxLen);
  strData.setAvgColLen(strAvgLen);
  strData.setNumNulls(strNulls);
  strData.setNumDVs(strDVs);
  data.setStringStats(strData);
  obj.setStatsData(data);
  stats.addToStatsObj(obj);
  obj = new ColumnStatisticsObj();
  obj.setColName(binarycol);
  obj.setColType("binary");
  data = new ColumnStatisticsData();
  BinaryColumnStatsData binData = new BinaryColumnStatsData();
  binData.setMaxColLen(binMaxLen);
  binData.setAvgColLen(binAvgLen);
  binData.setNumNulls(binNulls);
  data.setBinaryStats(binData);
  obj.setStatsData(data);
  stats.addToStatsObj(obj);
  obj = new ColumnStatisticsObj();
  obj.setColName(decimalcol);
  obj.setColType("decimal(5,3)");
  data = new ColumnStatisticsData();
  DecimalColumnStatsData decData = new DecimalColumnStatsData();
  LOG.debug("Setting decimal high value to " + decHigh.getScale() + " <"
      + new String(decHigh.getUnscaled()) + ">");
  decData.setHighValue(decHigh);
  decData.setLowValue(decLow);
  decData.setNumNulls(decNulls);
  decData.setNumDVs(decDVs);
  data.setDecimalStats(decData);
  obj.setStatsData(data);
  stats.addToStatsObj(obj);
  store.updateTableColumnStatistics(stats);
  stats = store.getTableColumnStatistics(dbname, tableName,
      Arrays.asList(boolcol, longcol, doublecol, stringcol, binarycol, decimalcol));
  Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed());
  Assert.assertEquals(dbname, stats.getStatsDesc().getDbName());
  Assert.assertEquals(tableName, stats.getStatsDesc().getTableName());
  Assert.assertTrue(stats.getStatsDesc().isIsTblLevel());
  Assert.assertEquals(6, stats.getStatsObjSize());
  ColumnStatisticsData colData = stats.getStatsObj().get(0).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, colData.getSetField());
  boolData = colData.getBooleanStats();
  Assert.assertEquals(trues, boolData.getNumTrues());
  Assert.assertEquals(falses, boolData.getNumFalses());
  Assert.assertEquals(booleanNulls, boolData.getNumNulls());
  colData = stats.getStatsObj().get(1).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, colData.getSetField());
  longData = colData.getLongStats();
  Assert.assertEquals(longHigh, longData.getHighValue());
  Assert.assertEquals(longLow, longData.getLowValue());
  Assert.assertEquals(longNulls, longData.getNumNulls());
  Assert.assertEquals(longDVs, longData.getNumDVs());
  colData = stats.getStatsObj().get(2).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, colData.getSetField());
  doubleData = colData.getDoubleStats();
  Assert.assertEquals(doubleHigh, doubleData.getHighValue(), 0.01);
  Assert.assertEquals(doubleLow, doubleData.getLowValue(), 0.01);
  Assert.assertEquals(doubleNulls, doubleData.getNumNulls());
  Assert.assertEquals(doubleDVs, doubleData.getNumDVs());
  colData = stats.getStatsObj().get(3).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, colData.getSetField());
  strData = colData.getStringStats();
  Assert.assertEquals(strMaxLen, strData.getMaxColLen());
  Assert.assertEquals(strAvgLen, strData.getAvgColLen(), 0.01);
  Assert.assertEquals(strNulls, strData.getNumNulls());
  Assert.assertEquals(strDVs, strData.getNumDVs());
  colData = stats.getStatsObj().get(4).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.BINARY_STATS, colData.getSetField());
  binData = colData.getBinaryStats();
  Assert.assertEquals(binMaxLen, binData.getMaxColLen());
  Assert.assertEquals(binAvgLen, binData.getAvgColLen(), 0.01);
  Assert.assertEquals(binNulls, binData.getNumNulls());
  colData = stats.getStatsObj().get(5).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, colData.getSetField());
  decData = colData.getDecimalStats();
  Assert.assertEquals(decHigh, decData.getHighValue());
  Assert.assertEquals(decLow, decData.getLowValue());
  Assert.assertEquals(decNulls, decData.getNumNulls());
  Assert.assertEquals(decDVs, decData.getNumDVs());
}
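The "it doesn't matter" comment above makes sense once you see how the converter shown earlier encodes decimals: for BigDecimal inputs it stores the two's-complement bytes of the unscaled integer plus a scale. A small sketch of that round trip (reading the byte handling from fillColumnStatisticsData; plain java.math, no Hive classes):

public class DecimalBytesSketch {
  public static void main(String[] args) {
    java.math.BigDecimal bd = new java.math.BigDecimal("3.876");
    // Encode the way the converter does: unscaled two's-complement bytes + scale.
    byte[] unscaled = bd.unscaledValue().toByteArray();
    short scale = (short) bd.scale();
    // Decode by reversing the encoding.
    java.math.BigDecimal back =
        new java.math.BigDecimal(new java.math.BigInteger(unscaled), scale);
    System.out.println(back); // prints 3.876
    // The test above instead stores "3876".getBytes() -- the ASCII bytes, not
    // the two's-complement bytes -- so it decodes to some other but stable
    // value, which is all the equality assertions need.
  }
}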
Use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project hive by apache.
From the class TestHBaseSchemaTool, method oneMondoTest.
@Test
public void oneMondoTest() throws Exception {
  // This is a pain to do in one big test, but we have to control the order so that we have
  // tests without dbs, etc.
  HBaseSchemaTool tool = new HBaseSchemaTool();
  ByteArrayOutputStream outStr = new ByteArrayOutputStream();
  PrintStream out = new PrintStream(outStr);
  ByteArrayOutputStream errStr = new ByteArrayOutputStream();
  PrintStream err = new PrintStream(errStr);
  // This needs to be up front before we create any tables or partitions
  tool.go(false, HBaseReadWrite.SD_TABLE, null, "whatever", conf, out, err);
  Assert.assertEquals("No storage descriptors" + lsep, outStr.toString());
  // This one needs to be up front too
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.SEQUENCES_TABLE, null, "whatever", conf, out, err);
  Assert.assertEquals("No sequences" + lsep, outStr.toString());
  // Create some databases
  String[] dbNames = new String[3];
  for (int i = 0; i < dbNames.length; i++) {
    dbNames[i] = "db" + i;
    Database db = new Database(dbNames[i], "no description", "file:///tmp", emptyParameters);
    store.createDatabase(db);
  }
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.DB_TABLE, "db0", null, conf, out, err);
  Assert.assertEquals("{\"name\":\"db0\",\"description\":\"no description\","
      + "\"locationUri\":\"file:///tmp\",\"parameters\":{}}" + lsep, outStr.toString());
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.DB_TABLE, null, ".*", conf, out, err);
  Assert.assertEquals("{\"name\":\"db0\",\"description\":\"no description\","
      + "\"locationUri\":\"file:///tmp\",\"parameters\":{}}" + lsep
      + "{\"name\":\"db1\",\"description\":\"no description\","
      + "\"locationUri\":\"file:///tmp\",\"parameters\":{}}" + lsep
      + "{\"name\":\"db2\",\"description\":\"no description\","
      + "\"locationUri\":\"file:///tmp\",\"parameters\":{}}" + lsep, outStr.toString());
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.DB_TABLE, null, "db[12]", conf, out, err);
  Assert.assertEquals("{\"name\":\"db1\",\"description\":\"no description\","
      + "\"locationUri\":\"file:///tmp\",\"parameters\":{}}" + lsep
      + "{\"name\":\"db2\",\"description\":\"no description\","
      + "\"locationUri\":\"file:///tmp\",\"parameters\":{}}" + lsep, outStr.toString());
  String[] roleNames = new String[2];
  for (int i = 0; i < roleNames.length; i++) {
    roleNames[i] = "role" + i;
    store.addRole(roleNames[i], "me");
  }
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.ROLE_TABLE, null, "role.", conf, out, err);
  Assert.assertEquals("{\"roleName\":\"role0\",\"createTime\":now,\"ownerName\":\"me\"}" + lsep
      + "{\"roleName\":\"role1\",\"createTime\":now,\"ownerName\":\"me\"}" + lsep,
      outStr.toString().replaceAll("createTime\":[0-9]+", "createTime\":now"));
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.ROLE_TABLE, "role1", null, conf, out, err);
  Assert.assertEquals("{\"roleName\":\"role1\",\"createTime\":now,\"ownerName\":\"me\"}" + lsep,
      outStr.toString().replaceAll("createTime\":[0-9]+", "createTime\":now"));
  Role role1 = store.getRole("role1");
  store.grantRole(role1, "fred", PrincipalType.USER, "me", PrincipalType.USER, false);
  store.grantRole(role1, "joanne", PrincipalType.USER, "me", PrincipalType.USER, false);
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.USER_TO_ROLE_TABLE, null, ".*", conf, out, err);
  Assert.assertEquals("fred: role1" + lsep + "joanne: role1" + lsep, outStr.toString());
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.USER_TO_ROLE_TABLE, "joanne", null, conf, out, err);
  Assert.assertEquals("role1" + lsep, outStr.toString());
  String[] funcNames = new String[3];
  for (int i = 0; i < funcNames.length; i++) {
    funcNames[i] = "func" + i;
    Function function = new Function(funcNames[i], "db1", "Function", "me", PrincipalType.USER,
        0, FunctionType.JAVA, null);
    store.createFunction(function);
  }
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.FUNC_TABLE, "db1.func0", null, conf, out, err);
  Assert.assertEquals("{\"functionName\":\"func0\",\"dbName\":\"db1\","
      + "\"className\":\"Function\",\"ownerName\":\"me\",\"ownerType\":1,\"createTime\":0,"
      + "\"functionType\":1}" + lsep, outStr.toString());
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.FUNC_TABLE, null, ".*", conf, out, err);
  Assert.assertEquals("{\"functionName\":\"func0\",\"dbName\":\"db1\","
      + "\"className\":\"Function\",\"ownerName\":\"me\",\"ownerType\":1,\"createTime\":0,"
      + "\"functionType\":1}" + lsep
      + "{\"functionName\":\"func1\",\"dbName\":\"db1\","
      + "\"className\":\"Function\",\"ownerName\":\"me\",\"ownerType\":1,\"createTime\":0,"
      + "\"functionType\":1}" + lsep
      + "{\"functionName\":\"func2\",\"dbName\":\"db1\","
      + "\"className\":\"Function\",\"ownerName\":\"me\",\"ownerType\":1,\"createTime\":0,"
      + "\"functionType\":1}" + lsep, outStr.toString());
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.FUNC_TABLE, null, "db1.func[12]", conf, out, err);
  Assert.assertEquals("{\"functionName\":\"func1\",\"dbName\":\"db1\","
      + "\"className\":\"Function\",\"ownerName\":\"me\",\"ownerType\":1,\"createTime\":0,"
      + "\"functionType\":1}" + lsep
      + "{\"functionName\":\"func2\",\"dbName\":\"db1\","
      + "\"className\":\"Function\",\"ownerName\":\"me\",\"ownerType\":1,\"createTime\":0,"
      + "\"functionType\":1}" + lsep, outStr.toString());
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.GLOBAL_PRIVS_TABLE, null, null, conf, out, err);
  Assert.assertEquals("No global privileges" + lsep, outStr.toString());
  List<HiveObjectPrivilege> privileges = new ArrayList<>();
  HiveObjectRef hiveObjRef = new HiveObjectRef(HiveObjectType.GLOBAL, "db0", "tab0", null, null);
  PrivilegeGrantInfo grantInfo =
      new PrivilegeGrantInfo("read", 0, "me", PrincipalType.USER, false);
  HiveObjectPrivilege hop =
      new HiveObjectPrivilege(hiveObjRef, "user", PrincipalType.USER, grantInfo);
  privileges.add(hop);
  grantInfo = new PrivilegeGrantInfo("create", 0, "me", PrincipalType.USER, true);
  hop = new HiveObjectPrivilege(hiveObjRef, "user", PrincipalType.USER, grantInfo);
  privileges.add(hop);
  PrivilegeBag pBag = new PrivilegeBag(privileges);
  store.grantPrivileges(pBag);
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.GLOBAL_PRIVS_TABLE, null, null, conf, out, err);
  Assert.assertEquals("{\"userPrivileges\":{\"user\":[{\"privilege\":\"read\",\"createTime\":0,"
      + "\"grantor\":\"me\",\"grantorType\":1,\"grantOption\":0},{\"privilege\":\"create\","
      + "\"createTime\":0,\"grantor\":\"me\",\"grantorType\":1,\"grantOption\":1}]}}" + lsep,
      outStr.toString());
  String[] tableNames = new String[3];
  for (int i = 0; i < tableNames.length; i++) {
    tableNames[i] = "tab" + i;
    StorageDescriptor sd = new StorageDescriptor(
        Arrays.asList(new FieldSchema("col1", "int", ""),
            new FieldSchema("col2", "varchar(32)", "")),
        "/tmp", null, null, false, 0, null, null, null, Collections.<String, String>emptyMap());
    Table tab = new Table(tableNames[i], dbNames[0], "me", 0, 0, 0, sd,
        Arrays.asList(new FieldSchema("pcol1", "string", ""),
            new FieldSchema("pcol2", "string", "")),
        Collections.<String, String>emptyMap(), null, null, null);
    store.createTable(tab);
  }
  ColumnStatisticsDesc tableStatsDesc = new ColumnStatisticsDesc(false, "db0", "tab0");
  ColumnStatisticsData tcsd = new ColumnStatisticsData();
  LongColumnStatsData tlcsd = new LongColumnStatsData(1, 2);
  tlcsd.setLowValue(-95);
  tlcsd.setHighValue(95);
  tcsd.setLongStats(tlcsd);
  ColumnStatisticsData tcsd2 = new ColumnStatisticsData();
  tcsd2.setStringStats(new StringColumnStatsData(97, 18.78, 29, 397));
  List<ColumnStatisticsObj> tcsos = Arrays.asList(
      new ColumnStatisticsObj("col1", "int", tcsd),
      new ColumnStatisticsObj("col2", "varchar(32)", tcsd2));
  ColumnStatistics tStatObj = new ColumnStatistics(tableStatsDesc, tcsos);
  store.updateTableColumnStatistics(tStatObj);
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.TABLE_TABLE, "db0.tab1", null, conf, out, err);
  Assert.assertEquals("{\"tableName\":\"tab1\",\"dbName\":\"db0\",\"owner\":\"me\","
      + "\"createTime\":0,\"lastAccessTime\":0,\"retention\":0,"
      + "\"partitionKeys\":[{\"name\":\"pcol1\",\"type\":\"string\",\"comment\":\"\"},"
      + "{\"name\":\"pcol2\",\"type\":\"string\",\"comment\":\"\"}],\"parameters\":{},"
      + "\"tableType\":\"\",\"rewriteEnabled\":0} sdHash: qQTgZAi5VzgpozzFGmIVTQ stats:" + lsep,
      outStr.toString());
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.TABLE_TABLE, null, "db0.*", conf, out, err);
  Assert.assertEquals("{\"tableName\":\"tab0\",\"dbName\":\"db0\",\"owner\":\"me\","
      + "\"createTime\":0,\"lastAccessTime\":0,\"retention\":0,"
      + "\"partitionKeys\":[{\"name\":\"pcol1\",\"type\":\"string\",\"comment\":\"\"},"
      + "{\"name\":\"pcol2\",\"type\":\"string\",\"comment\":\"\"}],\"parameters\":{\"COLUMN_STATS_ACCURATE\":\"{\\\"COLUMN_STATS\\\":{\\\"col1\\\":\\\"true\\\",\\\"col2\\\":\\\"true\\\"}}\"},"
      + "\"tableType\":\"\",\"rewriteEnabled\":0} sdHash: qQTgZAi5VzgpozzFGmIVTQ stats: column "
      + "col1: {\"colName\":\"col1\",\"colType\":\"int\","
      + "\"statsData\":{\"longStats\":{\"lowValue\":-95,\"highValue\":95,\"numNulls\":1,"
      + "\"numDVs\":2,\"bitVectors\":\"\"}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\","
      + "\"statsData\":{\"stringStats\":{\"maxColLen\":97,\"avgColLen\":18.78,"
      + "\"numNulls\":29,\"numDVs\":397,\"bitVectors\":\"\"}}}" + lsep
      + "{\"tableName\":\"tab1\",\"dbName\":\"db0\",\"owner\":\"me\",\"createTime\":0,"
      + "\"lastAccessTime\":0,\"retention\":0,\"partitionKeys\":[{\"name\":\"pcol1\","
      + "\"type\":\"string\",\"comment\":\"\"},{\"name\":\"pcol2\",\"type\":\"string\","
      + "\"comment\":\"\"}],\"parameters\":{},\"tableType\":\"\",\"rewriteEnabled\":0} sdHash: "
      + "qQTgZAi5VzgpozzFGmIVTQ stats:" + lsep
      + "{\"tableName\":\"tab2\",\"dbName\":\"db0\",\"owner\":\"me\",\"createTime\":0,"
      + "\"lastAccessTime\":0,\"retention\":0,\"partitionKeys\":[{\"name\":\"pcol1\","
      + "\"type\":\"string\",\"comment\":\"\"},{\"name\":\"pcol2\",\"type\":\"string\","
      + "\"comment\":\"\"}],\"parameters\":{},\"tableType\":\"\",\"rewriteEnabled\":0} sdHash: "
      + "qQTgZAi5VzgpozzFGmIVTQ stats:" + lsep, outStr.toString());
  List<List<String>> partVals = Arrays.asList(Arrays.asList("a", "b"), Arrays.asList("c", "d"));
  for (List<String> pv : partVals) {
    StorageDescriptor sd = new StorageDescriptor(
        Arrays.asList(new FieldSchema("col1", "int", ""),
            new FieldSchema("col2", "varchar(32)", "")),
        "/tmp", null, null, false, 0, null, null, null, Collections.<String, String>emptyMap());
    Partition p = new Partition(pv, "db0", "tab1", 0, 0, sd,
        Collections.<String, String>emptyMap());
    store.addPartition(p);
  }
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.PART_TABLE, "db0.tab1.a.b", null, conf, out, err);
  Assert.assertEquals("{\"values\":[\"a\",\"b\"],\"dbName\":\"db0\",\"tableName\":\"tab1\","
      + "\"createTime\":0,\"lastAccessTime\":0,\"parameters\":{}} sdHash: "
      + "qQTgZAi5VzgpozzFGmIVTQ stats:" + lsep, outStr.toString());
  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(false, "db0", "tab1");
  statsDesc.setPartName("pcol1=c/pcol2=d");
  ColumnStatisticsData csd1 = new ColumnStatisticsData();
  LongColumnStatsData lcsd = new LongColumnStatsData(1, 2);
  lcsd.setLowValue(-95);
  lcsd.setHighValue(95);
  csd1.setLongStats(lcsd);
  ColumnStatisticsData csd2 = new ColumnStatisticsData();
  csd2.setStringStats(new StringColumnStatsData(97, 18.78, 29, 397));
  List<ColumnStatisticsObj> csos = Arrays.asList(
      new ColumnStatisticsObj("col1", "int", csd1),
      new ColumnStatisticsObj("col2", "varchar(32)", csd2));
  ColumnStatistics statsObj = new ColumnStatistics(statsDesc, csos);
  store.updatePartitionColumnStatistics(statsObj, partVals.get(1));
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.PART_TABLE, "db0.tab1.c.d", null, conf, out, err);
  Assert.assertEquals("{\"values\":[\"c\",\"d\"],\"dbName\":\"db0\",\"tableName\":\"tab1\","
      + "\"createTime\":0,\"lastAccessTime\":0,\"parameters\":{\"COLUMN_STATS_ACCURATE\":\"{\\\"COLUMN_STATS\\\":{\\\"col1\\\":\\\"true\\\",\\\"col2\\\":\\\"true\\\"}}\"}} sdHash: qQTgZAi5VzgpozzFGmIVTQ "
      + "stats: column col1: {\"colName\":\"col1\",\"colType\":\"int\","
      + "\"statsData\":{\"longStats\":{\"lowValue\":-95,\"highValue\":95,\"numNulls\":1,"
      + "\"numDVs\":2,\"bitVectors\":\"\"}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\","
      + "\"statsData\":{\"stringStats\":{\"maxColLen\":97,\"avgColLen\":18.78,\"numNulls\":29,"
      + "\"numDVs\":397,\"bitVectors\":\"\"}}}" + lsep, outStr.toString());
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.PART_TABLE, null, "db0.tab1", conf, out, err);
  Assert.assertEquals("{\"values\":[\"a\",\"b\"],\"dbName\":\"db0\",\"tableName\":\"tab1\","
      + "\"createTime\":0,\"lastAccessTime\":0,\"parameters\":{}} sdHash: qQTgZAi5VzgpozzFGmIVTQ "
      + "stats:" + lsep
      + "{\"values\":[\"c\",\"d\"],\"dbName\":\"db0\",\"tableName\":\"tab1\",\"createTime\":0,"
      + "\"lastAccessTime\":0,\"parameters\":{\"COLUMN_STATS_ACCURATE\":\"{\\\"COLUMN_STATS\\\":{\\\"col1\\\":\\\"true\\\",\\\"col2\\\":\\\"true\\\"}}\"}} sdHash: qQTgZAi5VzgpozzFGmIVTQ stats: column "
      + "col1: {\"colName\":\"col1\",\"colType\":\"int\","
      + "\"statsData\":{\"longStats\":{\"lowValue\":-95,\"highValue\":95,\"numNulls\":1,"
      + "\"numDVs\":2,\"bitVectors\":\"\"}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\","
      + "\"statsData\":{\"stringStats\":{\"maxColLen\":97,\"avgColLen\":18.78,\"numNulls\":29,"
      + "\"numDVs\":397,\"bitVectors\":\"\"}}}" + lsep, outStr.toString());
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.PART_TABLE, null, "db0.tab1.a", conf, out, err);
  Assert.assertEquals("{\"values\":[\"a\",\"b\"],\"dbName\":\"db0\",\"tableName\":\"tab1\","
      + "\"createTime\":0,\"lastAccessTime\":0,\"parameters\":{}} sdHash: qQTgZAi5VzgpozzFGmIVTQ "
      + "stats:" + lsep, outStr.toString());
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.SD_TABLE, "qQTgZAi5VzgpozzFGmIVTQ", null, conf, out, err);
  Assert.assertEquals("{\"cols\":[{\"name\":\"col1\",\"type\":\"int\",\"comment\":\"\"},"
      + "{\"name\":\"col2\",\"type\":\"varchar(32)\",\"comment\":\"\"}],\"compressed\":0,"
      + "\"numBuckets\":0,\"bucketCols\":[],\"sortCols\":[],\"storedAsSubDirectories\":0}" + lsep,
      outStr.toString());
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.SD_TABLE, null, "whatever", conf, out, err);
  Assert.assertEquals("qQTgZAi5VzgpozzFGmIVTQ: {\"cols\":[{\"name\":\"col1\",\"type\":\"int\","
      + "\"comment\":\"\"},"
      + "{\"name\":\"col2\",\"type\":\"varchar(32)\",\"comment\":\"\"}],\"compressed\":0,"
      + "\"numBuckets\":0,\"bucketCols\":[],\"sortCols\":[],\"storedAsSubDirectories\":0}" + lsep,
      outStr.toString());
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.SECURITY_TABLE, null, "whatever", conf, out, err);
  Assert.assertEquals("No security related entries" + lsep, outStr.toString());
  store.addMasterKey("this be a key");
  store.addToken("tokenid", "delegation token");
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.SECURITY_TABLE, null, "whatever", conf, out, err);
  Assert.assertEquals("Master key 0: this be a key" + lsep
      + "Delegation token tokenid: delegation token" + lsep, outStr.toString());
  outStr = new ByteArrayOutputStream();
  out = new PrintStream(outStr);
  tool.go(false, HBaseReadWrite.SEQUENCES_TABLE, null, "whatever", conf, out, err);
  Assert.assertEquals("master_key: 1" + lsep, outStr.toString());
}