Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache, from the class TestHBaseImport, method parallel().
@Test
public void parallel() throws Exception {
  int parallelFactor = 10;
  RawStore rdbms = new ObjectStore();
  rdbms.setConf(conf);
  String[] dbNames = new String[] { "paralleldb1" };
  // Divide before casting so the seconds value doesn't overflow an int.
  int now = (int) (System.currentTimeMillis() / 1000);
  for (int i = 0; i < dbNames.length; i++) {
    rdbms.createDatabase(new Database(dbNames[i], "no description", "file:/tmp", emptyParameters));
    List<FieldSchema> cols = new ArrayList<>();
    cols.add(new FieldSchema("col1", "int", "nocomment"));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
        serde, null, null, emptyParameters);
    List<FieldSchema> partCols = new ArrayList<>();
    partCols.add(new FieldSchema("region", "string", ""));
    for (int j = 0; j < parallelFactor; j++) {
      rdbms.createTable(new Table("t" + j, dbNames[i], "me", now, now, 0, sd, partCols,
          emptyParameters, null, null, null));
      for (int k = 0; k < parallelFactor; k++) {
        StorageDescriptor psd = new StorageDescriptor(sd);
        psd.setLocation("file:/tmp/region=" + k);
        Partition part = new Partition(Arrays.asList("p" + k), dbNames[i], "t" + j, now, now,
            psd, emptyParameters);
        rdbms.addPartition(part);
      }
    }
  }
  HBaseImport importer = new HBaseImport("-p", "2", "-b", "2", "-d", dbNames[0]);
  importer.setConnections(rdbms, store);
  importer.run();
  for (int i = 0; i < dbNames.length; i++) {
    Database db = store.getDatabase(dbNames[i]);
    Assert.assertNotNull(db);
    for (int j = 0; j < parallelFactor; j++) {
      Table table = store.getTable(db.getName(), "t" + j);
      Assert.assertNotNull(table);
      Assert.assertEquals(now, table.getLastAccessTime());
      Assert.assertEquals("input", table.getSd().getInputFormat());
      for (int k = 0; k < parallelFactor; k++) {
        Partition part = store.getPartition(dbNames[i], "t" + j, Arrays.asList("p" + k));
        Assert.assertNotNull(part);
        Assert.assertEquals("file:/tmp/region=" + k, part.getSd().getLocation());
      }
      Assert.assertEquals(parallelFactor, store.getPartitions(dbNames[i], "t" + j, -1).size());
    }
    Assert.assertEquals(parallelFactor, store.getAllTables(dbNames[i]).size());
  }
}
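The "-p" and "-b" arguments presumably set the import's parallelism and batch size, which is what makes this the parallel-path test; the assertions then verify that all parallelFactor tables, and parallelFactor partitions per table, survived the copy. The SerDeInfo/StorageDescriptor boilerplate recurs in every snippet in this section; a minimal sketch of a helper that factors it out (hypothetical, not part of the Hive tests, though the constructor arguments mirror the Thrift-generated metastore API):

// Hypothetical helper, not in TestHBaseImport: builds the default
// StorageDescriptor used throughout these tests. The SerDeInfo constructor
// is (name, serializationLib, parameters); null parameters are permitted.
private static StorageDescriptor defaultSd(List<FieldSchema> cols, String location,
    Map<String, String> parameters) {
  SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
  return new StorageDescriptor(cols, location, "input", "output", false, 0, serde, null, null,
      parameters);
}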
Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache, from the class TestHBaseImport, method setupObjectStore().
private void setupObjectStore(RawStore rdbms, String[] roles, String[] dbNames, String[] tokenIds,
    String[] tokens, String[] masterKeys, int now, boolean putConstraintsOnTables)
    throws MetaException, InvalidObjectException, NoSuchObjectException {
  if (roles != null) {
    for (int i = 0; i < roles.length; i++) {
      rdbms.addRole(roles[i], "me");
    }
  }
  for (int i = 0; i < dbNames.length; i++) {
    rdbms.createDatabase(new Database(dbNames[i], "no description", "file:/tmp", emptyParameters));
    List<FieldSchema> cols = new ArrayList<>();
    cols.add(new FieldSchema("col1", "int", "nocomment"));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
        serde, null, null, emptyParameters);
    rdbms.createTable(new Table(tableNames[0], dbNames[i], "me", now, now, 0, sd, null,
        emptyParameters, null, null, null));
    if (putConstraintsOnTables) {
      rdbms.addPrimaryKeys(Collections.singletonList(new SQLPrimaryKey(dbNames[i], tableNames[0],
          "col1", 0, dbNames[i] + "_" + pkNames[0], true, false, true)));
    }
    List<FieldSchema> partCols = new ArrayList<>();
    partCols.add(new FieldSchema("region", "string", ""));
    rdbms.createTable(new Table(tableNames[1], dbNames[i], "me", now, now, 0, sd, partCols,
        emptyParameters, null, null, null));
    if (putConstraintsOnTables) {
      rdbms.addPrimaryKeys(Arrays.asList(new SQLPrimaryKey(dbNames[i], tableNames[1], "col1", 0,
          dbNames[i] + "_" + pkNames[1], true, false, true)));
      rdbms.addForeignKeys(Collections.singletonList(new SQLForeignKey(dbNames[i], tableNames[0],
          "col1", dbNames[i], tableNames[1], "col1", 0, 1, 2, dbNames[i] + "_" + fkNames[1],
          dbNames[i] + "_" + pkNames[0], true, false, true)));
    }
    for (int j = 0; j < partVals.length; j++) {
      StorageDescriptor psd = new StorageDescriptor(sd);
      psd.setLocation("file:/tmp/region=" + partVals[j]);
      Partition part = new Partition(Arrays.asList(partVals[j]), dbNames[i], tableNames[1],
          now, now, psd, emptyParameters);
      rdbms.addPartition(part);
    }
    for (String funcName : funcNames) {
      LOG.debug("Creating new function " + dbNames[i] + "." + funcName);
      // Divide before casting so the seconds value doesn't overflow an int.
      rdbms.createFunction(new Function(funcName, dbNames[i], "classname", "ownername",
          PrincipalType.USER, (int) (System.currentTimeMillis() / 1000), FunctionType.JAVA,
          Arrays.asList(new ResourceUri(ResourceType.JAR, "uri"))));
    }
    for (String indexName : indexNames) {
      LOG.debug("Creating new index " + dbNames[i] + "." + tableNames[0] + "." + indexName);
      String indexTableName = tableNames[0] + "__" + indexName + "__";
      rdbms.createTable(new Table(indexTableName, dbNames[i], "me", now, now, 0, sd, partCols,
          emptyParameters, null, null, null));
      rdbms.addIndex(new Index(indexName, null, dbNames[i], tableNames[0], now, now,
          indexTableName, sd, emptyParameters, false));
    }
  }
  if (tokenIds != null) {
    for (int i = 0; i < tokenIds.length; i++) {
      rdbms.addToken(tokenIds[i], tokens[i]);
    }
  }
  if (masterKeys != null) {
    for (int i = 0; i < masterKeys.length; i++) {
      masterKeySeqs.add(rdbms.addMasterKey(masterKeys[i]));
    }
  }
}
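setupObjectStore() is shared by several tests in this class. A hedged sketch of how a caller might drive it (a hypothetical test method; conf, store, and the import flow are taken from the parallel() example above, and the tableNames, funcNames, and indexNames fields are assumed to be initialized elsewhere in the class):

// Hypothetical caller of setupObjectStore, modeled on parallel() above.
@Test
public void importOneDatabase() throws Exception {
  RawStore rdbms = new ObjectStore();
  rdbms.setConf(conf);
  int now = (int) (System.currentTimeMillis() / 1000);
  // No roles, tokens, master keys, or constraints; one database.
  setupObjectStore(rdbms, null, new String[] { "somedb" }, null, null, null, now, false);
  HBaseImport importer = new HBaseImport("-d", "somedb");
  importer.setConnections(rdbms, store);
  importer.run();
  Assert.assertNotNull(store.getDatabase("somedb"));
}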
Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache, from the class TestHBaseStoreIntegration, method tableStatistics().
@Test
public void tableStatistics() throws Exception {
  long now = System.currentTimeMillis();
  String dbname = "default";
  String tableName = "statstable";
  String boolcol = "boolcol";
  String longcol = "longcol";
  String doublecol = "doublecol";
  String stringcol = "stringcol";
  String binarycol = "bincol";
  String decimalcol = "deccol";
  long trues = 37;
  long falses = 12;
  long booleanNulls = 2;
  long longHigh = 120938479124L;
  long longLow = -12341243213412124L;
  long longNulls = 23;
  long longDVs = 213L;
  double doubleHigh = 123423.23423;
  double doubleLow = 0.00001234233;
  long doubleNulls = 92;
  long doubleDVs = 1234123421L;
  long strMaxLen = 1234;
  double strAvgLen = 32.3;
  long strNulls = 987;
  long strDVs = 906;
  long binMaxLen = 123412987L;
  double binAvgLen = 76.98;
  long binNulls = 976998797L;
  Decimal decHigh = new Decimal();
  decHigh.setScale((short) 3);
  // I have no clue how this is translated, but it doesn't matter.
  decHigh.setUnscaled("3876".getBytes());
  Decimal decLow = new Decimal();
  decLow.setScale((short) 3);
  decLow.setUnscaled("38".getBytes());
  long decNulls = 13;
  long decDVs = 923947293L;
  List<FieldSchema> cols = new ArrayList<FieldSchema>();
  cols.add(new FieldSchema(boolcol, "boolean", "nocomment"));
  cols.add(new FieldSchema(longcol, "long", "nocomment"));
  cols.add(new FieldSchema(doublecol, "double", "nocomment"));
  cols.add(new FieldSchema(stringcol, "varchar(32)", "nocomment"));
  cols.add(new FieldSchema(binarycol, "binary", "nocomment"));
  cols.add(new FieldSchema(decimalcol, "decimal(5, 3)", "nocomment"));
  SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
  StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
      serde, null, null, emptyParameters);
  // Divide before casting so the seconds value doesn't overflow an int.
  Table table = new Table(tableName, dbname, "me", (int) (now / 1000), (int) (now / 1000), 0, sd,
      null, emptyParameters, null, null, null);
  store.createTable(table);
  ColumnStatistics stats = new ColumnStatistics();
  ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
  desc.setLastAnalyzed(now);
  desc.setDbName(dbname);
  desc.setTableName(tableName);
  desc.setIsTblLevel(true);
  stats.setStatsDesc(desc);
  // Do one column of each type.
  ColumnStatisticsObj obj = new ColumnStatisticsObj();
  obj.setColName(boolcol);
  obj.setColType("boolean");
  ColumnStatisticsData data = new ColumnStatisticsData();
  BooleanColumnStatsData boolData = new BooleanColumnStatsData();
  boolData.setNumTrues(trues);
  boolData.setNumFalses(falses);
  boolData.setNumNulls(booleanNulls);
  data.setBooleanStats(boolData);
  obj.setStatsData(data);
  stats.addToStatsObj(obj);
  obj = new ColumnStatisticsObj();
  obj.setColName(longcol);
  obj.setColType("long");
  data = new ColumnStatisticsData();
  LongColumnStatsData longData = new LongColumnStatsData();
  longData.setHighValue(longHigh);
  longData.setLowValue(longLow);
  longData.setNumNulls(longNulls);
  longData.setNumDVs(longDVs);
  data.setLongStats(longData);
  obj.setStatsData(data);
  stats.addToStatsObj(obj);
  obj = new ColumnStatisticsObj();
  obj.setColName(doublecol);
  obj.setColType("double");
  data = new ColumnStatisticsData();
  DoubleColumnStatsData doubleData = new DoubleColumnStatsData();
  doubleData.setHighValue(doubleHigh);
  doubleData.setLowValue(doubleLow);
  doubleData.setNumNulls(doubleNulls);
  doubleData.setNumDVs(doubleDVs);
  data.setDoubleStats(doubleData);
  obj.setStatsData(data);
  stats.addToStatsObj(obj);
  store.updateTableColumnStatistics(stats);
  stats = store.getTableColumnStatistics(dbname, tableName,
      Arrays.asList(boolcol, longcol, doublecol));
  // We'll check all of the individual values later.
  Assert.assertEquals(3, stats.getStatsObjSize());
  // Check that we can fetch just some of the columns.
  stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(boolcol));
  Assert.assertEquals(1, stats.getStatsObjSize());
  stats = new ColumnStatistics();
  stats.setStatsDesc(desc);
  obj = new ColumnStatisticsObj();
  obj.setColName(stringcol);
  obj.setColType("string");
  data = new ColumnStatisticsData();
  StringColumnStatsData strData = new StringColumnStatsData();
  strData.setMaxColLen(strMaxLen);
  strData.setAvgColLen(strAvgLen);
  strData.setNumNulls(strNulls);
  strData.setNumDVs(strDVs);
  data.setStringStats(strData);
  obj.setStatsData(data);
  stats.addToStatsObj(obj);
  obj = new ColumnStatisticsObj();
  obj.setColName(binarycol);
  obj.setColType("binary");
  data = new ColumnStatisticsData();
  BinaryColumnStatsData binData = new BinaryColumnStatsData();
  binData.setMaxColLen(binMaxLen);
  binData.setAvgColLen(binAvgLen);
  binData.setNumNulls(binNulls);
  data.setBinaryStats(binData);
  obj.setStatsData(data);
  stats.addToStatsObj(obj);
  obj = new ColumnStatisticsObj();
  obj.setColName(decimalcol);
  obj.setColType("decimal(5,3)");
  data = new ColumnStatisticsData();
  DecimalColumnStatsData decData = new DecimalColumnStatsData();
  LOG.debug("Setting decimal high value to " + decHigh.getScale() + " <"
      + new String(decHigh.getUnscaled()) + ">");
  decData.setHighValue(decHigh);
  decData.setLowValue(decLow);
  decData.setNumNulls(decNulls);
  decData.setNumDVs(decDVs);
  data.setDecimalStats(decData);
  obj.setStatsData(data);
  stats.addToStatsObj(obj);
  store.updateTableColumnStatistics(stats);
  stats = store.getTableColumnStatistics(dbname, tableName,
      Arrays.asList(boolcol, longcol, doublecol, stringcol, binarycol, decimalcol));
  Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed());
  Assert.assertEquals(dbname, stats.getStatsDesc().getDbName());
  Assert.assertEquals(tableName, stats.getStatsDesc().getTableName());
  Assert.assertTrue(stats.getStatsDesc().isIsTblLevel());
  Assert.assertEquals(6, stats.getStatsObjSize());
  ColumnStatisticsData colData = stats.getStatsObj().get(0).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, colData.getSetField());
  boolData = colData.getBooleanStats();
  Assert.assertEquals(trues, boolData.getNumTrues());
  Assert.assertEquals(falses, boolData.getNumFalses());
  Assert.assertEquals(booleanNulls, boolData.getNumNulls());
  colData = stats.getStatsObj().get(1).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, colData.getSetField());
  longData = colData.getLongStats();
  Assert.assertEquals(longHigh, longData.getHighValue());
  Assert.assertEquals(longLow, longData.getLowValue());
  Assert.assertEquals(longNulls, longData.getNumNulls());
  Assert.assertEquals(longDVs, longData.getNumDVs());
  colData = stats.getStatsObj().get(2).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, colData.getSetField());
  doubleData = colData.getDoubleStats();
  Assert.assertEquals(doubleHigh, doubleData.getHighValue(), 0.01);
  Assert.assertEquals(doubleLow, doubleData.getLowValue(), 0.01);
  Assert.assertEquals(doubleNulls, doubleData.getNumNulls());
  Assert.assertEquals(doubleDVs, doubleData.getNumDVs());
  colData = stats.getStatsObj().get(3).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, colData.getSetField());
  strData = colData.getStringStats();
  Assert.assertEquals(strMaxLen, strData.getMaxColLen());
  Assert.assertEquals(strAvgLen, strData.getAvgColLen(), 0.01);
  Assert.assertEquals(strNulls, strData.getNumNulls());
  Assert.assertEquals(strDVs, strData.getNumDVs());
  colData = stats.getStatsObj().get(4).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.BINARY_STATS, colData.getSetField());
  binData = colData.getBinaryStats();
  Assert.assertEquals(binMaxLen, binData.getMaxColLen());
  Assert.assertEquals(binAvgLen, binData.getAvgColLen(), 0.01);
  Assert.assertEquals(binNulls, binData.getNumNulls());
  colData = stats.getStatsObj().get(5).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, colData.getSetField());
  decData = colData.getDecimalStats();
  Assert.assertEquals(decHigh, decData.getHighValue());
  Assert.assertEquals(decLow, decData.getLowValue());
  Assert.assertEquals(decNulls, decData.getNumNulls());
  Assert.assertEquals(decDVs, decData.getNumDVs());
}
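The hand-rolled Decimal construction above uses ASCII bytes for the unscaled value (hence the "doesn't matter" comment); the test only needs the value to round-trip through the store unchanged. If the unscaled field is meant to carry the two's-complement bytes of a BigDecimal's unscaled value (an assumption; this test never decodes it), a more principled construction would look like:

// Sketch, assuming Decimal.unscaled holds BigInteger.toByteArray() bytes
// (big-endian two's-complement). The test above never relies on this.
static Decimal toThriftDecimal(java.math.BigDecimal bd) {
  Decimal d = new Decimal();
  d.setScale((short) bd.scale());
  d.setUnscaled(bd.unscaledValue().toByteArray());
  return d;
}
// e.g. toThriftDecimal(new java.math.BigDecimal("3.876")) gives scale 3, unscaled 3876.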
Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache, from the class TestHBaseStoreIntegration, method grantRevokeTablePrivileges().
@Test
public void grantRevokeTablePrivileges() throws Exception {
  String dbName = "grtp_db";
  String tableName = "grtp_table";
  try {
    Database db = new Database(dbName, "no description", "file:///tmp", emptyParameters);
    store.createDatabase(db);
    int startTime = (int) (System.currentTimeMillis() / 1000);
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("col1", "int", "nocomment"));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
        serde, null, null, emptyParameters);
    Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, null,
        emptyParameters, null, null, null);
    store.createTable(table);
    doGrantRevoke(HiveObjectType.TABLE, dbName, tableName,
        new String[] { "grtp_role1", "grtp_role2" },
        new String[] { "batman", "robin", "superman", "wonderwoman" });
  } finally {
    if (store.getTable(dbName, tableName) != null) {
      store.dropTable(dbName, tableName);
    }
    store.dropDatabase(dbName);
  }
}
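doGrantRevoke() is a private helper of this test class whose body is not shown here. For orientation, a hedged sketch of the kind of grant it presumably issues, using the Thrift privilege objects as they existed in Hive 2.x (later versions add an authorizer argument to HiveObjectPrivilege):

// Sketch only: grant SELECT on the table to one user via the RawStore API.
HiveObjectRef tableRef = new HiveObjectRef(HiveObjectType.TABLE, dbName, tableName, null, null);
PrivilegeGrantInfo grantInfo =
    new PrivilegeGrantInfo("select", startTime, "me", PrincipalType.USER, false);
HiveObjectPrivilege priv =
    new HiveObjectPrivilege(tableRef, "batman", PrincipalType.USER, grantInfo);
store.grantPrivileges(new PrivilegeBag(Arrays.asList(priv)));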
Use of org.apache.hadoop.hive.metastore.api.SerDeInfo in project hive by apache, from the class TestHBaseStoreIntegration, method getPartitionsByFilter().
@Test
public void getPartitionsByFilter() throws Exception {
  String dbName = "default";
  String tableName = "getPartitionsByFilter";
  int startTime = (int) (System.currentTimeMillis() / 1000);
  List<FieldSchema> cols = new ArrayList<FieldSchema>();
  cols.add(new FieldSchema("col1", "int", "nocomment"));
  SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
  StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0,
      serde, null, null, emptyParameters);
  List<FieldSchema> partCols = new ArrayList<FieldSchema>();
  partCols.add(new FieldSchema("ds", "string", ""));
  partCols.add(new FieldSchema("region", "string", ""));
  Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols,
      emptyParameters, null, null, null);
  store.createTable(table);
  String[][] partVals = new String[][] { { "20010101", "north america" },
      { "20010101", "europe" }, { "20010102", "north america" }, { "20010102", "europe" },
      { "20010103", "north america" } };
  for (String[] pv : partVals) {
    List<String> vals = new ArrayList<String>(Arrays.asList(pv));
    StorageDescriptor psd = new StorageDescriptor(sd);
    psd.setLocation("file:/tmp/ds=" + pv[0] + "/region=" + pv[1]);
    Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd,
        emptyParameters);
    store.addPartition(part);
  }
  // We only test getPartitionsByFilter, since it calls the same code as getPartitionsByExpr.
  List<Partition> parts;
  parts = store.getPartitionsByFilter(dbName, tableName, "ds > '20010101'", (short) -1);
  checkPartVals(parts, "[20010102, north america]", "[20010102, europe]",
      "[20010103, north america]");
  parts = store.getPartitionsByFilter(dbName, tableName, "ds >= '20010102'", (short) -1);
  checkPartVals(parts, "[20010102, north america]", "[20010102, europe]",
      "[20010103, north america]");
  parts = store.getPartitionsByFilter(dbName, tableName,
      "ds >= '20010102' and region = 'europe' ", (short) -1);
  // Only filtering on the first partition column is implemented as of now, so this will not
  // filter on region.
  checkPartVals(parts, "[20010102, north america]", "[20010102, europe]",
      "[20010103, north america]");
  parts = store.getPartitionsByFilter(dbName, tableName, "ds >= '20010101' and ds < '20010102'",
      (short) -1);
  checkPartVals(parts, "[20010101, north america]", "[20010101, europe]");
  parts = store.getPartitionsByFilter(dbName, tableName, "ds = '20010102' or ds < '20010103'",
      (short) -1);
  checkPartVals(parts, "[20010101, north america]", "[20010101, europe]",
      "[20010102, north america]", "[20010102, europe]");
  // Test conversion to DNF.
  parts = store.getPartitionsByFilter(dbName, tableName,
      "ds = '20010102' and (ds = '20010102' or region = 'europe')", (short) -1);
  // Only filtering on the first partition column is implemented as of now, so this will not
  // filter on region.
  checkPartVals(parts, "[20010102, north america]", "[20010102, europe]");
  parts = store.getPartitionsByFilter(dbName, tableName, "region = 'europe'", (short) -1);
  // Only filtering on the first partition column is implemented as of now, so this will not
  // filter on region.
  checkPartVals(parts, "[20010101, north america]", "[20010101, europe]",
      "[20010102, north america]", "[20010102, europe]", "[20010103, north america]");
}
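checkPartVals() is another private helper not shown in this section. The expected strings match List.toString() of each partition's value list, so a plausible reconstruction (an assumption, not the actual Hive helper) is:

// Hypothetical reconstruction of checkPartVals: order-insensitive comparison
// of each partition's values (via List.toString()) against the expected set.
private static void checkPartVals(List<Partition> parts, String... expected) {
  Assert.assertEquals(expected.length, parts.size());
  Set<String> remaining = new HashSet<>(Arrays.asList(expected));
  for (Partition p : parts) {
    Assert.assertTrue("unexpected partition " + p.getValues(),
        remaining.remove(p.getValues().toString()));
  }
}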