use of org.apache.hadoop.hive.metastore.api.PartitionSpec in project hive by apache.
the class TestMetaStoreServerUtils method testGetPartitionspecsGroupedBySDonePartitionCombined.
/**
* Test getPartitionspecsGroupedByStorageDescriptor() with multiple partitions:
* <ul>
* <li>Partition with null SD</li>
* <li>Two partitions under the table location</li>
* <li>One partition outside of table location</li>
* </ul>
*/
@Test
public void testGetPartitionspecsGroupedBySDonePartitionCombined() throws MetaException {
// Create database and table
String sharedInputFormat = "foo1";
Table tbl = new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("id", "int").setLocation("/foo").build(null);
Partition p1 = new PartitionBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).setLocation("/foo/bar").addCol("a1", "int").addValue("val1").setInputFormat(sharedInputFormat).build(null);
Partition p2 = new PartitionBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).setLocation("/a/b").addCol("a2", "int").addValue("val2").setInputFormat("foo2").build(null);
Partition p3 = new PartitionBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("a3", "int").addValue("val3").setInputFormat("foo3").build(null);
Partition p4 = new PartitionBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).setLocation("/foo/baz").addCol("a1", "int").addValue("val4").setInputFormat(sharedInputFormat).build(null);
p3.unsetSd();
List<PartitionSpec> result = MetaStoreServerUtils.getPartitionspecsGroupedByStorageDescriptor(tbl, Arrays.asList(p1, p2, p3, p4));
assertThat(result.size(), is(3));
PartitionSpec ps1 = result.get(0);
assertThat(ps1.getRootPath(), is((String) null));
assertThat(ps1.getPartitionList(), is((List<Partition>) null));
PartitionSpecWithSharedSD partSpec = ps1.getSharedSDPartitionSpec();
List<PartitionWithoutSD> partitions1 = partSpec.getPartitions();
assertThat(partitions1.size(), is(1));
PartitionWithoutSD partition1 = partitions1.get(0);
assertThat(partition1.getRelativePath(), is((String) null));
assertThat(partition1.getValues(), is(Collections.singletonList("val3")));
PartitionSpec ps2 = result.get(1);
assertThat(ps2.getRootPath(), is(tbl.getSd().getLocation()));
assertThat(ps2.getPartitionList(), is((List<Partition>) null));
List<PartitionWithoutSD> partitions2 = ps2.getSharedSDPartitionSpec().getPartitions();
assertThat(partitions2.size(), is(2));
PartitionWithoutSD partition2_1 = partitions2.get(0);
PartitionWithoutSD partition2_2 = partitions2.get(1);
if (partition2_1.getRelativePath().equals("/baz")) {
// Swap p2_1 and p2_2
PartitionWithoutSD tmp = partition2_1;
partition2_1 = partition2_2;
partition2_2 = tmp;
}
assertThat(partition2_1.getRelativePath(), is("/bar"));
assertThat(partition2_1.getValues(), is(Collections.singletonList("val1")));
assertThat(partition2_2.getRelativePath(), is("/baz"));
assertThat(partition2_2.getValues(), is(Collections.singletonList("val4")));
PartitionSpec ps4 = result.get(2);
assertThat(ps4.getRootPath(), is((String) null));
assertThat(ps4.getSharedSDPartitionSpec(), is((PartitionSpecWithSharedSD) null));
List<Partition> partitions = ps4.getPartitionList().getPartitions();
assertThat(partitions.size(), is(1));
Partition partition = partitions.get(0);
assertThat(partition.getSd().getLocation(), is("/a/b"));
assertThat(partition.getValues(), is(Collections.singletonList("val2")));
}
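The test above exercises both shapes a grouped result can take: partitions that share a storage descriptor under the table root come back as a PartitionSpecWithSharedSD holding PartitionWithoutSD entries (root path plus relative path), while partitions that keep their own storage descriptor come back through a PartitionListComposingSpec. Below is a minimal consumer sketch, not part of the test, that walks both forms; it assumes only the Thrift-generated accessors already used above, and printGroupedSpecs is a hypothetical helper name.
// Assumes the usual org.apache.hadoop.hive.metastore.api imports (PartitionSpec, Partition, PartitionWithoutSD).
static void printGroupedSpecs(List<PartitionSpec> specs) {
for (PartitionSpec spec : specs) {
if (spec.isSetSharedSDPartitionSpec()) {
// Shared-SD form: each entry carries only its values and a path relative to the spec root.
// Root path and relative path may both be null for a partition that had no SD at all.
for (PartitionWithoutSD p : spec.getSharedSDPartitionSpec().getPartitions()) {
System.out.println(spec.getRootPath() + p.getRelativePath() + " -> " + p.getValues());
}
} else if (spec.isSetPartitionList()) {
// Composing form: each partition keeps its full storage descriptor and absolute location.
for (Partition p : spec.getPartitionList().getPartitions()) {
System.out.println(p.getSd().getLocation() + " -> " + p.getValues());
}
}
}
}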
use of org.apache.hadoop.hive.metastore.api.PartitionSpec in project metacat by Netflix.
the class CatalogThriftHiveMetastore method get_part_specs_by_filter.
/**
* {@inheritDoc}
*/
@Override
public List<PartitionSpec> get_part_specs_by_filter(final String dbName, final String tblName, final String filter, final int maxParts) throws TException {
// TODO: Handle the use case of grouping
return requestWrapper("get_partitions_pspec", new Object[] { dbName, tblName, filter, maxParts }, () -> {
final String databaseName = normalizeIdentifier(dbName);
final String tableName = normalizeIdentifier(tblName);
final TableDto tableDto = v1.getTable(catalogName, databaseName, tableName, true, false, false);
final GetPartitionsRequestDto dto = new GetPartitionsRequestDto(filter, null, true, false);
final List<PartitionDto> metacatPartitions = partV1.getPartitionsForRequest(catalogName, dbName, tblName, null, null, null, maxParts, false, dto);
final List<Partition> partitions = Lists.newArrayListWithCapacity(metacatPartitions.size());
for (PartitionDto partition : metacatPartitions) {
partitions.add(hiveConverters.metacatToHivePartition(partition, tableDto));
}
final PartitionSpec pSpec = new PartitionSpec();
pSpec.setPartitionList(new PartitionListComposingSpec(partitions));
pSpec.setDbName(dbName);
pSpec.setTableName(tblName);
if (tableDto != null && tableDto.getSerde() != null) {
pSpec.setRootPath(tableDto.getSerde().getUri());
}
return Arrays.asList(pSpec);
});
}
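The TODO above notes that grouping is not handled: this method always returns a single PartitionSpec backed by a PartitionListComposingSpec. The following is a purely hypothetical sketch of what a shared-SD grouping could look like if it were added; it is not metacat code, it only groups by location prefix (the real Hive helper, MetaStoreServerUtils.getPartitionspecsGroupedByStorageDescriptor, groups by equal storage descriptors), and it assumes the Thrift-generated PartitionSpecWithSharedSD and PartitionWithoutSD classes shown elsewhere on this page plus a non-empty partitions list.
// Hypothetical grouping sketch: collapse partitions that live under the table root into a shared-SD spec.
final String rootPath = tableDto.getSerde().getUri();
final List<PartitionWithoutSD> shared = new ArrayList<>();
for (Partition partition : partitions) {
final String location = partition.getSd().getLocation();
if (location != null && location.startsWith(rootPath)) {
final PartitionWithoutSD p = new PartitionWithoutSD();
p.setValues(partition.getValues());
p.setRelativePath(location.substring(rootPath.length()));
p.setParameters(partition.getParameters());
shared.add(p);
}
}
final PartitionSpec sharedSpec = new PartitionSpec();
sharedSpec.setDbName(dbName);
sharedSpec.setTableName(tblName);
sharedSpec.setRootPath(rootPath);
// Reuse the first partition's SD as the shared descriptor; a real implementation would compare SDs.
sharedSpec.setSharedSDPartitionSpec(new PartitionSpecWithSharedSD(shared, partitions.get(0).getSd()));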
use of org.apache.hadoop.hive.metastore.api.PartitionSpec in project hive by apache.
the class SessionHiveMetaStoreClient method getPartitionSpecProxy.
private PartitionSpecProxy getPartitionSpecProxy(org.apache.hadoop.hive.metastore.api.Table table, List<Partition> partitions, int maxParts) throws MetaException {
List<PartitionSpec> partitionSpecs;
PartitionSpec partitionSpec = new PartitionSpec();
PartitionListComposingSpec partitionListComposingSpec = new PartitionListComposingSpec(new ArrayList<>());
for (int i = 0; i < ((maxParts < 0 || maxParts > partitions.size()) ? partitions.size() : maxParts); i++) {
partitionListComposingSpec.addToPartitions(deepCopy(partitions.get(i)));
}
partitionSpec.setCatName(table.getCatName());
partitionSpec.setDbName(table.getDbName());
partitionSpec.setTableName(table.getTableName());
partitionSpec.setRootPath(table.getSd().getLocation());
partitionSpec.setPartitionList(partitionListComposingSpec);
partitionSpecs = Arrays.asList(partitionSpec);
return PartitionSpecProxy.Factory.get(partitionSpecs);
}
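A caller normally iterates the returned proxy rather than touching the underlying PartitionSpec objects. A short usage sketch, assuming PartitionSpecProxy's size() and getPartitionIterator() accessors; 'table', 'partitions' and 'maxParts' stand in for whatever the caller already has.
PartitionSpecProxy proxy = getPartitionSpecProxy(table, partitions, maxParts);
System.out.println("Partitions in proxy: " + proxy.size());
PartitionSpecProxy.PartitionIterator iterator = proxy.getPartitionIterator();
while (iterator.hasNext()) {
// Each element is a full Partition regardless of how the underlying spec stores it.
Partition p = iterator.next();
System.out.println(p.getValues() + " -> " + p.getSd().getLocation());
}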
use of org.apache.hadoop.hive.metastore.api.PartitionSpec in project hive by apache.
the class Hive method loadDynamicPartitions.
/**
* Given a source directory name of the load path, load all dynamically generated partitions
* into the specified table and return a map from each full dynamic partition spec to the
* corresponding loaded partition.
* @param tbd table descriptor of the table to load into
* @param numLB number of list bucketing levels
* @param isAcid true if this is an ACID operation
* @param writeId write ID; can be 0 unless isAcid == true
* @param stmtId statement ID
* @param resetStatistics if true, reset the partition statistics; otherwise leave them unchanged
* @param operation ACID operation type
* @param partitionDetailsMap full dynamic partition specification, keyed by partition load path
* @return map from full partition spec (column to value) to the loaded Partition
* @throws HiveException
*/
public Map<Map<String, String>, Partition> loadDynamicPartitions(final LoadTableDesc tbd, final int numLB, final boolean isAcid, final long writeId, final int stmtId, final boolean resetStatistics, final AcidUtils.Operation operation, Map<Path, PartitionDetails> partitionDetailsMap) throws HiveException {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.perfLogBegin("MoveTask", PerfLogger.LOAD_DYNAMIC_PARTITIONS);
final Path loadPath = tbd.getSourcePath();
final Table tbl = getTable(tbd.getTable().getTableName());
final Map<String, String> partSpec = tbd.getPartitionSpec();
final AtomicInteger partitionsLoaded = new AtomicInteger(0);
final boolean inPlaceEligible = conf.getLong("fs.trash.interval", 0) <= 0 && InPlaceUpdate.canRenderInPlace(conf) && !SessionState.getConsole().getIsSilent();
final PrintStream ps = (inPlaceEligible) ? SessionState.getConsole().getInfoStream() : null;
final SessionState parentSession = SessionState.get();
List<Callable<Partition>> tasks = Lists.newLinkedList();
boolean fetchPartitionInfo = true;
final boolean scanPartitionsByName = HiveConf.getBoolVar(conf, HIVE_LOAD_DYNAMIC_PARTITIONS_SCAN_SPECIFIC_PARTITIONS);
// for every dynamic partition
if (scanPartitionsByName && !tbd.isDirectInsert() && !AcidUtils.isTransactionalTable(tbl)) {
// Fetch only relevant partitions from HMS for checking old partitions
List<String> partitionNames = new LinkedList<>();
for (PartitionDetails details : partitionDetailsMap.values()) {
if (details.fullSpec != null && !details.fullSpec.isEmpty()) {
partitionNames.add(Warehouse.makeDynamicPartNameNoTrailingSeperator(details.fullSpec));
}
}
List<Partition> partitions = Hive.get().getPartitionsByNames(tbl, partitionNames);
for (Partition partition : partitions) {
LOG.debug("HMS partition spec: {}", partition.getSpec());
partitionDetailsMap.entrySet().parallelStream().filter(entry -> entry.getValue().fullSpec.equals(partition.getSpec())).findAny().ifPresent(entry -> {
entry.getValue().partition = partition;
entry.getValue().hasOldPartition = true;
});
}
// no need to fetch partition again in tasks since we have already fetched partitions
// info in getPartitionsByNames()
fetchPartitionInfo = false;
}
boolean isTxnTable = AcidUtils.isTransactionalTable(tbl);
AcidUtils.TableSnapshot tableSnapshot = isTxnTable ? getTableSnapshot(tbl, writeId) : null;
for (Entry<Path, PartitionDetails> entry : partitionDetailsMap.entrySet()) {
boolean getPartitionFromHms = fetchPartitionInfo;
tasks.add(() -> {
PartitionDetails partitionDetails = entry.getValue();
Map<String, String> fullPartSpec = partitionDetails.fullSpec;
try {
SessionState.setCurrentSessionState(parentSession);
if (getPartitionFromHms) {
// didn't fetch partition info from HMS. Getting from HMS now.
Partition existing = getPartition(tbl, fullPartSpec, false);
if (existing != null) {
partitionDetails.partition = existing;
partitionDetails.hasOldPartition = true;
}
}
LOG.info("New loading path = " + entry.getKey() + " withPartSpec " + fullPartSpec);
Partition oldPartition = partitionDetails.partition;
List<FileStatus> newFiles = null;
if (partitionDetails.newFiles != null) {
// If we already know the files from the direct insert manifest, use them
newFiles = partitionDetails.newFiles;
} else if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && oldPartition == null) {
// Otherwise only collect them, if we are going to fire write notifications
newFiles = Collections.synchronizedList(new ArrayList<>());
}
// load the partition
Partition partition = loadPartitionInternal(entry.getKey(), tbl, fullPartSpec, oldPartition, tbd.getLoadFileType(), true, false, numLB > 0, false, isAcid, resetStatistics, writeId, stmtId, tbd.isInsertOverwrite(), isTxnTable, newFiles, tbd.isDirectInsert());
// metastore
if (tableSnapshot != null) {
partition.getTPartition().setWriteId(tableSnapshot.getWriteId());
}
partitionDetails.tableSnapshot = tableSnapshot;
if (oldPartition == null) {
partitionDetails.newFiles = newFiles;
partitionDetails.partition = partition;
}
if (inPlaceEligible) {
synchronized (ps) {
InPlaceUpdate.rePositionCursor(ps);
partitionsLoaded.incrementAndGet();
InPlaceUpdate.reprintLine(ps, "Loaded : " + partitionsLoaded.get() + "/" + partitionDetailsMap.size() + " partitions.");
}
}
return partition;
} catch (Exception e) {
LOG.error("Exception when loading partition with parameters " + " partPath=" + entry.getKey() + ", " + " table=" + tbl.getTableName() + ", " + " partSpec=" + fullPartSpec + ", " + " loadFileType=" + tbd.getLoadFileType().toString() + ", " + " listBucketingLevel=" + numLB + ", " + " isAcid=" + isAcid + ", " + " resetStatistics=" + resetStatistics, e);
throw e;
} finally {
// get(conf).getMSC can be called in this task, Close the HMS connection right after use, do not wait for finalizer to close it.
closeCurrent();
}
});
}
int poolSize = conf.getInt(ConfVars.HIVE_LOAD_DYNAMIC_PARTITIONS_THREAD_COUNT.varname, 1);
ExecutorService executor = Executors.newFixedThreadPool(poolSize, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("load-dynamic-partitionsToAdd-%d").build());
List<Future<Partition>> futures = Lists.newLinkedList();
Map<Map<String, String>, Partition> result = Maps.newLinkedHashMap();
try {
futures = executor.invokeAll(tasks);
LOG.info("Number of partitionsToAdd to be added is " + futures.size());
for (Future<Partition> future : futures) {
Partition partition = future.get();
result.put(partition.getSpec(), partition);
}
// add new partitions in batch
addPartitionsToMetastore(partitionDetailsMap.entrySet().stream().filter(entry -> !entry.getValue().hasOldPartition).map(entry -> entry.getValue().partition).collect(Collectors.toList()), resetStatistics, tbl, partitionDetailsMap.entrySet().stream().filter(entry -> !entry.getValue().hasOldPartition).map(entry -> entry.getValue().tableSnapshot).collect(Collectors.toList()));
// For acid table, add the acid_write event with file list at the time of load itself. But
// it should be done after partition is created.
List<WriteNotificationLogRequest> requestList = new ArrayList<>();
int maxBatchSize = conf.getIntVar(HIVE_WRITE_NOTIFICATION_MAX_BATCH_SIZE);
for (Entry<Path, PartitionDetails> entry : partitionDetailsMap.entrySet()) {
PartitionDetails partitionDetails = entry.getValue();
if (isTxnTable && partitionDetails.newFiles != null) {
addWriteNotificationLog(tbl, partitionDetails.fullSpec, partitionDetails.newFiles, writeId, requestList);
if (requestList != null && requestList.size() >= maxBatchSize) {
// If the first call returns that the HMS does not supports batching, avoid batching
// for later requests.
boolean batchSupported = addWriteNotificationLogInBatch(tbl, requestList);
if (batchSupported) {
requestList.clear();
} else {
requestList = null;
}
}
}
}
if (requestList != null && requestList.size() > 0) {
addWriteNotificationLogInBatch(tbl, requestList);
}
setStatsPropAndAlterPartitions(resetStatistics, tbl, partitionDetailsMap.entrySet().stream().filter(entry -> entry.getValue().hasOldPartition).map(entry -> entry.getValue().partition).collect(Collectors.toList()), tableSnapshot);
} catch (InterruptedException | ExecutionException e) {
throw new HiveException("Exception when loading " + partitionDetailsMap.size() + " partitions" + " in table " + tbl.getTableName() + " with loadPath=" + loadPath, e);
} catch (TException e) {
LOG.error("Failed loadDynamicPartitions", e);
throw new HiveException(e);
} catch (Exception e) {
StringBuffer logMsg = new StringBuffer();
logMsg.append("Exception when loading partitionsToAdd with parameters ");
logMsg.append("partPaths=");
partitionDetailsMap.keySet().forEach(path -> logMsg.append(path + ", "));
logMsg.append("table=" + tbl.getTableName() + ", ").append("partSpec=" + partSpec + ", ").append("loadFileType=" + tbd.getLoadFileType().toString() + ", ").append("listBucketingLevel=" + numLB + ", ").append("isAcid=" + isAcid + ", ").append("resetStatistics=" + resetStatistics);
LOG.error(logMsg.toString(), e);
throw e;
} finally {
LOG.debug("Cancelling " + futures.size() + " dynamic loading tasks");
executor.shutdownNow();
}
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_IN_TEST) && HiveConf.getBoolVar(conf, ConfVars.HIVETESTMODEFAILLOADDYNAMICPARTITION)) {
throw new HiveException(HiveConf.ConfVars.HIVETESTMODEFAILLOADDYNAMICPARTITION.name() + "=true");
}
try {
if (isTxnTable) {
List<String> partNames = result.values().stream().map(Partition::getName).collect(Collectors.toList());
getMSC().addDynamicPartitions(parentSession.getTxnMgr().getCurrentTxnId(), writeId, tbl.getDbName(), tbl.getTableName(), partNames, AcidUtils.toDataOperationType(operation));
}
LOG.info("Loaded " + result.size() + "partitionsToAdd");
perfLogger.perfLogEnd("MoveTask", PerfLogger.LOAD_DYNAMIC_PARTITIONS);
return result;
} catch (TException te) {
LOG.error("Failed loadDynamicPartitions", te);
throw new HiveException("Exception updating metastore for acid table " + tbd.getTable().getTableName() + " with partitions " + result.values(), te);
}
}
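The returned map is keyed by the full dynamic partition spec (partition column to value) with the loaded Partition as the value. A minimal caller-side sketch, where hive, tbd, partitionDetailsMap and the remaining arguments are assumed to already exist in the calling MoveTask-style code:
Map<Map<String, String>, Partition> loaded = hive.loadDynamicPartitions(
tbd, numLB, isAcid, writeId, stmtId, resetStatistics, operation, partitionDetailsMap);
for (Map.Entry<Map<String, String>, Partition> e : loaded.entrySet()) {
// Key: e.g. {ds=2024-01-01, hr=00}; value: the Partition object now registered in the metastore.
LOG.info("Loaded dynamic partition {} at {}", e.getKey(), e.getValue().getDataLocation());
}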
use of org.apache.hadoop.hive.metastore.api.PartitionSpec in project hive by apache.
the class TestSessionHiveMetastoreClientListPartitionsTempTable method testListPartitionsSpecByExprHighMaxParts.
@Test
public void testListPartitionsSpecByExprHighMaxParts() throws Exception {
Table t = createTable4PartColsParts(getClient()).table;
TestMetastoreExpr.ExprBuilder e = new TestMetastoreExpr.ExprBuilder(TABLE_NAME);
List<PartitionSpec> result = new ArrayList<>();
PartitionsByExprRequest req = new PartitionsByExprRequest(DB_NAME, TABLE_NAME, ByteBuffer.wrap(SerializationUtilities.serializeExpressionToKryo(e.strCol("yyyy").val("2017").pred(">=", 2).build())));
req.setMaxParts((short) 100);
req.setId(t.getId());
getClient().listPartitionsSpecByExpr(req, result);
assertEquals(4, result.iterator().next().getSharedSDPartitionSpec().getPartitionsSize());
}