Use of org.apache.hadoop.hive.ql.metadata.Partition in the Apache Hive project.
From the class TestExplainTask, method testGetJSONDependenciesJsonShhouldMatch:
@Test
public void testGetJSONDependenciesJsonShhouldMatch() throws Exception {
  ExplainWork work = mockExplainWork();
  when(work.getDependency()).thenReturn(true);
  // Mock inputs
  HashSet<ReadEntity> inputs = new HashSet<>();
  // One input table
  Table table = mock(Table.class);
  when(table.getCompleteName()).thenReturn("table-name-mock");
  when(table.getTableType()).thenReturn(TableType.EXTERNAL_TABLE);
  ReadEntity input1 = mock(ReadEntity.class);
  when(input1.getType()).thenReturn(Entity.Type.TABLE);
  when(input1.getTable()).thenReturn(table);
  inputs.add(input1);
  // And one partition
  Partition partition = mock(Partition.class);
  when(partition.getCompleteName()).thenReturn("partition-name-mock");
  ReadEntity input2 = mock(ReadEntity.class);
  when(input2.getType()).thenReturn(Entity.Type.PARTITION);
  when(input2.getPartition()).thenReturn(partition);
  inputs.add(input2);
  when(work.getInputs()).thenReturn(inputs);
  JsonNode result = objectMapper.readTree(ExplainTask.getJSONDependencies(work).toString());
  JsonNode expected = objectMapper.readTree("{\"input_partitions\":[{\"partitionName\":"
      + "\"partition-name-mock\"}],\"input_tables\":[{\"tablename\":\"table-name-mock\","
      + "\"tabletype\":\"EXTERNAL_TABLE\"}]}");
  assertEquals(expected, result);
}
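The test compares the explain output structurally rather than textually: both the expected and the actual JSON are parsed into Jackson JsonNode trees, so key order and whitespace cannot cause spurious failures. A minimal, self-contained sketch of that comparison pattern, using only Jackson and JUnit (no Hive classes), might look like this:
import static org.junit.Assert.assertEquals;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.Test;
public class JsonTreeComparisonTest {
  private final ObjectMapper objectMapper = new ObjectMapper();
  @Test
  public void treesWithDifferentKeyOrderAreEqual() throws Exception {
    // Parsing into JsonNode trees makes equality independent of key order and formatting,
    // which is why the Hive test compares readTree(...) results instead of raw strings.
    JsonNode expected = objectMapper.readTree("{\"input_tables\":[{\"tablename\":\"t\"}],\"input_partitions\":[]}");
    JsonNode actual = objectMapper.readTree("{\"input_partitions\":[],\"input_tables\":[{\"tablename\":\"t\"}]}");
    assertEquals(expected, actual);
  }
}
Note that JSON object fields compare order-insensitively under JsonNode equality, while array elements remain order-sensitive, which suits the expected output above.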
Use of org.apache.hadoop.hive.ql.metadata.Partition in the Apache Hive project.
From the class BasicStatsNoJobTask, method updatePartitions:
private int updatePartitions(Hive db, List<FooterStatCollector> scs, Table table) throws InvalidOperationException, HiveException {
  String tableFullName = table.getFullyQualifiedName();
  if (scs.isEmpty()) {
    return 0;
  }
  if (work.isStatsReliable()) {
    for (FooterStatCollector statsCollection : scs) {
      if (statsCollection.result == null) {
        LOG.debug("Stats requested to be reliable. Empty stats found: {}", statsCollection.partish.getSimpleName());
        return -1;
      }
    }
  }
  List<FooterStatCollector> validColectors = Lists.newArrayList();
  for (FooterStatCollector statsCollection : scs) {
    if (statsCollection.isValid()) {
      validColectors.add(statsCollection);
    }
  }
  EnvironmentContext environmentContext = new EnvironmentContext();
  environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
  ImmutableListMultimap<String, FooterStatCollector> collectorsByTable = Multimaps.index(validColectors, FooterStatCollector.SIMPLE_NAME_FUNCTION);
  LOG.debug("Collectors.size(): {}", collectorsByTable.keySet());
  if (collectorsByTable.keySet().size() < 1) {
    LOG.warn("Collectors are empty! ; {}", tableFullName);
  }
  // for now this should be true...
  assert (collectorsByTable.keySet().size() <= 1);
  LOG.debug("Updating stats for: {}", tableFullName);
  for (String partName : collectorsByTable.keySet()) {
    ImmutableList<FooterStatCollector> values = collectorsByTable.get(partName);
    if (values == null) {
      throw new RuntimeException("very intresting");
    }
    if (values.get(0).result instanceof Table) {
      db.alterTable(tableFullName, (Table) values.get(0).result, environmentContext);
      LOG.debug("Updated stats for {}.", tableFullName);
    } else {
      if (values.get(0).result instanceof Partition) {
        List<Partition> results = Lists.transform(values, FooterStatCollector.EXTRACT_RESULT_FUNCTION);
        db.alterPartitions(tableFullName, results, environmentContext);
        LOG.debug("Bulk updated {} partitions of {}.", results.size(), tableFullName);
      } else {
        throw new RuntimeException("inconsistent");
      }
    }
  }
  LOG.debug("Updated stats for: {}", tableFullName);
  return 0;
}
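The method groups the valid collectors with Guava's Multimaps.index, keyed by FooterStatCollector.SIMPLE_NAME_FUNCTION, and then issues one alterTable or one bulk alterPartitions call per group. A minimal sketch of that grouping step, with a hypothetical Collector class standing in for FooterStatCollector, could be:
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.Multimaps;
import java.util.Arrays;
import java.util.List;
public class GroupBySimpleNameDemo {
  // Hypothetical stand-in for FooterStatCollector; only the grouping key matters here.
  static class Collector {
    final String simpleName;
    Collector(String simpleName) { this.simpleName = simpleName; }
  }
  public static void main(String[] args) {
    List<Collector> collectors = Arrays.asList(new Collector("db.tbl"), new Collector("db.tbl"), new Collector("db.other"));
    // Multimaps.index groups elements by the key the function extracts,
    // mirroring how updatePartitions indexes collectors by SIMPLE_NAME_FUNCTION.
    ImmutableListMultimap<String, Collector> grouped = Multimaps.index(collectors, c -> c.simpleName);
    grouped.asMap().forEach((name, group) -> System.out.println(name + " -> " + group.size() + " collector(s)"));
  }
}
Because Multimaps.index preserves insertion order within each key, values.get(0) in the Hive code is simply the first collector that produced a given name.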
Use of org.apache.hadoop.hive.ql.metadata.Partition in the Apache Hive project.
From the class StatsUtils, method getFileSizeForPartitions:
/**
 * Find the bytes on disks occupied by list of partitions
 * @param conf - hive conf
 * @param parts - partition list
 * @return sizes of partitions
 */
public static List<Long> getFileSizeForPartitions(final HiveConf conf, List<Partition> parts) {
  LOG.info("Number of partitions : " + parts.size());
  ArrayList<Future<Long>> futures = new ArrayList<>();
  int threads = Math.max(1, conf.getIntVar(ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT));
  final ExecutorService pool = Executors.newFixedThreadPool(threads, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Partitions-Size-%d").build());
  final ArrayList<Long> sizes = new ArrayList<>(parts.size());
  for (final Partition part : parts) {
    final Path path = part.getDataLocation();
    futures.add(pool.submit(new Callable<Long>() {
      @Override
      public Long call() throws Exception {
        try {
          LOG.debug("Partition path : " + path);
          FileSystem fs = path.getFileSystem(conf);
          return fs.getContentSummary(path).getLength();
        } catch (IOException e) {
          return 0L;
        }
      }
    }));
  }
  try {
    for (int i = 0; i < futures.size(); i++) {
      sizes.add(i, futures.get(i).get());
    }
  } catch (InterruptedException | ExecutionException e) {
    LOG.warn("Exception in processing files ", e);
  } finally {
    pool.shutdownNow();
  }
  return sizes;
}
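getFileSizeForPartitions fans the getContentSummary calls out over a bounded pool of daemon threads, one Callable per partition, and maps any IOException to a size of 0 so a single unreadable path does not fail the whole estimate. A self-contained sketch of the same pattern against local files (the paths below are hypothetical) could look like:
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
public class ParallelSizeDemo {
  public static void main(String[] args) {
    // Hypothetical local paths standing in for partition data locations.
    List<Path> paths = Arrays.asList(Paths.get("/tmp/p1"), Paths.get("/tmp/p2"));
    // Bounded daemon pool, same shape as the Hive code.
    ExecutorService pool = Executors.newFixedThreadPool(2,
        new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Size-%d").build());
    List<Future<Long>> futures = new ArrayList<>();
    for (Path p : paths) {
      Callable<Long> task = () -> {
        try {
          return Files.size(p);
        } catch (Exception e) {
          // Mirror the snippet: treat unreadable paths as empty rather than failing.
          return 0L;
        }
      };
      futures.add(pool.submit(task));
    }
    List<Long> sizes = new ArrayList<>();
    try {
      for (Future<Long> f : futures) {
        sizes.add(f.get());
      }
    } catch (InterruptedException | ExecutionException e) {
      System.err.println("Exception in processing files: " + e);
    } finally {
      pool.shutdownNow();
    }
    System.out.println(sizes);
  }
}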
Use of org.apache.hadoop.hive.ql.metadata.Partition in the Apache Hive project.
From the class DDLTask, method unarchive:
private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc) throws HiveException, URISyntaxException {
  Table tbl = db.getTable(simpleDesc.getTableName());
  // Means user specified a table, not a partition
  if (simpleDesc.getPartSpec() == null) {
    throw new HiveException("UNARCHIVE is for partitions only");
  }
  if (tbl.getTableType() != TableType.MANAGED_TABLE) {
    throw new HiveException("UNARCHIVE can only be performed on managed tables");
  }
  Map<String, String> partSpec = simpleDesc.getPartSpec();
  PartSpecInfo partSpecInfo = PartSpecInfo.create(tbl, partSpec);
  List<Partition> partitions = db.getPartitions(tbl, partSpec);
  int partSpecLevel = partSpec.size();
  Path originalDir = null;
  // to keep backward compatibility
  if (partitions.isEmpty()) {
    throw new HiveException("No partition matches the specification");
  } else if (partSpecInfo.values.size() != tbl.getPartCols().size()) {
    // for partial specifications we need partitions to follow the scheme
    for (Partition p : partitions) {
      if (partitionInCustomLocation(tbl, p)) {
        String message = String.format("UNARCHIVE cannot run for partition " + "groups with custom locations like %s", p.getLocation());
        throw new HiveException(message);
      }
    }
    originalDir = partSpecInfo.createPath(tbl);
  } else {
    Partition p = partitions.get(0);
    if (ArchiveUtils.isArchived(p)) {
      originalDir = new Path(getOriginalLocation(p));
    } else {
      originalDir = new Path(p.getLocation());
    }
  }
  URI originalUri = ArchiveUtils.addSlash(originalDir.toUri());
  Path intermediateArchivedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
  Path intermediateExtractedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_EXTRACTED_DIR_SUFFIX);
  boolean recovery = false;
  if (pathExists(intermediateArchivedDir) || pathExists(intermediateExtractedDir)) {
    recovery = true;
    console.printInfo("Starting recovery after failed UNARCHIVE");
  }
  for (Partition p : partitions) {
    checkArchiveProperty(partSpecLevel, recovery, p);
  }
  String archiveName = "data.har";
  FileSystem fs = null;
  try {
    fs = originalDir.getFileSystem(conf);
  } catch (IOException e) {
    throw new HiveException(e);
  }
  // assume the archive is in the original dir, check if it exists
  Path archivePath = new Path(originalDir, archiveName);
  URI archiveUri = archivePath.toUri();
  ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri);
  URI sourceUri = harHelper.getHarUri(originalUri);
  Path sourceDir = new Path(sourceUri.getScheme(), sourceUri.getAuthority(), sourceUri.getPath());
  if (!pathExists(intermediateArchivedDir) && !pathExists(archivePath)) {
    throw new HiveException("Haven't found any archive where it should be");
  }
  Path tmpPath = driverContext.getCtx().getExternalTmpPath(originalDir);
  try {
    fs = tmpPath.getFileSystem(conf);
  } catch (IOException e) {
    throw new HiveException(e);
  }
  if (!pathExists(intermediateExtractedDir) && !pathExists(intermediateArchivedDir)) {
    try {
      // Copy the files out of the archive into the temporary directory
      String copySource = sourceDir.toString();
      String copyDest = tmpPath.toString();
      List<String> args = new ArrayList<String>();
      args.add("-cp");
      args.add(copySource);
      args.add(copyDest);
      console.printInfo("Copying " + copySource + " to " + copyDest);
      FileSystem srcFs = FileSystem.get(sourceDir.toUri(), conf);
      srcFs.initialize(sourceDir.toUri(), conf);
      FsShell fss = new FsShell(conf);
      int ret = 0;
      try {
        ret = ToolRunner.run(fss, args.toArray(new String[0]));
      } catch (Exception e) {
        e.printStackTrace();
        throw new HiveException(e);
      }
      if (ret != 0) {
        throw new HiveException("Error while copying files from archive, return code=" + ret);
      } else {
        console.printInfo("Successfully Copied " + copySource + " to " + copyDest);
      }
      console.printInfo("Moving " + tmpPath + " to " + intermediateExtractedDir);
      if (fs.exists(intermediateExtractedDir)) {
        throw new HiveException("Invalid state: the intermediate extracted " + "directory already exists.");
      }
      fs.rename(tmpPath, intermediateExtractedDir);
    } catch (Exception e) {
      throw new HiveException(e);
    }
  }
  if (!pathExists(intermediateArchivedDir)) {
    try {
      console.printInfo("Moving " + originalDir + " to " + intermediateArchivedDir);
      fs.rename(originalDir, intermediateArchivedDir);
    } catch (IOException e) {
      throw new HiveException(e);
    }
  } else {
    console.printInfo(intermediateArchivedDir + " already exists. " + "Assuming it contains the archived version of the partition");
  }
  // If the original location already exists at this point, it must hold the extracted files,
  // because the previous step moved the directory (containing the archived version of the files) to intermediateArchivedDir.
  if (!pathExists(originalDir)) {
    try {
      console.printInfo("Moving " + intermediateExtractedDir + " to " + originalDir);
      fs.rename(intermediateExtractedDir, originalDir);
    } catch (IOException e) {
      throw new HiveException(e);
    }
  } else {
    console.printInfo(originalDir + " already exists. " + "Assuming it contains the extracted files in the partition");
  }
  for (Partition p : partitions) {
    setUnArchived(p);
    try {
      db.alterPartition(simpleDesc.getTableName(), p, null);
    } catch (InvalidOperationException e) {
      throw new HiveException(e);
    }
  }
  // If a failure occurs past this point, the intermediate directories may not be
  // deleted. The user will need to call unarchive again to clear those up.
  if (pathExists(intermediateArchivedDir)) {
    deleteDir(intermediateArchivedDir);
  }
  if (recovery) {
    console.printInfo("Recovery after UNARCHIVE succeeded");
  }
  return 0;
}
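The unarchive flow is written to be restartable: the archive contents are first staged under the INTERMEDIATE_EXTRACTED directory, the original directory is then renamed aside to the INTERMEDIATE_ARCHIVED directory, the staged copy is renamed into place, and only after the metastore has been updated is the old copy deleted; every step checks pathExists so a previously failed run can skip work it already completed. A simplified sketch of that intermediate-directory rename protocol, written against the plain Hadoop FileSystem API with hypothetical local paths (not the actual DDLTask helpers), might be:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
public class IntermediateDirRenameDemo {
  // Replaces originalDir with the contents of extractedDir using the same idempotent
  // sequence as UNARCHIVE: each step is skipped if a previous run already completed it,
  // so a failed run can simply be retried.
  static void swapViaIntermediate(FileSystem fs, Path originalDir, Path extractedDir, Path archivedDir) throws IOException {
    // Step 1: move the current contents aside (skip if a previous run already did).
    if (!fs.exists(archivedDir) && fs.exists(originalDir)) {
      fs.rename(originalDir, archivedDir);
    }
    // Step 2: move the staged contents into place (skip if already done).
    if (!fs.exists(originalDir) && fs.exists(extractedDir)) {
      fs.rename(extractedDir, originalDir);
    }
    // Step 3: only now is it safe to drop the old contents.
    if (fs.exists(archivedDir)) {
      fs.delete(archivedDir, true);
    }
  }
  public static void main(String[] args) throws IOException {
    // Hypothetical paths on the local file system; the real task works against the warehouse FS.
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path base = new Path("/tmp/unarchive-demo");
    Path original = new Path(base, "part");
    Path extracted = new Path(base, "part.INTERMEDIATE_EXTRACTED");
    Path archived = new Path(base, "part.INTERMEDIATE_ARCHIVED");
    fs.mkdirs(original);
    fs.mkdirs(extracted);
    swapViaIntermediate(fs, original, extracted, archived);
  }
}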
Use of org.apache.hadoop.hive.ql.metadata.Partition in the Apache Hive project.
From the class DDLTask, method generateAddMmTasks:
private List<Task<?>> generateAddMmTasks(Table tbl) throws HiveException {
  // We will move all the files in the table/partition directories into the first MM
  // directory, then commit the first write ID.
  List<Path> srcs = new ArrayList<>(), tgts = new ArrayList<>();
  long mmWriteId = 0;
  try {
    HiveTxnManager txnManager = SessionState.get().getTxnMgr();
    if (txnManager.isTxnOpen()) {
      mmWriteId = txnManager.getTableWriteId(tbl.getDbName(), tbl.getTableName());
    } else {
      txnManager.openTxn(new Context(conf), conf.getUser());
      mmWriteId = txnManager.getTableWriteId(tbl.getDbName(), tbl.getTableName());
      txnManager.commitTxn();
    }
  } catch (Exception e) {
    String errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
    console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
  }
  int stmtId = 0;
  String mmDir = AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId);
  Hive db = getHive();
  if (tbl.getPartitionKeys().size() > 0) {
    PartitionIterable parts = new PartitionIterable(db, tbl, null, HiveConf.getIntVar(conf, ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
    Iterator<Partition> partIter = parts.iterator();
    while (partIter.hasNext()) {
      Partition part = partIter.next();
      checkMmLb(part);
      Path src = part.getDataLocation(), tgt = new Path(src, mmDir);
      srcs.add(src);
      tgts.add(tgt);
      if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
        Utilities.FILE_OP_LOGGER.trace("Will move " + src + " to " + tgt);
      }
    }
  } else {
    checkMmLb(tbl);
    Path src = tbl.getDataLocation(), tgt = new Path(src, mmDir);
    srcs.add(src);
    tgts.add(tgt);
    if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
      Utilities.FILE_OP_LOGGER.trace("Will move " + src + " to " + tgt);
    }
  }
  // Don't set inputs and outputs - the locks have already been taken so it's pointless.
  MoveWork mw = new MoveWork(null, null, null, null, false);
  mw.setMultiFilesDesc(new LoadMultiFilesDesc(srcs, tgts, true, null, null));
  ImportCommitWork icw = new ImportCommitWork(tbl.getDbName(), tbl.getTableName(), mmWriteId, stmtId);
  Task<?> mv = TaskFactory.get(mw), ic = TaskFactory.get(icw);
  mv.addDependentTask(ic);
  return Lists.<Task<?>>newArrayList(mv);
}
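generateAddMmTasks pairs every table or partition data directory with a target that is the first MM delta directory underneath it, named by AcidUtils.deltaSubdir(writeId, writeId, stmtId), and hands the resulting src/tgt lists to a single MoveWork. A small sketch of just that pairing step (the data locations and write ID below are hypothetical) could be:
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class MmDeltaTargetsDemo {
  public static void main(String[] args) {
    // Hypothetical partition data locations; the real code gets these from
    // Partition.getDataLocation() / Table.getDataLocation().
    List<Path> dataLocations = Arrays.asList(new Path("/warehouse/db.db/tbl/p=1"), new Path("/warehouse/db.db/tbl/p=2"));
    long mmWriteId = 1L; // in DDLTask this comes from the transaction manager
    int stmtId = 0;
    String mmDir = AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId);
    // Same pairing as the snippet: each source directory is moved into its own first delta directory.
    List<Path> srcs = new ArrayList<>();
    List<Path> tgts = new ArrayList<>();
    for (Path src : dataLocations) {
      srcs.add(src);
      tgts.add(new Path(src, mmDir));
      System.out.println("Will move " + src + " to " + new Path(src, mmDir));
    }
  }
}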