
Example 36 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

From the class TestExplainTask, method testGetJSONDependenciesJsonShhouldMatch.

@Test
public void testGetJSONDependenciesJsonShhouldMatch() throws Exception {
    ExplainWork work = mockExplainWork();
    when(work.getDependency()).thenReturn(true);
    // Mock inputs
    HashSet<ReadEntity> inputs = new HashSet<>();
    // One input table
    Table table = mock(Table.class);
    when(table.getCompleteName()).thenReturn("table-name-mock");
    when(table.getTableType()).thenReturn(TableType.EXTERNAL_TABLE);
    ReadEntity input1 = mock(ReadEntity.class);
    when(input1.getType()).thenReturn(Entity.Type.TABLE);
    when(input1.getTable()).thenReturn(table);
    inputs.add(input1);
    // And one partition
    Partition partition = mock(Partition.class);
    when(partition.getCompleteName()).thenReturn("partition-name-mock");
    ReadEntity input2 = mock(ReadEntity.class);
    when(input2.getType()).thenReturn(Entity.Type.PARTITION);
    when(input2.getPartition()).thenReturn(partition);
    inputs.add(input2);
    when(work.getInputs()).thenReturn(inputs);
    JsonNode result = objectMapper.readTree(ExplainTask.getJSONDependencies(work).toString());
    JsonNode expected = objectMapper.readTree("{\"input_partitions\":[{\"partitionName\":" + "\"partition-name-mock\"}],\"input_tables\":[{\"tablename\":\"table-name-mock\"," + "\"tabletype\":\"EXTERNAL_TABLE\"}]}");
    assertEquals(expected, result);
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.ql.metadata.Table) JsonNode(com.fasterxml.jackson.databind.JsonNode) Test(org.junit.Test)
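
The test excerpt relies on an objectMapper field and a mockExplainWork() helper that are not shown. A minimal sketch of what those members might look like, assuming Jackson and Mockito; the stubbing below is an assumption, not the project's actual helper:

// Hypothetical supporting members assumed by the excerpt above; not copied from Hive.
private final ObjectMapper objectMapper = new ObjectMapper();

private ExplainWork mockExplainWork() {
    // A plain Mockito mock; each test stubs only the getters it needs.
    return mock(ExplainWork.class);
}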

Example 37 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

From the class BasicStatsNoJobTask, method updatePartitions.

private int updatePartitions(Hive db, List<FooterStatCollector> scs, Table table) throws InvalidOperationException, HiveException {
    String tableFullName = table.getFullyQualifiedName();
    if (scs.isEmpty()) {
        return 0;
    }
    if (work.isStatsReliable()) {
        for (FooterStatCollector statsCollection : scs) {
            if (statsCollection.result == null) {
                LOG.debug("Stats requested to be reliable. Empty stats found: {}", statsCollection.partish.getSimpleName());
                return -1;
            }
        }
    }
    List<FooterStatCollector> validColectors = Lists.newArrayList();
    for (FooterStatCollector statsCollection : scs) {
        if (statsCollection.isValid()) {
            validColectors.add(statsCollection);
        }
    }
    EnvironmentContext environmentContext = new EnvironmentContext();
    environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
    ImmutableListMultimap<String, FooterStatCollector> collectorsByTable = Multimaps.index(validColectors, FooterStatCollector.SIMPLE_NAME_FUNCTION);
    LOG.debug("Collectors.size(): {}", collectorsByTable.keySet());
    if (collectorsByTable.keySet().size() < 1) {
        LOG.warn("Collectors are empty! ; {}", tableFullName);
    }
    // for now this should be true...
    assert (collectorsByTable.keySet().size() <= 1);
    LOG.debug("Updating stats for: {}", tableFullName);
    for (String partName : collectorsByTable.keySet()) {
        ImmutableList<FooterStatCollector> values = collectorsByTable.get(partName);
        if (values == null) {
            throw new RuntimeException("very intresting");
        }
        if (values.get(0).result instanceof Table) {
            db.alterTable(tableFullName, (Table) values.get(0).result, environmentContext);
            LOG.debug("Updated stats for {}.", tableFullName);
        } else {
            if (values.get(0).result instanceof Partition) {
                List<Partition> results = Lists.transform(values, FooterStatCollector.EXTRACT_RESULT_FUNCTION);
                db.alterPartitions(tableFullName, results, environmentContext);
                LOG.debug("Bulk updated {} partitions of {}.", results.size(), tableFullName);
            } else {
                throw new RuntimeException("inconsistent");
            }
        }
    }
    LOG.debug("Updated stats for: {}", tableFullName);
    return 0;
}
Also used : EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.ql.metadata.Table)
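
The EnvironmentContext built above carries the DO_NOT_UPDATE_STATS flag so the metastore does not recompute basic stats during the alter calls, since this task has already gathered them from file footers. A minimal sketch of the same pattern in isolation, mirroring the alterPartitions call in the excerpt; the client variable db, the partition list, and the table name are assumptions:

// Sketch only: prevent the metastore from overwriting stats we just computed.
EnvironmentContext ctx = new EnvironmentContext();
ctx.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
// "db" is a Hive client and "updatedPartitions" holds Partition objects whose
// parameters already contain the new stats (both hypothetical here).
db.alterPartitions("default.sales", updatedPartitions, ctx);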

Example 38 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

From the class StatsUtils, method getFileSizeForPartitions.

/**
 * Finds the bytes on disk occupied by a list of partitions.
 * @param conf
 *          - hive conf
 * @param parts
 *          - partition list
 * @return sizes of the partitions, in bytes, in the same order as {@code parts}
 */
public static List<Long> getFileSizeForPartitions(final HiveConf conf, List<Partition> parts) {
    LOG.info("Number of partitions : " + parts.size());
    ArrayList<Future<Long>> futures = new ArrayList<>();
    int threads = Math.max(1, conf.getIntVar(ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT));
    final ExecutorService pool = Executors.newFixedThreadPool(threads, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Partitions-Size-%d").build());
    final ArrayList<Long> sizes = new ArrayList<>(parts.size());
    for (final Partition part : parts) {
        final Path path = part.getDataLocation();
        futures.add(pool.submit(new Callable<Long>() {

            @Override
            public Long call() throws Exception {
                try {
                    LOG.debug("Partition path : " + path);
                    FileSystem fs = path.getFileSystem(conf);
                    return fs.getContentSummary(path).getLength();
                } catch (IOException e) {
                    return 0L;
                }
            }
        }));
    }
    try {
        for (int i = 0; i < futures.size(); i++) {
            sizes.add(i, futures.get(i).get());
        }
    } catch (InterruptedException | ExecutionException e) {
        LOG.warn("Exception in processing files ", e);
    } finally {
        pool.shutdownNow();
    }
    return sizes;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Callable(java.util.concurrent.Callable) FileSystem(org.apache.hadoop.fs.FileSystem) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ExecutionException(java.util.concurrent.ExecutionException)
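
A hypothetical caller of getFileSizeForPartitions, assuming a HiveConf and a table whose partitions are fetched through the Hive client; the database and table names are illustrative only:

HiveConf conf = new HiveConf();
Hive db = Hive.get(conf);
Table table = db.getTable("default", "sales");      // illustrative names
List<Partition> parts = db.getPartitions(table);
List<Long> sizes = StatsUtils.getFileSizeForPartitions(conf, parts);
long totalBytes = 0L;
for (Long size : sizes) {
    totalBytes += size;
}
System.out.println("Total bytes on disk: " + totalBytes);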

Example 39 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

From the class DDLTask, method unarchive.

private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc) throws HiveException, URISyntaxException {
    Table tbl = db.getTable(simpleDesc.getTableName());
    // Means user specified a table, not a partition
    if (simpleDesc.getPartSpec() == null) {
        throw new HiveException("UNARCHIVE is for partitions only");
    }
    if (tbl.getTableType() != TableType.MANAGED_TABLE) {
        throw new HiveException("UNARCHIVE can only be performed on managed tables");
    }
    Map<String, String> partSpec = simpleDesc.getPartSpec();
    PartSpecInfo partSpecInfo = PartSpecInfo.create(tbl, partSpec);
    List<Partition> partitions = db.getPartitions(tbl, partSpec);
    int partSpecLevel = partSpec.size();
    Path originalDir = null;
    // to keep backward compatibility
    if (partitions.isEmpty()) {
        throw new HiveException("No partition matches the specification");
    } else if (partSpecInfo.values.size() != tbl.getPartCols().size()) {
        // for partial specifications we need partitions to follow the scheme
        for (Partition p : partitions) {
            if (partitionInCustomLocation(tbl, p)) {
                String message = String.format("UNARCHIVE cannot run for partition " + "groups with custom locations like %s", p.getLocation());
                throw new HiveException(message);
            }
        }
        originalDir = partSpecInfo.createPath(tbl);
    } else {
        Partition p = partitions.get(0);
        if (ArchiveUtils.isArchived(p)) {
            originalDir = new Path(getOriginalLocation(p));
        } else {
            originalDir = new Path(p.getLocation());
        }
    }
    URI originalUri = ArchiveUtils.addSlash(originalDir.toUri());
    Path intermediateArchivedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
    Path intermediateExtractedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_EXTRACTED_DIR_SUFFIX);
    boolean recovery = false;
    if (pathExists(intermediateArchivedDir) || pathExists(intermediateExtractedDir)) {
        recovery = true;
        console.printInfo("Starting recovery after failed UNARCHIVE");
    }
    for (Partition p : partitions) {
        checkArchiveProperty(partSpecLevel, recovery, p);
    }
    String archiveName = "data.har";
    FileSystem fs = null;
    try {
        fs = originalDir.getFileSystem(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    // assume the archive is in the original dir, check if it exists
    Path archivePath = new Path(originalDir, archiveName);
    URI archiveUri = archivePath.toUri();
    ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri);
    URI sourceUri = harHelper.getHarUri(originalUri);
    Path sourceDir = new Path(sourceUri.getScheme(), sourceUri.getAuthority(), sourceUri.getPath());
    if (!pathExists(intermediateArchivedDir) && !pathExists(archivePath)) {
        throw new HiveException("Haven't found any archive where it should be");
    }
    Path tmpPath = driverContext.getCtx().getExternalTmpPath(originalDir);
    try {
        fs = tmpPath.getFileSystem(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    if (!pathExists(intermediateExtractedDir) && !pathExists(intermediateArchivedDir)) {
        try {
            // Copy the files out of the archive into the temporary directory
            String copySource = sourceDir.toString();
            String copyDest = tmpPath.toString();
            List<String> args = new ArrayList<String>();
            args.add("-cp");
            args.add(copySource);
            args.add(copyDest);
            console.printInfo("Copying " + copySource + " to " + copyDest);
            FileSystem srcFs = FileSystem.get(sourceDir.toUri(), conf);
            srcFs.initialize(sourceDir.toUri(), conf);
            FsShell fss = new FsShell(conf);
            int ret = 0;
            try {
                ret = ToolRunner.run(fss, args.toArray(new String[0]));
            } catch (Exception e) {
                e.printStackTrace();
                throw new HiveException(e);
            }
            if (ret != 0) {
                throw new HiveException("Error while copying files from archive, return code=" + ret);
            } else {
                console.printInfo("Successfully Copied " + copySource + " to " + copyDest);
            }
            console.printInfo("Moving " + tmpPath + " to " + intermediateExtractedDir);
            if (fs.exists(intermediateExtractedDir)) {
                throw new HiveException("Invalid state: the intermediate extracted " + "directory already exists.");
            }
            fs.rename(tmpPath, intermediateExtractedDir);
        } catch (Exception e) {
            throw new HiveException(e);
        }
    }
    if (!pathExists(intermediateArchivedDir)) {
        try {
            console.printInfo("Moving " + originalDir + " to " + intermediateArchivedDir);
            fs.rename(originalDir, intermediateArchivedDir);
        } catch (IOException e) {
            throw new HiveException(e);
        }
    } else {
        console.printInfo(intermediateArchivedDir + " already exists. " + "Assuming it contains the archived version of the partition");
    }
    // If the original location still exists at this point, it must hold the extracted
    // files, because the previous step moved the old original location
    // (containing the archived version of the files) to intermediateArchivedDir.
    if (!pathExists(originalDir)) {
        try {
            console.printInfo("Moving " + intermediateExtractedDir + " to " + originalDir);
            fs.rename(intermediateExtractedDir, originalDir);
        } catch (IOException e) {
            throw new HiveException(e);
        }
    } else {
        console.printInfo(originalDir + " already exists. " + "Assuming it contains the extracted files in the partition");
    }
    for (Partition p : partitions) {
        setUnArchived(p);
        try {
            db.alterPartition(simpleDesc.getTableName(), p, null);
        } catch (InvalidOperationException e) {
            throw new HiveException(e);
        }
    }
    // If a failure occurs here, the intermediate archived directory will not be
    // deleted. The user will need to call unarchive again to clear those up.
    if (pathExists(intermediateArchivedDir)) {
        deleteDir(intermediateArchivedDir);
    }
    if (recovery) {
        console.printInfo("Recovery after UNARCHIVE succeeded");
    }
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) TextMetaDataTable(org.apache.hadoop.hive.ql.metadata.formatting.TextMetaDataTable) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) URI(java.net.URI) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) SQLException(java.sql.SQLException) FileNotFoundException(java.io.FileNotFoundException) HiveAuthzPluginException(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) PartSpecInfo(org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo) FsShell(org.apache.hadoop.fs.FsShell) FileSystem(org.apache.hadoop.fs.FileSystem) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException)
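
The method above moves data between three directories; a short illustration of their roles may help follow the renames. The suffix literals below are assumptions, the real values come from the INTERMEDIATE_ARCHIVED_DIR_SUFFIX and INTERMEDIATE_EXTRACTED_DIR_SUFFIX constants referenced in the excerpt:

// Illustration only; suffix strings and the partition path are assumed values.
Path originalDir = new Path("/warehouse/tbl/ds=2024-01-01");
Path intermediateArchivedDir = new Path(originalDir.getParent(),
        originalDir.getName() + "_INTERMEDIATE_ARCHIVED");   // assumed suffix
Path intermediateExtractedDir = new Path(originalDir.getParent(),
        originalDir.getName() + "_INTERMEDIATE_EXTRACTED");  // assumed suffix
// Flow in the excerpt:
//   1. copy the contents of data.har (under originalDir) into a tmp dir,
//      then rename the tmp dir to intermediateExtractedDir
//   2. rename originalDir to intermediateArchivedDir
//   3. rename intermediateExtractedDir to originalDir
//   4. mark each partition un-archived in the metastore,
//      then delete intermediateArchivedDir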

Example 40 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

From the class DDLTask, method generateAddMmTasks.

private List<Task<?>> generateAddMmTasks(Table tbl) throws HiveException {
    // We will move all the files in the table/partition directories into the first MM
    // directory, then commit the first write ID.
    List<Path> srcs = new ArrayList<>(), tgts = new ArrayList<>();
    long mmWriteId = 0;
    try {
        HiveTxnManager txnManager = SessionState.get().getTxnMgr();
        if (txnManager.isTxnOpen()) {
            mmWriteId = txnManager.getTableWriteId(tbl.getDbName(), tbl.getTableName());
        } else {
            txnManager.openTxn(new Context(conf), conf.getUser());
            mmWriteId = txnManager.getTableWriteId(tbl.getDbName(), tbl.getTableName());
            txnManager.commitTxn();
        }
    } catch (Exception e) {
        String errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
        console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
    int stmtId = 0;
    String mmDir = AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId);
    Hive db = getHive();
    if (tbl.getPartitionKeys().size() > 0) {
        PartitionIterable parts = new PartitionIterable(db, tbl, null, HiveConf.getIntVar(conf, ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
        Iterator<Partition> partIter = parts.iterator();
        while (partIter.hasNext()) {
            Partition part = partIter.next();
            checkMmLb(part);
            Path src = part.getDataLocation(), tgt = new Path(src, mmDir);
            srcs.add(src);
            tgts.add(tgt);
            if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                Utilities.FILE_OP_LOGGER.trace("Will move " + src + " to " + tgt);
            }
        }
    } else {
        checkMmLb(tbl);
        Path src = tbl.getDataLocation(), tgt = new Path(src, mmDir);
        srcs.add(src);
        tgts.add(tgt);
        if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
            Utilities.FILE_OP_LOGGER.trace("Will move " + src + " to " + tgt);
        }
    }
    // Don't set inputs and outputs - the locks have already been taken so it's pointless.
    MoveWork mw = new MoveWork(null, null, null, null, false);
    mw.setMultiFilesDesc(new LoadMultiFilesDesc(srcs, tgts, true, null, null));
    ImportCommitWork icw = new ImportCommitWork(tbl.getDbName(), tbl.getTableName(), mmWriteId, stmtId);
    Task<?> mv = TaskFactory.get(mw), ic = TaskFactory.get(icw);
    mv.addDependentTask(ic);
    return Lists.<Task<?>>newArrayList(mv);
}
Also used : Path(org.apache.hadoop.fs.Path) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) DriverContext(org.apache.hadoop.hive.ql.DriverContext) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) ColumnTruncateTask(org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateTask) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) MergeFileTask(org.apache.hadoop.hive.ql.io.merge.MergeFileTask) ArrayList(java.util.ArrayList) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) SQLException(java.sql.SQLException) FileNotFoundException(java.io.FileNotFoundException) HiveAuthzPluginException(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) LoadMultiFilesDesc(org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc) Hive(org.apache.hadoop.hive.ql.metadata.Hive) PartitionIterable(org.apache.hadoop.hive.ql.metadata.PartitionIterable) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager)
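
The MM target directory built above is an ACID delta subdirectory derived from the write ID and statement ID. A hedged illustration of the call; the zero-padding shown in the comment is an assumption about the resulting name:

long mmWriteId = 5L;
int stmtId = 0;
String mmDir = AcidUtils.deltaSubdir(mmWriteId, mmWriteId, stmtId);
// Yields a name of the form "delta_<writeId>_<writeId>_<stmtId>",
// e.g. "delta_0000005_0000005_0000" (padding assumed); each table or
// partition data location gets this subdirectory appended as the move target.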

Aggregations

Partition (org.apache.hadoop.hive.ql.metadata.Partition): 102
Table (org.apache.hadoop.hive.ql.metadata.Table): 56
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 48
ArrayList (java.util.ArrayList): 43
Path (org.apache.hadoop.fs.Path): 25
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 25
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 24
IOException (java.io.IOException): 18
HashMap (java.util.HashMap): 18
LinkedHashMap (java.util.LinkedHashMap): 18
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 18
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 18
PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList): 17
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 14
FileNotFoundException (java.io.FileNotFoundException): 12
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 12
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 12
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 11
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 11
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 11