Example 11 with HiveInputFormat

use of org.apache.hadoop.hive.ql.io.HiveInputFormat in project hive by apache.

the class DagUtils method createVertexFromMapWork.

/*
   * Helper function to create Vertex from MapWork.
   */
private Vertex createVertexFromMapWork(JobConf conf, MapWork mapWork, Path mrScratchDir, VertexType vertexType) throws Exception {
    // set up the operator plan
    Utilities.cacheMapWork(conf, mapWork, mrScratchDir);
    // create the directories FileSinkOperators need
    Utilities.createTmpDirs(conf, mapWork);
    // finally create the vertex
    Vertex map = null;
    // use tez to combine splits
    boolean groupSplitsInInputInitializer;
    DataSourceDescriptor dataSource;
    int numTasks = -1;
    @SuppressWarnings("rawtypes") Class inputFormatClass = conf.getClass("mapred.input.format.class", InputFormat.class);
    boolean vertexHasCustomInput = VertexType.isCustomInputType(vertexType);
    LOG.info("Vertex has custom input? " + vertexHasCustomInput);
    if (vertexHasCustomInput) {
        groupSplitsInInputInitializer = false;
        // Grouping happens in the execution phase; do not enable it in the input payload here,
        // it will be enabled in the CustomVertex.
        if (inputFormatClass != BucketizedHiveInputFormat.class && inputFormatClass != HiveInputFormat.class) {
            // As of now only these two formats are supported.
            inputFormatClass = HiveInputFormat.class;
        }
        conf.setClass("mapred.input.format.class", inputFormatClass, InputFormat.class);
        // mapreduce.tez.input.initializer.serialize.event.payload should be set to false when using
        // this plug-in to avoid getting a serialized event at run-time.
        conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
    } else {
        // Let Tez combine splits for us only if the input format is HiveInputFormat
        if (inputFormatClass == HiveInputFormat.class) {
            groupSplitsInInputInitializer = true;
        } else {
            groupSplitsInInputInitializer = false;
        }
    }
    if (mapWork instanceof MergeFileWork) {
        Path outputPath = ((MergeFileWork) mapWork).getOutputDir();
        // prepare the tmp output directory. The output tmp directory should
        // exist before jobClose (before renaming after job completion)
        Path tempOutPath = Utilities.toTempPath(outputPath);
        try {
            FileSystem tmpOutFS = tempOutPath.getFileSystem(conf);
            if (!tmpOutFS.exists(tempOutPath)) {
                tmpOutFS.mkdirs(tempOutPath);
            }
        } catch (IOException e) {
            throw new RuntimeException("Can't make path " + outputPath + " : " + e.getMessage(), e);
        }
    }
    // remember mapping of plan to input
    conf.set(Utilities.INPUT_NAME, mapWork.getName());
    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_AM_SPLIT_GENERATION)) {
        // set up the operator plan. (before setting up splits on the AM)
        Utilities.setMapWork(conf, mapWork, mrScratchDir, false);
        // splits are generated in the AM, so we just need to set the correct plugin.
        if (groupSplitsInInputInitializer) {
            // Not setting a payload, since the MRInput payload is the same and can be accessed.
            InputInitializerDescriptor descriptor = InputInitializerDescriptor.create(HiveSplitGenerator.class.getName());
            dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(true).setCustomInitializerDescriptor(descriptor).build();
        } else {
            // Not HiveInputFormat, or a custom VertexManager will take care of grouping splits
            if (vertexHasCustomInput && vertexType == VertexType.MULTI_INPUT_UNINITIALIZED_EDGES) {
                // SMB Join.
                dataSource = MultiMRInput.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build();
            } else {
                dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build();
            }
        }
    } else {
        // Setup client side split generation.
        // we need to set this, because with HS2 and client side split
        // generation we end up not finding the map work. This is
        // because of thread local madness (tez split generation is
        // multi-threaded - HS2 plan cache uses thread locals). Setting
        // VECTOR_MODE/USE_VECTORIZED_INPUT_FILE_FORMAT causes the split gen code to use the conf instead
        // of the map work.
        conf.setBoolean(Utilities.VECTOR_MODE, mapWork.getVectorMode());
        conf.setBoolean(Utilities.USE_VECTORIZED_INPUT_FILE_FORMAT, mapWork.getUseVectorizedInputFileFormat());
        InputSplitInfo inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(conf, false, 0);
        InputInitializerDescriptor descriptor = InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName());
        InputDescriptor inputDescriptor = InputDescriptor.create(MRInputLegacy.class.getName())
                .setUserPayload(UserPayload.create(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                        .setConfigurationBytes(TezUtils.createByteStringFromConf(conf))
                        .setSplits(inputSplitInfo.getSplitsProto())
                        .build().toByteString().asReadOnlyByteBuffer()));
        dataSource = DataSourceDescriptor.create(inputDescriptor, descriptor, null);
        numTasks = inputSplitInfo.getNumTasks();
        // set up the operator plan. (after generating splits - that changes configs)
        Utilities.setMapWork(conf, mapWork, mrScratchDir, false);
    }
    UserPayload serializedConf = TezUtils.createUserPayloadFromConf(conf);
    String procClassName = MapTezProcessor.class.getName();
    if (mapWork instanceof MergeFileWork) {
        procClassName = MergeFileTezProcessor.class.getName();
    }
    map = Vertex.create(mapWork.getName(), ProcessorDescriptor.create(procClassName).setUserPayload(serializedConf), numTasks, getContainerResource(conf));
    map.setTaskEnvironment(getContainerEnvironment(conf, true));
    assert mapWork.getAliasToWork().keySet().size() == 1;
    // Add the actual source input
    String alias = mapWork.getAliasToWork().keySet().iterator().next();
    map.addDataSource(alias, dataSource);
    return map;
}
Also used : Path(org.apache.hadoop.fs.Path) InputDescriptor(org.apache.tez.dag.api.InputDescriptor) Vertex(org.apache.tez.dag.api.Vertex) PreWarmVertex(org.apache.tez.dag.api.PreWarmVertex) MergeFileWork(org.apache.hadoop.hive.ql.io.merge.MergeFileWork) UserPayload(org.apache.tez.dag.api.UserPayload) BucketizedHiveInputFormat(org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat) InputSplitInfo(org.apache.tez.mapreduce.hadoop.InputSplitInfo) IOException(java.io.IOException) CombineHiveInputFormat(org.apache.hadoop.hive.ql.io.CombineHiveInputFormat) BucketizedHiveInputFormat(org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) MRInputSplitDistributor(org.apache.tez.mapreduce.common.MRInputSplitDistributor) FileSystem(org.apache.hadoop.fs.FileSystem) InputInitializerDescriptor(org.apache.tez.dag.api.InputInitializerDescriptor) DataSourceDescriptor(org.apache.tez.dag.api.DataSourceDescriptor)
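
For orientation, the returned vertex is not useful on its own; a caller attaches it to a Tez DAG. A minimal caller sketch under stated assumptions: the helper name buildMapVertex, the choice of VertexType.AUTO_INITIALIZED_EDGES, and the way the DAG instance is obtained are illustrative only and are not part of DagUtils.

// Hypothetical caller: wires a map-side vertex produced by createVertexFromMapWork
// into a Tez DAG (org.apache.tez.dag.api.DAG). Names and vertex type are assumptions.
private Vertex buildMapVertex(JobConf conf, MapWork mapWork, Path mrScratchDir, DAG dag) throws Exception {
    // a non-custom vertex type, so split grouping is decided by the input-format check above
    Vertex mapVertex = createVertexFromMapWork(conf, mapWork, mrScratchDir, VertexType.AUTO_INITIALIZED_EDGES);
    // register the vertex with the DAG; edges to downstream vertices are added elsewhere
    dag.addVertex(mapVertex);
    return mapVertex;
}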

Example 12 with HiveInputFormat

use of org.apache.hadoop.hive.ql.io.HiveInputFormat in project hive by apache.

the class HiveSplitGenerator method initialize.

@SuppressWarnings("unchecked")
@Override
public List<Event> initialize() throws Exception {
    if (getContext() != null) {
        // called from Tez AM.
        prepare(getContext());
    }
    // Setup the map work for this thread. Pruning modified the work instance to potentially remove
    // partitions. The same work instance must be used when generating splits.
    Utilities.setMapWork(jobConf, work);
    try {
        boolean sendSerializedEvents = conf.getBoolean("mapreduce.tez.input.initializer.serialize.event.payload", true);
        // perform dynamic partition pruning
        if (pruner != null) {
            pruner.initialize(getContext(), work, jobConf);
            pruner.prune();
        }
        InputSplitInfoMem inputSplitInfo = null;
        boolean generateConsistentSplits = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS);
        LOG.info("GenerateConsistentSplitsInHive=" + generateConsistentSplits);
        String realInputFormatName = conf.get("mapred.input.format.class");
        boolean groupingEnabled = userPayloadProto.getGroupingEnabled();
        if (groupingEnabled) {
            // Need to instantiate the realInputFormat
            InputFormat<?, ?> inputFormat = (InputFormat<?, ?>) ReflectionUtils.newInstance(JavaUtils.loadClass(realInputFormatName), jobConf);
            int totalResource = 0;
            int taskResource = 0;
            int availableSlots = 0;
            // FIXME. Do the right thing Luke.
            if (getContext() == null) {
                // for now, totalResource = taskResource for llap
                availableSlots = 1;
            }
            if (getContext() != null) {
                totalResource = getContext().getTotalAvailableResource().getMemory();
                taskResource = getContext().getVertexTaskResource().getMemory();
                availableSlots = totalResource / taskResource;
            }
            if (HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 1) <= 1) {
                // broken configuration from mapred-default.xml
                final long blockSize = conf.getLongBytes(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT);
                final long minGrouping = conf.getLong(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE, TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT);
                final long preferredSplitSize = Math.min(blockSize / 2, minGrouping);
                HiveConf.setLongVar(jobConf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, preferredSplitSize);
                LOG.info("The preferred split size is " + preferredSplitSize);
            }
            float waves;
            // Create the un-grouped splits
            if (numSplits.isPresent()) {
                waves = numSplits.get().floatValue() / availableSlots;
            } else {
                waves = conf.getFloat(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES, TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES_DEFAULT);
            }
            InputSplit[] splits;
            if (generateSingleSplit && conf.get(HiveConf.ConfVars.HIVETEZINPUTFORMAT.varname).equals(HiveInputFormat.class.getName())) {
                MapWork mapWork = Utilities.getMapWork(jobConf);
                List<Path> paths = Utilities.getInputPathsTez(jobConf, mapWork);
                FileSystem fs = paths.get(0).getFileSystem(jobConf);
                FileStatus[] fileStatuses = fs.listStatus(paths.get(0));
                if (fileStatuses.length == 0) {
                    // Generating a single split typically happens when reading the output of order-by queries.
                    // If the order-by query returned no rows, no files will exist in the input path.
                    splits = new InputSplit[0];
                } else {
                    // If files exist in the input path there must be exactly one, because this code path is
                    // triggered only by order-by queries, which write a single file (produced by one reducer).
                    Preconditions.checkState(paths.size() == 1 && fileStatuses.length == 1 && mapWork.getAliasToPartnInfo().size() == 1, "Requested to generate single split. Paths and fileStatuses are expected to be 1. " + "Got paths: " + paths.size() + " fileStatuses: " + fileStatuses.length);
                    splits = new InputSplit[1];
                    FileStatus fileStatus = fileStatuses[0];
                    BlockLocation[] locations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
                    Set<String> hostsSet = new HashSet<>();
                    for (BlockLocation location : locations) {
                        hostsSet.addAll(Lists.newArrayList(location.getHosts()));
                    }
                    String[] hosts = hostsSet.toArray(new String[0]);
                    FileSplit fileSplit = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen(), hosts);
                    String alias = mapWork.getAliases().get(0);
                    PartitionDesc partDesc = mapWork.getAliasToPartnInfo().get(alias);
                    String partIF = partDesc.getInputFileFormatClassName();
                    splits[0] = new HiveInputFormat.HiveInputSplit(fileSplit, partIF);
                }
            } else {
                // Raw splits
                splits = inputFormat.getSplits(jobConf, numSplits.orElse(Math.multiplyExact(availableSlots, (int) waves)));
            }
            // Sort the splits, so that subsequent grouping is consistent.
            Arrays.sort(splits, new InputSplitComparator());
            LOG.info("Number of input splits: " + splits.length + ". " + availableSlots + " available slots, " + waves + " waves. Input format is: " + realInputFormatName);
            // increment/set input counters
            InputInitializerContext inputInitializerContext = getContext();
            TezCounters tezCounters = null;
            String counterName;
            String groupName = null;
            String vertexName = null;
            if (inputInitializerContext != null) {
                try {
                    tezCounters = new TezCounters();
                    groupName = HiveInputCounters.class.getName();
                    vertexName = jobConf.get(Operator.CONTEXT_NAME_KEY, "");
                    counterName = Utilities.getVertexCounterName(HiveInputCounters.RAW_INPUT_SPLITS.name(), vertexName);
                    tezCounters.findCounter(groupName, counterName).increment(splits.length);
                    final List<Path> paths = Utilities.getInputPathsTez(jobConf, work);
                    counterName = Utilities.getVertexCounterName(HiveInputCounters.INPUT_DIRECTORIES.name(), vertexName);
                    tezCounters.findCounter(groupName, counterName).increment(paths.size());
                    final Set<String> files = new HashSet<>();
                    for (InputSplit inputSplit : splits) {
                        if (inputSplit instanceof FileSplit) {
                            final FileSplit fileSplit = (FileSplit) inputSplit;
                            final Path path = fileSplit.getPath();
                            // The assumption here is the path is a file. Only case this is different is ACID deltas.
                            // The isFile check is avoided here for performance reasons.
                            final String fileStr = path.toString();
                            if (!files.contains(fileStr)) {
                                files.add(fileStr);
                            }
                        }
                    }
                    counterName = Utilities.getVertexCounterName(HiveInputCounters.INPUT_FILES.name(), vertexName);
                    tezCounters.findCounter(groupName, counterName).increment(files.size());
                } catch (Exception e) {
                    LOG.warn("Caught exception while trying to update Tez counters", e);
                }
            }
            if (work.getIncludedBuckets() != null) {
                splits = pruneBuckets(work, splits);
            }
            Multimap<Integer, InputSplit> groupedSplits = splitGrouper.generateGroupedSplits(jobConf, conf, splits, waves, availableSlots, splitLocationProvider);
            // And finally return them in a flat array
            InputSplit[] flatSplits = groupedSplits.values().toArray(new InputSplit[0]);
            LOG.info("Number of split groups: " + flatSplits.length);
            if (inputInitializerContext != null) {
                try {
                    counterName = Utilities.getVertexCounterName(HiveInputCounters.GROUPED_INPUT_SPLITS.name(), vertexName);
                    tezCounters.findCounter(groupName, counterName).setValue(flatSplits.length);
                    LOG.debug("Published tez counters: {}", tezCounters);
                    inputInitializerContext.addCounters(tezCounters);
                } catch (Exception e) {
                    LOG.warn("Caught exception while trying to update Tez counters", e);
                }
            }
            List<TaskLocationHint> locationHints = splitGrouper.createTaskLocationHints(flatSplits, generateConsistentSplits);
            inputSplitInfo = new InputSplitInfoMem(flatSplits, locationHints, flatSplits.length, null, jobConf);
        } else {
            // If this is used in the future - make sure to disable grouping in the payload, if it isn't already disabled
            throw new RuntimeException("HiveInputFormat does not support non-grouped splits, InputFormatName is: " + realInputFormatName);
        // inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(jobConf, false, 0);
        }
        return createEventList(sendSerializedEvents, inputSplitInfo);
    } finally {
        Utilities.clearWork(jobConf);
    }
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) BlockLocation(org.apache.hadoop.fs.BlockLocation) FileSplit(org.apache.hadoop.mapred.FileSplit) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) InputSplitInfoMem(org.apache.tez.mapreduce.hadoop.InputSplitInfoMem) FileSystem(org.apache.hadoop.fs.FileSystem) InputSplit(org.apache.hadoop.mapred.InputSplit) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) TezCounters(org.apache.tez.common.counters.TezCounters) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) InputFormat(org.apache.hadoop.mapred.InputFormat) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc)
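
The initialize() method above is not invoked directly by Hive code; Tez calls it on the AM once the generator is registered as the input initializer of a vertex's data source. A minimal wiring sketch that mirrors the calls already shown in Example 11 (the vertex variable, the alias string, and the use of HiveInputFormat here are placeholders):

// Register HiveSplitGenerator so that Tez calls initialize() on the AM to
// produce and group the splits for this vertex's MRInput data source.
InputInitializerDescriptor initializer =
        InputInitializerDescriptor.create(HiveSplitGenerator.class.getName());
DataSourceDescriptor dataSource = MRInputLegacy.createConfigBuilder(conf, HiveInputFormat.class)
        .groupSplits(true)
        .setCustomInitializerDescriptor(initializer)
        .build();
// "alias" is the MapWork alias name; compare map.addDataSource(alias, dataSource) in Example 11.
vertex.addDataSource(alias, dataSource);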

Example 13 with HiveInputFormat

use of org.apache.hadoop.hive.ql.io.HiveInputFormat in project hive by apache.

the class TestCompactor method testStatsAfterCompactionPartTbl.

/**
 * After each major compaction, stats need to be updated on the table
 * 1. create a partitioned ORC backed table (Orc is currently required by ACID)
 * 2. populate with data
 * 3. compute stats
 * 4. Trigger major compaction on one of the partitions (which should update stats)
 * 5. check that stats have been updated for that partition only
 *
 * @throws Exception todo:
 *                   4. add a test with sorted table?
 */
@Test
public void testStatsAfterCompactionPartTbl() throws Exception {
    // as of (8/27/2014) Hive 0.14, ACID/Orc requires HiveInputFormat
    String dbName = "default";
    String tblName = "compaction_test";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(bkt INT)" + // currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 4 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build();
    HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
        .withDatabase(dbName)
        .withTable(tblName)
        .withStaticPartitionValues(Arrays.asList("0"))
        .withAgentInfo("UT_" + Thread.currentThread().getName())
        .withHiveConf(conf)
        .withRecordWriter(writer)
        .connect();
    connection.beginTransaction();
    connection.write("55, 'London'".getBytes());
    connection.commitTransaction();
    connection.beginTransaction();
    connection.write("56, 'Paris'".getBytes());
    connection.commitTransaction();
    connection.close();
    executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" + " values(57, 'Budapest')", driver);
    executeStatementOnDriver("INSERT INTO TABLE " + tblName + " PARTITION(bkt=1)" + " values(58, 'Milano')", driver);
    execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " + tblName + " after load:");
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    Table table = msClient.getTable(dbName, tblName);
    // compute stats before compaction
    CompactionInfo ci = new CompactionInfo(dbName, tblName, "bkt=0", CompactionType.MAJOR);
    Worker.StatsUpdater.gatherStats(ci, conf, System.getProperty("user.name"), CompactorUtil.getCompactorJobQueueName(conf, ci, table));
    ci = new CompactionInfo(dbName, tblName, "bkt=1", CompactionType.MAJOR);
    Worker.StatsUpdater.gatherStats(ci, conf, System.getProperty("user.name"), CompactorUtil.getCompactorJobQueueName(conf, ci, table));
    // Check basic stats are collected
    org.apache.hadoop.hive.ql.metadata.Table hiveTable = Hive.get().getTable(tblName);
    List<org.apache.hadoop.hive.ql.metadata.Partition> partitions = Hive.get().getPartitions(hiveTable);
    Map<String, String> parameters = partitions.stream()
        .filter(p -> p.getName().equals("bkt=0"))
        .findFirst()
        .orElseThrow(() -> new RuntimeException("Could not get Partition"))
        .getParameters();
    Assert.assertEquals("The number of files is differing from the expected", "2", parameters.get("numFiles"));
    Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
    Assert.assertEquals("The total table size is differing from the expected", "1373", parameters.get("totalSize"));
    parameters = partitions.stream()
        .filter(p -> p.getName().equals("bkt=1"))
        .findFirst()
        .orElseThrow(() -> new RuntimeException("Could not get Partition"))
        .getParameters();
    Assert.assertEquals("The number of files is differing from the expected", "2", parameters.get("numFiles"));
    Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
    Assert.assertEquals("The total table size is differing from the expected", "1442", parameters.get("totalSize"));
    // Do a major compaction
    CompactionRequest rqst = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
    rqst.setPartitionname("bkt=0");
    txnHandler.compact(rqst);
    runWorker(conf);
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts = rsp.getCompacts();
    if (1 != compacts.size()) {
        Assert.fail("Expecting 1 file and found " + compacts.size() + " files " + compacts);
    }
    Assert.assertEquals("ready for cleaning", compacts.get(0).getState());
    // Check basic stats are updated for partition bkt=0, but not updated for partition bkt=1
    partitions = Hive.get().getPartitions(hiveTable);
    parameters = partitions.stream()
        .filter(p -> p.getName().equals("bkt=0"))
        .findFirst()
        .orElseThrow(() -> new RuntimeException("Could not get Partition"))
        .getParameters();
    Assert.assertEquals("The number of files is differing from the expected", "1", parameters.get("numFiles"));
    Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
    Assert.assertEquals("The total table size is differing from the expected", "801", parameters.get("totalSize"));
    parameters = partitions.stream()
        .filter(p -> p.getName().equals("bkt=1"))
        .findFirst()
        .orElseThrow(() -> new RuntimeException("Could not get Partition"))
        .getParameters();
    Assert.assertEquals("The number of files is differing from the expected", "2", parameters.get("numFiles"));
    Assert.assertEquals("The number of rows is differing from the expected", "2", parameters.get("numRows"));
    Assert.assertEquals("The total table size is differing from the expected", "1442", parameters.get("totalSize"));
}
Also used : HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) DriverFactory(org.apache.hadoop.hive.ql.DriverFactory) OrcFile(org.apache.hadoop.hive.ql.io.orc.OrcFile) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Arrays(java.util.Arrays) SortedSet(java.util.SortedSet) TestTxnDbUtil(org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil) StreamingConnection(org.apache.hive.streaming.StreamingConnection) FileSystem(org.apache.hadoop.fs.FileSystem) HiveStreamingConnection(org.apache.hive.streaming.HiveStreamingConnection) ConfVars(org.apache.hadoop.hive.conf.HiveConf.ConfVars) LoggerFactory(org.slf4j.LoggerFactory) Random(java.util.Random) FileStatus(org.apache.hadoop.fs.FileStatus) CompactionType(org.apache.hadoop.hive.metastore.api.CompactionType) TestTxnCommands2.runWorker(org.apache.hadoop.hive.ql.TestTxnCommands2.runWorker) Mockito.doThrow(org.mockito.Mockito.doThrow) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) OrcConf(org.apache.orc.OrcConf) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Map(java.util.Map) After(org.junit.After) Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.hive.ql.io.orc.Reader) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) FileUtil(org.apache.hadoop.fs.FileUtil) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) IDriver(org.apache.hadoop.hive.ql.IDriver) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) SessionState(org.apache.hadoop.hive.ql.session.SessionState) Retry(org.apache.hive.common.util.Retry) TxnUtils(org.apache.hadoop.hive.metastore.txn.TxnUtils) List(java.util.List) HCatUtil(org.apache.hive.hcatalog.common.HCatUtil) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) StrictDelimitedInputWriter(org.apache.hive.streaming.StrictDelimitedInputWriter) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) VISIBILITY_PATTERN(org.apache.hadoop.hive.common.AcidConstants.VISIBILITY_PATTERN) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) TestTxnCommands2.runCleaner(org.apache.hadoop.hive.ql.TestTxnCommands2.runCleaner) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) PathFilter(org.apache.hadoop.fs.PathFilter) TestTxnCommands2.runInitiator(org.apache.hadoop.hive.ql.TestTxnCommands2.runInitiator) HashMap(java.util.HashMap) Partition(org.apache.hadoop.hive.metastore.api.Partition) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Lists(com.google.common.collect.Lists) Constants(org.apache.hadoop.hive.conf.Constants) Before(org.junit.Before) Hive(org.apache.hadoop.hive.ql.metadata.Hive) StreamingException(org.apache.hive.streaming.StreamingException) Logger(org.slf4j.Logger) FileWriter(java.io.FileWriter) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Mockito.times(org.mockito.Mockito.times) IOException(java.io.IOException) Test(org.junit.Test) CliSessionState(org.apache.hadoop.hive.cli.CliSessionState) File(java.io.File) Table(org.apache.hadoop.hive.metastore.api.Table) Mockito.verify(org.mockito.Mockito.verify) TimeUnit(java.util.concurrent.TimeUnit) 
Mockito(org.mockito.Mockito) Rule(org.junit.Rule) FieldSetter(org.mockito.internal.util.reflection.FieldSetter) Assert(org.junit.Assert) Collections(java.util.Collections) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) Assert.assertEquals(org.junit.Assert.assertEquals) TemporaryFolder(org.junit.rules.TemporaryFolder) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) StrictDelimitedInputWriter(org.apache.hive.streaming.StrictDelimitedInputWriter) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) Test(org.junit.Test)
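
The four stats checks above repeat the same stream-filter-assert pattern. A small helper along these lines could condense them; this sketch is a hypothetical refactoring, not part of TestCompactor, and reuses only calls already present in the test.

// Hypothetical helper: assert numFiles/numRows/totalSize for one partition of the test table.
private static void assertPartitionStats(List<org.apache.hadoop.hive.ql.metadata.Partition> partitions,
        String partName, String expectedFiles, String expectedRows, String expectedSize) {
    Map<String, String> parameters = partitions.stream()
        .filter(p -> p.getName().equals(partName))
        .findFirst()
        .orElseThrow(() -> new RuntimeException("Could not get Partition"))
        .getParameters();
    Assert.assertEquals("The number of files is differing from the expected", expectedFiles, parameters.get("numFiles"));
    Assert.assertEquals("The number of rows is differing from the expected", expectedRows, parameters.get("numRows"));
    Assert.assertEquals("The total table size is differing from the expected", expectedSize, parameters.get("totalSize"));
}

// Example usage, replacing the post-compaction checks:
// assertPartitionStats(partitions, "bkt=0", "1", "2", "801");
// assertPartitionStats(partitions, "bkt=1", "2", "2", "1442");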

Aggregations

HiveInputFormat (org.apache.hadoop.hive.ql.io.HiveInputFormat)13 Path (org.apache.hadoop.fs.Path)6 IOException (java.io.IOException)5 CombineHiveInputFormat (org.apache.hadoop.hive.ql.io.CombineHiveInputFormat)5 InputSplit (org.apache.hadoop.mapred.InputSplit)5 Test (org.junit.Test)5 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 List (java.util.List)3 Map (java.util.Map)3 FileSystem (org.apache.hadoop.fs.FileSystem)3 HiveConf (org.apache.hadoop.hive.conf.HiveConf)3 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)3 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)3 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)3 PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc)3 InputFormat (org.apache.hadoop.mapred.InputFormat)3 Serializable (java.io.Serializable)2 Configurable (org.apache.hadoop.conf.Configurable)2 FileStatus (org.apache.hadoop.fs.FileStatus)2