
Example 11 with ValidWriteIdList

use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.

the class Initiator method run.

@Override
public void run() {
    // Nothing thrown here should escape and kill the thread, so wrap the whole body in a big catch Throwable statement.
    try {
        recoverFailedCompactions(false);
        int abortedThreshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD);
        // Main loop; this thread is started and stopped by HiveMetaStore.
        do {
            long startedAt = -1;
            TxnStore.MutexAPI.LockHandle handle = null;
            // Wrap each pass of the loop in its own try/catch so a failure in one iteration doesn't doom the entire thread.
            try {
                handle = txnHandler.getMutexAPI().acquireLock(TxnStore.MUTEX_KEY.Initiator.name());
                startedAt = System.currentTimeMillis();
                // todo: add method to only get current i.e. skip history - more efficient
                ShowCompactResponse currentCompactions = txnHandler.showCompact(new ShowCompactRequest());
                Set<CompactionInfo> potentials = txnHandler.findPotentialCompactions(abortedThreshold);
                LOG.debug("Found " + potentials.size() + " potential compactions, " + "checking to see if we should compact any of them");
                for (CompactionInfo ci : potentials) {
                    LOG.info("Checking to see if we should compact " + ci.getFullPartitionName());
                    try {
                        Table t = resolveTable(ci);
                        if (t == null) {
                            // Most likely this means it's a temp table
                            LOG.info("Can't find table " + ci.getFullTableName() + ", assuming it's a temp " + "table or has been dropped and moving on.");
                            continue;
                        }
                        // Check whether auto-compaction has been disabled for this table.
                        if (noAutoCompactSet(t)) {
                            LOG.info("Table " + tableName(t) + " marked " + hive_metastoreConstants.TABLE_NO_AUTO_COMPACT + "=true so we will not compact it.");
                            continue;
                        }
                        // If the table is partitioned but the entry has no partition name, it's a dynamic partitioning case and we shouldn't check the table itself.
                        if (t.getPartitionKeys() != null && t.getPartitionKeys().size() > 0 && ci.partName == null) {
                            LOG.debug("Skipping entry for " + ci.getFullTableName() + " as it is from dynamic" + " partitioning");
                            continue;
                        }
                        // Check whether a compaction for this entry is already initiated or in progress; note the small race window between the time currentCompactions is generated and now.
                        if (lookForCurrentCompactions(currentCompactions, ci)) {
                            LOG.debug("Found currently initiated or working compaction for " + ci.getFullPartitionName() + " so we will not initiate another compaction");
                            continue;
                        }
                        if (txnHandler.checkFailedCompactions(ci)) {
                            LOG.warn("Will not initiate compaction for " + ci.getFullPartitionName() + " since last " + HiveConf.ConfVars.COMPACTOR_INITIATOR_FAILED_THRESHOLD + " attempts to compact it failed.");
                            txnHandler.markFailed(ci);
                            continue;
                        }
                        // Figure out who we should run the file operations as
                        Partition p = resolvePartition(ci);
                        if (p == null && ci.partName != null) {
                            LOG.info("Can't find partition " + ci.getFullPartitionName() + ", assuming it has been dropped and moving on.");
                            continue;
                        }
                        // Compaction doesn't work under a transaction and hence pass null for validTxnList
                        // The response will have one entry per table and hence we get only one ValidWriteIdList
                        String fullTableName = TxnUtils.getFullTableName(t.getDbName(), t.getTableName());
                        GetValidWriteIdsRequest rqst = new GetValidWriteIdsRequest(Collections.singletonList(fullTableName), null);
                        ValidWriteIdList tblValidWriteIds = TxnUtils.createValidCompactWriteIdList(txnHandler.getValidWriteIds(rqst).getTblValidWriteIds().get(0));
                        StorageDescriptor sd = resolveStorageDescriptor(t, p);
                        String runAs = findUserToRunAs(sd.getLocation(), t);
                        /* Future thought: checkForCompaction will check a lot of file metadata and may be expensive.
                         * Long term we should consider having a thread pool here and running
                         * checkForCompaction calls in parallel. */
                        CompactionType compactionNeeded = checkForCompaction(ci, tblValidWriteIds, sd, t.getParameters(), runAs);
                        if (compactionNeeded != null)
                            requestCompaction(ci, runAs, compactionNeeded);
                    } catch (Throwable t) {
                        LOG.error("Caught exception while trying to determine if we should compact " + ci + ".  Marking failed to avoid repeated failures, " + "" + StringUtils.stringifyException(t));
                        txnHandler.markFailed(ci);
                    }
                }
                // Check for timed out remote workers.
                recoverFailedCompactions(true);
                // Clean anything from the txns table that has no components left in txn_components.
                txnHandler.cleanEmptyAbortedTxns();
            } catch (Throwable t) {
                LOG.error("Initiator loop caught unexpected exception this time through the loop: " + StringUtils.stringifyException(t));
            } finally {
                if (handle != null) {
                    handle.releaseLocks();
                }
            }
            long elapsedTime = System.currentTimeMillis() - startedAt;
            if (elapsedTime >= checkInterval || stop.get())
                continue;
            else
                Thread.sleep(checkInterval - elapsedTime);
        } while (!stop.get());
    } catch (Throwable t) {
        LOG.error("Caught an exception in the main loop of compactor initiator, exiting " + StringUtils.stringifyException(t));
    }
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) GetValidWriteIdsRequest(org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest) CompactionType(org.apache.hadoop.hive.metastore.api.CompactionType) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo)
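
The compact write-id list that createValidCompactWriteIdList produces can also be built and queried directly. Below is a minimal, hedged sketch of how a consumer reads a ValidWriteIdList; the table name default.demo_table is hypothetical, and the field layout of the string ("table:highWatermark:minOpenWriteId[:exceptions]") is inferred from the strings used in Example 15 rather than taken from API documentation.

import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
import org.apache.hadoop.hive.common.ValidWriteIdList;

public class ValidWriteIdListSketch {
    public static void main(String[] args) {
        // Same compact string form as the test in Example 15; the table name is made up.
        ValidWriteIdList writeIds =
                new ValidReaderWriteIdList("default.demo_table:200:" + Long.MAX_VALUE);

        // Write ids at or below the high watermark (and not open/aborted) are readable.
        System.out.println("high watermark: " + writeIds.getHighWatermark());     // expected: 200
        System.out.println("writeId 150 valid: " + writeIds.isWriteIdValid(150)); // expected: true
        System.out.println("writeId 201 valid: " + writeIds.isWriteIdValid(201)); // expected: false

        // Round-trips to the string form the Worker above logs and stores.
        System.out.println(writeIds.writeToString());
    }
}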

Example 12 with ValidWriteIdList

use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.

the class Worker method run.

// todo: this doesn't check if compaction is already running (even though Initiator does, but we
// don't go through Initiator for user-initiated compactions)
@Override
public void run() {
    do {
        boolean launchedJob = false;
        // Nothing thrown here should escape and kill the thread, so wrap the whole body in a big catch Throwable statement.
        try {
            final CompactionInfo ci = txnHandler.findNextToCompact(name);
            if (ci == null && !stop.get()) {
                try {
                    Thread.sleep(SLEEP_TIME);
                    continue;
                } catch (InterruptedException e) {
                    LOG.warn("Worker thread sleep interrupted " + e.getMessage());
                    continue;
                }
            }
            // Find the table we will be working with.
            Table t1 = null;
            try {
                t1 = resolveTable(ci);
                if (t1 == null) {
                    LOG.info("Unable to find table " + ci.getFullTableName() + ", assuming it was dropped and moving on.");
                    txnHandler.markCleaned(ci);
                    continue;
                }
            } catch (MetaException e) {
                txnHandler.markCleaned(ci);
                continue;
            }
            // This chicanery is to get around the fact that the table needs to be final in order to
            // go into the doAs below.
            final Table t = t1;
            // Find the partition we will be working with, if there is one.
            Partition p = null;
            try {
                p = resolvePartition(ci);
                if (p == null && ci.partName != null) {
                    LOG.info("Unable to find partition " + ci.getFullPartitionName() + ", assuming it was dropped and moving on.");
                    txnHandler.markCleaned(ci);
                    continue;
                }
            } catch (Exception e) {
                txnHandler.markCleaned(ci);
                continue;
            }
            // Find the appropriate storage descriptor
            final StorageDescriptor sd = resolveStorageDescriptor(t, p);
            // Check that the table or partition isn't sorted, as we don't yet support that.
            if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
                LOG.error("Attempt to compact sorted table, which is not yet supported!");
                txnHandler.markCleaned(ci);
                continue;
            }
            final boolean isMajor = ci.isMajorCompaction();
            // Compaction doesn't work under a transaction, so pass null for the validTxnList
            // The response will have one entry per table, so we get only one ValidWriteIdList entry
            String fullTableName = TxnUtils.getFullTableName(t.getDbName(), t.getTableName());
            GetValidWriteIdsRequest rqst = new GetValidWriteIdsRequest(Collections.singletonList(fullTableName), null);
            final ValidWriteIdList tblValidWriteIds = TxnUtils.createValidCompactWriteIdList(txnHandler.getValidWriteIds(rqst).getTblValidWriteIds().get(0));
            LOG.debug("ValidCompactWriteIdList: " + tblValidWriteIds.writeToString());
            txnHandler.setCompactionHighestWriteId(ci, tblValidWriteIds.getHighWatermark());
            final StringBuilder jobName = new StringBuilder(name);
            jobName.append("-compactor-");
            jobName.append(ci.getFullPartitionName());
            // Determine who to run as
            String runAs;
            if (ci.runAs == null) {
                runAs = findUserToRunAs(sd.getLocation(), t);
                txnHandler.setRunAs(ci.id, runAs);
            } else {
                runAs = ci.runAs;
            }
            LOG.info("Starting " + ci.type.toString() + " compaction for " + ci.getFullPartitionName());
            final StatsUpdater su = StatsUpdater.init(ci, txnHandler.findColumnsWithStats(ci), conf, runJobAsSelf(runAs) ? runAs : t.getOwner());
            final CompactorMR mr = new CompactorMR();
            launchedJob = true;
            try {
                if (runJobAsSelf(runAs)) {
                    mr.run(conf, jobName.toString(), t, sd, tblValidWriteIds, ci, su, txnHandler);
                } else {
                    UserGroupInformation ugi = UserGroupInformation.createProxyUser(t.getOwner(), UserGroupInformation.getLoginUser());
                    ugi.doAs(new PrivilegedExceptionAction<Object>() {

                        @Override
                        public Object run() throws Exception {
                            mr.run(conf, jobName.toString(), t, sd, tblValidWriteIds, ci, su, txnHandler);
                            return null;
                        }
                    });
                    try {
                        FileSystem.closeAllForUGI(ugi);
                    } catch (IOException exception) {
                        LOG.error("Could not clean up file-system handles for UGI: " + ugi + " for " + ci.getFullPartitionName(), exception);
                    }
                }
                txnHandler.markCompacted(ci);
                if (conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST)) {
                    mrJob = mr.getMrJob();
                }
            } catch (Exception e) {
                LOG.error("Caught exception while trying to compact " + ci + ".  Marking failed to avoid repeated failures, " + StringUtils.stringifyException(e));
                txnHandler.markFailed(ci);
            }
        } catch (Throwable t) {
            LOG.error("Caught an exception in the main loop of compactor worker " + name + ", " + StringUtils.stringifyException(t));
        }
        // If we didn't launch a job and we're not stopping, sleep a bit before we restart the loop.
        if (!launchedJob && !stop.get()) {
            try {
                Thread.sleep(SLEEP_TIME);
            } catch (InterruptedException e) {
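                // Interrupted while sleeping; fall through and re-check the stop flag in the loop condition.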
            }
        }
    } while (!stop.get());
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) GetValidWriteIdsRequest(org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest) IOException(java.io.IOException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) CompactionInfo(org.apache.hadoop.hive.metastore.txn.CompactionInfo) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)
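
The doAs block above follows the standard Hadoop proxy-user pattern: impersonate the table owner from the service's login user, run the work, then close the FileSystem handles cached for that UGI. Here is a minimal sketch of just that pattern, with a caller-supplied action standing in for the CompactorMR run; the helper name runAsTableOwner is invented for illustration.

import java.io.IOException;
import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.security.UserGroupInformation;

public class ProxyUserSketch {

    // Run an action as tableOwner, proxied through the service's login user,
    // then release the FileSystem handles cached for that proxy UGI.
    static void runAsTableOwner(String tableOwner, PrivilegedExceptionAction<Void> action)
            throws IOException, InterruptedException {
        UserGroupInformation ugi =
                UserGroupInformation.createProxyUser(tableOwner, UserGroupInformation.getLoginUser());
        try {
            ugi.doAs(action);
        } finally {
            try {
                FileSystem.closeAllForUGI(ugi);
            } catch (IOException e) {
                // Same policy as the Worker above: log and keep going.
                System.err.println("Could not clean up file-system handles for UGI: " + ugi);
            }
        }
    }
}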

Example 13 with ValidWriteIdList

use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.

the class FetchOperator method processCurrPathForMmWriteIds.

private String processCurrPathForMmWriteIds(InputFormat inputFormat) throws IOException {
    if (inputFormat instanceof HiveInputFormat) {
        // No need to process here.
        return StringUtils.escapeString(currPath.toString());
    }
    ValidWriteIdList validWriteIdList;
    if (AcidUtils.isInsertOnlyTable(currDesc.getTableDesc().getProperties())) {
        validWriteIdList = extractValidWriteIdList();
    } else {
        // non-MM case
        validWriteIdList = null;
    }
    if (validWriteIdList != null) {
        Utilities.FILE_OP_LOGGER.info("Processing " + currDesc.getTableName() + " for MM paths");
    }
    Path[] dirs = HiveInputFormat.processPathsForMmRead(Lists.newArrayList(currPath), job, validWriteIdList);
    if (dirs == null || dirs.length == 0) {
        // No valid inputs. This condition is logged inside the call.
        return null;
    }
    StringBuffer str = new StringBuffer(StringUtils.escapeString(dirs[0].toString()));
    for (int i = 1; i < dirs.length; i++) {
        str.append(",").append(StringUtils.escapeString(dirs[i].toString()));
    }
    return str.toString();
}
Also used : Path(org.apache.hadoop.fs.Path) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList)
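
The branch above hinges on AcidUtils.isInsertOnlyTable, i.e. whether the table is an insert-only (MM) transactional table. As a rough, non-authoritative sketch of that decision, the check boils down to two standard table parameters, transactional and transactional_properties; the helper below is illustrative only and not a substitute for the AcidUtils call used in the snippet.

import java.util.Properties;

public class InsertOnlyCheckSketch {

    // Rough stand-in for the isInsertOnlyTable decision: an insert-only (MM) table is a
    // transactional table whose transactional_properties are set to "insert_only".
    static boolean looksInsertOnly(Properties tableProps) {
        boolean transactional =
                Boolean.parseBoolean(tableProps.getProperty("transactional", "false"));
        String txnProps = tableProps.getProperty("transactional_properties", "");
        return transactional && "insert_only".equalsIgnoreCase(txnProps);
    }

    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("transactional", "true");
        props.setProperty("transactional_properties", "insert_only");
        System.out.println(looksInsertOnly(props)); // true -> take the MM write-id path
    }
}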

Example 14 with ValidWriteIdList

use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.

the class Driver method recordValidWriteIds.

// Write the current set of valid write ids for the operated acid tables into the conf file so
// that it can be read by the input format.
private void recordValidWriteIds(HiveTxnManager txnMgr) throws LockException {
    String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY);
    if ((txnString == null) || (txnString.isEmpty())) {
        throw new IllegalStateException("calling recordValidWritsIdss() without initializing ValidTxnList " + JavaUtils.txnIdToString(txnMgr.getCurrentTxnId()));
    }
    ValidTxnWriteIdList txnWriteIds = txnMgr.getValidWriteIds(getTransactionalTableList(plan), txnString);
    String writeIdStr = txnWriteIds.toString();
    conf.set(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY, writeIdStr);
    if (plan.getFetchTask() != null) {
        /**
         * This is needed for {@link HiveConf.ConfVars.HIVEFETCHTASKCONVERSION} optimization which
         * initializes JobConf in FetchOperator before recordValidTxns() but this has to be done
         * after locks are acquired to avoid race conditions in ACID.
         * This case is supported only for single source query.
         */
        Operator<?> source = plan.getFetchTask().getWork().getSource();
        if (source instanceof TableScanOperator) {
            TableScanOperator tsOp = (TableScanOperator) source;
            String fullTableName = AcidUtils.getFullTableName(tsOp.getConf().getDatabaseName(), tsOp.getConf().getTableName());
            ValidWriteIdList writeIdList = txnWriteIds.getTableValidWriteIdList(fullTableName);
            if (tsOp.getConf().isTranscationalTable() && (writeIdList == null)) {
                throw new IllegalStateException("ACID table: " + fullTableName + " is missing from the ValidWriteIdList config: " + writeIdStr);
            }
            if (writeIdList != null) {
                plan.getFetchTask().setValidWriteIdList(writeIdList.toString());
            }
        }
    }
    LOG.debug("Encoding valid txn write ids info " + writeIdStr + " txnid:" + txnMgr.getCurrentTxnId());
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList)
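
For context, the other end of this conf round-trip looks roughly like the sketch below: a consumer reads VALID_TABLES_WRITEIDS_KEY back, reparses it into a ValidTxnWriteIdList, and pulls out the per-table ValidWriteIdList. This assumes ValidTxnWriteIdList can be reconstructed from the string produced by toString(), which is what storing it in the conf implies; the lookup helper name is invented.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
import org.apache.hadoop.hive.common.ValidWriteIdList;

public class ReadBackWriteIdsSketch {

    // Consumer side of the conf key set above: rebuild the per-table list and pull out
    // the entry for one table. Returns null if no entry exists, which recordValidWriteIds
    // treats as an error for transactional tables.
    static ValidWriteIdList lookup(Configuration conf, String fullTableName) {
        String writeIdStr = conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
        if (writeIdStr == null || writeIdStr.isEmpty()) {
            return null;
        }
        ValidTxnWriteIdList txnWriteIds = new ValidTxnWriteIdList(writeIdStr);
        return txnWriteIds.getTableValidWriteIdList(fullTableName);
    }
}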

Example 15 with ValidWriteIdList

use of org.apache.hadoop.hive.common.ValidWriteIdList in project hive by apache.

the class TestOrcRawRecordMerger method testNewBaseAndDelta.

private void testNewBaseAndDelta(boolean use130Format) throws Exception {
    final int BUCKET = 10;
    String[] values = new String[] { "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth", "tenth" };
    Configuration conf = new Configuration();
    OrcOutputFormat of = new OrcOutputFormat();
    FileSystem fs = FileSystem.getLocal(conf);
    Path root = new Path(tmpDir, "testNewBaseAndDelta").makeQualified(fs);
    fs.delete(root, true);
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // write the base
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).inspector(inspector).bucket(BUCKET).finalDestination(root);
    final int BUCKET_PROPERTY = BucketCodec.V1.encode(options);
    if (!use130Format) {
        options.statementId(-1);
    }
    RecordUpdater ru = of.getRecordUpdater(root, options.writingBase(true).maximumWriteId(100));
    for (String v : values) {
        ru.insert(0, new MyRow(v));
    }
    ru.close(false);
    // write a delta
    ru = of.getRecordUpdater(root, options.writingBase(false).minimumWriteId(200).maximumWriteId(200).recordIdColumn(1));
    ru.update(200, new MyRow("update 1", 0, 0, BUCKET_PROPERTY));
    ru.update(200, new MyRow("update 2", 2, 0, BUCKET_PROPERTY));
    ru.update(200, new MyRow("update 3", 3, 0, BUCKET_PROPERTY));
    ru.delete(200, new MyRow("", 7, 0, BUCKET_PROPERTY));
    ru.delete(200, new MyRow("", 8, 0, BUCKET_PROPERTY));
    ru.close(false);
    ValidWriteIdList writeIdList = new ValidReaderWriteIdList("testNewBaseAndDelta:200:" + Long.MAX_VALUE);
    AcidUtils.Directory directory = AcidUtils.getAcidState(root, conf, writeIdList);
    assertEquals(new Path(root, "base_0000100"), directory.getBaseDirectory());
    assertEquals(new Path(root, use130Format ? AcidUtils.deleteDeltaSubdir(200, 200, 0) : AcidUtils.deleteDeltaSubdir(200, 200)), directory.getCurrentDirectories().get(0).getPath());
    assertEquals(new Path(root, use130Format ? AcidUtils.deltaSubdir(200, 200, 0) : AcidUtils.deltaSubdir(200, 200)), directory.getCurrentDirectories().get(1).getPath());
    Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(), BUCKET);
    Path deltaPath = AcidUtils.createBucketFile(directory.getCurrentDirectories().get(1).getPath(), BUCKET);
    Path deleteDeltaDir = directory.getCurrentDirectories().get(0).getPath();
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
    AcidUtils.setAcidOperationalProperties(conf, true, null);
    conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
    // the first "split" is for base/
    Reader baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
    OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, createMaximalTxnList(), new Reader.Options(), new Path[] { deleteDeltaDir }, new OrcRawRecordMerger.Options().isCompacting(false));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    RecordIdentifier id = merger.createKey();
    OrcStruct event = merger.createValue();
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 1, 0), id);
    assertEquals("second", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 4, 0), id);
    assertEquals("fifth", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 5, 0), id);
    assertEquals("sixth", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 6, 0), id);
    assertEquals("seventh", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 9, 0), id);
    assertEquals("tenth", getValue(event));
    assertEquals(false, merger.next(id, event));
    merger.close();
    // second "split" is delta_200_200
    baseReader = OrcFile.createReader(deltaPath, OrcFile.readerOptions(conf));
    merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, createMaximalTxnList(), new Reader.Options(), new Path[] { deleteDeltaDir }, new OrcRawRecordMerger.Options().isCompacting(false));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
    assertEquals("update 1", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
    assertEquals("update 2", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
    assertEquals("update 3", getValue(event));
    assertEquals(false, merger.next(id, event));
    merger.close();
    // now run as if it's a minor Compaction so we don't collapse events
    // here there is only 1 "split" since we only have data for 1 bucket
    merger = new OrcRawRecordMerger(conf, false, null, false, BUCKET, createMaximalTxnList(), new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()), new OrcRawRecordMerger.Options().isCompacting(true));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    assertEquals(true, merger.next(id, event));
    // minor comp, so we ignore 'base_0000100' files so all Deletes end up first since
    // they all modify primordial rows
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    // data from delta_200_200
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
    assertEquals("update 1", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
    assertEquals("update 2", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
    assertEquals("update 3", getValue(event));
    assertEquals(false, merger.next(id, event));
    merger.close();
    // now run as if it's a major Compaction so we collapse events
    // here there is only 1 "split" since we only have data for 1 bucket
    baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
    merger = new OrcRawRecordMerger(conf, true, null, false, BUCKET, createMaximalTxnList(), new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()), new OrcRawRecordMerger.Options().isCompacting(true).isMajorCompaction(true).baseDir(new Path(root, "base_0000100")));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 1, 0), id);
    assertEquals("second", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 4, 0), id);
    assertEquals("fifth", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 5, 0), id);
    assertEquals("sixth", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 6, 0), id);
    assertEquals("seventh", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 9, 0), id);
    assertEquals("tenth", getValue(event));
    // data from delta_200_200
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
    assertEquals("update 1", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
    assertEquals("update 2", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
    assertEquals("update 3", getValue(event));
    assertEquals(false, merger.next(id, event));
    merger.close();
    // try ignoring the 200 transaction and make sure it works still
    ValidWriteIdList writeIds = new ValidReaderWriteIdList("testNewBaseAndDelta:2000:200:200");
    // again 1st split is for base/
    baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
    merger = new OrcRawRecordMerger(conf, false, baseReader, false, BUCKET, writeIds, new Reader.Options(), new Path[] { deleteDeltaDir }, new OrcRawRecordMerger.Options().isCompacting(false));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    for (int i = 0; i < values.length; ++i) {
        assertEquals(true, merger.next(id, event));
        LOG.info("id = " + id + "event = " + event);
        assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
        assertEquals(new ReaderKey(0, BUCKET_PROPERTY, i, 0), id);
        assertEquals(values[i], getValue(event));
    }
    assertEquals(false, merger.next(id, event));
    merger.close();
    // 2nd split is for delta_200_200 which is filtered out entirely by "txns"
    baseReader = OrcFile.createReader(deltaPath, OrcFile.readerOptions(conf));
    merger = new OrcRawRecordMerger(conf, false, baseReader, false, BUCKET, writeIds, new Reader.Options(), new Path[] { deleteDeltaDir }, new OrcRawRecordMerger.Options().isCompacting(false));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    assertEquals(false, merger.next(id, event));
    merger.close();
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) FileSystem(org.apache.hadoop.fs.FileSystem) ReaderKey(org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey) RecordUpdater(org.apache.hadoop.hive.ql.io.RecordUpdater) Path(org.apache.hadoop.fs.Path) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) ValidReaderWriteIdList(org.apache.hadoop.hive.common.ValidReaderWriteIdList) OrcAcidUtils(org.apache.orc.impl.OrcAcidUtils) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils)
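
The directory-name assertions above toggle between the pre-1.3.0 and 1.3.0+ ACID layouts via use130Format. The small sketch below simply prints the names AcidUtils generates for the write-id range 200..200 using the same calls as the test; the padded forms in the comments follow the base_0000100 / delta_0000200_0000200 pattern asserted in the test and should be read as expected output, not a specification.

import org.apache.hadoop.hive.ql.io.AcidUtils;

public class AcidDirNamesSketch {
    public static void main(String[] args) {
        // Pre-Hive-1.3.0 style: no statement-id suffix.
        System.out.println(AcidUtils.deltaSubdir(200, 200));          // expected: delta_0000200_0000200
        System.out.println(AcidUtils.deleteDeltaSubdir(200, 200));    // expected: delete_delta_0000200_0000200

        // 1.3.0+ style: statement id appended, which is what use130Format exercises.
        System.out.println(AcidUtils.deltaSubdir(200, 200, 0));       // expected: delta_0000200_0000200_0000
        System.out.println(AcidUtils.deleteDeltaSubdir(200, 200, 0)); // expected: delete_delta_0000200_0000200_0000
    }
}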

Aggregations

ValidWriteIdList (org.apache.hadoop.hive.common.ValidWriteIdList) 16
AcidUtils (org.apache.hadoop.hive.ql.io.AcidUtils) 6
Path (org.apache.hadoop.fs.Path) 5
OrcAcidUtils (org.apache.orc.impl.OrcAcidUtils) 5
Configuration (org.apache.hadoop.conf.Configuration) 4
ValidReaderWriteIdList (org.apache.hadoop.hive.common.ValidReaderWriteIdList) 4
RecordIdentifier (org.apache.hadoop.hive.ql.io.RecordIdentifier) 4
IOException (java.io.IOException) 3
Partition (org.apache.hadoop.hive.metastore.api.Partition) 3
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor) 3
Table (org.apache.hadoop.hive.metastore.api.Table) 3
AcidOutputFormat (org.apache.hadoop.hive.ql.io.AcidOutputFormat) 3
FileSystem (org.apache.hadoop.fs.FileSystem) 2
GetValidWriteIdsRequest (org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest) 2
MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 2
CompactionInfo (org.apache.hadoop.hive.metastore.txn.CompactionInfo) 2
OrcInputFormat (org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) 2
OrcStruct (org.apache.hadoop.hive.ql.io.orc.OrcStruct) 2
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 2
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 2