Search in sources:

Example 6 with OrcTail

use of org.apache.orc.impl.OrcTail in project hive by apache.

the class VectorizedOrcAcidRowBatchReader method getOrcReaderData.

/**
 * Gets the OrcTail from cache if LLAP IO is enabled, otherwise creates the reader to get the tail.
 * Always store the Reader along with the Tail as part of ReaderData so we can reuse it.
 * @param path The Orc file path we want to get the OrcTail for
 * @param conf The Configuration to access LLAP
 * @param cacheTag The cacheTag needed to get OrcTail from LLAP IO cache
 * @param fileKey fileId of the Orc file (either the Long fileId of HDFS or the SyntheticFileId).
 *                Optional; if it is not provided, it will be generated. See:
 *                {@link org.apache.hadoop.hive.ql.io.HdfsUtils#getFileId}
 * @return ReaderData object where the orcTail is not null. Reader can be null, but if we had to create
 * one we return that as well for further reuse.
 */
private static ReaderData getOrcReaderData(Path path, Configuration conf, CacheTag cacheTag, Object fileKey) throws IOException {
    ReaderData readerData = new ReaderData();
    if (shouldReadDeleteDeltasWithLlap(conf, true)) {
        try {
            readerData.orcTail = LlapProxy.getIo().getOrcTailFromCache(path, conf, cacheTag, fileKey);
            readerData.reader = OrcFile.createReader(path, OrcFile.readerOptions(conf).orcTail(readerData.orcTail));
            // Return early: without this, the cached tail and reader would be discarded below.
            return readerData;
        } catch (IllegalCacheConfigurationException icce) {
            throw new IOException("LLAP cache is not configured properly while delete delta caching is turned on", icce);
        }
    }
    readerData.reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    readerData.orcTail = new OrcTail(readerData.reader.getFileTail(), readerData.reader.getSerializedFileFooter());
    return readerData;
}
Also used: IllegalCacheConfigurationException (org.apache.hadoop.hive.llap.IllegalCacheConfigurationException), IOException (java.io.IOException), OrcTail (org.apache.orc.impl.OrcTail)
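For reference, ReaderData is just a small holder pairing the tail with the reader that produced it. A minimal sketch of such a holder, with field names taken from the snippet above (the real class is a private detail of VectorizedOrcAcidRowBatchReader and may carry more):

private static final class ReaderData {

    // Tail of the ORC file; intended to be non-null whenever getOrcReaderData() returns.
    OrcTail orcTail;

    // Reader created along the way; kept so callers can reuse it instead of reopening the file.
    Reader reader;
}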

Example 7 with OrcTail

use of org.apache.orc.impl.OrcTail in project hive by apache.

the class OrcFileFormatProxy method applySargToMetadata.

@Override
public SplitInfos applySargToMetadata(SearchArgument sarg, ByteBuffer fileMetadata, Configuration conf) throws IOException {
    // TODO: ideally we should store shortened representation of only the necessary fields
    // in HBase; it will probably require custom SARG application code.
    OrcTail orcTail = ReaderImpl.extractFileTail(fileMetadata);
    OrcProto.Footer footer = orcTail.getFooter();
    int stripeCount = footer.getStripesCount();
    // Always convert to PROLEPTIC_GREGORIAN
    List<StripeStatistics> stripeStats;
    try (org.apache.orc.Reader dummyReader = new org.apache.orc.impl.ReaderImpl(null,
            org.apache.orc.OrcFile.readerOptions(org.apache.orc.OrcFile.readerOptions(conf).getConfiguration())
                .useUTCTimestamp(true)
                .convertToProlepticGregorian(true)
                .orcTail(orcTail))) {
        stripeStats = dummyReader.getVariantStripeStatistics(null);
    }
    boolean[] result = OrcInputFormat.pickStripesViaTranslatedSarg(sarg, orcTail.getWriterVersion(), footer.getTypesList(), stripeStats, stripeCount);
    // For ORC case, send the boundaries of the stripes so we don't have to send the footer.
    SplitInfos.Builder sb = SplitInfos.newBuilder();
    List<StripeInformation> stripes = orcTail.getStripes();
    boolean isEliminated = true;
    for (int i = 0; i < stripes.size(); ++i) {
        // A null result means the SARG could not eliminate anything, so every stripe is kept;
        // looping over stripes.size() rather than result.length avoids an NPE in that case.
        if (result != null && !result[i])
            continue;
        isEliminated = false;
        StripeInformation si = stripes.get(i);
        if (LOG.isDebugEnabled()) {
            LOG.debug("PPD is adding a split " + i + ": " + si.getOffset() + ", " + si.getLength());
        }
        sb.addInfos(SplitInfo.newBuilder().setIndex(i).setOffset(si.getOffset()).setLength(si.getLength()));
    }
    return isEliminated ? null : sb.build();
}
Also used: OrcProto (org.apache.orc.OrcProto), StripeStatistics (org.apache.orc.StripeStatistics), SplitInfos (org.apache.hadoop.hive.metastore.Metastore.SplitInfos), StripeInformation (org.apache.orc.StripeInformation), OrcTail (org.apache.orc.impl.OrcTail)
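For context, the SearchArgument applied above is typically built with the builder from the Hive storage API. A hedged sketch; the column name "x" and the predicate are invented for illustration:

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

// Keep only stripes whose statistics admit rows with x < 100; all other stripes
// are eliminated by the pickStripesViaTranslatedSarg() call shown above.
SearchArgument sarg = SearchArgumentFactory.newBuilder()
    .startAnd()
    .lessThan("x", PredicateLeaf.Type.LONG, 100L)
    .end()
    .build();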

Example 8 with OrcTail

use of org.apache.orc.impl.OrcTail in project hive by apache.

the class TestEncodedOrcFile method testFileSystemIsNotInitializedWithKnownTail.

@Test
public void testFileSystemIsNotInitializedWithKnownTail() throws IOException {
    JobConf conf = new JobConf();
    Path path = new Path("fmock:///testtable/bucket_0");
    conf.set("hive.orc.splits.include.file.footer", "true");
    conf.set("fs.defaultFS", "fmock:///");
    conf.set("fs.mock.impl", FailingMockFileSystem.class.getName());
    OrcProto.FileTail tail = OrcProto.FileTail.newBuilder()
            .setFooter(Footer.newBuilder()
                .addTypes(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.BINARY).build())
                .build())
            .build();
    OrcFile.ReaderOptions readerOptions = EncodedOrcFile.readerOptions(conf).filesystem(() -> {
        throw new RuntimeException("Filesystem should not have been initialized");
    }).orcTail(new OrcTail(tail, new BufferChunk(0, 0), -1));
    // An ORC reader is created; this should not cause filesystem initialization,
    // because the ORC tail is already provided and we are not making any real reads.
    Reader reader = EncodedOrcFile.createReader(path, readerOptions);
    // The following initiates the creation of the data reader inside the ORC reader.
    // This should not cause file system initialization either, as we are still not
    // making any real reads.
    reader.rows();
}
Also used: Path (org.apache.hadoop.fs.Path), OrcFile (org.apache.hadoop.hive.ql.io.orc.OrcFile), OrcProto (org.apache.orc.OrcProto), BufferChunk (org.apache.orc.impl.BufferChunk), JobConf (org.apache.hadoop.mapred.JobConf), OrcTail (org.apache.orc.impl.OrcTail), Test (org.junit.Test)
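The test relies on FailingMockFileSystem being registered for the fmock:// scheme via fs.mock.impl. A hedged sketch of what such a mock can look like (the actual test class may differ): every operation throws, so the test fails loudly if the ORC reader ever touches the filesystem.

import java.net.URI;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Progressable;

public class FailingMockFileSystem extends FileSystem {

    private static RuntimeException fail() {
        return new RuntimeException("Filesystem should not have been initialized");
    }

    // Scheme identity only; everything else blows up on contact.
    @Override public URI getUri() { return URI.create("fmock:///"); }

    @Override public FSDataInputStream open(Path f, int bufferSize) { throw fail(); }
    @Override public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite,
        int bufferSize, short replication, long blockSize, Progressable progress) { throw fail(); }
    @Override public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) { throw fail(); }
    @Override public boolean rename(Path src, Path dst) { throw fail(); }
    @Override public boolean delete(Path f, boolean recursive) { throw fail(); }
    @Override public FileStatus[] listStatus(Path f) { throw fail(); }
    @Override public void setWorkingDirectory(Path newDir) { throw fail(); }
    @Override public Path getWorkingDirectory() { throw fail(); }
    @Override public boolean mkdirs(Path f, FsPermission permission) { throw fail(); }
    @Override public FileStatus getFileStatus(Path f) { throw fail(); }
}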

Example 9 with OrcTail

use of org.apache.orc.impl.OrcTail in project hive by apache.

the class OrcSplit method readFields.

@Override
public void readFields(DataInput in) throws IOException {
    // deserialize path, offset, length using FileSplit
    super.readFields(in);
    byte flags = in.readByte();
    hasFooter = (FOOTER_FLAG & flags) != 0;
    isOriginal = (ORIGINAL_FLAG & flags) != 0;
    hasBase = (BASE_FLAG & flags) != 0;
    boolean hasLongFileId = (HAS_LONG_FILEID_FLAG & flags) != 0,
        hasWritableFileId = (HAS_SYNTHETIC_FILEID_FLAG & flags) != 0,
        hasSyntheticProps = (HAS_SYNTHETIC_ACID_PROPS_FLAG & flags) != 0;
    if (hasLongFileId && hasWritableFileId) {
        throw new IOException("Invalid split - both file ID types present");
    }
    deltas.clear();
    int numDeltas = in.readInt();
    for (int i = 0; i < numDeltas; i++) {
        AcidInputFormat.DeltaMetaData dmd = new AcidInputFormat.DeltaMetaData();
        dmd.readFields(in);
        deltas.add(dmd);
    }
    if (hasFooter) {
        int tailLen = WritableUtils.readVInt(in);
        byte[] tailBuffer = new byte[tailLen];
        in.readFully(tailBuffer);
        OrcProto.FileTail fileTail = OrcProto.FileTail.parseFrom(tailBuffer);
        orcTail = new OrcTail(fileTail, null);
    }
    if (hasLongFileId) {
        fileKey = in.readLong();
    } else if (hasWritableFileId) {
        SyntheticFileId fileId = new SyntheticFileId();
        fileId.readFields(in);
        this.fileKey = fileId;
    }
    fileLen = in.readLong();
    rootDir = new Path(in.readUTF());
    if (hasSyntheticProps) {
        long rowId = in.readLong();
        int bucket = in.readInt();
        long writeId = in.readLong();
        syntheticAcidProps = new OffsetAndBucketProperty(rowId, bucket, writeId);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), SyntheticFileId (org.apache.hadoop.hive.ql.io.SyntheticFileId), OrcProto (org.apache.orc.OrcProto), IOException (java.io.IOException), AcidInputFormat (org.apache.hadoop.hive.ql.io.AcidInputFormat), OrcTail (org.apache.orc.impl.OrcTail)
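The hasFooter branch above has a mirror image on the write side. A condensed, hedged sketch of just that part, assuming the serializer uses OrcTail.getMinimalFileTail() the way OrcSplit.write does (the real method also emits the flag byte, deltas, file key, length, root dir and ACID properties):

import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableUtils;
import org.apache.orc.impl.OrcTail;

private static void writeTail(DataOutput out, OrcTail orcTail) throws IOException {
    if (orcTail != null) {
        // getMinimalFileTail() keeps only what readFields() needs to rebuild the tail.
        byte[] tailBuffer = orcTail.getMinimalFileTail().toByteArray();
        WritableUtils.writeVInt(out, tailBuffer.length);
        out.write(tailBuffer);
    }
}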

Example 10 with OrcTail

use of org.apache.orc.impl.OrcTail in project hive by apache.

the class OrcEncodedDataReader method ensureOrcReader.

/**
 * Ensures orcReader is initialized for the split.
 */
private void ensureOrcReader() throws IOException {
    if (orcReader != null)
        return;
    path = split.getPath();
    if (fileKey instanceof Long && HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_USE_FILEID_PATH)) {
        path = HdfsUtils.getFileIdPath(path, (long) fileKey);
    }
    LlapIoImpl.ORC_LOGGER.trace("Creating reader for {} ({})", path, split.getPath());
    long startTime = counters.startTimeCounter();
    ReaderOptions opts = EncodedOrcFile.readerOptions(jobConf).filesystem(fsSupplier).fileMetadata(fileMetadata);
    if (split instanceof OrcSplit) {
        OrcTail orcTail = ((OrcSplit) split).getOrcTail();
        if (orcTail != null) {
            LlapIoImpl.ORC_LOGGER.debug("Setting OrcTail. path={}", path);
            opts.orcTail(orcTail);
        }
    }
    orcReader = EncodedOrcFile.createReader(path, opts);
    counters.incrWallClockCounter(LlapIOCounters.HDFS_TIME_NS, startTime);
}
Also used: ReaderOptions (org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions), OrcSplit (org.apache.hadoop.hive.ql.io.orc.OrcSplit), OrcTail (org.apache.orc.impl.OrcTail)
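Taken together, the examples share one pattern: whenever a tail is already at hand (from a split, the LLAP cache, or serialized metadata), pass it through ReaderOptions.orcTail() so the reader can skip the footer read. A minimal sketch; cachedTail stands in for any previously obtained OrcTail:

OrcFile.ReaderOptions opts = OrcFile.readerOptions(conf);
if (cachedTail != null) {
    // With a pre-supplied tail, the reader never touches the file for metadata.
    opts.orcTail(cachedTail);
}
Reader reader = OrcFile.createReader(path, opts);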

Aggregations

OrcTail (org.apache.orc.impl.OrcTail): 13
OrcProto (org.apache.orc.OrcProto): 6
IOException (java.io.IOException): 4
Path (org.apache.hadoop.fs.Path): 4
ByteBuffer (java.nio.ByteBuffer): 3
OrcSplit (org.apache.hadoop.hive.ql.io.orc.OrcSplit): 3
StripeInformation (org.apache.orc.StripeInformation): 3
Test (org.junit.Test): 3
Configuration (org.apache.hadoop.conf.Configuration): 2
FileStatus (org.apache.hadoop.fs.FileStatus): 2
CacheTag (org.apache.hadoop.hive.common.io.CacheTag): 2
MetadataCache (org.apache.hadoop.hive.llap.io.metadata.MetadataCache): 2
LlapDaemonCacheMetrics (org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics): 2
SplitInfos (org.apache.hadoop.hive.metastore.Metastore.SplitInfos): 2
AcidInputFormat (org.apache.hadoop.hive.ql.io.AcidInputFormat): 2
SyntheticFileId (org.apache.hadoop.hive.ql.io.SyntheticFileId): 2
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2
FileSystem (org.apache.hadoop.fs.FileSystem): 1
IllegalCacheConfigurationException (org.apache.hadoop.hive.llap.IllegalCacheConfigurationException): 1
LlapBufferOrBuffers (org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers): 1