Example 1 with SyntheticFileId

Use of org.apache.hadoop.hive.ql.io.SyntheticFileId in project hive by apache.

From the class LlapCacheMetadataSerializer, method encodeFileKey.

@VisibleForTesting
static ByteString encodeFileKey(Object fileKey) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
    if (fileKey instanceof SyntheticFileId) {
        // synthetic ids span multiple fields and know how to serialize themselves
        SyntheticFileId fk = (SyntheticFileId) fileKey;
        fk.write(dos);
    } else {
        // the only other supported key type is a plain Long file id
        dos.writeLong((Long) fileKey);
    }
    return ByteString.copyFrom(baos.toByteArray());
}
Also used: SyntheticFileId (org.apache.hadoop.hive.ql.io.SyntheticFileId), DataOutputStream (java.io.DataOutputStream), ByteArrayOutputStream (java.io.ByteArrayOutputStream), VisibleForTesting (com.google.common.annotations.VisibleForTesting)
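
LlapCacheMetadataSerializer also has a decodeFileKey counterpart (exercised in Example 4 below). A minimal sketch of what that inverse could look like, assuming a serialized SyntheticFileId is always longer than the 8 bytes of a plain Long; the length-based branch is an assumption, not taken from this page:

@VisibleForTesting
static Object decodeFileKey(ByteString encodedFileKey) throws IOException {
    byte[] bytes = encodedFileKey.toByteArray();
    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes));
    if (bytes.length == Long.BYTES) {
        // an 8-byte payload can only be the plain Long variant
        return dis.readLong();
    }
    // anything longer is assumed to be a serialized SyntheticFileId
    SyntheticFileId fileId = new SyntheticFileId();
    fileId.readFields(dis);
    return fileId;
}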

Example 2 with SyntheticFileId

Use of org.apache.hadoop.hive.ql.io.SyntheticFileId in project hive by apache.

From the class TestOrcMetadataCache, method testGetOrcTailForPathWithFileIdChange.

@Test
public void testGetOrcTailForPathWithFileIdChange() throws Exception {
    DummyMemoryManager mm = new DummyMemoryManager();
    DummyCachePolicy cp = new DummyCachePolicy();
    final int MAX_ALLOC = 64;
    LlapDaemonCacheMetrics metrics = LlapDaemonCacheMetrics.create("", "");
    BuddyAllocator alloc = new BuddyAllocator(false, false, 8, MAX_ALLOC, 1, 4 * 4096, 0, null, mm, metrics, null, true);
    MetadataCache cache = new MetadataCache(alloc, mm, cp, true, metrics);
    Path path = new Path("../data/files/alltypesorc");
    Configuration jobConf = new Configuration();
    Configuration daemonConf = new Configuration();
    CacheTag tag = CacheTag.build("test-table");
    // prime the cache for this path with an initial synthetic file id
    OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf, cache, new SyntheticFileId(path, 100, 100));
    // from here on, only cached data may be served; a cache miss becomes an error
    jobConf.set(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname, "true");
    Exception ex = null;
    try {
        // this should miss the cache, since the fileKey changed
        OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf, cache, new SyntheticFileId(path, 100, 101));
        fail();
    } catch (IOException e) {
        ex = e;
    }
    Assert.assertTrue(ex.getMessage().contains(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname));
}
Also used: Path (org.apache.hadoop.fs.Path), LlapDaemonCacheMetrics (org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics), Configuration (org.apache.hadoop.conf.Configuration), SyntheticFileId (org.apache.hadoop.hive.ql.io.SyntheticFileId), MetadataCache (org.apache.hadoop.hive.llap.io.metadata.MetadataCache), CacheTag (org.apache.hadoop.hive.common.io.CacheTag), IOException (java.io.IOException), IllegalCacheConfigurationException (org.apache.hadoop.hive.llap.IllegalCacheConfigurationException), Test (org.junit.Test)
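
The two getOrcTailForPath calls above differ only in the last SyntheticFileId constructor argument, which is what changes the file key and forces the cache miss. Outside of a test, the key would normally be derived from the file's real attributes; a hedged sketch, where the parameter order (path, length, modification time) is inferred from the literal values used above and is an assumption:

FileSystem fs = path.getFileSystem(daemonConf);
FileStatus status = fs.getFileStatus(path);
// assumed constructor semantics: (path, file length, modification time)
SyntheticFileId fileKey = new SyntheticFileId(path, status.getLen(), status.getModificationTime());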

Example 3 with SyntheticFileId

Use of org.apache.hadoop.hive.ql.io.SyntheticFileId in project hive by apache.

From the class OrcSplit, method readFields.

@Override
public void readFields(DataInput in) throws IOException {
    // deserialize path, offset, length using FileSplit
    super.readFields(in);
    byte flags = in.readByte();
    hasFooter = (FOOTER_FLAG & flags) != 0;
    isOriginal = (ORIGINAL_FLAG & flags) != 0;
    hasBase = (BASE_FLAG & flags) != 0;
    boolean hasLongFileId = (HAS_LONG_FILEID_FLAG & flags) != 0;
    boolean hasWritableFileId = (HAS_SYNTHETIC_FILEID_FLAG & flags) != 0;
    boolean hasSyntheticProps = (HAS_SYNTHETIC_ACID_PROPS_FLAG & flags) != 0;
    if (hasLongFileId && hasWritableFileId) {
        throw new IOException("Invalid split - both file ID types present");
    }
    deltas.clear();
    int numDeltas = in.readInt();
    for (int i = 0; i < numDeltas; i++) {
        AcidInputFormat.DeltaMetaData dmd = new AcidInputFormat.DeltaMetaData();
        dmd.readFields(in);
        deltas.add(dmd);
    }
    if (hasFooter) {
        int tailLen = WritableUtils.readVInt(in);
        byte[] tailBuffer = new byte[tailLen];
        in.readFully(tailBuffer);
        OrcProto.FileTail fileTail = OrcProto.FileTail.parseFrom(tailBuffer);
        orcTail = new OrcTail(fileTail, null);
    }
    if (hasLongFileId) {
        // plain Long file id: a single long on the wire
        fileKey = in.readLong();
    } else if (hasWritableFileId) {
        // synthetic id: deserialized via its own readFields
        SyntheticFileId fileId = new SyntheticFileId();
        fileId.readFields(in);
        this.fileKey = fileId;
    }
    fileLen = in.readLong();
    rootDir = new Path(in.readUTF());
    if (hasSyntheticProps) {
        long rowId = in.readLong();
        int bucket = in.readInt();
        long writeId = in.readLong();
        syntheticAcidProps = new OffsetAndBucketProperty(rowId, bucket, writeId);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), SyntheticFileId (org.apache.hadoop.hive.ql.io.SyntheticFileId), OrcProto (org.apache.orc.OrcProto), IOException (java.io.IOException), AcidInputFormat (org.apache.hadoop.hive.ql.io.AcidInputFormat), OrcTail (org.apache.orc.impl.OrcTail)
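
readFields implies a mirror-image write path. A sketch of how the flags byte could be assembled there, inferred purely from the decoding logic above; the flag constants and fields are taken from this snippet, while the surrounding write method is hypothetical:

// inside a hypothetical write(DataOutput out) counterpart
byte flags = (byte) ((hasFooter ? FOOTER_FLAG : 0)
        | (isOriginal ? ORIGINAL_FLAG : 0)
        | (hasBase ? BASE_FLAG : 0)
        | (fileKey instanceof Long ? HAS_LONG_FILEID_FLAG : 0)
        | (fileKey instanceof SyntheticFileId ? HAS_SYNTHETIC_FILEID_FLAG : 0)
        | (syntheticAcidProps != null ? HAS_SYNTHETIC_ACID_PROPS_FLAG : 0));
out.writeByte(flags);

This also explains the guard in readFields: a split can carry at most one of the two file id encodings.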

Example 4 with SyntheticFileId

Use of org.apache.hadoop.hive.ql.io.SyntheticFileId in project hive by apache.

From the class TestLlapCacheMetadataSerializer, method testEncodeDecodeSyntheticFileKey.

@Test
public void testEncodeDecodeSyntheticFileKey() throws IOException {
    SyntheticFileId originalKey = new SyntheticFileId(new Path("dummy"), 123L, 99999999L);
    ByteString encodedKey = serializer.encodeFileKey(originalKey);
    Object decodedKey = serializer.decodeFileKey(encodedKey);
    assertEquals(originalKey, decodedKey);
}
Also used: Path (org.apache.hadoop.fs.Path), SyntheticFileId (org.apache.hadoop.hive.ql.io.SyntheticFileId), ByteString (com.google.protobuf.ByteString), Test (org.junit.Test)
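
The same round trip should hold for the plain Long variant that encodeFileKey supports. A companion test sketch, not part of the original test class:

@Test
public void testEncodeDecodeLongFileKey() throws IOException {
    Long originalKey = 42L;
    ByteString encodedKey = serializer.encodeFileKey(originalKey);
    Object decodedKey = serializer.decodeFileKey(encodedKey);
    assertEquals(originalKey, decodedKey);
}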

Example 5 with SyntheticFileId

Use of org.apache.hadoop.hive.ql.io.SyntheticFileId in project hive by apache.

From the class TestLlapCacheMetadataSerializer, method createDummyMetadata.

private LlapDaemonProtocolProtos.CacheEntryList createDummyMetadata() throws IOException {
    LlapDaemonProtocolProtos.CacheEntryRange re1 =
        LlapDaemonProtocolProtos.CacheEntryRange.newBuilder().setStart(3L).setEnd(14L).build();
    LlapDaemonProtocolProtos.CacheEntryRange re2 =
        LlapDaemonProtocolProtos.CacheEntryRange.newBuilder().setStart(14L).setEnd(38L).build();
    LlapDaemonProtocolProtos.CacheTag ct =
        LlapDaemonProtocolProtos.CacheTag.newBuilder().setTableName("dummyTable").build();
    Path path = new Path(TEST_PATH);
    SyntheticFileId syntheticFileId = fileId(path);
    pathCache.touch(syntheticFileId, path.toUri().toString());
    ByteString fileKey = serializer.encodeFileKey(syntheticFileId);
    LlapDaemonProtocolProtos.CacheEntry ce = LlapDaemonProtocolProtos.CacheEntry.newBuilder()
        .setCacheTag(ct)
        .setFilePath(TEST_PATH)
        .setFileKey(fileKey)
        .addRanges(re2)
        .addRanges(re1)
        .build();
    return LlapDaemonProtocolProtos.CacheEntryList.newBuilder().addEntries(ce).build();
}
Also used: Path (org.apache.hadoop.fs.Path), SyntheticFileId (org.apache.hadoop.hive.ql.io.SyntheticFileId), ByteString (com.google.protobuf.ByteString), LlapDaemonProtocolProtos (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos)
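
The fileId(path) helper is not shown on this page. One plausible shape for it, assuming it derives the id from the file's actual status; both the helper body and the constructor semantics are assumptions:

private SyntheticFileId fileId(Path path) throws IOException {
    // hypothetical helper: build the synthetic id from the file's real attributes
    FileStatus status = path.getFileSystem(new Configuration()).getFileStatus(path);
    return new SyntheticFileId(path, status.getLen(), status.getModificationTime());
}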

Aggregations

SyntheticFileId (org.apache.hadoop.hive.ql.io.SyntheticFileId): 8
Path (org.apache.hadoop.fs.Path): 6
IOException (java.io.IOException): 3
Test (org.junit.Test): 3
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 2
ByteString (com.google.protobuf.ByteString): 2
Configuration (org.apache.hadoop.conf.Configuration): 2
CacheTag (org.apache.hadoop.hive.common.io.CacheTag): 2
MetadataCache (org.apache.hadoop.hive.llap.io.metadata.MetadataCache): 2
LlapDaemonCacheMetrics (org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics): 2
OrcTail (org.apache.orc.impl.OrcTail): 2
ByteArrayInputStream (java.io.ByteArrayInputStream): 1
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1
DataInput (java.io.DataInput): 1
DataInputStream (java.io.DataInputStream): 1
DataOutputStream (java.io.DataOutputStream): 1
FileStatus (org.apache.hadoop.fs.FileStatus): 1
FileSystem (org.apache.hadoop.fs.FileSystem): 1
IllegalCacheConfigurationException (org.apache.hadoop.hive.llap.IllegalCacheConfigurationException): 1
LlapDaemonProtocolProtos (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos): 1