Search in sources :

Example 1 with HdfsFileStatusWithoutId

Use of org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId in the Apache Hive project.

From the class TestAcidInputFormat, method testDeltaMetaWithFileMultiStatement:

@Test
public void testDeltaMetaWithFileMultiStatement() throws Exception {
    // One delta file entry associated with statement id 97, bucket 1.
    FileStatus status = new FileStatus(200, false, 100, 100, 100, new Path("mypath"));
    AcidInputFormat.DeltaFileMetaData fileMeta =
            new AcidInputFormat.DeltaFileMetaData(new HdfsFileStatusWithoutId(status), 97, 1);
    DeltaMetaData original = new AcidInputFormat.DeltaMetaData(
            2000L, 2001L, Arrays.asList(97, 98, 99), 0, Collections.singletonList(fileMeta));

    assertEquals(2000L, original.getMinWriteId());
    assertEquals(2001L, original.getMaxWriteId());
    assertEquals(3, original.getStmtIds().size());

    // Round-trip the metadata through its Writable serialization.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    original.write(new DataOutputStream(buffer));
    DeltaMetaData roundTripped = new DeltaMetaData();
    roundTripped.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

    assertEquals(2000L, roundTripped.getMinWriteId());
    assertEquals(2001L, roundTripped.getMaxWriteId());
    assertEquals(3, roundTripped.getStmtIds().size());

    // No real file id was stored, so a SyntheticFileId built from the
    // original FileStatus (modTime=100, length=200) is expected.
    Object fileId = roundTripped.getDeltaFiles().get(0).getFileId(new Path("deleteDelta"), 1, new HiveConf());
    Assert.assertTrue(fileId instanceof SyntheticFileId);
    assertEquals(100, ((SyntheticFileId) fileId).getModTime());
    assertEquals(200, ((SyntheticFileId) fileId).getLength());

    // Statement-id lookup: 97 owns the single file, 99 owns none.
    assertEquals(1, roundTripped.getDeltaFilesForStmtId(97).size());
    assertEquals(0, roundTripped.getDeltaFilesForStmtId(99).size());
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) DataOutputStream(java.io.DataOutputStream) DeltaMetaData(org.apache.hadoop.hive.ql.io.AcidInputFormat.DeltaMetaData) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataInputStream(java.io.DataInputStream) HdfsFileStatusWithoutId(org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId) ByteArrayInputStream(java.io.ByteArrayInputStream) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Test(org.junit.Test)

Example 2 with HdfsFileStatusWithoutId

use of org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId in project hive by apache.

From the class AcidUtils, method parsedDelta:

/**
 * Builds a full ParsedDelta for the given delta directory, optionally
 * pre-collecting its bucket files from an already-taken directory snapshot.
 */
private static ParsedDelta parsedDelta(Path deltaDir, FileSystem fs, HdfsDirSnapshot dirSnapshot) throws IOException {
    ParsedDeltaLight light = ParsedDeltaLight.parse(deltaDir);
    // Small optimization: a delete delta can never be in raw format, so the
    // raw-format probe is skipped entirely for those directories.
    boolean rawFormat = !light.isDeleteDelta && MetaDataFile.isRawFormat(deltaDir, fs, dirSnapshot);
    List<HdfsFileStatusWithId> bucketFiles = null;
    if (dirSnapshot != null) {
        PathFilter bucketFilter = rawFormat ? AcidUtils.originalBucketFilter : AcidUtils.bucketFileFilter;
        // The snapshot already lists the files; keep the matching ones so
        // later consumers do not have to re-list the directory.
        bucketFiles = new ArrayList<>();
        for (FileStatus stat : dirSnapshot.getFiles()) {
            if (bucketFilter.accept(stat.getPath())) {
                bucketFiles.add(new HdfsFileStatusWithoutId(stat));
            }
        }
    }
    return new ParsedDelta(light, rawFormat, bucketFiles);
}
Also used : OrcFile(org.apache.hadoop.hive.ql.io.orc.OrcFile) OrcRecordUpdater(org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater) Arrays(java.util.Arrays) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) LockComponentBuilder(org.apache.hadoop.hive.metastore.LockComponentBuilder) FileSystem(org.apache.hadoop.fs.FileSystem) URISyntaxException(java.net.URISyntaxException) ConfVars(org.apache.hadoop.hive.conf.HiveConf.ConfVars) LoggerFactory(org.slf4j.LoggerFactory) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) FileStatus(org.apache.hadoop.fs.FileStatus) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) Writer(org.apache.hadoop.hive.ql.io.orc.Writer) OrcAcidUtils(org.apache.orc.impl.OrcAcidUtils) Matcher(java.util.regex.Matcher) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) LockComponent(org.apache.hadoop.hive.metastore.api.LockComponent) Context(org.apache.hadoop.hive.ql.Context) Reader(org.apache.hadoop.hive.ql.io.orc.Reader) URI(java.net.URI) ValidReaderWriteIdList(org.apache.hadoop.hive.common.ValidReaderWriteIdList) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FileFormatException(org.apache.orc.FileFormatException) MetaStoreUtils(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils) HdfsFileStatusWithoutId(org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) HadoopShims(org.apache.hadoop.hive.shims.HadoopShims) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) Set(java.util.Set) ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode) SessionState(org.apache.hadoop.hive.ql.session.SessionState) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) 
Serializable(java.io.Serializable) CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc) ValidReadTxnList(org.apache.hadoop.hive.common.ValidReadTxnList) List(java.util.List) Stream(java.util.stream.Stream) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) Pattern(java.util.regex.Pattern) CacheBuilder(com.google.common.cache.CacheBuilder) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) ErrorMsg(org.apache.hadoop.hive.ql.ErrorMsg) LockType(org.apache.hadoop.hive.metastore.api.LockType) HdfsFileStatusWithId(org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) PathFilter(org.apache.hadoop.fs.PathFilter) HashMap(java.util.HashMap) Supplier(java.util.function.Supplier) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) TransactionalValidationListener(org.apache.hadoop.hive.metastore.TransactionalValidationListener) DataOperationType(org.apache.hadoop.hive.metastore.api.DataOperationType) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) Strings(com.google.common.base.Strings) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Charset(java.nio.charset.Charset) CreateMaterializedViewDesc(org.apache.hadoop.hive.ql.ddl.view.create.CreateMaterializedViewDesc) LoadSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.LoadSemanticAnalyzer) TableName(org.apache.hadoop.hive.common.TableName) Entity(org.apache.hadoop.hive.ql.hooks.Entity) DeltaFileMetaData(org.apache.hadoop.hive.ql.io.AcidInputFormat.DeltaFileMetaData) Ref(org.apache.hive.common.util.Ref) Properties(java.util.Properties) Logger(org.slf4j.Logger) LongStream(java.util.stream.LongStream) AcidMetaDataFile(org.apache.hadoop.hive.common.AcidMetaDataFile) HiveConf(org.apache.hadoop.hive.conf.HiveConf) HiveParser(org.apache.hadoop.hive.ql.parse.HiveParser) 
ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) IOException(java.io.IOException) Table(org.apache.hadoop.hive.ql.metadata.Table) AcidConstants(org.apache.hadoop.hive.common.AcidConstants) ASTSearcher(org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher) TimeUnit(java.util.concurrent.TimeUnit) Partition(org.apache.hadoop.hive.ql.metadata.Partition) COPY_KEYWORD(org.apache.hadoop.hive.ql.exec.Utilities.COPY_KEYWORD) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) ShimLoader(org.apache.hadoop.hive.shims.ShimLoader) Preconditions(com.google.common.base.Preconditions) TxnType(org.apache.hadoop.hive.metastore.api.TxnType) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Cache(com.google.common.cache.Cache) Collections(java.util.Collections) org.apache.hadoop.hive.metastore.api.hive_metastoreConstants(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants) Immutable(javax.annotation.concurrent.Immutable) InputStream(java.io.InputStream) HdfsFileStatusWithoutId(org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId) PathFilter(org.apache.hadoop.fs.PathFilter) HdfsFileStatusWithId(org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId)

Example 3 with HdfsFileStatusWithoutId

use of org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId in project hive by apache.

From the class TestAcidInputFormat, method testDeltaMetaWithFile:

@Test
public void testDeltaMetaWithFile() throws Exception {
    // One delta file entry with no statement id, bucket 1.
    FileStatus status = new FileStatus(200, false, 100, 100, 100, new Path("mypath"));
    AcidInputFormat.DeltaFileMetaData fileMeta =
            new AcidInputFormat.DeltaFileMetaData(new HdfsFileStatusWithoutId(status), null, 1);
    DeltaMetaData original = new AcidInputFormat.DeltaMetaData(
            2000L, 2001L, new ArrayList<>(), 0, Collections.singletonList(fileMeta));

    assertEquals(2000L, original.getMinWriteId());
    assertEquals(2001L, original.getMaxWriteId());
    assertEquals(0, original.getStmtIds().size());

    // Round-trip the metadata through its Writable serialization.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    original.write(new DataOutputStream(buffer));
    DeltaMetaData roundTripped = new DeltaMetaData();
    roundTripped.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

    assertEquals(2000L, roundTripped.getMinWriteId());
    assertEquals(2001L, roundTripped.getMaxWriteId());
    assertEquals(0, roundTripped.getStmtIds().size());

    // No real file id was stored, so a SyntheticFileId built from the
    // original FileStatus (modTime=100, length=200) is expected.
    AcidInputFormat.DeltaFileMetaData restored = roundTripped.getDeltaFiles().get(0);
    Object fileId = restored.getFileId(new Path("deleteDelta"), 1, new HiveConf());
    Assert.assertTrue(fileId instanceof SyntheticFileId);
    assertEquals(100, ((SyntheticFileId) fileId).getModTime());
    assertEquals(200, ((SyntheticFileId) fileId).getLength());

    // Bucket id 1 resolves to the standard bucket file name.
    String fileName = restored.getPath(new Path("deleteDelta"), 1).getName();
    Assert.assertEquals("bucket_00001", fileName);
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) DataOutputStream(java.io.DataOutputStream) DeltaMetaData(org.apache.hadoop.hive.ql.io.AcidInputFormat.DeltaMetaData) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataInputStream(java.io.DataInputStream) HdfsFileStatusWithoutId(org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId) ByteArrayInputStream(java.io.ByteArrayInputStream) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Test(org.junit.Test)

Example 4 with HdfsFileStatusWithoutId

use of org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId in project hive by apache.

From the class AcidUtils, method getChildState:

/**
 * Classifies every directory snapshot into the AcidDirectory: files directly
 * under the root become originals, base/delta directories are delegated to
 * their dedicated processors, and anything else is recorded as an original
 * directory together with its files.
 */
private static void getChildState(AcidDirectory directory, Map<Path, HdfsDirSnapshot> dirSnapshots, ValidWriteIdList writeIdList, ValidTxnList validTxnList, boolean ignoreEmptyFiles) throws IOException {
    for (HdfsDirSnapshot snapshot : dirSnapshots.values()) {
        Path dir = snapshot.getPath();
        String dirName = dir.getName();
        // The snapshot path may carry a filesystem prefix, hence endsWith
        // rather than equals when comparing against the root.
        if (dir.toString().endsWith(directory.getPath().toString())) {
            // Snapshot of the root itself (the case when the compaction
            // thread calls getChildState): record its files and move on.
            for (FileStatus file : snapshot.getFiles()) {
                if (!ignoreEmptyFiles || file.getLen() != 0) {
                    directory.getOriginalFiles().add(new HdfsFileStatusWithoutId(file));
                }
            }
        } else if (dirName.startsWith(BASE_PREFIX)) {
            processBaseDir(dir, writeIdList, validTxnList, directory, snapshot);
        } else if (dirName.startsWith(DELTA_PREFIX) || dirName.startsWith(DELETE_DELTA_PREFIX)) {
            processDeltaDir(dir, writeIdList, validTxnList, directory, snapshot);
        } else {
            // Neither root nor base/delta: an "original" directory.
            directory.getOriginalDirectories().add(dir);
            for (FileStatus file : snapshot.getFiles()) {
                if (!ignoreEmptyFiles || file.getLen() != 0) {
                    directory.getOriginalFiles().add(new HdfsFileStatusWithoutId(file));
                }
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HdfsFileStatusWithoutId(org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus)

Aggregations

FileStatus (org.apache.hadoop.fs.FileStatus)4 Path (org.apache.hadoop.fs.Path)4 HdfsFileStatusWithoutId (org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId)4 HiveConf (org.apache.hadoop.hive.conf.HiveConf)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 DataInputStream (java.io.DataInputStream)2 DataOutputStream (java.io.DataOutputStream)2 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Preconditions (com.google.common.base.Preconditions)1 Strings (com.google.common.base.Strings)1 Cache (com.google.common.cache.Cache)1 CacheBuilder (com.google.common.cache.CacheBuilder)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 Serializable (java.io.Serializable)1 URI (java.net.URI)1 URISyntaxException (java.net.URISyntaxException)1