Use of org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId in the Apache Hive project.
From the class TestAcidInputFormat, method testDeltaMetaWithFileMultiStatement.
@Test
public void testDeltaMetaWithFileMultiStatement() throws Exception {
  // Round-trip a DeltaMetaData that carries three statement ids and one delta file,
  // then verify the deserialized copy preserves ids, file identity, and stmt grouping.
  FileStatus status = new FileStatus(200, false, 100, 100, 100, new Path("mypath"));
  AcidInputFormat.DeltaFileMetaData fileMeta =
      new AcidInputFormat.DeltaFileMetaData(new HdfsFileStatusWithoutId(status), 97, 1);
  DeltaMetaData original = new AcidInputFormat.DeltaMetaData(
      2000L, 2001L, Arrays.asList(97, 98, 99), 0, Collections.singletonList(fileMeta));

  assertEquals(2000L, original.getMinWriteId());
  assertEquals(2001L, original.getMaxWriteId());
  assertEquals(3, original.getStmtIds().size());

  // Serialize into a byte buffer and read back into a fresh instance.
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  original.write(new DataOutputStream(buffer));
  DeltaMetaData roundTripped = new DeltaMetaData();
  roundTripped.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

  assertEquals(2000L, roundTripped.getMinWriteId());
  assertEquals(2001L, roundTripped.getMaxWriteId());
  assertEquals(3, roundTripped.getStmtIds().size());

  // With no real file id stored, a SyntheticFileId is derived from modTime/length.
  Object fileId = roundTripped.getDeltaFiles().get(0).getFileId(new Path("deleteDelta"), 1, new HiveConf());
  Assert.assertTrue(fileId instanceof SyntheticFileId);
  assertEquals(100, ((SyntheticFileId) fileId).getModTime());
  assertEquals(200, ((SyntheticFileId) fileId).getLength());

  // Files are grouped by statement id: stmt 97 owns the one file, stmt 99 owns none.
  assertEquals(1, roundTripped.getDeltaFilesForStmtId(97).size());
  assertEquals(0, roundTripped.getDeltaFilesForStmtId(99).size());
}
Use of org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId in the Apache Hive project.
From the class AcidUtils, method parsedDelta.
/**
 * Builds a {@link ParsedDelta} for the given delta directory.
 *
 * @param deltaDir    path of the delta (or delete-delta) directory to parse
 * @param fs          filesystem used for the raw-format check
 * @param dirSnapshot optional pre-listed snapshot of the directory; when present,
 *                    the matching bucket files are captured for later reuse
 * @throws IOException if the raw-format probe fails
 */
private static ParsedDelta parsedDelta(Path deltaDir, FileSystem fs, HdfsDirSnapshot dirSnapshot) throws IOException {
  ParsedDeltaLight light = ParsedDeltaLight.parse(deltaDir);
  // Delete deltas can never be in raw format, so only probe regular deltas.
  boolean rawFormat = !light.isDeleteDelta && MetaDataFile.isRawFormat(deltaDir, fs, dirSnapshot);
  List<HdfsFileStatusWithId> bucketFiles = null;
  if (dirSnapshot != null) {
    // The snapshot already lists the directory; keep the matching bucket files
    // so later consumers avoid another filesystem listing.
    PathFilter bucketFilter = rawFormat ? AcidUtils.originalBucketFilter : AcidUtils.bucketFileFilter;
    bucketFiles = dirSnapshot.getFiles().stream()
        .filter(status -> bucketFilter.accept(status.getPath()))
        .map(status -> new HdfsFileStatusWithoutId(status))
        .collect(Collectors.toList());
  }
  return new ParsedDelta(light, rawFormat, bucketFiles);
}
Use of org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId in the Apache Hive project.
From the class TestAcidInputFormat, method testDeltaMetaWithFile.
@Test
public void testDeltaMetaWithFile() throws Exception {
  // Round-trip a DeltaMetaData with no statement ids and a single delta file
  // (null stmtId), then check the copy's ids, synthetic file id, and bucket path.
  FileStatus status = new FileStatus(200, false, 100, 100, 100, new Path("mypath"));
  AcidInputFormat.DeltaFileMetaData fileMeta =
      new AcidInputFormat.DeltaFileMetaData(new HdfsFileStatusWithoutId(status), null, 1);
  DeltaMetaData original = new AcidInputFormat.DeltaMetaData(
      2000L, 2001L, new ArrayList<>(), 0, Collections.singletonList(fileMeta));

  assertEquals(2000L, original.getMinWriteId());
  assertEquals(2001L, original.getMaxWriteId());
  assertEquals(0, original.getStmtIds().size());

  // Serialize into a byte buffer and read back into a fresh instance.
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  original.write(new DataOutputStream(buffer));
  DeltaMetaData roundTripped = new DeltaMetaData();
  roundTripped.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

  assertEquals(2000L, roundTripped.getMinWriteId());
  assertEquals(2001L, roundTripped.getMaxWriteId());
  assertEquals(0, roundTripped.getStmtIds().size());

  // With no real file id stored, a SyntheticFileId is derived from modTime/length.
  AcidInputFormat.DeltaFileMetaData copiedFile = roundTripped.getDeltaFiles().get(0);
  Object fileId = copiedFile.getFileId(new Path("deleteDelta"), 1, new HiveConf());
  Assert.assertTrue(fileId instanceof SyntheticFileId);
  assertEquals(100, ((SyntheticFileId) fileId).getModTime());
  assertEquals(200, ((SyntheticFileId) fileId).getLength());

  // The reconstructed path uses the standard bucket naming for bucket 1.
  Assert.assertEquals("bucket_00001", copiedFile.getPath(new Path("deleteDelta"), 1).getName());
}
Use of org.apache.hadoop.hive.ql.io.HdfsUtils.HdfsFileStatusWithoutId in the Apache Hive project.
From the class AcidUtils, method getChildState.
/**
 * Classifies each directory snapshot as the queried directory itself, a base dir,
 * a (delete) delta dir, or an original (pre-ACID) dir, and records its contents
 * into {@code directory} accordingly.
 *
 * @param directory        accumulator for original files/dirs and base/delta results
 * @param dirSnapshots     pre-listed snapshots keyed by path
 * @param writeIdList      valid write ids used when processing base/delta dirs
 * @param validTxnList     valid transactions used when processing base/delta dirs
 * @param ignoreEmptyFiles when true, zero-length files are not recorded as originals
 * @throws IOException if base/delta processing fails
 */
private static void getChildState(AcidDirectory directory, Map<Path, HdfsDirSnapshot> dirSnapshots, ValidWriteIdList writeIdList, ValidTxnList validTxnList, boolean ignoreEmptyFiles) throws IOException {
  for (HdfsDirSnapshot snapshot : dirSnapshots.values()) {
    Path snapshotPath = snapshot.getPath();
    String snapshotName = snapshotPath.getName();
    if (snapshotPath.toString().endsWith(directory.getPath().toString())) {
      // The snapshot is for the queried directory itself (endsWith because the
      // snapshot path may carry a filesystem prefix); its files are recorded as
      // originals. This is the case when the compaction thread calls getChildState.
      for (FileStatus file : snapshot.getFiles()) {
        if (!ignoreEmptyFiles || file.getLen() != 0) {
          directory.getOriginalFiles().add(new HdfsFileStatusWithoutId(file));
        }
      }
    } else if (snapshotName.startsWith(BASE_PREFIX)) {
      processBaseDir(snapshotPath, writeIdList, validTxnList, directory, snapshot);
    } else if (snapshotName.startsWith(DELTA_PREFIX) || snapshotName.startsWith(DELETE_DELTA_PREFIX)) {
      processDeltaDir(snapshotPath, writeIdList, validTxnList, directory, snapshot);
    } else {
      // Neither base nor delta: an original (pre-ACID) directory and its files.
      directory.getOriginalDirectories().add(snapshotPath);
      for (FileStatus file : snapshot.getFiles()) {
        if (!ignoreEmptyFiles || file.getLen() != 0) {
          directory.getOriginalFiles().add(new HdfsFileStatusWithoutId(file));
        }
      }
    }
  }
}
Aggregations