Use of org.apache.hadoop.fs.LocatedFileStatus in project tez by apache.
From the class TestMRInputHelpers, method testOldSplitsGen, which verifies split generation with the old (mapred) API:
@Test(timeout = 5000)
public void testOldSplitsGen() throws Exception {
  DataSourceDescriptor dataSource = generateDataSourceDescriptorMapRed(oldSplitsDir);
  Assert.assertTrue(dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME));
  Assert.assertTrue(dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME));
  RemoteIterator<LocatedFileStatus> files = remoteFs.listFiles(oldSplitsDir, false);
  boolean foundSplitsFile = false;
  boolean foundMetaFile = false;
  int totalFilesFound = 0;
  while (files.hasNext()) {
    LocatedFileStatus status = files.next();
    String fName = status.getPath().getName();
    totalFilesFound++;
    if (fName.equals(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME)) {
      foundSplitsFile = true;
    } else if (fName.equals(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
      foundMetaFile = true;
    } else {
      Assert.fail("Found invalid file in splits dir, filename=" + fName);
    }
    Assert.assertTrue(status.getLen() > 0);
  }
  Assert.assertEquals(2, totalFilesFound);
  Assert.assertTrue(foundSplitsFile);
  Assert.assertTrue(foundMetaFile);
  verifyLocationHints(oldSplitsDir, dataSource.getLocationHint().getTaskLocationHints());
}
Use of org.apache.hadoop.fs.LocatedFileStatus in project tez by apache.
From the class TestMRInputHelpers, method testNewSplitsGen, the same check for splits generated with the new (mapreduce) API:
@Test(timeout = 5000)
public void testNewSplitsGen() throws Exception {
  DataSourceDescriptor dataSource = generateDataSourceDescriptorMapReduce(newSplitsDir);
  Assert.assertTrue(dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME));
  Assert.assertTrue(dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME));
  RemoteIterator<LocatedFileStatus> files = remoteFs.listFiles(newSplitsDir, false);
  boolean foundSplitsFile = false;
  boolean foundMetaFile = false;
  int totalFilesFound = 0;
  while (files.hasNext()) {
    LocatedFileStatus status = files.next();
    String fName = status.getPath().getName();
    totalFilesFound++;
    if (fName.equals(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME)) {
      foundSplitsFile = true;
    } else if (fName.equals(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
      foundMetaFile = true;
    } else {
      Assert.fail("Found invalid file in splits dir, filename=" + fName);
    }
    Assert.assertTrue(status.getLen() > 0);
  }
  Assert.assertEquals(2, totalFilesFound);
  Assert.assertTrue(foundSplitsFile);
  Assert.assertTrue(foundMetaFile);
  verifyLocationHints(newSplitsDir, dataSource.getLocationHint().getTaskLocationHints());
}
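Both Tez tests share the same verification pattern: list the splits directory non-recursively with FileSystem.listFiles and inspect each LocatedFileStatus. A minimal standalone sketch of that idiom; the directory path and the default Configuration here are illustrative assumptions, not values from the tests:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListSplitsDir {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    // second argument 'false' lists only the directory's immediate files, without recursing
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/tmp/splits"), false);
    while (files.hasNext()) {
      LocatedFileStatus status = files.next();
      System.out.println(status.getPath().getName() + " (" + status.getLen() + " bytes)");
    }
  }
}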
Use of org.apache.hadoop.fs.LocatedFileStatus in project cdap by caskdata.
From the class DynamicPartitioningOutputCommitter, method commitJob:
@Override
public void commitJob(JobContext context) throws IOException {
  Configuration configuration = context.getConfiguration();
  MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
  BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);
  String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
  outputDataset = taskContext.getDataset(outputDatasetName);
  DynamicPartitioner.PartitionWriteOption partitionWriteOption = DynamicPartitioner.PartitionWriteOption.valueOf(configuration.get(PartitionedFileSetArguments.DYNAMIC_PARTITIONER_WRITE_OPTION));
  Partitioning partitioning = outputDataset.getPartitioning();
  partitionsToAdd = new HashMap<>();
  // Go over all files in the temporary directory and keep track of partitions to add for them
  FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
  for (FileStatus committedTaskPath : allCommittedTaskPaths) {
    FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
    RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
    while (fileIter.hasNext()) {
      Path path = fileIter.next().getPath();
      String relativePath = getRelative(committedTaskPath.getPath(), path);
      int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
      if (lastPathSepIdx == -1) {
        // this shouldn't happen, because each relative path should consist of at least one partition key and
        // the output file name
        LOG.warn("Skipping path '{}'. Its relative path '{}' has fewer than two parts", path, relativePath);
        continue;
      }
      // relativePath = "../key1/key2/part-m-00000"
      // relativeDir = "../key1/key2"
      // fileName = "part-m-00000"
      String relativeDir = relativePath.substring(0, lastPathSepIdx);
      Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
      if (partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE) {
        if (fs.exists(finalDir)) {
          throw new FileAlreadyExistsException("Final output path already exists: " + finalDir);
        }
      }
      PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
      partitionsToAdd.put(relativeDir, partitionKey);
    }
  }
  // need to remove any existing partitions before moving temporary content to final output
  if (partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE_OR_OVERWRITE) {
    for (Map.Entry<String, PartitionKey> entry : partitionsToAdd.entrySet()) {
      if (outputDataset.getPartition(entry.getValue()) != null) {
        // this allows reinstating the existing files if there's a rollback.
        // the alternative is to simply remove the files within the partition's location;
        // its upside is easily avoiding explore operations, but its downside is that metadata would not be removed
        outputDataset.dropPartition(entry.getValue());
      }
    }
  }
  // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId suffix to
  // the original outputDir.
  Path finalOutput = FileOutputFormat.getOutputPath(context);
  FileContext fc = FileContext.getFileContext(configuration);
  // the finalOutput path doesn't have scheme or authority (but 'from' does)
  finalOutput = fc.makeQualified(finalOutput);
  for (FileStatus from : getAllCommittedTaskPaths(context)) {
    mergePaths(fc, from, finalOutput);
  }
  // compute the metadata to be written to every output partition
  Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(), PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);
  boolean allowAppend = partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE_OR_APPEND;
  // create all the necessary partitions
  for (Map.Entry<String, PartitionKey> entry : partitionsToAdd.entrySet()) {
    outputDataset.addPartition(entry.getValue(), entry.getKey(), metadata, true, allowAppend);
  }
  // delete the job-specific _temporary folder
  cleanupJob(context);
  // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
  if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
    for (String relativePath : partitionsToAdd.keySet()) {
      Path pathToMark = new Path(finalOutput, relativePath);
      createOrUpdate(fc, new Path(pathToMark, SUCCEEDED_FILE_NAME));
      // also create a _SUCCESS-<RunId>, if allowing append
      if (allowAppend) {
        createOrUpdate(fc, new Path(pathToMark, SUCCEEDED_FILE_NAME + "-" + taskContext.getProgramRunId().getRun()));
      }
    }
  }
}
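The commit logic above depends on a getRelative helper (not shown in this snippet) to turn an absolute file path into a path relative to the committed task directory, from which the partition directory is then derived. A minimal sketch of what such a helper could look like; the body is an assumption for illustration, not CDAP's actual implementation:

// Hypothetical helper, not CDAP's actual code: derives "key1/key2/part-m-00000"
// from a base task path and an absolute file path beneath it.
private static String getRelative(Path base, Path path) {
  String basePath = base.toUri().getPath();
  String filePath = path.toUri().getPath();
  // strip the base prefix plus the trailing separator
  return filePath.substring(basePath.length() + 1);
}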
Use of org.apache.hadoop.fs.LocatedFileStatus in project incubator-crail by apache.
From the class HdfsIOBenchmark, method enumerateDir:
void enumerateDir() throws Exception {
  System.out.println("enumerate dir, path " + path);
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  int repfactor = 4;
  for (int k = 0; k < repfactor; k++) {
    long start = System.currentTimeMillis();
    for (int i = 0; i < size; i++) {
      // one operation = one full enumeration of the directory
      RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false);
      while (iter.hasNext()) {
        iter.next();
      }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start));
    double latency = executionTime * 1000.0 / ((double) size);
    System.out.println("execution time [ms] " + executionTime);
    System.out.println("latency [us] " + latency);
  }
  fs.close();
}
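A note on the reported numbers: executionTime is the wall-clock time in milliseconds for size complete directory enumerations, so executionTime * 1000.0 / size converts it to the average latency of a single enumeration in microseconds. For example, 2000 ms over size = 1000 iterations works out to 2000 us per listing. The outer repfactor loop simply repeats the measurement four times, which makes JIT and cache warm-up effects visible across runs.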
Use of org.apache.hadoop.fs.LocatedFileStatus in project drill by apache.
From the class TemporaryTablesAutomaticDropTest, method createAndCheckSessionTemporaryLocation:
private File createAndCheckSessionTemporaryLocation(String suffix, File schemaLocation) throws Exception {
  String temporaryTableName = "temporary_table_automatic_drop_" + suffix;
  File sessionTemporaryLocation = schemaLocation.toPath().resolve(SESSION_UUID.toString()).toFile();
  test("create TEMPORARY table %s.%s as select 'A' as c1 from (values(1))", DFS_TMP_SCHEMA, temporaryTableName);
  FileSystem fs = getLocalFileSystem();
  Path sessionPath = new Path(sessionTemporaryLocation.getAbsolutePath());
  assertTrue("Session temporary location should exist", fs.exists(sessionPath));
  assertEquals("Directory permission should match", StorageStrategy.TEMPORARY.getFolderPermission(), fs.getFileStatus(sessionPath).getPermission());
  Path tempTablePath = new Path(sessionPath, SESSION_UUID.toString());
  assertTrue("Temporary table location should exist", fs.exists(tempTablePath));
  assertEquals("Directory permission should match", StorageStrategy.TEMPORARY.getFolderPermission(), fs.getFileStatus(tempTablePath).getPermission());
  RemoteIterator<LocatedFileStatus> fileIterator = fs.listFiles(tempTablePath, false);
  while (fileIterator.hasNext()) {
    LocatedFileStatus file = fileIterator.next();
    assertEquals("File permission should match", StorageStrategy.TEMPORARY.getFilePermission(), file.getPermission());
  }
  return sessionTemporaryLocation;
}
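The test obtains its FileSystem through a getLocalFileSystem() helper. A minimal sketch of such a helper, assuming it simply wraps Hadoop's local file system; this body is an illustration, not Drill's actual code:

// Hypothetical stand-in for the test's getLocalFileSystem() helper.
private static FileSystem getLocalFileSystem() throws IOException {
  // FileSystem.getLocal returns Hadoop's LocalFileSystem wrapper over the local disk,
  // which is sufficient for checking the file and folder permissions asserted above
  return FileSystem.getLocal(new Configuration());
}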