Use of org.apache.hadoop.fs.Path in project druid by druid-io.
The class HdfsTaskLogs, method killAll:
@Override
public void killAll() throws IOException {
  log.info("Deleting all task logs from hdfs dir [%s].", config.getDirectory());
  Path taskLogDir = new Path(config.getDirectory());
  FileSystem fs = taskLogDir.getFileSystem(hadoopConfig);
  fs.delete(taskLogDir, true);
}
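For context, the recursive delete that killAll relies on can be exercised directly against Hadoop's FileSystem API. The sketch below is illustrative only; the directory path is a made-up example rather than anything taken from the Druid configuration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RecursiveDeleteSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical log directory; in HdfsTaskLogs this comes from config.getDirectory().
    Path taskLogDir = new Path("/tmp/druid/task-logs");
    // Resolve the FileSystem from the path itself, as killAll does.
    FileSystem fs = taskLogDir.getFileSystem(new Configuration());
    // The second argument enables recursive deletion of the whole directory tree.
    boolean deleted = fs.delete(taskLogDir, true);
    System.out.println("deleted = " + deleted);
  }
}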
Use of org.apache.hadoop.fs.Path in project druid by druid-io.
The class HdfsTaskLogsTest, method testKill:
@Test
public void testKill() throws Exception {
  final File tmpDir = tempFolder.newFolder();
  final File logDir = new File(tmpDir, "logs");
  final File logFile = new File(tmpDir, "log");
  final Path logDirPath = new Path(logDir.toString());
  FileSystem fs = new Path(logDir.toString()).getFileSystem(new Configuration());
  final TaskLogs taskLogs = new HdfsTaskLogs(new HdfsTaskLogsConfig(logDir.toString()), new Configuration());

  Files.write("log1content", logFile, Charsets.UTF_8);
  taskLogs.pushTaskLog("log1", logFile);
  Assert.assertEquals("log1content", readLog(taskLogs, "log1", 0));

  // File modification timestamps are only maintained to seconds resolution, so an artificial
  // delay is needed to separate the two file creations by enough of a timestamp difference
  // that only one of them gets deleted.
  Thread.sleep(1500);
  long time = (System.currentTimeMillis() / 1000) * 1000;
  Assert.assertTrue(fs.getFileStatus(new Path(logDirPath, "log1")).getModificationTime() < time);

  Files.write("log2content", logFile, Charsets.UTF_8);
  taskLogs.pushTaskLog("log2", logFile);
  Assert.assertEquals("log2content", readLog(taskLogs, "log2", 0));
  Assert.assertTrue(fs.getFileStatus(new Path(logDirPath, "log2")).getModificationTime() >= time);

  taskLogs.killOlderThan(time);
  Assert.assertFalse(taskLogs.streamTaskLog("log1", 0).isPresent());
  Assert.assertEquals("log2content", readLog(taskLogs, "log2", 0));
}
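The 1500 ms sleep and the truncation of the cutoff to whole seconds are the subtle parts of this test: as the test's own comment notes, modification timestamps here are only kept at seconds resolution. The standalone sketch below shows that comparison using only Hadoop APIs and a made-up local path; it is an illustration, not part of the test above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ModificationTimeSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical path on the local filesystem.
    Path p = new Path("/tmp/mtime-example");
    FileSystem fs = p.getFileSystem(new Configuration());
    try (FSDataOutputStream out = fs.create(p, true)) {
      out.writeBytes("content");
    }
    // Truncate "now" to whole seconds, mirroring the cutoff computed in the test.
    long cutoff = (System.currentTimeMillis() / 1000) * 1000;
    long mtime = fs.getFileStatus(p).getModificationTime();
    // A file written just before the cutoff can still compare as >= cutoff, which is
    // why the test sleeps between the two pushTaskLog calls.
    System.out.println("mtime=" + mtime + ", cutoff=" + cutoff + ", survives=" + (mtime >= cutoff));
  }
}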
Use of org.apache.hadoop.fs.Path in project druid by druid-io.
The class HdfsDataSegmentFinderTest, method setUp:
@Before
public void setUp() throws IOException {
  dataSourceDir = new Path(new Path(uriBase), "/usr/dataSource");
  descriptor1 = new Path(dataSourceDir, "interval1/v1/0/" + DESCRIPTOR_JSON);
  descriptor2 = new Path(dataSourceDir, "interval2/v1/0/" + DESCRIPTOR_JSON);
  descriptor3 = new Path(dataSourceDir, "interval3/v2/0/" + DESCRIPTOR_JSON);
  descriptor4_0 = new Path(dataSourceDir, "interval4/v1/0/" + DESCRIPTOR_JSON);
  descriptor4_1 = new Path(dataSourceDir, "interval4/v1/1/" + DESCRIPTOR_JSON);
  descriptor5 = new Path(dataSourceDir, "interval5/v1/1/" + "1_" + DESCRIPTOR_JSON);
  indexZip1 = new Path(descriptor1.getParent(), INDEX_ZIP);
  indexZip2 = new Path(descriptor2.getParent(), INDEX_ZIP);
  indexZip3 = new Path(descriptor3.getParent(), INDEX_ZIP);
  indexZip4_0 = new Path(descriptor4_0.getParent(), INDEX_ZIP);
  indexZip4_1 = new Path(descriptor4_1.getParent(), INDEX_ZIP);
  indexZip5 = new Path(descriptor5.getParent(), "1_" + INDEX_ZIP);
  mapper.writeValue(fs.create(descriptor1), SEGMENT_1);
  mapper.writeValue(fs.create(descriptor2), SEGMENT_2);
  mapper.writeValue(fs.create(descriptor3), SEGMENT_3);
  mapper.writeValue(fs.create(descriptor4_0), SEGMENT_4_0);
  mapper.writeValue(fs.create(descriptor4_1), SEGMENT_4_1);
  mapper.writeValue(fs.create(descriptor5), SEGMENT_5);
  create(indexZip1);
  create(indexZip2);
  create(indexZip3);
  create(indexZip4_0);
  create(indexZip4_1);
  create(indexZip5);
}
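The create(Path) helper invoked for each index zip is not shown here; presumably it just materializes an empty file at the given path. A minimal sketch of such a helper, assuming the test has an initialized FileSystem field named fs and imports for FSDataOutputStream and IOException:

// Hypothetical reconstruction of the create(Path) helper used above.
private void create(Path path) throws IOException {
  // Open and immediately close an output stream, leaving an empty file behind.
  try (FSDataOutputStream os = fs.create(path)) {
    // intentionally empty
  }
}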
Use of org.apache.hadoop.fs.Path in project druid by druid-io.
The class HdfsDataSegmentFinder, method findSegments:
@Override
public Set<DataSegment> findSegments(String workingDirPathStr, boolean updateDescriptor) throws SegmentLoadingException {
  final Set<DataSegment> segments = Sets.newHashSet();
  final Path workingDirPath = new Path(workingDirPathStr);
  FileSystem fs;
  try {
    fs = workingDirPath.getFileSystem(config);
    log.info(fs.getScheme());
    log.info("FileSystem URI: " + fs.getUri().toString());
    if (!fs.exists(workingDirPath)) {
      throw new SegmentLoadingException("Working directory [%s] doesn't exist.", workingDirPath);
    }
    if (!fs.isDirectory(workingDirPath)) {
      throw new SegmentLoadingException("Working directory [%s] is not a directory!?", workingDirPath);
    }
    final RemoteIterator<LocatedFileStatus> it = fs.listFiles(workingDirPath, true);
    while (it.hasNext()) {
      final LocatedFileStatus locatedFileStatus = it.next();
      final Path path = locatedFileStatus.getPath();
      if (path.getName().endsWith("descriptor.json")) {
        final Path indexZip;
        final String[] descriptorParts = path.getName().split("_");
        if (descriptorParts.length == 2 && descriptorParts[1].equals("descriptor.json") && StringUtils.isNumeric(descriptorParts[0])) {
          indexZip = new Path(path.getParent(), String.format("%s_index.zip", descriptorParts[0]));
        } else {
          indexZip = new Path(path.getParent(), "index.zip");
        }
        if (fs.exists(indexZip)) {
          final DataSegment dataSegment = mapper.readValue(fs.open(path), DataSegment.class);
          log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);
          final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
          final String pathWithoutScheme = indexZip.toUri().getPath();
          if (!loadSpec.get("type").equals(HdfsStorageDruidModule.SCHEME) || !loadSpec.get("path").equals(pathWithoutScheme)) {
            loadSpec.put("type", HdfsStorageDruidModule.SCHEME);
            loadSpec.put("path", pathWithoutScheme);
            if (updateDescriptor) {
              log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]", path, pathWithoutScheme);
              mapper.writeValue(fs.create(path, true), dataSegment);
            }
          }
          segments.add(dataSegment);
        } else {
          throw new SegmentLoadingException("index.zip didn't exist at [%s] while descriptor.json exists!?", indexZip);
        }
      }
    }
  } catch (IOException e) {
    throw new SegmentLoadingException(e, "Problems interacting with filesystem[%s].", workingDirPath);
  }
  return segments;
}
Use of org.apache.hadoop.fs.Path in project druid by druid-io.
The class HdfsDataSegmentKiller, method kill:
@Override
public void kill(DataSegment segment) throws SegmentLoadingException {
  final Path segmentPath = getPath(segment);
  log.info("killing segment[%s] mapped to path[%s]", segment.getIdentifier(), segmentPath);
  try {
    String segmentLocation = segmentPath.getName();
    final FileSystem fs = segmentPath.getFileSystem(config);
    if (!segmentLocation.endsWith(".zip")) {
      throw new SegmentLoadingException("Unknown file type[%s]", segmentPath);
    } else {
      if (!fs.exists(segmentPath)) {
        log.warn("Segment Path [%s] does not exist. It appears to have been deleted already.", segmentPath);
        return;
      }
      String[] zipParts = segmentLocation.split("_");
      // for segments stored as hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_index.zip
      if (zipParts.length == 2 && zipParts[1].equals("index.zip") && StringUtils.isNumeric(zipParts[0])) {
        if (!fs.delete(segmentPath, false)) {
          throw new SegmentLoadingException("Unable to kill segment, failed to delete [%s]", segmentPath.toString());
        }
        Path descriptorPath = new Path(segmentPath.getParent(), String.format("%s_descriptor.json", zipParts[0]));
        // delete partitionNumber_descriptor.json
        if (!fs.delete(descriptorPath, false)) {
          throw new SegmentLoadingException("Unable to kill segment, failed to delete [%s]", descriptorPath.toString());
        }
        // for segments stored as hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_index.zip
        // the maximum depth to look up is 2, i.e. the version directory and the interval directory.
        mayBeDeleteParentsUpto(fs, segmentPath, 2);
      } else {
        // index.zip
        if (!fs.delete(segmentPath, false)) {
          throw new SegmentLoadingException("Unable to kill segment, failed to delete [%s]", segmentPath.toString());
        }
        Path descriptorPath = new Path(segmentPath.getParent(), "descriptor.json");
        if (!fs.delete(descriptorPath, false)) {
          throw new SegmentLoadingException("Unable to kill segment, failed to delete [%s]", descriptorPath.toString());
        }
        // for segments stored as hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum/index.zip
        // the maximum depth to look up is 3, i.e. the partition number directory, the version directory and the interval directory.
        mayBeDeleteParentsUpto(fs, segmentPath, 3);
      }
    }
  } catch (IOException e) {
    throw new SegmentLoadingException(e, "Unable to kill segment");
  }
}
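The mayBeDeleteParentsUpto helper referenced above is not shown; the idea is to walk up from the deleted zip and remove parent directories that have become empty, up to the given depth. Below is a hedged sketch of how such a helper might look, using only Hadoop FileSystem calls; the name and error handling are mine, and it is not claimed to be the project's exact implementation.

// Illustrative only: walks up from segmentPath and deletes parents that are now empty,
// stopping at the first non-empty directory or after maxDepth levels.
private static void maybeDeleteEmptyParents(FileSystem fs, Path segmentPath, int maxDepth) throws IOException {
  Path current = segmentPath;
  for (int depth = 1; depth <= maxDepth; depth++) {
    current = current.getParent();
    if (current == null) {
      break;
    }
    // listStatus returns the directory's children; stop as soon as it is non-empty.
    if (fs.listStatus(current).length != 0) {
      break;
    }
    // Non-recursive delete of the now-empty directory.
    if (!fs.delete(current, false)) {
      break;
    }
  }
}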