
Example 1 with Path

Use of org.apache.hadoop.fs.Path in project druid by druid-io.

From the class HdfsTaskLogs, the killAll method:

@Override
public void killAll() throws IOException {
    log.info("Deleting all task logs from hdfs dir [%s].", config.getDirectory());
    Path taskLogDir = new Path(config.getDirectory());
    FileSystem fs = taskLogDir.getFileSystem(hadoopConfig);
    fs.delete(taskLogDir, true);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem)
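
The snippet resolves the FileSystem from the Path itself rather than from a global default, so the scheme of the configured directory (hdfs://, file://, and so on) decides which implementation handles the delete. Below is a minimal standalone sketch of the same idiom; the class name and command-line argument are ours, not part of Druid.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RecursiveDeleteSketch {
    public static void main(String[] args) throws IOException {
        // The path's scheme selects the FileSystem implementation returned by getFileSystem().
        Path dir = new Path(args[0]);
        FileSystem fs = dir.getFileSystem(new Configuration());
        // The second argument "true" requests a recursive delete, as in killAll() above.
        boolean deleted = fs.delete(dir, true);
        System.out.println("deleted=" + deleted);
    }
}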

Example 2 with Path

Use of org.apache.hadoop.fs.Path in project druid by druid-io.

From the class HdfsTaskLogsTest, the testKill method:

@Test
public void testKill() throws Exception {
    final File tmpDir = tempFolder.newFolder();
    final File logDir = new File(tmpDir, "logs");
    final File logFile = new File(tmpDir, "log");
    final Path logDirPath = new Path(logDir.toString());
    FileSystem fs = new Path(logDir.toString()).getFileSystem(new Configuration());
    final TaskLogs taskLogs = new HdfsTaskLogs(new HdfsTaskLogsConfig(logDir.toString()), new Configuration());
    Files.write("log1content", logFile, Charsets.UTF_8);
    taskLogs.pushTaskLog("log1", logFile);
    Assert.assertEquals("log1content", readLog(taskLogs, "log1", 0));
    // File modification timestamps are only maintained to second resolution, so an artificial
    // delay is necessary to separate the two file creations; otherwise both would share a
    // timestamp and only one of them would be deleted.
    Thread.sleep(1500);
    long time = (System.currentTimeMillis() / 1000) * 1000;
    Assert.assertTrue(fs.getFileStatus(new Path(logDirPath, "log1")).getModificationTime() < time);
    Files.write("log2content", logFile, Charsets.UTF_8);
    taskLogs.pushTaskLog("log2", logFile);
    Assert.assertEquals("log2content", readLog(taskLogs, "log2", 0));
    Assert.assertTrue(fs.getFileStatus(new Path(logDirPath, "log2")).getModificationTime() >= time);
    taskLogs.killOlderThan(time);
    Assert.assertFalse(taskLogs.streamTaskLog("log1", 0).isPresent());
    Assert.assertEquals("log2content", readLog(taskLogs, "log2", 0));
}
Also used : Path(org.apache.hadoop.fs.Path) TaskLogs(io.druid.tasklogs.TaskLogs) HdfsTaskLogs(io.druid.storage.hdfs.tasklog.HdfsTaskLogs) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) HdfsTaskLogsConfig(io.druid.storage.hdfs.tasklog.HdfsTaskLogsConfig) HdfsTaskLogs(io.druid.storage.hdfs.tasklog.HdfsTaskLogs) File(java.io.File) Test(org.junit.Test)
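
The test relies on file modification times read through FileStatus to decide which logs are old enough to remove. The sketch below shows the underlying comparison with a hypothetical helper; it is not HdfsTaskLogs.killOlderThan() itself, just the same FileStatus-based check under assumed names.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ModificationTimeSketch {
    // Hypothetical helper: delete every file in "dir" whose modification time is
    // strictly older than "cutoffMillis", the comparison the test exercises above.
    static void deleteOlderThan(FileSystem fs, Path dir, long cutoffMillis) throws IOException {
        for (FileStatus status : fs.listStatus(dir)) {
            if (status.isFile() && status.getModificationTime() < cutoffMillis) {
                fs.delete(status.getPath(), false);
            }
        }
    }

    public static void main(String[] args) throws IOException {
        Path dir = new Path(args[0]);
        FileSystem fs = dir.getFileSystem(new Configuration());
        // Modification times are in milliseconds; some local file systems only keep
        // second resolution, which is why the test above sleeps 1500 ms between writes.
        deleteOlderThan(fs, dir, System.currentTimeMillis() - 60_000L);
    }
}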

Example 3 with Path

Use of org.apache.hadoop.fs.Path in project druid by druid-io.

From the class HdfsDataSegmentFinderTest, the setUp method:

@Before
public void setUp() throws IOException {
    dataSourceDir = new Path(new Path(uriBase), "/usr/dataSource");
    descriptor1 = new Path(dataSourceDir, "interval1/v1/0/" + DESCRIPTOR_JSON);
    descriptor2 = new Path(dataSourceDir, "interval2/v1/0/" + DESCRIPTOR_JSON);
    descriptor3 = new Path(dataSourceDir, "interval3/v2/0/" + DESCRIPTOR_JSON);
    descriptor4_0 = new Path(dataSourceDir, "interval4/v1/0/" + DESCRIPTOR_JSON);
    descriptor4_1 = new Path(dataSourceDir, "interval4/v1/1/" + DESCRIPTOR_JSON);
    descriptor5 = new Path(dataSourceDir, "interval5/v1/1/" + "1_" + DESCRIPTOR_JSON);
    indexZip1 = new Path(descriptor1.getParent(), INDEX_ZIP);
    indexZip2 = new Path(descriptor2.getParent(), INDEX_ZIP);
    indexZip3 = new Path(descriptor3.getParent(), INDEX_ZIP);
    indexZip4_0 = new Path(descriptor4_0.getParent(), INDEX_ZIP);
    indexZip4_1 = new Path(descriptor4_1.getParent(), INDEX_ZIP);
    indexZip5 = new Path(descriptor5.getParent(), "1_" + INDEX_ZIP);
    mapper.writeValue(fs.create(descriptor1), SEGMENT_1);
    mapper.writeValue(fs.create(descriptor2), SEGMENT_2);
    mapper.writeValue(fs.create(descriptor3), SEGMENT_3);
    mapper.writeValue(fs.create(descriptor4_0), SEGMENT_4_0);
    mapper.writeValue(fs.create(descriptor4_1), SEGMENT_4_1);
    mapper.writeValue(fs.create(descriptor5), SEGMENT_5);
    create(indexZip1);
    create(indexZip2);
    create(indexZip3);
    create(indexZip4_0);
    create(indexZip4_1);
    create(indexZip5);
}
Also used : Path(org.apache.hadoop.fs.Path) Before(org.junit.Before)
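
The setup leans on the Path(parent, child) constructor to build the descriptor and index-zip locations relative to each other, and on FileSystem.create() to write the test fixtures. A reduced sketch of that composition, with hypothetical paths and content, might look like this:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathCompositionSketch {
    public static void main(String[] args) throws IOException {
        // new Path(parent, child) joins components without manual separator handling.
        Path dataSourceDir = new Path(args[0]);
        Path descriptor = new Path(dataSourceDir, "interval1/v1/0/descriptor.json");
        Path indexZip = new Path(descriptor.getParent(), "index.zip");

        FileSystem fs = dataSourceDir.getFileSystem(new Configuration());
        // create() returns an FSDataOutputStream and creates missing parent directories by default.
        try (FSDataOutputStream out = fs.create(descriptor)) {
            out.writeBytes("{}");
        }
        // An empty placeholder file, standing in for the test's create(indexZip) helper.
        fs.create(indexZip).close();
    }
}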

Example 4 with Path

Use of org.apache.hadoop.fs.Path in project druid by druid-io.

From the class HdfsDataSegmentFinder, the findSegments method:

@Override
public Set<DataSegment> findSegments(String workingDirPathStr, boolean updateDescriptor) throws SegmentLoadingException {
    final Set<DataSegment> segments = Sets.newHashSet();
    final Path workingDirPath = new Path(workingDirPathStr);
    FileSystem fs;
    try {
        fs = workingDirPath.getFileSystem(config);
        log.info(fs.getScheme());
        log.info("FileSystem URI:" + fs.getUri().toString());
        if (!fs.exists(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] doesn't exist.", workingDirPath);
        }
        if (!fs.isDirectory(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] is not a directory!?", workingDirPath);
        }
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(workingDirPath, true);
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            final Path path = locatedFileStatus.getPath();
            if (path.getName().endsWith("descriptor.json")) {
                final Path indexZip;
                final String[] descriptorParts = path.getName().split("_");
                if (descriptorParts.length == 2 && descriptorParts[1].equals("descriptor.json") && StringUtils.isNumeric(descriptorParts[0])) {
                    indexZip = new Path(path.getParent(), String.format("%s_index.zip", descriptorParts[0]));
                } else {
                    indexZip = new Path(path.getParent(), "index.zip");
                }
                if (fs.exists(indexZip)) {
                    final DataSegment dataSegment = mapper.readValue(fs.open(path), DataSegment.class);
                    log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);
                    final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
                    final String pathWithoutScheme = indexZip.toUri().getPath();
                    if (!loadSpec.get("type").equals(HdfsStorageDruidModule.SCHEME) || !loadSpec.get("path").equals(pathWithoutScheme)) {
                        loadSpec.put("type", HdfsStorageDruidModule.SCHEME);
                        loadSpec.put("path", pathWithoutScheme);
                        if (updateDescriptor) {
                            log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]", path, pathWithoutScheme);
                            mapper.writeValue(fs.create(path, true), dataSegment);
                        }
                    }
                    segments.add(dataSegment);
                } else {
                    throw new SegmentLoadingException("index.zip didn't exist at [%s] while descriptor.json exists!?", indexZip);
                }
            }
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Problems interacting with filesystem[%s].", workingDirPath);
    }
    return segments;
}
Also used : Path(org.apache.hadoop.fs.Path) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) FileSystem(org.apache.hadoop.fs.FileSystem) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) DataSegment(io.druid.timeline.DataSegment)
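
The traversal that drives findSegments() is FileSystem.listFiles(path, true), which returns a RemoteIterator so a large tree can be walked without materializing the full listing. A stripped-down sketch of just that walk, using a hypothetical helper name:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class DescriptorScanSketch {
    // Hypothetical helper: collect every descriptor.json under "root", the same
    // recursive scan findSegments() performs before inspecting each descriptor.
    static List<Path> findDescriptors(Path root, Configuration conf) throws IOException {
        List<Path> found = new ArrayList<>();
        FileSystem fs = root.getFileSystem(conf);
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(root, true);
        while (it.hasNext()) {
            Path path = it.next().getPath();
            if (path.getName().endsWith("descriptor.json")) {
                found.add(path);
            }
        }
        return found;
    }
}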

Example 5 with Path

Use of org.apache.hadoop.fs.Path in project druid by druid-io.

From the class HdfsDataSegmentKiller, the kill method:

@Override
public void kill(DataSegment segment) throws SegmentLoadingException {
    final Path segmentPath = getPath(segment);
    log.info("killing segment[%s] mapped to path[%s]", segment.getIdentifier(), segmentPath);
    try {
        String segmentLocation = segmentPath.getName();
        final FileSystem fs = segmentPath.getFileSystem(config);
        if (!segmentLocation.endsWith(".zip")) {
            throw new SegmentLoadingException("Unknown file type[%s]", segmentPath);
        } else {
            if (!fs.exists(segmentPath)) {
                log.warn("Segment Path [%s] does not exist. It appears to have been deleted already.", segmentPath);
                return;
            }
            String[] zipParts = segmentLocation.split("_");
            // for segments stored as hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_index.zip
            if (zipParts.length == 2 && zipParts[1].equals("index.zip") && StringUtils.isNumeric(zipParts[0])) {
                if (!fs.delete(segmentPath, false)) {
                    throw new SegmentLoadingException("Unable to kill segment, failed to delete [%s]", segmentPath.toString());
                }
                Path descriptorPath = new Path(segmentPath.getParent(), String.format("%s_descriptor.json", zipParts[0]));
                //delete the partitionNumber_descriptor.json
                if (!fs.delete(descriptorPath, false)) {
                    throw new SegmentLoadingException("Unable to kill segment, failed to delete [%s]", descriptorPath.toString());
                }
                //for segments stored as hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_index.zip
                //the max depth to look up is 2, i.e. the version directory and the interval directory.
                mayBeDeleteParentsUpto(fs, segmentPath, 2);
            } else {
                // index.zip
                if (!fs.delete(segmentPath, false)) {
                    throw new SegmentLoadingException("Unable to kill segment, failed to delete [%s]", segmentPath.toString());
                }
                Path descriptorPath = new Path(segmentPath.getParent(), "descriptor.json");
                if (!fs.delete(descriptorPath, false)) {
                    throw new SegmentLoadingException("Unable to kill segment, failed to delete [%s]", descriptorPath.toString());
                }
                //for segments stored as hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum/index.zip
                //the max depth to look up is 3, i.e. the partition number directory, the version directory and the interval directory.
                mayBeDeleteParentsUpto(fs, segmentPath, 3);
            }
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Unable to kill segment");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException)
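
After removing the index zip and its descriptor, the method calls mayBeDeleteParentsUpto() to prune directories that the delete has left empty, bounded by a maximum depth so the data-source root is never touched. The helper below is a hypothetical analogue of that cleanup, not Druid's implementation:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ParentCleanupSketch {
    // Hypothetical analogue of mayBeDeleteParentsUpto(): walk at most "depth" levels
    // up from the deleted file and remove each parent directory that is now empty.
    static void deleteEmptyParents(FileSystem fs, Path deletedFile, int depth) throws IOException {
        Path current = deletedFile.getParent();
        for (int i = 0; i < depth && current != null; i++) {
            // listStatus() on an empty directory returns a zero-length array.
            if (fs.exists(current) && fs.listStatus(current).length == 0) {
                fs.delete(current, false);
                current = current.getParent();
            } else {
                break;
            }
        }
    }
}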

Aggregations

Path (org.apache.hadoop.fs.Path) 11752
Test (org.junit.Test) 4193
FileSystem (org.apache.hadoop.fs.FileSystem) 3587
IOException (java.io.IOException) 2631
Configuration (org.apache.hadoop.conf.Configuration) 2621
FileStatus (org.apache.hadoop.fs.FileStatus) 1568
ArrayList (java.util.ArrayList) 1145
File (java.io.File) 987
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 924
HashMap (java.util.HashMap) 570
Job (org.apache.hadoop.mapreduce.Job) 492
JobConf (org.apache.hadoop.mapred.JobConf) 477
URI (java.net.URI) 465
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) 462
FileNotFoundException (java.io.FileNotFoundException) 441
FsPermission (org.apache.hadoop.fs.permission.FsPermission) 375
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster) 362
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem) 355
Map (java.util.Map) 326
List (java.util.List) 316