
Example 1 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class DataSegmentPusherUtilTest, method shouldNotHaveColonsInHdfsStorageDir:

@Test
public void shouldNotHaveColonsInHdfsStorageDir() throws Exception {
    Interval interval = new Interval("2011-10-01/2011-10-02");
    ImmutableMap<String, Object> loadSpec = ImmutableMap.<String, Object>of("something", "or_other");
    DataSegment segment = new DataSegment(
        "something",
        interval,
        "brand:new:version",
        loadSpec,
        Arrays.asList("dim1", "dim2"),
        Arrays.asList("met1", "met2"),
        NoneShardSpec.instance(),
        null,
        1
    );
    String storageDir = DataSegmentPusherUtil.getHdfsStorageDir(segment);
    Assert.assertEquals("something/20111001T000000.000Z_20111002T000000.000Z/brand_new_version", storageDir);
}
Also used : DataSegment(io.druid.timeline.DataSegment) Interval(org.joda.time.Interval) Test(org.junit.Test)
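
HDFS does not allow ':' in path components, which is why the version string brand:new:version has to be sanitized before it becomes part of a directory name. The following stand-alone sketch is a hypothetical helper, not the actual DataSegmentPusherUtil implementation; it only mirrors the dataSource/start_end/version layout asserted in the test above and assumes a UTC default time zone, as the test does.

import org.joda.time.Interval;
import org.joda.time.format.ISODateTimeFormat;

// Hypothetical helper that mirrors the dataSource/interval/version layout asserted in the test above.
public class HdfsStorageDirSketch {

    public static String storageDir(String dataSource, Interval interval, String version) {
        String start = interval.getStart().toString(ISODateTimeFormat.basicDateTime());
        String end = interval.getEnd().toString(ISODateTimeFormat.basicDateTime());
        // HDFS rejects ':' in path components, so colons in the version are replaced with underscores.
        return dataSource + "/" + start + "_" + end + "/" + version.replace(':', '_');
    }

    public static void main(String[] args) {
        System.out.println(storageDir("something", new Interval("2011-10-01/2011-10-02"), "brand:new:version"));
        // With a UTC default time zone this prints:
        // something/20111001T000000.000Z_20111002T000000.000Z/brand_new_version
    }
}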

Example 2 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class HdfsDataSegmentFinderTest, method testFindSegments:

@Test
public void testFindSegments() throws Exception {
    final HdfsDataSegmentFinder hdfsDataSegmentFinder = new HdfsDataSegmentFinder(conf, mapper);
    final Set<DataSegment> segments = hdfsDataSegmentFinder.findSegments(dataSourceDir.toString(), false);
    Assert.assertEquals(6, segments.size());
    DataSegment updatedSegment1 = null;
    DataSegment updatedSegment2 = null;
    DataSegment updatedSegment3 = null;
    DataSegment updatedSegment4_0 = null;
    DataSegment updatedSegment4_1 = null;
    DataSegment updatedSegment5 = null;
    for (DataSegment dataSegment : segments) {
        if (dataSegment.getIdentifier().equals(SEGMENT_1.getIdentifier())) {
            updatedSegment1 = dataSegment;
        } else if (dataSegment.getIdentifier().equals(SEGMENT_2.getIdentifier())) {
            updatedSegment2 = dataSegment;
        } else if (dataSegment.getIdentifier().equals(SEGMENT_3.getIdentifier())) {
            updatedSegment3 = dataSegment;
        } else if (dataSegment.getIdentifier().equals(SEGMENT_4_0.getIdentifier())) {
            updatedSegment4_0 = dataSegment;
        } else if (dataSegment.getIdentifier().equals(SEGMENT_4_1.getIdentifier())) {
            updatedSegment4_1 = dataSegment;
        } else if (dataSegment.getIdentifier().equals(SEGMENT_5.getIdentifier())) {
            updatedSegment5 = dataSegment;
        } else {
            Assert.fail("Unexpected segment");
        }
    }
    Assert.assertEquals(descriptor1.toUri().getPath(), getDescriptorPath(updatedSegment1));
    Assert.assertEquals(descriptor2.toUri().getPath(), getDescriptorPath(updatedSegment2));
    Assert.assertEquals(descriptor3.toUri().getPath(), getDescriptorPath(updatedSegment3));
    Assert.assertEquals(descriptor4_0.toUri().getPath(), getDescriptorPath(updatedSegment4_0));
    Assert.assertEquals(descriptor4_1.toUri().getPath(), getDescriptorPath(updatedSegment4_1));
    Assert.assertEquals(descriptor5.toUri().getPath(), getDescriptorPathWithPartitionNum(updatedSegment5, 1));
    final String serializedSegment1 = mapper.writeValueAsString(updatedSegment1);
    final String serializedSegment2 = mapper.writeValueAsString(updatedSegment2);
    final String serializedSegment3 = mapper.writeValueAsString(updatedSegment3);
    final String serializedSegment4_0 = mapper.writeValueAsString(updatedSegment4_0);
    final String serializedSegment4_1 = mapper.writeValueAsString(updatedSegment4_1);
    final String serializedSegment5 = mapper.writeValueAsString(updatedSegment5);
    // since updateDescriptor was not enabled, descriptor.json still has stale information
    Assert.assertNotEquals(serializedSegment1, readContent(descriptor1));
    Assert.assertNotEquals(serializedSegment2, readContent(descriptor2));
    Assert.assertNotEquals(serializedSegment3, readContent(descriptor3));
    Assert.assertNotEquals(serializedSegment4_0, readContent(descriptor4_0));
    Assert.assertNotEquals(serializedSegment4_1, readContent(descriptor4_1));
    Assert.assertNotEquals(serializedSegment5, readContent(descriptor5));
    // enable updateDescriptor so that each descriptor.json will be updated to reflect the new loadSpec
    final Set<DataSegment> segments2 = hdfsDataSegmentFinder.findSegments(dataSourceDir.toString(), true);
    Assert.assertEquals(segments, segments2);
    Assert.assertEquals(serializedSegment1, readContent(descriptor1));
    Assert.assertEquals(serializedSegment2, readContent(descriptor2));
    Assert.assertEquals(serializedSegment3, readContent(descriptor3));
    Assert.assertEquals(serializedSegment4_0, readContent(descriptor4_0));
    Assert.assertEquals(serializedSegment4_1, readContent(descriptor4_1));
    Assert.assertEquals(serializedSegment5, readContent(descriptor5));
}
Also used : HdfsDataSegmentFinder(io.druid.storage.hdfs.HdfsDataSegmentFinder) DataSegment(io.druid.timeline.DataSegment) Test(org.junit.Test)
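
The test calls findSegments twice: first with updateDescriptor=false, which only discovers segments and leaves each on-disk descriptor.json untouched, then with true, which rewrites the descriptors to match the corrected loadSpec. Below is a minimal usage sketch outside the test, assuming only the constructor and findSegments signature shown above; the HDFS path and class name are hypothetical.

import java.util.Set;

import org.apache.hadoop.conf.Configuration;

import io.druid.jackson.DefaultObjectMapper;
import io.druid.storage.hdfs.HdfsDataSegmentFinder;
import io.druid.timeline.DataSegment;

public class FindSegmentsSketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();               // picks up core-site.xml / hdfs-site.xml from the classpath
        DefaultObjectMapper mapper = new DefaultObjectMapper();

        HdfsDataSegmentFinder finder = new HdfsDataSegmentFinder(conf, mapper);

        // false = report only; true would rewrite each descriptor.json with the corrected loadSpec
        Set<DataSegment> segments = finder.findSegments("hdfs://namenode:8020/druid/segments/wikipedia", false);
        for (DataSegment segment : segments) {
            System.out.println(segment.getIdentifier());
        }
    }
}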

Example 3 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class HdfsDataSegmentFinder, method findSegments:

@Override
public Set<DataSegment> findSegments(String workingDirPathStr, boolean updateDescriptor) throws SegmentLoadingException {
    final Set<DataSegment> segments = Sets.newHashSet();
    final Path workingDirPath = new Path(workingDirPathStr);
    FileSystem fs;
    try {
        fs = workingDirPath.getFileSystem(config);
        log.info(fs.getScheme());
        log.info("FileSystem URI:" + fs.getUri().toString());
        if (!fs.exists(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] doesn't exist.", workingDirPath);
        }
        if (!fs.isDirectory(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] is not a directory!?", workingDirPath);
        }
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(workingDirPath, true);
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            final Path path = locatedFileStatus.getPath();
            if (path.getName().endsWith("descriptor.json")) {
                final Path indexZip;
                final String[] descriptorParts = path.getName().split("_");
                if (descriptorParts.length == 2 && descriptorParts[1].equals("descriptor.json") && StringUtils.isNumeric(descriptorParts[0])) {
                    indexZip = new Path(path.getParent(), String.format("%s_index.zip", descriptorParts[0]));
                } else {
                    indexZip = new Path(path.getParent(), "index.zip");
                }
                if (fs.exists(indexZip)) {
                    final DataSegment dataSegment = mapper.readValue(fs.open(path), DataSegment.class);
                    log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);
                    final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
                    final String pathWithoutScheme = indexZip.toUri().getPath();
                    if (!loadSpec.get("type").equals(HdfsStorageDruidModule.SCHEME) || !loadSpec.get("path").equals(pathWithoutScheme)) {
                        loadSpec.put("type", HdfsStorageDruidModule.SCHEME);
                        loadSpec.put("path", pathWithoutScheme);
                        if (updateDescriptor) {
                            log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]", path, pathWithoutScheme);
                            mapper.writeValue(fs.create(path, true), dataSegment);
                        }
                    }
                    segments.add(dataSegment);
                } else {
                    throw new SegmentLoadingException("index.zip didn't exist at [%s] while descripter.json exists!?", indexZip);
                }
            }
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Problems interacting with filesystem[%s].", workingDirPath);
    }
    return segments;
}
Also used : Path(org.apache.hadoop.fs.Path) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) FileSystem(org.apache.hadoop.fs.FileSystem) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) DataSegment(io.druid.timeline.DataSegment)
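
The naming rule that pairs each descriptor with its index zip is easy to miss in the loop above: N_descriptor.json (where N is the partition number) must sit next to N_index.zip, while a bare descriptor.json pairs with index.zip. A small, hypothetical illustration of just that rule:

// Illustrative only: reproduces the descriptor-to-index-zip naming rule from findSegments above.
public class DescriptorNamingSketch {

    static String indexZipNameFor(String descriptorName) {
        String[] parts = descriptorName.split("_");
        boolean partitioned = parts.length == 2
            && "descriptor.json".equals(parts[1])
            && !parts[0].isEmpty()
            && parts[0].chars().allMatch(Character::isDigit);
        // e.g. "3_descriptor.json" -> "3_index.zip", plain "descriptor.json" -> "index.zip"
        return partitioned ? String.format("%s_index.zip", parts[0]) : "index.zip";
    }

    public static void main(String[] args) {
        System.out.println(indexZipNameFor("descriptor.json"));   // index.zip
        System.out.println(indexZipNameFor("1_descriptor.json")); // 1_index.zip
    }
}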

Example 4 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class HdfsDataSegmentPusher, method push:

@Override
public DataSegment push(File inDir, DataSegment segment) throws IOException {
    final String storageDir = DataSegmentPusherUtil.getHdfsStorageDir(segment);
    log.info("Copying segment[%s] to HDFS at location[%s/%s]", segment.getIdentifier(), fullyQualifiedStorageDirectory, storageDir);
    Path tmpIndexFile = new Path(String.format(
        "%s/%s/%s/%s_index.zip",
        fullyQualifiedStorageDirectory,
        segment.getDataSource(),
        UUIDUtils.generateUuid(),
        segment.getShardSpec().getPartitionNum()
    ));
    FileSystem fs = tmpIndexFile.getFileSystem(hadoopConfig);
    fs.mkdirs(tmpIndexFile.getParent());
    log.info("Compressing files from[%s] to [%s]", inDir, tmpIndexFile);
    final long size;
    final DataSegment dataSegment;
    try (FSDataOutputStream out = fs.create(tmpIndexFile)) {
        size = CompressionUtils.zip(inDir, out);
        final Path outIndexFile = new Path(String.format(
            "%s/%s/%d_index.zip", fullyQualifiedStorageDirectory, storageDir, segment.getShardSpec().getPartitionNum()
        ));
        final Path outDescriptorFile = new Path(String.format(
            "%s/%s/%d_descriptor.json", fullyQualifiedStorageDirectory, storageDir, segment.getShardSpec().getPartitionNum()
        ));
        dataSegment = segment.withLoadSpec(makeLoadSpec(outIndexFile))
            .withSize(size)
            .withBinaryVersion(SegmentUtils.getVersionFromDir(inDir));
        final Path tmpDescriptorFile = new Path(
            tmpIndexFile.getParent(),
            String.format("%s_descriptor.json", dataSegment.getShardSpec().getPartitionNum())
        );
        log.info("Creating descriptor file at[%s]", tmpDescriptorFile);
        ByteSource.wrap(jsonMapper.writeValueAsBytes(dataSegment))
            .copyTo(new HdfsOutputStreamSupplier(fs, tmpDescriptorFile));
        // Create parent if it does not exist, recreation is not an error
        fs.mkdirs(outIndexFile.getParent());
        copyFilesWithChecks(fs, tmpDescriptorFile, outDescriptorFile);
        copyFilesWithChecks(fs, tmpIndexFile, outIndexFile);
    } finally {
        try {
            if (fs.exists(tmpIndexFile.getParent()) && !fs.delete(tmpIndexFile.getParent(), true)) {
                log.error("Failed to delete temp directory[%s]", tmpIndexFile.getParent());
            }
        } catch (IOException ex) {
            log.error(ex, "Failed to delete temp directory[%s]", tmpIndexFile.getParent());
        }
    }
    return dataSegment;
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) IOException(java.io.IOException) DataSegment(io.druid.timeline.DataSegment)
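
A minimal sketch of driving the pusher directly, using the constructor shown in the test in Example 5 below. The storage directory value and the HdfsDataSegmentPusherConfig package are assumptions; on success, push returns a copy of the segment with loadSpec, size and binaryVersion filled in, as above.

import java.io.File;

import org.apache.hadoop.conf.Configuration;

import io.druid.jackson.DefaultObjectMapper;
import io.druid.storage.hdfs.HdfsDataSegmentPusher;
import io.druid.storage.hdfs.HdfsDataSegmentPusherConfig;
import io.druid.timeline.DataSegment;

public class PushSegmentSketch {

    public static DataSegment pushToHdfs(File segmentDir, DataSegment segment) throws Exception {
        HdfsDataSegmentPusherConfig config = new HdfsDataSegmentPusherConfig();
        config.setStorageDirectory("hdfs://namenode:8020/druid/segments"); // hypothetical target

        HdfsDataSegmentPusher pusher = new HdfsDataSegmentPusher(config, new Configuration(), new DefaultObjectMapper());

        // Writes {partitionNum}_index.zip and {partitionNum}_descriptor.json under the segment's storage dir.
        return pusher.push(segmentDir, segment);
    }
}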

Example 5 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class HdfsDataSegmentPusherTest, method testUsingScheme:

private void testUsingScheme(final String scheme) throws Exception {
    Configuration conf = new Configuration(true);
    // Create a mock segment on disk
    File segmentDir = tempFolder.newFolder();
    File tmp = new File(segmentDir, "version.bin");
    final byte[] data = new byte[] { 0x0, 0x0, 0x0, 0x1 };
    Files.write(data, tmp);
    final long size = data.length;
    HdfsDataSegmentPusherConfig config = new HdfsDataSegmentPusherConfig();
    final File storageDirectory = tempFolder.newFolder();
    config.setStorageDirectory(
        scheme != null
            ? String.format("%s://%s", scheme, storageDirectory.getAbsolutePath())
            : storageDirectory.getAbsolutePath()
    );
    HdfsDataSegmentPusher pusher = new HdfsDataSegmentPusher(config, conf, new DefaultObjectMapper());
    DataSegment segmentToPush = new DataSegment(
        "foo",
        new Interval("2015/2016"),
        "0",
        Maps.<String, Object>newHashMap(),
        Lists.<String>newArrayList(),
        Lists.<String>newArrayList(),
        NoneShardSpec.instance(),
        0,
        size
    );
    DataSegment segment = pusher.push(segmentDir, segmentToPush);
    String indexUri = String.format(
        "%s/%s/%d_index.zip",
        FileSystem.newInstance(conf).makeQualified(new Path(config.getStorageDirectory())).toUri().toString(),
        DataSegmentPusherUtil.getHdfsStorageDir(segmentToPush),
        segmentToPush.getShardSpec().getPartitionNum()
    );
    Assert.assertEquals(segmentToPush.getSize(), segment.getSize());
    Assert.assertEquals(segmentToPush, segment);
    Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", indexUri), segment.getLoadSpec());
    // verify the pushed index and descriptor files exist under the storage directory
    final String segmentPath = DataSegmentPusherUtil.getHdfsStorageDir(segment);
    File indexFile = new File(String.format("%s/%s/%d_index.zip", storageDirectory, segmentPath, segment.getShardSpec().getPartitionNum()));
    Assert.assertTrue(indexFile.exists());
    File descriptorFile = new File(String.format("%s/%s/%d_descriptor.json", storageDirectory, segmentPath, segment.getShardSpec().getPartitionNum()));
    Assert.assertTrue(descriptorFile.exists());
    // a second push of the same segment should succeed (no exception) and clean up its temp dir
    File outDir = new File(String.format("%s/%s", config.getStorageDirectory(), segmentPath));
    outDir.setReadOnly();
    try {
        pusher.push(segmentDir, segmentToPush);
    } catch (IOException e) {
        Assert.fail("should not throw exception");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) IOException(java.io.IOException) File(java.io.File) DataSegment(io.druid.timeline.DataSegment) Interval(org.joda.time.Interval)
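
Because the descriptor written by the pusher is simply the DataSegment serialized with Jackson (the same way HdfsDataSegmentFinder reads it back in Example 3), it can be deserialized with the same mapper. A short sketch, with a purely hypothetical local path:

import java.io.File;

import io.druid.jackson.DefaultObjectMapper;
import io.druid.timeline.DataSegment;

public class ReadDescriptorSketch {

    public static void main(String[] args) throws Exception {
        // Hypothetical path to a descriptor produced by a push like the one above.
        File descriptor = new File("/tmp/druid-storage/foo/20150101T000000.000Z_20160101T000000.000Z/0/0_descriptor.json");
        DataSegment segment = new DefaultObjectMapper().readValue(descriptor, DataSegment.class);
        System.out.println(segment.getIdentifier() + " -> " + segment.getLoadSpec());
    }
}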

Aggregations

DataSegment (io.druid.timeline.DataSegment): 293 usages
Test (org.junit.Test): 151 usages
Interval (org.joda.time.Interval): 136 usages
File (java.io.File): 56 usages
DateTime (org.joda.time.DateTime): 52 usages
IOException (java.io.IOException): 37 usages
DruidServer (io.druid.client.DruidServer): 36 usages
Map (java.util.Map): 35 usages
DruidDataSource (io.druid.client.DruidDataSource): 19 usages
ListeningExecutorService (com.google.common.util.concurrent.ListeningExecutorService): 18 usages
List (java.util.List): 17 usages
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 16 usages
Rule (io.druid.server.coordinator.rules.Rule): 16 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 15 usages
ForeverLoadRule (io.druid.server.coordinator.rules.ForeverLoadRule): 14 usages
IntervalDropRule (io.druid.server.coordinator.rules.IntervalDropRule): 13 usages
IntervalLoadRule (io.druid.server.coordinator.rules.IntervalLoadRule): 13 usages
CountDownLatch (java.util.concurrent.CountDownLatch): 13 usages
GET (javax.ws.rs.GET): 13 usages
Produces (javax.ws.rs.Produces): 13 usages