Example 6 with DataSegment

use of io.druid.timeline.DataSegment in project druid by druid-io.

the class HadoopIngestionSpec method updateSegmentListIfDatasourcePathSpecIsUsed.

public static HadoopIngestionSpec updateSegmentListIfDatasourcePathSpecIsUsed(HadoopIngestionSpec spec, ObjectMapper jsonMapper, UsedSegmentLister segmentLister) throws IOException {
    String dataSource = "dataSource";
    String type = "type";
    String multi = "multi";
    String children = "children";
    String segments = "segments";
    String ingestionSpec = "ingestionSpec";
    Map<String, Object> pathSpec = spec.getIOConfig().getPathSpec();
    Map<String, Object> datasourcePathSpec = null;
    if (pathSpec.get(type).equals(dataSource)) {
        datasourcePathSpec = pathSpec;
    } else if (pathSpec.get(type).equals(multi)) {
        List<Map<String, Object>> childPathSpecs = (List<Map<String, Object>>) pathSpec.get(children);
        for (Map<String, Object> childPathSpec : childPathSpecs) {
            if (childPathSpec.get(type).equals(dataSource)) {
                datasourcePathSpec = childPathSpec;
                break;
            }
        }
    }
    if (datasourcePathSpec != null) {
        Map<String, Object> ingestionSpecMap = (Map<String, Object>) datasourcePathSpec.get(ingestionSpec);
        DatasourceIngestionSpec ingestionSpecObj = jsonMapper.convertValue(ingestionSpecMap, DatasourceIngestionSpec.class);
        List<DataSegment> segmentsList = segmentLister.getUsedSegmentsForIntervals(ingestionSpecObj.getDataSource(), ingestionSpecObj.getIntervals());
        if (ingestionSpecObj.getSegments() != null) {
            // Ensure that the user-supplied segment list matches the list obtained from the db.
            // This safety check lets users do test-and-set style batch delta ingestion: the delta
            // ingestion task runs only if the current state of the system is the same as when they
            // submitted the task.
            List<DataSegment> userSuppliedSegmentsList = ingestionSpecObj.getSegments();
            if (segmentsList.size() == userSuppliedSegmentsList.size()) {
                Set<DataSegment> segmentsSet = new HashSet<>(segmentsList);
                for (DataSegment userSegment : userSuppliedSegmentsList) {
                    if (!segmentsSet.contains(userSegment)) {
                        throw new IOException("user supplied segments list did not match with segments list obtained from db");
                    }
                }
            } else {
                throw new IOException("user supplied segments list did not match with segments list obtained from db");
            }
        }
        VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
        for (DataSegment segment : segmentsList) {
            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
        }
        final List<WindowedDataSegment> windowedSegments = Lists.newArrayList();
        for (Interval interval : ingestionSpecObj.getIntervals()) {
            final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
            for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
                for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
                    windowedSegments.add(new WindowedDataSegment(chunk.getObject(), holder.getInterval()));
                }
            }
        }
        datasourcePathSpec.put(segments, windowedSegments);
    }
    return spec;
}
Also used : DatasourceIngestionSpec(io.druid.indexer.hadoop.DatasourceIngestionSpec) WindowedDataSegment(io.druid.indexer.hadoop.WindowedDataSegment) DataSegment(io.druid.timeline.DataSegment) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) IOException(java.io.IOException) HashSet(java.util.HashSet) List(java.util.List) Map(java.util.Map) Interval(org.joda.time.Interval)
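
For context, here is a minimal, self-contained sketch of why the method above routes segments through a VersionedIntervalTimeline before building the windowed list: lookup() returns only the chunks of the highest version covering each interval, so segments overshadowed by a newer version never reach the path spec. The class name, helper method, and sample values below are illustrative assumptions, not code from the Druid repository.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Ordering;
import io.druid.timeline.DataSegment;
import io.druid.timeline.TimelineObjectHolder;
import io.druid.timeline.VersionedIntervalTimeline;
import io.druid.timeline.partition.NoneShardSpec;
import org.joda.time.Interval;

public class TimelineOvershadowSketch {
    // Hypothetical helper: builds a throwaway one-day segment with the given version.
    private static DataSegment segment(String version) {
        return new DataSegment("test", new Interval("2013-01-01/2013-01-02"), version,
                ImmutableMap.<String, Object>of(), ImmutableList.of("dim1"),
                ImmutableList.of("metric1"), NoneShardSpec.instance(), 0, 1);
    }

    public static void main(String[] args) {
        VersionedIntervalTimeline<String, DataSegment> timeline =
                new VersionedIntervalTimeline<>(Ordering.natural());
        for (DataSegment s : ImmutableList.of(segment("v1"), segment("v2"))) {
            timeline.add(s.getInterval(), s.getVersion(), s.getShardSpec().createChunk(s));
        }
        // Prints "v2" only: the "v1" segment covers the same interval with a lower
        // version, so the timeline treats it as overshadowed and drops it.
        for (TimelineObjectHolder<String, DataSegment> holder
                : timeline.lookup(new Interval("2013-01-01/2013-01-02"))) {
            System.out.println(holder.getVersion());
        }
    }
}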

Example 7 with DataSegment

use of io.druid.timeline.DataSegment in project druid by druid-io.

the class S3DataSegmentFinder method findSegments.

@Override
public Set<DataSegment> findSegments(String workingDirPath, boolean updateDescriptor) throws SegmentLoadingException {
    final Set<DataSegment> segments = Sets.newHashSet();
    try {
        Iterator<StorageObject> objectsIterator = S3Utils.storageObjectsIterator(s3Client, config.getBucket(), workingDirPath.length() == 0 ? config.getBaseKey() : workingDirPath, config.getMaxListingLength());
        while (objectsIterator.hasNext()) {
            StorageObject storageObject = objectsIterator.next();
            storageObject.closeDataInputStream();
            if (S3Utils.toFilename(storageObject.getKey()).equals("descriptor.json")) {
                final String descriptorJson = storageObject.getKey();
                String indexZip = S3Utils.indexZipForSegmentPath(descriptorJson);
                if (S3Utils.isObjectInBucket(s3Client, config.getBucket(), indexZip)) {
                    S3Object descriptorObject = s3Client.getObject(config.getBucket(), descriptorJson);
                    try (InputStream is = descriptorObject.getDataInputStream()) {
                        final DataSegment dataSegment = jsonMapper.readValue(is, DataSegment.class);
                        log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);
                        final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
                        if (!loadSpec.get("type").equals(S3StorageDruidModule.SCHEME) || !loadSpec.get("key").equals(indexZip)) {
                            loadSpec.put("type", S3StorageDruidModule.SCHEME);
                            loadSpec.put("key", indexZip);
                            if (updateDescriptor) {
                                log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]", descriptorJson, indexObject);
                                S3Object newDescJsonObject = new S3Object(descriptorJson, jsonMapper.writeValueAsString(dataSegment));
                                s3Client.putObject(config.getBucket(), newDescJsonObject);
                            }
                        }
                        segments.add(dataSegment);
                    }
                } else {
                    throw new SegmentLoadingException("index.zip didn't exist at [%s] while descriptor.json exists!?", indexZip);
                }
            }
        }
    } catch (ServiceException e) {
        throw new SegmentLoadingException(e, "Problem interacting with S3");
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "IO exception");
    } catch (Exception e) {
        Throwables.propagateIfInstanceOf(e, SegmentLoadingException.class);
        throw Throwables.propagate(e);
    }
    return segments;
}
Also used : StorageObject(org.jets3t.service.model.StorageObject) S3Object(org.jets3t.service.model.S3Object) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) DataSegment(io.druid.timeline.DataSegment) InputStream(java.io.InputStream) IOException(java.io.IOException) ServiceException(org.jets3t.service.ServiceException)
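
The finder above leans on a layout convention: each segment's descriptor.json sits next to its index.zip under the same key prefix, and S3Utils.indexZipForSegmentPath maps one key to the other. A hypothetical stand-in for that mapping (my sketch, not the actual S3Utils implementation) could look like this:

// Hypothetical stand-in for S3Utils.indexZipForSegmentPath, shown only to make
// the descriptor.json <-> index.zip naming convention explicit.
static String indexZipForSegmentPath(String descriptorJsonKey) {
    // e.g. "baseKey/dataSource/interval/version/partitionNum/descriptor.json"
    //   -> "baseKey/dataSource/interval/version/partitionNum/index.zip"
    final int lastSlash = descriptorJsonKey.lastIndexOf('/');
    return descriptorJsonKey.substring(0, lastSlash + 1) + "index.zip";
}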

Example 8 with DataSegment

use of io.druid.timeline.DataSegment in project druid by druid-io.

the class S3DataSegmentFinderTest method testFindSegments.

@Test
public void testFindSegments() throws Exception {
    final S3DataSegmentFinder s3DataSegmentFinder = new S3DataSegmentFinder(mockS3Client, config, mapper);
    final Set<DataSegment> segments = s3DataSegmentFinder.findSegments("", false);
    Assert.assertEquals(5, segments.size());
    DataSegment updatedSegment1 = null;
    DataSegment updatedSegment2 = null;
    DataSegment updatedSegment3 = null;
    DataSegment updatedSegment4_0 = null;
    DataSegment updatedSegment4_1 = null;
    for (DataSegment dataSegment : segments) {
        if (dataSegment.getIdentifier().equals(SEGMENT_1.getIdentifier())) {
            updatedSegment1 = dataSegment;
        } else if (dataSegment.getIdentifier().equals(SEGMENT_2.getIdentifier())) {
            updatedSegment2 = dataSegment;
        } else if (dataSegment.getIdentifier().equals(SEGMENT_3.getIdentifier())) {
            updatedSegment3 = dataSegment;
        } else if (dataSegment.getIdentifier().equals(SEGMENT_4_0.getIdentifier())) {
            updatedSegment4_0 = dataSegment;
        } else if (dataSegment.getIdentifier().equals(SEGMENT_4_1.getIdentifier())) {
            updatedSegment4_1 = dataSegment;
        } else {
            Assert.fail("Unexpected segment identifier : " + dataSegment.getIdentifier());
        }
    }
    Assert.assertEquals(descriptor1, getDescriptorPath(updatedSegment1));
    Assert.assertEquals(descriptor2, getDescriptorPath(updatedSegment2));
    Assert.assertEquals(descriptor3, getDescriptorPath(updatedSegment3));
    Assert.assertEquals(descriptor4_0, getDescriptorPath(updatedSegment4_0));
    Assert.assertEquals(descriptor4_1, getDescriptorPath(updatedSegment4_1));
    final String serializedSegment1 = mapper.writeValueAsString(updatedSegment1);
    final String serializedSegment2 = mapper.writeValueAsString(updatedSegment2);
    final String serializedSegment3 = mapper.writeValueAsString(updatedSegment3);
    final String serializedSegment4_0 = mapper.writeValueAsString(updatedSegment4_0);
    final String serializedSegment4_1 = mapper.writeValueAsString(updatedSegment4_1);
    Assert.assertNotEquals(serializedSegment1, IOUtils.toString(mockS3Client.getObject(bucket, descriptor1).getDataInputStream()));
    Assert.assertNotEquals(serializedSegment2, IOUtils.toString(mockS3Client.getObject(bucket, descriptor2).getDataInputStream()));
    Assert.assertNotEquals(serializedSegment3, IOUtils.toString(mockS3Client.getObject(bucket, descriptor3).getDataInputStream()));
    Assert.assertNotEquals(serializedSegment4_0, IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_0).getDataInputStream()));
    Assert.assertNotEquals(serializedSegment4_1, IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_1).getDataInputStream()));
    final Set<DataSegment> segments2 = s3DataSegmentFinder.findSegments("", true);
    Assert.assertEquals(segments, segments2);
    Assert.assertEquals(serializedSegment1, IOUtils.toString(mockS3Client.getObject(bucket, descriptor1).getDataInputStream()));
    Assert.assertEquals(serializedSegment2, IOUtils.toString(mockS3Client.getObject(bucket, descriptor2).getDataInputStream()));
    Assert.assertEquals(serializedSegment3, IOUtils.toString(mockS3Client.getObject(bucket, descriptor3).getDataInputStream()));
    Assert.assertEquals(serializedSegment4_0, IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_0).getDataInputStream()));
    Assert.assertEquals(serializedSegment4_1, IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_1).getDataInputStream()));
}
Also used : DataSegment(io.druid.timeline.DataSegment) Test(org.junit.Test)
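
The getDescriptorPath helper is not part of this excerpt; presumably it derives the descriptor.json key from the segment's loadSpec. A guess at its shape, reusing MapUtils.getString as seen in Example 10 (hypothetical reconstruction, not the actual test code):

// Hypothetical reconstruction of the test helper getDescriptorPath(...): reads
// the index.zip key out of the loadSpec and swaps the filename for descriptor.json.
private String getDescriptorPath(DataSegment segment) {
    final String indexZipKey = MapUtils.getString(segment.getLoadSpec(), "key");
    return indexZipKey.substring(0, indexZipKey.lastIndexOf('/') + 1) + "descriptor.json";
}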

Example 9 with DataSegment

use of io.druid.timeline.DataSegment in project druid by druid-io.

the class S3DataSegmentMoverTest method testFailsToMoveMissing.

@Test(expected = SegmentLoadingException.class)
public void testFailsToMoveMissing() throws Exception {
    MockStorageService mockS3Client = new MockStorageService();
    S3DataSegmentMover mover = new S3DataSegmentMover(mockS3Client, new S3DataSegmentPusherConfig());
    mover.move(
        new DataSegment(
            "test", new Interval("2013-01-01/2013-01-02"), "1",  // dataSource, interval, version
            ImmutableMap.<String, Object>of(
                "key", "baseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip",
                "bucket", "DOES NOT EXIST"),                     // loadSpec naming a nonexistent bucket
            ImmutableList.of("dim1", "dim1"),                    // dimensions
            ImmutableList.of("metric1", "metric2"),              // metrics
            NoneShardSpec.instance(), 0, 1),                     // shardSpec, binaryVersion, size
        ImmutableMap.<String, Object>of("bucket", "DOES NOT EXIST", "baseKey", "baseKey2"));
}
Also used : S3Object(org.jets3t.service.model.S3Object) StorageObject(org.jets3t.service.model.StorageObject) DataSegment(io.druid.timeline.DataSegment) Interval(org.joda.time.Interval) Test(org.junit.Test)

Example 10 with DataSegment

use of io.druid.timeline.DataSegment in project druid by druid-io.

the class S3DataSegmentMoverTest method testMoveNoop.

@Test
public void testMoveNoop() throws Exception {
    MockStorageService mockS3Client = new MockStorageService();
    S3DataSegmentMover mover = new S3DataSegmentMover(mockS3Client, new S3DataSegmentPusherConfig());
    mockS3Client.putObject("archive", new S3Object("targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip"));
    mockS3Client.putObject("archive", new S3Object("targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/descriptor.json"));
    DataSegment movedSegment = mover.move(sourceSegment, ImmutableMap.<String, Object>of("baseKey", "targetBaseKey", "bucket", "archive"));
    Map<String, Object> targetLoadSpec = movedSegment.getLoadSpec();
    Assert.assertEquals("targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip", MapUtils.getString(targetLoadSpec, "key"));
    Assert.assertEquals("archive", MapUtils.getString(targetLoadSpec, "bucket"));
    Assert.assertFalse(mockS3Client.didMove());
}
Also used : S3Object(org.jets3t.service.model.S3Object) StorageObject(org.jets3t.service.model.StorageObject) DataSegment(io.druid.timeline.DataSegment) Test(org.junit.Test)
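
The two mover tests above exercise opposite branches of the same decision: testMoveNoop hits the skip path, while testFailsToMoveMissing hits the failure path. A condensed sketch of that decision follows; the method name, parameters, and exact control flow are my assumptions rather than the real S3DataSegmentMover internals, though S3Utils.isObjectInBucket and the SegmentLoadingException format constructor both appear in the examples above.

// Hypothetical condensation of the mover's copy-or-skip logic.
static boolean moveIfNeeded(RestS3Service s3Client, String srcBucket, String srcKey,
                            String dstBucket, String dstKey)
        throws ServiceException, SegmentLoadingException {
    if (S3Utils.isObjectInBucket(s3Client, dstBucket, dstKey)) {
        return false; // already at the target: no-op, so didMove() stays false
    }
    if (!S3Utils.isObjectInBucket(s3Client, srcBucket, srcKey)) {
        // neither source nor target exists: nothing to move (testFailsToMoveMissing)
        throw new SegmentLoadingException("Unable to move file [%s/%s]", srcBucket, srcKey);
    }
    s3Client.moveObject(srcBucket, srcKey, dstBucket, new S3Object(dstKey), false);
    return true;
}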

Aggregations

DataSegment (io.druid.timeline.DataSegment): 314
Test (org.junit.Test): 165
Interval (org.joda.time.Interval): 144
File (java.io.File): 61
DateTime (org.joda.time.DateTime): 55
IOException (java.io.IOException): 41
Map (java.util.Map): 38
DruidServer (io.druid.client.DruidServer): 36
Path (org.apache.hadoop.fs.Path): 28
ImmutableMap (com.google.common.collect.ImmutableMap): 22
DruidDataSource (io.druid.client.DruidDataSource): 19
List (java.util.List): 19
ListeningExecutorService (com.google.common.util.concurrent.ListeningExecutorService): 18
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 17
Rule (io.druid.server.coordinator.rules.Rule): 16
DataSegmentPusher (io.druid.segment.loading.DataSegmentPusher): 15
ForeverLoadRule (io.druid.server.coordinator.rules.ForeverLoadRule): 14
ImmutableList (com.google.common.collect.ImmutableList): 13
IntervalDropRule (io.druid.server.coordinator.rules.IntervalDropRule): 13
IntervalLoadRule (io.druid.server.coordinator.rules.IntervalLoadRule): 13