Example 1 with Interval

Use of org.joda.time.Interval in project druid by druid-io.

From the class DataSegmentPusherUtilTest, the method shouldNotHaveColonsInHdfsStorageDir:

@Test
public void shouldNotHaveColonsInHdfsStorageDir() throws Exception {
    Interval interval = new Interval("2011-10-01/2011-10-02");
    ImmutableMap<String, Object> loadSpec = ImmutableMap.<String, Object>of("something", "or_other");
    DataSegment segment = new DataSegment(
        "something",
        interval,
        "brand:new:version",
        loadSpec,
        Arrays.asList("dim1", "dim2"),
        Arrays.asList("met1", "met2"),
        NoneShardSpec.instance(),
        null,
        1
    );
    String storageDir = DataSegmentPusherUtil.getHdfsStorageDir(segment);
    Assert.assertEquals("something/20111001T000000.000Z_20111002T000000.000Z/brand_new_version", storageDir);
}
Also used: DataSegment (io.druid.timeline.DataSegment), Interval (org.joda.time.Interval), Test (org.junit.Test)
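
The interval string in this test is parsed by Joda-Time's ISO-8601 support. A minimal standalone sketch of that parsing (illustrative only; the class name and the UTC-pinned variant are not part of the Druid test):

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class IntervalParsingSketch {
    public static void main(String[] args) {
        // "start/end" form: both endpoints are explicit ISO-8601 instants.
        // Date-only strings are parsed in the JVM's default time zone.
        Interval fromString = new Interval("2011-10-01/2011-10-02");
        System.out.println(fromString.getStart() + " .. " + fromString.getEnd());

        // Equivalent construction from DateTime objects, pinned to UTC so the result
        // does not depend on the default time zone.
        Interval utc = new Interval(
            new DateTime("2011-10-01", DateTimeZone.UTC),
            new DateTime("2011-10-02", DateTimeZone.UTC)
        );
        System.out.println(utc.toDurationMillis()); // 86400000, i.e. one day
    }
}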

Example 2 with Interval

Use of org.joda.time.Interval in project druid by druid-io.

From the class DataSegmentUtilsTest, the method testBasic:

@Test
public void testBasic() {
    String datasource = "datasource";
    SegmentIdentifierParts desc = new SegmentIdentifierParts(datasource, new Interval("2015-01-02/2015-01-03"), "ver", "0_0");
    Assert.assertEquals("datasource_2015-01-02T00:00:00.000Z_2015-01-03T00:00:00.000Z_ver_0_0", desc.toString());
    Assert.assertEquals(desc, DataSegmentUtils.valueOf(datasource, desc.toString()));
    desc = desc.withInterval(new Interval("2014-10-20T00:00:00Z/P1D"));
    Assert.assertEquals("datasource_2014-10-20T00:00:00.000Z_2014-10-21T00:00:00.000Z_ver_0_0", desc.toString());
    Assert.assertEquals(desc, DataSegmentUtils.valueOf(datasource, desc.toString()));
    desc = new SegmentIdentifierParts(datasource, new Interval("2015-01-02/2015-01-03"), "ver", null);
    Assert.assertEquals("datasource_2015-01-02T00:00:00.000Z_2015-01-03T00:00:00.000Z_ver", desc.toString());
    Assert.assertEquals(desc, DataSegmentUtils.valueOf(datasource, desc.toString()));
    desc = desc.withInterval(new Interval("2014-10-20T00:00:00Z/P1D"));
    Assert.assertEquals("datasource_2014-10-20T00:00:00.000Z_2014-10-21T00:00:00.000Z_ver", desc.toString());
    Assert.assertEquals(desc, DataSegmentUtils.valueOf(datasource, desc.toString()));
}
Also used: SegmentIdentifierParts (io.druid.timeline.DataSegmentUtils.SegmentIdentifierParts), Interval (org.joda.time.Interval), Test (org.junit.Test)
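
The withInterval calls above use the ISO-8601 instant/period form. A short standalone sketch of how Joda-Time resolves it (illustrative only; class and variable names are not from the Druid code):

import org.joda.time.Interval;
import org.joda.time.Period;

public class InstantPeriodSketch {
    public static void main(String[] args) {
        // "instant/period": the end instant is computed by adding the period to the start.
        Interval oneDay = new Interval("2014-10-20T00:00:00Z/P1D");
        System.out.println(oneDay.toDurationMillis()); // 86400000, i.e. one day

        // The same end instant built explicitly from the start plus a one-day Period
        // (equal here because no DST transition falls inside the interval).
        Interval explicit = new Interval(oneDay.getStart(), Period.days(1));
        System.out.println(explicit.getEndMillis() == oneDay.getEndMillis()); // true
    }
}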

Example 3 with Interval

Use of org.joda.time.Interval in project druid by druid-io.

From the class DataSegmentUtilsTest, the method testDataSourceWithUnderscore2:

@Test
public void testDataSourceWithUnderscore2() {
    String dataSource = "datasource_2015-01-01T00:00:00.000Z";
    SegmentIdentifierParts desc = new SegmentIdentifierParts(dataSource, new Interval("2015-01-02/2015-01-03"), "ver", "0_0");
    Assert.assertEquals("datasource_2015-01-01T00:00:00.000Z_2015-01-02T00:00:00.000Z_2015-01-03T00:00:00.000Z_ver_0_0", desc.toString());
    Assert.assertEquals(desc, DataSegmentUtils.valueOf(dataSource, desc.toString()));
    desc = desc.withInterval(new Interval("2014-10-20T00:00:00Z/P1D"));
    Assert.assertEquals("datasource_2015-01-01T00:00:00.000Z_2014-10-20T00:00:00.000Z_2014-10-21T00:00:00.000Z_ver_0_0", desc.toString());
    Assert.assertEquals(desc, DataSegmentUtils.valueOf(dataSource, desc.toString()));
    desc = new SegmentIdentifierParts(dataSource, new Interval("2015-01-02/2015-01-03"), "ver", null);
    Assert.assertEquals("datasource_2015-01-01T00:00:00.000Z_2015-01-02T00:00:00.000Z_2015-01-03T00:00:00.000Z_ver", desc.toString());
    Assert.assertEquals(desc, DataSegmentUtils.valueOf(dataSource, desc.toString()));
    desc = desc.withInterval(new Interval("2014-10-20T00:00:00Z/P1D"));
    Assert.assertEquals("datasource_2015-01-01T00:00:00.000Z_2014-10-20T00:00:00.000Z_2014-10-21T00:00:00.000Z_ver", desc.toString());
    Assert.assertEquals(desc, DataSegmentUtils.valueOf(dataSource, desc.toString()));
}
Also used: SegmentIdentifierParts (io.druid.timeline.DataSegmentUtils.SegmentIdentifierParts), Interval (org.joda.time.Interval), Test (org.junit.Test)
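
Condensed, the round trip exercised above looks like this (a sketch that reuses the classes and imports from the test; variable names are illustrative):

String dataSource = "datasource_2015-01-01T00:00:00.000Z";
SegmentIdentifierParts parts = new SegmentIdentifierParts(
    dataSource, new Interval("2015-01-02/2015-01-03"), "ver", "0_0"
);
String identifier = parts.toString();
// The identifier embeds the datasource verbatim, so once the datasource itself contains
// '_'-separated, timestamp-like tokens, it can only be parsed back unambiguously when the
// expected datasource name is supplied:
SegmentIdentifierParts roundTripped = DataSegmentUtils.valueOf(dataSource, identifier);
// roundTripped equals parts, per the assertions in the test above.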

Example 4 with Interval

Use of org.joda.time.Interval in project druid by druid-io.

From the class HdfsDataSegmentPusherTest, the method testUsingScheme:

private void testUsingScheme(final String scheme) throws Exception {
    Configuration conf = new Configuration(true);
    // Create a mock segment on disk
    File segmentDir = tempFolder.newFolder();
    File tmp = new File(segmentDir, "version.bin");
    final byte[] data = new byte[] { 0x0, 0x0, 0x0, 0x1 };
    Files.write(data, tmp);
    final long size = data.length;
    HdfsDataSegmentPusherConfig config = new HdfsDataSegmentPusherConfig();
    final File storageDirectory = tempFolder.newFolder();
    config.setStorageDirectory(scheme != null ? String.format("%s://%s", scheme, storageDirectory.getAbsolutePath()) : storageDirectory.getAbsolutePath());
    HdfsDataSegmentPusher pusher = new HdfsDataSegmentPusher(config, conf, new DefaultObjectMapper());
    DataSegment segmentToPush = new DataSegment(
        "foo",
        new Interval("2015/2016"),
        "0",
        Maps.<String, Object>newHashMap(),
        Lists.<String>newArrayList(),
        Lists.<String>newArrayList(),
        NoneShardSpec.instance(),
        0,
        size
    );
    DataSegment segment = pusher.push(segmentDir, segmentToPush);
    String indexUri = String.format(
        "%s/%s/%d_index.zip",
        FileSystem.newInstance(conf).makeQualified(new Path(config.getStorageDirectory())).toUri().toString(),
        DataSegmentPusherUtil.getHdfsStorageDir(segmentToPush),
        segmentToPush.getShardSpec().getPartitionNum()
    );
    Assert.assertEquals(segmentToPush.getSize(), segment.getSize());
    Assert.assertEquals(segmentToPush, segment);
    Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", indexUri), segment.getLoadSpec());
    // rename directory after push
    final String segmentPath = DataSegmentPusherUtil.getHdfsStorageDir(segment);
    File indexFile = new File(String.format("%s/%s/%d_index.zip", storageDirectory, segmentPath, segment.getShardSpec().getPartitionNum()));
    Assert.assertTrue(indexFile.exists());
    File descriptorFile = new File(String.format("%s/%s/%d_descriptor.json", storageDirectory, segmentPath, segment.getShardSpec().getPartitionNum()));
    Assert.assertTrue(descriptorFile.exists());
    // push twice will fail and temp dir cleaned
    File outDir = new File(String.format("%s/%s", config.getStorageDirectory(), segmentPath));
    outDir.setReadOnly();
    try {
        pusher.push(segmentDir, segmentToPush);
    } catch (IOException e) {
        Assert.fail("should not throw exception");
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper), IOException (java.io.IOException), File (java.io.File), DataSegment (io.druid.timeline.DataSegment), Interval (org.joda.time.Interval)
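
The segment in this test covers new Interval("2015/2016"). A small standalone sketch of how Joda-Time expands year-only endpoints (illustrative; date-only and year-only strings are parsed in the JVM's default time zone unless an offset is given):

import org.joda.time.Interval;

public class YearIntervalSketch {
    public static void main(String[] args) {
        // Year-only endpoints expand to the first instant of each year,
        // i.e. [2015-01-01T00:00, 2016-01-01T00:00) in the parsing time zone.
        Interval year = new Interval("2015/2016");
        System.out.println(year.getStart());
        System.out.println(year.getEnd());
        System.out.println(year.contains(new Interval("2015-06-01/2015-06-02"))); // true
    }
}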

Example 5 with Interval

Use of org.joda.time.Interval in project druid by druid-io.

From the class HadoopIngestionSpec, the method updateSegmentListIfDatasourcePathSpecIsUsed:

public static HadoopIngestionSpec updateSegmentListIfDatasourcePathSpecIsUsed(HadoopIngestionSpec spec, ObjectMapper jsonMapper, UsedSegmentLister segmentLister) throws IOException {
    String dataSource = "dataSource";
    String type = "type";
    String multi = "multi";
    String children = "children";
    String segments = "segments";
    String ingestionSpec = "ingestionSpec";
    Map<String, Object> pathSpec = spec.getIOConfig().getPathSpec();
    Map<String, Object> datasourcePathSpec = null;
    if (pathSpec.get(type).equals(dataSource)) {
        datasourcePathSpec = pathSpec;
    } else if (pathSpec.get(type).equals(multi)) {
        List<Map<String, Object>> childPathSpecs = (List<Map<String, Object>>) pathSpec.get(children);
        for (Map<String, Object> childPathSpec : childPathSpecs) {
            if (childPathSpec.get(type).equals(dataSource)) {
                datasourcePathSpec = childPathSpec;
                break;
            }
        }
    }
    if (datasourcePathSpec != null) {
        Map<String, Object> ingestionSpecMap = (Map<String, Object>) datasourcePathSpec.get(ingestionSpec);
        DatasourceIngestionSpec ingestionSpecObj = jsonMapper.convertValue(ingestionSpecMap, DatasourceIngestionSpec.class);
        List<DataSegment> segmentsList = segmentLister.getUsedSegmentsForIntervals(ingestionSpecObj.getDataSource(), ingestionSpecObj.getIntervals());
        if (ingestionSpecObj.getSegments() != null) {
            // Ensure that the user-supplied segment list matches the segment list obtained from the db.
            // This safety check lets users do test-and-set style batch delta ingestion, where the delta
            // ingestion task only runs if the current state of the system is the same as it was when
            // they submitted the task.
            List<DataSegment> userSuppliedSegmentsList = ingestionSpecObj.getSegments();
            if (segmentsList.size() == userSuppliedSegmentsList.size()) {
                Set<DataSegment> segmentsSet = new HashSet<>(segmentsList);
                for (DataSegment userSegment : userSuppliedSegmentsList) {
                    if (!segmentsSet.contains(userSegment)) {
                        throw new IOException("user supplied segments list did not match with segments list obtained from db");
                    }
                }
            } else {
                throw new IOException("user supplied segments list did not match with segments list obtained from db");
            }
        }
        VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
        for (DataSegment segment : segmentsList) {
            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
        }
        final List<WindowedDataSegment> windowedSegments = Lists.newArrayList();
        for (Interval interval : ingestionSpecObj.getIntervals()) {
            final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
            for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
                for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
                    windowedSegments.add(new WindowedDataSegment(chunk.getObject(), holder.getInterval()));
                }
            }
            datasourcePathSpec.put(segments, windowedSegments);
        }
    }
    return spec;
}
Also used: DatasourceIngestionSpec (io.druid.indexer.hadoop.DatasourceIngestionSpec), IOException (java.io.IOException), DataSegment (io.druid.timeline.DataSegment), WindowedDataSegment (io.druid.indexer.hadoop.WindowedDataSegment), TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder), VersionedIntervalTimeline (io.druid.timeline.VersionedIntervalTimeline), List (java.util.List), Map (java.util.Map), HashSet (java.util.HashSet), Interval (org.joda.time.Interval)
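
The timeline lookup in the loop above returns only segments whose intervals overlap each requested interval; the overlap test itself is plain Joda-Time. A minimal sketch of that semantics, independent of the Druid timeline classes (class name is illustrative):

import org.joda.time.Interval;

public class OverlapSketch {
    public static void main(String[] args) {
        Interval requested = new Interval("2015-01-01/2015-02-01");
        // A segment interval lying fully inside the requested interval overlaps it.
        System.out.println(requested.overlaps(new Interval("2015-01-15/2015-01-16"))); // true
        // Joda intervals are half-open [start, end): an interval that merely abuts
        // the requested end does not overlap it.
        System.out.println(requested.overlaps(new Interval("2015-02-01/2015-02-02"))); // false
    }
}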

Aggregations

Interval (org.joda.time.Interval): 1051
Test (org.junit.Test): 604
DateTime (org.joda.time.DateTime): 315
ArrayList (java.util.ArrayList): 186
DataSegment (org.apache.druid.timeline.DataSegment): 145
DataSegment (io.druid.timeline.DataSegment): 138
List (java.util.List): 134
Map (java.util.Map): 131
HashMap (java.util.HashMap): 106
File (java.io.File): 91
IOException (java.io.IOException): 75
ImmutableList (com.google.common.collect.ImmutableList): 71
ImmutableMap (com.google.common.collect.ImmutableMap): 56
TreeMap (java.util.TreeMap): 55
Period (org.joda.time.Period): 55
ISE (org.apache.druid.java.util.common.ISE): 53
HashSet (java.util.HashSet): 50
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 49
QueryRunner (io.druid.query.QueryRunner): 47
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 45