Use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
From the class GranularityPathSpecTest, method testIntervalTrimming.
/**
 * Checks that {@code granularityPathSpec} only registers hourly input directories that fall
 * inside the ingestion interval {@code 2015-01-01T11Z/2015-01-02T05Z}.
 *
 * <p>Eight hourly files are created; the expected value lists only the three at
 * 01/18, 02/00 and 02/03. The files at 01/00 and 01/10 (before the interval start) and at
 * 02/05, 02/07 and 02/09 (at or after the interval end) are not part of the expected value.
 */
@Test
public void testIntervalTrimming() throws Exception {
    UserGroupInformation.setLoginUser(UserGroupInformation.createUserForTesting("test", new String[] { "testGroup" }));

    // Build the ingestion spec step by step instead of as one nested expression.
    final UniformGranularitySpec granularitySpec = new UniformGranularitySpec(
        Granularities.DAY,
        Granularities.ALL,
        ImmutableList.of(new Interval("2015-01-01T11Z/2015-01-02T05Z"))
    );
    final DataSchema dataSchema = new DataSchema("foo", null, new AggregatorFactory[0], granularitySpec, jsonMapper);
    final HadoopIngestionSpec ingestionSpec = new HadoopIngestionSpec(
        dataSchema,
        new HadoopIOConfig(null, null, null),
        DEFAULT_TUNING_CONFIG
    );

    // The path spec scans hourly directories laid out as yyyy/MM/dd/HH and accepts any file name.
    granularityPathSpec.setDataGranularity(Granularities.HOUR);
    granularityPathSpec.setPathFormat("yyyy/MM/dd/HH");
    granularityPathSpec.setFilePattern(".*");
    granularityPathSpec.setInputFormat(TextInputFormat.class);

    final Job hadoopJob = Job.getInstance();
    final String entryFormat = "file:%s/%s;org.apache.hadoop.mapreduce.lib.input.TextInputFormat";

    createFile(
        testFolder,
        "test/2015/01/01/00/file1",
        "test/2015/01/01/10/file2",
        "test/2015/01/01/18/file3",
        "test/2015/01/02/00/file1",
        "test/2015/01/02/03/file2",
        "test/2015/01/02/05/file3",
        "test/2015/01/02/07/file4",
        "test/2015/01/02/09/file5"
    );

    granularityPathSpec.setInputPath(testFolder.getRoot().getPath() + "/test");
    granularityPathSpec.addInputPaths(HadoopDruidIndexerConfig.fromSpec(ingestionSpec), hadoopJob);

    final String registeredPaths = hadoopJob.getConfiguration().get("mapreduce.input.multipleinputs.dir.formats");
    final String expectedPaths = Joiner.on(",").join(
        Lists.newArrayList(
            String.format(entryFormat, testFolder.getRoot(), "test/2015/01/01/18/file3"),
            String.format(entryFormat, testFolder.getRoot(), "test/2015/01/02/00/file1"),
            String.format(entryFormat, testFolder.getRoot(), "test/2015/01/02/03/file2")
        )
    );

    Assert.assertEquals("Did not find expected input paths", expectedPaths, registeredPaths);
}
Use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
From the class StaticPathSpecTest, method testAddingPaths.
/**
 * Checks that a {@link StaticPathSpec} built from {@code "/a/c,/a/b/{c,d}"} registers three
 * separate input entries on the job ({@code /a/c}, {@code /a/b/c} and {@code /a/b/d}), each
 * paired with {@link TextInputFormat} under {@link MultipleInputs#DIR_FORMATS}.
 */
@Test
public void testAddingPaths() throws Exception {
    // Job.getInstance() is the supported factory; the no-arg Job constructor is deprecated.
    Job job = Job.getInstance();
    StaticPathSpec pathSpec = new StaticPathSpec("/a/c,/a/b/{c,d}", null);
    DataSchema schema = new DataSchema("ds", null, new AggregatorFactory[0], null, jsonMapper);
    HadoopIOConfig io = new HadoopIOConfig(null, null, null);
    pathSpec.addInputPaths(new HadoopDruidIndexerConfig(new HadoopIngestionSpec(schema, io, null)), job);

    String paths = job.getConfiguration().get(MultipleInputs.DIR_FORMATS);
    // Fail with a readable message rather than a NullPointerException from split() below.
    Assert.assertNotNull("No input paths were registered under " + MultipleInputs.DIR_FORMATS, paths);

    String formatter = TextInputFormat.class.getName();
    String[] expected = { "/a/c;" + formatter, "/a/b/c;" + formatter, "/a/b/d;" + formatter };
    Assert.assertArrayEquals(expected, paths.split(","));
}
Use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
From the class HadoopConverterJobTest, method setUp.
/**
 * Test fixture setup. In order, it:
 * <ol>
 *   <li>creates scratch and segment directories and points {@code STORAGE_PROPERTY_KEY} at the
 *       segment directory, remembering the previous property value in
 *       {@code storageLocProperty};</li>
 *   <li>copies the {@code druid.sample.tsv} classpath resource into a temporary input file;</li>
 *   <li>builds a {@link HadoopDruidIndexerConfig} that reads that file as a static path;</li>
 *   <li>attempts to drop {@code druid_segments}, then runs a job list that creates the segment
 *       table, determines the configuration and runs the indexer.</li>
 * </ol>
 */
@Before
public void setUp() throws Exception {
    // Metadata updater spec backed by the Derby rule's table and connector configuration.
    final MetadataStorageUpdaterJobSpec updaterJobSpec = new MetadataStorageUpdaterJobSpec() {
        @Override
        public String getSegmentTable() {
            return derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable();
        }

        @Override
        public MetadataStorageConnectorConfig get() {
            return derbyConnectorRule.getMetadataConnectorConfig();
        }
    };

    final File scratchFileDir = temporaryFolder.newFolder();
    // Capture the current property value before it is overwritten below.
    storageLocProperty = System.getProperty(STORAGE_PROPERTY_KEY);
    tmpSegmentDir = temporaryFolder.newFolder();
    System.setProperty(STORAGE_PROPERTY_KEY, tmpSegmentDir.getAbsolutePath());

    final URL url = Preconditions.checkNotNull(Query.class.getClassLoader().getResource("druid.sample.tsv"));
    final File tmpInputFile = temporaryFolder.newFile();
    final ByteSource sampleData = new ByteSource() {
        @Override
        public InputStream openStream() throws IOException {
            return url.openStream();
        }
    };
    FileUtils.retryCopy(sampleData, tmpInputFile, FileUtils.IS_EXCEPTION, 3);

    // Schema: tab-delimited rows with an ISO "ts" column, MONTH segments with DAY query granularity.
    final DataSchema dataSchema = new DataSchema(
        DATASOURCE,
        HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(
            new StringInputRowParser(
                new DelimitedParseSpec(
                    new TimestampSpec("ts", "iso", null),
                    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList(TestIndex.DIMENSIONS)), null, null),
                    "\t",
                    "",
                    Arrays.asList(TestIndex.COLUMNS)
                ),
                null
            ),
            Map.class
        ),
        new AggregatorFactory[] {
            new DoubleSumAggregatorFactory(TestIndex.METRICS[0], TestIndex.METRICS[0]),
            new HyperUniquesAggregatorFactory("quality_uniques", "quality")
        },
        new UniformGranularitySpec(Granularities.MONTH, Granularities.DAY, ImmutableList.<Interval>of(interval)),
        HadoopDruidIndexerConfig.JSON_MAPPER
    );
    final HadoopIOConfig ioConfig = new HadoopIOConfig(
        ImmutableMap.<String, Object>of("type", "static", "paths", tmpInputFile.getAbsolutePath()),
        updaterJobSpec,
        tmpSegmentDir.getAbsolutePath()
    );
    final HadoopTuningConfig tuningConfig = new HadoopTuningConfig(
        scratchFileDir.getAbsolutePath(),
        null, null, null, null, null,
        false, false, false, false,
        null,
        false, false,
        null, null, null,
        false, false
    );
    final HadoopIngestionSpec ingestionSpec = new HadoopIngestionSpec(dataSchema, ioConfig, tuningConfig);
    final HadoopDruidIndexerConfig hadoopDruidIndexerConfig = new HadoopDruidIndexerConfig(ingestionSpec);

    metadataStorageTablesConfigSupplier = derbyConnectorRule.metadataTablesConfigSupplier();
    connector = derbyConnectorRule.getConnector();

    try {
        connector.getDBI().withHandle(new HandleCallback<Void>() {
            @Override
            public Void withHandle(Handle handle) throws Exception {
                handle.execute("DROP TABLE druid_segments");
                return null;
            }
        });
    } catch (CallbackFailedException ignored) {
        // Best-effort cleanup: a failed DROP is deliberately ignored
        // (presumably the table simply does not exist yet).
    }

    // First job in the list: create the segment table.
    final Jobby createSegmentTable = new Jobby() {
        @Override
        public boolean run() {
            connector.createSegmentTable(updaterJobSpec.getSegmentTable());
            return true;
        }
    };
    final List<Jobby> jobs = ImmutableList.<Jobby>of(
        createSegmentTable,
        new HadoopDruidDetermineConfigurationJob(hadoopDruidIndexerConfig),
        new HadoopDruidIndexerJob(hadoopDruidIndexerConfig, new SQLMetadataStorageUpdaterJobHandler(connector))
    );
    JobHelper.runJobs(jobs, hadoopDruidIndexerConfig);
}
Use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
From the class HadoopDruidIndexerConfigTest, method testHashedBucketSelection.
/**
 * Configures ten {@link HashBasedNumberedShardSpec} partitions for the 2010-01-01T01:00:00
 * time chunk and verifies that every millisecond timestamp from 01:00:01 up to the end of
 * its MINUTE bucket, with the same dimensions and values, resolves to the same partition
 * number.
 */
@Test
public void testHashedBucketSelection() {
    final int partitionCount = 10;
    final List<HadoopyShardSpec> shardSpecs = Lists.newArrayList();
    for (int partition = 0; partition < partitionCount; partition++) {
        final HashBasedNumberedShardSpec hashed =
            new HashBasedNumberedShardSpec(partition, partitionCount, null, new DefaultObjectMapper());
        shardSpecs.add(new HadoopyShardSpec(hashed, partition));
    }

    final DataSchema dataSchema = new DataSchema(
        "foo",
        null,
        new AggregatorFactory[0],
        new UniformGranularitySpec(
            Granularities.MINUTE,
            Granularities.MINUTE,
            ImmutableList.of(new Interval("2010-01-01/P1D"))
        ),
        jsonMapper
    );
    final HadoopIOConfig ioConfig =
        new HadoopIOConfig(ImmutableMap.<String, Object>of("paths", "bar", "type", "static"), null, null);
    final HadoopTuningConfig tuningConfig = new HadoopTuningConfig(
        null, null, null,
        ImmutableMap.<Long, List<HadoopyShardSpec>>of(new DateTime("2010-01-01T01:00:00").getMillis(), shardSpecs),
        null, null,
        false, false, false, false,
        null,
        false, false,
        null, null, null,
        false, false
    );
    final HadoopDruidIndexerConfig config =
        HadoopDruidIndexerConfig.fromSpec(new HadoopIngestionSpec(dataSchema, ioConfig, tuningConfig));

    final List<String> dims = Arrays.asList("diM1", "dIM2");
    final ImmutableMap<String, Object> values =
        ImmutableMap.<String, Object>of("Dim1", "1", "DiM2", "2", "dim1", "3", "dim2", "4");

    final long timestamp = new DateTime("2010-01-01T01:00:01").getMillis();
    final Bucket expectedBucket = config.getBucket(new MapBasedInputRow(timestamp, dims, values)).get();
    final long nextBucketTimestamp = Granularities.MINUTE.bucketEnd(new DateTime(timestamp)).getMillis();

    // check that all rows having same set of dims and truncated timestamp hash to same bucket
    for (long rowTimestamp = timestamp; rowTimestamp < nextBucketTimestamp; rowTimestamp++) {
        final Bucket actualBucket = config.getBucket(new MapBasedInputRow(rowTimestamp, dims, values)).get();
        Assert.assertEquals(expectedBucket.partitionNum, actualBucket.partitionNum);
    }
}
Use of io.druid.segment.indexing.DataSchema in project druid by druid-io.
From the class HadoopDruidIndexerConfigTest, method testNoneShardSpecBucketSelection.
/**
 * Registers one {@link NoneShardSpec} per hour, with shard number 1 for the 01:00 chunk and
 * shard number 2 for the 02:00 chunk, and verifies that a row one second into each hour
 * resolves to the bucket carrying the matching shard number.
 */
@Test
public void testNoneShardSpecBucketSelection() {
    HadoopIngestionSpec spec = new HadoopIngestionSpec(new DataSchema("foo", null, new AggregatorFactory[0], new UniformGranularitySpec(Granularities.MINUTE, Granularities.MINUTE, ImmutableList.of(new Interval("2010-01-01/P1D"))), jsonMapper), new HadoopIOConfig(ImmutableMap.<String, Object>of("paths", "bar", "type", "static"), null, null), new HadoopTuningConfig(null, null, null, ImmutableMap.<Long, List<HadoopyShardSpec>>of(new DateTime("2010-01-01T01:00:00").getMillis(), Lists.newArrayList(new HadoopyShardSpec(NoneShardSpec.instance(), 1)), new DateTime("2010-01-01T02:00:00").getMillis(), Lists.newArrayList(new HadoopyShardSpec(NoneShardSpec.instance(), 2))), null, null, false, false, false, false, null, false, false, null, null, null, false, false));
    HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec(spec);
    final List<String> dims = Arrays.asList("diM1", "dIM2");
    final ImmutableMap<String, Object> values = ImmutableMap.<String, Object>of("Dim1", "1", "DiM2", "2", "dim1", "3", "dim2", "4");

    // JUnit's contract is assertEquals(expected, actual); the expected literal goes first so a
    // failure message reports the two values the right way round.
    final long ts1 = new DateTime("2010-01-01T01:00:01").getMillis();
    Assert.assertEquals(1, config.getBucket(new MapBasedInputRow(ts1, dims, values)).get().getShardNum());
    final long ts2 = new DateTime("2010-01-01T02:00:01").getMillis();
    Assert.assertEquals(2, config.getBucket(new MapBasedInputRow(ts2, dims, values)).get().getShardNum());
}
Aggregations