Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
In the class OrcHadoopInputRowParserTest, method testTest2.
@Test
public void testTest2() throws IOException
{
  HadoopDruidIndexerConfig config = loadHadoopDruidIndexerConfig("example/test_2_hadoop_job.json");
  Job job = Job.getInstance(new Configuration());
  config.intoConfiguration(job);
  /*
    test_2.orc
    struct<timestamp:string,col1:string,col2:array<string>,col3:float,col4:bigint,col5:decimal,col6:array<string>,col7:map<string,string>>
    {2016-01-01, bar, [dat1, dat2, dat3], 1.1, 2, 3.5, [], {subcol7=subval7}}
  */
  OrcStruct data = getFirstRow(job, ((StaticPathSpec) config.getPathSpec()).getPaths());
  List<InputRow> rows = (List<InputRow>) config.getParser().parseBatch(data);
  Assert.assertEquals(7, rows.get(0).getDimensions().size());
  Assert.assertEquals("bar", rows.get(0).getDimension("col1").get(0));
  Assert.assertEquals("dat1", rows.get(0).getDimension("col2").get(0));
  Assert.assertEquals("dat2", rows.get(0).getDimension("col2").get(1));
  Assert.assertEquals("dat3", rows.get(0).getDimension("col2").get(2));
  Assert.assertEquals(1.1f, rows.get(0).getRaw("col3"));
  Assert.assertEquals(2L, rows.get(0).getRaw("col4"));
  Assert.assertEquals(3.5d, rows.get(0).getRaw("col5"));
  Assert.assertEquals(ImmutableList.of(), rows.get(0).getRaw("col6"));
  Assert.assertEquals("subval7", rows.get(0).getRaw("col7-subcol7"));
}
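The helper loadHadoopDruidIndexerConfig is defined elsewhere in OrcHadoopInputRowParserTest and is not shown on this page. A minimal sketch of what such a helper might look like is given below, assuming the static factory HadoopDruidIndexerConfig.fromFile is used to parse the job spec JSON; only the call site above is taken from the source, the body is an assumption.

// Assumed implementation sketch: parse the Hadoop indexing job spec JSON from the given file path.
private static HadoopDruidIndexerConfig loadHadoopDruidIndexerConfig(String configPath)
{
  return HadoopDruidIndexerConfig.fromFile(new File(configPath));
}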
Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
In the class HdfsDataSegmentPusherTest, method shouldMakeHDFSCompliantSegmentOutputPath.
@Test
public void shouldMakeHDFSCompliantSegmentOutputPath()
{
  HadoopIngestionSpec schema;
  try {
    schema = objectMapper.readValue(
        "{\n"
        + "  \"dataSchema\": {\n"
        + "    \"dataSource\": \"source\",\n"
        + "    \"metricsSpec\": [],\n"
        + "    \"granularitySpec\": {\n"
        + "      \"type\": \"uniform\",\n"
        + "      \"segmentGranularity\": \"hour\",\n"
        + "      \"intervals\": [\"2012-07-10/P1D\"]\n"
        + "    }\n"
        + "  },\n"
        + "  \"ioConfig\": {\n"
        + "    \"type\": \"hadoop\",\n"
        + "    \"segmentOutputPath\": \"hdfs://server:9100/tmp/druid/datatest\"\n"
        + "  }\n"
        + "}",
        HadoopIngestionSpec.class
    );
  }
  catch (Exception e) {
    throw new RuntimeException(e);
  }
  // DataSchema dataSchema = new DataSchema("dataSource", null, null, Gra)
  // schema = new HadoopIngestionSpec(dataSchema, ioConfig, HadoopTuningConfig.makeDefaultTuningConfig());
  HadoopDruidIndexerConfig cfg = new HadoopDruidIndexerConfig(
      schema.withTuningConfig(schema.getTuningConfig().withVersion("some:brand:new:version"))
  );
  Bucket bucket = new Bucket(4711, new DateTime(2012, 07, 10, 5, 30, ISOChronology.getInstanceUTC()), 4712);
  Path path = JobHelper.makeFileNamePath(
      new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()),
      new DistributedFileSystem(),
      new DataSegment(
          cfg.getSchema().getDataSchema().getDataSource(),
          cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
          cfg.getSchema().getTuningConfig().getVersion(),
          null, null, null,
          new NumberedShardSpec(bucket.partitionNum, 5000),
          -1, 0
      ),
      JobHelper.INDEX_ZIP,
      hdfsDataSegmentPusher
  );
  Assert.assertEquals(
      "hdfs://server:9100/tmp/druid/datatest/source/20120710T050000.000Z_20120710T060000.000Z/some_brand_new_version"
      + "/4712_index.zip",
      path.toString()
  );
  path = JobHelper.makeTmpPath(
      new Path(cfg.getSchema().getIOConfig().getSegmentOutputPath()),
      new DistributedFileSystem(),
      new DataSegment(
          cfg.getSchema().getDataSchema().getDataSource(),
          cfg.getSchema().getDataSchema().getGranularitySpec().bucketInterval(bucket.time).get(),
          cfg.getSchema().getTuningConfig().getVersion(),
          null, null, null,
          new NumberedShardSpec(bucket.partitionNum, 5000),
          -1, 0
      ),
      new TaskAttemptID("abc", 123, TaskType.REDUCE, 1, 0),
      hdfsDataSegmentPusher
  );
  Assert.assertEquals(
      "hdfs://server:9100/tmp/druid/datatest/source/20120710T050000.000Z_20120710T060000.000Z/some_brand_new_version"
      + "/4712_index.zip.0",
      path.toString()
  );
}
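The assertions above check that the version string "some:brand:new:version" appears in the output path as "some_brand_new_version", i.e. that the colons (which HDFS paths do not allow) are replaced. The hdfsDataSegmentPusher field is created in the test's setup, which is not shown here; a possible setup is sketched below as an assumption, with an illustrative storage directory and the constructor arguments I would expect (pusher config, Hadoop Configuration, ObjectMapper).

// Assumed setup sketch for the pusher used above; values are illustrative, not the real fixture.
HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
pusherConfig.setStorageDirectory("/tmp/druid/segments");  // illustrative; the path assertions depend only on segmentOutputPath
HdfsDataSegmentPusher hdfsDataSegmentPusher =
    new HdfsDataSegmentPusher(pusherConfig, new Configuration(), objectMapper);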
Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
In the class DatasourcePathSpecTest, method testAddInputPaths.
@Test
public void testAddInputPaths() throws Exception
{
  HadoopDruidIndexerConfig hadoopIndexerConfig = makeHadoopDruidIndexerConfig();
  DatasourcePathSpec pathSpec1 = new DatasourcePathSpec(segments1, ingestionSpec1, null, false);
  DatasourcePathSpec pathSpec2 = new DatasourcePathSpec(segments2, ingestionSpec2, null, false);
  Configuration config = new Configuration();
  Job job = EasyMock.createNiceMock(Job.class);
  EasyMock.expect(job.getConfiguration()).andReturn(config).anyTimes();
  EasyMock.replay(job);
  pathSpec1.addInputPaths(hadoopIndexerConfig, job);
  pathSpec2.addInputPaths(hadoopIndexerConfig, job);
  Assert.assertEquals(
      ImmutableList.of(ingestionSpec1.getDataSource(), ingestionSpec2.getDataSource()),
      DatasourceInputFormat.getDataSources(config)
  );
  Assert.assertEquals(segments1, DatasourceInputFormat.getSegments(config, ingestionSpec1.getDataSource()));
  Assert.assertEquals(segments2, DatasourceInputFormat.getSegments(config, ingestionSpec2.getDataSource()));
  Assert.assertEquals(
      ingestionSpec1.withDimensions(ImmutableList.of("product")).withMetrics(ImmutableList.of("visited_sum")),
      DatasourceInputFormat.getIngestionSpec(config, ingestionSpec1.getDataSource())
  );
  Assert.assertEquals(
      ingestionSpec2.withDimensions(ImmutableList.of("product2")).withMetrics(ImmutableList.of("visited_sum")),
      DatasourceInputFormat.getIngestionSpec(config, ingestionSpec2.getDataSource())
  );
}
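Here segments1/segments2 and ingestionSpec1/ingestionSpec2 are fixtures defined elsewhere in DatasourcePathSpecTest, as is the makeHadoopDruidIndexerConfig() helper. A minimal sketch of such a helper is shown below, assuming it parses a small HadoopIngestionSpec JSON with HadoopDruidIndexerConfig.JSON_MAPPER and wraps it via HadoopDruidIndexerConfig.fromSpec; the data source name, interval, and granularity are illustrative, not the real fixture values.

// Assumed fixture sketch: build a HadoopDruidIndexerConfig from a minimal ingestion spec.
private static HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig() throws IOException
{
  HadoopIngestionSpec spec = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(
      "{\n"
      + "  \"dataSchema\": {\n"
      + "    \"dataSource\": \"testDatasource\",\n"
      + "    \"metricsSpec\": [],\n"
      + "    \"granularitySpec\": {\n"
      + "      \"type\": \"uniform\",\n"
      + "      \"segmentGranularity\": \"day\",\n"
      + "      \"intervals\": [\"2000/3000\"]\n"
      + "    }\n"
      + "  },\n"
      + "  \"ioConfig\": {\"type\": \"hadoop\"}\n"
      + "}",
      HadoopIngestionSpec.class
  );
  return HadoopDruidIndexerConfig.fromSpec(spec);
}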
Use of org.apache.druid.indexer.HadoopDruidIndexerConfig in project druid by druid-io.
In the class DatasourcePathSpecTest, method testAddInputPathsWithNoSegments.
@Test
public void testAddInputPathsWithNoSegments() throws Exception
{
  HadoopDruidIndexerConfig hadoopIndexerConfig = makeHadoopDruidIndexerConfig();
  DatasourcePathSpec pathSpec = new DatasourcePathSpec(null, ingestionSpec1, null, false);
  Configuration config = new Configuration();
  Job job = EasyMock.createNiceMock(Job.class);
  EasyMock.expect(job.getConfiguration()).andReturn(config).anyTimes();
  EasyMock.replay(job);
  try {
    pathSpec.addInputPaths(hadoopIndexerConfig, job);
    Assert.fail("should've been ISE");
  }
  catch (ISE ex) {
    // expected: no segments are available for the ingestion spec
  }
  // now with ignoreWhenNoSegments flag set
  pathSpec = new DatasourcePathSpec(null, ingestionSpec1.withIgnoreWhenNoSegments(true), null, false);
  pathSpec.addInputPaths(hadoopIndexerConfig, job);
  Assert.assertEquals(Collections.emptyList(), DatasourceInputFormat.getDataSources(config));
}
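On JUnit 4.13 or later, the try/catch-and-fail pattern above can be written more compactly with Assert.assertThrows; a sketch under that assumption is shown below. A separate local is introduced because the lambda must capture an effectively final variable, whereas pathSpec is reassigned in the test.

// Equivalent expectation using Assert.assertThrows (assumes JUnit 4.13+ on the test classpath).
final DatasourcePathSpec emptyPathSpec = new DatasourcePathSpec(null, ingestionSpec1, null, false);
Assert.assertThrows(ISE.class, () -> emptyPathSpec.addInputPaths(hadoopIndexerConfig, job));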