Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.
From the class FireDepartmentTest, method testSerde.
@Test
public void testSerde() throws Exception {
  ObjectMapper jsonMapper = new DefaultObjectMapper();
  jsonMapper.setInjectableValues(new InjectableValues.Std().addValue(ObjectMapper.class, jsonMapper));

  // Build a FireDepartment spec whose DataSchema embeds a StringInputRowParser (converted to a Map)
  // backed by a JSONParseSpec over the "dim1" and "dim2" dimensions.
  FireDepartment schema = new FireDepartment(
      new DataSchema(
          "foo",
          jsonMapper.convertValue(
              new StringInputRowParser(
                  new JSONParseSpec(
                      new TimestampSpec("timestamp", "auto", null),
                      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim1", "dim2"))),
                      null,
                      null,
                      null
                  ),
                  null
              ),
              Map.class
          ),
          new AggregatorFactory[]{new CountAggregatorFactory("count")},
          new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
          null,
          jsonMapper
      ),
      new RealtimeIOConfig(
          null,
          new RealtimePlumberSchool(
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              NoopJoinableFactory.INSTANCE,
              TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance()),
              TestHelper.getTestIndexIO(),
              MapCache.create(0),
              NO_CACHE_CONFIG,
              new CachePopulatorStats(),
              TestHelper.makeJsonMapper()
          )
      ),
      RealtimeTuningConfig.makeDefaultTuningConfig(new File("/tmp/nonexistent"))
  );

  // Round-trip the spec through JSON and verify the datasource and persist directory survive.
  String json = jsonMapper.writeValueAsString(schema);
  FireDepartment newSchema = jsonMapper.readValue(json, FireDepartment.class);

  Assert.assertEquals(schema.getDataSchema().getDataSource(), newSchema.getDataSchema().getDataSource());
  Assert.assertEquals("/tmp/nonexistent", schema.getTuningConfig().getBasePersistDirectory().toString());
}
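A minimal sketch, not part of the original test, of what the parser embedded in the schema above would accept; the sample JSON line is illustrative only.

StringInputRowParser parser = new StringInputRowParser(
    new JSONParseSpec(
        new TimestampSpec("timestamp", "auto", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim1", "dim2"))),
        null,
        null,
        null
    ),
    "UTF-8"
);
// Hypothetical input row; "timestamp", "dim1", and "dim2" come from the spec above.
InputRow row = parser.parse("{\"timestamp\":\"2021-01-01T00:00:00Z\",\"dim1\":\"a\",\"dim2\":\"b\"}");
// row.getDimension("dim1") -> [a]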
Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.
From the class MultiValuedDimensionTest, method setup.
@Before
public void setup() throws Exception {
  incrementalIndex = new OnheapIncrementalIndex.Builder()
      .setSimpleTestingIndexSchema(new CountAggregatorFactory("count"))
      .setMaxRowCount(5000)
      .build();

  // CSV rows whose "tags" and "othertags" columns are split on the "\t" list delimiter,
  // producing multi-value dimensions.
  StringInputRowParser parser = new StringInputRowParser(
      new CSVParseSpec(
          new TimestampSpec("timestamp", "iso", null),
          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags", "othertags"))),
          "\t",
          ImmutableList.of("timestamp", "product", "tags", "othertags"),
          false,
          0
      ),
      "UTF-8"
  );
  String[] rows = new String[]{
      "2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3,u1\tu2",
      "2011-01-13T00:00:00.000Z,product_2,t3\tt4\tt5,u3\tu4",
      "2011-01-14T00:00:00.000Z,product_3,t5\tt6\tt7,u1\tu5",
      "2011-01-14T00:00:00.000Z,product_4,\"\",u2"
  };
  for (String row : rows) {
    incrementalIndex.add(parser.parse(row));
  }
  persistedSegmentDir = FileUtils.createTempDir();
  TestHelper.getTestIndexMergerV9(segmentWriteOutMediumFactory)
            .persist(incrementalIndex, persistedSegmentDir, new IndexSpec(), null);
  queryableIndex = TestHelper.getTestIndexIO().loadIndex(persistedSegmentDir);

  // JSON rows exercising empty, missing, and empty-string multi-value dimensions.
  StringInputRowParser parserNullSampler = new StringInputRowParser(
      new JSONParseSpec(
          new TimestampSpec("time", "iso", null),
          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags", "othertags")))
      ),
      "UTF-8"
  );
  incrementalIndexNullSampler = new OnheapIncrementalIndex.Builder()
      .setSimpleTestingIndexSchema(new CountAggregatorFactory("count"))
      .setMaxRowCount(5000)
      .build();
  String[] rowsNullSampler = new String[]{
      "{\"time\":\"2011-01-13T00:00:00.000Z\",\"product\":\"product_1\",\"tags\":[],\"othertags\":[\"u1\", \"u2\"]}",
      "{\"time\":\"2011-01-12T00:00:00.000Z\",\"product\":\"product_2\",\"othertags\":[\"u3\", \"u4\"]}",
      "{\"time\":\"2011-01-14T00:00:00.000Z\",\"product\":\"product_3\",\"tags\":[\"\"],\"othertags\":[\"u1\", \"u5\"]}",
      "{\"time\":\"2011-01-15T00:00:00.000Z\",\"product\":\"product_4\",\"tags\":[\"t1\", \"t2\", \"\"],\"othertags\":[\"u6\", \"u7\"]}",
      "{\"time\":\"2011-01-16T00:00:00.000Z\",\"product\":\"product_5\",\"tags\":[],\"othertags\":[]}",
      "{\"time\":\"2011-01-16T00:00:00.000Z\",\"product\":\"product_6\"}",
      "{\"time\":\"2011-01-16T00:00:00.000Z\",\"product\":\"product_7\",\"othertags\":[]}",
      "{\"time\":\"2011-01-16T00:00:00.000Z\",\"product\":\"product_8\",\"tags\":[\"\"],\"othertags\":[]}"
  };
  for (String row : rowsNullSampler) {
    incrementalIndexNullSampler.add(parserNullSampler.parse(row));
  }
  persistedSegmentDirNullSampler = FileUtils.createTempDir();
  TestHelper.getTestIndexMergerV9(segmentWriteOutMediumFactory)
            .persist(incrementalIndexNullSampler, persistedSegmentDirNullSampler, new IndexSpec(), null);
  queryableIndexNullSampler = TestHelper.getTestIndexIO().loadIndex(persistedSegmentDirNullSampler);
}
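For reference, a hedged sketch of what the CSV parser above yields for the first row: because "\t" is the list delimiter, the "tags" and "othertags" columns come back as multi-value dimensions (the expected values below are inferred from the parse spec, not taken from the test itself).

InputRow row = parser.parse("2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3,u1\tu2");
// row.getDimension("tags")      -> [t1, t2, t3]
// row.getDimension("othertags") -> [u1, u2]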
Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.
From the class BatchDeltaIngestionTest, method makeHadoopDruidIndexerConfig.
private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig(
    Map<String, Object> inputSpec,
    File tmpDir,
    AggregatorFactory[] aggregators
) throws Exception {
  HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
      new HadoopIngestionSpec(
          new DataSchema(
              "website",
              MAPPER.convertValue(
                  new StringInputRowParser(
                      new CSVParseSpec(
                          new TimestampSpec("timestamp", "yyyyMMddHH", null),
                          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host"))),
                          null,
                          ImmutableList.of("timestamp", "host", "host2", "visited_num"),
                          false,
                          0
                      ),
                      null
                  ),
                  Map.class
              ),
              // Fall back to the default aggregators when none are supplied.
              aggregators != null
                  ? aggregators
                  : new AggregatorFactory[]{
                      new LongSumAggregatorFactory("visited_sum", "visited_num"),
                      new HyperUniquesAggregatorFactory("unique_hosts", "host2")
                  },
              new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
              null,
              MAPPER
          ),
          new HadoopIOConfig(inputSpec, null, tmpDir.getCanonicalPath()),
          new HadoopTuningConfig(
              tmpDir.getCanonicalPath(),
              null, null, null, null, null, null, null, null,
              false, false, false, false,
              null,
              false, false,
              null, null,
              false, false,
              null, null, null, null, null
          )
      )
  );
  config.setShardSpecs(
      ImmutableMap.of(
          INTERVAL_FULL.getStartMillis(),
          ImmutableList.of(
              new HadoopyShardSpec(
                  new HashBasedNumberedShardSpec(0, 1, 0, 1, null, HashPartitionFunction.MURMUR3_32_ABS, HadoopDruidIndexerConfig.JSON_MAPPER),
                  0
              )
          )
      )
  );
  config = HadoopDruidIndexerConfig.fromSpec(config.getSchema());
  return config;
}
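A hypothetical call site for the helper above: passing null for aggregators makes it fall back to the default visited_sum and unique_hosts aggregators. The inputSpec keys and paths below are illustrative only, not taken from the test.

Map<String, Object> inputSpec = ImmutableMap.of("type", "static", "paths", "/tmp/druid/batch-delta-input");
HadoopDruidIndexerConfig config = makeHadoopDruidIndexerConfig(inputSpec, new File("/tmp/druid/batch-delta-tmp"), null);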
Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.
From the class IndexGeneratorCombinerTest, method setUp.
@Before
public void setUp() throws Exception {
  HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
      new HadoopIngestionSpec(
          new DataSchema(
              "website",
              HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(
                  new StringInputRowParser(
                      new TimeAndDimsParseSpec(
                          new TimestampSpec("timestamp", "yyyyMMddHH", null),
                          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host", "keywords")))
                      ),
                      null
                  ),
                  Map.class
              ),
              new AggregatorFactory[]{
                  new LongSumAggregatorFactory("visited_sum", "visited"),
                  new HyperUniquesAggregatorFactory("unique_hosts", "host")
              },
              new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(Intervals.of("2010/2011"))),
              null,
              HadoopDruidIndexerConfig.JSON_MAPPER
          ),
          new HadoopIOConfig(ImmutableMap.of("paths", "/tmp/dummy", "type", "static"), null, "/tmp/dummy"),
          HadoopTuningConfig.makeDefaultTuningConfig().withWorkingPath("/tmp/work").withVersion("ver")
      )
  );

  // Serialize the spec into the Hadoop Configuration so the combiner can read it back in setup().
  Configuration hadoopConfig = new Configuration();
  hadoopConfig.set(HadoopDruidIndexerConfig.CONFIG_PROPERTY, HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config));

  Reducer.Context context = EasyMock.createMock(Reducer.Context.class);
  EasyMock.expect(context.getConfiguration()).andReturn(hadoopConfig);
  EasyMock.replay(context);

  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiner = new IndexGeneratorJob.IndexGeneratorCombiner();
  combiner.setup(context);
}
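As a hedged sketch of what combiner.setup(context) relies on (illustrative, not part of the test): the spec written into the Configuration above can be read back from CONFIG_PROPERTY on the other side.

HadoopDruidIndexerConfig roundTripped = HadoopDruidIndexerConfig.fromConfiguration(hadoopConfig);
// roundTripped.getSchema().getDataSchema().getDataSource() -> "website"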
Use of org.apache.druid.data.input.impl.StringInputRowParser in project druid by druid-io.
From the class StreamChunkParserTest, method testWithParserAndNullInputformatParseProperly.
@Test
public void testWithParserAndNullInputformatParseProperly() throws IOException {
  final InputRowParser<ByteBuffer> parser = new StringInputRowParser(
      new JSONParseSpec(
          TIMESTAMP_SPEC,
          DimensionsSpec.EMPTY,
          JSONPathSpec.DEFAULT,
          Collections.emptyMap(),
          false
      ),
      StringUtils.UTF8_STRING
  );
  final StreamChunkParser<ByteEntity> chunkParser = new StreamChunkParser<>(
      parser,
      // Set nulls for all parameters below since inputFormat will never be used.
      null,
      null,
      null,
      null,
      row -> true,
      rowIngestionMeters,
      parseExceptionHandler
  );
  parseAndAssertResult(chunkParser);
}
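The ByteBuffer-based parser above can also be exercised directly via parseBatch. A minimal sketch, assuming for illustration that the timestamp column configured by TIMESTAMP_SPEC is named "timestamp":

ByteBuffer bytes = ByteBuffer.wrap(StringUtils.toUtf8("{\"timestamp\":\"2021-01-01T00:00:00Z\"}"));
List<InputRow> rows = parser.parseBatch(bytes);
// rows contains the single parsed InputRow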