use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.
the class HadoopConverterJobTest method setUp.
@Before
public void setUp() throws Exception {
final MetadataStorageUpdaterJobSpec metadataStorageUpdaterJobSpec = new MetadataStorageUpdaterJobSpec() {
@Override
public String getSegmentTable() {
return derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable();
}
@Override
public MetadataStorageConnectorConfig get() {
return derbyConnectorRule.getMetadataConnectorConfig();
}
};
final File scratchFileDir = temporaryFolder.newFolder();
storageLocProperty = System.getProperty(STORAGE_PROPERTY_KEY);
tmpSegmentDir = temporaryFolder.newFolder();
System.setProperty(STORAGE_PROPERTY_KEY, tmpSegmentDir.getAbsolutePath());
final URL url = Preconditions.checkNotNull(Query.class.getClassLoader().getResource("druid.sample.tsv"));
final File tmpInputFile = temporaryFolder.newFile();
FileUtils.retryCopy(new ByteSource() {
@Override
public InputStream openStream() throws IOException {
return url.openStream();
}
}, tmpInputFile, FileUtils.IS_EXCEPTION, 3);
final HadoopDruidIndexerConfig hadoopDruidIndexerConfig = new HadoopDruidIndexerConfig(new HadoopIngestionSpec(new DataSchema(DATASOURCE, HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(new StringInputRowParser(new DelimitedParseSpec(new TimestampSpec("ts", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList(TestIndex.DIMENSIONS)), null, null), "\t", "", Arrays.asList(TestIndex.COLUMNS)), null), Map.class), new AggregatorFactory[] { new DoubleSumAggregatorFactory(TestIndex.METRICS[0], TestIndex.METRICS[0]), new HyperUniquesAggregatorFactory("quality_uniques", "quality") }, new UniformGranularitySpec(Granularities.MONTH, Granularities.DAY, ImmutableList.<Interval>of(interval)), HadoopDruidIndexerConfig.JSON_MAPPER), new HadoopIOConfig(ImmutableMap.<String, Object>of("type", "static", "paths", tmpInputFile.getAbsolutePath()), metadataStorageUpdaterJobSpec, tmpSegmentDir.getAbsolutePath()), new HadoopTuningConfig(scratchFileDir.getAbsolutePath(), null, null, null, null, null, false, false, false, false, null, false, false, null, null, null, false, false)));
metadataStorageTablesConfigSupplier = derbyConnectorRule.metadataTablesConfigSupplier();
connector = derbyConnectorRule.getConnector();
try {
connector.getDBI().withHandle(new HandleCallback<Void>() {
@Override
public Void withHandle(Handle handle) throws Exception {
handle.execute("DROP TABLE druid_segments");
return null;
}
});
} catch (CallbackFailedException e) {
// Who cares
}
List<Jobby> jobs = ImmutableList.of(new Jobby() {
@Override
public boolean run() {
connector.createSegmentTable(metadataStorageUpdaterJobSpec.getSegmentTable());
return true;
}
}, new HadoopDruidDetermineConfigurationJob(hadoopDruidIndexerConfig), new HadoopDruidIndexerJob(hadoopDruidIndexerConfig, new SQLMetadataStorageUpdaterJobHandler(connector)));
JobHelper.runJobs(jobs, hadoopDruidIndexerConfig);
}
use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.
the class JobHelperTest method setup.
@Before
public void setup() throws Exception {
tmpDir = temporaryFolder.newFile();
dataFile = temporaryFolder.newFile();
config = new HadoopDruidIndexerConfig(new HadoopIngestionSpec(new DataSchema("website", HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(new StringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host")), null, null), null, ImmutableList.of("timestamp", "host", "visited_num")), null), Map.class), new AggregatorFactory[] { new LongSumAggregatorFactory("visited_num", "visited_num") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(this.interval)), HadoopDruidIndexerConfig.JSON_MAPPER), new HadoopIOConfig(ImmutableMap.<String, Object>of("paths", dataFile.getCanonicalPath(), "type", "static"), null, tmpDir.getCanonicalPath()), new HadoopTuningConfig(tmpDir.getCanonicalPath(), null, null, null, null, null, false, false, false, false, //Map of job properties
ImmutableMap.of("fs.s3.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem", "fs.s3.awsAccessKeyId", "THISISMYACCESSKEY"), false, false, null, null, null, false, false)));
}
use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.
the class ParquetHadoopInputRowParser method parse.
/**
* imitate avro extension {@link AvroStreamInputRowParser#parseGenericRecord(GenericRecord, ParseSpec, List, boolean, boolean)}
*/
@Override
public InputRow parse(GenericRecord record) {
GenericRecordAsMap genericRecordAsMap = new GenericRecordAsMap(record, false, binaryAsString);
TimestampSpec timestampSpec = parseSpec.getTimestampSpec();
DateTime dateTime = timestampSpec.extractTimestamp(genericRecordAsMap);
return new MapBasedInputRow(dateTime, dimensions, genericRecordAsMap);
}
use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.
the class OrcHadoopInputRowParserTest method testSerde.
@Test
public void testSerde() throws IOException {
String parserString = "{\n" + " \"type\": \"orc\",\n" + " \"parseSpec\": {\n" + " \"format\": \"timeAndDims\",\n" + " \"timestampSpec\": {\n" + " \"column\": \"timestamp\",\n" + " \"format\": \"auto\"\n" + " },\n" + " \"dimensionsSpec\": {\n" + " \"dimensions\": [\n" + " \"col1\",\n" + " \"col2\"\n" + " ],\n" + " \"dimensionExclusions\": [],\n" + " \"spatialDimensions\": []\n" + " }\n" + " },\n" + " \"typeString\": \"struct<timestamp:string,col1:string,col2:array<string>,val1:float>\"\n" + " }";
InputRowParser parser = mapper.readValue(parserString, InputRowParser.class);
InputRowParser expected = new OrcHadoopInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(ImmutableList.<DimensionSchema>of(new StringDimensionSchema("col1"), new StringDimensionSchema("col2")), null, null)), "struct<timestamp:string,col1:string,col2:array<string>,val1:float>");
Assert.assertEquals(expected, parser);
}
use of io.druid.data.input.impl.TimestampSpec in project druid by druid-io.
the class GroupByQueryRunnerFactoryTest method createSegment.
private Segment createSegment() throws Exception {
IncrementalIndex incrementalIndex = new OnheapIncrementalIndex(0, Granularities.NONE, new AggregatorFactory[] { new CountAggregatorFactory("count") }, true, true, true, 5000);
StringInputRowParser parser = new StringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags")), null, null), "\t", ImmutableList.of("timestamp", "product", "tags")), "UTF-8");
String[] rows = new String[] { "2011-01-12T00:00:00.000Z,product_1,t1", "2011-01-13T00:00:00.000Z,product_2,t2", "2011-01-14T00:00:00.000Z,product_3,t2" };
for (String row : rows) {
incrementalIndex.add(parser.parse(row));
}
closerRule.closeLater(incrementalIndex);
return new IncrementalIndexSegment(incrementalIndex, "test");
}
Aggregations