Use of org.apache.druid.data.input.impl.CsvInputFormat in project druid by druid-io.
The class S3InputSourceTest, method testReader.
@Test
public void testReader() throws IOException {
  EasyMock.reset(S3_CLIENT);
  expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
  expectListObjects(EXPECTED_URIS.get(1), ImmutableList.of(EXPECTED_URIS.get(1)), CONTENT);
  expectGetObject(EXPECTED_URIS.get(0));
  expectGetObject(EXPECTED_URIS.get(1));
  EasyMock.replay(S3_CLIENT);
  S3InputSource inputSource = new S3InputSource(
      SERVICE,
      SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER,
      INPUT_DATA_CONFIG,
      null,
      ImmutableList.of(PREFIXES.get(0), EXPECTED_URIS.get(1)),
      null,
      null
  );
  InputRowSchema someSchema = new InputRowSchema(
      new TimestampSpec("time", "auto", null),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
      ColumnsFilter.all()
  );
  InputSourceReader reader = inputSource.reader(
      someSchema,
      new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0),
      temporaryFolder.newFolder()
  );
  CloseableIterator<InputRow> iterator = reader.read();
  while (iterator.hasNext()) {
    InputRow nextRow = iterator.next();
    Assert.assertEquals(NOW, nextRow.getTimestamp());
    Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
    Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
  }
  EasyMock.verify(S3_CLIENT);
}
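The five positional arguments passed to CsvInputFormat in the reader call above are hard to read without labels. The following annotated construction is a sketch: the parameter roles are inferred from how the tests on this page use them, not quoted from the class itself.

  // Sketch: the same CsvInputFormat as above, with each positional argument labelled.
  CsvInputFormat csvFormat = new CsvInputFormat(
      ImmutableList.of("time", "dim1", "dim2"), // columns: explicit column names, in file order
      "|",                                      // listDelimiter: separator for multi-value fields
      false,                                    // hasHeaderRow: legacy header flag (superseded by the next argument)
      null,                                     // findColumnsFromHeader: when true, take column names from the first row
      0                                         // skipHeaderRows: number of leading rows to discard before parsing
  );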
Use of org.apache.druid.data.input.impl.CsvInputFormat in project druid by druid-io.
The class S3InputSourceTest, method testReaderRetriesOnSdkClientExceptionButNeverSucceedsThenThrows.
@Test(expected = SdkClientException.class)
public void testReaderRetriesOnSdkClientExceptionButNeverSucceedsThenThrows() throws Exception {
  EasyMock.reset(S3_CLIENT);
  expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
  expectSdkClientException(EXPECTED_URIS.get(0));
  EasyMock.replay(S3_CLIENT);
  S3InputSource inputSource = new S3InputSource(
      SERVICE,
      SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER,
      INPUT_DATA_CONFIG,
      null,
      ImmutableList.of(PREFIXES.get(0)),
      null,
      null,
      // only have three retries since they are slow
      3
  );
  InputRowSchema someSchema = new InputRowSchema(
      new TimestampSpec("time", "auto", null),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
      ColumnsFilter.all()
  );
  InputSourceReader reader = inputSource.reader(
      someSchema,
      new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0),
      temporaryFolder.newFolder()
  );
  CloseableIterator<InputRow> iterator = reader.read();
  while (iterator.hasNext()) {
    InputRow nextRow = iterator.next();
    Assert.assertEquals(NOW, nextRow.getTimestamp());
    Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
    Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
  }
  EasyMock.verify(S3_CLIENT);
}
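Both S3 tests mock S3_CLIENT with EasyMock, which hides the plain read path. The sketch below drives the same CsvInputFormat through a LocalInputSource instead, so it can be run without any mocking. The class name, file contents, and directory handling are illustrative assumptions; the constructors and the reader call mirror the ones used elsewhere on this page.

  import java.io.File;
  import java.nio.charset.StandardCharsets;
  import java.nio.file.Files;
  import com.google.common.collect.ImmutableList;
  import org.apache.druid.data.input.ColumnsFilter;
  import org.apache.druid.data.input.InputRow;
  import org.apache.druid.data.input.InputRowSchema;
  import org.apache.druid.data.input.InputSourceReader;
  import org.apache.druid.data.input.impl.CsvInputFormat;
  import org.apache.druid.data.input.impl.DimensionsSpec;
  import org.apache.druid.data.input.impl.LocalInputSource;
  import org.apache.druid.data.input.impl.TimestampSpec;
  import org.apache.druid.java.util.common.parsers.CloseableIterator;

  public class CsvLocalReadSketch {
    public static void main(String[] args) throws Exception {
      // Write one comma-separated row matching the columns declared below.
      File dir = Files.createTempDirectory("csv-sketch").toFile();
      File data = new File(dir, "rows.csv");
      Files.write(data.toPath(), "2014-01-01T00:00:10Z,hello,world\n".getBytes(StandardCharsets.UTF_8));

      InputRowSchema schema = new InputRowSchema(
          new TimestampSpec("time", "auto", null),
          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
          ColumnsFilter.all()
      );
      // No header row in the data, so column names are given explicitly.
      CsvInputFormat format = new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), null, null, false, 0);

      InputSourceReader reader = new LocalInputSource(dir, "*.csv").reader(schema, format, dir);
      try (CloseableIterator<InputRow> rows = reader.read()) {
        while (rows.hasNext()) {
          InputRow row = rows.next();
          System.out.println(row.getTimestamp() + " " + row.getDimension("dim1") + " " + row.getDimension("dim2"));
        }
      }
    }
  }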
Use of org.apache.druid.data.input.impl.CsvInputFormat in project druid by druid-io.
The class CompactionTaskParallelRunTest, method runIndexTask.
private void runIndexTask(@Nullable PartitionsSpec partitionsSpec, boolean appendToExisting) {
  ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(
      null,
      new LocalInputSource(inputDir, "druid*"),
      new CsvInputFormat(Arrays.asList("ts", "dim", "val"), "|", null, false, 0),
      appendToExisting,
      null
  );
  ParallelIndexTuningConfig tuningConfig = newTuningConfig(partitionsSpec, 2, !appendToExisting);
  ParallelIndexSupervisorTask indexTask = new ParallelIndexSupervisorTask(
      null,
      null,
      null,
      new ParallelIndexIngestionSpec(
          new DataSchema(
              DATA_SOURCE,
              new TimestampSpec("ts", "auto", null),
              new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "dim"))),
              new AggregatorFactory[]{ new LongSumAggregatorFactory("val", "val") },
              new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, ImmutableList.of(INTERVAL_TO_INDEX)),
              null
          ),
          ioConfig,
          tuningConfig
      ),
      null
  );
  runTask(indexTask);
}
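The ioConfig above passes explicit column names and findColumnsFromHeader = false because the generated input files carry no header row. As a rough guide (argument order taken from the constructions on this page, so treat it as a sketch rather than the definitive API), the two header configurations used across these examples look like this:

  // No header in the data: name the columns up front and parse every row as data.
  CsvInputFormat noHeader = new CsvInputFormat(Arrays.asList("ts", "dim", "val"), "|", null, false, 0);

  // Header present in the data: pass no columns and let the format discover the
  // names from the first row (used by the IndexTaskTest examples further down).
  CsvInputFormat headerDriven = new CsvInputFormat(null, null, null, true, 0);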
Use of org.apache.druid.data.input.impl.CsvInputFormat in project druid by druid-io.
The class IndexTaskTest, method testCSVFileWithHeaderColumnOverride.
@Test
public void testCSVFileWithHeaderColumnOverride() throws Exception {
  File tmpDir = temporaryFolder.newFolder();
  File tmpFile = File.createTempFile("druid", "index", tmpDir);
  try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
    writer.write("time,d,val\n");
    writer.write("2014-01-01T00:00:10Z,a,1\n");
  }
  final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null);
  final List<String> columns = Arrays.asList("time", "dim", "val");
  final IndexTuningConfig tuningConfig = createTuningConfigWithMaxRowsPerSegment(2, true);
  final IndexIngestionSpec ingestionSpec;
  if (useInputFormatApi) {
    ingestionSpec = createIngestionSpec(
        jsonMapper,
        tmpDir,
        timestampSpec,
        DimensionsSpec.EMPTY,
        new CsvInputFormat(columns, null, null, true, 0),
        null,
        null,
        tuningConfig,
        false,
        false
    );
  } else {
    ingestionSpec = createIngestionSpec(
        jsonMapper,
        tmpDir,
        new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, columns, true, 0),
        null,
        null,
        tuningConfig,
        false,
        false
    );
  }
  IndexTask indexTask = new IndexTask(null, null, ingestionSpec, null);
  final List<DataSegment> segments = runTask(indexTask).rhs;
  Assert.assertEquals(1, segments.size());
  Assert.assertEquals(Collections.singletonList("d"), segments.get(0).getDimensions());
  Assert.assertEquals(Collections.singletonList("val"), segments.get(0).getMetrics());
  Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval());
}
Use of org.apache.druid.data.input.impl.CsvInputFormat in project druid by druid-io.
The class IndexTaskTest, method testCSVFileWithHeader.
@Test
public void testCSVFileWithHeader() throws Exception {
  File tmpDir = temporaryFolder.newFolder();
  File tmpFile = File.createTempFile("druid", "index", tmpDir);
  try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
    writer.write("time,d,val\n");
    writer.write("2014-01-01T00:00:10Z,a,1\n");
  }
  final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null);
  final IndexTuningConfig tuningConfig = createTuningConfigWithMaxRowsPerSegment(2, true);
  final IndexIngestionSpec ingestionSpec;
  if (useInputFormatApi) {
    ingestionSpec = createIngestionSpec(
        jsonMapper,
        tmpDir,
        timestampSpec,
        DimensionsSpec.EMPTY,
        new CsvInputFormat(null, null, null, true, 0),
        null,
        null,
        tuningConfig,
        false,
        false
    );
  } else {
    ingestionSpec = createIngestionSpec(
        jsonMapper,
        tmpDir,
        new CSVParseSpec(timestampSpec, DimensionsSpec.EMPTY, null, null, true, 0),
        null,
        null,
        tuningConfig,
        false,
        false
    );
  }
  IndexTask indexTask = new IndexTask(null, null, ingestionSpec, null);
  final List<DataSegment> segments = runTask(indexTask).rhs;
  Assert.assertEquals(1, segments.size());
  Assert.assertEquals(Collections.singletonList("d"), segments.get(0).getDimensions());
  Assert.assertEquals(Collections.singletonList("val"), segments.get(0).getMetrics());
  Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval());
}
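The useInputFormatApi flag in both IndexTaskTest methods switches between Druid's two ways of describing the same CSV input: the legacy CSVParseSpec and the newer CsvInputFormat. Below is a rough side-by-side of the equivalent constructions; the argument order is taken from the tests above, so treat the parameter labels as assumptions rather than quoted signatures.

  // Legacy parser API: timestamp and dimension handling are bundled into the parse spec.
  CSVParseSpec parseSpec = new CSVParseSpec(
      new TimestampSpec("time", "auto", null), // timestampSpec
      DimensionsSpec.EMPTY,                    // dimensionsSpec
      null,                                    // listDelimiter
      null,                                    // columns (null: take them from the header row)
      true,                                    // hasHeaderRow
      0                                        // skipHeaderRows
  );

  // InputFormat API: the format only describes the file layout; timestamp and
  // dimensions are supplied separately through InputRowSchema or DataSchema.
  CsvInputFormat inputFormat = new CsvInputFormat(
      null,  // columns
      null,  // listDelimiter
      null,  // hasHeaderRow (legacy flag)
      true,  // findColumnsFromHeader
      0      // skipHeaderRows
  );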