Use of org.apache.druid.data.input.impl.LongDimensionSchema in project druid by druid-io.
Class GroupByLimitPushDownMultiNodeMergeTest, method setup:
@Before
public void setup() throws Exception {
tmpDir = FileUtils.createTempDir();
InputRow row;
List<String> dimNames = Arrays.asList("dimA", "metA");
Map<String, Object> event;
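// Index A: four dimA/metA rows spread over two adjacent hours, persisted below as segment "A"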
final IncrementalIndex indexA = makeIncIndex(false);
incrementalIndices.add(indexA);
event = new HashMap<>();
event.put("dimA", "pomegranate");
event.put("metA", 2395L);
row = new MapBasedInputRow(1505260888888L, dimNames, event);
indexA.add(row);
event = new HashMap<>();
event.put("dimA", "mango");
event.put("metA", 8L);
row = new MapBasedInputRow(1505260800000L, dimNames, event);
indexA.add(row);
event = new HashMap<>();
event.put("dimA", "pomegranate");
event.put("metA", 5028L);
row = new MapBasedInputRow(1505264400000L, dimNames, event);
indexA.add(row);
event = new HashMap<>();
event.put("dimA", "mango");
event.put("metA", 7L);
row = new MapBasedInputRow(1505264400400L, dimNames, event);
indexA.add(row);
final File fileA = INDEX_MERGER_V9.persist(indexA, new File(tmpDir, "A"), new IndexSpec(), null);
QueryableIndex qindexA = INDEX_IO.loadIndex(fileA);
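// Index B: four more dimA/metA rows in the same two hours, persisted as segment "B"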
final IncrementalIndex indexB = makeIncIndex(false);
incrementalIndices.add(indexB);
event = new HashMap<>();
event.put("dimA", "pomegranate");
event.put("metA", 4718L);
row = new MapBasedInputRow(1505260800000L, dimNames, event);
indexB.add(row);
event = new HashMap<>();
event.put("dimA", "mango");
event.put("metA", 18L);
row = new MapBasedInputRow(1505260800000L, dimNames, event);
indexB.add(row);
event = new HashMap<>();
event.put("dimA", "pomegranate");
event.put("metA", 2698L);
row = new MapBasedInputRow(1505264400000L, dimNames, event);
indexB.add(row);
event = new HashMap<>();
event.put("dimA", "mango");
event.put("metA", 3L);
row = new MapBasedInputRow(1505264400000L, dimNames, event);
indexB.add(row);
final File fileB = INDEX_MERGER_V9.persist(indexB, new File(tmpDir, "B"), new IndexSpec(), null);
QueryableIndex qindexB = INDEX_IO.loadIndex(fileB);
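// Index C: the same dimension values, but with widely spaced timestamps, persisted as segment "C"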
final IncrementalIndex indexC = makeIncIndex(false);
incrementalIndices.add(indexC);
event = new HashMap<>();
event.put("dimA", "pomegranate");
event.put("metA", 2395L);
row = new MapBasedInputRow(1505260800000L, dimNames, event);
indexC.add(row);
event = new HashMap<>();
event.put("dimA", "mango");
event.put("metA", 8L);
row = new MapBasedInputRow(1605260800000L, dimNames, event);
indexC.add(row);
event = new HashMap<>();
event.put("dimA", "pomegranate");
event.put("metA", 5028L);
row = new MapBasedInputRow(1705264400000L, dimNames, event);
indexC.add(row);
event = new HashMap<>();
event.put("dimA", "mango");
event.put("metA", 7L);
row = new MapBasedInputRow(1805264400000L, dimNames, event);
indexC.add(row);
final File fileC = INDEX_MERGER_V9.persist(indexC, new File(tmpDir, "C"), new IndexSpec(), null);
QueryableIndex qindexC = INDEX_IO.loadIndex(fileC);
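// Index D: index C's timestamps paired with index B's metA values, persisted as segment "D"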
final IncrementalIndex indexD = makeIncIndex(false);
incrementalIndices.add(indexD);
event = new HashMap<>();
event.put("dimA", "pomegranate");
event.put("metA", 4718L);
row = new MapBasedInputRow(1505260800000L, dimNames, event);
indexD.add(row);
event = new HashMap<>();
event.put("dimA", "mango");
event.put("metA", 18L);
row = new MapBasedInputRow(1605260800000L, dimNames, event);
indexD.add(row);
event = new HashMap<>();
event.put("dimA", "pomegranate");
event.put("metA", 2698L);
row = new MapBasedInputRow(1705264400000L, dimNames, event);
indexD.add(row);
event = new HashMap<>();
event.put("dimA", "mango");
event.put("metA", 3L);
row = new MapBasedInputRow(1805264400000L, dimNames, event);
indexD.add(row);
final File fileD = INDEX_MERGER_V9.persist(indexD, new File(tmpDir, "D"), new IndexSpec(), null);
QueryableIndex qindexD = INDEX_IO.loadIndex(fileD);
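// Indexes E and F use an explicit dimension list that adds "dimB" and declares "metA" as a long-typed dimension via LongDimensionSchema.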
List<String> dimNames2 = Arrays.asList("dimA", "dimB", "metA");
List<DimensionSchema> dimensions = Arrays.asList(new StringDimensionSchema("dimA"), new StringDimensionSchema("dimB"), new LongDimensionSchema("metA"));
final IncrementalIndex indexE = makeIncIndex(false, dimensions);
incrementalIndices.add(indexE);
event = new HashMap<>();
event.put("dimA", "pomegranate");
event.put("dimB", "raw");
event.put("metA", 5L);
row = new MapBasedInputRow(1505260800000L, dimNames2, event);
indexE.add(row);
event = new HashMap<>();
event.put("dimA", "mango");
event.put("dimB", "ripe");
event.put("metA", 9L);
row = new MapBasedInputRow(1605260800000L, dimNames2, event);
indexE.add(row);
event = new HashMap<>();
event.put("dimA", "pomegranate");
event.put("dimB", "raw");
event.put("metA", 3L);
row = new MapBasedInputRow(1705264400000L, dimNames2, event);
indexE.add(row);
event = new HashMap<>();
event.put("dimA", "mango");
event.put("dimB", "ripe");
event.put("metA", 7L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexE.add(row);
event = new HashMap<>();
event.put("dimA", "grape");
event.put("dimB", "raw");
event.put("metA", 5L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexE.add(row);
event = new HashMap<>();
event.put("dimA", "apple");
event.put("dimB", "ripe");
event.put("metA", 3L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexE.add(row);
event = new HashMap<>();
event.put("dimA", "apple");
event.put("dimB", "raw");
event.put("metA", 1L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexE.add(row);
event = new HashMap<>();
event.put("dimA", "apple");
event.put("dimB", "ripe");
event.put("metA", 4L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexE.add(row);
event = new HashMap<>();
event.put("dimA", "apple");
event.put("dimB", "raw");
event.put("metA", 1L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexE.add(row);
event = new HashMap<>();
event.put("dimA", "banana");
event.put("dimB", "ripe");
event.put("metA", 4L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexE.add(row);
event = new HashMap<>();
event.put("dimA", "orange");
event.put("dimB", "raw");
event.put("metA", 9L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexE.add(row);
event = new HashMap<>();
event.put("dimA", "peach");
event.put("dimB", "ripe");
event.put("metA", 7L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexE.add(row);
event = new HashMap<>();
event.put("dimA", "orange");
event.put("dimB", "raw");
event.put("metA", 2L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexE.add(row);
event = new HashMap<>();
event.put("dimA", "strawberry");
event.put("dimB", "ripe");
event.put("metA", 10L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexE.add(row);
final File fileE = INDEX_MERGER_V9.persist(indexE, new File(tmpDir, "E"), new IndexSpec(), null);
QueryableIndex qindexE = INDEX_IO.loadIndex(fileE);
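// Index F: additional rows under the same explicit three-column schema, persisted as segment "F"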
final IncrementalIndex indexF = makeIncIndex(false, dimensions);
incrementalIndices.add(indexF);
event = new HashMap<>();
event.put("dimA", "kiwi");
event.put("dimB", "raw");
event.put("metA", 7L);
row = new MapBasedInputRow(1505260800000L, dimNames2, event);
indexF.add(row);
event = new HashMap<>();
event.put("dimA", "watermelon");
event.put("dimB", "ripe");
event.put("metA", 14L);
row = new MapBasedInputRow(1605260800000L, dimNames2, event);
indexF.add(row);
event = new HashMap<>();
event.put("dimA", "kiwi");
event.put("dimB", "raw");
event.put("metA", 8L);
row = new MapBasedInputRow(1705264400000L, dimNames2, event);
indexF.add(row);
event = new HashMap<>();
event.put("dimA", "kiwi");
event.put("dimB", "ripe");
event.put("metA", 8L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexF.add(row);
event = new HashMap<>();
event.put("dimA", "lemon");
event.put("dimB", "raw");
event.put("metA", 3L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexF.add(row);
event = new HashMap<>();
event.put("dimA", "cherry");
event.put("dimB", "ripe");
event.put("metA", 2L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexF.add(row);
event = new HashMap<>();
event.put("dimA", "cherry");
event.put("dimB", "raw");
event.put("metA", 7L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexF.add(row);
event = new HashMap<>();
event.put("dimA", "avocado");
event.put("dimB", "ripe");
event.put("metA", 12L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexF.add(row);
event = new HashMap<>();
event.put("dimA", "cherry");
event.put("dimB", "raw");
event.put("metA", 3L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexF.add(row);
event = new HashMap<>();
event.put("dimA", "plum");
event.put("dimB", "ripe");
event.put("metA", 5L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexF.add(row);
event = new HashMap<>();
event.put("dimA", "plum");
event.put("dimB", "raw");
event.put("metA", 3L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexF.add(row);
event = new HashMap<>();
event.put("dimA", "lime");
event.put("dimB", "ripe");
event.put("metA", 7L);
row = new MapBasedInputRow(1805264400000L, dimNames2, event);
indexF.add(row);
final File fileF = INDEX_MERGER_V9.persist(indexF, new File(tmpDir, "F"), new IndexSpec(), null);
QueryableIndex qindexF = INDEX_IO.loadIndex(fileF);
groupByIndices = Arrays.asList(qindexA, qindexB, qindexC, qindexD, qindexE, qindexF);
resourceCloser = Closer.create();
setupGroupByFactory();
}
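A rough, non-authoritative sketch of what the makeIncIndex(...) helper presumably assembles when it is handed the dimensions list above (the helper itself is not shown on this page; the MINUTE granularity and the "cnt" count metric are illustrative assumptions, and the builder calls mirror the constructorFeeder example in the next snippet):

import java.util.Arrays;
import java.util.List;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.LongDimensionSchema;
import org.apache.druid.data.input.impl.StringDimensionSchema;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;

public class LongDimensionSchemaSketch {
    // Hypothetical stand-in for the schema setup inside the test's makeIncIndex(...).
    public static IncrementalIndexSchema schemaWithLongDimension() {
        List<DimensionSchema> dimensions = Arrays.asList(
            new StringDimensionSchema("dimA"),
            new StringDimensionSchema("dimB"),
            // "metA" is declared as a long-typed dimension, as in the test above
            new LongDimensionSchema("metA")
        );
        // Assumed metric and granularity, for illustration only.
        AggregatorFactory[] metrics = { new CountAggregatorFactory("cnt") };
        return new IncrementalIndexSchema.Builder()
            .withQueryGranularity(Granularities.MINUTE)
            .withDimensionsSpec(new DimensionsSpec(dimensions))
            .withMetrics(metrics)
            .build();
    }
}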
Use of org.apache.druid.data.input.impl.LongDimensionSchema in project druid by druid-io.
Class IncrementalIndexTest, method constructorFeeder:
@Parameterized.Parameters(name = "{index}: {0}, {1}, deserialize={2}")
public static Collection<?> constructorFeeder() {
DimensionsSpec dimensions = new DimensionsSpec(
    Arrays.asList(
        new StringDimensionSchema("string"),
        new FloatDimensionSchema("float"),
        new LongDimensionSchema("long"),
        new DoubleDimensionSchema("double")
    )
);
AggregatorFactory[] metrics = {
    new FilteredAggregatorFactory(new CountAggregatorFactory("cnt"), new SelectorDimFilter("billy", "A", null))
};
final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
    .withQueryGranularity(Granularities.MINUTE)
    .withDimensionsSpec(dimensions)
    .withMetrics(metrics)
    .build();
return IncrementalIndexCreator.indexTypeCartesianProduct(
    ImmutableList.of("rollup", "plain"),
    ImmutableList.of(true, false),
    ImmutableList.of(schema)
);
}
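To connect this schema to row ingestion, a row whose long-typed column carries a numeric value is built the same way as in the setup method of the first example; the column names below come from the DimensionsSpec in this snippet, while the timestamp, the values, and the class name are placeholders:

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;

public class LongDimensionRowSketch {
    public static InputRow exampleRow() {
        Map<String, Object> event = new HashMap<>();
        event.put("string", "a");
        event.put("float", 3.0f);
        // The numeric value below is stored through the LongDimensionSchema("long") column.
        event.put("long", 42L);
        event.put("double", 7.5d);
        return new MapBasedInputRow(
            1505260800000L, // placeholder timestamp
            Arrays.asList("string", "float", "long", "double"),
            event
        );
    }
}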
Use of org.apache.druid.data.input.impl.LongDimensionSchema in project druid by druid-io.
Class IndexTaskTest, method testMultipleParseExceptionsFailure:
@Test
public void testMultipleParseExceptionsFailure() throws Exception {
final File tmpDir = temporaryFolder.newFolder();
final File tmpFile = File.createTempFile("druid", "index", tmpDir);
try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
writer.write("time,dim,dimLong,dimFloat,val\n");
// unparseable
writer.write("unparseable,a,2,3.0,1\n");
// valid row
writer.write("2014-01-01T00:00:10Z,a,2,3.0,1\n");
// unparseable
writer.write("9.0,a,2,3.0,1\n");
// thrown away
writer.write("3014-03-01T00:00:10Z,outsideofinterval,2,3.0,1\n");
// unparseable
writer.write("99999999999-01-01T00:00:10Z,b,2,3.0,1\n");
}
// Allow up to 3 parse exceptions, and save up to 2 parse exceptions
final IndexTuningConfig tuningConfig = new IndexTuningConfig(null, null, null, null, null, null, null, null, null, null, new DynamicPartitionsSpec(2, null), INDEX_SPEC, null, null, false, false, null, null, null, true, 2, 5, null, null);
final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null);
final DimensionsSpec dimensionsSpec = new DimensionsSpec(Arrays.asList(new StringDimensionSchema("dim"), new LongDimensionSchema("dimLong"), new FloatDimensionSchema("dimFloat")));
final List<String> columns = Arrays.asList("time", "dim", "dimLong", "dimFloat", "val");
final IndexIngestionSpec ingestionSpec;
List<String> expectedMessages;
if (useInputFormatApi) {
    ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, timestampSpec, dimensionsSpec, new CsvInputFormat(columns, null, null, true, 0), null, null, tuningConfig, false, false);
    expectedMessages = Arrays.asList(
        StringUtils.format("Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 3, Line: 6)", tmpFile.toURI()),
        StringUtils.format("Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 2, Line: 4)", tmpFile.toURI()),
        StringUtils.format("Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 2)", tmpFile.toURI())
    );
} else {
    ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, new CSVParseSpec(timestampSpec, dimensionsSpec, null, columns, true, 0), null, null, tuningConfig, false, false);
    expectedMessages = Arrays.asList(
        "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}",
        "Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}",
        "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}"
    );
}
IndexTask indexTask = new IndexTask(null, null, ingestionSpec, null);
TaskStatus status = runTask(indexTask).lhs;
Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
checkTaskStatusErrorMsgForParseExceptionsExceeded(status);
IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
Map<String, Object> expectedMetrics = ImmutableMap.of(
    RowIngestionMeters.DETERMINE_PARTITIONS,
    ImmutableMap.of(
        RowIngestionMeters.PROCESSED_WITH_ERROR, 0,
        RowIngestionMeters.PROCESSED, 0,
        RowIngestionMeters.UNPARSEABLE, 0,
        RowIngestionMeters.THROWN_AWAY, 0
    ),
    RowIngestionMeters.BUILD_SEGMENTS,
    ImmutableMap.of(
        RowIngestionMeters.PROCESSED_WITH_ERROR, 0,
        RowIngestionMeters.PROCESSED, 1,
        RowIngestionMeters.UNPARSEABLE, 3,
        RowIngestionMeters.THROWN_AWAY, useInputFormatApi ? 1 : 2
    )
);
Assert.assertEquals(expectedMetrics, reportData.getRowStats());
List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
List<String> actualMessages = parseExceptionReports.stream().map((r) -> {
return ((List<String>) r.get("details")).get(0);
}).collect(Collectors.toList());
Assert.assertEquals(expectedMessages, actualMessages);
List<String> expectedInputs = Arrays.asList(
    "{time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}",
    "{time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}",
    "{time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}"
);
List<String> actualInputs = parseExceptionReports.stream().map((r) -> {
return (String) r.get("input");
}).collect(Collectors.toList());
Assert.assertEquals(expectedInputs, actualInputs);
}
Use of org.apache.druid.data.input.impl.LongDimensionSchema in project druid by druid-io.
Class IndexTaskTest, method testMultipleParseExceptionsSuccess:
@Test
public void testMultipleParseExceptionsSuccess() throws Exception {
final File tmpDir = temporaryFolder.newFolder();
final File tmpFile = File.createTempFile("druid", "index", tmpDir);
try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
// unparseable time
writer.write("{\"time\":\"unparseable\",\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n");
// valid row
writer.write("{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n");
// row with invalid long dimension
writer.write("{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":\"notnumber\",\"dimFloat\":3.0,\"val\":1}\n");
// row with invalid float dimension
writer.write("{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":2,\"dimFloat\":\"notnumber\",\"val\":1}\n");
// row with invalid metric
writer.write("{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":2,\"dimFloat\":4.0,\"val\":\"notnumber\"}\n");
// invalid JSON
writer.write("{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n");
// thrown away
writer.write("{\"time\":\"3014-03-01T00:00:10Z\",\"dim\":\"outsideofinterval\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n");
// unparseable time
writer.write("{\"time\":\"99999999999-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n");
// invalid JSON
writer.write("this is not JSON\n");
}
final IndexTuningConfig tuningConfig = new IndexTuningConfig(null, null, null, null, null, null, null, null, null, null, new HashedPartitionsSpec(2, null, null), INDEX_SPEC, null, null, true, false, null, null, null, true, 7, 7, null, null);
final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null);
final DimensionsSpec dimensionsSpec = new DimensionsSpec(Arrays.asList(new StringDimensionSchema("dim"), new LongDimensionSchema("dimLong"), new FloatDimensionSchema("dimFloat")));
final IndexIngestionSpec ingestionSpec;
if (useInputFormatApi) {
ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, timestampSpec, dimensionsSpec, new JsonInputFormat(null, null, null), null, null, tuningConfig, false, false);
} else {
ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, new JSONParseSpec(timestampSpec, dimensionsSpec, null, null, null), null, null, tuningConfig, false, false);
}
IndexTask indexTask = new IndexTask(null, null, ingestionSpec, null);
TaskStatus status = runTask(indexTask).lhs;
Assert.assertEquals(TaskState.SUCCESS, status.getStatusCode());
Assert.assertEquals(null, status.getErrorMsg());
IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
Map<String, Object> expectedMetrics = ImmutableMap.of(
    RowIngestionMeters.DETERMINE_PARTITIONS,
    ImmutableMap.of(
        RowIngestionMeters.PROCESSED_WITH_ERROR, 0,
        RowIngestionMeters.PROCESSED, 4,
        RowIngestionMeters.UNPARSEABLE, 4,
        RowIngestionMeters.THROWN_AWAY, 1
    ),
    RowIngestionMeters.BUILD_SEGMENTS,
    ImmutableMap.of(
        RowIngestionMeters.PROCESSED_WITH_ERROR, 3,
        RowIngestionMeters.PROCESSED, 1,
        RowIngestionMeters.UNPARSEABLE, 4,
        RowIngestionMeters.THROWN_AWAY, 1
    )
);
Assert.assertEquals(expectedMetrics, reportData.getRowStats());
List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
List<String> expectedMessages;
if (useInputFormatApi) {
    expectedMessages = Arrays.asList(
        StringUtils.format("Unable to parse row [this is not JSON] (Path: %s, Record: 6, Line: 9)", tmpFile.toURI()),
        StringUtils.format("Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 6, Line: 8)", tmpFile.toURI()),
        StringUtils.format("Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}] (Path: %s, Record: 5, Line: 6)", tmpFile.toURI()),
        "Unable to parse value[notnumber] for field[val]",
        "could not convert value [notnumber] to float",
        "could not convert value [notnumber] to long",
        StringUtils.format("Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 1)", tmpFile.toURI())
    );
} else {
    expectedMessages = Arrays.asList(
        "Unable to parse row [this is not JSON]",
        "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}",
        "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}]",
        "Unable to parse value[notnumber] for field[val]",
        "could not convert value [notnumber] to float",
        "could not convert value [notnumber] to long",
        "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}"
    );
}
List<String> actualMessages = parseExceptionReports.stream().map((r) -> {
return ((List<String>) r.get("details")).get(0);
}).collect(Collectors.toList());
Assert.assertEquals(expectedMessages, actualMessages);
List<String> expectedInputs = Arrays.asList(
    "this is not JSON",
    "{time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}",
    "{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}",
    "{time=2014-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=4.0, val=notnumber}",
    "{time=2014-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=notnumber, val=1}",
    "{time=2014-01-01T00:00:10Z, dim=b, dimLong=notnumber, dimFloat=3.0, val=1}",
    "{time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}"
);
List<String> actualInputs = parseExceptionReports.stream().map((r) -> {
return (String) r.get("input");
}).collect(Collectors.toList());
Assert.assertEquals(expectedInputs, actualInputs);
parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.DETERMINE_PARTITIONS);
if (useInputFormatApi) {
    expectedMessages = Arrays.asList(
        StringUtils.format("Unable to parse row [this is not JSON] (Path: %s, Record: 6, Line: 9)", tmpFile.toURI()),
        StringUtils.format("Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 6, Line: 8)", tmpFile.toURI()),
        StringUtils.format("Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}] (Path: %s, Record: 5, Line: 6)", tmpFile.toURI()),
        StringUtils.format("Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 1)", tmpFile.toURI())
    );
} else {
    expectedMessages = Arrays.asList(
        "Unable to parse row [this is not JSON]",
        "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}",
        "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}]",
        "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}"
    );
}
actualMessages = parseExceptionReports.stream().map((r) -> {
return ((List<String>) r.get("details")).get(0);
}).collect(Collectors.toList());
Assert.assertEquals(expectedMessages, actualMessages);
expectedInputs = Arrays.asList(
    "this is not JSON",
    "{time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}",
    "{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}",
    "{time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}"
);
actualInputs = parseExceptionReports.stream().map((r) -> {
return (String) r.get("input");
}).collect(Collectors.toList());
Assert.assertEquals(expectedInputs, actualInputs);
}
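The report-inspection pipeline repeated throughout these tests can be factored into small helpers; the class and method names below are hypothetical, but the "details" and "input" keys and the stream logic are exactly what the assertions above use:

import java.util.LinkedHashMap;
import java.util.List;
import java.util.stream.Collectors;

public class ParseExceptionReportSketch {
    // First "details" message of each unparseable-event report entry.
    @SuppressWarnings("unchecked")
    public static List<String> firstDetailOfEach(List<LinkedHashMap> parseExceptionReports) {
        return parseExceptionReports.stream()
            .map(r -> ((List<String>) r.get("details")).get(0))
            .collect(Collectors.toList());
    }

    // Raw "input" string of each report entry.
    public static List<String> inputOfEach(List<LinkedHashMap> parseExceptionReports) {
        return parseExceptionReports.stream()
            .map(r -> (String) r.get("input"))
            .collect(Collectors.toList());
    }
}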
Use of org.apache.druid.data.input.impl.LongDimensionSchema in project druid by druid-io.
Class IndexTaskTest, method testMultipleParseExceptionsFailureAtDeterminePartitions:
@Test
public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exception {
final File tmpDir = temporaryFolder.newFolder();
final File tmpFile = File.createTempFile("druid", "index", tmpDir);
try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
writer.write("time,dim,dimLong,dimFloat,val\n");
// unparseable
writer.write("unparseable,a,2,3.0,1\n");
// valid row
writer.write("2014-01-01T00:00:10Z,a,2,3.0,1\n");
// unparseable
writer.write("9.0,a,2,3.0,1\n");
// thrown away
writer.write("3014-03-01T00:00:10Z,outsideofinterval,2,3.0,1\n");
// unparseable
writer.write("99999999999-01-01T00:00:10Z,b,2,3.0,1\n");
}
// Allow up to 3 parse exceptions, and save up to 2 parse exceptions
final IndexTuningConfig tuningConfig = new IndexTuningConfig(null, null, null, null, null, null, null, null, null, null, new HashedPartitionsSpec(2, null, null), INDEX_SPEC, null, null, true, false, null, null, null, true, 2, 5, null, null);
final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null);
final DimensionsSpec dimensionsSpec = new DimensionsSpec(Arrays.asList(new StringDimensionSchema("dim"), new LongDimensionSchema("dimLong"), new FloatDimensionSchema("dimFloat")));
final List<String> columns = Arrays.asList("time", "dim", "dimLong", "dimFloat", "val");
final IndexIngestionSpec ingestionSpec;
List<String> expectedMessages;
if (useInputFormatApi) {
    ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, timestampSpec, dimensionsSpec, new CsvInputFormat(columns, null, null, true, 0), null, null, tuningConfig, false, false);
    expectedMessages = Arrays.asList(
        StringUtils.format("Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 3, Line: 6)", tmpFile.toURI()),
        StringUtils.format("Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 2, Line: 4)", tmpFile.toURI()),
        StringUtils.format("Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1} (Path: %s, Record: 1, Line: 2)", tmpFile.toURI())
    );
} else {
    ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, new CSVParseSpec(timestampSpec, dimensionsSpec, null, columns, true, 0), null, null, tuningConfig, false, false);
    expectedMessages = Arrays.asList(
        "Timestamp[99999999999-01-01T00:00:10Z] is unparseable! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}",
        "Timestamp[9.0] is unparseable! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}",
        "Timestamp[unparseable] is unparseable! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}"
    );
}
IndexTask indexTask = new IndexTask(null, null, ingestionSpec, null);
TaskStatus status = runTask(indexTask).lhs;
Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
checkTaskStatusErrorMsgForParseExceptionsExceeded(status);
IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
Map<String, Object> expectedMetrics = ImmutableMap.of(
    RowIngestionMeters.DETERMINE_PARTITIONS,
    ImmutableMap.of(
        RowIngestionMeters.PROCESSED_WITH_ERROR, 0,
        RowIngestionMeters.PROCESSED, 1,
        RowIngestionMeters.UNPARSEABLE, 3,
        RowIngestionMeters.THROWN_AWAY, useInputFormatApi ? 1 : 2
    ),
    RowIngestionMeters.BUILD_SEGMENTS,
    ImmutableMap.of(
        RowIngestionMeters.PROCESSED_WITH_ERROR, 0,
        RowIngestionMeters.PROCESSED, 0,
        RowIngestionMeters.UNPARSEABLE, 0,
        RowIngestionMeters.THROWN_AWAY, 0
    )
);
Assert.assertEquals(expectedMetrics, reportData.getRowStats());
List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.DETERMINE_PARTITIONS);
List<String> actualMessages = parseExceptionReports.stream().map((r) -> {
return ((List<String>) r.get("details")).get(0);
}).collect(Collectors.toList());
Assert.assertEquals(expectedMessages, actualMessages);
List<String> expectedInputs = Arrays.asList(
    "{time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}",
    "{time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}",
    "{time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}"
);
List<String> actualInputs = parseExceptionReports.stream().map((r) -> {
return (String) r.get("input");
}).collect(Collectors.toList());
Assert.assertEquals(expectedInputs, actualInputs);
}