use of org.apache.druid.data.input.Row in project druid by druid-io.
the class IncrementalIndexStorageAdapterTest method testCursorDictionaryRaceConditionFix.
/**
 * Regression test for the dictionary ID race condition bug described at
 * https://github.com/apache/druid/pull/6340.
 *
 * Adds five rows with distinct "billy" values, opens cursors through a {@code DictionaryRaceTestFilter},
 * and checks that every int reported by the "billy" dimension selector stays below the cardinality
 * that was read from the selector before iteration started.
 */
@Test
public void testCursorDictionaryRaceConditionFix() throws Exception {
  final int expectedRowCount = 5;
  final IncrementalIndex index = indexCreator.createIndex();
  final long timestamp = System.currentTimeMillis();
  for (int rowNum = 0; rowNum < expectedRowCount; rowNum++) {
    index.add(
        new MapBasedInputRow(
            timestamp,
            Collections.singletonList("billy"),
            ImmutableMap.of("billy", "v1" + rowNum)
        )
    );
  }

  final StorageAdapter sa = new IncrementalIndexStorageAdapter(index);
  final Sequence<Cursor> cursors = sa.makeCursors(
      new DictionaryRaceTestFilter(index, timestamp),
      Intervals.utc(timestamp - 60_000, timestamp + 60_000),
      VirtualColumns.EMPTY,
      Granularities.ALL,
      false,
      null
  );

  // Counts how many cursors were actually visited, so an empty sequence cannot pass silently.
  final AtomicInteger visitedCursors = new AtomicInteger(0);
  cursors.map(cursor -> {
    final DimensionSelector billySelector = cursor
        .getColumnSelectorFactory()
        .makeDimensionSelector(new DefaultDimensionSpec("billy", "billy"));
    // Cardinality is captured once, before the cursor is walked.
    final int cardinality = billySelector.getValueCardinality();

    int rowsSeen = 0;
    for (; !cursor.isDone(); cursor.advance()) {
      final IndexedInts ids = billySelector.getRow();
      ids.forEach(id -> Assert.assertTrue(id < cardinality));
      rowsSeen++;
    }

    Assert.assertEquals(expectedRowCount, rowsSeen);
    visitedCursors.incrementAndGet();
    return null;
  }).toList();

  Assert.assertEquals(1, visitedCursors.get());
}
use of org.apache.druid.data.input.Row in project druid by druid-io.
the class IncrementalIndexStorageAdapterTest method testCursoringAndIndexUpdationInterleaving.
/**
 * Checks cursor behaviour when the index receives a new row after the dimension selector has been
 * created (and its cardinality read) but before the cursor is iterated.
 *
 * The test asserts that the cursor still yields exactly the two rows that were present when the cursor
 * was made, and that every int reported by the selector is below the cardinality captured up front.
 */
@Test
public void testCursoringAndIndexUpdationInterleaving() throws Exception {
  final IncrementalIndex index = indexCreator.createIndex();
  final long timestamp = System.currentTimeMillis();
  for (int rowNum = 0; rowNum < 2; rowNum++) {
    index.add(
        new MapBasedInputRow(
            timestamp,
            Collections.singletonList("billy"),
            ImmutableMap.of("billy", "v1" + rowNum)
        )
    );
  }

  final StorageAdapter sa = new IncrementalIndexStorageAdapter(index);
  final Sequence<Cursor> cursors = sa.makeCursors(
      null,
      Intervals.utc(timestamp - 60_000, timestamp + 60_000),
      VirtualColumns.EMPTY,
      Granularities.ALL,
      false,
      null
  );

  // Counts how many cursors were actually visited, so an empty sequence cannot pass silently.
  final AtomicInteger visitedCursors = new AtomicInteger(0);
  cursors.map(cursor -> {
    final DimensionSelector billySelector = cursor
        .getColumnSelectorFactory()
        .makeDimensionSelector(new DefaultDimensionSpec("billy", "billy"));
    final int cardinality = billySelector.getValueCardinality();

    // The index gets more rows at this point, i.e. between reading the cardinality and walking the cursor.
    try {
      for (int extra = 0; extra < 1; extra++) {
        index.add(
            new MapBasedInputRow(
                timestamp,
                Collections.singletonList("billy"),
                ImmutableMap.of("billy", "v2" + extra)
            )
        );
      }
    }
    catch (Exception ex) {
      // Rethrown unchecked because the enclosing lambda cannot throw checked exceptions.
      throw new RuntimeException(ex);
    }

    // ...and then cursoring continues.
    int rowsSeen = 0;
    for (; !cursor.isDone(); cursor.advance()) {
      final IndexedInts ids = billySelector.getRow();
      ids.forEach(id -> Assert.assertTrue(id < cardinality));
      rowsSeen++;
    }

    // Only the two rows added before the cursor was made are expected.
    Assert.assertEquals(2, rowsSeen);
    visitedCursors.incrementAndGet();
    return null;
  }).toList();

  Assert.assertEquals(1, visitedCursors.get());
}
use of org.apache.druid.data.input.Row in project druid by druid-io.
the class GroupByMergedQueryRunner method run.
/**
 * Runs the group-by query against every runner in {@code queryables}, accumulating their results either
 * into a shared {@link IncrementalIndex} or, for by-segment queries, into a shared queue.
 *
 * One task per runner is submitted to {@code queryProcessingPool}. When the query-specific config is
 * single-threaded, each task is awaited right after it is submitted; otherwise all tasks are submitted
 * first and awaited together.
 *
 * @param queryPlus       the query to run; its query must be a {@link GroupByQuery}
 * @param responseContext response context handed to every underlying runner
 * @return the by-segment queue contents for by-segment queries, otherwise the rows of the merged index
 *         (the index is attached to the returned sequence as baggage)
 */
@Override
public Sequence<T> run(final QueryPlus<T> queryPlus, final ResponseContext responseContext) {
  final GroupByQuery query = (GroupByQuery) queryPlus.getQuery();
  final GroupByQueryConfig effectiveConfig = configSupplier.get().withOverrides(query);
  final boolean awaitEachTask = effectiveConfig.isSingleThreaded();
  final Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> indexPair =
      GroupByQueryHelper.createIndexAccumulatorPair(query, null, effectiveConfig);
  final Pair<Queue, Accumulator<Queue, T>> segmentPair = GroupByQueryHelper.createBySegmentAccumulatorPair();
  final boolean bySegment = QueryContexts.isBySegment(query);
  final int priority = QueryContexts.getPriority(query);
  final QueryPlus<T> threadSafeQueryPlus = queryPlus.withoutThreadUnsafeState();

  // The transform is consumed eagerly by Lists.newArrayList, so tasks are submitted in iteration order.
  final List<ListenableFuture<Void>> futures = Lists.newArrayList(
      Iterables.transform(
          queryables,
          runner -> {
            if (runner == null) {
              throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
            }

            final ListenableFuture<Void> submitted = queryProcessingPool.submitRunnerTask(
                new AbstractPrioritizedQueryRunnerCallable<Void, T>(priority, runner)
                {
                  @Override
                  public Void call()
                  {
                    try {
                      final Sequence<T> results = runner.run(threadSafeQueryPlus, responseContext);
                      if (!bySegment) {
                        results.accumulate(indexPair.lhs, indexPair.rhs);
                      } else {
                        results.accumulate(segmentPair.lhs, segmentPair.rhs);
                      }
                      return null;
                    }
                    catch (QueryInterruptedException e) {
                      // Interruptions are rethrown without being logged.
                      throw new RuntimeException(e);
                    }
                    catch (Exception e) {
                      log.error(e, "Exception with one of the sequences!");
                      Throwables.propagateIfPossible(e);
                      throw new RuntimeException(e);
                    }
                  }
                }
            );

            if (awaitEachTask) {
              waitForFutureCompletion(query, ImmutableList.of(submitted), indexPair.lhs);
            }
            return submitted;
          }
      )
  );

  if (!awaitEachTask) {
    waitForFutureCompletion(query, futures, indexPair.lhs);
  }

  if (bySegment) {
    return Sequences.simple(segmentPair.lhs);
  }

  final Iterable<T> mergedRows = Iterables.transform(
      indexPair.lhs.iterableWithPostAggregations(null, query.isDescending()),
      (Row mergedRow) -> (T) mergedRow
  );
  return Sequences.withBaggage(Sequences.simple(mergedRows), indexPair.lhs);
}
use of org.apache.druid.data.input.Row in project druid by druid-io.
the class SpatialDimensionRowTransformer method apply.
/**
 * Wraps {@code row} in a view whose dimension list drops every dimension that is a key of
 * {@code spatialDimensionMap} or is contained in {@code spatialPartialDimNames}, and then re-adds each
 * spatial dimension for which a value could be resolved.
 *
 * For each entry of {@code spatialDimensionMap} the value is resolved as follows:
 * if the row already carries a non-empty value list under the spatial dimension's name, it must hold
 * exactly one element and is used as-is when {@code isJoinedSpatialDimValValid} accepts it; otherwise the
 * values of the schema's component dimensions are collected and joined with {@code JOINER}, provided the
 * number of collected values equals the number of component dimensions.
 *
 * @param row the input row to wrap
 * @return a row that answers {@code getDimension}/{@code getRaw} from the resolved spatial values first and
 *         delegates everything else to {@code row}
 * @throws ISE if the row carries more than one value under a spatial dimension's name
 */
@Override
public InputRow apply(final InputRow row) {
  final Map<String, List<String>> spatialLookup = new HashMap<>();

  // Start from the row's dimensions minus anything spatial (full or partial).
  final List<String> finalDims = Lists.newArrayList(
      Iterables.filter(
          row.getDimensions(),
          dimName -> !(spatialDimensionMap.containsKey(dimName) || spatialPartialDimNames.contains(dimName))
      )
  );

  for (Map.Entry<String, SpatialDimensionSchema> schemaEntry : spatialDimensionMap.entrySet()) {
    final String spatialDimName = schemaEntry.getKey();
    final SpatialDimensionSchema schema = schemaEntry.getValue();
    final List<String> presentVals = row.getDimension(spatialDimName);

    if (presentVals == null || presentVals.isEmpty()) {
      // Nothing stored under the spatial name: try to assemble it from the component dimensions.
      final List<String> componentVals = new ArrayList<>();
      for (String componentDim : schema.getDims()) {
        final List<String> partialVals = row.getDimension(componentDim);
        if (isSpatialDimValsValid(partialVals)) {
          componentVals.addAll(partialVals);
        }
      }
      if (componentVals.size() == schema.getDims().size()) {
        spatialLookup.put(spatialDimName, Collections.singletonList(JOINER.join(componentVals)));
        finalDims.add(spatialDimName);
      }
      continue;
    }

    if (presentVals.size() != 1) {
      throw new ISE("Spatial dimension value must be in an array!");
    }
    if (isJoinedSpatialDimValValid(presentVals.get(0))) {
      spatialLookup.put(spatialDimName, presentVals);
      finalDims.add(spatialDimName);
    }
  }

  // The view reads spatialLookup and finalDims lazily, so it can be built once both are populated.
  return new InputRow()
  {
    @Override
    public List<String> getDimensions()
    {
      return finalDims;
    }

    @Override
    public long getTimestampFromEpoch()
    {
      return row.getTimestampFromEpoch();
    }

    @Override
    public DateTime getTimestamp()
    {
      return row.getTimestamp();
    }

    @Override
    public List<String> getDimension(String dimension)
    {
      final List<String> spatialVals = spatialLookup.get(dimension);
      if (spatialVals != null) {
        return spatialVals;
      }
      return row.getDimension(dimension);
    }

    @Override
    public Object getRaw(String dimension)
    {
      final List<String> spatialVals = spatialLookup.get(dimension);
      if (spatialVals != null) {
        return spatialVals;
      }
      return row.getRaw(dimension);
    }

    @Override
    public Number getMetric(String metric)
    {
      return row.getMetric(metric);
    }

    @Override
    public String toString()
    {
      return row.toString();
    }

    @Override
    public int compareTo(Row o)
    {
      return getTimestamp().compareTo(o.getTimestamp());
    }
  };
}
use of org.apache.druid.data.input.Row in project druid by druid-io.
the class InputSourceSampler method sample.
/**
 * Reads up to {@code samplerConfig.getNumRows()} rows from {@code inputSource}, indexes the parseable
 * ones into a temporary {@link IncrementalIndex}, and returns each raw row together with its parsed form
 * or its parse error.
 *
 * A temporary directory is created for the reader and is deleted when this method exits, whether it
 * returns normally or throws.
 *
 * @param inputSource   source to sample from; required
 * @param inputFormat   format of the input; can be null only if {@code inputSource.needsFormat()} is false
 *                      or a parser is specified
 * @param dataSchema    schema used for parsing and indexing; {@code DEFAULT_DATA_SCHEMA} is used when null
 * @param samplerConfig sampler settings; {@code SamplerConfig.empty()} is used when null
 * @return the number of rows read, the number of rows indexed, and the response rows that carry either a
 *         parsed value or an unparseable flag
 * @throws SamplerException if anything fails while reading, indexing, or cleaning up
 */
public SamplerResponse sample(
    final InputSource inputSource,
    // inputFormat can be null only if inputSource.needsFormat() = false or parser is specified.
    @Nullable final InputFormat inputFormat,
    @Nullable final DataSchema dataSchema,
    @Nullable final SamplerConfig samplerConfig
) {
  Preconditions.checkNotNull(inputSource, "inputSource required");
  if (inputSource.needsFormat()) {
    Preconditions.checkNotNull(inputFormat, "inputFormat required");
  }
  final DataSchema nonNullDataSchema = dataSchema == null ? DEFAULT_DATA_SCHEMA : dataSchema;
  final SamplerConfig nonNullSamplerConfig = samplerConfig == null ? SamplerConfig.empty() : samplerConfig;

  final Closer closer = Closer.create();
  final File tempDir = FileUtils.createTempDir();
  closer.register(() -> FileUtils.deleteDirectory(tempDir));

  // The closer is the FIRST resource of the outer try so that:
  //  - it is closed (and tempDir deleted) even when buildReader(), reader.sample() or
  //    buildIncrementalIndex() throws, which previously leaked the temp directory;
  //  - it is closed LAST, i.e. only after the iterator and the index have been closed.
  try (final Closer tempDirCleanup = closer) {
    final InputSourceReader reader = buildReader(
        nonNullSamplerConfig,
        nonNullDataSchema,
        inputSource,
        inputFormat,
        tempDir
    );
    try (final CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample();
         final IncrementalIndex index = buildIncrementalIndex(nonNullSamplerConfig, nonNullDataSchema)) {
      List<SamplerResponseRow> responseRows = new ArrayList<>(nonNullSamplerConfig.getNumRows());
      int numRowsIndexed = 0;

      while (responseRows.size() < nonNullSamplerConfig.getNumRows() && iterator.hasNext()) {
        final InputRowListPlusRawValues inputRowListPlusRawValues = iterator.next();
        final List<Map<String, Object>> rawColumnsList = inputRowListPlusRawValues.getRawValuesList();
        final ParseException parseException = inputRowListPlusRawValues.getParseException();

        if (parseException != null) {
          if (rawColumnsList != null) {
            // add all rows to response
            responseRows.addAll(
                rawColumnsList.stream()
                              .map(rawColumns -> new SamplerResponseRow(
                                  rawColumns,
                                  null,
                                  true,
                                  parseException.getMessage()
                              ))
                              .collect(Collectors.toList())
            );
          } else {
            // no data parsed, add one response row
            responseRows.add(new SamplerResponseRow(null, null, true, parseException.getMessage()));
          }
          continue;
        }

        List<InputRow> inputRows = inputRowListPlusRawValues.getInputRows();
        if (inputRows == null) {
          continue;
        }

        for (int i = 0; i < inputRows.size(); i++) {
          // InputRowListPlusRawValues guarantees the size of rawColumnsList and inputRows are the same
          Map<String, Object> rawColumns = rawColumnsList == null ? null : rawColumnsList.get(i);
          InputRow row = inputRows.get(i);

          // keep the index of the row to be added to responseRows for further use
          final int rowIndex = responseRows.size();
          IncrementalIndexAddResult addResult = index.add(new SamplerInputRow(row, rowIndex), true);
          if (addResult.hasParseException()) {
            responseRows.add(
                new SamplerResponseRow(rawColumns, null, true, addResult.getParseException().getMessage())
            );
          } else {
            // store the raw value; will be merged with the data from the IncrementalIndex later
            responseRows.add(new SamplerResponseRow(rawColumns, null, null, null));
            numRowsIndexed++;
          }
        }
      }

      // The ordering column only exists to map indexed rows back to their response slot; hide it.
      final List<String> columnNames = index.getColumnNames();
      columnNames.remove(SamplerInputRow.SAMPLER_ORDERING_COLUMN);

      for (Row row : index) {
        Map<String, Object> parsed = new LinkedHashMap<>();
        parsed.put(ColumnHolder.TIME_COLUMN_NAME, row.getTimestampFromEpoch());
        columnNames.forEach(k -> parsed.put(k, row.getRaw(k)));

        // The ordering metric holds the position in responseRows recorded when the row was added.
        Number sortKey = row.getMetric(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
        if (sortKey != null) {
          responseRows.set(sortKey.intValue(), responseRows.get(sortKey.intValue()).withParsed(parsed));
        }
      }

      // make sure size of responseRows meets the input
      if (responseRows.size() > nonNullSamplerConfig.getNumRows()) {
        responseRows = responseRows.subList(0, nonNullSamplerConfig.getNumRows());
      }

      int numRowsRead = responseRows.size();
      return new SamplerResponse(
          numRowsRead,
          numRowsIndexed,
          responseRows.stream()
                      .filter(Objects::nonNull)
                      .filter(x -> x.getParsed() != null || x.isUnparseable() != null)
                      .collect(Collectors.toList())
      );
    }
  }
  catch (Exception e) {
    throw new SamplerException(e, "Failed to sample data: %s", e.getMessage());
  }
}
Aggregations