
Example 1 with FixedByteSingleColumnMultiValueReaderWriter

Use of com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnMultiValueReaderWriter in project pinot by linkedin.

From the class FixedByteSingleColumnMultiValueReaderWriterTest, method testIntArray.

public void testIntArray(final long seed) throws IOException {
    FixedByteSingleColumnMultiValueReaderWriter readerWriter;
    int rows = 1000;
    int columnSizeInBytes = Integer.SIZE / 8;
    int maxNumberOfMultiValuesPerRow = 2000;
    readerWriter = new FixedByteSingleColumnMultiValueReaderWriter(rows, columnSizeInBytes, maxNumberOfMultiValuesPerRow, 2);
    Random r = new Random(seed);
    int[][] data = new int[rows][];
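    // Fill each row with a random number of random values and write it through setIntArray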
    for (int i = 0; i < rows; i++) {
        data[i] = new int[r.nextInt(maxNumberOfMultiValuesPerRow)];
        for (int j = 0; j < data[i].length; j++) {
            data[i][j] = r.nextInt();
        }
        readerWriter.setIntArray(i, data[i]);
    }
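    // Read every row back into a reusable buffer and verify it matches what was written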
    int[] ret = new int[maxNumberOfMultiValuesPerRow];
    for (int i = 0; i < rows; i++) {
        int length = readerWriter.getIntArray(i, ret);
        Assert.assertEquals(data[i].length, length, "Failed with seed=" + seed);
        Assert.assertTrue(Arrays.equals(data[i], Arrays.copyOf(ret, length)), "Failed with seed=" + seed);
    }
    readerWriter.close();
}
Also used : FixedByteSingleColumnMultiValueReaderWriter (com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnMultiValueReaderWriter), Random (java.util.Random)
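
For reference, here is a minimal standalone sketch of the write/read cycle this test exercises. It only uses calls that appear in the test above; the role of the final constructor argument (2) is not explained by these examples, so it is passed through unchanged.

import java.io.IOException;

import com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnMultiValueReaderWriter;

public class MultiValueReaderWriterSketch {

    public static void main(String[] args) throws IOException {
        int rows = 10;
        // 4 bytes per stored int value
        int columnSizeInBytes = Integer.SIZE / 8;
        int maxNumberOfMultiValuesPerRow = 5;
        FixedByteSingleColumnMultiValueReaderWriter readerWriter =
                new FixedByteSingleColumnMultiValueReaderWriter(rows, columnSizeInBytes, maxNumberOfMultiValuesPerRow, 2);
        // Write one multi-value row
        readerWriter.setIntArray(0, new int[] { 7, 11, 13 });
        // Read it back into a reusable buffer; the return value is the number of values in the row
        int[] buffer = new int[maxNumberOfMultiValuesPerRow];
        int length = readerWriter.getIntArray(0, buffer);
        System.out.println("row 0 has " + length + " values, first value = " + buffer[0]);
        readerWriter.close();
    }
}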

Example 2 with FixedByteSingleColumnMultiValueReaderWriter

Use of com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnMultiValueReaderWriter in project pinot by linkedin.

From the class MultiValueDictionaryTest, method testMultiValueIndexing.

public void testMultiValueIndexing(final long seed) throws Exception {
    final FieldSpec mvIntFs = new DimensionFieldSpec(COL_NAME, FieldSpec.DataType.LONG, false);
    final LongMutableDictionary dict = new LongMutableDictionary(COL_NAME);
    final FixedByteSingleColumnMultiValueReaderWriter indexer = new FixedByteSingleColumnMultiValueReaderWriter(NROWS, Integer.SIZE / 8, MAX_N_VALUES, 2);
    // Insert rows into the indexer and dictionary
    Random random = new Random(seed);
    for (int row = 0; row < NROWS; row++) {
        int nValues = Math.abs(random.nextInt()) % MAX_N_VALUES;
        Long[] val = new Long[nValues];
        for (int i = 0; i < nValues; i++) {
            val[i] = random.nextLong();
        }
        dict.index(val);
        int[] dictIds = new int[nValues];
        for (int i = 0; i < nValues; i++) {
            dictIds[i] = dict.indexOf(val[i]);
        }
        indexer.setIntArray(row, dictIds);
    }
    // Read back rows and make sure that the values are good.
    random = new Random(seed);
    final int[] dictIds = new int[MAX_N_VALUES];
    for (int row = 0; row < NROWS; row++) {
        int nValues = indexer.getIntArray(row, dictIds);
        Assert.assertEquals(nValues, Math.abs(random.nextInt()) % MAX_N_VALUES, "Mismatching number of values, random seed is: " + seed);
        for (int i = 0; i < nValues; i++) {
            Long val = dict.getLongValue(dictIds[i]);
            Assert.assertEquals(val.longValue(), random.nextLong(), "Value mismatch at row " + row + ", random seed is: " + seed);
        }
    }
}
Also used : FixedByteSingleColumnMultiValueReaderWriter (com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnMultiValueReaderWriter), Random (java.util.Random), DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec), FieldSpec (com.linkedin.pinot.common.data.FieldSpec)
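
Together with Example 3 below, this test shows the storage pattern for a multi-value column in a realtime segment: raw values are added to a mutable dictionary, and only the integer dictionary IDs go into the multi-value forward index. The condensed sketch below repeats that round trip with fixed values; the LongMutableDictionary import path is assumed from the project layout, and the column name is a placeholder.

import com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnMultiValueReaderWriter;
// Import path assumed from the project layout; only the index/indexOf/getLongValue
// calls shown in the test above are used.
import com.linkedin.pinot.core.realtime.impl.dictionary.LongMutableDictionary;

public class DictionaryEncodedMultiValueSketch {

    public static void main(String[] args) throws Exception {
        // "myLongColumn" is a placeholder column name
        LongMutableDictionary dict = new LongMutableDictionary("myLongColumn");
        // Constructor arguments follow the same order as in the test above
        FixedByteSingleColumnMultiValueReaderWriter indexer =
                new FixedByteSingleColumnMultiValueReaderWriter(4, Integer.SIZE / 8, 3, 2);

        // Dictionary-encode the raw values of one row
        Long[] rawValues = { 100L, 200L, 300L };
        dict.index(rawValues);
        int[] dictIds = new int[rawValues.length];
        for (int i = 0; i < rawValues.length; i++) {
            dictIds[i] = dict.indexOf(rawValues[i]);
        }

        // Store only the dictionary IDs in the multi-value forward index
        indexer.setIntArray(0, dictIds);

        // Decode: read the IDs back and translate them through the dictionary
        int[] readBack = new int[3];
        int count = indexer.getIntArray(0, readBack);
        for (int i = 0; i < count; i++) {
            System.out.println(dict.getLongValue(readBack[i]));
        }
        indexer.close();
    }
}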

Example 3 with FixedByteSingleColumnMultiValueReaderWriter

Use of com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnMultiValueReaderWriter in project pinot by linkedin.

From the class RealtimeSegmentImpl, method index.

@Override
public boolean index(GenericRow row) {
    // Validate row prior to indexing it
    StringBuilder invalidColumns = null;
    for (String dimension : dataSchema.getDimensionNames()) {
        Object value = row.getValue(dimension);
        if (value == null) {
            if (invalidColumns == null) {
                invalidColumns = new StringBuilder(dimension);
            } else {
                invalidColumns.append(", ").append(dimension);
            }
        }
    }
    for (String metric : dataSchema.getMetricNames()) {
        Object value = row.getValue(metric);
        if (value == null) {
            if (invalidColumns == null) {
                invalidColumns = new StringBuilder(metric);
            } else {
                invalidColumns.append(", ").append(metric);
            }
        }
    }
    {
        Object value = row.getValue(outgoingTimeColumnName);
        if (value == null) {
            if (invalidColumns == null) {
                invalidColumns = new StringBuilder(outgoingTimeColumnName);
            } else {
                invalidColumns.append(", ").append(outgoingTimeColumnName);
            }
        }
    }
    if (invalidColumns != null) {
        LOGGER.warn("Dropping invalid row {} with null values for column(s) {}", row, invalidColumns);
        serverMetrics.addMeteredTableValue(tableAndStreamName, ServerMeter.INVALID_REALTIME_ROWS_DROPPED, 1L);
        return true;
    }
    // Update the dictionaries first; inserting there before the forward/inverted indexes is safe,
    // since filtering won't return back anything unless a new entry is made in the inverted index
    for (String dimension : dataSchema.getDimensionNames()) {
        dictionaryMap.get(dimension).index(row.getValue(dimension));
        if (!dataSchema.getFieldSpecFor(dimension).isSingleValueField()) {
            Object[] entries = (Object[]) row.getValue(dimension);
            if ((entries != null) && (maxNumberOfMultivaluesMap.get(dimension) < entries.length)) {
                maxNumberOfMultivaluesMap.put(dimension, entries.length);
            }
        }
    }
    for (String metric : dataSchema.getMetricNames()) {
        dictionaryMap.get(metric).index(row.getValue(metric));
    }
    // Conversion already happens in PlainFieldExtractor
    Object timeValueObj = row.getValue(outgoingTimeColumnName);
    long timeValue = -1;
    if (timeValueObj instanceof Number) {
        timeValue = ((Number) timeValueObj).longValue();
    } else {
        timeValue = Long.valueOf(timeValueObj.toString());
    }
    dictionaryMap.get(outgoingTimeColumnName).index(timeValueObj);
    // update the min max time values
    minTimeVal = Math.min(minTimeVal, timeValue);
    maxTimeVal = Math.max(maxTimeVal, timeValue);
    // also, let's collect all dicIds so we can update the inverted index later
    Map<String, Object> rawRowToDicIdMap = new HashMap<String, Object>();
    // let's update the forward index now
    int docId = docIdGenerator.incrementAndGet();
    for (String dimension : dataSchema.getDimensionNames()) {
        if (dataSchema.getFieldSpecFor(dimension).isSingleValueField()) {
            int dicId = dictionaryMap.get(dimension).indexOf(row.getValue(dimension));
            ((FixedByteSingleColumnSingleValueReaderWriter) columnIndexReaderWriterMap.get(dimension)).setInt(docId, dicId);
            rawRowToDicIdMap.put(dimension, dicId);
        } else {
            Object[] mValues = (Object[]) row.getValue(dimension);
            int[] dicIds;
            if (mValues != null) {
                dicIds = new int[mValues.length];
                for (int i = 0; i < dicIds.length; i++) {
                    dicIds[i] = dictionaryMap.get(dimension).indexOf(mValues[i]);
                }
            } else {
                dicIds = EMPTY_DICTIONARY_IDS_ARRAY;
            }
            ((FixedByteSingleColumnMultiValueReaderWriter) columnIndexReaderWriterMap.get(dimension)).setIntArray(docId, dicIds);
            rawRowToDicIdMap.put(dimension, dicIds);
        }
    }
    for (String metric : dataSchema.getMetricNames()) {
        FixedByteSingleColumnSingleValueReaderWriter readerWriter = (FixedByteSingleColumnSingleValueReaderWriter) columnIndexReaderWriterMap.get(metric);
        int dicId = dictionaryMap.get(metric).indexOf(row.getValue(metric));
        readerWriter.setInt(docId, dicId);
        rawRowToDicIdMap.put(metric, dicId);
    }
    int timeDicId = dictionaryMap.get(outgoingTimeColumnName).indexOf(timeValueObj);
    ((FixedByteSingleColumnSingleValueReaderWriter) columnIndexReaderWriterMap.get(outgoingTimeColumnName)).setInt(docId, timeDicId);
    rawRowToDicIdMap.put(outgoingTimeColumnName, timeDicId);
    // metrics
    for (String metric : dataSchema.getMetricNames()) {
        if (invertedIndexMap.containsKey(metric)) {
            invertedIndexMap.get(metric).add(rawRowToDicIdMap.get(metric), docId);
        }
    }
    // dimension
    for (String dimension : dataSchema.getDimensionNames()) {
        if (invertedIndexMap.containsKey(dimension)) {
            if (dataSchema.getFieldSpecFor(dimension).isSingleValueField()) {
                invertedIndexMap.get(dimension).add(rawRowToDicIdMap.get(dimension), docId);
            } else {
                int[] dicIds = (int[]) rawRowToDicIdMap.get(dimension);
                for (int dicId : dicIds) {
                    invertedIndexMap.get(dimension).add(dicId, docId);
                }
            }
        }
    }
    // time
    if (invertedIndexMap.containsKey(outgoingTimeColumnName)) {
        invertedIndexMap.get(outgoingTimeColumnName).add(rawRowToDicIdMap.get(outgoingTimeColumnName), docId);
    }
    docIdSearchableOffset = docId;
    numDocsIndexed += 1;
    numSuccessIndexed += 1;
    return numDocsIndexed < capacity;
}
Also used : FixedByteSingleColumnMultiValueReaderWriter (com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnMultiValueReaderWriter), FixedByteSingleColumnSingleValueReaderWriter (com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnSingleValueReaderWriter), HashMap (java.util.HashMap)
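
The tail of index() replays the collected dictionary IDs into the per-column inverted indexes, mapping every dictionary ID of the row to the row's docId; for a multi-value dimension that means one entry per value in the array. Pinot's actual inverted index type is not shown in this snippet, so the sketch below uses a plain Map as a stand-in just to illustrate that update.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class InvertedIndexUpdateSketch {

    public static void main(String[] args) {
        // Stand-in for the real inverted index: dictionary ID -> docIds containing that value
        Map<Integer, List<Integer>> invertedIndex = new HashMap<>();

        int docId = 42;
        // Dictionary IDs of one multi-value dimension row (cf. rawRowToDicIdMap above)
        int[] dicIds = { 3, 7, 11 };
        for (int dicId : dicIds) {
            invertedIndex.computeIfAbsent(dicId, k -> new ArrayList<>()).add(docId);
        }

        // e.g. {3=[42], 7=[42], 11=[42]}
        System.out.println(invertedIndex);
    }
}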

Aggregations

FixedByteSingleColumnMultiValueReaderWriter (com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnMultiValueReaderWriter): 3 usages
Random (java.util.Random): 2 usages
DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec): 1 usage
FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 1 usage
FixedByteSingleColumnSingleValueReaderWriter (com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnSingleValueReaderWriter): 1 usage
HashMap (java.util.HashMap): 1 usage