Usage example of com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnSingleValueReaderWriter from the LinkedIn Pinot project.
Taken from class FixedByteSingleColumnSingleValueReaderWriterTest, method testInt.
@Test
public void testInt() throws IOException {
  int rows = 10;
  // One column holding a single 4-byte int per row.
  int[] columnSizesInBytes = new int[] { Integer.SIZE / 8 };
  FixedByteSingleColumnSingleValueReaderWriter readerWriter =
      new FixedByteSingleColumnSingleValueReaderWriter(rows, columnSizesInBytes);
  try {
    // Fixed seed so a failure reproduces with the same data.
    Random r = new Random(42L);
    int[] data = new int[rows];
    // Write random values, remembering them for verification.
    for (int i = 0; i < rows; i++) {
      data[i] = r.nextInt();
      readerWriter.setInt(i, data[i]);
    }
    // Read everything back and compare against what was written.
    for (int i = 0; i < rows; i++) {
      Assert.assertEquals(data[i], readerWriter.getInt(i));
    }
  } finally {
    // Release the underlying buffer even if an assertion fails.
    readerWriter.close();
  }
}
Usage example of com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnSingleValueReaderWriter from the LinkedIn Pinot project.
Taken from class FixedByteSingleColumnSingleValueReaderWriterTest, method testLong.
@Test
public void testLong() throws IOException {
  int rows = 10;
  // One column holding a single 8-byte long per row.
  int[] columnSizesInBytes = new int[] { Long.SIZE / 8 };
  FixedByteSingleColumnSingleValueReaderWriter readerWriter =
      new FixedByteSingleColumnSingleValueReaderWriter(rows, columnSizesInBytes);
  try {
    // Fixed seed so a failure reproduces with the same data.
    Random r = new Random(42L);
    long[] data = new long[rows];
    // Write random values, remembering them for verification.
    for (int i = 0; i < rows; i++) {
      data[i] = r.nextLong();
      readerWriter.setLong(i, data[i]);
    }
    // Read everything back and compare against what was written.
    for (int i = 0; i < rows; i++) {
      Assert.assertEquals(data[i], readerWriter.getLong(i));
    }
  } finally {
    // Release the underlying buffer even if an assertion fails.
    readerWriter.close();
  }
}
Usage example of com.linkedin.pinot.core.io.readerwriter.impl.FixedByteSingleColumnSingleValueReaderWriter from the LinkedIn Pinot project.
Taken from class RealtimeSegmentImpl, method index.
/**
 * Indexes one row into this realtime segment: validates it, updates the
 * per-column dictionaries, forward indexes, optional inverted indexes, and
 * the min/max time range.
 *
 * @param row the row to index; rows with a null dimension, metric, or time
 *            value are dropped (and metered) rather than indexed
 * @return {@code true} if the segment can accept more rows (i.e. the number
 *         of indexed docs is still below capacity); also {@code true} when
 *         an invalid row is dropped, so consumption continues
 */
@Override
public boolean index(GenericRow row) {
  // Validate the row prior to indexing it: collect the names of every
  // column with a null value so they can all be reported in one warning.
  StringBuilder invalidColumns = null;
  for (String dimension : dataSchema.getDimensionNames()) {
    invalidColumns = appendIfNull(row, dimension, invalidColumns);
  }
  for (String metric : dataSchema.getMetricNames()) {
    invalidColumns = appendIfNull(row, metric, invalidColumns);
  }
  invalidColumns = appendIfNull(row, outgoingTimeColumnName, invalidColumns);
  if (invalidColumns != null) {
    LOGGER.warn("Dropping invalid row {} with null values for column(s) {}", row, invalidColumns);
    serverMetrics.addMeteredTableValue(tableAndStreamName, ServerMeter.INVALID_REALTIME_ROWS_DROPPED, 1L);
    // Returning true keeps consumption going; the row is simply skipped.
    return true;
  }
  // Update dictionaries first, since filtering won't return back anything
  // unless a new entry is made in the inverted index. Also track the widest
  // multi-value entry seen per multi-value dimension.
  for (String dimension : dataSchema.getDimensionNames()) {
    dictionaryMap.get(dimension).index(row.getValue(dimension));
    if (!dataSchema.getFieldSpecFor(dimension).isSingleValueField()) {
      Object[] entries = (Object[]) row.getValue(dimension);
      if ((entries != null) && (maxNumberOfMultivaluesMap.get(dimension) < entries.length)) {
        maxNumberOfMultivaluesMap.put(dimension, entries.length);
      }
    }
  }
  for (String metric : dataSchema.getMetricNames()) {
    dictionaryMap.get(metric).index(row.getValue(metric));
  }
  // Time conversion already happens in PlainFieldExtractor; here we only
  // coerce the value to a long for the min/max range bookkeeping.
  Object timeValueObj = row.getValue(outgoingTimeColumnName);
  long timeValue;
  if (timeValueObj instanceof Number) {
    timeValue = ((Number) timeValueObj).longValue();
  } else {
    // parseLong avoids the boxing that Long.valueOf would incur.
    timeValue = Long.parseLong(timeValueObj.toString());
  }
  dictionaryMap.get(outgoingTimeColumnName).index(timeValueObj);
  // Update the min/max time values of the segment.
  minTimeVal = Math.min(minTimeVal, timeValue);
  maxTimeVal = Math.max(maxTimeVal, timeValue);
  // Collect all dictionary ids per column so the inverted indexes can be
  // updated after the forward indexes, below.
  Map<String, Object> rawRowToDicIdMap = new HashMap<>();
  // Update the forward indexes. Each column stores the dictionary id(s) of
  // the row's value(s) at this docId.
  int docId = docIdGenerator.incrementAndGet();
  for (String dimension : dataSchema.getDimensionNames()) {
    if (dataSchema.getFieldSpecFor(dimension).isSingleValueField()) {
      int dicId = dictionaryMap.get(dimension).indexOf(row.getValue(dimension));
      ((FixedByteSingleColumnSingleValueReaderWriter) columnIndexReaderWriterMap.get(dimension)).setInt(docId, dicId);
      rawRowToDicIdMap.put(dimension, dicId);
    } else {
      Object[] mValues = (Object[]) row.getValue(dimension);
      int[] dicIds;
      if (mValues != null) {
        dicIds = new int[mValues.length];
        for (int i = 0; i < dicIds.length; i++) {
          dicIds[i] = dictionaryMap.get(dimension).indexOf(mValues[i]);
        }
      } else {
        // A missing multi-value entry is stored as an empty id array.
        dicIds = EMPTY_DICTIONARY_IDS_ARRAY;
      }
      ((FixedByteSingleColumnMultiValueReaderWriter) columnIndexReaderWriterMap.get(dimension)).setIntArray(docId, dicIds);
      rawRowToDicIdMap.put(dimension, dicIds);
    }
  }
  for (String metric : dataSchema.getMetricNames()) {
    FixedByteSingleColumnSingleValueReaderWriter readerWriter = (FixedByteSingleColumnSingleValueReaderWriter) columnIndexReaderWriterMap.get(metric);
    int dicId = dictionaryMap.get(metric).indexOf(row.getValue(metric));
    readerWriter.setInt(docId, dicId);
    rawRowToDicIdMap.put(metric, dicId);
  }
  int timeDicId = dictionaryMap.get(outgoingTimeColumnName).indexOf(timeValueObj);
  ((FixedByteSingleColumnSingleValueReaderWriter) columnIndexReaderWriterMap.get(outgoingTimeColumnName)).setInt(docId, timeDicId);
  rawRowToDicIdMap.put(outgoingTimeColumnName, timeDicId);
  // Update the inverted indexes (only for columns that have one): metrics.
  for (String metric : dataSchema.getMetricNames()) {
    if (invertedIndexMap.containsKey(metric)) {
      invertedIndexMap.get(metric).add(rawRowToDicIdMap.get(metric), docId);
    }
  }
  // Inverted indexes: dimensions (multi-value dimensions add one posting
  // per value).
  for (String dimension : dataSchema.getDimensionNames()) {
    if (invertedIndexMap.containsKey(dimension)) {
      if (dataSchema.getFieldSpecFor(dimension).isSingleValueField()) {
        invertedIndexMap.get(dimension).add(rawRowToDicIdMap.get(dimension), docId);
      } else {
        int[] dicIds = (int[]) rawRowToDicIdMap.get(dimension);
        for (int dicId : dicIds) {
          invertedIndexMap.get(dimension).add(dicId, docId);
        }
      }
    }
  }
  // Inverted indexes: time column.
  if (invertedIndexMap.containsKey(outgoingTimeColumnName)) {
    invertedIndexMap.get(outgoingTimeColumnName).add(rawRowToDicIdMap.get(outgoingTimeColumnName), docId);
  }
  // Publish the new doc for queries only after all indexes are updated.
  docIdSearchableOffset = docId;
  numDocsIndexed += 1;
  numSuccessIndexed += 1;
  return numDocsIndexed < capacity;
}

/**
 * Appends {@code columnName} to the running list of invalid columns if the
 * row's value for that column is null.
 *
 * @param row            the row being validated
 * @param columnName     the column to check
 * @param invalidColumns the accumulator so far, or {@code null} if no
 *                       invalid column has been seen yet
 * @return the (possibly newly created) accumulator, or {@code null} if the
 *         value was non-null and no accumulator existed
 */
private StringBuilder appendIfNull(GenericRow row, String columnName, StringBuilder invalidColumns) {
  if (row.getValue(columnName) == null) {
    if (invalidColumns == null) {
      return new StringBuilder(columnName);
    }
    invalidColumns.append(", ").append(columnName);
  }
  return invalidColumns;
}
Aggregations