Search in sources:

Example 36 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

In the class AvroRecordReader, the method getGenericRow:

/**
 * Populates {@code row} with one entry per Avro field that has a matching
 * Pinot field spec, normalizing Avro-specific value types along the way.
 *
 * @param rawRecord Avro record read from the data stream
 * @param row row to populate; also returned for convenience
 * @return the populated {@code row}
 */
private GenericRow getGenericRow(GenericRecord rawRecord, GenericRow row) {
    for (final Field avroField : _dataStream.getSchema().getFields()) {
        final String fieldName = avroField.name();
        final FieldSpec fieldSpec = _schemaExtractor.getSchema().getFieldSpecFor(fieldName);
        // Fields absent from the Pinot schema are ignored.
        if (fieldSpec == null) {
            continue;
        }
        Object value = rawRecord.get(fieldName);
        if (value == null) {
            incrementNullCountFor(fieldName);
            // Replace the null with the schema-defined substitute: the default
            // null value for single-value fields, or the array transform of the
            // (null) value for multi-value fields.
            value = fieldSpec.isSingleValueField()
                ? fieldSpec.getDefaultNullValue()
                : transformAvroArrayToObjectArray((Array) value, fieldSpec);
        } else {
            // Normalize Avro-specific representations to plain Java objects.
            if (value instanceof Utf8) {
                value = ((Utf8) value).toString();
            }
            if (value instanceof Array) {
                value = transformAvroArrayToObjectArray((Array) value, fieldSpec);
            }
        }
        row.putField(fieldName, value);
    }
    return row;
}
Also used : Array(org.apache.avro.generic.GenericData.Array) Field(org.apache.avro.Schema.Field) Utf8(org.apache.avro.util.Utf8) FieldSpec(com.linkedin.pinot.common.data.FieldSpec)

Example 37 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

In the class PlainFieldExtractor, the method transform:

/**
 * Extracts and sanitizes every schema column from {@code row} into
 * {@code destinationRow}: converts the incoming time column to outgoing time,
 * coerces each value to the column's configured Pinot data type, and
 * substitutes the schema default value for nulls and failed conversions.
 *
 * @param row source row read from the input data
 * @param destinationRow row to populate; also returned
 * @return {@code destinationRow} with one entry per schema column
 */
@Override
public GenericRow transform(GenericRow row, GenericRow destinationRow) {
    // Per-row flags so each row-level counter below is bumped at most once per row.
    boolean hasError = false;
    boolean hasNull = false;
    boolean hasConversion = false;
    for (String column : _schema.getColumnNames()) {
        FieldSpec fieldSpec = _schema.getFieldSpecFor(column);
        // Ignore transform of DerivedMetric
        if (fieldSpec instanceof MetricFieldSpec && ((MetricFieldSpec) fieldSpec).isDerivedMetric()) {
            continue;
        }
        Object value;
        // Fetch value for this column.
        if (column.equals(_outgoingTimeColumnName) && _timeConverter != null) {
            // Convert incoming time to outgoing time.
            value = row.getValue(_incomingTimeColumnName);
            if (value == null) {
                hasNull = true;
                _totalNullCols++;
            } else {
                try {
                    value = _timeConverter.convert(value);
                } catch (Exception e) {
                    LOGGER.debug("Caught exception while converting incoming time value: {}", value, e);
                    // Null the value out so the default-value branch below applies.
                    value = null;
                    hasError = true;
                    _errorCount.put(column, _errorCount.get(column) + 1);
                }
            }
        } else {
            value = row.getValue(column);
            if (value == null) {
                hasNull = true;
                _totalNullCols++;
            }
        }
        // Convert value if necessary.
        PinotDataType dest = _columnType.get(column);
        PinotDataType source = null;
        if (value != null) {
            if (value instanceof Object[]) {
                // Multi-value.
                Object[] valueArray = (Object[]) value;
                if (valueArray.length > 0) {
                    // Infer the source type from the first element; fall back to a
                    // generic object array when the element type is not mapped.
                    source = MULTI_VALUE_TYPE_MAP.get(valueArray[0].getClass());
                    if (source == null) {
                        source = PinotDataType.OBJECT_ARRAY;
                    }
                } else {
                    LOGGER.debug("Got 0 length array.");
                    // Use default value for 0 length array.
                    value = null;
                    hasError = true;
                    _errorCount.put(column, _errorCount.get(column) + 1);
                }
            } else {
                // Single-value.
                source = SINGLE_VALUE_TYPE_MAP.get(value.getClass());
                if (source == null) {
                    source = PinotDataType.OBJECT;
                }
            }
            if (value != null && source != dest) {
                Object before = value;
                try {
                    value = dest.convert(before, source);
                    hasConversion = true;
                } catch (Exception e) {
                    LOGGER.debug("Caught exception while converting value: {} from: {} to: {}", before, source, dest);
                    // Conversion failure: fall back to the schema default below.
                    value = null;
                    hasError = true;
                    _errorCount.put(column, _errorCount.get(column) + 1);
                }
            }
            // Trim trailing null characters from single-value strings.
            // Allowing this can cause multiple values to map to the same padded value, breaking segment generation.
            if (dest == PinotDataType.STRING) {
                value = StringUtil.trimTrailingNulls((String) value);
            }
        }
        // Assign default value for null value.
        if (value == null) {
            if (fieldSpec.isSingleValueField()) {
                // Single-value field.
                value = fieldSpec.getDefaultNullValue();
            } else {
                // Multi-value field.
                value = new Object[] { fieldSpec.getDefaultNullValue() };
            }
        }
        destinationRow.putField(column, value);
    }
    // Row-level bookkeeping: each counter increments at most once per row.
    if (hasError) {
        _totalErrors++;
    }
    if (hasNull) {
        _totalNulls++;
    }
    if (hasConversion) {
        _totalConversions++;
    }
    return destinationRow;
}
Also used : MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec)

Example 38 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

In the class PinotSegmentRecordReader, the method getSchema:

/**
 * Reconstructs a Pinot {@link Schema} from the per-column segment metadata.
 * Dimension, metric, and time columns are mapped to their corresponding
 * field-spec types; columns with an unrecognized field type are skipped.
 *
 * @return schema named after the segment, containing one field spec per
 *         recognized column
 */
@Override
public Schema getSchema() {
    Schema schema = new Schema();
    schema.setSchemaName(segmentMetadata.getName());
    for (String column : columns) {
        ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
        String columnName = columnMetadata.getColumnName();
        DataType dataType = columnMetadata.getDataType();
        FieldType fieldType = columnMetadata.getFieldType();
        FieldSpec fieldSpec = null;
        switch(fieldType) {
            case DIMENSION:
                boolean isSingleValue = columnMetadata.isSingleValue();
                fieldSpec = new DimensionFieldSpec(columnName, dataType, isSingleValue);
                break;
            case METRIC:
                fieldSpec = new MetricFieldSpec(columnName, dataType);
                break;
            case TIME:
                TimeUnit timeType = columnMetadata.getTimeUnit();
                TimeGranularitySpec incomingGranularitySpec = new TimeGranularitySpec(dataType, timeType, columnName);
                fieldSpec = new TimeFieldSpec(incomingGranularitySpec);
                break;
            default:
                break;
        }
        // Fix: the default branch previously left fieldSpec null and passed it
        // to schema.addField(); skip unrecognized field types instead.
        if (fieldSpec != null) {
            schema.addField(fieldSpec);
        }
    }
    return schema;
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Example 39 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

In the class SegmentGeneratorConfig, the method getQualifyingDimensions:

/**
 * Builds a comma-separated, alphabetically sorted list of the names of all
 * schema fields whose field type matches the given one.
 *
 * @param type FieldType to filter on
 * @return comma-separated sorted field names (empty string if none match)
 */
@JsonIgnore
private String getQualifyingDimensions(FieldType type) {
    List<String> matchingNames = new ArrayList<>();
    for (FieldSpec fieldSpec : getSchema().getAllFieldSpecs()) {
        if (type == fieldSpec.getFieldType()) {
            matchingNames.add(fieldSpec.getName());
        }
    }
    Collections.sort(matchingNames);
    return StringUtils.join(matchingNames, ",");
}
Also used : ArrayList(java.util.ArrayList) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) JsonIgnore(org.codehaus.jackson.annotate.JsonIgnore)

Example 40 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

In the class DictionariesTest, the method testPaddedConflict:

/**
 * Tests SegmentDictionaryCreator for the case where the input contains one
 * empty string and a string consisting of a single padding character.
 *
 * This test asserts that the padded length of the empty string is 1 (in the
 * actual padded dictionary), and not 0.
 *
 * @throws Exception
 */
@Test
public void testPaddedConflict() throws Exception {
    File dictionaryDir = new File("/tmp/dict.test");
    dictionaryDir.deleteOnExit();
    FieldSpec stringColumnSpec = new DimensionFieldSpec("test", DataType.STRING, true);
    char paddingCharacter = '%';
    String[] sortedValues = new String[2];
    String[] paddedValues = new String[2];
    try {
        sortedValues[0] = "";
        sortedValues[1] = "%";
        // Already in sorted order: {"", "%"}
        Arrays.sort(sortedValues);
        SegmentDictionaryCreator dictionaryCreator =
            new SegmentDictionaryCreator(false, sortedValues, stringColumnSpec, dictionaryDir, paddingCharacter);
        boolean[] columnSortedFlags = new boolean[] { true };
        dictionaryCreator.build(columnSortedFlags);
    } catch (Exception e) {
        // The only acceptable failure mode is the dictionary-size mismatch.
        Assert.assertEquals(e.getMessage(), "Number of entries in dictionary != number of unique values in the data in column test");
    } finally {
        FileUtils.deleteQuietly(dictionaryDir);
    }
}
Also used : SegmentDictionaryCreator(com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator) File(java.io.File) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) Test(org.testng.annotations.Test)

Aggregations

FieldSpec (com.linkedin.pinot.common.data.FieldSpec)52 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)28 Test (org.testng.annotations.Test)15 TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec)14 MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec)13 File (java.io.File)11 Schema (com.linkedin.pinot.common.data.Schema)10 SegmentDictionaryCreator (com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator)7 HashMap (java.util.HashMap)7 TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec)6 AbstractColumnStatisticsCollector (com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector)6 Random (java.util.Random)5 Block (com.linkedin.pinot.core.common.Block)4 BlockMetadata (com.linkedin.pinot.core.common.BlockMetadata)4 DataSource (com.linkedin.pinot.core.common.DataSource)4 GenericRow (com.linkedin.pinot.core.data.GenericRow)4 SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig)4 SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl)4 ArrayList (java.util.ArrayList)4 DataType (com.linkedin.pinot.common.data.FieldSpec.DataType)3