Use of com.linkedin.pinot.common.data.FieldSpec in the Pinot project by LinkedIn.
From class AvroRecordReader, method getGenericRow.
/**
 * Populates {@code row} with one entry per field of the Avro record's schema and
 * returns the same row. Fields absent from the Pinot schema are skipped; null
 * values fall back to the field spec's default (single-value) or the array
 * transform (multi-value); Utf8 values are normalized to String and Avro arrays
 * to Object arrays.
 */
private GenericRow getGenericRow(GenericRecord rawRecord, GenericRow row) {
  for (final Field avroField : _dataStream.getSchema().getFields()) {
    final String fieldName = avroField.name();
    final FieldSpec fieldSpec = _schemaExtractor.getSchema().getFieldSpecFor(fieldName);
    // Column not declared in the Pinot schema: ignore it entirely.
    if (fieldSpec == null) {
      continue;
    }
    Object fieldValue = rawRecord.get(fieldName);
    if (fieldValue == null) {
      incrementNullCountFor(fieldName);
      if (fieldSpec.isSingleValueField()) {
        fieldValue = fieldSpec.getDefaultNullValue();
      } else {
        // Multi-value: delegate null handling to the array transform
        // (fieldValue is null here; the helper is expected to cope).
        fieldValue = transformAvroArrayToObjectArray((Array) fieldValue, fieldSpec);
      }
    } else {
      if (fieldValue instanceof Utf8) {
        fieldValue = ((Utf8) fieldValue).toString();
      }
      if (fieldValue instanceof Array) {
        fieldValue = transformAvroArrayToObjectArray((Array) fieldValue, fieldSpec);
      }
    }
    row.putField(fieldName, fieldValue);
  }
  return row;
}
Use of com.linkedin.pinot.common.data.FieldSpec in the Pinot project by LinkedIn.
From class PlainFieldExtractor, method transform.
/**
 * Transforms {@code row} into {@code destinationRow}, column by column:
 * converts the incoming time column value to the outgoing time format,
 * coerces each value to the column's target {@code PinotDataType}, trims
 * trailing padding nulls from STRING values, and substitutes the field
 * spec's default value wherever a value is missing or a conversion fails.
 * Per-row flags feed the running error/null/conversion counters.
 */
@Override
public GenericRow transform(GenericRow row, GenericRow destinationRow) {
// Per-row flags; each increments its corresponding total at most once per row.
boolean hasError = false;
boolean hasNull = false;
boolean hasConversion = false;
for (String column : _schema.getColumnNames()) {
FieldSpec fieldSpec = _schema.getFieldSpecFor(column);
// Ignore transform of DerivedMetric (computed elsewhere, not read from the row).
if (fieldSpec instanceof MetricFieldSpec && ((MetricFieldSpec) fieldSpec).isDerivedMetric()) {
continue;
}
Object value;
// Fetch value for this column.
if (column.equals(_outgoingTimeColumnName) && _timeConverter != null) {
// Convert incoming time to outgoing time (value is read from the
// incoming time column, not from the outgoing column itself).
value = row.getValue(_incomingTimeColumnName);
if (value == null) {
hasNull = true;
_totalNullCols++;
} else {
try {
value = _timeConverter.convert(value);
} catch (Exception e) {
LOGGER.debug("Caught exception while converting incoming time value: {}", value, e);
// Null out the value so the default-value fallback below kicks in.
value = null;
hasError = true;
// NOTE(review): assumes _errorCount is pre-populated with an entry for
// every column — otherwise this unboxing NPEs; verify initialization.
_errorCount.put(column, _errorCount.get(column) + 1);
}
}
} else {
value = row.getValue(column);
if (value == null) {
hasNull = true;
_totalNullCols++;
}
}
// Convert value if necessary: determine the source PinotDataType from the
// runtime class, then convert to the configured destination type.
PinotDataType dest = _columnType.get(column);
PinotDataType source = null;
if (value != null) {
if (value instanceof Object[]) {
// Multi-value: infer the element type from the first element.
Object[] valueArray = (Object[]) value;
if (valueArray.length > 0) {
source = MULTI_VALUE_TYPE_MAP.get(valueArray[0].getClass());
if (source == null) {
source = PinotDataType.OBJECT_ARRAY;
}
} else {
LOGGER.debug("Got 0 length array.");
// Use default value for 0 length array (treated as an error row).
value = null;
hasError = true;
_errorCount.put(column, _errorCount.get(column) + 1);
}
} else {
// Single-value.
source = SINGLE_VALUE_TYPE_MAP.get(value.getClass());
if (source == null) {
source = PinotDataType.OBJECT;
}
}
if (value != null && source != dest) {
Object before = value;
try {
value = dest.convert(before, source);
hasConversion = true;
} catch (Exception e) {
LOGGER.debug("Caught exception while converting value: {} from: {} to: {}", before, source, dest);
value = null;
hasError = true;
_errorCount.put(column, _errorCount.get(column) + 1);
}
}
// Trim trailing padding nulls from STRING values: allowing them can cause
// multiple values to map to the same padded value, breaking segment generation.
// NOTE(review): value may be null here if the conversion above failed —
// presumably trimTrailingNulls is null-safe; confirm.
if (dest == PinotDataType.STRING) {
value = StringUtil.trimTrailingNulls((String) value);
}
}
// Assign default value for null value (missing, empty array, or failed conversion).
if (value == null) {
if (fieldSpec.isSingleValueField()) {
// Single-value field.
value = fieldSpec.getDefaultNullValue();
} else {
// Multi-value field: wrap the default in a one-element array.
value = new Object[] { fieldSpec.getDefaultNullValue() };
}
}
destinationRow.putField(column, value);
}
// Fold the per-row flags into the running totals.
if (hasError) {
_totalErrors++;
}
if (hasNull) {
_totalNulls++;
}
if (hasConversion) {
_totalConversions++;
}
return destinationRow;
}
Use of com.linkedin.pinot.common.data.FieldSpec in the Pinot project by LinkedIn.
From class PinotSegmentRecordReader, method getSchema.
/**
 * Reconstructs a Pinot {@link Schema} from the segment's column metadata:
 * one DimensionFieldSpec, MetricFieldSpec, or TimeFieldSpec per column,
 * depending on the column's {@link FieldType}.
 *
 * Bug fix: columns with an unrecognized field type previously fell through
 * the switch with {@code fieldSpec == null} and were passed to
 * {@code schema.addField(null)}; they are now skipped instead.
 *
 * @return schema named after the segment, containing a field spec per
 *         recognized column
 */
@Override
public Schema getSchema() {
  Schema schema = new Schema();
  schema.setSchemaName(segmentMetadata.getName());
  for (String column : columns) {
    ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
    String columnName = columnMetadata.getColumnName();
    DataType dataType = columnMetadata.getDataType();
    FieldType fieldType = columnMetadata.getFieldType();
    FieldSpec fieldSpec = null;
    switch (fieldType) {
      case DIMENSION:
        boolean isSingleValue = columnMetadata.isSingleValue();
        fieldSpec = new DimensionFieldSpec(columnName, dataType, isSingleValue);
        break;
      case METRIC:
        fieldSpec = new MetricFieldSpec(columnName, dataType);
        break;
      case TIME:
        // Incoming granularity doubles as outgoing when only one spec is given.
        TimeUnit timeType = columnMetadata.getTimeUnit();
        TimeGranularitySpec incomingGranularitySpec = new TimeGranularitySpec(dataType, timeType, columnName);
        fieldSpec = new TimeFieldSpec(incomingGranularitySpec);
        break;
      default:
        break;
    }
    // Skip columns whose field type produced no spec rather than adding null.
    if (fieldSpec != null) {
      schema.addField(fieldSpec);
    }
  }
  return schema;
}
Use of com.linkedin.pinot.common.data.FieldSpec in the Pinot project by LinkedIn.
From class SegmentGeneratorConfig, method getQualifyingDimensions.
/**
 * Builds a comma-separated, alphabetically sorted list of the names of all
 * schema fields whose field type matches {@code type}.
 *
 * @param type FieldType to filter on
 * @return comma-separated sorted field names (empty string if none match)
 */
@JsonIgnore
private String getQualifyingDimensions(FieldType type) {
  final List<String> matchingNames = new ArrayList<>();
  for (final FieldSpec fieldSpec : getSchema().getAllFieldSpecs()) {
    if (type == fieldSpec.getFieldType()) {
      matchingNames.add(fieldSpec.getName());
    }
  }
  Collections.sort(matchingNames);
  return StringUtils.join(matchingNames, ",");
}
Use of com.linkedin.pinot.common.data.FieldSpec in the Pinot project by LinkedIn.
From class DictionariesTest, method testPaddedConflict.
/**
 * Tests SegmentDictionaryCreator for the case where the input contains one
 * empty string and a string consisting of a single padding character.
 *
 * This test asserts that the padded length of the empty string is 1 (in the
 * actual padded dictionary), and not 0, by expecting the dictionary build to
 * fail with a duplicate-padded-value error.
 *
 * Bug fix: the original test had no {@code Assert.fail()} after
 * {@code build()}, so it passed silently when no exception was thrown.
 * Also removed the unused {@code paddedStrings} local.
 *
 * @throws Exception
 */
@Test
public void testPaddedConflict() throws Exception {
  File indexDir = new File("/tmp/dict.test");
  indexDir.deleteOnExit();
  FieldSpec fieldSpec = new DimensionFieldSpec("test", DataType.STRING, true);
  String[] inputStrings = new String[2];
  char paddingChar = '%';
  try {
    inputStrings[0] = "";
    inputStrings[1] = "%";
    // Sorted order: {"", "%"}
    Arrays.sort(inputStrings);
    SegmentDictionaryCreator dictionaryCreator =
        new SegmentDictionaryCreator(false, inputStrings, fieldSpec, indexDir, paddingChar);
    boolean[] isSorted = new boolean[1];
    isSorted[0] = true;
    dictionaryCreator.build(isSorted);
    // Both "" and "%" pad to the same value, so build() must throw.
    // (Assert.fail throws AssertionError, which the catch below does not swallow.)
    Assert.fail("Expected dictionary build to fail: \"\" and \"%\" pad to the same value");
  } catch (Exception e) {
    Assert.assertEquals(e.getMessage(), "Number of entries in dictionary != number of unique values in the data in column test");
  } finally {
    FileUtils.deleteQuietly(indexDir);
  }
}
Aggregations