Usage example of com.linkedin.pinot.core.data.GenericRow in the LinkedIn Pinot project:
class AvroDataPublisherTest, method TestReadAvro.
@Test
public void TestReadAvro() throws Exception {
  final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));
  final String jsonPath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(JSON_DATA));
  // Schema with two string dimensions; the reader should surface only these columns.
  Schema schema = new Schema.SchemaBuilder()
      .addSingleValueDimension("column3", DataType.STRING)
      .addSingleValueDimension("column2", DataType.STRING)
      .build();
  final SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
  config.setFormat(FileFormat.AVRO);
  config.setInputFilePath(filePath);
  config.setSegmentVersion(SegmentVersion.v1);
  AvroRecordReader avroDataPublisher = (AvroRecordReader) RecordReaderFactory.get(config);
  int cnt = 0;
  // Explicit charset: the no-charset readLines() overload decodes with the
  // platform default encoding, which makes the test machine-dependent.
  for (String line : FileUtils.readLines(new File(jsonPath), "UTF-8")) {
    // Each JSON line is the expected content of the corresponding Avro record.
    JSONObject obj = new JSONObject(line);
    if (avroDataPublisher.hasNext()) {
      GenericRow recordRow = avroDataPublisher.next();
      for (String column : recordRow.getFieldNames()) {
        String valueFromJson = obj.get(column).toString();
        String valueFromAvro = recordRow.getValue(column).toString();
        // NOTE(review): the first two rows are deliberately skipped here —
        // confirm whether this guard is still needed or masks a data mismatch.
        if (cnt > 1) {
          Assert.assertEquals(valueFromJson, valueFromAvro);
        }
      }
    }
    cnt++;
  }
  // One JSON line per expected record.
  Assert.assertEquals(cnt, 10001);
}
Usage example of com.linkedin.pinot.core.data.GenericRow in the LinkedIn Pinot project:
class AvroDataPublisherTest, method TestReadMultiValueAvro.
@Test
public void TestReadMultiValueAvro() throws Exception {
  final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_MULTI_DATA));
  // Schema is extracted directly from the Avro file, so all columns are read.
  final SegmentGeneratorConfig config = new SegmentGeneratorConfig(AvroUtils.extractSchemaFromAvro(new File(filePath)));
  config.setFormat(FileFormat.AVRO);
  config.setInputFilePath(filePath);
  config.setSegmentVersion(SegmentVersion.v1);
  AvroRecordReader avroDataPublisher = (AvroRecordReader) RecordReaderFactory.get(config);
  int cnt = 0;
  while (avroDataPublisher.hasNext()) {
    GenericRow recordRow = avroDataPublisher.next();
    for (String column : recordRow.getFieldNames()) {
      final String valueStringFromAvro;
      if (avroDataPublisher.getSchema().getFieldSpecFor(column).isSingleValueField()) {
        valueStringFromAvro = recordRow.getValue(column).toString();
      } else {
        // Multi-value column: render as "[v1, v2, ...]". StringBuilder avoids
        // the O(n^2) cost of repeated String '+=' in the original loop.
        Object[] valuesFromAvro = (Object[]) recordRow.getValue(column);
        StringBuilder builder = new StringBuilder("[");
        for (int i = 0; i < valuesFromAvro.length; i++) {
          if (i > 0) {
            builder.append(", ");
          }
          builder.append(valuesFromAvro[i]);
        }
        valueStringFromAvro = builder.append(']').toString();
      }
      // The original code built this string and never used it; at minimum make
      // sure every column value materializes without throwing.
      Assert.assertNotNull(valueStringFromAvro);
    }
    cnt++;
  }
  Assert.assertEquals(28949, cnt);
}
Usage example of com.linkedin.pinot.core.data.GenericRow in the LinkedIn Pinot project:
class StarTreeIndexTestSegmentHelper, method buildSegment.
/**
 * Builds a star-tree test segment with string dimensions, int metrics, and a
 * day-granularity time column, then writes it under the given directory.
 *
 * @param segmentDirName Output directory for the segment.
 * @param segmentName Name of the segment to create.
 * @param hllConfig HLL configuration passed through to the generator config.
 * @param enableOffHeapFormat Whether the star-tree index uses the off-heap format.
 * @return The schema used to generate the segment.
 * @throws Exception If segment generation fails.
 */
private static Schema buildSegment(String segmentDirName, String segmentName, HllConfig hllConfig, boolean enableOffHeapFormat) throws Exception {
  // Row count scales with the number of distinct dimension permutations.
  final int numRows = (int) MathUtils.factorial(NUM_DIMENSIONS) * 100;

  // Assemble the schema: dimensions d1..dN, a DAYS time column, metrics m1..mM.
  Schema schema = new Schema();
  for (int d = 0; d < NUM_DIMENSIONS; d++) {
    String dimensionName = "d" + (d + 1);
    schema.addField(dimensionName, new DimensionFieldSpec(dimensionName, FieldSpec.DataType.STRING, true));
  }
  schema.setTimeFieldSpec(new TimeFieldSpec(TIME_COLUMN_NAME, FieldSpec.DataType.INT, TimeUnit.DAYS));
  for (int m = 0; m < NUM_METRICS; m++) {
    String metricName = "m" + (m + 1);
    schema.addField(metricName, new MetricFieldSpec(metricName, FieldSpec.DataType.INT));
  }

  SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
  config.setEnableStarTreeIndex(true);
  config.setOutDir(segmentDirName);
  config.setFormat(FileFormat.AVRO);
  config.setSegmentName(segmentName);
  config.setHllConfig(hllConfig);
  config.setStarTreeIndexSpec(buildStarTreeIndexSpec(enableOffHeapFormat));

  // Fixed seed keeps the generated data reproducible across runs.
  Random random = new Random(RANDOM_SEED);
  final List<GenericRow> data = new ArrayList<>();
  for (int rowId = 0; rowId < numRows; rowId++) {
    HashMap<String, Object> rowValues = new HashMap<>();
    // First half of the dimensions: values derived from the row index.
    for (int d = 0; d < NUM_DIMENSIONS / 2; d++) {
      String dimensionName = schema.getDimensionFieldSpecs().get(d).getName();
      rowValues.put(dimensionName, dimensionName + "-v" + rowId % (NUM_DIMENSIONS - d));
    }
    // Second half: random suffixes raise the cardinality of these columns to
    // better exercise the HLL code paths.
    for (int d = NUM_DIMENSIONS / 2; d < NUM_DIMENSIONS; d++) {
      String dimensionName = schema.getDimensionFieldSpecs().get(d).getName();
      rowValues.put(dimensionName, dimensionName + "-v" + random.nextInt(d * 100));
    }
    // Metric columns get uniformly random ints below METRIC_MAX_VALUE.
    for (int m = 0; m < NUM_METRICS; m++) {
      rowValues.put(schema.getMetricFieldSpecs().get(m).getName(), random.nextInt(METRIC_MAX_VALUE));
    }
    // Time column cycles through a one-week window of day values.
    rowValues.put(TIME_COLUMN_NAME, rowId % 7);

    GenericRow genericRow = new GenericRow();
    genericRow.init(rowValues);
    data.add(genericRow);
  }

  SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
  driver.init(config, new TestUtils.GenericRowRecordReader(schema, data));
  driver.build();
  LOGGER.info("Built segment {} at {}", segmentName, segmentDirName);
  return schema;
}
Usage example of com.linkedin.pinot.core.data.GenericRow in the LinkedIn Pinot project:
class NoDictionaryGroupKeyGeneratorTest, method getExpectedGroupKeys.
/**
 * Computes the expected set of group keys by scanning every record and joining
 * the values of the given group-by columns with the trimming-service delimiter.
 *
 * @param recordReader Reader over the test records (rewound before scanning).
 * @param groupByColumns Group-by columns for which to generate the group keys.
 * @return Set of unique group keys.
 * @throws Exception If the record reader fails.
 */
private Set<String> getExpectedGroupKeys(RecordReader recordReader, String[] groupByColumns) throws Exception {
  Set<String> groupKeys = new HashSet<>();
  recordReader.rewind();
  while (recordReader.hasNext()) {
    GenericRow row = recordReader.next();
    // Build one delimiter-separated key per record.
    StringBuilder keyBuilder = new StringBuilder();
    for (int i = 0; i < groupByColumns.length; i++) {
      if (i > 0) {
        keyBuilder.append(AggregationGroupByTrimmingService.GROUP_KEY_DELIMITER);
      }
      keyBuilder.append(row.getValue(groupByColumns[i]));
    }
    groupKeys.add(keyBuilder.toString());
  }
  return groupKeys;
}
Usage example of com.linkedin.pinot.core.data.GenericRow in the LinkedIn Pinot project:
class NoDictionaryGroupKeyGeneratorTest, method buildSegment.
/**
 * Builds a test segment containing:
 * <ul>
 *   <li> One string column without dictionary. </li>
 *   <li> One integer column with dictionary. </li>
 * </ul>
 *
 * The record reader used to feed the generator is retained in
 * {@code _recordReader} so callers can replay the generated rows.
 *
 * @return Record reader over the rows written into the segment.
 * @throws Exception If segment generation fails.
 */
private TestRecordReader buildSegment() throws Exception {
  // One single-value dimension per configured column name/type pair.
  Schema schema = new Schema();
  for (int i = 0; i < COLUMN_NAMES.length; i++) {
    schema.addField(new DimensionFieldSpec(COLUMN_NAMES[i], DATA_TYPES[i], true));
  }

  SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
  // Columns listed here are created without a dictionary (raw index).
  config.setRawIndexCreationColumns(Arrays.asList(NO_DICT_COLUMN_NAMES));
  config.setOutDir(SEGMENT_DIR_NAME);
  config.setSegmentName(SEGMENT_NAME);

  Random random = new Random();
  List<GenericRow> rows = new ArrayList<>(NUM_ROWS);
  for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
    Map<String, Object> rowValues = new HashMap<>(NUM_COLUMNS);
    for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
      final Object value;
      switch (fieldSpec.getDataType()) {
        case INT:
          value = random.nextInt();
          break;
        case LONG:
          value = random.nextLong();
          break;
        case FLOAT:
          value = random.nextFloat();
          break;
        case DOUBLE:
          value = random.nextDouble();
          break;
        case STRING:
          // String values are derived from the row index, not random.
          value = "value_" + rowId;
          break;
        default:
          throw new IllegalArgumentException("Illegal data type specified: " + fieldSpec.getDataType());
      }
      rowValues.put(fieldSpec.getName(), value);
    }
    GenericRow genericRow = new GenericRow();
    genericRow.init(rowValues);
    rows.add(genericRow);
  }

  SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
  _recordReader = new TestRecordReader(rows, schema);
  driver.init(config, _recordReader);
  driver.build();
  return _recordReader;
}
Aggregations