Use of com.linkedin.pinot.core.data.readers.TestRecordReader in project pinot by linkedin:
the class NoDictionaryGroupKeyGeneratorTest, method buildSegment.
/**
 * Helper method to build a segment as follows:
 * <ul>
 *   <li>One string column without dictionary.</li>
 *   <li>One integer column with dictionary.</li>
 * </ul>
 *
 * The unique group keys for the segment can be recovered by replaying the returned
 * reader (see the sketch after this method).
 *
 * @return Record reader over the rows used to build the segment.
 * @throws Exception
 */
private TestRecordReader buildSegment() throws Exception {
  // Schema with one column per entry in COLUMN_NAMES, using the matching data type.
  Schema schema = new Schema();
  for (int i = 0; i < COLUMN_NAMES.length; i++) {
    DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(COLUMN_NAMES[i], DATA_TYPES[i], true);
    schema.addField(dimensionFieldSpec);
  }

  SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
  // Columns listed here are written without a dictionary (raw forward index).
  config.setRawIndexCreationColumns(Arrays.asList(NO_DICT_COLUMN_NAMES));
  config.setOutDir(SEGMENT_DIR_NAME);
  config.setSegmentName(SEGMENT_NAME);

  // Generate NUM_ROWS rows of random values, matching each column's data type.
  Random random = new Random();
  List<GenericRow> rows = new ArrayList<>(NUM_ROWS);
  for (int i = 0; i < NUM_ROWS; i++) {
    Map<String, Object> map = new HashMap<>(NUM_COLUMNS);
    for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
      String column = fieldSpec.getName();
      FieldSpec.DataType dataType = fieldSpec.getDataType();
      switch (dataType) {
        case INT:
          map.put(column, random.nextInt());
          break;
        case LONG:
          map.put(column, random.nextLong());
          break;
        case FLOAT:
          map.put(column, random.nextFloat());
          break;
        case DOUBLE:
          map.put(column, random.nextDouble());
          break;
        case STRING:
          map.put(column, "value_" + i);
          break;
        default:
          throw new IllegalArgumentException("Illegal data type specified: " + dataType);
      }
    }
    GenericRow genericRow = new GenericRow();
    genericRow.init(map);
    rows.add(genericRow);
  }

  // Build the segment from the in-memory rows.
  SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
  _recordReader = new TestRecordReader(rows, schema);
  driver.init(config, _recordReader);
  driver.build();
  return _recordReader;
}
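For illustration, here is a minimal sketch of how the returned reader could be replayed to collect those unique group keys. It assumes the reader follows a rewind()/hasNext()/next() RecordReader contract and that GenericRow exposes getValue(String); the helper name and the "|" separator are arbitrary choices for this sketch.

private Set<String> computeUniqueGroupKeys(TestRecordReader recordReader) throws Exception {
  Set<String> groupKeys = new HashSet<>();
  recordReader.rewind(); // replay from the first row
  while (recordReader.hasNext()) {
    GenericRow row = recordReader.next();
    // Concatenate the values of all group-by columns into a single key.
    StringBuilder key = new StringBuilder();
    for (String column : COLUMN_NAMES) {
      if (key.length() > 0) {
        key.append('|');
      }
      key.append(row.getValue(column));
    }
    groupKeys.add(key.toString());
  }
  return groupKeys;
}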
Use of com.linkedin.pinot.core.data.readers.TestRecordReader in project pinot by linkedin:
the class RawIndexCreatorTest, method buildIndex.
/**
 * Helper method to build a segment from the given schema, creating RAW (no-dictionary)
 * indexes for all of its dimension columns.
 *
 * @return Record reader over the generated rows, rewound to the first row.
 * @throws Exception
 */
private RecordReader buildIndex(Schema schema) throws Exception {
  SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
  // Create raw (no-dictionary) indexes for all dimension columns of the schema.
  config.setRawIndexCreationColumns(schema.getDimensionNames());
  config.setOutDir(SEGMENT_DIR_NAME);
  config.setSegmentName(SEGMENT_NAME);

  // Generate NUM_ROWS rows of random values, matching each column's data type.
  final List<GenericRow> rows = new ArrayList<>();
  for (int row = 0; row < NUM_ROWS; row++) {
    HashMap<String, Object> map = new HashMap<>();
    for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
      Object value = getRandomValue(fieldSpec.getDataType());
      map.put(fieldSpec.getName(), value);
    }
    GenericRow genericRow = new GenericRow();
    genericRow.init(map);
    rows.add(genericRow);
  }

  SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
  RecordReader reader = new TestRecordReader(rows, schema);
  driver.init(config, reader);
  driver.build();

  // Open the generated segment for reading, and rewind the record reader so the
  // caller can replay the rows for verification.
  _segmentDirectory = SegmentDirectory.createFromLocalFS(driver.getOutputDirectory(), ReadMode.mmap);
  _segmentReader = _segmentDirectory.createReader();
  reader.rewind();
  return reader;
}
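All of these examples feed pre-built GenericRow lists through TestRecordReader, so a minimal in-memory reader along those lines is sketched below. It assumes this pinot version's RecordReader interface declares init()/rewind()/hasNext()/next()/getSchema()/close(); if your version declares additional methods (for example a reusable-row next(GenericRow)), they would need implementing as well. The class name is arbitrary.

public class InMemoryRecordReader implements RecordReader {
  private final List<GenericRow> _rows;
  private final Schema _schema;
  private int _nextRow = 0;

  public InMemoryRecordReader(List<GenericRow> rows, Schema schema) {
    _rows = rows;
    _schema = schema;
  }

  @Override
  public void init() {
    // Nothing to open for an in-memory reader.
  }

  @Override
  public void rewind() {
    _nextRow = 0; // replay from the first row
  }

  @Override
  public boolean hasNext() {
    return _nextRow < _rows.size();
  }

  @Override
  public GenericRow next() {
    return _rows.get(_nextRow++);
  }

  @Override
  public Schema getSchema() {
    return _schema;
  }

  @Override
  public void close() {
    // Nothing to release.
  }
}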
Use of com.linkedin.pinot.core.data.readers.TestRecordReader in project pinot by linkedin:
the class RawIndexBenchmark, method buildSegment.
/**
 * Helper method that builds a segment containing two columns, both populated with data
 * from the input file. The first column is stored with a raw index (no dictionary),
 * whereas the second column is dictionary encoded.
 *
 * @return Directory containing the generated segment.
 * @throws Exception
 */
private File buildSegment() throws Exception {
  // Schema with NUM_COLUMNS single-valued string columns.
  Schema schema = new Schema();
  for (int i = 0; i < NUM_COLUMNS; i++) {
    String column = "column_" + i;
    DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(column, FieldSpec.DataType.STRING, true);
    schema.addField(dimensionFieldSpec);
  }

  SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
  // Only the benchmark's raw-index column skips the dictionary.
  config.setRawIndexCreationColumns(Collections.singletonList(_rawIndexColumn));
  config.setOutDir(SEGMENT_DIR_NAME);
  config.setSegmentName(SEGMENT_NAME);

  final List<GenericRow> rows = new ArrayList<>();
  System.out.println("Reading data...");
  // try-with-resources ensures the reader is closed even if segment generation fails.
  try (BufferedReader reader = new BufferedReader(new FileReader(_dataFile))) {
    String value;
    while ((value = reader.readLine()) != null) {
      // Every column of the row gets the same value from the current line.
      HashMap<String, Object> map = new HashMap<>();
      for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
        map.put(fieldSpec.getName(), value);
      }
      GenericRow genericRow = new GenericRow();
      genericRow.init(map);
      rows.add(genericRow);
      _numRows++;
      if (_numRows % 1000000 == 0) {
        System.out.println("Read rows: " + _numRows);
      }
    }
  }

  System.out.println("Generating segment...");
  SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
  RecordReader recordReader = new TestRecordReader(rows, schema);
  driver.init(config, recordReader);
  driver.build();
  return new File(SEGMENT_DIR_NAME, SEGMENT_NAME);
}
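The benchmark consumes its input one line per row and fans each line's value out to every column. As a usage aid, here is a minimal, standard-Java sketch of producing such an input file; the class name, output file name, row count, and value cardinality are arbitrary choices for this sketch.

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.Random;

public class RawIndexBenchmarkDataGenerator {
  public static void main(String[] args) throws Exception {
    int numRows = 1_000_000;
    // Number of distinct values; lower cardinality favors dictionary encoding,
    // which makes the raw-vs-dictionary comparison meaningful.
    int cardinality = 10_000;
    Random random = new Random();
    try (BufferedWriter writer = new BufferedWriter(new FileWriter("rawIndexBenchmark.data"))) {
      for (int i = 0; i < numRows; i++) {
        writer.write("value_" + random.nextInt(cardinality));
        writer.newLine();
      }
    }
  }
}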