use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class RawIndexBenchmark method buildSegment.
/**
 * Helper method that builds a segment containing two columns, both populated with data from the input file.
 * The first column has raw indices (no dictionary), whereas the second column is dictionary encoded.
 *
 * @throws Exception
 */
private File buildSegment() throws Exception {
  Schema schema = new Schema();
  for (int i = 0; i < NUM_COLUMNS; i++) {
    String column = "column_" + i;
    DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(column, FieldSpec.DataType.STRING, true);
    schema.addField(dimensionFieldSpec);
  }

  SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
  config.setRawIndexCreationColumns(Collections.singletonList(_rawIndexColumn));
  config.setOutDir(SEGMENT_DIR_NAME);
  config.setSegmentName(SEGMENT_NAME);

  BufferedReader reader = new BufferedReader(new FileReader(_dataFile));
  String value;
  final List<GenericRow> rows = new ArrayList<>();

  System.out.println("Reading data...");
  while ((value = reader.readLine()) != null) {
    HashMap<String, Object> map = new HashMap<>();
    for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
      map.put(fieldSpec.getName(), value);
    }
    GenericRow genericRow = new GenericRow();
    genericRow.init(map);
    rows.add(genericRow);
    _numRows++;
    if (_numRows % 1000000 == 0) {
      System.out.println("Read rows: " + _numRows);
    }
  }

  System.out.println("Generating segment...");
  SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
  RecordReader recordReader = new TestRecordReader(rows, schema);
  driver.init(config, recordReader);
  driver.build();
  return new File(SEGMENT_DIR_NAME, SEGMENT_NAME);
}
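For context, a minimal sketch of how the segment returned by buildSegment() might then be loaded for the benchmark. It assumes ColumnarSegmentLoader and ReadMode behave as in the IntegrationTest example further down this page; the runBenchmark() wrapper itself is hypothetical and not part of RawIndexBenchmark.

// Usage sketch only; runBenchmark() is an assumed caller, not code from the project.
private void runBenchmark() throws Exception {
  File segmentDir = buildSegment();
  // Memory-map the freshly built segment so both the raw-index column and the
  // dictionary-encoded column can be scanned.
  IndexSegment segment = ColumnarSegmentLoader.load(segmentDir, ReadMode.mmap);
  System.out.println("Loaded segment: " + segment.getSegmentName() + " with " + _numRows + " rows");
}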
use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class HelixStarterTest method setupSegment.
private void setupSegment(File segmentDir, String tableName) throws Exception {
  String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));
  SegmentGeneratorConfig config =
      SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), segmentDir, TimeUnit.DAYS, tableName, null);
  SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  driver.init(config);
  driver.build();
  LOGGER.info("Table: {} built at path: {}", tableName, segmentDir.getAbsolutePath());
}
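A hedged sketch of how a test might invoke this helper; the scratch directory and table name below are illustrative assumptions, not values taken from HelixStarterTest itself.

// Hypothetical call site; directory and table name are placeholders.
File segmentDir = new File("/tmp/helixStarterTest/segment_0"); // assumed scratch location
setupSegment(segmentDir, "myTable");                           // builds one segment for "myTable"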
use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class SegmentTestUtils method getSegmentGenSpecWithSchemAndProjectedColumns.
public static SegmentGeneratorConfig getSegmentGenSpecWithSchemAndProjectedColumns(File inputAvro, File outputDir,
    TimeUnit timeUnit, String clusterName, Schema inputPinotSchema) throws IOException {
  Schema schema;
  if (inputPinotSchema == null) {
    schema = AvroUtils.extractSchemaFromAvro(inputAvro);
  } else {
    schema = inputPinotSchema;
  }

  SegmentGeneratorConfig segmentGenSpec = new SegmentGeneratorConfig(schema);
  segmentGenSpec.setInputFilePath(inputAvro.getAbsolutePath());
  segmentGenSpec.setSegmentTimeUnit(timeUnit);
  if (inputAvro.getName().endsWith("gz")) {
    segmentGenSpec.setFormat(FileFormat.GZIPPED_AVRO);
  } else {
    segmentGenSpec.setFormat(FileFormat.AVRO);
  }
  segmentGenSpec.setSegmentVersion(SegmentVersion.v1);
  segmentGenSpec.setTableName(clusterName);
  segmentGenSpec.setOutDir(outputDir.getAbsolutePath());
  return segmentGenSpec;
}
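Putting the pieces together, a sketch of the call pattern the other examples on this page follow once the config is built. The Avro path, output directory, and table name are placeholders; the driver calls mirror the HelixStarterTest and ChunkIndexCreationDriverImplTest snippets.

// Sketch only: file locations are assumed placeholders, error handling omitted.
File avroFile = new File("/path/to/data.avro");    // assumed input Avro file
File outputDir = new File("/tmp/segmentOutput");   // assumed output directory
SegmentGeneratorConfig config =
    SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(avroFile, outputDir, TimeUnit.DAYS, "myTable", null);
SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
driver.init(config);
driver.build();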
use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class IntegrationTest method setupSegmentList.
private void setupSegmentList() throws Exception {
  final URL resource = getClass().getClassLoader().getResource(SMALL_AVRO_DATA);
  final String filePath = TestUtils.getFileFromResourceUrl(resource);
  _indexSegmentList.clear();
  if (INDEXES_DIR.exists()) {
    FileUtils.deleteQuietly(INDEXES_DIR);
  }
  INDEXES_DIR.mkdir();

  for (int i = 0; i < 2; ++i) {
    final File segmentDir = new File(INDEXES_DIR, "segment_" + i);
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
        new File(filePath), segmentDir, "dim" + i, TimeUnit.DAYS, "testTable");
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();
    _indexSegmentList.add(ColumnarSegmentLoader.load(
        new File(new File(INDEXES_DIR, "segment_" + String.valueOf(i)), driver.getSegmentName()), ReadMode.mmap));
    // System.out.println("built at : " + segmentDir.getAbsolutePath());
  }
}
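A plausible teardown counterpart for this setup, not shown in the original excerpt. It assumes the loaded segments expose a destroy() method for releasing their buffers; adjust to the actual IndexSegment API if that differs.

// Teardown sketch (assumption: IndexSegment provides destroy()).
private void tearDownSegmentList() {
  for (IndexSegment segment : _indexSegmentList) {
    segment.destroy();                    // release the mmap'ed segment resources
  }
  _indexSegmentList.clear();
  FileUtils.deleteQuietly(INDEXES_DIR);   // remove the generated segment directories
}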
use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class ChunkIndexCreationDriverImplTest method setUP.
@BeforeClass
public void setUP() throws Exception {
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }

  final String filePath =
      TestUtils.getFileFromResourceUrl(ChunkIndexCreationDriverImplTest.class.getClassLoader().getResource(AVRO_DATA));
  final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
      new File(filePath), INDEX_DIR, "daysSinceEpoch", TimeUnit.DAYS, "testTable");
  config.setSegmentNamePostfix("1");
  config.setTimeColumnName("daysSinceEpoch");

  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  driver.init(config);
  driver.build();
}
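For completeness, a plausible cleanup counterpart to this setup (not part of the original excerpt), reusing the same FileUtils call the setup already relies on.

// Sketch of a matching teardown; the method name is an assumption.
@AfterClass
public void tearDown() {
  // Mirror of the setup: drop the generated index directory so repeated runs start clean.
  FileUtils.deleteQuietly(INDEX_DIR);
}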