use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class AvroDataPublisherTest method TestReadPartialAvro.
@Test
public void TestReadPartialAvro() throws Exception {
  final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));
  final String jsonPath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(JSON_DATA));
  final List<String> projectedColumns = new ArrayList<String>();
  projectedColumns.add("column3");
  projectedColumns.add("column2");
  Schema schema = new Schema.SchemaBuilder()
      .addSingleValueDimension("column3", DataType.STRING)
      .addSingleValueDimension("column2", DataType.STRING)
      .build();
  final SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
  config.setFormat(FileFormat.AVRO);
  config.setInputFilePath(filePath);
  config.setSegmentVersion(SegmentVersion.v1);
  final AvroRecordReader avroDataPublisher =
      new AvroRecordReader(FieldExtractorFactory.getPlainFieldExtractor(config), config.getInputFilePath());
  avroDataPublisher.next();
  int cnt = 0;
  for (final String line : FileUtils.readLines(new File(jsonPath))) {
    final JSONObject obj = new JSONObject(line);
    if (avroDataPublisher.hasNext()) {
      final GenericRow recordRow = avroDataPublisher.next();
      // System.out.println(recordRow);
      Assert.assertEquals(recordRow.getFieldNames().length, 2);
      for (final String column : recordRow.getFieldNames()) {
        final String valueFromJson = obj.get(column).toString();
        final String valueFromAvro = recordRow.getValue(column).toString();
        if (cnt > 1) {
          Assert.assertEquals(valueFromAvro, valueFromJson);
        }
      }
    }
    cnt++;
  }
  Assert.assertEquals(10001, cnt);
}
use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class CrcUtilsTest method makeSegmentAndReturnPath.
private String makeSegmentAndReturnPath() throws Exception {
  final String filePath =
      TestUtils.getFileFromResourceUrl(ChunkIndexCreationDriverImplTest.class.getClassLoader().getResource(AVRO_DATA));
  final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
      new File(filePath), INDEX_DIR, "daysSinceEpoch", TimeUnit.DAYS, "testTable");
  config.setSegmentNamePostfix("1");
  config.setTimeColumnName("daysSinceEpoch");
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  driver.init(config);
  driver.build();
  return new File(INDEX_DIR, driver.getSegmentName()).getAbsolutePath();
}
use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class BaseClusterIntegrationTest method buildSegmentsFromAvro.
public static Future<Map<File, File>> buildSegmentsFromAvro(final List<File> avroFiles, Executor executor, int baseSegmentIndex, final File baseDirectory, final File segmentTarDir, final String tableName, final boolean createStarTreeIndex, final com.linkedin.pinot.common.data.Schema inputPinotSchema) {
  int segmentCount = avroFiles.size();
  LOGGER.info("Building " + segmentCount + " segments in parallel");
  List<ListenableFutureTask<Pair<File, File>>> futureTasks = new ArrayList<ListenableFutureTask<Pair<File, File>>>();
  for (int i = 1; i <= segmentCount; ++i) {
    final int segmentIndex = i - 1;
    final int segmentNumber = i + baseSegmentIndex;
    final ListenableFutureTask<Pair<File, File>> buildSegmentFutureTask =
        ListenableFutureTask.<Pair<File, File>>create(new Callable<Pair<File, File>>() {
          @Override
          public Pair<File, File> call() throws Exception {
            try {
              // Build segment
              LOGGER.info("Starting to build segment " + segmentNumber);
              File outputDir = new File(baseDirectory, "segment-" + segmentNumber);
              final File inputAvroFile = avroFiles.get(segmentIndex);
              final SegmentGeneratorConfig genConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
                  inputAvroFile, outputDir, TimeUnit.DAYS, tableName, inputPinotSchema);
              if (inputPinotSchema != null) {
                genConfig.setSchema(inputPinotSchema);
              }
              // jfim: We add a space and a special character to do a regression test for PINOT-3296 Segments with spaces
              // in their filename don't work properly
              genConfig.setSegmentNamePostfix(Integer.toString(segmentNumber) + " %");
              genConfig.setEnableStarTreeIndex(createStarTreeIndex);
              // Enable off heap star tree format in the integration test.
              StarTreeIndexSpec starTreeIndexSpec = null;
              if (createStarTreeIndex) {
                starTreeIndexSpec = new StarTreeIndexSpec();
                starTreeIndexSpec.setEnableOffHeapFormat(true);
              }
              genConfig.setStarTreeIndexSpec(starTreeIndexSpec);
              final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
              driver.init(genConfig);
              driver.build();
              // Tar segment
              String segmentName = outputDir.list()[0];
              final String tarGzPath = TarGzCompressionUtils.createTarGzOfDirectory(
                  outputDir.getAbsolutePath() + "/" + segmentName,
                  new File(segmentTarDir, segmentName).getAbsolutePath());
              LOGGER.info("Completed segment " + segmentNumber + " : " + segmentName + " from file " + inputAvroFile.getName());
              return new ImmutablePair<File, File>(inputAvroFile, new File(tarGzPath));
            } catch (Exception e) {
              LOGGER.error("Exception while building segment input: {} output {} ", avroFiles.get(segmentIndex), "segment-" + segmentNumber);
              throw new RuntimeException(e);
            }
          }
        });
    futureTasks.add(buildSegmentFutureTask);
    executor.execute(buildSegmentFutureTask);
  }
  ListenableFuture<List<Pair<File, File>>> pairListFuture = Futures.allAsList(futureTasks);
  return Futures.transform(pairListFuture, new AsyncFunction<List<Pair<File, File>>, Map<File, File>>() {
    @Override
    public ListenableFuture<Map<File, File>> apply(List<Pair<File, File>> input) throws Exception {
      Map<File, File> avroToSegmentMap = new HashMap<File, File>();
      for (Pair<File, File> avroToSegmentPair : input) {
        avroToSegmentMap.put(avroToSegmentPair.getLeft(), avroToSegmentPair.getRight());
      }
      return Futures.immediateFuture(avroToSegmentMap);
    }
  });
}
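The returned future resolves to a map from each input Avro file to its tarred segment, so a caller can simply block on it once all build tasks have been submitted. A minimal usage sketch, assuming a throwaway executor and placeholder directories and table name (none of these values come from the Pinot tests):

// Hypothetical invocation; directories, table name and executor are placeholders.
ExecutorService executor = Executors.newCachedThreadPool();
Future<Map<File, File>> mapFuture = buildSegmentsFromAvro(
    avroFiles, executor, 0, new File("/tmp/segments"), new File("/tmp/segmentTars"), "myTable", false, null);
// Block until every segment has been built and tarred.
Map<File, File> avroToTarredSegment = mapFuture.get();
executor.shutdown();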
use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class SegmentTestUtils method getSegmentGeneratorConfigWithSchema.
public static SegmentGeneratorConfig getSegmentGeneratorConfigWithSchema(File inputAvro, File outputDir, String tableName, Schema schema) {
  SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
  segmentGeneratorConfig.setInputFilePath(inputAvro.getAbsolutePath());
  segmentGeneratorConfig.setOutDir(outputDir.getAbsolutePath());
  segmentGeneratorConfig.setFormat(FileFormat.AVRO);
  segmentGeneratorConfig.setSegmentVersion(SegmentVersion.v1);
  segmentGeneratorConfig.setTableName(tableName);
  segmentGeneratorConfig.setTimeColumnName(schema.getTimeColumnName());
  segmentGeneratorConfig.setSegmentTimeUnit(schema.getOutgoingTimeUnit());
  return segmentGeneratorConfig;
}
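A caller would then feed this config to the segment creation driver, as the other snippets on this page do. A minimal usage sketch, assuming a placeholder Avro file, output directory and table name, and an existing schema variable:

// Illustrative values; only the config/driver calls are taken from the snippets on this page.
SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGeneratorConfigWithSchema(
    new File("/tmp/data.avro"), new File("/tmp/index"), "myTable", schema);
SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
driver.init(config);
driver.build();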
use of com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig in project pinot by linkedin.
the class IntArraysTest method before.
@BeforeClass
public static void before() throws Exception {
  final String filePath = TestUtils.getFileFromResourceUrl(DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }
  // System.out.println(INDEX_DIR.getAbsolutePath());
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
      new File(filePath), INDEX_DIR, "weeksSinceEpochSunday", TimeUnit.DAYS, "test");
  config.setTimeColumnName("weeksSinceEpochSunday");
  driver.init(config);
  driver.build();
  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  final org.apache.avro.Schema avroSchema = avroReader.getSchema();
  final String[] columns = new String[avroSchema.getFields().size()];
  int i = 0;
  for (final Field f : avroSchema.getFields()) {
    columns[i] = f.name();
    i++;
  }
}
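Taken together, these snippets share one SegmentGeneratorConfig lifecycle: build the config from a Pinot Schema (directly or via a SegmentTestUtils helper), point it at the input Avro file and output directory, set the format, segment version and time column, then hand it to a SegmentIndexCreationDriver obtained from SegmentCreationDriverFactory and call init followed by build. A condensed sketch, assuming placeholder paths and table name and an existing schema variable (every call appears in the snippets above):

// Condensed pattern; all setters and driver calls are shown in the snippets above, values are placeholders.
SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
config.setInputFilePath("/tmp/data.avro");
config.setOutDir("/tmp/index");
config.setFormat(FileFormat.AVRO);
config.setSegmentVersion(SegmentVersion.v1);
config.setTableName("myTable");
config.setTimeColumnName("daysSinceEpoch");
SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
driver.init(config);
driver.build();
String segmentPath = new File("/tmp/index", driver.getSegmentName()).getAbsolutePath();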