Use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.
In class SelectionOperatorUtils, the method extractDataSchema:
/**
* Extract the {@link DataSchema} from sort sequence, selection columns and {@link IndexSegment}. (Inner segment)
* <p>Inside data schema, we just store each column once (de-duplicated).
*
* @param sortSequence sort sequence.
* @param selectionColumns selection columns.
* @param indexSegment index segment.
* @return data schema.
*/
@Nonnull
public static DataSchema extractDataSchema(@Nullable List<SelectionSort> sortSequence,
    @Nonnull List<String> selectionColumns, @Nonnull IndexSegment indexSegment) {
  List<String> columnList = new ArrayList<>();
  Set<String> columnSet = new HashSet<>();
  if (sortSequence != null) {
    for (SelectionSort selectionSort : sortSequence) {
      String column = selectionSort.getColumn();
      columnList.add(column);
      columnSet.add(column);
    }
  }
  for (String column : selectionColumns) {
    if (!columnSet.contains(column)) {
      columnList.add(column);
      columnSet.add(column);
    }
  }
  int numColumns = columnList.size();
  String[] columns = new String[numColumns];
  DataType[] dataTypes = new DataType[numColumns];
  for (int i = 0; i < numColumns; i++) {
    String column = columnList.get(i);
    columns[i] = column;
    DataSourceMetadata columnMetadata = indexSegment.getDataSource(column).getDataSourceMetadata();
    if (columnMetadata.isSingleValue()) {
      dataTypes[i] = columnMetadata.getDataType();
    } else {
      // Multi-value columns are recorded with the multi-value variant of the data type.
      dataTypes[i] = columnMetadata.getDataType().toMultiValue();
    }
  }
  return new DataSchema(columns, dataTypes);
}
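As a rough usage sketch (not taken from the Pinot sources), a segment-level selection operator could build its schema as follows; the segment variable, the column names, and the SelectionSort setup are illustrative assumptions:

// Hypothetical caller of extractDataSchema for "SELECT colA, colB ... ORDER BY colA".
SelectionSort sortOnA = new SelectionSort();
sortOnA.setColumn("colA"); // assumed Thrift-style setter on SelectionSort
List<SelectionSort> sortSequence = Collections.singletonList(sortOnA);
List<String> selectionColumns = Arrays.asList("colA", "colB");
DataSchema dataSchema = SelectionOperatorUtils.extractDataSchema(sortSequence, selectionColumns, segment);
// Sort columns come first and columns are de-duplicated, so "colA" appears only once.

Because sort columns are inserted before the selection columns and tracked in a set, ORDER BY columns keep their leading position without being duplicated in the schema.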
Use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.
In class ColumnMetadata, the method fromPropertiesConfiguration:
public static ColumnMetadata fromPropertiesConfiguration(String column, PropertiesConfiguration config) {
  Builder builder = new Builder();
  builder.setColumnName(column);
  builder.setCardinality(config.getInt(getKeyFor(column, CARDINALITY)));
  int totalDocs = config.getInt(getKeyFor(column, TOTAL_DOCS));
  builder.setTotalDocs(totalDocs);
  builder.setTotalRawDocs(config.getInt(getKeyFor(column, TOTAL_RAW_DOCS), totalDocs));
  builder.setTotalAggDocs(config.getInt(getKeyFor(column, TOTAL_AGG_DOCS), 0));
  DataType dataType = DataType.valueOf(config.getString(getKeyFor(column, DATA_TYPE)).toUpperCase());
  builder.setDataType(dataType);
  builder.setBitsPerElement(config.getInt(getKeyFor(column, BITS_PER_ELEMENT)));
  builder.setStringColumnMaxLength(config.getInt(getKeyFor(column, DICTIONARY_ELEMENT_SIZE)));
  builder.setFieldType(FieldType.valueOf(config.getString(getKeyFor(column, COLUMN_TYPE)).toUpperCase()));
  builder.setIsSorted(config.getBoolean(getKeyFor(column, IS_SORTED)));
  builder.setContainsNulls(config.getBoolean(getKeyFor(column, HAS_NULL_VALUE)));
  builder.setHasDictionary(config.getBoolean(getKeyFor(column, HAS_DICTIONARY), true));
  builder.setHasInvertedIndex(config.getBoolean(getKeyFor(column, HAS_INVERTED_INDEX)));
  builder.setSingleValue(config.getBoolean(getKeyFor(column, IS_SINGLE_VALUED)));
  builder.setMaxNumberOfMultiValues(config.getInt(getKeyFor(column, MAX_MULTI_VALUE_ELEMTS)));
  builder.setTotalNumberOfEntries(config.getInt(getKeyFor(column, TOTAL_NUMBER_OF_ENTRIES)));
  builder.setAutoGenerated(config.getBoolean(getKeyFor(column, IS_AUTO_GENERATED), false));
  builder.setDefaultNullValueString(config.getString(getKeyFor(column, DEFAULT_NULL_VALUE), null));
  builder.setTimeUnit(TimeUnit.valueOf(config.getString(TIME_UNIT, "DAYS").toUpperCase()));
  char paddingCharacter = V1Constants.Str.LEGACY_STRING_PAD_CHAR;
  if (config.containsKey(SEGMENT_PADDING_CHARACTER)) {
    String padding = config.getString(SEGMENT_PADDING_CHARACTER);
    paddingCharacter = StringEscapeUtils.unescapeJava(padding).charAt(0);
  }
  builder.setPaddingCharacter(paddingCharacter);
  // The DERIVED_METRIC_TYPE property is used to check whether this field is derived or not;
  // the ORIGIN_COLUMN property is used to indicate the origin field of this derived metric.
  String typeStr = config.getString(getKeyFor(column, DERIVED_METRIC_TYPE), null);
  DerivedMetricType derivedMetricType = (typeStr == null) ? null : DerivedMetricType.valueOf(typeStr.toUpperCase());
  if (derivedMetricType != null) {
    switch (derivedMetricType) {
      case HLL:
        try {
          final int hllLog2m = config.getInt(V1Constants.MetadataKeys.Segment.SEGMENT_HLL_LOG2M);
          builder.setFieldSize(HllUtil.getHllFieldSizeFromLog2m(hllLog2m));
          final String originColumnName = config.getString(getKeyFor(column, ORIGIN_COLUMN));
          builder.setOriginColumnName(originColumnName);
        } catch (RuntimeException e) {
          LOGGER.error("Column: " + column + " is HLL derived column, but missing log2m, fieldSize or originColumnName.");
          throw e;
        }
        break;
      default:
        throw new IllegalArgumentException("Column: " + column + " with derived metric Type: " + derivedMetricType + " is not supported in building column metadata.");
    }
    builder.setDerivedMetricType(derivedMetricType);
  }
  // Set min/max value if available.
  String minString = config.getString(getKeyFor(column, MIN_VALUE), null);
  String maxString = config.getString(getKeyFor(column, MAX_VALUE), null);
  if ((minString != null) && (maxString != null)) {
    switch (dataType) {
      case INT:
        builder.setMinValue(Integer.valueOf(minString));
        builder.setMaxValue(Integer.valueOf(maxString));
        break;
      case LONG:
        builder.setMinValue(Long.valueOf(minString));
        builder.setMaxValue(Long.valueOf(maxString));
        break;
      case FLOAT:
        builder.setMinValue(Float.valueOf(minString));
        builder.setMaxValue(Float.valueOf(maxString));
        break;
      case DOUBLE:
        builder.setMinValue(Double.valueOf(minString));
        builder.setMaxValue(Double.valueOf(maxString));
        break;
      case STRING:
        builder.setMinValue(minString);
        builder.setMaxValue(maxString);
        break;
      default:
        throw new IllegalStateException("Unsupported data type: " + dataType + " for column: " + column);
    }
  }
  return builder.build();
}
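For context, a minimal sketch of loading a segment's column metadata from disk; the file name, the indexDir variable, the column name, and the getter names on ColumnMetadata are assumptions for illustration:

// Hypothetical loader: commons-configuration reads the flat key/value metadata file.
PropertiesConfiguration config = new PropertiesConfiguration(new File(indexDir, "metadata.properties")); // may throw ConfigurationException
ColumnMetadata metadata = ColumnMetadata.fromPropertiesConfiguration("daysSinceEpoch", config);
// Min/max were parsed according to the column's DataType (INT, LONG, FLOAT, DOUBLE or STRING).
System.out.println(metadata.getDataType() + " in [" + metadata.getMinValue() + ", " + metadata.getMaxValue() + "]");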
Use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.
In class FileBasedSentineTest, the method setup:
@BeforeClass
public void setup() throws Exception {
  url = new URL("http://localhost:" + FileBasedServerBrokerStarters.BROKER_CLIENT_PORT + "/query");
  // Let's generate data.
  final String[] columns = { "dimention1", "dimention2", "dimention3", "dimention4", "metric1", "daysSinceEpoch" };
  final Map<String, DataType> dataTypes = new HashMap<String, FieldSpec.DataType>();
  final Map<String, FieldType> fieldTypes = new HashMap<String, FieldType>();
  final Map<String, TimeUnit> timeUnits = new HashMap<String, TimeUnit>();
  final Map<String, Integer> cardinality = new HashMap<String, Integer>();
  // Create an empty range map, as the signature of DataGeneratorSpec has changed and this test
  // does not use metric/time as fieldType.
  final Map<String, IntRange> range = new HashMap<String, IntRange>();
  for (final String col : columns) {
    if (col.equals("dimention1")) {
      dataTypes.put(col, DataType.STRING);
      cardinality.put(col, 1000);
    } else {
      dataTypes.put(col, DataType.INT);
      cardinality.put(col, 1000);
    }
    fieldTypes.put(col, FieldType.DIMENSION);
  }
  if (avroDataDir.exists()) {
    FileUtils.deleteDirectory(avroDataDir);
  }
  final DataGeneratorSpec spec = new DataGeneratorSpec(Arrays.asList(columns), cardinality, range, dataTypes,
      fieldTypes, timeUnits, FileFormat.AVRO, avroDataDir.getAbsolutePath(), true);
  generator = new DataGenerator();
  generator.init(spec);
  generator.generate(100000L, 2);
  // Let's make segments now.
  final File bootstrapDir = new File(FileBasedServerBrokerStarters.SERVER_BOOTSTRAP_DIR);
  if (bootstrapDir.exists()) {
    FileUtils.deleteDirectory(bootstrapDir);
  }
  bootstrapDir.mkdir();
  int counter = 0;
  for (final File avro : avroDataDir.listFiles()) {
    for (final String table : FileBasedServerBrokerStarters.TABLE_NAMES) {
      final SegmentGeneratorConfig genConfig = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(avro,
          new File(bootstrapDir, "segment-" + counter), "daysSinceEpoch", TimeUnit.DAYS, table);
      final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
      driver.init(genConfig);
      driver.build();
      counter++;
    }
  }
  // Let's start the server and the broker now.
  starter = new FileBasedServerBrokerStarters();
  starter.startAll();
  // Pick some values from here if you need them for running filter queries.
  final JSONObject selectionRequestResponse = postQuery("select * from 'table1' limit 100",
      "http://localhost:" + FileBasedServerBrokerStarters.BROKER_CLIENT_PORT);
  // System.out.println(selectionRequestResponse.toString(1));
}
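Once startAll() returns, the test's own postQuery helper can be reused for filter queries against the generated data; the literal value below is hypothetical, since the actual dimension values are produced at random by DataGenerator:

// Hypothetical follow-up query using the test's postQuery helper.
final JSONObject filterResponse = postQuery("select count(*) from 'table1' where dimention1 = 'abc'",
    "http://localhost:" + FileBasedServerBrokerStarters.BROKER_CLIENT_PORT);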
Use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.
In class DataTableSerDeTest, the method testAllDataTypes:
@Test
public void testAllDataTypes() throws IOException {
  DataType[] columnTypes = DataType.values();
  int numColumns = columnTypes.length;
  String[] columnNames = new String[numColumns];
  for (int i = 0; i < numColumns; i++) {
    columnNames[i] = columnTypes[i].name();
  }
  DataSchema dataSchema = new DataSchema(columnNames, columnTypes);
  DataTableBuilder dataTableBuilder = new DataTableBuilder(dataSchema);
  boolean[] booleans = new boolean[NUM_ROWS];
  byte[] bytes = new byte[NUM_ROWS];
  char[] chars = new char[NUM_ROWS];
  short[] shorts = new short[NUM_ROWS];
  int[] ints = new int[NUM_ROWS];
  long[] longs = new long[NUM_ROWS];
  float[] floats = new float[NUM_ROWS];
  double[] doubles = new double[NUM_ROWS];
  String[] strings = new String[NUM_ROWS];
  Object[] objects = new Object[NUM_ROWS];
  byte[][] byteArrays = new byte[NUM_ROWS][];
  char[][] charArrays = new char[NUM_ROWS][];
  short[][] shortArrays = new short[NUM_ROWS][];
  int[][] intArrays = new int[NUM_ROWS][];
  long[][] longArrays = new long[NUM_ROWS][];
  float[][] floatArrays = new float[NUM_ROWS][];
  double[][] doubleArrays = new double[NUM_ROWS][];
  String[][] stringArrays = new String[NUM_ROWS][];
  for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
    dataTableBuilder.startRow();
    for (int colId = 0; colId < numColumns; colId++) {
      switch (columnTypes[colId]) {
        case BOOLEAN:
          booleans[rowId] = RANDOM.nextBoolean();
          dataTableBuilder.setColumn(colId, booleans[rowId]);
          break;
        case BYTE:
          bytes[rowId] = (byte) RANDOM.nextInt();
          dataTableBuilder.setColumn(colId, bytes[rowId]);
          break;
        case CHAR:
          chars[rowId] = (char) RANDOM.nextInt();
          dataTableBuilder.setColumn(colId, chars[rowId]);
          break;
        case SHORT:
          shorts[rowId] = (short) RANDOM.nextInt();
          dataTableBuilder.setColumn(colId, shorts[rowId]);
          break;
        case INT:
          ints[rowId] = RANDOM.nextInt();
          dataTableBuilder.setColumn(colId, ints[rowId]);
          break;
        case LONG:
          longs[rowId] = RANDOM.nextLong();
          dataTableBuilder.setColumn(colId, longs[rowId]);
          break;
        case FLOAT:
          floats[rowId] = RANDOM.nextFloat();
          dataTableBuilder.setColumn(colId, floats[rowId]);
          break;
        case DOUBLE:
          doubles[rowId] = RANDOM.nextDouble();
          dataTableBuilder.setColumn(colId, doubles[rowId]);
          break;
        case STRING:
          strings[rowId] = RandomStringUtils.random(RANDOM.nextInt(20));
          dataTableBuilder.setColumn(colId, strings[rowId]);
          break;
        // Just test Double here, all object types will be covered in ObjectCustomSerDeTest.
        case OBJECT:
          objects[rowId] = RANDOM.nextDouble();
          dataTableBuilder.setColumn(colId, objects[rowId]);
          break;
        case BYTE_ARRAY:
          int length = RANDOM.nextInt(20);
          byte[] byteArray = new byte[length];
          for (int i = 0; i < length; i++) {
            byteArray[i] = (byte) RANDOM.nextInt();
          }
          byteArrays[rowId] = byteArray;
          dataTableBuilder.setColumn(colId, byteArray);
          break;
        case CHAR_ARRAY:
          length = RANDOM.nextInt(20);
          char[] charArray = new char[length];
          for (int i = 0; i < length; i++) {
            charArray[i] = (char) RANDOM.nextInt();
          }
          charArrays[rowId] = charArray;
          dataTableBuilder.setColumn(colId, charArray);
          break;
        case SHORT_ARRAY:
          length = RANDOM.nextInt(20);
          short[] shortArray = new short[length];
          for (int i = 0; i < length; i++) {
            shortArray[i] = (short) RANDOM.nextInt();
          }
          shortArrays[rowId] = shortArray;
          dataTableBuilder.setColumn(colId, shortArray);
          break;
        case INT_ARRAY:
          length = RANDOM.nextInt(20);
          int[] intArray = new int[length];
          for (int i = 0; i < length; i++) {
            intArray[i] = RANDOM.nextInt();
          }
          intArrays[rowId] = intArray;
          dataTableBuilder.setColumn(colId, intArray);
          break;
        case LONG_ARRAY:
          length = RANDOM.nextInt(20);
          long[] longArray = new long[length];
          for (int i = 0; i < length; i++) {
            longArray[i] = RANDOM.nextLong();
          }
          longArrays[rowId] = longArray;
          dataTableBuilder.setColumn(colId, longArray);
          break;
        case FLOAT_ARRAY:
          length = RANDOM.nextInt(20);
          float[] floatArray = new float[length];
          for (int i = 0; i < length; i++) {
            floatArray[i] = RANDOM.nextFloat();
          }
          floatArrays[rowId] = floatArray;
          dataTableBuilder.setColumn(colId, floatArray);
          break;
        case DOUBLE_ARRAY:
          length = RANDOM.nextInt(20);
          double[] doubleArray = new double[length];
          for (int i = 0; i < length; i++) {
            doubleArray[i] = RANDOM.nextDouble();
          }
          doubleArrays[rowId] = doubleArray;
          dataTableBuilder.setColumn(colId, doubleArray);
          break;
        case STRING_ARRAY:
          length = RANDOM.nextInt(20);
          String[] stringArray = new String[length];
          for (int i = 0; i < length; i++) {
            stringArray[i] = RandomStringUtils.random(RANDOM.nextInt(20));
          }
          stringArrays[rowId] = stringArray;
          dataTableBuilder.setColumn(colId, stringArray);
          break;
      }
    }
    dataTableBuilder.finishRow();
  }
  DataTable dataTable = dataTableBuilder.build();
  DataTable newDataTable = DataTableFactory.getDataTable(dataTable.toBytes());
  Assert.assertEquals(newDataTable.getDataSchema(), dataSchema, ERROR_MESSAGE);
  Assert.assertEquals(newDataTable.getNumberOfRows(), NUM_ROWS, ERROR_MESSAGE);
  for (int rowId = 0; rowId < NUM_ROWS; rowId++) {
    for (int colId = 0; colId < numColumns; colId++) {
      switch (columnTypes[colId]) {
        case BOOLEAN:
          Assert.assertEquals(newDataTable.getBoolean(rowId, colId), booleans[rowId], ERROR_MESSAGE);
          break;
        case BYTE:
          Assert.assertEquals(newDataTable.getByte(rowId, colId), bytes[rowId], ERROR_MESSAGE);
          break;
        case CHAR:
          Assert.assertEquals(newDataTable.getChar(rowId, colId), chars[rowId], ERROR_MESSAGE);
          break;
        case SHORT:
          Assert.assertEquals(newDataTable.getShort(rowId, colId), shorts[rowId], ERROR_MESSAGE);
          break;
        case INT:
          Assert.assertEquals(newDataTable.getInt(rowId, colId), ints[rowId], ERROR_MESSAGE);
          break;
        case LONG:
          Assert.assertEquals(newDataTable.getLong(rowId, colId), longs[rowId], ERROR_MESSAGE);
          break;
        case FLOAT:
          Assert.assertEquals(newDataTable.getFloat(rowId, colId), floats[rowId], ERROR_MESSAGE);
          break;
        case DOUBLE:
          Assert.assertEquals(newDataTable.getDouble(rowId, colId), doubles[rowId], ERROR_MESSAGE);
          break;
        case STRING:
          Assert.assertEquals(newDataTable.getString(rowId, colId), strings[rowId], ERROR_MESSAGE);
          break;
        case OBJECT:
          Assert.assertEquals(newDataTable.getObject(rowId, colId), objects[rowId], ERROR_MESSAGE);
          break;
        case BYTE_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getByteArray(rowId, colId), byteArrays[rowId]), ERROR_MESSAGE);
          break;
        case CHAR_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getCharArray(rowId, colId), charArrays[rowId]), ERROR_MESSAGE);
          break;
        case SHORT_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getShortArray(rowId, colId), shortArrays[rowId]), ERROR_MESSAGE);
          break;
        case INT_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getIntArray(rowId, colId), intArrays[rowId]), ERROR_MESSAGE);
          break;
        case LONG_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getLongArray(rowId, colId), longArrays[rowId]), ERROR_MESSAGE);
          break;
        case FLOAT_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getFloatArray(rowId, colId), floatArrays[rowId]), ERROR_MESSAGE);
          break;
        case DOUBLE_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getDoubleArray(rowId, colId), doubleArrays[rowId]), ERROR_MESSAGE);
          break;
        case STRING_ARRAY:
          Assert.assertTrue(Arrays.equals(newDataTable.getStringArray(rowId, colId), stringArrays[rowId]), ERROR_MESSAGE);
          break;
      }
    }
  }
}
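The round trip this test exercises reduces to a few lines. A minimal sketch of the core pattern, reusing the builder from the test above, with intColumnId standing in for whichever column index holds the INT type (an assumption for illustration):

// Build the table, serialize it to bytes, then deserialize and read back a typed cell.
DataTable original = dataTableBuilder.build();
byte[] serialized = original.toBytes(); // may throw IOException
DataTable restored = DataTableFactory.getDataTable(serialized);
int value = restored.getInt(0, intColumnId); // first row, assumed INT column index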
Use of com.linkedin.pinot.common.data.FieldSpec.DataType in project pinot by linkedin.
In class GenerateDataCommand, the method execute:
@Override
public boolean execute() throws Exception {
  LOGGER.info("Executing command: " + toString());
  if ((_numRecords < 0) || (_numFiles < 0)) {
    throw new RuntimeException("Cannot generate negative number of records/files.");
  }
  Schema schema = Schema.fromFile(new File(_schemaFile));
  List<String> columns = new LinkedList<String>();
  final HashMap<String, DataType> dataTypes = new HashMap<String, DataType>();
  final HashMap<String, FieldType> fieldTypes = new HashMap<String, FieldType>();
  final HashMap<String, TimeUnit> timeUnits = new HashMap<String, TimeUnit>();
  final HashMap<String, Integer> cardinality = new HashMap<String, Integer>();
  final HashMap<String, IntRange> range = new HashMap<String, IntRange>();
  buildCardinalityRangeMaps(_schemaAnnFile, cardinality, range);
  final DataGeneratorSpec spec = buildDataGeneratorSpec(schema, columns, dataTypes, fieldTypes, timeUnits,
      cardinality, range);
  final DataGenerator gen = new DataGenerator();
  gen.init(spec);
  gen.generate(_numRecords, _numFiles);
  return true;
}
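For comparison, a hedged sketch of driving the generator directly rather than through the command, assuming a DataGeneratorSpec named spec has already been built as in buildDataGeneratorSpec above; the record and file counts are illustrative:

// Direct use of DataGenerator, mirroring the tail of execute().
final DataGenerator gen = new DataGenerator();
gen.init(spec);         // spec: a previously built DataGeneratorSpec (assumption)
gen.generate(1000L, 1); // e.g. 1000 records into a single output file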