Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
The class TestLazyAccumuloMap, method testIntMap:
@Test
public void testIntMap() throws SerDeException, IOException {
  AccumuloHiveRow row = new AccumuloHiveRow("row");
  row.add(new Text("cf1"), new Text("1"), "2".getBytes());
  row.add(new Text("cf1"), new Text("2"), "4".getBytes());
  row.add(new Text("cf1"), new Text("3"), "6".getBytes());
  HiveAccumuloMapColumnMapping mapping = new HiveAccumuloMapColumnMapping("cf1", null,
      ColumnEncoding.STRING, ColumnEncoding.STRING, "column",
      TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo).toString());
  // Map of Integer to Integer
  Text nullSequence = new Text("\\N");
  ObjectInspector oi = LazyFactory.createLazyObjectInspector(
      TypeInfoUtils.getTypeInfosFromTypeString("map<int,int>").get(0),
      new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
  LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi);
  map.init(row, mapping);
  Assert.assertEquals(3, map.getMapSize());
  Object o = map.getMapValueElement(new IntWritable(1));
  Assert.assertNotNull(o);
  Assert.assertEquals(new IntWritable(2), ((LazyInteger) o).getWritableObject());
  o = map.getMapValueElement(new IntWritable(2));
  Assert.assertNotNull(o);
  Assert.assertEquals(new IntWritable(4), ((LazyInteger) o).getWritableObject());
  o = map.getMapValueElement(new IntWritable(3));
  Assert.assertNotNull(o);
  Assert.assertEquals(new IntWritable(6), ((LazyInteger) o).getWritableObject());
}
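In the test above, getTypeInfosFromTypeString turns the single type string map<int,int> into one TypeInfo, which is what LazyFactory needs to build the lazy map inspector. A minimal standalone sketch of that parsing step (illustrative class name; only the hive-serde library is assumed on the classpath):

import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeStringParsingSketch {
  public static void main(String[] args) {
    // Parse the same type string the test hands to LazyFactory.
    List<TypeInfo> parsed = TypeInfoUtils.getTypeInfosFromTypeString("map<int,int>");
    System.out.println(parsed.size());               // 1: a single type string yields a single TypeInfo
    System.out.println(parsed.get(0).getTypeName()); // map<int,int>
    // The parsed TypeInfo compares equal to the one built programmatically,
    // which is why the column mapping above can be declared via TypeInfoFactory.
    TypeInfo built = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
    System.out.println(parsed.get(0).equals(built)); // expected: true
  }
}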
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hadoop-pcap by RIPE-NCC.
The class PcapDeserializer, method initialize:
@Override
public void initialize(Configuration cfg, Properties props) throws SerDeException {
  String columnNameProperty = props.getProperty(Constants.LIST_COLUMNS);
  columnNames = Arrays.asList(columnNameProperty.split(","));
  numColumns = columnNames.size();
  String columnTypeProperty = props.getProperty(Constants.LIST_COLUMN_TYPES);
  List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  // Ensure we have the same number of column names and types
  assert numColumns == columnTypes.size();
  List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(numColumns);
  row = new ArrayList<Object>(numColumns);
  for (int c = 0; c < numColumns; c++) {
    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnTypes.get(c));
    inspectors.add(oi);
    row.add(null);
  }
  inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
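The columns and columns.types properties read here are flat strings, and the type list can contain nested generics whose own commas or colons would defeat a naive split; getTypeInfosFromTypeString tokenizes the whole string, so nested type arguments stay attached to their containers. A small sketch under that assumption (made-up type string, illustrative class name):

import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ColumnTypesParsingSketch {
  public static void main(String[] args) {
    // A columns.types-style value with a comma nested inside map<string,int>.
    String columnTypeProperty = "string,map<string,int>,array<double>";
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    // Three top-level columns, not four: the nested comma is not treated as a separator.
    System.out.println(columnTypes.size()); // expected: 3
    for (TypeInfo t : columnTypes) {
      System.out.println(t.getTypeName()); // string, map<string,int>, array<double>
    }
  }
}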
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
The class DruidOutputFormat, method getHiveRecordWriter:
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
    Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
    Progressable progress) throws IOException {
  final String segmentGranularity = tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY) != null
      ? tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY)
      : HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_INDEXING_GRANULARITY);
  final int targetNumShardsPerGranularity =
      Integer.parseUnsignedInt(tableProperties.getProperty(Constants.DRUID_TARGET_SHARDS_PER_GRANULARITY, "0"));
  final int maxPartitionSize = targetNumShardsPerGranularity > 0
      ? -1 : HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_PARTITION_SIZE);
  // If datasource is in the table properties, it is an INSERT/INSERT OVERWRITE as the datasource
  // name was already persisted. Otherwise, it is a CT/CTAS and we need to get the name from the
  // job properties that are set by configureOutputJobProperties in the DruidStorageHandler
  final String dataSource = tableProperties.getProperty(Constants.DRUID_DATA_SOURCE) == null
      ? jc.get(Constants.DRUID_DATA_SOURCE)
      : tableProperties.getProperty(Constants.DRUID_DATA_SOURCE);
  final String segmentDirectory = jc.get(Constants.DRUID_SEGMENT_INTERMEDIATE_DIRECTORY);
  final GranularitySpec granularitySpec = new UniformGranularitySpec(
      Granularity.fromString(segmentGranularity),
      Granularity.fromString(tableProperties.getProperty(Constants.DRUID_QUERY_GRANULARITY) == null
          ? "NONE" : tableProperties.getProperty(Constants.DRUID_QUERY_GRANULARITY)),
      null);
  final String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
  final String columnTypeProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  if (StringUtils.isEmpty(columnNameProperty) || StringUtils.isEmpty(columnTypeProperty)) {
    throw new IllegalStateException(String.format(
        "List of column names [%s] or column types [%s] is not present",
        columnNameProperty, columnTypeProperty));
  }
  ArrayList<String> columnNames = new ArrayList<String>();
  for (String name : columnNameProperty.split(",")) {
    columnNames.add(name);
  }
  if (!columnNames.contains(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN)) {
    throw new IllegalStateException("Timestamp column ('" + DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN
        + "') not specified in create table; list of columns is: "
        + tableProperties.getProperty(serdeConstants.LIST_COLUMNS));
  }
  ArrayList<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  final boolean approximationAllowed = HiveConf.getBoolVar(jc, HiveConf.ConfVars.HIVE_DRUID_APPROX_RESULT);
  // By default, all columns that are not metrics or timestamps are treated as dimensions
  final List<DimensionSchema> dimensions = new ArrayList<>();
  ImmutableList.Builder<AggregatorFactory> aggregatorFactoryBuilder = ImmutableList.builder();
  for (int i = 0; i < columnTypes.size(); i++) {
    final PrimitiveObjectInspector.PrimitiveCategory primitiveCategory =
        ((PrimitiveTypeInfo) columnTypes.get(i)).getPrimitiveCategory();
    AggregatorFactory af;
    switch (primitiveCategory) {
      case BYTE:
      case SHORT:
      case INT:
      case LONG:
        af = new LongSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
        break;
      case FLOAT:
      case DOUBLE:
        af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
        break;
      case DECIMAL:
        if (approximationAllowed) {
          af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
        } else {
          throw new UnsupportedOperationException(String.format(
              "Druid does not support the decimal column type. "
                  + "Either cast column [%s] to double or enable approximate results for Druid "
                  + "by setting property [%s] to true",
              columnNames.get(i), HiveConf.ConfVars.HIVE_DRUID_APPROX_RESULT.varname));
        }
        break;
      case TIMESTAMP:
        // Granularity column
        String tColumnName = columnNames.get(i);
        if (!tColumnName.equals(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME)) {
          throw new IOException("Dimension " + tColumnName + " does not have STRING type: " + primitiveCategory);
        }
        continue;
      case TIMESTAMPLOCALTZ:
        // Druid timestamp column
        String tLocalTZColumnName = columnNames.get(i);
        if (!tLocalTZColumnName.equals(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN)) {
          throw new IOException("Dimension " + tLocalTZColumnName + " does not have STRING type: " + primitiveCategory);
        }
        continue;
      default:
        // Dimension
        String dColumnName = columnNames.get(i);
        if (PrimitiveObjectInspectorUtils.getPrimitiveGrouping(primitiveCategory) != PrimitiveGrouping.STRING_GROUP
            && primitiveCategory != PrimitiveObjectInspector.PrimitiveCategory.BOOLEAN) {
          throw new IOException("Dimension " + dColumnName + " does not have STRING type: " + primitiveCategory);
        }
        dimensions.add(new StringDimensionSchema(dColumnName));
        continue;
    }
    aggregatorFactoryBuilder.add(af);
  }
  List<AggregatorFactory> aggregatorFactories = aggregatorFactoryBuilder.build();
  final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(
      new TimestampSpec(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
      new DimensionsSpec(dimensions,
          Lists.newArrayList(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, Constants.DRUID_SHARD_KEY_COL_NAME),
          null)));
  Map<String, Object> inputParser = DruidStorageHandlerUtils.JSON_MAPPER.convertValue(inputRowParser, Map.class);
  final DataSchema dataSchema = new DataSchema(
      Preconditions.checkNotNull(dataSource, "Data source name is null"),
      inputParser,
      aggregatorFactories.toArray(new AggregatorFactory[aggregatorFactories.size()]),
      granularitySpec,
      DruidStorageHandlerUtils.JSON_MAPPER);
  final String workingPath = jc.get(Constants.DRUID_JOB_WORKING_DIRECTORY);
  final String version = jc.get(Constants.DRUID_SEGMENT_VERSION);
  String basePersistDirectory = HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_BASE_PERSIST_DIRECTORY);
  if (Strings.isNullOrEmpty(basePersistDirectory)) {
    basePersistDirectory = System.getProperty("java.io.tmpdir");
  }
  Integer maxRowInMemory = HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_ROW_IN_MEMORY);
  IndexSpec indexSpec;
  if ("concise".equals(HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_BITMAP_FACTORY_TYPE))) {
    indexSpec = new IndexSpec(new ConciseBitmapSerdeFactory(), null, null, null);
  } else {
    indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
  }
  RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(maxRowInMemory, null, null,
      new File(basePersistDirectory, dataSource), new CustomVersioningPolicy(version), null, null, null,
      indexSpec, true, 0, 0, true, null, 0L);
  LOG.debug(String.format("running with Data schema [%s] ", dataSchema));
  return new DruidRecordWriter(dataSchema, realtimeTuningConfig,
      DruidStorageHandlerUtils.createSegmentPusherForDirectory(segmentDirectory, jc),
      maxPartitionSize, new Path(workingPath, SEGMENTS_DESCRIPTOR_DIR_NAME), finalOutPath.getFileSystem(jc));
}
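The switch above classifies each parsed column by its primitive category: integral types become Druid long-sum metrics, floating-point types become double-sum metrics, and the remaining string-group or boolean columns become dimensions. A stripped-down sketch of just that classification, without the Druid aggregator classes (hypothetical class name and made-up column type string):

import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ColumnClassificationSketch {
  public static void main(String[] args) {
    // Made-up column type list; all entries are primitive, as getHiveRecordWriter assumes.
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString("bigint,double,string,boolean");
    for (TypeInfo typeInfo : columnTypes) {
      PrimitiveCategory category = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
      switch (category) {
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
          System.out.println(typeInfo.getTypeName() + " -> long-sum metric");
          break;
        case FLOAT:
        case DOUBLE:
          System.out.println(typeInfo.getTypeName() + " -> double-sum metric");
          break;
        default:
          // string-group and boolean columns end up as dimensions in the code above
          System.out.println(typeInfo.getTypeName() + " -> dimension");
      }
    }
  }
}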
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
The class HiveHBaseTableInputFormat, method createFilterScan:
/**
 * Converts a filter (which has been pushed down from Hive's optimizer)
 * into corresponding restrictions on the HBase scan. The
 * filter should already be in a form which can be fully converted.
 *
 * @param jobConf configuration for the scan
 *
 * @param iKey 0-based offset of key column within Hive table
 *
 * @return the Scan with the converted restrictions applied
 */
private Scan createFilterScan(JobConf jobConf, int iKey, int iTimestamp, boolean isKeyBinary)
    throws IOException {
  // TODO: assert iKey is HBaseSerDe#HBASE_KEY_COL
  Scan scan = new Scan();
  String filterObjectSerialized = jobConf.get(TableScanDesc.FILTER_OBJECT_CONF_STR);
  if (filterObjectSerialized != null) {
    HiveHBaseInputFormatUtil.setupScanRange(scan, filterObjectSerialized, jobConf, false);
    return scan;
  }
  String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (filterExprSerialized == null) {
    return scan;
  }
  ExprNodeGenericFuncDesc filterExpr = SerializationUtilities.deserializeExpression(filterExprSerialized);
  String keyColName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iKey];
  ArrayList<TypeInfo> cols = TypeInfoUtils.getTypeInfosFromTypeString(jobConf.get(serdeConstants.LIST_COLUMN_TYPES));
  String colType = cols.get(iKey).getTypeName();
  boolean isKeyComparable = isKeyBinary || colType.equalsIgnoreCase("string");
  String tsColName = null;
  if (iTimestamp >= 0) {
    tsColName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iTimestamp];
  }
  IndexPredicateAnalyzer analyzer = newIndexPredicateAnalyzer(keyColName, isKeyComparable, tsColName);
  List<IndexSearchCondition> conditions = new ArrayList<IndexSearchCondition>();
  ExprNodeDesc residualPredicate = analyzer.analyzePredicate(filterExpr, conditions);
  // THIS IGNORES RESIDUAL PARSING FROM HBaseStorageHandler#decomposePredicate
  if (residualPredicate != null) {
    LOG.debug("Ignoring residual predicate " + residualPredicate.getExprString());
  }
  Map<String, List<IndexSearchCondition>> split = HiveHBaseInputFormatUtil.decompose(conditions);
  List<IndexSearchCondition> keyConditions = split.get(keyColName);
  if (keyConditions != null && !keyConditions.isEmpty()) {
    HiveHBaseInputFormatUtil.setupKeyRange(scan, keyConditions, isKeyBinary);
  }
  List<IndexSearchCondition> tsConditions = split.get(tsColName);
  if (tsConditions != null && !tsConditions.isEmpty()) {
    HiveHBaseInputFormatUtil.setupTimeRange(scan, tsConditions);
  }
  return scan;
}
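createFilterScan only needs the type of one column, so it indexes the parsed list with the same offset used for the comma-separated serdeConstants.LIST_COLUMNS value. A minimal sketch of that lookup with made-up property values (illustrative class name):

import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class KeyColumnTypeSketch {
  public static void main(String[] args) {
    // Stand-ins for jobConf.get(serdeConstants.LIST_COLUMNS) and LIST_COLUMN_TYPES.
    String listColumns = "key,value,ts";
    String listColumnTypes = "string,int,timestamp";
    int iKey = 0; // 0-based offset of the key column within the Hive table
    String keyColName = listColumns.split(",")[iKey];
    List<TypeInfo> cols = TypeInfoUtils.getTypeInfosFromTypeString(listColumnTypes);
    String colType = cols.get(iKey).getTypeName();
    // Mirrors createFilterScan: key range pushdown is only attempted for binary or string keys.
    boolean isKeyBinary = false;
    boolean isKeyComparable = isKeyBinary || colType.equalsIgnoreCase("string");
    System.out.println(keyColName + " has type " + colType + ", comparable=" + isKeyComparable);
  }
}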
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
The class TestLazyHBaseObject, method testLazyHBaseRow2:
/**
 * Test the LazyHBaseRow class with a mapping from a Hive field to
 * an HBase column family.
 * @throws SerDeException
 */
public void testLazyHBaseRow2() throws SerDeException {
  // column family is mapped to Map<string,string>
  List<TypeInfo> fieldTypeInfos =
      TypeInfoUtils.getTypeInfosFromTypeString("string,int,array<string>,map<string,string>,string");
  List<String> fieldNames = Arrays.asList(new String[] { "key", "a", "b", "c", "d" });
  Text nullSequence = new Text("\\N");
  String hbaseColsMapping = ":key,cfa:a,cfa:b,cfb:,cfc:d";
  ColumnMappings columnMappings = null;
  try {
    columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColsMapping);
  } catch (SerDeException e) {
    fail(e.toString());
  }
  for (ColumnMapping colMap : columnMappings) {
    if (!colMap.hbaseRowKey && colMap.qualifierName == null) {
      colMap.binaryStorage.add(false);
      colMap.binaryStorage.add(false);
    } else {
      colMap.binaryStorage.add(false);
    }
  }
  ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos,
      new byte[] { ' ', ':', '=' }, nullSequence, false, false, (byte) 0);
  LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings);
  List<Cell> kvs = new ArrayList<Cell>();
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("a"), Bytes.toBytes("123")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("b"), Bytes.toBytes("a:b:c")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("e")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("f"), Bytes.toBytes("g")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfc"), Bytes.toBytes("d"), Bytes.toBytes("hi")));
  Result r = Result.create(kvs);
  o.init(r);
  assertEquals(("{'key':'test-row','a':123,'b':['a','b','c'],"
      + "'c':{'d':'e','f':'g'},'d':'hi'}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
  kvs.clear();
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("a"), Bytes.toBytes("123")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("e")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("f"), Bytes.toBytes("g")));
  r = Result.create(kvs);
  o.init(r);
  assertEquals(("{'key':'test-row','a':123,'b':null,"
      + "'c':{'d':'e','f':'g'},'d':null}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
  kvs.clear();
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("b"), Bytes.toBytes("a")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("f"), Bytes.toBytes("g")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfc"), Bytes.toBytes("d"), Bytes.toBytes("no")));
  r = Result.create(kvs);
  o.init(r);
  assertEquals(("{'key':'test-row','a':null,'b':['a'],"
      + "'c':{'f':'g'},'d':'no'}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
  kvs.clear();
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("b"), Bytes.toBytes(":a::")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfc"), Bytes.toBytes("d"), Bytes.toBytes("no")));
  r = Result.create(kvs);
  o.init(r);
  assertEquals(("{'key':'test-row','a':null,'b':['','a','',''],"
      + "'c':{},'d':'no'}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
  kvs.clear();
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("a"), Bytes.toBytes("123")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("b"), Bytes.toBytes("")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfc"), Bytes.toBytes("d"), Bytes.toBytes("")));
  r = Result.create(kvs);
  o.init(r);
  assertEquals("{'key':'test-row','a':123,'b':[],'c':{},'d':''}".replace("'", "\""),
      SerDeUtils.getJSONString(o, oi));
}
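The type string in this test mixes primitive, array, and map types; each parsed entry can be narrowed to its concrete TypeInfo subclass to recover the nested key, value, and element types that the lazy inspectors are built from. A short illustrative sketch (assumed class name, same type string as the test):

import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class NestedTypeInfoSketch {
  public static void main(String[] args) {
    List<TypeInfo> fieldTypeInfos =
        TypeInfoUtils.getTypeInfosFromTypeString("string,int,array<string>,map<string,string>,string");
    System.out.println(fieldTypeInfos.size()); // 5, one per Hive field
    // array<string> parses to a ListTypeInfo exposing its element type.
    ListTypeInfo listType = (ListTypeInfo) fieldTypeInfos.get(2);
    System.out.println(listType.getListElementTypeInfo().getTypeName()); // string
    // map<string,string> parses to a MapTypeInfo exposing key and value types,
    // which is what lets the cfb: column family map onto the Hive map field 'c'.
    MapTypeInfo mapType = (MapTypeInfo) fieldTypeInfos.get(3);
    System.out.println(mapType.getMapKeyTypeInfo().getTypeName());   // string
    System.out.println(mapType.getMapValueTypeInfo().getTypeName()); // string
  }
}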