Usage of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in the Apache Hive project: class RegexSerDe, method initialize.
/**
 * Initializes the SerDe from table/partition properties.
 *
 * <p>Reads the input regex and output format string, compiles the input
 * pattern, and verifies that every declared column is of type STRING
 * (this SerDe deserializes regex capture groups into string columns only).
 *
 * @param configuration        Hadoop configuration
 * @param tableProperties      table-level SerDe properties
 * @param partitionProperties  partition-level SerDe properties
 * @throws SerDeException if any column has a non-string type
 */
@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties) throws SerDeException {
  super.initialize(configuration, tableProperties, partitionProperties);
  numColumns = this.getColumnNames().size();

  // Read the configuration parameters. NOTE(review): 'properties' is
  // presumably the merged table/partition properties populated by
  // super.initialize() — confirm against AbstractSerDe.
  inputRegex = properties.getProperty(INPUT_REGEX);
  outputFormatString = properties.getProperty(OUTPUT_FORMAT_STRING);
  boolean inputRegexIgnoreCase = "true".equalsIgnoreCase(properties.getProperty(INPUT_REGEX_CASE_SENSITIVE));

  // Compile the input pattern once. Combine match flags with bitwise OR,
  // the idiomatic form — '+' happens to work only because the flag bits
  // are distinct, and silently corrupts flags if a bit is added twice.
  if (inputRegex != null) {
    inputPattern = Pattern.compile(inputRegex, Pattern.DOTALL | (inputRegexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));
  } else {
    inputPattern = null;
  }

  // All columns have to be of type STRING.
  int i = 0;
  for (TypeInfo type : getColumnTypes()) {
    if (!type.equals(TypeInfoFactory.stringTypeInfo)) {
      throw new SerDeException(getClass().getName() + " only accepts string columns, but column[" + i + "] named " + getColumnNames().get(i) + " has type " + type);
    }
    i++;
  }

  // Constructing the row ObjectInspector: the row consists of string
  // columns only, each column inspected as a java String object.
  List<ObjectInspector> columnOIs = Collections.nCopies(numColumns, PrimitiveObjectInspectorFactory.javaStringObjectInspector);

  // StandardStruct uses ArrayList to store the row.
  rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(getColumnNames(), columnOIs);

  // Pre-allocate the row object and output buffers; reused for all rows.
  row = new ArrayList<>(Collections.nCopies(numColumns, null));
  outputFields = new Object[numColumns];
  outputRowText = new Text();
}
Usage of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in the Apache Hive project: class MapJoinOneLongKeyBenchBase, method doSetup.
/**
 * Configures the map-join benchmark: a 10M-row big table keyed on a single
 * LONG column, joined to a small table carrying DATE and STRING value columns.
 */
public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation) throws Exception {
  final HiveConf conf = new HiveConf();
  final long randomSeed = 2543;
  final int bigTableRowCount = 10_000_000;

  // Big table: one LONG key column, retained in the output.
  final String[] bigTableColumns = { "number1" };
  final TypeInfo[] bigTableTypes = { TypeInfoFactory.longTypeInfo };
  final int[] bigTableKeyColumns = { 0 };
  final int[] bigTableRetainColumns = { 0 };

  // Small table: two value columns (DATE, STRING); keys are not retained,
  // both values are.
  final String[] smallTableValueColumns = { "sv1", "sv2" };
  final TypeInfo[] smallTableValueTypes = { TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo };
  final int[] smallTableRetainKeyColumns = {};
  final int[] smallTableRetainValueColumns = { 0, 1 };

  // Each small-table key maps to exactly one value row.
  final SmallTableGenerationParameters smallTableParams = new SmallTableGenerationParameters();
  smallTableParams.setValueOption(ValueOption.ONLY_ONE);

  setupMapJoin(conf, randomSeed, bigTableRowCount, vectorMapJoinVariation, mapJoinImplementation,
      bigTableColumns, bigTableTypes, bigTableKeyColumns,
      smallTableValueColumns, smallTableValueTypes,
      bigTableRetainColumns, smallTableRetainKeyColumns, smallTableRetainValueColumns,
      smallTableParams);
}
Usage of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in the Apache Hive project: class MapJoinOneStringKeyBenchBase, method doSetup.
/**
 * Configures the map-join benchmark: a 100K-row big table keyed on a single
 * STRING column, joined to a small table with DATE and TIMESTAMP value columns.
 */
public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation) throws Exception {
  final HiveConf conf = new HiveConf();
  final long randomSeed = 2543;
  final int bigTableRowCount = 100_000;

  // Big table: one STRING key column, retained in the output.
  final String[] bigTableColumns = { "b1" };
  final TypeInfo[] bigTableTypes = { TypeInfoFactory.stringTypeInfo };
  final int[] bigTableKeyColumns = { 0 };
  final int[] bigTableRetainColumns = { 0 };

  // Small table: two value columns (DATE, TIMESTAMP); keys are not retained,
  // both values are.
  final String[] smallTableValueColumns = { "sv1", "sv2" };
  final TypeInfo[] smallTableValueTypes = { TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo };
  final int[] smallTableRetainKeyColumns = {};
  final int[] smallTableRetainValueColumns = { 0, 1 };

  // Each small-table key maps to exactly one value row.
  final SmallTableGenerationParameters smallTableParams = new SmallTableGenerationParameters();
  smallTableParams.setValueOption(ValueOption.ONLY_ONE);

  setupMapJoin(conf, randomSeed, bigTableRowCount, vectorMapJoinVariation, mapJoinImplementation,
      bigTableColumns, bigTableTypes, bigTableKeyColumns,
      smallTableValueColumns, smallTableValueTypes,
      bigTableRetainColumns, smallTableRetainKeyColumns, smallTableRetainValueColumns,
      smallTableParams);
}
Usage of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in the Apache Hive project: class JsonReadBench, method buildArguments.
/**
 * Builds the two-element argument array for the benchmark UDF: a writable
 * string value inspector followed by a constant string inspector wrapping
 * the given type specification.
 */
private ObjectInspector[] buildArguments(String typeStr) {
  ObjectInspector constantTypeOI =
      PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
          TypeInfoFactory.stringTypeInfo, new Text(typeStr));
  return new ObjectInspector[] {
      PrimitiveObjectInspectorFactory.writableStringObjectInspector, constantTypeOI };
}
Usage of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in the Apache Hive project: class TestLazySimpleFast, method testLazySimpleDeserializeRowEmptyArray.
/**
 * Verifies that a row consisting of a bare field delimiter deserializes
 * into two present-but-empty list columns, and that the reader reports
 * end-of-input afterwards.
 */
@Test
public void testLazySimpleDeserializeRowEmptyArray() throws Throwable {
  // Two list-typed columns: array<int> and array<array<string>>.
  TypeInfo intListType = TypeInfoFactory.getListTypeInfo(TypeInfoFactory.intTypeInfo);
  TypeInfo nestedStringListType =
      TypeInfoFactory.getListTypeInfo(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo));
  TypeInfo[] typeInfos = new TypeInfo[] { intListType, nestedStringListType };

  // SerDe parameters with a comma field delimiter.
  HiveConf hconf = new HiveConf();
  Properties props = new Properties();
  props.setProperty(serdeConstants.FIELD_DELIM, ",");
  LazySerDeParameters lazyParams = new LazySerDeParameters(hconf, props, LazySimpleSerDe.class.getName());

  LazySimpleDeserializeRead deserializeRead = new LazySimpleDeserializeRead(typeInfos, null, true, lazyParams);

  // A lone delimiter means both fields exist but are empty.
  byte[] bytes = ",".getBytes();
  deserializeRead.set(bytes, 0, bytes.length);
  verifyRead(deserializeRead, typeInfos[0], Collections.emptyList());
  verifyRead(deserializeRead, typeInfos[1], Collections.emptyList());
  assertTrue(deserializeRead.isEndOfInputReached());
}
End of aggregated usage examples.