use of org.apache.hive.hcatalog.data.DefaultHCatRecord in project hive by apache.
the class HCatBaseStorer method putNext.
@Override
public void putNext(Tuple tuple) throws IOException {
  List<Object> outgoing = new ArrayList<Object>(tuple.size());
  int i = 0;
  for (HCatFieldSchema fSchema : computedSchema.getFields()) {
    outgoing.add(getJavaObj(tuple.get(i++), fSchema));
  }
  try {
    writer.write(null, new DefaultHCatRecord(outgoing));
  } catch (InterruptedException e) {
    throw new BackendException("Error while writing tuple: " + tuple, PigHCatUtil.PIG_EXCEPTION_CODE, e);
  }
}
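DefaultHCatRecord here is a thin wrapper around the assembled value list, which must follow the field order of computedSchema. A minimal standalone sketch of that list-backed constructor, with hypothetical column values:

import java.util.ArrayList;
import java.util.List;

import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;

public class RecordFromListSketch {
  public static void main(String[] args) {
    // Values must be added in the same order as the schema's fields.
    List<Object> outgoing = new ArrayList<Object>();
    outgoing.add(42);            // hypothetical int column
    outgoing.add("strvalue42");  // hypothetical string column
    HCatRecord record = new DefaultHCatRecord(outgoing); // list-backed record
    System.out.println(record.size()); // prints 2
  }
}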
use of org.apache.hive.hcatalog.data.DefaultHCatRecord in project hive by apache.
the class TestHCatDynamicPartitioned method generateWriteRecords.
protected static void generateWriteRecords(int max, int mod, int offset) {
  writeRecords = new ArrayList<HCatRecord>();
  for (int i = 0; i < max; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("strvalue" + i);
    objList.add(String.valueOf((i % mod) + offset));
    writeRecords.add(new DefaultHCatRecord(objList));
  }
}
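To make the mod/offset arithmetic concrete, a standalone sketch (the arguments 20, 5, 0 are illustrative) that prints the partition value the helper assigns to each record:

public class PartitionValueSketch {
  public static void main(String[] args) {
    int max = 20, mod = 5, offset = 0; // illustrative arguments
    for (int i = 0; i < max; i++) {
      // Mirrors String.valueOf((i % mod) + offset) from generateWriteRecords:
      // the partition value cycles through "0".."4".
      System.out.println(i + " -> " + String.valueOf((i % mod) + offset));
    }
  }
}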
use of org.apache.hive.hcatalog.data.DefaultHCatRecord in project hive by apache.
the class TestHCatPartitioned method tableSchemaTest.
//test that new columns get added to the table schema
private void tableSchemaTest() throws Exception {
  HCatSchema tableSchema = getTableSchema();
  assertEquals(4, tableSchema.getFields().size());
  //Update the partition schema to have 3 fields
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
  writeRecords = new ArrayList<HCatRecord>();
  for (int i = 0; i < 20; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("strvalue" + i);
    objList.add("str2value" + i);
    writeRecords.add(new DefaultHCatRecord(objList));
  }
  Map<String, String> partitionMap = new HashMap<String, String>();
  partitionMap.put("part1", "p1value5");
  partitionMap.put("part0", "505");
  runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  tableSchema = getTableSchema();
  //assert that c3 has been added to the table schema
  assertEquals(5, tableSchema.getFields().size());
  assertEquals("c1", tableSchema.getFields().get(0).getName());
  assertEquals("c2", tableSchema.getFields().get(1).getName());
  assertEquals("c3", tableSchema.getFields().get(2).getName());
  assertEquals("part1", tableSchema.getFields().get(3).getName());
  assertEquals("part0", tableSchema.getFields().get(4).getName());
  //Test that changing a column's data type fails
  partitionMap.clear();
  partitionMap.put("part1", "p1value6");
  partitionMap.put("part0", "506");
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, "")));
  IOException exc = null;
  try {
    runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
  } catch (IOException e) {
    exc = e;
  }
  assertTrue(exc != null);
  assertTrue(exc instanceof HCatException);
  assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType());
  //Test that a partition key is not allowed in the data
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, "")));
  List<HCatRecord> recordsContainingPartitionCols = new ArrayList<HCatRecord>(20);
  for (int i = 0; i < 20; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("c2value" + i);
    objList.add("c3value" + i);
    objList.add("p1value6");
    recordsContainingPartitionCols.add(new DefaultHCatRecord(objList));
  }
  exc = null;
  try {
    runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true);
  } catch (IOException e) {
    exc = e;
  }
  List<HCatRecord> records = runMRRead(20, "part1 = \"p1value6\"");
  assertEquals(20, records.size());
  records = runMRRead(20, "part0 = \"506\"");
  assertEquals(20, records.size());
  Integer i = 0;
  for (HCatRecord rec : records) {
    assertEquals(5, rec.size());
    assertEquals(rec.get(0), i);
    assertEquals(rec.get(1), "c2value" + i);
    assertEquals(rec.get(2), "c3value" + i);
    assertEquals(rec.get(3), "p1value6");
    assertEquals(rec.get(4), 506);
    i++;
  }
}
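The test builds its HCatalog column descriptors by converting Hive metastore FieldSchema objects via HCatSchemaUtils. A minimal sketch of that conversion step, using an illustrative column:

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;

public class FieldSchemaSketch {
  public static void main(String[] args) throws Exception {
    // Convert a metastore FieldSchema (name, type, comment) into an HCatFieldSchema.
    HCatFieldSchema c3 = HCatSchemaUtils.getHCatFieldSchema(
        new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""));
    System.out.println(c3.getName() + ": " + c3.getTypeString()); // c3: string
  }
}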
use of org.apache.hive.hcatalog.data.DefaultHCatRecord in project hive by apache.
the class TestHCatPartitioned method columnOrderChangeTest.
//check behavior when changing the order of columns
private void columnOrderChangeTest() throws Exception {
  HCatSchema tableSchema = getTableSchema();
  assertEquals(5, tableSchema.getFields().size());
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  writeRecords = new ArrayList<HCatRecord>();
  for (int i = 0; i < 10; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("co strvalue" + i);
    objList.add("co str2value" + i);
    writeRecords.add(new DefaultHCatRecord(objList));
  }
  Map<String, String> partitionMap = new HashMap<String, String>();
  partitionMap.put("part1", "p1value8");
  partitionMap.put("part0", "508");
  Exception exc = null;
  try {
    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  } catch (IOException e) {
    exc = e;
  }
  assertTrue(exc != null);
  assertTrue(exc instanceof HCatException);
  assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType());
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  writeRecords = new ArrayList<HCatRecord>();
  for (int i = 0; i < 10; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("co strvalue" + i);
    writeRecords.add(new DefaultHCatRecord(objList));
  }
  runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  if (isTableImmutable()) {
    //Read should get 10 + 20 + 10 + 10 + 20 rows
    runMRRead(70);
  } else {
    // +20 from the duplicate publish
    runMRRead(90);
  }
}
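Positional access like rec.get(0) is exactly what column reordering breaks; HCatRecord also offers name-based lookup through a schema. A minimal sketch, assuming a hypothetical two-column schema:

import java.util.Arrays;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;

public class NameBasedAccessSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical schema: (c1 int, c2 string).
    HCatSchema schema = new HCatSchema(Arrays.asList(
        HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")),
        HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))));
    HCatRecord rec = new DefaultHCatRecord(Arrays.<Object>asList(1, "co strvalue1"));
    System.out.println(rec.get(1));            // positional: breaks if columns move
    System.out.println(rec.get("c2", schema)); // name-based: position looked up in the schema
  }
}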
use of org.apache.hive.hcatalog.data.DefaultHCatRecord in project hive by apache.
the class HCatRecordReader method nextKeyValue.
/**
* Check if the wrapped RecordReader has another record, and if so convert it into an
* HCatRecord. We both check for records and convert here so a configurable percent of
* bad records can be tolerated.
*
* @return if there is a next record
* @throws IOException on error
* @throws InterruptedException on error
*/
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  if (currentKey == null) {
    currentKey = baseRecordReader.createKey();
    currentValue = baseRecordReader.createValue();
  }
  while (baseRecordReader.next(currentKey, currentValue)) {
    HCatRecord r = null;
    Throwable t = null;
    errorTracker.incRecords();
    try {
      Object o = deserializer.deserialize(currentValue);
      r = new LazyHCatRecord(o, deserializer.getObjectInspector());
    } catch (Throwable throwable) {
      t = throwable;
    }
    if (r == null) {
      errorTracker.incErrors(t);
      continue;
    }
    DefaultHCatRecord dr = new DefaultHCatRecord(outputSchema.size());
    int i = 0;
    for (String fieldName : outputSchema.getFieldNames()) {
      if (dataSchema.getPosition(fieldName) != null) {
        dr.set(i, r.get(fieldName, dataSchema));
      } else {
        dr.set(i, valuesNotInDataCols.get(fieldName));
      }
      i++;
    }
    currentHCatRecord = dr;
    return true;
  }
  return false;
}
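Unlike the list-backed constructor used by the writers above, this projection loop pre-sizes the record and fills fields positionally with set(). A minimal standalone sketch of that pattern, with hypothetical field values:

import org.apache.hive.hcatalog.data.DefaultHCatRecord;

public class PreSizedRecordSketch {
  public static void main(String[] args) {
    DefaultHCatRecord dr = new DefaultHCatRecord(3); // all 3 slots start out null
    dr.set(0, 7);                 // hypothetical int field
    dr.set(1, "projected value"); // hypothetical string field
    dr.set(2, null);              // e.g. a field missing from the data schema
    System.out.println(dr.size()); // prints 3
  }
}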