Use of org.apache.hive.hcatalog.data.HCatRecord in project beam by apache.
The class PartitionReaderFn, method processElement:
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  final Read readRequest = c.element().getKey();
  final Integer partitionIndexToRead = c.element().getValue();
  // Build a ReaderContext for the requested partition, then read every split in it.
  ReaderContext readerContext = getReaderContext(readRequest, partitionIndexToRead);
  for (int i = 0; i < readerContext.numSplits(); i++) {
    HCatReader reader = DataTransferFactory.getHCatReader(readerContext, i);
    Iterator<HCatRecord> hcatIterator = reader.read();
    while (hcatIterator.hasNext()) {
      // Emit each HCatRecord downstream.
      final HCatRecord record = hcatIterator.next();
      c.output(record);
    }
  }
}
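The ReaderContext consumed above is produced on the master side of HCatalog's data-transfer API. A minimal sketch of that setup follows; the database and table names are placeholders, and the real Beam connector derives them (and the metastore configuration) from the Read spec rather than hard-coding them.

import java.util.HashMap;
import java.util.Map;
import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hive.hcatalog.data.transfer.HCatReader;
import org.apache.hive.hcatalog.data.transfer.ReadEntity;
import org.apache.hive.hcatalog.data.transfer.ReaderContext;

public class ReaderContextSketch {
  // Builds the ReaderContext that processElement above consumes split by split.
  static ReaderContext prepare() throws Exception {
    ReadEntity entity = new ReadEntity.Builder()
        .withDatabase("default")   // placeholder database name
        .withTable("mytable")      // placeholder table name
        .build();
    Map<String, String> config = new HashMap<String, String>();
    // config would typically carry metastore settings such as hive.metastore.uris.
    HCatReader masterReader = DataTransferFactory.getHCatReader(entity, config);
    // prepareRead() plans the read; numSplits() on the result can then be read in parallel.
    return masterReader.prepareRead();
  }
}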
Use of org.apache.hive.hcatalog.data.HCatRecord in project hive by apache.
The class TestHCatLoaderEncryption, method testReadDataFromEncryptedHiveTableByHCatMR:
@Test
public void testReadDataFromEncryptedHiveTableByHCatMR() throws Exception {
  assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
  readRecords.clear();
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "hcat mapreduce read encryption test");
  job.setJarByClass(this.getClass());
  job.setMapperClass(TestHCatLoaderEncryption.MapRead.class);
  // input/output settings
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  HCatInputFormat.setInput(job, Warehouse.DEFAULT_DATABASE_NAME, ENCRYPTED_TABLE, null);
  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(Text.class);
  // Map-only job: the mapper collects records into readRecords for verification.
  job.setNumReduceTasks(0);
  FileSystem fs = FileSystem.getLocal(conf);
  String pathLoc = TEST_DATA_DIR + "/testHCatMREncryptionOutput";
  Path path = new Path(pathLoc);
  if (fs.exists(path)) {
    fs.delete(path, true);
  }
  TextOutputFormat.setOutputPath(job, new Path(pathLoc));
  job.waitForCompletion(true);
  // Verify every record read from the encrypted table against the expected input.
  int numTuplesRead = 0;
  for (HCatRecord hCatRecord : readRecords) {
    assertEquals(2, hCatRecord.size());
    assertNotNull(hCatRecord.get(0));
    assertNotNull(hCatRecord.get(1));
    assertTrue(hCatRecord.get(0).getClass() == Integer.class);
    assertTrue(hCatRecord.get(1).getClass() == String.class);
    assertEquals(hCatRecord.get(0), basicInputData.get(numTuplesRead).first);
    assertEquals(hCatRecord.get(1), basicInputData.get(numTuplesRead).second);
    numTuplesRead++;
  }
  assertEquals("failed HCat MR read with storage format: " + this.storageFormat,
      basicInputData.size(), numTuplesRead);
}
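The test relies on a MapRead mapper and a static readRecords list defined elsewhere in TestHCatLoaderEncryption. A hedged sketch of what such a map-only collector could look like is below; the input key type and the collection mechanics are assumptions, and the actual MapRead in the Hive source may differ.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hive.hcatalog.data.HCatRecord;

// Sketch only: output types match the job configuration above, but nothing is emitted.
public class MapReadSketch
    extends Mapper<WritableComparable, HCatRecord, BytesWritable, Text> {
  static List<HCatRecord> readRecords = new ArrayList<HCatRecord>();

  @Override
  protected void map(WritableComparable key, HCatRecord value, Context context)
      throws IOException, InterruptedException {
    // Collect each record for in-process assertions after the job completes.
    readRecords.add(value);
  }
}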
Use of org.apache.hive.hcatalog.data.HCatRecord in project hive by apache.
The class TestHCatPartitioned, method tableSchemaTest:
// Test that new columns get added to the table schema.
private void tableSchemaTest() throws Exception {
  HCatSchema tableSchema = getTableSchema();
  assertEquals(4, tableSchema.getFields().size());
  // Update the partition schema to have three fields by appending column c3.
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(
      new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
  writeRecords = new ArrayList<HCatRecord>();
  for (int i = 0; i < 20; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("strvalue" + i);
    objList.add("str2value" + i);
    writeRecords.add(new DefaultHCatRecord(objList));
  }
  Map<String, String> partitionMap = new HashMap<String, String>();
  partitionMap.put("part1", "p1value5");
  partitionMap.put("part0", "505");
  runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  tableSchema = getTableSchema();
  // Assert that c3 has been added to the table schema, ahead of the partition columns.
  assertEquals(5, tableSchema.getFields().size());
  assertEquals("c1", tableSchema.getFields().get(0).getName());
  assertEquals("c2", tableSchema.getFields().get(1).getName());
  assertEquals("c3", tableSchema.getFields().get(2).getName());
  assertEquals("part1", tableSchema.getFields().get(3).getName());
  assertEquals("part0", tableSchema.getFields().get(4).getName());
  // Test that changing a column's data type fails with a schema-type-mismatch error.
  partitionMap.clear();
  partitionMap.put("part1", "p1value6");
  partitionMap.put("part0", "506");
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(
      new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(
      new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, "")));
  IOException exc = null;
  try {
    runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
  } catch (IOException e) {
    exc = e;
  }
  assertNotNull(exc);
  assertTrue(exc instanceof HCatException);
  assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType());
  // Test behavior when the partition key part1 is also present in the data.
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(
      new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(
      new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(
      new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(
      new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, "")));
  List<HCatRecord> recordsContainingPartitionCols = new ArrayList<HCatRecord>(20);
  for (int i = 0; i < 20; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("c2value" + i);
    objList.add("c3value" + i);
    objList.add("p1value6");
    recordsContainingPartitionCols.add(new DefaultHCatRecord(objList));
  }
  exc = null;
  try {
    runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true);
  } catch (IOException e) {
    exc = e;
  }
  // Read back the written partition and verify the records, including the partition values.
  List<HCatRecord> records = runMRRead(20, "part1 = \"p1value6\"");
  assertEquals(20, records.size());
  records = runMRRead(20, "part0 = \"506\"");
  assertEquals(20, records.size());
  Integer i = 0;
  for (HCatRecord rec : records) {
    assertEquals(5, rec.size());
    assertEquals(rec.get(0), i);
    assertEquals(rec.get(1), "c2value" + i);
    assertEquals(rec.get(2), "c3value" + i);
    assertEquals(rec.get(3), "p1value6");
    assertEquals(rec.get(4), 506);
    i++;
  }
}
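The schema-plus-record pattern this test repeats can be shown in isolation. Below is a minimal, self-contained sketch of building an HCatSchema from metastore FieldSchemas and populating a DefaultHCatRecord against it; the column names and values here are illustrative only.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;

public class SchemaRecordSketch {
  static HCatRecord build() throws Exception {
    // Convert metastore FieldSchemas into HCatalog field schemas, as the test does.
    List<HCatFieldSchema> fields = new ArrayList<HCatFieldSchema>();
    fields.add(HCatSchemaUtils.getHCatFieldSchema(
        new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    fields.add(HCatSchemaUtils.getHCatFieldSchema(
        new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
    HCatSchema schema = new HCatSchema(fields);

    HCatRecord record = new DefaultHCatRecord(schema.size());
    record.set(0, 42);                  // positional set
    record.set("c2", schema, "hello");  // named set, resolved via the schema
    return record;
  }
}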
Use of org.apache.hive.hcatalog.data.HCatRecord in project hive by apache.
The class HCatBaseLoader, method getNext:
@Override
public Tuple getNext() throws IOException {
  try {
    // Returns null once input is exhausted, which signals end-of-data to Pig.
    HCatRecord hr = (HCatRecord) (reader.nextKeyValue() ? reader.getCurrentValue() : null);
    Tuple t = PigHCatUtil.transformToTuple(hr, outputSchema);
    // TODO: an iterator interface and a LazyTuple were discussed;
    // change this when plans for that solidify.
    return t;
  } catch (ExecException e) {
    int errCode = 6018;
    String errMsg = "Error while reading input";
    throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e);
  } catch (Exception eOther) {
    int errCode = 6018;
    String errMsg = "Error converting read value to tuple";
    throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, eOther);
  }
}
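PigHCatUtil.transformToTuple also handles nested HCatalog types, but for a flat schema the conversion amounts to a field-by-field copy. The sketch below is an approximation of that core, not the real utility's internals; the method name toTuple is ours.

import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

public class TupleSketch {
  // Flat-schema approximation of the conversion; nested types are omitted.
  static Tuple toTuple(HCatRecord record) throws ExecException {
    if (record == null) {
      return null; // matches getNext()'s contract: null means end of input
    }
    Tuple t = TupleFactory.getInstance().newTuple(record.size());
    for (int i = 0; i < record.size(); i++) {
      t.set(i, record.get(i));
    }
    return t;
  }
}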
Use of org.apache.hive.hcatalog.data.HCatRecord in project hive by apache.
The class TestHCatDynamicPartitioned, method generateWriteRecords:
protected static void generateWriteRecords(int max, int mod, int offset) {
  writeRecords = new ArrayList<HCatRecord>();
  for (int i = 0; i < max; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("strvalue" + i);
    // The third field drives dynamic partitioning: (i % mod) + offset, as a string.
    objList.add(String.valueOf((i % mod) + offset));
    writeRecords.add(new DefaultHCatRecord(objList));
  }
}
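For example, assuming the surrounding test's static writeRecords field, the following call builds 64 records spread evenly over four dynamic-partition values:

// Records 0..63 get partition values "10", "11", "12", "13", since (i % 4) + 10.
generateWriteRecords(64, 4, 10);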