Use of org.apache.hadoop.hive.ql.io.AcidInputFormat.AcidRecordReader in project hive by apache.
From the class StreamingAssert, method readRecords:
List<Record> readRecords() throws Exception {
  if (currentDeltas.isEmpty()) {
    throw new AssertionError("No data");
  }

  // Configure an OrcInputFormat job over the partition directory.
  InputFormat<NullWritable, OrcStruct> inputFormat = new OrcInputFormat();
  JobConf job = new JobConf();
  job.set("mapred.input.dir", partitionLocation.toString());
  job.set("bucket_count", Integer.toString(table.getSd().getNumBuckets()));
  // Schema evolution properties: the column names and types of the table being read.
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
  // Mark this as an ACID table scan and supply the valid-transaction list
  // so that base and delta files are resolved correctly.
  job.set(ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname, "true");
  job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());

  InputSplit[] splits = inputFormat.getSplits(job, 1);
  assertEquals(1, splits.length);

  // Cast to AcidRecordReader to gain access to each row's RecordIdentifier.
  final AcidRecordReader<NullWritable, OrcStruct> recordReader =
      (AcidRecordReader<NullWritable, OrcStruct>) inputFormat.getRecordReader(splits[0], job, Reporter.NULL);

  NullWritable key = recordReader.createKey();
  OrcStruct value = recordReader.createValue();

  List<Record> records = new ArrayList<>();
  while (recordReader.next(key, value)) {
    // getRecordIdentifier() exposes the (transactionId, bucketId, rowId)
    // triple for the row that next() just produced.
    RecordIdentifier recordIdentifier = recordReader.getRecordIdentifier();
    Record record = new Record(
        new RecordIdentifier(recordIdentifier.getTransactionId(), recordIdentifier.getBucketId(),
            recordIdentifier.getRowId()),
        value.toString());
    System.out.println(record);
    records.add(record);
  }
  recordReader.close();
  return records;
}
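For context, the snippet above only requires that Record have a constructor taking a RecordIdentifier and the row rendered as a String. A minimal sketch of such a value class follows; the field and accessor names are illustrative assumptions, not taken from the Hive source:

static class Record {
  // Assumed holder for the ACID row identity plus the row text.
  private final RecordIdentifier recordIdentifier;
  private final String row;

  Record(RecordIdentifier recordIdentifier, String row) {
    this.recordIdentifier = recordIdentifier;
    this.row = row;
  }

  RecordIdentifier getRecordIdentifier() {
    return recordIdentifier;
  }

  String getRow() {
    return row;
  }

  @Override
  public String toString() {
    return "Record{recordIdentifier=" + recordIdentifier + ", row=" + row + "}";
  }
}

A typical caller would invoke readRecords() after committing a streaming transaction and assert against the returned identifiers. A hedged usage sketch, assuming a StreamingAssert instance named streamingAssert and JUnit's assertEquals:

// Hypothetical test usage: read back rows written to the partition
// and check the row ids assigned by the ACID reader.
List<Record> records = streamingAssert.readRecords();
assertEquals(2, records.size());
assertEquals(0L, records.get(0).getRecordIdentifier().getRowId());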