Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.
Class StreamingAssert, method readRecords.
List<Record> readRecords() throws Exception {
  if (currentDeltas.isEmpty()) {
    throw new AssertionError("No data");
  }
  InputFormat<NullWritable, OrcStruct> inputFormat = new OrcInputFormat();
  JobConf job = new JobConf();
  job.set("mapred.input.dir", partitionLocation.toString());
  job.set("bucket_count", Integer.toString(table.getSd().getNumBuckets()));
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
  job.set(ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname, "true");
  job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
  // The whole partition is expected to resolve to a single ACID split.
  InputSplit[] splits = inputFormat.getSplits(job, 1);
  assertEquals(1, splits.length);
  final AcidRecordReader<NullWritable, OrcStruct> recordReader =
      (AcidRecordReader<NullWritable, OrcStruct>) inputFormat.getRecordReader(splits[0], job, Reporter.NULL);
  NullWritable key = recordReader.createKey();
  OrcStruct value = recordReader.createValue();
  List<Record> records = new ArrayList<>();
  while (recordReader.next(key, value)) {
    RecordIdentifier recordIdentifier = recordReader.getRecordIdentifier();
    Record record = new Record(
        new RecordIdentifier(recordIdentifier.getTransactionId(), recordIdentifier.getBucketId(),
            recordIdentifier.getRowId()),
        value.toString());
    System.out.println(record);
    records.add(record);
  }
  recordReader.close();
  return records;
}
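The same mapred pattern appears in all of these snippets: point the JobConf at an input location, ask the InputFormat for splits, then drain each split through a RecordReader. A minimal self-contained sketch of that loop, assuming a readable plain-text file under the hypothetical path /tmp/input on the default file system:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class SplitReadSketch {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf();
    conf.setInputFormat(TextInputFormat.class);
    // Hypothetical input path; replace with a real file or directory.
    FileInputFormat.setInputPaths(conf, new Path("/tmp/input"));
    // getInputFormat() instantiates and configures the format declared above.
    TextInputFormat format = (TextInputFormat) conf.getInputFormat();
    InputSplit[] splits = format.getSplits(conf, 1);
    for (InputSplit split : splits) {
      RecordReader<LongWritable, Text> reader = format.getRecordReader(split, conf, Reporter.NULL);
      LongWritable key = reader.createKey();
      Text value = reader.createValue();
      while (reader.next(key, value)) {
        System.out.println(value);
      }
      reader.close();
    }
  }
}

Going through conf.getInputFormat() rather than constructing the format directly lets the JobConf configure it, which is the same route the asterixdb snippets below take.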
Use of org.apache.hadoop.mapred.InputSplit in project hive by apache.
Class JdbcInputFormatTest, method testSplitLogic_noSpillOver.
@Test
public void testSplitLogic_noSpillOver() throws HiveJdbcDatabaseAccessException, IOException {
  JdbcInputFormat f = new JdbcInputFormat();
  // 15 records over 3 requested splits divide evenly, so no spill-over is expected.
  when(mockDatabaseAccessor.getTotalNumberOfRecords(any(Configuration.class))).thenReturn(15);
  f.setDbAccessor(mockDatabaseAccessor);
  JobConf conf = new JobConf();
  conf.set("mapred.input.dir", "/temp");
  InputSplit[] splits = f.getSplits(conf, 3);
  assertThat(splits, is(notNullValue()));
  assertThat(splits.length, is(3));
  assertThat(splits[0].getLength(), is(5L));
}
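A hedged companion check, assuming (as the last assertion suggests) that each JDBC split reports its record count through InputSplit.getLength(), is to verify that the split lengths cover the mocked total exactly once. A sketch that could sit at the end of the same test:

// Sketch only: assumes getLength() of every JDBC split is its record count.
long coveredRecords = 0L;
for (InputSplit split : splits) {
  coveredRecords += split.getLength();
}
assertThat(coveredRecords, is(15L));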
Use of org.apache.hadoop.mapred.InputSplit in project asterixdb by apache.
Class HDFSCluster, method main.
public static void main(String[] args) throws Exception {
  HDFSCluster cluster = new HDFSCluster();
  cluster.setup();
  JobConf conf = configureJobConf();
  InputSplit[] inputSplits = conf.getInputFormat().getSplits(conf, 0);
  for (InputSplit split : inputSplits) {
    System.out.println("split :" + split);
  }
}
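Besides its toString() form, the mapred InputSplit interface also exposes getLength() and getLocations(), so the debugging loop above could print a fuller picture. A small sketch of that variant (it additionally needs java.util.Arrays):

for (InputSplit split : inputSplits) {
  System.out.println("split: " + split
      + ", length: " + split.getLength()
      + ", locations: " + Arrays.toString(split.getLocations()));
}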
Use of org.apache.hadoop.mapred.InputSplit in project asterixdb by apache.
Class IndexingScheduler, method buildPopularityMap.
/**
 * Scan the splits once and build a popularity map
 *
 * @param splits
 *            the split array
 * @param locationToNumOfSplits
 *            the map to be built
 * @throws IOException
 */
private void buildPopularityMap(InputSplit[] splits, Map<String, IntWritable> locationToNumOfSplits)
    throws IOException {
  for (InputSplit split : splits) {
    String[] locations = split.getLocations();
    for (String loc : locations) {
      IntWritable locCount = locationToNumOfSplits.get(loc);
      if (locCount == null) {
        locCount = new IntWritable(0);
        locationToNumOfSplits.put(loc, locCount);
      }
      locCount.set(locCount.get() + 1);
    }
  }
}
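A caller can then consult the map, for instance to find the host that carries the most splits. The following is a hypothetical usage sketch, not code from IndexingScheduler, and assumes java.util.HashMap is available alongside the existing imports:

// Hypothetical usage: rank hosts by how many splits list them as a location.
Map<String, IntWritable> locationToNumOfSplits = new HashMap<>();
buildPopularityMap(splits, locationToNumOfSplits);
String busiestHost = null;
int maxSplits = -1;
for (Map.Entry<String, IntWritable> entry : locationToNumOfSplits.entrySet()) {
  if (entry.getValue().get() > maxSplits) {
    maxSplits = entry.getValue().get();
    busiestHost = entry.getKey();
  }
}
System.out.println("most loaded host: " + busiestHost + " (" + maxSplits + " splits)");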
Use of org.apache.hadoop.mapred.InputSplit in project asterixdb by apache.
Class HDFSDataSourceFactory, method configure.
@Override
public void configure(IServiceContext serviceCtx, Map<String, String> configuration) throws AsterixException {
  try {
    this.serviceCtx = serviceCtx;
    this.configuration = configuration;
    init((ICCServiceContext) serviceCtx);
    JobConf conf = HDFSUtils.configureHDFSJobConf(configuration);
    confFactory = new ConfFactory(conf);
    clusterLocations = getPartitionConstraint();
    int numPartitions = clusterLocations.getLocations().length;
    // if files list was set, we restrict the splits to the list
    InputSplit[] inputSplits;
    if (files == null) {
      inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);
    } else {
      inputSplits = HDFSUtils.getSplits(conf, files);
    }
    if (indexingOp) {
      readSchedule = indexingScheduler.getLocationConstraints(inputSplits);
    } else {
      readSchedule = hdfsScheduler.getLocationConstraints(inputSplits);
    }
    inputSplitsFactory = new InputSplitsFactory(inputSplits);
    read = new boolean[readSchedule.length];
    Arrays.fill(read, false);
    String formatString = configuration.get(ExternalDataConstants.KEY_FORMAT);
    if (formatString == null || formatString.equals(ExternalDataConstants.FORMAT_HDFS_WRITABLE)) {
      RecordReader<?, ?> reader = conf.getInputFormat().getRecordReader(inputSplits[0], conf, Reporter.NULL);
      this.recordClass = reader.createValue().getClass();
      reader.close();
    } else {
      recordReaderClazz = StreamRecordReaderProvider.getRecordReaderClazz(configuration);
      this.recordClass = char[].class;
    }
  } catch (IOException e) {
    throw new AsterixException(e);
  }
}
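The readSchedule returned by the schedulers pairs each input split with the name of the cluster node that should read it, in split order (which is why read is sized to readSchedule.length). A toy round-robin sketch of that shape is shown below; it is an illustration only, not the actual hdfsScheduler or indexingScheduler logic, which also weighs split locality:

// Toy illustration: assign each split to a cluster node round-robin.
// The real schedulers take the splits' block locations into account.
String[] nodes = clusterLocations.getLocations();
String[] naiveSchedule = new String[inputSplits.length];
for (int i = 0; i < inputSplits.length; i++) {
  naiveSchedule[i] = nodes[i % nodes.length];
}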