Use of org.apache.hadoop.mapred.FileSplit in project presto by prestodb.
In the class TestHiveFileFormats, the method testCursorProvider:
private void testCursorProvider(HiveRecordCursorProvider cursorProvider, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, ConnectorSession session, int rowCount) {
    // Partition-key test columns drive the partition keys and their column handles; the remaining columns describe the table data.
    List<HivePartitionKey> partitionKeys = testColumns.stream().filter(TestColumn::isPartitionKey).map(TestColumn::toHivePartitionKey).collect(toList());
    List<HiveColumnHandle> partitionKeyColumnHandles = getColumnHandles(testColumns.stream().filter(TestColumn::isPartitionKey).collect(toImmutableList()));
    List<Column> tableDataColumns = testColumns.stream()
            .filter(column -> !column.isPartitionKey())
            .map(column -> new Column(column.getName(), HiveType.valueOf(column.getType()), Optional.empty(), Optional.empty()))
            .collect(toImmutableList());
    Configuration configuration = new Configuration();
    configuration.set("io.compression.codecs", LzoCodec.class.getName() + "," + LzopCodec.class.getName());
    // The FileSplit supplies the path, start offset, and length of the region to read.
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(cursorProvider), ImmutableSet.of(), configuration, session,
            split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), split.getLength(), Instant.now().toEpochMilli(),
            new Storage(StorageFormat.create(storageFormat.getSerDe(), storageFormat.getInputFormat(), storageFormat.getOutputFormat()), "location", Optional.empty(), false, ImmutableMap.of(), ImmutableMap.of()),
            TupleDomain.all(), getColumnHandles(testColumns), ImmutableMap.of(), partitionKeys,
            DateTimeZone.getDefault(), FUNCTION_AND_TYPE_MANAGER, new SchemaTableName("schema", "table"),
            partitionKeyColumnHandles, tableDataColumns, ImmutableMap.of(), tableDataColumns.size(),
            TableToPartitionMapping.empty(), Optional.empty(), false, DEFAULT_HIVE_FILE_CONTEXT,
            TRUE_CONSTANT, false, ROW_EXPRESSION_SERVICE, Optional.empty(), ImmutableMap.of());
    RecordCursor cursor = ((RecordPageSource) pageSource.get()).getCursor();
    checkCursor(cursor, testColumns, rowCount);
}
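For orientation, here is a minimal, hypothetical sketch of the org.apache.hadoop.mapred.FileSplit a caller could hand to this helper; the file path, storage format, and fixture names (cursorProvider, TEST_COLUMNS, SESSION, ROW_COUNT) are placeholders and are not taken from the Presto test itself.
File dataFile = new File("/tmp/hive-test-data.rc");                                                   // hypothetical local test file
FileSplit split = new FileSplit(new Path(dataFile.toURI()), 0, dataFile.length(), (String[]) null);   // whole file, no preferred hosts
// cursorProvider, TEST_COLUMNS, SESSION, and ROW_COUNT are assumed test fixtures.
testCursorProvider(cursorProvider, split, HiveStorageFormat.RCBINARY, TEST_COLUMNS, SESSION, ROW_COUNT);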
Use of org.apache.hadoop.mapred.FileSplit in project flink by apache.
In the class HiveSourceFileEnumerator, the method createInputSplits:
public static List<HiveSourceSplit> createInputSplits(int minNumSplits, List<HiveTablePartition> partitions, ReadableConfig flinkConf, JobConf jobConf) throws IOException {
    List<HiveSourceSplit> hiveSplits = new ArrayList<>();
    try (MRSplitsGetter splitsGetter = new MRSplitsGetter(flinkConf.get(HiveOptions.TABLE_EXEC_HIVE_LOAD_PARTITION_SPLITS_THREAD_NUM))) {
        for (HiveTablePartitionSplits partitionSplits : splitsGetter.getHiveTablePartitionMRSplits(minNumSplits, partitions, jobConf)) {
            HiveTablePartition partition = partitionSplits.getHiveTablePartition();
            for (InputSplit inputSplit : partitionSplits.getInputSplits()) {
                Preconditions.checkState(inputSplit instanceof FileSplit, "Unsupported InputSplit type: " + inputSplit.getClass().getName());
                hiveSplits.add(new HiveSourceSplit((FileSplit) inputSplit, partition, null));
            }
        }
    }
    return hiveSplits;
}
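A rough usage sketch, not taken from the Flink sources: the partition list below is an empty placeholder (real partitions would come from the Hive metastore), and the minimum split count of 4 is arbitrary.
Configuration flinkConf = new Configuration();                        // org.apache.flink.configuration.Configuration implements ReadableConfig
JobConf jobConf = new JobConf();                                      // would normally carry the table's Hive configuration
List<HiveTablePartition> partitions = Collections.emptyList();       // placeholder; real partitions come from the metastore
List<HiveSourceSplit> splits = HiveSourceFileEnumerator.createInputSplits(4, partitions, flinkConf, jobConf);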
Use of org.apache.hadoop.mapred.FileSplit in project flink by apache.
In the class HiveTableFileInputFormat, the method toHadoopFileSplit:
@VisibleForTesting
static FileSplit toHadoopFileSplit(FileInputSplit fileSplit) throws IOException {
    URI uri = fileSplit.getPath().toUri();
    long length = fileSplit.getLength();
    // Hadoop FileSplit should not have -1 length.
    if (length == -1) {
        length = fileSplit.getPath().getFileSystem().getFileStatus(fileSplit.getPath()).getLen() - fileSplit.getStart();
    }
    return new FileSplit(new Path(uri), fileSplit.getStart(), length, (String[]) null);
}
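A small illustrative call (the path is a placeholder): a Flink FileInputSplit created with length -1 exercises the branch above that looks up the real file length before building the Hadoop FileSplit.
// org.apache.flink.core.fs.FileInputSplit: (splitNumber, path, start, length, hosts)
FileInputSplit flinkSplit = new FileInputSplit(0, new org.apache.flink.core.fs.Path("/tmp/data/part-00000"), 0, -1, null);
org.apache.hadoop.mapred.FileSplit hadoopSplit = toHadoopFileSplit(flinkSplit);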
Use of org.apache.hadoop.mapred.FileSplit in project druid by druid-io.
In the class OrcHadoopInputRowParserTest, the method getFirstRow:
private static OrcStruct getFirstRow(Job job, String orcPath) throws IOException {
    File testFile = new File(orcPath);
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), new String[] { "host" });
    InputFormat<NullWritable, OrcStruct> inputFormat = ReflectionUtils.newInstance(OrcInputFormat.class, job.getConfiguration());
    RecordReader<NullWritable, OrcStruct> reader = inputFormat.getRecordReader(split, new JobConf(job.getConfiguration()), null);
    try {
        final NullWritable key = reader.createKey();
        final OrcStruct value = reader.createValue();
        if (reader.next(key, value)) {
            return value;
        } else {
            throw new NoSuchElementException();
        }
    } finally {
        reader.close();
    }
}
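A hedged sketch of how a test might call this helper; the ORC file path and the field name are placeholders, and any extra job configuration the parser test applies is omitted.
Job job = Job.getInstance(new Configuration());
OrcStruct firstRow = getFirstRow(job, "example/test_1.orc");           // hypothetical ORC file
WritableComparable fieldValue = firstRow.getFieldValue("timestamp");   // field name is a placeholder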
Use of org.apache.hadoop.mapred.FileSplit in project mongo-hadoop by mongodb.
In the class BSONFileInputFormat, the method getRecordReader:
@Override
public RecordReader<NullWritable, BSONWritable> getRecordReader(final InputSplit split, final JobConf job, final Reporter reporter) throws IOException {
    FileSplit fileSplit = (FileSplit) split;
    FileSystem fs = FileSystem.get(fileSplit.getPath().toUri(), job);
    if (split instanceof BSONFileSplit || !isSplitable(fs, fileSplit.getPath())) {
        BSONFileRecordReader reader = new BSONFileRecordReader();
        reader.initialize(split, job);
        return reader;
    }
    // Split was not created by BSONSplitter.
    BSONSplitter splitter = new BSONSplitter();
    splitter.setConf(job);
    splitter.setInputPath(fileSplit.getPath());
    org.apache.hadoop.mapreduce.lib.input.FileSplit newStyleFileSplit = new org.apache.hadoop.mapreduce.lib.input.FileSplit(fileSplit.getPath(), fileSplit.getStart(), fileSplit.getLength(), fileSplit.getLocations());
    long start = splitter.getStartingPositionForSplit(newStyleFileSplit);
    BSONFileRecordReader reader = new BSONFileRecordReader(start);
    reader.initialize(fileSplit, job);
    return reader;
}
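To show the returned reader in use, a minimal sketch assuming format is a configured BSONFileInputFormat and split/job come from the usual getSplits call; the iteration follows the standard old-API RecordReader contract.
RecordReader<NullWritable, BSONWritable> reader = format.getRecordReader(split, job, Reporter.NULL);
NullWritable key = reader.createKey();
BSONWritable value = reader.createValue();
while (reader.next(key, value)) {
    BSONObject doc = value.getDoc();    // the deserialized BSON document
    // process doc ...
}
reader.close();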