Use of org.apache.hadoop.mapred.FileSplit in project hive by apache.
The class Base64TextInputFormat, method getRecordReader.
public RecordReader<LongWritable, BytesWritable> getRecordReader(InputSplit genericSplit, JobConf job, Reporter reporter) throws IOException {
    reporter.setStatus(genericSplit.toString());
    Base64LineRecordReader reader = new Base64LineRecordReader(new LineRecordReader(job, (FileSplit) genericSplit));
    reader.configure(job);
    return reader;
}
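As a point of reference, here is a minimal sketch of driving the reader returned by this method. It assumes the hive-contrib class org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextInputFormat is on the classpath and uses a hypothetical input file at /tmp/data.b64; Reporter.NULL stands in for a real reporter.
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextInputFormat;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class Base64ReadSketch {
    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf();
        FileInputFormat.setInputPaths(job, new Path("/tmp/data.b64")); // hypothetical base64-encoded input file
        Base64TextInputFormat inputFormat = new Base64TextInputFormat();
        inputFormat.configure(job);
        for (InputSplit split : inputFormat.getSplits(job, 1)) {
            // Each split handed to getRecordReader here is a FileSplit under the hood.
            RecordReader<LongWritable, BytesWritable> reader = inputFormat.getRecordReader(split, job, Reporter.NULL);
            LongWritable key = reader.createKey();
            BytesWritable value = reader.createValue();
            while (reader.next(key, value)) {
                // value holds the base64-decoded bytes of one input line
            }
            reader.close();
        }
    }
}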
Use of org.apache.hadoop.mapred.FileSplit in project hive by apache.
The class TestOrcFileStripeMergeRecordReader, method testSplitStartsWithOffset.
@Test
public void testSplitStartsWithOffset() throws IOException {
    createOrcFile(DEFAULT_STRIPE_SIZE, DEFAULT_STRIPE_SIZE + 1);
    FileStatus fileStatus = fs.getFileStatus(tmpPath);
    long length = fileStatus.getLen();
    long offset = length / 2;
    FileSplit split = new FileSplit(tmpPath, offset, length, (String[]) null);
    OrcFileStripeMergeRecordReader reader = new OrcFileStripeMergeRecordReader(conf, split);
    reader.next(key, value);
    Assert.assertEquals("InputPath", tmpPath, key.getInputPath());
    Assert.assertEquals("NumberOfValues", 1L, value.getStripeStatistics().getColStats(0).getNumberOfValues());
    reader.close();
}
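For illustration, a small sketch of the FileSplit(path, start, length, hosts) arithmetic this test exercises, under the assumption of a pre-existing ORC file at a hypothetical path; only standard Hadoop filesystem calls are used.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;

public class OffsetSplitSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical setup: an ORC file that already exists on the default filesystem.
        Path orcPath = new Path("/tmp/orc-split-test.orc");
        FileSystem fs = orcPath.getFileSystem(new Configuration());
        long length = fs.getFileStatus(orcPath).getLen();
        // FileSplit(path, start, length, hosts): two splits covering the two halves of the file.
        // A reader given the second split should only return stripes starting inside it.
        FileSplit firstHalf = new FileSplit(orcPath, 0, length / 2, (String[]) null);
        FileSplit secondHalf = new FileSplit(orcPath, length / 2, length - length / 2, (String[]) null);
        System.out.println(firstHalf.getLength() + " + " + secondHalf.getLength() + " = " + length);
    }
}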
Use of org.apache.hadoop.mapred.FileSplit in project presto by prestodb.
The class BackgroundHiveSplitLoader, method addSplitsToSource.
private boolean addSplitsToSource(InputSplit[] targetSplits, String partitionName, List<HivePartitionKey> partitionKeys, Properties schema, TupleDomain<HiveColumnHandle> effectivePredicate, Map<Integer, HiveType> columnCoercions) throws IOException {
    for (InputSplit inputSplit : targetSplits) {
        FileSplit split = (FileSplit) inputSplit;
        FileSystem targetFilesystem = hdfsEnvironment.getFileSystem(session.getUser(), split.getPath());
        FileStatus file = targetFilesystem.getFileStatus(split.getPath());
        hiveSplitSource.addToQueue(createHiveSplitIterator(
                partitionName, file.getPath().toString(),
                targetFilesystem.getFileBlockLocations(file, split.getStart(), split.getLength()),
                split.getStart(), split.getLength(), schema, partitionKeys, false, session,
                OptionalInt.empty(), effectivePredicate, columnCoercions));
        if (stopped) {
            return true;
        }
    }
    return false;
}
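The targetSplits array handed to this method typically comes from a mapred InputFormat. Below is a hedged sketch of producing such splits and reading the FileSplit fields the loop consumes, using TextInputFormat and a hypothetical partition directory as stand-ins.
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SplitEnumerationSketch {
    public static void main(String[] args) throws IOException {
        JobConf jobConf = new JobConf();
        FileInputFormat.setInputPaths(jobConf, new Path("/tmp/partition-dir")); // hypothetical partition directory
        TextInputFormat inputFormat = new TextInputFormat();
        inputFormat.configure(jobConf);
        InputSplit[] targetSplits = inputFormat.getSplits(jobConf, 1);
        for (InputSplit inputSplit : targetSplits) {
            // addSplitsToSource relies on the same cast: splits produced by a
            // FileInputFormat-based format are FileSplits carrying path, start and length.
            FileSplit split = (FileSplit) inputSplit;
            System.out.printf("%s [%d, +%d)%n", split.getPath(), split.getStart(), split.getLength());
        }
    }
}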
Use of org.apache.hadoop.mapred.FileSplit in project presto by prestodb.
The class TestHiveFileFormats, method testPageSourceFactory.
private void testPageSourceFactory(HivePageSourceFactory sourceFactory, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, ConnectorSession session, int rowCount) throws IOException {
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, storageFormat.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerDe());
    splitProperties.setProperty("columns", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getName)));
    splitProperties.setProperty("columns.types", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getType)));
    List<HivePartitionKey> partitionKeys = testColumns.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), HiveType.valueOf(input.getObjectInspector().getTypeName()), (String) input.getWriteValue()))
            .collect(toList());
    List<HiveColumnHandle> columnHandles = getColumnHandles(testColumns);
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(), ImmutableSet.of(sourceFactory), "test", new Configuration(), session,
            split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), splitProperties,
            TupleDomain.all(), columnHandles, partitionKeys, DateTimeZone.getDefault(), TYPE_MANAGER, ImmutableMap.of());
    assertTrue(pageSource.isPresent());
    checkPageSource(pageSource.get(), testColumns, getTypes(columnHandles), rowCount);
}
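The split argument passed to this helper is typically a whole-file FileSplit over a freshly written test file; a minimal sketch of constructing one follows, with the local path purely hypothetical.
import java.io.File;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;

public class WholeFileSplitSketch {
    public static void main(String[] args) {
        File dataFile = new File("/tmp/presto-test-data"); // hypothetical file written earlier by the test
        // Start at byte 0 and cover the entire file; no preferred host hints.
        FileSplit split = new FileSplit(new Path(dataFile.toURI()), 0, dataFile.length(), (String[]) null);
        System.out.println(split.getPath() + " length=" + split.getLength());
    }
}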
Use of org.apache.hadoop.mapred.FileSplit in project presto by prestodb.
The class TestOrcPageSourceMemoryTracking, method createTestFile.
public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat, @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec, List<TestColumn> testColumns, int numRows) throws Exception {
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serDe.initialize(CONFIGURATION, tableProperties);
    JobConf jobConf = new JobConf();
    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }
    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);
    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
                ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));
        Object row = objectInspector.create();
        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }
            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
            if (rowNumber % STRIPE_ROWS == STRIPE_ROWS - 1) {
                flushStripe(recordWriter);
            }
        }
    } finally {
        recordWriter.close(false);
    }
    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
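As a hedged follow-up, assuming the file written above is in ORC format (as it is in TestOrcPageSourceMemoryTracking), a hypothetical helper could summarize what the returned whole-file split covers:
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.mapred.FileSplit;

public class CreatedFileSplitSketch {
    // Prints how many rows landed in the ORC file and which byte range the split spans.
    static void summarize(FileSplit split, Configuration conf) throws IOException {
        Reader reader = OrcFile.createReader(split.getPath(), OrcFile.readerOptions(conf));
        System.out.println("rows written: " + reader.getNumberOfRows());
        System.out.println("split covers bytes [" + split.getStart() + ", " + (split.getStart() + split.getLength()) + ")");
    }
}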