Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
Class StreamInputFormat, method createRecordReader:
@Override
public RecordReader<Text, Text> createRecordReader(InputSplit genericSplit,
    TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  String c = conf.get("stream.recordreader.class");
  if (c == null || c.indexOf("LineRecordReader") >= 0) {
    return super.createRecordReader(genericSplit, context);
  }
  // handling non-standard record reader (likely StreamXmlRecordReader)
  FileSplit split = (FileSplit) genericSplit;
  // LOG.info("getRecordReader start.....split=" + split);
  context.setStatus(split.toString());
  context.progress();
  // Open the file and seek to the start of the split
  FileSystem fs = split.getPath().getFileSystem(conf);
  FSDataInputStream in = fs.open(split.getPath());
  // Factory dispatch based on available params..
  Class readerClass;
  {
    readerClass = StreamUtil.goodClassOrNull(conf, c, null);
    if (readerClass == null) {
      throw new RuntimeException("Class not found: " + c);
    }
  }
  Constructor ctor;
  try {
    ctor = readerClass.getConstructor(new Class[] { FSDataInputStream.class,
        FileSplit.class, TaskAttemptContext.class, Configuration.class, FileSystem.class });
  } catch (NoSuchMethodException nsm) {
    throw new RuntimeException(nsm);
  }
  RecordReader<Text, Text> reader;
  try {
    reader = (RecordReader<Text, Text>) ctor.newInstance(new Object[] { in, split, context, conf, fs });
  } catch (Exception nsm) {
    throw new RuntimeException(nsm);
  }
  return reader;
}
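Whatever class is named in stream.recordreader.class must expose exactly the five-argument constructor that the factory above reflects over. The skeleton below is a hypothetical sketch of such a reader (the class name and the empty method bodies are illustrative, and it assumes the same imports the snippet above relies on); it is not the actual StreamXmlRecordReader.

// Hypothetical reader skeleton; only the constructor signature matters to the factory above.
public class MyStreamRecordReader extends RecordReader<Text, Text> {
  private final FSDataInputStream in;

  public MyStreamRecordReader(FSDataInputStream in, FileSplit split,
      TaskAttemptContext context, Configuration conf, FileSystem fs) throws IOException {
    this.in = in;
    // a real reader would seek to split.getStart() here and prepare to parse records
  }

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) { }

  @Override
  public boolean nextKeyValue() throws IOException {
    // no records in this skeleton
    return false;
  }

  @Override
  public Text getCurrentKey() { return null; }

  @Override
  public Text getCurrentValue() { return null; }

  @Override
  public float getProgress() { return 0.0f; }

  @Override
  public void close() throws IOException {
    in.close();
  }
}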
Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
Class FileSystemRMStateStore, method readFile:
private byte[] readFile(Path inputPath, long len) throws Exception {
  FSDataInputStream fsIn = null;
  try {
    fsIn = fs.open(inputPath);
    // state data will not be that "long"
    byte[] data = new byte[(int) len];
    fsIn.readFully(data);
    return data;
  } finally {
    IOUtils.cleanup(LOG, fsIn);
  }
}
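readFully(byte[]) blocks until the whole buffer has been filled (or throws EOFException), which is why the buffer can be sized up front from the known length. A minimal standalone sketch of the same pattern, with a purely illustrative path and the length taken from getFileStatus:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path inputPath = new Path("/tmp/rmstore/example-state");  // illustrative path
int len = (int) fs.getFileStatus(inputPath).getLen();
byte[] data = new byte[len];
try (FSDataInputStream in = fs.open(inputPath)) {
  in.readFully(data);  // fills the whole buffer or throws EOFException
}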
Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
Class TestReadAndSeekPageBlobAfterWrite, method readRandomDataAndVerify:
/**
 * Read "size" bytes of data and verify that what was read and what was written
 * are the same.
 */
private void readRandomDataAndVerify(int size) throws AzureException, IOException {
  byte[] b = new byte[size];
  FSDataInputStream stream = fs.open(PATH);
  int bytesRead = stream.read(b);
  stream.close();
  assertEquals(bytesRead, size);
  // compare the data read to the data written
  assertTrue(comparePrefix(randomData, b, size));
}
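One caveat: a single read(byte[]) call is only guaranteed to return some bytes, not to fill the buffer, so the assertEquals here relies on the store delivering the full range in one call. A stricter sketch of the same check using the positioned readFully overload (reusing the test's fs, PATH, randomData and comparePrefix, which are assumed from the surrounding class):

byte[] b = new byte[size];
try (FSDataInputStream stream = fs.open(PATH)) {
  stream.readFully(0, b, 0, size);  // throws EOFException if fewer than size bytes are available
}
assertTrue(comparePrefix(randomData, b, size));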
Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
Class TestReadAndSeekPageBlobAfterWrite, method writeAndReadOneFile:
/**
 * Write a total of numWrites * recordLength data to a file, read it back,
 * and check to make sure what was read is the same as what was written.
 * The syncInterval is the number of writes after which to call hflush to
 * force the data to storage.
 */
private void writeAndReadOneFile(int numWrites, int recordLength, int syncInterval) throws IOException {
  final int NUM_WRITES = numWrites;
  final int RECORD_LENGTH = recordLength;
  final int SYNC_INTERVAL = syncInterval;
  // A lower bound on the minimum time we think it will take to do
  // a write to Azure storage.
  final long MINIMUM_EXPECTED_TIME = 20;
  LOG.info("Writing " + NUM_WRITES * RECORD_LENGTH + " bytes to " + PATH.getName());
  FSDataOutputStream output = fs.create(PATH);
  int writesSinceHFlush = 0;
  try {
    // Do a flush and hflush to exercise case for empty write queue in PageBlobOutputStream,
    // to test concurrent execution gates.
    output.flush();
    output.hflush();
    for (int i = 0; i < NUM_WRITES; i++) {
      output.write(randomData, i * RECORD_LENGTH, RECORD_LENGTH);
      writesSinceHFlush++;
      output.flush();
      if ((i % SYNC_INTERVAL) == 0) {
        long start = Time.monotonicNow();
        output.hflush();
        writesSinceHFlush = 0;
        long end = Time.monotonicNow();
        // A true, round-trip synchronous flush to Azure must take
        // a significant amount of time or we are not syncing to storage correctly.
        LOG.debug("hflush duration = " + (end - start) + " msec.");
        assertTrue(String.format("hflush duration of %d, less than minimum expected of %d",
            end - start, MINIMUM_EXPECTED_TIME), end - start >= MINIMUM_EXPECTED_TIME);
      }
    }
  } finally {
    long start = Time.monotonicNow();
    output.close();
    long end = Time.monotonicNow();
    LOG.debug("close duration = " + (end - start) + " msec.");
    if (writesSinceHFlush > 0) {
      assertTrue(String.format("close duration with >= 1 pending write is %d, less than minimum expected of %d",
          end - start, MINIMUM_EXPECTED_TIME), end - start >= MINIMUM_EXPECTED_TIME);
    }
  }
  // Read the data back and check it.
  FSDataInputStream stream = fs.open(PATH);
  int SIZE = NUM_WRITES * RECORD_LENGTH;
  byte[] b = new byte[SIZE];
  try {
    stream.seek(0);
    stream.read(b, 0, SIZE);
    verifyReadRandomData(b, SIZE, 0, SIZE);
  } finally {
    stream.close();
  }
  // delete the file
  fs.delete(PATH, false);
}
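For orientation, an illustrative invocation of this helper; the parameter values below are made up for the example rather than taken from the actual test methods in this class.

writeAndReadOneFile(50, 1024, 10);  // numWrites, recordLength, syncInterval (illustrative values)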
Use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
Class TestReadAndSeekPageBlobAfterWrite, method testPageBlobSeekAndReadAfterWrite:
/**
 * Write data to a page blob, open it, seek, and then read a range of data.
 * Then compare that the data read from that range is the same as the data originally written.
 */
@Test
public void testPageBlobSeekAndReadAfterWrite() throws IOException {
  writeRandomData(PAGE_SIZE * MAX_PAGES);
  int recordSize = 100;
  byte[] b = new byte[recordSize];
  FSDataInputStream stream = fs.open(PATH);
  // Seek to a boundary around the middle of the 6th page
  int seekPosition = 5 * PAGE_SIZE + 250;
  stream.seek(seekPosition);
  // Read a record's worth of bytes and verify results
  int bytesRead = stream.read(b);
  verifyReadRandomData(b, bytesRead, seekPosition, recordSize);
  // Seek to another spot and read a record greater than a page
  seekPosition = 10 * PAGE_SIZE + 250;
  stream.seek(seekPosition);
  recordSize = 1000;
  b = new byte[recordSize];
  bytesRead = stream.read(b);
  verifyReadRandomData(b, bytesRead, seekPosition, recordSize);
  // Read the last 100 bytes of the file
  recordSize = 100;
  seekPosition = PAGE_SIZE * MAX_PAGES - recordSize;
  stream.seek(seekPosition);
  b = new byte[recordSize];
  bytesRead = stream.read(b);
  verifyReadRandomData(b, bytesRead, seekPosition, recordSize);
  // Read past the end of the file and we should get only partial data.
  recordSize = 100;
  seekPosition = PAGE_SIZE * MAX_PAGES - recordSize + 50;
  stream.seek(seekPosition);
  b = new byte[recordSize];
  bytesRead = stream.read(b);
  assertEquals(50, bytesRead);
  // compare last 50 bytes written with those read
  byte[] tail = Arrays.copyOfRange(randomData, seekPosition, randomData.length);
  assertTrue(comparePrefix(tail, b, 50));
}
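Since FSDataInputStream also implements PositionedReadable, each seek-then-read pair above could be written as a single positioned read that leaves the stream's own offset untouched. A brief sketch, reusing the test's fs, PATH and PAGE_SIZE:

byte[] b = new byte[100];
try (FSDataInputStream stream = fs.open(PATH)) {
  // read(position, buffer, offset, length) reads at an absolute offset
  // without moving the stream's current position.
  int bytesRead = stream.read(5 * PAGE_SIZE + 250, b, 0, b.length);
}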