use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
the class SplitMetaInfoReader method readSplitMetaInfo.
public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir) throws IOException {
  long maxMetaInfoSize = conf.getLong(MRJobConfig.SPLIT_METAINFO_MAXSIZE, MRJobConfig.DEFAULT_SPLIT_METAINFO_MAXSIZE);
  Path metaSplitFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
  String jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir).toString();
  FileStatus fStatus = fs.getFileStatus(metaSplitFile);
  if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
    throw new IOException("Split metadata size exceeded " + maxMetaInfoSize + ". Aborting job " + jobId);
  }
  FSDataInputStream in = fs.open(metaSplitFile);
  byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
  in.readFully(header);
  if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
    in.close();  // close the stream before aborting so it is not leaked
    throw new IOException("Invalid header on split file");
  }
  int vers = WritableUtils.readVInt(in);
  if (vers != JobSplit.META_SPLIT_VERSION) {
    in.close();
    throw new IOException("Unsupported split version " + vers);
  }
  //TODO: check for insane values
  int numSplits = WritableUtils.readVInt(in);
  JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo = new JobSplit.TaskSplitMetaInfo[numSplits];
  for (int i = 0; i < numSplits; i++) {
    JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
    splitMetaInfo.readFields(in);
    JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(jobSplitFile, splitMetaInfo.getStartOffset());
    allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex, splitMetaInfo.getLocations(), splitMetaInfo.getInputDataLength());
  }
  in.close();
  return allSplitMetaInfo;
}
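A minimal sketch (not from the Hadoop source) of how a caller might use this reader once the job client has written the split files into a staging directory; the helper name, staging path, and job id below are placeholders.

public static void printSplitMetaInfo(Configuration conf) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  // Hypothetical staging directory and job id, for illustration only.
  Path jobSubmitDir = new Path("/tmp/staging/job_0001");
  JobID jobId = JobID.forName("job_1400000000000_0001");
  JobSplit.TaskSplitMetaInfo[] splits = SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf, jobSubmitDir);
  for (JobSplit.TaskSplitMetaInfo meta : splits) {
    // Each entry carries the split length and host locations recorded in the meta file.
    System.out.println("length=" + meta.getInputDataLength() + ", locations=" + Arrays.toString(meta.getLocations()));
  }
}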
use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
the class TestAliyunOSSInputStream method testReadFile.
@Test
public void testReadFile() throws Exception {
  final int bufLen = 256;
  final int sizeFlag = 5;
  String filename = "readTestFile_" + sizeFlag + ".txt";
  Path readTestFile = setPath("/test/" + filename);
  long size = sizeFlag * 1024 * 1024;
  ContractTestUtils.generateTestFile(this.fs, readTestFile, size, 256, 255);
  LOG.info(sizeFlag + "MB file created: /test/" + filename);
  FSDataInputStream instream = this.fs.open(readTestFile);
  byte[] buf = new byte[bufLen];
  long bytesRead = 0;
  while (bytesRead < size) {
    int bytes;
    if (size - bytesRead < bufLen) {
      int remaining = (int) (size - bytesRead);
      bytes = instream.read(buf, 0, remaining);
    } else {
      bytes = instream.read(buf, 0, bufLen);
    }
    bytesRead += bytes;
    if (bytesRead % (1024 * 1024) == 0) {
      int available = instream.available();
      int remaining = (int) (size - bytesRead);
      assertTrue("expected remaining:" + remaining + ", but got:" + available, remaining == available);
      LOG.info("Bytes read: " + Math.round((double) bytesRead / (1024 * 1024)) + " MB");
    }
  }
  assertTrue(instream.available() == 0);
  IOUtils.closeStream(instream);
}
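FSDataInputStream also implements PositionedReadable, so a test (or client) can read at an absolute offset without disturbing the current stream position. A hedged sketch with an illustrative helper name:

public static byte[] readAt(FileSystem fs, Path file, long offset, int len) throws IOException {
  byte[] buf = new byte[len];
  FSDataInputStream in = fs.open(file);
  try {
    // Positioned read: fills buf starting at the given offset, leaving getPos() unchanged.
    in.readFully(offset, buf);
  } finally {
    IOUtils.closeStream(in);
  }
  return buf;
}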
use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
the class TestAliyunOSSInputStream method testSeekFile.
@Test
public void testSeekFile() throws Exception {
  Path smallSeekFile = setPath("/test/smallSeekFile.txt");
  long size = 5 * 1024 * 1024;
  ContractTestUtils.generateTestFile(this.fs, smallSeekFile, size, 256, 255);
  LOG.info("5MB file created: smallSeekFile.txt");
  FSDataInputStream instream = this.fs.open(smallSeekFile);
  int seekTimes = 5;
  LOG.info("multiple fold position seeking test...:");
  for (int i = 0; i < seekTimes; i++) {
    long pos = size / (seekTimes - i) - 1;
    LOG.info("begin seeking for pos: " + pos);
    instream.seek(pos);
    assertTrue("expected position at:" + pos + ", but got:" + instream.getPos(), instream.getPos() == pos);
    LOG.info("completed seeking at pos: " + instream.getPos());
  }
  LOG.info("random position seeking test...:");
  Random rand = new Random();
  for (int i = 0; i < seekTimes; i++) {
    long pos = Math.abs(rand.nextLong()) % size;
    LOG.info("begin seeking for pos: " + pos);
    instream.seek(pos);
    assertTrue("expected position at:" + pos + ", but got:" + instream.getPos(), instream.getPos() == pos);
    LOG.info("completed seeking at pos: " + instream.getPos());
  }
  IOUtils.closeStream(instream);
}
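The Seekable contract this test exercises reduces to: seek to a position, confirm getPos() reports it, then read from there. A hedged sketch (the helper name is illustrative, not part of the test):

public static int readByteAt(FileSystem fs, Path file, long pos) throws IOException {
  FSDataInputStream in = fs.open(file);
  try {
    in.seek(pos);
    // After a successful seek, getPos() must report the requested offset.
    if (in.getPos() != pos) {
      throw new IOException("seek landed at " + in.getPos() + " instead of " + pos);
    }
    return in.read();  // the byte at pos, or -1 at end of file
  } finally {
    IOUtils.closeStream(in);
  }
}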
use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
the class TestDataJoin method confirmOutput.
private static void confirmOutput(Path out, JobConf job, int srcs) throws IOException {
  FileSystem fs = out.getFileSystem(job);
  FileStatus[] outlist = fs.listStatus(out);
  assertEquals(1, outlist.length);
  assertTrue(0 < outlist[0].getLen());
  FSDataInputStream in = fs.open(outlist[0].getPath());
  LineRecordReader rr = new LineRecordReader(in, 0, Integer.MAX_VALUE, job);
  LongWritable k = new LongWritable();
  Text v = new Text();
  int count = 0;
  while (rr.next(k, v)) {
    String[] vals = v.toString().split("\t");
    assertEquals(srcs + 1, vals.length);
    int[] ivals = new int[vals.length];
    for (int i = 0; i < vals.length; ++i) {
      ivals[i] = Integer.parseInt(vals[i]);
    }
    assertEquals(0, ivals[0] % (srcs * srcs));
    for (int i = 1; i < vals.length; ++i) {
      assertEquals((ivals[i] - (i - 1)) * srcs, 10 * ivals[0]);
    }
    ++count;
  }
  assertEquals(4, count);
}
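A hedged sketch of the same read-back pattern in isolation: open one text output file as an FSDataInputStream and count its records with the old-API LineRecordReader (the helper name and parameters are illustrative).

private static int countRecords(FileSystem fs, Path partFile, JobConf job) throws IOException {
  FSDataInputStream in = fs.open(partFile);
  LineRecordReader rr = new LineRecordReader(in, 0, Integer.MAX_VALUE, job);
  LongWritable key = rr.createKey();
  Text value = rr.createValue();
  int records = 0;
  while (rr.next(key, value)) {
    records++;
  }
  rr.close();  // release the reader and its underlying stream
  return records;
}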
use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.
the class TestHistograms method testHistograms.
/**
 * @throws IOException
 *
 * There should be files in the directory named by
 * ${test.tools.input.dir}/rumen/histogram-tests.
 *
 * There will be pairs of files, inputXxx.json and goldXxx.json.
 *
 * We read the input file as a HistogramRawTestData in JSON. Then we
 * create a Histogram using the data field, and then a
 * LoggedDiscreteCDF using the percentiles and scale fields. Finally,
 * we read the corresponding goldXxx.json as a LoggedDiscreteCDF and
 * deepCompare them.
 */
@Test
public void testHistograms() throws IOException {
  final Configuration conf = new Configuration();
  final FileSystem lfs = FileSystem.getLocal(conf);
  final Path rootInputDir = new Path(System.getProperty("test.tools.input.dir", "")).makeQualified(lfs);
  final Path rootInputFile = new Path(rootInputDir, "rumen/histogram-tests");
  FileStatus[] tests = lfs.listStatus(rootInputFile);
  for (int i = 0; i < tests.length; ++i) {
    Path filePath = tests[i].getPath();
    String fileName = filePath.getName();
    if (fileName.startsWith("input")) {
      String testName = fileName.substring("input".length());
      Path goldFilePath = new Path(rootInputFile, "gold" + testName);
      assertTrue("Gold file does not exist", lfs.exists(goldFilePath));
      LoggedDiscreteCDF newResult = histogramFileToCDF(filePath, lfs);
      System.out.println("Testing a Histogram for " + fileName);
      FSDataInputStream goldStream = lfs.open(goldFilePath);
      JsonObjectMapperParser<LoggedDiscreteCDF> parser = new JsonObjectMapperParser<LoggedDiscreteCDF>(goldStream, LoggedDiscreteCDF.class);
      try {
        LoggedDiscreteCDF dcdf = parser.getNext();
        dcdf.deepCompare(newResult, new TreePath(null, "<root>"));
      } catch (DeepInequalityException e) {
        fail(e.path.toString());
      } finally {
        parser.close();
      }
    }
  }
}
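The gold-file handling above follows a common FSDataInputStream pattern: open the file, hand the stream to a parser, and close the parser in a finally block. A hedged, generic sketch of the underlying open-and-read step (the helper name is illustrative):

public static String readSmallFile(FileSystem fs, Path file) throws IOException {
  FileStatus status = fs.getFileStatus(file);
  byte[] data = new byte[(int) status.getLen()];  // assumes the file is small enough for memory
  FSDataInputStream in = fs.open(file);
  try {
    in.readFully(0, data);  // positioned readFully: read the whole file
  } finally {
    IOUtils.closeStream(in);
  }
  return new String(data, StandardCharsets.UTF_8);
}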