
Example 81 with FSDataInputStream

use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.

the class SplitMetaInfoReader method readSplitMetaInfo.

public static JobSplit.TaskSplitMetaInfo[] readSplitMetaInfo(JobID jobId, FileSystem fs, Configuration conf, Path jobSubmitDir) throws IOException {
    long maxMetaInfoSize = conf.getLong(MRJobConfig.SPLIT_METAINFO_MAXSIZE, MRJobConfig.DEFAULT_SPLIT_METAINFO_MAXSIZE);
    Path metaSplitFile = JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir);
    String jobSplitFile = JobSubmissionFiles.getJobSplitFile(jobSubmitDir).toString();
    FileStatus fStatus = fs.getFileStatus(metaSplitFile);
    if (maxMetaInfoSize > 0 && fStatus.getLen() > maxMetaInfoSize) {
        throw new IOException("Split metadata size exceeded " + maxMetaInfoSize + ". Aborting job " + jobId);
    }
    FSDataInputStream in = fs.open(metaSplitFile);
    byte[] header = new byte[JobSplit.META_SPLIT_FILE_HEADER.length];
    in.readFully(header);
    if (!Arrays.equals(JobSplit.META_SPLIT_FILE_HEADER, header)) {
        // close the stream before throwing so the descriptor is not leaked
        in.close();
        throw new IOException("Invalid header on split file");
    }
    int vers = WritableUtils.readVInt(in);
    if (vers != JobSplit.META_SPLIT_VERSION) {
        in.close();
        throw new IOException("Unsupported split version " + vers);
    }
    //TODO: check for insane values
    int numSplits = WritableUtils.readVInt(in);
    JobSplit.TaskSplitMetaInfo[] allSplitMetaInfo = new JobSplit.TaskSplitMetaInfo[numSplits];
    for (int i = 0; i < numSplits; i++) {
        JobSplit.SplitMetaInfo splitMetaInfo = new JobSplit.SplitMetaInfo();
        splitMetaInfo.readFields(in);
        JobSplit.TaskSplitIndex splitIndex = new JobSplit.TaskSplitIndex(jobSplitFile, splitMetaInfo.getStartOffset());
        allSplitMetaInfo[i] = new JobSplit.TaskSplitMetaInfo(splitIndex, splitMetaInfo.getLocations(), splitMetaInfo.getInputDataLength());
    }
    in.close();
    return allSplitMetaInfo;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) IOException(java.io.IOException) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream)
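
For orientation, a minimal sketch of how this reader might be invoked; jobId, fs, conf and jobSubmitDir are illustrative stand-ins for a real submitted job:

// Hypothetical call site: the argument values are assumptions, not taken from the source.
JobSplit.TaskSplitMetaInfo[] splits = SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, conf, jobSubmitDir);
for (JobSplit.TaskSplitMetaInfo info : splits) {
    // each entry pairs the split-file offset with the split's preferred host locations
    System.out.println(Arrays.toString(info.getLocations()) + " -> " + info.getInputDataLength() + " bytes");
}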

Example 82 with FSDataInputStream

use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.

the class TestAliyunOSSInputStream method testReadFile.

@Test
public void testReadFile() throws Exception {
    final int bufLen = 256;
    final int sizeFlag = 5;
    String filename = "readTestFile_" + sizeFlag + ".txt";
    Path readTestFile = setPath("/test/" + filename);
    long size = sizeFlag * 1024 * 1024;
    ContractTestUtils.generateTestFile(this.fs, readTestFile, size, 256, 255);
    LOG.info(sizeFlag + "MB file created: /test/" + filename);
    FSDataInputStream instream = this.fs.open(readTestFile);
    byte[] buf = new byte[bufLen];
    long bytesRead = 0;
    while (bytesRead < size) {
        int bytes;
        if (size - bytesRead < bufLen) {
            int remaining = (int) (size - bytesRead);
            bytes = instream.read(buf, 0, remaining);
        } else {
            bytes = instream.read(buf, 0, bufLen);
        }
        if (bytes < 0) {
            // defend against an unexpected EOF so the loop cannot spin forever
            break;
        }
        bytesRead += bytes;
        if (bytesRead % (1024 * 1024) == 0) {
            int available = instream.available();
            int remaining = (int) (size - bytesRead);
            assertTrue("expected remaining:" + remaining + ", but got:" + available, remaining == available);
            LOG.info("Bytes read: " + Math.round((double) bytesRead / (1024 * 1024)) + " MB");
        }
    }
    assertTrue(instream.available() == 0);
    IOUtils.closeStream(instream);
}
Also used : Path(org.apache.hadoop.fs.Path) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Test(org.junit.Test)
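
When a caller only needs the raw bytes and the file is known to be small, the positioned readFully variant can replace the manual loop. A minimal sketch, assuming readTestFile fits in memory:

byte[] data = new byte[(int) fs.getFileStatus(readTestFile).getLen()];
FSDataInputStream in = fs.open(readTestFile);
try {
    // readFully(position, buffer) loops internally until the buffer is full
    // and throws EOFException on a short read
    in.readFully(0, data);
} finally {
    IOUtils.closeStream(in);
}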

Example 83 with FSDataInputStream

use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.

the class TestAliyunOSSInputStream method testSeekFile.

@Test
public void testSeekFile() throws Exception {
    Path smallSeekFile = setPath("/test/smallSeekFile.txt");
    long size = 5 * 1024 * 1024;
    ContractTestUtils.generateTestFile(this.fs, smallSeekFile, size, 256, 255);
    LOG.info("5MB file created: smallSeekFile.txt");
    FSDataInputStream instream = this.fs.open(smallSeekFile);
    int seekTimes = 5;
    LOG.info("multiple fold position seeking test...:");
    for (int i = 0; i < seekTimes; i++) {
        long pos = size / (seekTimes - i) - 1;
        LOG.info("begin seeking for pos: " + pos);
        instream.seek(pos);
        assertTrue("expected position at:" + pos + ", but got:" + instream.getPos(), instream.getPos() == pos);
        LOG.info("completed seeking at pos: " + instream.getPos());
    }
    LOG.info("random position seeking test...:");
    Random rand = new Random();
    for (int i = 0; i < seekTimes; i++) {
        // mask the sign bit instead of Math.abs(): Math.abs(Long.MIN_VALUE) is still negative
        long pos = (rand.nextLong() & Long.MAX_VALUE) % size;
        LOG.info("begin seeking for pos: " + pos);
        instream.seek(pos);
        assertTrue("expected position at:" + pos + ", but got:" + instream.getPos(), instream.getPos() == pos);
        LOG.info("completed seeking at pos: " + instream.getPos());
    }
    IOUtils.closeStream(instream);
}
Also used : Path(org.apache.hadoop.fs.Path) Random(java.util.Random) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Test(org.junit.Test)
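
FSDataInputStream also implements PositionedReadable, so a caller can read at an arbitrary offset without moving the cursor that seek() controls. A minimal sketch reusing instream, with pos as an illustrative offset:

byte[] buf = new byte[256];
long before = instream.getPos();
// positioned read: fetches up to buf.length bytes starting at pos
int n = instream.read(pos, buf, 0, buf.length);
// the cursor is untouched, so instream.getPos() still equals before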

Example 84 with FSDataInputStream

use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.

the class TestDataJoin method confirmOutput.

private static void confirmOutput(Path out, JobConf job, int srcs) throws IOException {
    FileSystem fs = out.getFileSystem(job);
    FileStatus[] outlist = fs.listStatus(out);
    assertEquals(1, outlist.length);
    assertTrue(0 < outlist[0].getLen());
    FSDataInputStream in = fs.open(outlist[0].getPath());
    LineRecordReader rr = new LineRecordReader(in, 0, Integer.MAX_VALUE, job);
    LongWritable k = new LongWritable();
    Text v = new Text();
    int count = 0;
    while (rr.next(k, v)) {
        String[] vals = v.toString().split("\t");
        assertEquals(srcs + 1, vals.length);
        int[] ivals = new int[vals.length];
        for (int i = 0; i < vals.length; ++i) ivals[i] = Integer.parseInt(vals[i]);
        assertEquals(0, ivals[0] % (srcs * srcs));
        for (int i = 1; i < vals.length; ++i) {
            assertEquals((ivals[i] - (i - 1)) * srcs, 10 * ivals[0]);
        }
        ++count;
    }
    assertEquals(4, count);
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable)
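
Wrapping the stream in a BufferedReader is another common way to consume text output (both BufferedReader and InputStreamReader appear in the aggregation counts below). A minimal sketch, assuming UTF-8 content:

FSDataInputStream in = fs.open(outlist[0].getPath());
BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
try {
    String line;
    while ((line = reader.readLine()) != null) {
        // split on tabs here, as the record-reader loop above does
    }
} finally {
    reader.close(); // closing the reader also closes the wrapped stream
}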

Example 85 with FSDataInputStream

use of org.apache.hadoop.fs.FSDataInputStream in project hadoop by apache.

the class TestHistograms method testHistograms.

/**
   * There should be files in the directory named by
   * ${test.build.data}/rumen/histogram-tests .
   * 
   * There will be pairs of files, inputXxx.json and goldXxx.json .
   * 
   * We read the input file as a HistogramRawTestData in json. Then we
   * create a Histogram using the data field, and then a
   * LoggedDiscreteCDF using the percentiles and scale field. Finally,
   * we read the corresponding goldXxx.json as a LoggedDiscreteCDF and
   * deepCompare them.
   * 
   * @throws IOException
   */
@Test
public void testHistograms() throws IOException {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);
    final Path rootInputDir = new Path(System.getProperty("test.tools.input.dir", "")).makeQualified(lfs);
    final Path rootInputFile = new Path(rootInputDir, "rumen/histogram-tests");
    FileStatus[] tests = lfs.listStatus(rootInputFile);
    for (int i = 0; i < tests.length; ++i) {
        Path filePath = tests[i].getPath();
        String fileName = filePath.getName();
        if (fileName.startsWith("input")) {
            String testName = fileName.substring("input".length());
            Path goldFilePath = new Path(rootInputFile, "gold" + testName);
            assertTrue("Gold file dies not exist", lfs.exists(goldFilePath));
            LoggedDiscreteCDF newResult = histogramFileToCDF(filePath, lfs);
            System.out.println("Testing a Histogram for " + fileName);
            FSDataInputStream goldStream = lfs.open(goldFilePath);
            JsonObjectMapperParser<LoggedDiscreteCDF> parser = new JsonObjectMapperParser<LoggedDiscreteCDF>(goldStream, LoggedDiscreteCDF.class);
            try {
                LoggedDiscreteCDF dcdf = parser.getNext();
                dcdf.deepCompare(newResult, new TreePath(null, "<root>"));
            } catch (DeepInequalityException e) {
                fail(e.path.toString());
            } finally {
                parser.close();
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Test(org.junit.Test)
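
The open-parse-close choreography is the reusable part of this test. A minimal sketch with an illustrative gold-file name:

// "goldExample.json" is a hypothetical name; real files follow the goldXxx.json pattern.
FSDataInputStream goldStream = lfs.open(new Path(rootInputFile, "goldExample.json"));
JsonObjectMapperParser<LoggedDiscreteCDF> parser = new JsonObjectMapperParser<LoggedDiscreteCDF>(goldStream, LoggedDiscreteCDF.class);
try {
    LoggedDiscreteCDF cdf = parser.getNext();
    // inspect or deepCompare cdf here
} finally {
    parser.close(); // as in the test above, closing the parser releases the stream
}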

Aggregations

FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) 431
Path (org.apache.hadoop.fs.Path) 271
FileSystem (org.apache.hadoop.fs.FileSystem) 143
Test (org.junit.Test) 135
IOException (java.io.IOException) 125
Configuration (org.apache.hadoop.conf.Configuration) 94
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 93
FileStatus (org.apache.hadoop.fs.FileStatus) 62
InputStreamReader (java.io.InputStreamReader) 37
BufferedReader (java.io.BufferedReader) 36
FileNotFoundException (java.io.FileNotFoundException) 26
IgfsPath (org.apache.ignite.igfs.IgfsPath) 26
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster) 21
ArrayList (java.util.ArrayList) 20
Random (java.util.Random) 19
EOFException (java.io.EOFException) 18
HashMap (java.util.HashMap) 16
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem) 15
URI (java.net.URI) 14
File (java.io.File) 13