Example 66 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class UtilsForTests method runJob.

// Start a job with the specified input and return its RunningJob object
static RunningJob runJob(JobConf conf, Path inDir, Path outDir, int numMaps, int numReds, String input) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    if (!fs.exists(inDir)) {
        fs.mkdirs(inDir);
    }
    for (int i = 0; i < numMaps; ++i) {
        DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
        file.writeBytes(input);
        file.close();
    }
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.setNumMapTasks(numMaps);
    conf.setNumReduceTasks(numReds);
    JobClient jobClient = new JobClient(conf);
    RunningJob job = jobClient.submitJob(conf);
    return job;
}
Also used : Path(org.apache.hadoop.fs.Path) DataOutputStream(java.io.DataOutputStream) FileSystem(org.apache.hadoop.fs.FileSystem)
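A caller typically pairs this helper with a blocking wait on the returned handle. The following is a minimal usage sketch, not code from the Hadoop tree; the scratch paths, task counts, and input line are assumptions for illustration:

// Hypothetical caller: submit a small two-map, one-reduce job and block until it finishes.
JobConf conf = new JobConf();
Path inDir = new Path("/tmp/runjob/input");    // assumed scratch paths
Path outDir = new Path("/tmp/runjob/output");
RunningJob job = runJob(conf, inDir, outDir, 2, 1, "one line of input\n");
// RunningJob in the old mapred API offers blocking completion and a status check.
job.waitForCompletion();
if (!job.isSuccessful()) {
    throw new IOException("job " + job.getID() + " failed");
}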

Example 67 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class UtilsForTests method createTmpFileDFS.

/**
   * Creates a file on the given file system, writes the input string to it,
   * and closes it.
   * @param dfs the FileSystem on which the file is created
   * @param URIPATH the path at which the file is created
   * @param permission the FsPermission applied to the new file
   * @param input the contents written to the file
   * @return the DataOutputStream used for the write (already closed)
   */
public static DataOutputStream createTmpFileDFS(FileSystem dfs, Path URIPATH, FsPermission permission, String input) throws Exception {
    //Creating the path with the file
    DataOutputStream file = FileSystem.create(dfs, URIPATH, permission);
    file.writeBytes(input);
    file.close();
    return file;
}
Also used : DataOutputStream(java.io.DataOutputStream)
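Note that the helper closes the stream before returning it, so the return value is useful only as a handle, not for further writes. A hedged usage sketch (the path, permission bits, and payload are assumptions):

// Hypothetical caller: create a world-readable file on the cluster.
FileSystem dfs = FileSystem.get(new Configuration());
Path target = new Path("/tmp/utils-for-tests/sample.txt");   // assumed path
FsPermission perm = new FsPermission((short) 0644);          // rw-r--r--
// The returned stream is already closed; do not write to it again.
DataOutputStream closed = createTmpFileDFS(dfs, target, perm, "hello dfs\n");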

Example 68 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class NativeAzureFileSystem method append.

/** Appends to an existing file; this optional operation is supported only when append support is enabled. */
@Override
public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException {
    if (!appendSupportEnabled) {
        throw new UnsupportedOperationException("Append Support not enabled");
    }
    LOG.debug("Opening file: {} for append", f);
    Path absolutePath = makeAbsolute(f);
    performAuthCheck(absolutePath.toString(), WasbAuthorizationOperations.WRITE.toString(), "append");
    String key = pathToKey(absolutePath);
    FileMetadata meta = null;
    try {
        meta = store.retrieveMetadata(key);
    } catch (Exception ex) {
        Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex);
        if (innerException instanceof StorageException && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) {
            throw new FileNotFoundException(String.format("%s is not found", key));
        } else {
            throw ex;
        }
    }
    if (meta == null) {
        throw new FileNotFoundException(f.toString());
    }
    if (meta.isDir()) {
        throw new FileNotFoundException(f.toString() + " is a directory not a file.");
    }
    if (store.isPageBlobKey(key)) {
        throw new IOException("Append not supported for Page Blobs");
    }
    DataOutputStream appendStream = null;
    try {
        appendStream = store.retrieveAppendStream(key, bufferSize);
    } catch (Exception ex) {
        Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex);
        if (innerException instanceof StorageException && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) {
            throw new FileNotFoundException(String.format("%s is not found", key));
        } else {
            throw ex;
        }
    }
    return new FSDataOutputStream(appendStream, statistics);
}
Also used : Path(org.apache.hadoop.fs.Path) DataOutputStream(java.io.DataOutputStream) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) StorageException(com.microsoft.azure.storage.StorageException) URISyntaxException(java.net.URISyntaxException) JsonParseException(com.fasterxml.jackson.core.JsonParseException) EOFException(java.io.EOFException) FileNotFoundException(java.io.FileNotFoundException) JsonMappingException(com.fasterxml.jackson.databind.JsonMappingException) StorageException(com.microsoft.azure.storage.StorageException) FileAlreadyExistsException(org.apache.hadoop.fs.FileAlreadyExistsException) IOException(java.io.IOException)
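Clients reach this override through the generic FileSystem.append entry point. A hedged sketch of the calling side (the WASB URI, the path, and the configuration key for enabling append support are assumptions about the deployment, not something this snippet defines):

// Hypothetical client: append one record to an existing block blob.
Configuration conf = new Configuration();
conf.setBoolean("fs.azure.enable.append.support", true);  // assumed config key
FileSystem fs = FileSystem.get(
        URI.create("wasb://container@account.blob.core.windows.net/"), conf);
try (FSDataOutputStream out = fs.append(new Path("/logs/events.log"), 4096, null)) {
    out.writeBytes("one more event\n");   // ASCII-safe payload
}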

Example 69 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class DumpTypedBytes method dumpTypedBytes.

/**
   * Dump given list of files to standard output as typed bytes.
   */
@SuppressWarnings("unchecked")
private int dumpTypedBytes(List<FileStatus> files) throws IOException {
    JobConf job = new JobConf(getConf());
    DataOutputStream dout = new DataOutputStream(System.out);
    AutoInputFormat autoInputFormat = new AutoInputFormat();
    for (FileStatus fileStatus : files) {
        // Deliberately oversize the split length so the entire file lands in one split.
        FileSplit split = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen() * fileStatus.getBlockSize(), (String[]) null);
        RecordReader recReader = null;
        try {
            recReader = autoInputFormat.getRecordReader(split, job, Reporter.NULL);
            Object key = recReader.createKey();
            Object value = recReader.createValue();
            while (recReader.next(key, value)) {
                if (key instanceof Writable) {
                    TypedBytesWritableOutput.get(dout).write((Writable) key);
                } else {
                    TypedBytesOutput.get(dout).write(key);
                }
                if (value instanceof Writable) {
                    TypedBytesWritableOutput.get(dout).write((Writable) value);
                } else {
                    TypedBytesOutput.get(dout).write(value);
                }
            }
        } finally {
            if (recReader != null) {
                recReader.close();
            }
        }
    }
    dout.flush();
    return 0;
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) DataOutputStream(java.io.DataOutputStream) RecordReader(org.apache.hadoop.mapred.RecordReader) Writable(org.apache.hadoop.io.Writable) FileSplit(org.apache.hadoop.mapred.FileSplit) JobConf(org.apache.hadoop.mapred.JobConf)
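The stream written here can be read back symmetrically with TypedBytesInput over a DataInputStream. A hedged round-trip sketch (the dump file name is an assumption; read() returning null at end of stream matches how Hadoop's own LoadTypedBytes consumes such dumps):

// Hypothetical consumer: read key/value pairs back from a typed-bytes dump.
DataInputStream din = new DataInputStream(
        new BufferedInputStream(new FileInputStream("dump.tb")));
TypedBytesInput tbIn = TypedBytesInput.get(din);
Object key;
while ((key = tbIn.read()) != null) {   // read() yields null at EOF
    Object value = tbIn.read();
    System.out.println(key + "\t" + value);
}
din.close();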

Example 70 with DataOutputStream

use of java.io.DataOutputStream in project hadoop by apache.

the class TestMultipleCachefiles method testMultipleCachefiles.

@Test
public void testMultipleCachefiles() throws Exception {
    boolean mayExit = false;
    MiniMRCluster mr = null;
    MiniDFSCluster dfs = null;
    try {
        Configuration conf = new Configuration();
        dfs = new MiniDFSCluster.Builder(conf).build();
        FileSystem fileSys = dfs.getFileSystem();
        String namenode = fileSys.getUri().toString();
        mr = new MiniMRCluster(1, namenode, 3);
        List<String> args = new ArrayList<String>();
        for (Map.Entry<String, String> entry : mr.createJobConf()) {
            args.add("-jobconf");
            args.add(entry.getKey() + "=" + entry.getValue());
        }
        String[] argv = new String[] {
            "-input", INPUT_FILE,
            "-output", OUTPUT_DIR,
            "-mapper", map,
            "-reducer", reduce,
            "-jobconf", "stream.tmpdir=" + System.getProperty("test.build.data", "/tmp"),
            "-jobconf", JobConf.MAPRED_MAP_TASK_JAVA_OPTS + "=" +
                "-Dcontrib.name=" + System.getProperty("contrib.name") + " " +
                "-Dbuild.test=" + System.getProperty("build.test") + " " +
                conf.get(JobConf.MAPRED_MAP_TASK_JAVA_OPTS, conf.get(JobConf.MAPRED_TASK_JAVA_OPTS, "")),
            "-jobconf", JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS + "=" +
                "-Dcontrib.name=" + System.getProperty("contrib.name") + " " +
                "-Dbuild.test=" + System.getProperty("build.test") + " " +
                conf.get(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, conf.get(JobConf.MAPRED_TASK_JAVA_OPTS, "")),
            "-cacheFile", fileSys.getUri() + CACHE_FILE + "#" + mapString,
            "-cacheFile", fileSys.getUri() + CACHE_FILE_2 + "#" + mapString2,
            "-jobconf", "mapred.jar=" + TestStreaming.STREAMING_JAR };
        for (String arg : argv) {
            args.add(arg);
        }
        argv = args.toArray(new String[args.size()]);
        fileSys.delete(new Path(OUTPUT_DIR), true);
        DataOutputStream file = fileSys.create(new Path(INPUT_FILE));
        file.writeBytes(mapString + "\n");
        file.writeBytes(mapString2 + "\n");
        file.close();
        file = fileSys.create(new Path(CACHE_FILE));
        file.writeBytes(cacheString + "\n");
        file.close();
        file = fileSys.create(new Path(CACHE_FILE_2));
        file.writeBytes(cacheString2 + "\n");
        file.close();
        job = new StreamJob(argv, mayExit);
        job.go();
        fileSys = dfs.getFileSystem();
        String line = null;
        String line2 = null;
        Path[] fileList = FileUtil.stat2Paths(fileSys.listStatus(new Path(OUTPUT_DIR), new Utils.OutputFileUtils.OutputFilesFilter()));
        for (int i = 0; i < fileList.length; i++) {
            System.out.println(fileList[i].toString());
            BufferedReader bread = new BufferedReader(new InputStreamReader(fileSys.open(fileList[i])));
            line = bread.readLine();
            System.out.println(line);
            line2 = bread.readLine();
            System.out.println(line2);
        }
        assertEquals(cacheString + "\t", line);
        assertEquals(cacheString2 + "\t", line2);
    } finally {
        if (dfs != null) {
            dfs.shutdown();
        }
        if (mr != null) {
            mr.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) InputStreamReader(java.io.InputStreamReader) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) MiniMRCluster(org.apache.hadoop.mapred.MiniMRCluster) Utils(org.apache.hadoop.mapred.Utils) FileSystem(org.apache.hadoop.fs.FileSystem) BufferedReader(java.io.BufferedReader) Map(java.util.Map) Test(org.junit.Test)
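One caveat that applies to every writeBytes call in these examples: DataOutputStream.writeBytes writes only the low byte of each char, so it silently mangles non-ASCII text. Where the payload is not guaranteed to be ASCII, encoding explicitly is the safer pattern; a minimal standalone sketch (file name and sample string are assumptions):

import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class Utf8WriteSketch {
    public static void main(String[] args) throws IOException {
        String line = "café\n";   // contains a non-ASCII char that writeBytes would truncate
        try (DataOutputStream out =
                new DataOutputStream(new FileOutputStream("utf8.txt"))) {
            out.write(line.getBytes(StandardCharsets.UTF_8)); // explicit encoding
        }
    }
}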

Aggregations

DataOutputStream (java.io.DataOutputStream) 2968
ByteArrayOutputStream (java.io.ByteArrayOutputStream) 1314
IOException (java.io.IOException) 1024
Test (org.junit.Test) 633
DataInputStream (java.io.DataInputStream) 615
FileOutputStream (java.io.FileOutputStream) 427
ByteArrayInputStream (java.io.ByteArrayInputStream) 411
File (java.io.File) 281
BufferedOutputStream (java.io.BufferedOutputStream) 228
UnitTest (org.apache.geode.test.junit.categories.UnitTest) 172
URL (java.net.URL) 149
InputStreamReader (java.io.InputStreamReader) 146
BufferedReader (java.io.BufferedReader) 142
Path (org.apache.hadoop.fs.Path) 137
DataInput (java.io.DataInput) 124
ArrayList (java.util.ArrayList) 122
HttpURLConnection (java.net.HttpURLConnection) 120
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 117
FileInputStream (java.io.FileInputStream) 107
InputStream (java.io.InputStream) 107