
Example 56 with OutputStreamWriter

use of java.io.OutputStreamWriter in project hadoop by apache.

the class TestLocalRunner method makeNumberFile.

/**
   * Write out an input file containing an integer.
   *
   * @param fileNum the file number to write to.
   * @param value the value to write to the file
   * @return the path of the written file.
   */
private Path makeNumberFile(int fileNum, int value) throws IOException {
    Path workDir = getNumberDirPath();
    Path filePath = new Path(workDir, "file" + fileNum);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    OutputStream os = fs.create(filePath);
    BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
    w.write("" + value);
    w.close();
    return filePath;
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) OutputStream(java.io.OutputStream) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)
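
The snippet above relies on the platform default charset and closes the writer manually. As a minimal sketch (not part of the Hadoop test itself), the same pattern can be written with try-with-resources and an explicit charset; the helper name and the workDir parameter are illustrative.

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class NumberFileSketch {
    // Write a single integer to file<fileNum> under workDir on the local FileSystem.
    Path writeNumberFile(Path workDir, int fileNum, int value) throws IOException {
        Path filePath = new Path(workDir, "file" + fileNum);
        FileSystem fs = FileSystem.getLocal(new Configuration());
        // try-with-resources closes the writer (and the underlying stream) even on error
        try (OutputStream os = fs.create(filePath);
             BufferedWriter w = new BufferedWriter(
                     new OutputStreamWriter(os, StandardCharsets.UTF_8))) {
            w.write(Integer.toString(value));
        }
        return filePath;
    }
}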

Example 57 with OutputStreamWriter

use of java.io.OutputStreamWriter in project hadoop by apache.

the class TestStreamingBadRecords method createInput.

private void createInput() throws Exception {
    OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt"));
    Writer wr = new OutputStreamWriter(os);
    //increasing the record size so that we have stream flushing
    String prefix = new String(new byte[20 * 1024]);
    for (int i = 1; i <= INPUTSIZE; i++) {
        String str = "" + i;
        int zerosToPrepend = 3 - str.length();
        for (int j = 0; j < zerosToPrepend; j++) {
            str = "0" + str;
        }
        wr.write(prefix + "hey" + str + "\n");
    }
    wr.close();
}
Also used : Path(org.apache.hadoop.fs.Path) OutputStream(java.io.OutputStream) OutputStreamWriter(java.io.OutputStreamWriter) Writer(java.io.Writer)
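
The example above pads each record number with zeros using a loop and never specifies a charset. A minimal sketch of the same idea, assuming the file system, input directory, and record count are passed in (the original test takes them from getFileSystem(), getInputDir(), and INPUTSIZE):

import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class CreateInputSketch {
    void createInput(FileSystem fs, Path inputDir, int inputSize) throws IOException {
        OutputStream os = fs.create(new Path(inputDir, "text.txt"));
        // A 20 KB prefix keeps each record large enough to force stream flushing,
        // as in the original test.
        String prefix = new String(new byte[20 * 1024], StandardCharsets.UTF_8);
        try (Writer wr = new OutputStreamWriter(os, StandardCharsets.UTF_8)) {
            for (int i = 1; i <= inputSize; i++) {
                // String.format("%03d", i) replaces the manual zero-padding loop.
                wr.write(prefix + "hey" + String.format("%03d", i) + "\n");
            }
        }
    }
}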

Example 58 with OutputStreamWriter

use of java.io.OutputStreamWriter in project hadoop by apache.

the class FileSystemTimelineWriterImpl method write.

private synchronized void write(String clusterId, String userId, String flowName, String flowVersion, long flowRun, String appId, TimelineEntity entity, TimelineWriteResponse response) throws IOException {
    PrintWriter out = null;
    try {
        String dir = mkdirs(outputRoot, ENTITIES_DIR, clusterId, userId, escape(flowName), escape(flowVersion), String.valueOf(flowRun), appId, entity.getType());
        String fileName = dir + entity.getId() + TIMELINE_SERVICE_STORAGE_EXTENSION;
        out = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName, true), "UTF-8")));
        out.println(TimelineUtils.dumpTimelineRecordtoJSON(entity));
        out.write("\n");
    } catch (IOException ioe) {
        TimelineWriteError error = new TimelineWriteError();
        error.setEntityId(entity.getId());
        error.setEntityType(entity.getType());
        /*
       * TODO: set an appropriate error code after PoC could possibly be:
       * error.setErrorCode(TimelineWriteError.IO_EXCEPTION);
       */
        response.addError(error);
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
Also used : FileOutputStream(java.io.FileOutputStream) TimelineWriteError(org.apache.hadoop.yarn.api.records.timelineservice.TimelineWriteResponse.TimelineWriteError) OutputStreamWriter(java.io.OutputStreamWriter) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter) BufferedWriter(java.io.BufferedWriter)
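
The core of this example is the writer chain: a FileOutputStream opened in append mode, wrapped in an OutputStreamWriter with an explicit UTF-8 encoding, buffered, and exposed as a PrintWriter. A minimal sketch of just that chain, with illustrative names and try-with-resources in place of the explicit finally block:

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;

class AppendJsonLineSketch {
    // Append one line of JSON to the given file, creating it if necessary.
    void appendLine(String fileName, String json) throws IOException {
        try (PrintWriter out = new PrintWriter(new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(fileName, true),
                        StandardCharsets.UTF_8)))) {
            out.println(json);
        }
    }
}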

Example 59 with OutputStreamWriter

use of java.io.OutputStreamWriter in project hive by apache.

the class TestSymlinkTextInputFormat method writeTextFile.

/**
   * Writes the given string to the given file.
   */
private void writeTextFile(Path file, String content) throws IOException {
    OutputStreamWriter writer = new OutputStreamWriter(fileSystem.create(file));
    writer.write(content);
    writer.close();
}
Also used : OutputStreamWriter(java.io.OutputStreamWriter)
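
Because no charset is given, the writer above encodes with the platform default. A minimal sketch of the same helper with an explicit UTF-8 charset and try-with-resources, assuming the FileSystem is passed in rather than read from a test field:

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class WriteTextFileSketch {
    // Write the given string to the given file, encoded as UTF-8.
    void writeTextFile(FileSystem fileSystem, Path file, String content) throws IOException {
        try (OutputStreamWriter writer =
                new OutputStreamWriter(fileSystem.create(file), StandardCharsets.UTF_8)) {
            writer.write(content);
        }
    }
}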

Example 60 with OutputStreamWriter

use of java.io.OutputStreamWriter in project hive by apache.

the class SparkClientImpl method startDriver.

private Thread startDriver(final RpcServer rpcServer, final String clientId, final String secret) throws IOException {
    Runnable runnable;
    final String serverAddress = rpcServer.getAddress();
    final String serverPort = String.valueOf(rpcServer.getPort());
    if (conf.containsKey(SparkClientFactory.CONF_KEY_IN_PROCESS)) {
        // Mostly for testing things quickly. Do not do this in production.
        // when invoked in-process it inherits the environment variables of the parent
        LOG.warn("!!!! Running remote driver in-process. !!!!");
        runnable = new Runnable() {

            @Override
            public void run() {
                List<String> args = Lists.newArrayList();
                args.add("--remote-host");
                args.add(serverAddress);
                args.add("--remote-port");
                args.add(serverPort);
                args.add("--client-id");
                args.add(clientId);
                args.add("--secret");
                args.add(secret);
                for (Map.Entry<String, String> e : conf.entrySet()) {
                    args.add("--conf");
                    args.add(String.format("%s=%s", e.getKey(), conf.get(e.getKey())));
                }
                try {
                    RemoteDriver.main(args.toArray(new String[args.size()]));
                } catch (Exception e) {
                    LOG.error("Error running driver.", e);
                }
            }
        };
    } else {
        // If a Spark installation is provided, use the spark-submit script. Otherwise, call the
        // SparkSubmit class directly, which has some caveats (like having to provide a proper
        // version of Guava on the classpath depending on the deploy mode).
        String sparkHome = Strings.emptyToNull(conf.get(SPARK_HOME_KEY));
        if (sparkHome == null) {
            sparkHome = Strings.emptyToNull(System.getenv(SPARK_HOME_ENV));
        }
        if (sparkHome == null) {
            sparkHome = Strings.emptyToNull(System.getProperty(SPARK_HOME_KEY));
        }
        String sparkLogDir = conf.get("hive.spark.log.dir");
        if (sparkLogDir == null) {
            if (sparkHome == null) {
                sparkLogDir = "./target/";
            } else {
                sparkLogDir = sparkHome + "/logs/";
            }
        }
        String osxTestOpts = "";
        if (Strings.nullToEmpty(System.getProperty("os.name")).toLowerCase().contains("mac")) {
            osxTestOpts = Strings.nullToEmpty(System.getenv(OSX_TEST_OPTS));
        }
        String driverJavaOpts = Joiner.on(" ").skipNulls().join("-Dhive.spark.log.dir=" + sparkLogDir, osxTestOpts, conf.get(DRIVER_OPTS_KEY));
        String executorJavaOpts = Joiner.on(" ").skipNulls().join("-Dhive.spark.log.dir=" + sparkLogDir, osxTestOpts, conf.get(EXECUTOR_OPTS_KEY));
        // Create a file with all the job properties to be read by spark-submit. Change the
        // file's permissions so that only the owner can read it. This avoids having the
        // connection secret show up in the child process's command line.
        File properties = File.createTempFile("spark-submit.", ".properties");
        if (!properties.setReadable(false) || !properties.setReadable(true, true)) {
            throw new IOException("Cannot change permissions of job properties file.");
        }
        properties.deleteOnExit();
        Properties allProps = new Properties();
        // first load the defaults from spark-defaults.conf if available
        try {
            URL sparkDefaultsUrl = Thread.currentThread().getContextClassLoader().getResource("spark-defaults.conf");
            if (sparkDefaultsUrl != null) {
                LOG.info("Loading spark defaults: " + sparkDefaultsUrl);
                allProps.load(new ByteArrayInputStream(Resources.toByteArray(sparkDefaultsUrl)));
            }
        } catch (Exception e) {
            String msg = "Exception trying to load spark-defaults.conf: " + e;
            throw new IOException(msg, e);
        }
        // then load the SparkClientImpl config
        for (Map.Entry<String, String> e : conf.entrySet()) {
            allProps.put(e.getKey(), conf.get(e.getKey()));
        }
        allProps.put(SparkClientFactory.CONF_CLIENT_ID, clientId);
        allProps.put(SparkClientFactory.CONF_KEY_SECRET, secret);
        allProps.put(DRIVER_OPTS_KEY, driverJavaOpts);
        allProps.put(EXECUTOR_OPTS_KEY, executorJavaOpts);
        String isTesting = conf.get("spark.testing");
        if (isTesting != null && isTesting.equalsIgnoreCase("true")) {
            String hiveHadoopTestClasspath = Strings.nullToEmpty(System.getenv("HIVE_HADOOP_TEST_CLASSPATH"));
            if (!hiveHadoopTestClasspath.isEmpty()) {
                String extraDriverClasspath = Strings.nullToEmpty((String) allProps.get(DRIVER_EXTRA_CLASSPATH));
                if (extraDriverClasspath.isEmpty()) {
                    allProps.put(DRIVER_EXTRA_CLASSPATH, hiveHadoopTestClasspath);
                } else {
                    extraDriverClasspath = extraDriverClasspath.endsWith(File.pathSeparator) ? extraDriverClasspath : extraDriverClasspath + File.pathSeparator;
                    allProps.put(DRIVER_EXTRA_CLASSPATH, extraDriverClasspath + hiveHadoopTestClasspath);
                }
                String extraExecutorClasspath = Strings.nullToEmpty((String) allProps.get(EXECUTOR_EXTRA_CLASSPATH));
                if (extraExecutorClasspath.isEmpty()) {
                    allProps.put(EXECUTOR_EXTRA_CLASSPATH, hiveHadoopTestClasspath);
                } else {
                    extraExecutorClasspath = extraExecutorClasspath.endsWith(File.pathSeparator) ? extraExecutorClasspath : extraExecutorClasspath + File.pathSeparator;
                    allProps.put(EXECUTOR_EXTRA_CLASSPATH, extraExecutorClasspath + hiveHadoopTestClasspath);
                }
            }
        }
        Writer writer = new OutputStreamWriter(new FileOutputStream(properties), Charsets.UTF_8);
        try {
            allProps.store(writer, "Spark Context configuration");
        } finally {
            writer.close();
        }
        // Define how to pass options to the child process. If launching in client (or local)
        // mode, the driver options need to be passed directly on the command line. Otherwise,
        // SparkSubmit will take care of that for us.
        String master = conf.get("spark.master");
        Preconditions.checkArgument(master != null, "spark.master is not defined.");
        String deployMode = conf.get("spark.submit.deployMode");
        List<String> argv = Lists.newLinkedList();
        if (sparkHome != null) {
            argv.add(new File(sparkHome, "bin/spark-submit").getAbsolutePath());
        } else {
            LOG.info("No spark.home provided, calling SparkSubmit directly.");
            argv.add(new File(System.getProperty("java.home"), "bin/java").getAbsolutePath());
            if (master.startsWith("local") || master.startsWith("mesos") || SparkClientUtilities.isYarnClientMode(master, deployMode) || master.startsWith("spark")) {
                String mem = conf.get("spark.driver.memory");
                if (mem != null) {
                    argv.add("-Xms" + mem);
                    argv.add("-Xmx" + mem);
                }
                String cp = conf.get("spark.driver.extraClassPath");
                if (cp != null) {
                    argv.add("-classpath");
                    argv.add(cp);
                }
                String libPath = conf.get("spark.driver.extraLibPath");
                if (libPath != null) {
                    argv.add("-Djava.library.path=" + libPath);
                }
                String extra = conf.get(DRIVER_OPTS_KEY);
                if (extra != null) {
                    for (String opt : extra.split("[ ]")) {
                        if (!opt.trim().isEmpty()) {
                            argv.add(opt.trim());
                        }
                    }
                }
            }
            argv.add("org.apache.spark.deploy.SparkSubmit");
        }
        if (SparkClientUtilities.isYarnClusterMode(master, deployMode)) {
            String executorCores = conf.get("spark.executor.cores");
            if (executorCores != null) {
                argv.add("--executor-cores");
                argv.add(executorCores);
            }
            String executorMemory = conf.get("spark.executor.memory");
            if (executorMemory != null) {
                argv.add("--executor-memory");
                argv.add(executorMemory);
            }
            String numOfExecutors = conf.get("spark.executor.instances");
            if (numOfExecutors != null) {
                argv.add("--num-executors");
                argv.add(numOfExecutors);
            }
        }
        // On a kerberized cluster, set up authentication (kinit, or --principal/--keytab
        // for delegation token renewal) for the long-running application.
        if ("kerberos".equals(hiveConf.get(HADOOP_SECURITY_AUTHENTICATION))) {
            String principal = SecurityUtil.getServerPrincipal(hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL), "0.0.0.0");
            String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB);
            if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS)) {
                List<String> kinitArgv = Lists.newLinkedList();
                kinitArgv.add("kinit");
                kinitArgv.add(principal);
                kinitArgv.add("-k");
                kinitArgv.add("-t");
                kinitArgv.add(keyTabFile + ";");
                kinitArgv.addAll(argv);
                argv = kinitArgv;
            } else {
                // if doAs is not enabled, we pass the principal/keytab to spark-submit in order to
                // support the possible delegation token renewal in Spark
                argv.add("--principal");
                argv.add(principal);
                argv.add("--keytab");
                argv.add(keyTabFile);
            }
        }
        if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS)) {
            try {
                String currentUser = Utils.getUGI().getShortUserName();
                // do not do impersonation in CLI mode
                if (!currentUser.equals(System.getProperty("user.name"))) {
                    LOG.info("Attempting impersonation of " + currentUser);
                    argv.add("--proxy-user");
                    argv.add(currentUser);
                }
            } catch (Exception e) {
                String msg = "Cannot obtain username: " + e;
                throw new IllegalStateException(msg, e);
            }
        }
        argv.add("--properties-file");
        argv.add(properties.getAbsolutePath());
        argv.add("--class");
        argv.add(RemoteDriver.class.getName());
        String jar = "spark-internal";
        if (SparkContext.jarOfClass(this.getClass()).isDefined()) {
            jar = SparkContext.jarOfClass(this.getClass()).get();
        }
        argv.add(jar);
        argv.add("--remote-host");
        argv.add(serverAddress);
        argv.add("--remote-port");
        argv.add(serverPort);
        //hive.spark.* keys are passed down to the RemoteDriver via --conf,
        //as --properties-file contains the spark.* keys that are meant for SparkConf object.
        for (String hiveSparkConfKey : RpcConfiguration.HIVE_SPARK_RSC_CONFIGS) {
            String value = RpcConfiguration.getValue(hiveConf, hiveSparkConfKey);
            argv.add("--conf");
            argv.add(String.format("%s=%s", hiveSparkConfKey, value));
        }
        String cmd = Joiner.on(" ").join(argv);
        LOG.info("Running client driver with argv: {}", cmd);
        ProcessBuilder pb = new ProcessBuilder("sh", "-c", cmd);
        // Prevent hive configurations from being visible in Spark.
        pb.environment().remove("HIVE_HOME");
        pb.environment().remove("HIVE_CONF_DIR");
        // Add credential provider password to the child process's environment
        // In case of Spark the credential provider location is provided in the jobConf when the job is submitted
        String password = getSparkJobCredentialProviderPassword();
        if (password != null) {
            pb.environment().put(Constants.HADOOP_CREDENTIAL_PASSWORD_ENVVAR, password);
        }
        if (isTesting != null) {
            pb.environment().put("SPARK_TESTING", isTesting);
        }
        final Process child = pb.start();
        int childId = childIdGenerator.incrementAndGet();
        final List<String> childErrorLog = new ArrayList<String>();
        redirect("stdout-redir-" + childId, new Redirector(child.getInputStream()));
        redirect("stderr-redir-" + childId, new Redirector(child.getErrorStream(), childErrorLog));
        runnable = new Runnable() {

            @Override
            public void run() {
                try {
                    int exitCode = child.waitFor();
                    if (exitCode != 0) {
                        StringBuilder errStr = new StringBuilder();
                        for (String s : childErrorLog) {
                            errStr.append(s);
                            errStr.append('\n');
                        }
                        rpcServer.cancelClient(clientId, "Child process exited before connecting back with error log " + errStr.toString());
                        LOG.warn("Child process exited with code {}", exitCode);
                    }
                } catch (InterruptedException ie) {
                    LOG.warn("Waiting thread interrupted, killing child process.");
                    Thread.interrupted();
                    child.destroy();
                } catch (Exception e) {
                    LOG.warn("Exception while waiting for child process.", e);
                }
            }
        };
    }
    Thread thread = new Thread(runnable);
    thread.setDaemon(true);
    thread.setName("Driver");
    thread.start();
    return thread;
}
Also used : ArrayList(java.util.ArrayList) Properties(java.util.Properties) URL(java.net.URL) List(java.util.List) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) SparkException(org.apache.spark.SparkException) ByteArrayInputStream(java.io.ByteArrayInputStream) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) File(java.io.File) Map(java.util.Map) Writer(java.io.Writer)
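
Within this long method, the OutputStreamWriter appears in one small step: the job properties are stored to an owner-readable temp file that is later passed to spark-submit via --properties-file, so the connection secret never shows up on the child process's command line. A minimal sketch of just that step, with an illustrative helper name and StandardCharsets.UTF_8 standing in for the Charsets.UTF_8 constant used above:

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.Properties;

class PropertiesFileSketch {
    // Write the job properties to a temp file readable only by the owner.
    File writeProperties(Properties props) throws IOException {
        File f = File.createTempFile("spark-submit.", ".properties");
        // Clear all read permissions, then re-grant read access to the owner only.
        if (!f.setReadable(false) || !f.setReadable(true, true)) {
            throw new IOException("Cannot change permissions of job properties file.");
        }
        f.deleteOnExit();
        try (Writer writer = new OutputStreamWriter(
                new FileOutputStream(f), StandardCharsets.UTF_8)) {
            props.store(writer, "Spark Context configuration");
        }
        return f;
    }
}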

Aggregations

OutputStreamWriter (java.io.OutputStreamWriter): 1644
IOException (java.io.IOException): 625
BufferedWriter (java.io.BufferedWriter): 596
FileOutputStream (java.io.FileOutputStream): 594
Writer (java.io.Writer): 443
File (java.io.File): 365
PrintWriter (java.io.PrintWriter): 272
InputStreamReader (java.io.InputStreamReader): 222
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 202
OutputStream (java.io.OutputStream): 194
BufferedReader (java.io.BufferedReader): 189
Test (org.junit.Test): 123
InputStream (java.io.InputStream): 92
ArrayList (java.util.ArrayList): 90
FileNotFoundException (java.io.FileNotFoundException): 88
Path (org.apache.hadoop.fs.Path): 86
UnsupportedEncodingException (java.io.UnsupportedEncodingException): 78
URL (java.net.URL): 72
Socket (java.net.Socket): 70
HttpURLConnection (java.net.HttpURLConnection): 65