
Example 66 with FileWriter

Use of java.io.FileWriter in project AndroidPicker by gzu-liyujiang.

The class FileUtils, method appendText.

/**
     * Appends the given text content to the file at the given path.
     */
public static boolean appendText(String path, String content) {
    LogUtils.verbose(String.format("append %s", path));
    File file = new File(path);
    FileWriter writer = null;
    try {
        if (!file.exists()) {
            //noinspection ResultOfMethodCallIgnored
            file.createNewFile();
        }
        // the second constructor argument opens the file in append mode
        writer = new FileWriter(file, true);
        writer.write(content);
        return true;
    } catch (IOException e) {
        LogUtils.warn(e);
        return false;
    } finally {
        closeSilently(writer);
    }
}
Also used : FileWriter(java.io.FileWriter) IOException(java.io.IOException) File(java.io.File)
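
On Java 7+, the same append can be written more compactly with java.nio.file.Files, which creates the file if it is missing and closes the stream automatically. A minimal sketch under that assumption (unlike FileWriter, this version pins the charset to UTF-8; the class name AppendTextSketch is hypothetical):

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;

public final class AppendTextSketch {

    /**
     * Appends text to a file, creating the file first if necessary.
     * Returns false instead of throwing, mirroring appendText above.
     */
    public static boolean appendText(String path, String content) {
        try {
            // CREATE makes the file if absent; APPEND adds to the end.
            Files.write(Paths.get(path),
                    content.getBytes(StandardCharsets.UTF_8),
                    StandardOpenOption.CREATE, StandardOpenOption.APPEND);
            return true;
        } catch (IOException e) {
            return false;
        }
    }
}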

Example 67 with FileWriter

Use of java.io.FileWriter in project h2o-2 by h2oai.

The class WebAPI, method exportModel.

/**
   * Exports a model to a JSON file.
   */
static void exportModel() throws Exception {
    HttpClient client = new HttpClient();
    // Ask the H2O REST API to serialize the named model as JSON.
    GetMethod get = new GetMethod(URL + "/2/ExportModel.json?model=MyInitialNeuralNet");
    int status = client.executeMethod(get);
    if (status != 200)
        throw new Exception(get.getStatusText());
    JsonObject response = (JsonObject) new JsonParser().parse(new InputStreamReader(get.getResponseBodyAsStream()));
    JsonElement model = response.get("model");
    // Pretty-print the "model" element to JSON_FILE. Note that the writer and
    // the connection are only released on the success path; see the sketch below.
    JsonWriter writer = new JsonWriter(new FileWriter(JSON_FILE));
    writer.setLenient(true);
    writer.setIndent("  ");
    Streams.write(model, writer);
    writer.close();
    get.releaseConnection();
}
Also used : InputStreamReader(java.io.InputStreamReader) JsonElement(dontweave.gson.JsonElement) HttpClient(org.apache.commons.httpclient.HttpClient) FileWriter(java.io.FileWriter) GetMethod(org.apache.commons.httpclient.methods.GetMethod) JsonObject(dontweave.gson.JsonObject) JsonWriter(dontweave.gson.stream.JsonWriter) JsonParser(dontweave.gson.JsonParser)
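
If executeMethod or Streams.write throws, the method above leaks both the FileWriter and the pooled connection. A minimal restructuring sketch using try-with-resources and a finally block, assuming the standard com.google.gson package in place of the repackaged dontweave.gson that h2o-2 bundles (url and jsonFile are hypothetical parameters standing in for the URL and JSON_FILE constants):

import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import com.google.gson.stream.JsonWriter;
import java.io.FileWriter;
import java.io.InputStreamReader;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;

static void exportModel(String url, String jsonFile) throws Exception {
    HttpClient client = new HttpClient();
    GetMethod get = new GetMethod(url + "/2/ExportModel.json?model=MyInitialNeuralNet");
    try {
        if (client.executeMethod(get) != 200)
            throw new Exception(get.getStatusText());
        JsonElement model = new JsonParser()
                .parse(new InputStreamReader(get.getResponseBodyAsStream()))
                .getAsJsonObject()
                .get("model");
        // try-with-resources closes the writer even if serialization fails
        try (JsonWriter writer = new JsonWriter(new FileWriter(jsonFile))) {
            writer.setLenient(true);
            writer.setIndent("  ");
            new Gson().toJson(model, writer);
        }
    } finally {
        // always return the connection to the pool
        get.releaseConnection();
    }
}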

Example 68 with FileWriter

Use of java.io.FileWriter in project h2o-3 by h2oai.

The class MungeCsv, method main.

/**
   * CSV reader and predictor test program.
   *
   * @param args Command-line args.
   * @throws Exception if reading, munging, or writing the CSV fails.
   */
public static void main(String[] args) throws Exception {
    parseArgs(args);
    GenMunger rawMunger;
    rawMunger = (hex.genmodel.GenMunger) Class.forName(assemblyClassName).newInstance();
    BufferedReader input = new BufferedReader(new FileReader(inputCSVFileName));
    BufferedWriter output = new BufferedWriter(new FileWriter(outputCSVFileName));
    // Emit outputCSV column names.
    String[] rawHeader = rawMunger.outNames();
    StringBuilder header = new StringBuilder();
    for (int i = 0; i < rawHeader.length; ++i) {
        header.append("\"").append(rawHeader[i]).append("\"");
        if (i < rawHeader.length - 1)
            header.append(",");
    }
    output.write(header.toString());
    output.write("\n");
    // Loop over inputCSV one row at a time.
    int lineNum = 0;
    String line;
    try {
        while ((line = input.readLine()) != null) {
            lineNum++;
            // skip the header.
            if (lineNum == 1)
                continue;
            // Parse the CSV line.  Somewhat handles quoted commas.  But this ain't no parser test!
            RowData row;
            try {
                row = parseDataRow(line, rawMunger);
            } catch (NumberFormatException nfe) {
                nfe.printStackTrace();
                System.out.println("Failed to parse row: " + lineNum);
                // keep the parse failure as the cause instead of throwing a bare RuntimeException
                throw new RuntimeException("Failed to parse row: " + lineNum, nfe);
            }
            RowData mungedRow = rawMunger.fit(row);
            for (int i = 0; i < rawMunger.outNames().length; ++i) {
                Object val = mungedRow == null ? Double.NaN : mungedRow.get(rawMunger.outNames()[i]);
                if (val instanceof Double)
                    output.write(String.valueOf(val));
                else
                    output.write("\"" + val + "\"");
                if (i < rawMunger.outNames().length - 1)
                    output.write(",");
            }
            output.write("\n");
        }
    } catch (Exception e) {
        System.out.println("Caught exception on line " + lineNum);
        System.out.println("");
        e.printStackTrace();
        System.exit(1);
    } finally {
        // Clean up.
        output.close();
        input.close();
    }
    // Predictions were successfully generated.  Calling program can now compare them with something.
    System.exit(0);
}
Also used : FileWriter(java.io.FileWriter) BufferedWriter(java.io.BufferedWriter) RowData(hex.genmodel.easy.RowData) GenMunger(hex.genmodel.GenMunger) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader)
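
Both the header loop and the value loop above wrap fields in double quotes without escaping quote characters inside the value, so a string containing a quote would produce a malformed row. A minimal helper following the RFC 4180 convention of doubling embedded quotes (the name quoteCsvField is hypothetical):

/**
 * Quotes a CSV field per RFC 4180: wrap the value in double quotes
 * and double any quote characters embedded in it.
 */
static String quoteCsvField(Object val) {
    String s = String.valueOf(val);
    return "\"" + s.replace("\"", "\"\"") + "\"";
}

With this helper, output.write("\"" + val + "\"") in the loop becomes output.write(quoteCsvField(val)).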

Example 69 with FileWriter

Use of java.io.FileWriter in project h2o-3 by h2oai.

The class ChunksizeTest, method run.

@Test
public void run() throws IOException {
    // Opened for a CSV dump of the log lines (this version never actually writes to it).
    FileWriter fw = new FileWriter("/tmp/chunksize.csv");
    String header = "\t" + String.format("%10s", "cloudSize")
            + "\t" + String.format("%8s", "cores")
            + "\t" + String.format("%8s", "numCols")
            + "\t" + String.format("%8s", "numRows")
            + "\t" + String.format("%16s", "maxLineLength")
            + "\t" + String.format("%13s", "totalSize")
            + "\t" + String.format("%13s", "chunkSize")
            + "\t" + String.format("%15s", "parseChunkCount")
            + "\t" + String.format("%15s", "totalChunks") + "\n";
    int[] toosmall = new int[2];
    int[] toolarge = new int[2];
    int[] toofew = new int[2];
    int[] toomany = new int[2];
    int[] counter = new int[2];
    int[] failed = new int[2];
    for (int oldheuristic : new int[] { 0, 1 }) {
        for (int cloudSize : new int[] { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 }) {
            for (int cores : new int[] { 2, 4, 8, 16, 32, 64, 128 }) {
                //per node
                for (int numCols : new int[] { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768 }) {
                    for (long maxLineLength : new long[] { 10, 100, 1000, 10000, 1000000 }) {
                        for (double totalSize : new double[] { 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14 }) {
                            int numRows = (int) (totalSize / maxLineLength);
                            //need at least 1 row
                            if (maxLineLength > totalSize)
                                continue;
                            //need at least 3 bytes per column
                            if ((double) maxLineLength / numCols < 3)
                                continue;
                            //can't have more than 100 bytes per column
                            if ((double) maxLineLength / numCols > 100)
                                continue;
                            // Pretend to be in ParseSetup
                            int chunkSize = FileVec.calcOptimalChunkSize((long) totalSize, numCols, maxLineLength, cores, cloudSize, oldheuristic == 1, true);
                            int parseChunkCount = (int) Math.max(1, totalSize / chunkSize);
                            int parseChunkCountPerNode = parseChunkCount / cloudSize;
                            long totalChunks = (long) parseChunkCount * numCols;
                            String log = "\t" + String.format("%10s", cloudSize)
                                    + "\t" + String.format("%8s", cores)
                                    + "\t" + String.format("%8s", numCols)
                                    + "\t" + String.format("%8s", numRows)
                                    + "\t" + String.format("%16s", maxLineLength)
                                    + "\t" + String.format("%13s", totalSize)
                                    + "\t" + String.format("%13s", chunkSize)
                                    + "\t" + String.format("%15s", parseChunkCount)
                                    + "\t" + String.format("%15s", totalChunks);
                            boolean fail = false;
                            String msg = "\n" + header + log + "                  <- TOO ";
                            // don't cut small data into too many chunks (only 10 numbers per chunk)
                            if (chunkSize < 10 * maxLineLength) {
                                msg += "SMALL ";
                                // recomputed with the result discarded, presumably as a breakpoint target for debugging
                                FileVec.calcOptimalChunkSize((long) totalSize, numCols, maxLineLength, cores, cloudSize, oldheuristic == 1, true);
                                toosmall[oldheuristic]++;
                                fail = true;
                            }
                            if (chunkSize >= (1 << 28)) {
                                //256MB
                                msg += "LARGE ";
                                FileVec.calcOptimalChunkSize((long) totalSize, numCols, maxLineLength, cores, cloudSize, oldheuristic == 1, true);
                                toolarge[oldheuristic]++;
                                fail = true;
                            }
                            // want at least one chunk per core
                            if (parseChunkCountPerNode < cores && oldheuristic == 0) {
                                // Only complain if there are at least 100k matrix entries per node
                                // (otherwise it's small data and fast enough anyway even with fewer chunks)
                                // and enough data to cut into Chunk POJOs of about 1kB each.
                                if (numRows * numCols > 100000 * cloudSize
                                        && totalSize / cloudSize / numCols / (4 * cores) > 1000) {
                                    msg += "FEW ";
                                    FileVec.calcOptimalChunkSize((long) totalSize, numCols, maxLineLength, cores, cloudSize, oldheuristic == 1, true);
                                    toofew[oldheuristic]++;
                                    fail = true;
                                    //only for very wide data
                                    Assert.assertTrue(numCols > 1e4);
                                    //at least keep half the cores busy
                                    Assert.assertTrue(parseChunkCountPerNode > cores / 2);
                                }
                            }
                            if (parseChunkCountPerNode * numCols > (1 << 24)) {
                                //no more than 16M chunk POJOs per node
                                msg += "MANY ";
                                FileVec.calcOptimalChunkSize((long) totalSize, numCols, maxLineLength, cores, cloudSize, oldheuristic == 1, true);
                                toomany[oldheuristic]++;
                                fail = true;
                                //only for big data, where we have more than 1GB per core
                                Assert.assertTrue(totalSize / cloudSize / cores > 1e9);
                            }
                            if (fail) {
                                Log.info(msg + (oldheuristic == 0 ? "(New Heuristic)" : "(Old Heuristic)"));
                                failed[oldheuristic]++;
                            }
                            counter[oldheuristic]++;
                        }
                    }
                }
            }
        }
    }
    fw.close();
    for (int i : new int[] { 0, 1 }) {
        Log.info((i == 1 ? "Old" : "New") + " heuristic:");
        Log.info("Total: " + counter[i]);
        Log.info("Failure rate: " + PrettyPrint.formatPct((double) failed[i] / counter[i]));
        Log.info("Too small: " + PrettyPrint.formatPct((double) toosmall[i] / counter[i]));
        Log.info("Too large: " + PrettyPrint.formatPct((double) toolarge[i] / counter[i]));
        Log.info("Too few: " + PrettyPrint.formatPct((double) toofew[i] / counter[i]));
        Log.info("Too many: " + PrettyPrint.formatPct((double) toomany[i] / counter[i]));
        if (i == 0) {
            Assert.assertTrue("Too small means that files cannot be parsed", toosmall[i] == 0);
            Assert.assertTrue("Too large means that chunks cannot fit in the DKV", toolarge[i] == 0);
            //extremely rare, only for wide data
            Assert.assertTrue("Too few means that cores aren't utilized", toofew[i] < 1e-3 * counter[i]);
            //it's very rare to have too many chunks (huge data)
            Assert.assertTrue("Too many means that each node has to store more than 16M chunks in its KV store", toomany[i] < 3e-2 * counter[i]);
        }
    }
}
Also used : FileWriter(java.io.FileWriter) PrettyPrint(water.util.PrettyPrint) Test(org.junit.Test)
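
The failure conditions all follow from the same chunk arithmetic in the loop body. A short worked example with hypothetical inputs, just to make the quantities concrete (the names mirror the test above):

// Hypothetical inputs chosen only to trace the arithmetic.
double totalSize = 1e9;        // 1 GB of raw data
int chunkSize = 4 << 20;       // 4 MB per chunk
int numCols = 100;
int cloudSize = 8;             // nodes in the cloud

int parseChunkCount = (int) Math.max(1, totalSize / chunkSize); // ~238 row chunks
int parseChunkCountPerNode = parseChunkCount / cloudSize;       // ~29 per node
long totalChunks = (long) parseChunkCount * numCols;            // ~23,800 chunk POJOs

// "TOO MANY" triggers when parseChunkCountPerNode * numCols exceeds
// 1 << 24 (about 16.8M chunk POJOs per node); these values sit far
// below that threshold, so this configuration would pass.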

Example 70 with FileWriter

Use of java.io.FileWriter in project h2o-3 by h2oai.

The class PredictCsv, method run.

private void run() throws Exception {
    ModelCategory category = model.getModelCategory();
    CSVReader reader = new CSVReader(new FileReader(inputCSVFileName));
    BufferedWriter output = new BufferedWriter(new FileWriter(outputCSVFileName));
    // Emit outputCSV column names.
    switch(category) {
        case AutoEncoder:
            output.write(model.getHeader());
            break;
        case Binomial:
        case Multinomial:
            output.write("predict");
            String[] responseDomainValues = model.getResponseDomainValues();
            for (String s : responseDomainValues) {
                output.write(",");
                output.write(s);
            }
            break;
        case Clustering:
            output.write("cluster");
            break;
        case Regression:
            output.write("predict");
            break;
        default:
            throw new Exception("Unknown model category " + category);
    }
    output.write("\n");
    // Loop over inputCSV one row at a time.
    //
    // TODO: performance of scoring can be considerably improved if instead of scoring each row at a time we passed
    //       all the rows to the score function, in which case it can evaluate each tree for each row, avoiding
    //       multiple rounds of fetching each tree from the filesystem.
    //
    int lineNum = 0;
    try {
        String[] inputColumnNames = null;
        String[] splitLine;
        while ((splitLine = reader.readNext()) != null) {
            lineNum++;
            // Handle the header.
            if (lineNum == 1) {
                inputColumnNames = splitLine;
                continue;
            }
            // Parse the CSV line.  Don't handle quoted commas.  This isn't a parser test.
            RowData row = formatDataRow(splitLine, inputColumnNames);
            // Emit the result to the output file.
            switch(category) {
                case AutoEncoder:
                    {
                        throw new UnsupportedOperationException();
                    // AutoEncoderModelPrediction p = model.predictAutoEncoder(row);
                    // break;
                    }
                case Binomial:
                    {
                        BinomialModelPrediction p = model.predictBinomial(row);
                        output.write(p.label);
                        output.write(",");
                        for (int i = 0; i < p.classProbabilities.length; i++) {
                            if (i > 0) {
                                output.write(",");
                            }
                            output.write(myDoubleToString(p.classProbabilities[i]));
                        }
                        break;
                    }
                case Multinomial:
                    {
                        MultinomialModelPrediction p = model.predictMultinomial(row);
                        output.write(p.label);
                        output.write(",");
                        for (int i = 0; i < p.classProbabilities.length; i++) {
                            if (i > 0) {
                                output.write(",");
                            }
                            output.write(myDoubleToString(p.classProbabilities[i]));
                        }
                        break;
                    }
                case Clustering:
                    {
                        ClusteringModelPrediction p = model.predictClustering(row);
                        output.write(myDoubleToString(p.cluster));
                        break;
                    }
                case Regression:
                    {
                        RegressionModelPrediction p = model.predictRegression(row);
                        output.write(myDoubleToString(p.value));
                        break;
                    }
                default:
                    throw new Exception("Unknown model category " + category);
            }
            output.write("\n");
        }
    } catch (Exception e) {
        System.out.println("Caught exception on line " + lineNum);
        System.out.println("");
        e.printStackTrace();
        System.exit(1);
    }
    // Clean up. (If the catch block above runs, System.exit(1) skips this;
    // see the try-with-resources sketch below.)
    output.close();
    reader.close();
}
Also used : CSVReader(au.com.bytecode.opencsv.CSVReader) FileWriter(java.io.FileWriter) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter) RowData(hex.genmodel.easy.RowData) FileReader(java.io.FileReader) ModelCategory(hex.ModelCategory)
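
Because the catch block calls System.exit(1), the two close() calls at the end never run when a row fails, and they are also skipped if an exception escapes before the catch. A minimal restructuring sketch using try-with-resources, assuming an opencsv version whose CSVReader implements Closeable (copyCsv is a hypothetical name, and the pass-through body stands in for the scoring loop above):

import au.com.bytecode.opencsv.CSVReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;

static void copyCsv(String inputCSVFileName, String outputCSVFileName) throws Exception {
    try (CSVReader reader = new CSVReader(new FileReader(inputCSVFileName));
         BufferedWriter output = new BufferedWriter(new FileWriter(outputCSVFileName))) {
        String[] splitLine;
        while ((splitLine = reader.readNext()) != null) {
            // score and emit each row here, as in run() above
            output.write(String.join(",", splitLine));
            output.write("\n");
        }
    }
    // both streams are closed on every exit path, including exceptions
}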

Aggregations

FileWriter (java.io.FileWriter): 1994
File (java.io.File): 1195
IOException (java.io.IOException): 866
BufferedWriter (java.io.BufferedWriter): 798
PrintWriter (java.io.PrintWriter): 329
Test (org.junit.Test): 243
Writer (java.io.Writer): 181
FileReader (java.io.FileReader): 148
BufferedReader (java.io.BufferedReader): 128
ArrayList (java.util.ArrayList): 121
FileNotFoundException (java.io.FileNotFoundException): 78
Date (java.util.Date): 68
FileOutputStream (java.io.FileOutputStream): 65
Properties (java.util.Properties): 65
HashMap (java.util.HashMap): 61
FileInputStream (java.io.FileInputStream): 54
StringWriter (java.io.StringWriter): 51
Path (org.apache.hadoop.fs.Path): 50
Map (java.util.Map): 42
InputStreamReader (java.io.InputStreamReader): 34