Search in sources :

Example 1 with CSVReader

use of au.com.bytecode.opencsv.CSVReader in project Android-IMSI-Catcher-Detector by CellularPrivacy.

the class ImportTask method doInBackground.

/**
 * Imports data from cell_towers.csv.
 * <p/>
 * Only rows whose MCC and MNC equal the configured {@code mobileCountryCode} /
 * {@code mobileNetworkCode}, and whose coordinates are no farther than {@code locationRadius}
 * from {@code currentLocation}, are imported. Each accepted row is rearranged into the
 * opencellid.csv column order and handed to {@code mDbAdapter.addCSVRecord}.
 * <p/>
 * <blockquote>
 * opencellid.csv layout:
 * lat,lon,mcc,mnc,lac,cellid,averageSignalStrength,range,samples,changeable,radio,rnc,cid,psc,
 * tac,pci,sid,nid,bid
 * <p/>
 * example:
 * 52.201454,21.065345,260,2,58140,42042781,-59,1234,3,1,UMTS,641,34205,,,,
 * <p/>
 * cell_towers.csv layout:
 * radio,mcc,net,area,cell,unit,lon,lat,range,samples,changeable,created,updated,averageSignal
 * 0 radio
 * 1 mcc
 * 2 net (mnc)
 * 3 area (lac)
 * 4 cell (long)
 * 5 unit
 * 6 lon
 * 7 lat
 * 8 range
 * 9 samples
 * 10 changeable
 * 11 created
 * 12 updated
 * 13 averageSignal
 * <p/>
 * example:
 * UMTS,260,2,58140,42042781,,21.03006,52.207811,21,2,1,1379428153,1458591497,-92
 * </blockquote>
 *
 * @param commandString not read by this method
 * @return {@code "Successful"} when the file was read to the end, or {@code null} when an
 *         {@link IOException} occurred while reading the CSV
 */
@Override
protected String doInBackground(String... commandString) {
    try {
        @Cleanup Realm realm = Realm.getDefaultInstance();
        final long startedSeconds = System.currentTimeMillis() / 1000;
        // Prepare filtering values
        final String mccFilter = String.valueOf(mobileCountryCode);
        final String mncFilter = String.valueOf(mobileNetworkCode);
        long progress = 0;
        long failedRecords = 0;
        CSVReader csvReader = null;
        try {
            String[] next;
            csvReader = new CSVReader(createFileReader());
            // skip header
            csvReader.readNext();
            // Reused for every row; fully overwritten before each addCSVRecord call.
            String[] opencellid_csv = new String[14];
            while ((next = csvReader.readNext()) != null) {
                if (next.length < 14) {
                    log.warn("Not enough values in string: " + Arrays.toString(next));
                    ++failedRecords;
                    continue;
                }
                if (!next[1].equals(mccFilter) || !next[2].equals(mncFilter)) {
                    continue;
                }
                if (next[6].isEmpty() || next[7].isEmpty()) {
                    continue;
                }
                try {
                    // Coordinate parsing lives inside the try so that a malformed lat/lon is
                    // counted as a failed record instead of aborting the whole import.
                    GeoLocation location = GeoLocation.fromDegrees(
                            Double.parseDouble(next[7]), Double.parseDouble(next[6]));
                    if (location.distanceTo(currentLocation, EARTH_RADIUS) > locationRadius) {
                        continue;
                    }
                    // set non-existent range, avgSignal, etc to "0" so they
                    // will be possibly filtered by checkDBe
                    // lat
                    opencellid_csv[0] = next[7];
                    // lon
                    opencellid_csv[1] = next[6];
                    // mcc
                    opencellid_csv[2] = next[1];
                    // mnc
                    opencellid_csv[3] = next[2];
                    // lac
                    opencellid_csv[4] = next[3];
                    // cellid, long
                    opencellid_csv[5] = next[4];
                    // averageSignalStrength
                    opencellid_csv[6] = stringOrZero(next[13]);
                    // range
                    opencellid_csv[7] = stringOrZero(next[8]);
                    // samples
                    opencellid_csv[8] = stringOrZero(next[9]);
                    // changeable
                    opencellid_csv[9] = stringOrZero(next[10]);
                    // radio
                    opencellid_csv[10] = next[0];
                    // rnc, not used
                    opencellid_csv[11] = null;
                    // cid, not used
                    opencellid_csv[12] = null;
                    // psc, not present
                    opencellid_csv[13] = null;
                    Date dateCreated = dateOrNow(next[11]);
                    Date dateUpdated = dateOrNow(next[12]);
                    mDbAdapter.addCSVRecord(realm, opencellid_csv, dateCreated, dateUpdated);
                    ++progress;

                    // Progress reporting happens only right after a successful import, so a
                    // failed record cannot re-trigger the log line or the pause for the same
                    // progress value.
                    if ((progress % 100) == 0) {
                        log.debug("Imported records for now: " + progress);
                        // do not know progress because determining line count in gzipped
                        // multi-gigabyte file is slow
                        //publishProgress((int) progress, (int) totalRecords);
                    }
                    if ((progress % 1000) == 0) {
                        try {
                            // wait 1 second to allow user to see progress bar.
                            Thread.sleep(1000);
                        } catch (InterruptedException ex) {
                            Thread.currentThread().interrupt();
                        }
                    }
                } catch (NumberFormatException e) {
                    // Log the source row: the reused output buffer may still hold values
                    // from a previously imported record.
                    log.warn("Problem parsing a record: " + Arrays.toString(next), e);
                    ++failedRecords;
                }
            }
        } finally {
            if (csvReader != null) {
                csvReader.close();
            }
        }
        final long elapsedSeconds = (System.currentTimeMillis() / 1000) - startedSeconds;
        log.debug("Importing took " + elapsedSeconds + " seconds");
        log.debug("Imported records: " + progress);
        log.debug("Failed records: " + failedRecords);
        return "Successful";
    } catch (IOException e) {
        log.warn("Problem reading data from CSV", e);
        return null;
    }
}
Also used : CSVReader(au.com.bytecode.opencsv.CSVReader) IOException(java.io.IOException) Cleanup(lombok.Cleanup) Realm(io.realm.Realm) Date(java.util.Date)

Example 2 with CSVReader

use of au.com.bytecode.opencsv.CSVReader in project ORCID-Source by ORCID.

the class LoadRinggoldData method processAltNamesFile.

/**
 * Builds a map from pCode (column 0) to the preferred alternative name, considering only
 * rows whose column 7 equals {@code DN}.
 * <p>
 * For each such row the name is column 2 (ext_name) when it is not empty, otherwise
 * column 1. When several rows share a pCode, the row with the most recent timestamp
 * (column 8) wins; a row whose timestamp cannot be parsed never replaces an entry that
 * already has a timestamp.
 *
 * @param reader source of the CSV data; consumed and closed through the CSV reader
 * @return map of pCode to the selected name
 * @throws IOException if reading the CSV fails
 */
private Map<String, String> processAltNamesFile(Reader reader) throws IOException {
    Map<String, String> altNamesMap = new HashMap<String, String>();
    Map<String, Date> altNamesTimestamps = new HashMap<String, Date>();
    try (CSVReader csvReader = createCSVReader(reader)) {
        String[] line;
        while ((line = csvReader.readNext()) != null) {
            // Only rows carrying the DN indicator are relevant
            if (PojoUtil.isEmpty(line[7]) || !DN.equals(line[7])) {
                continue;
            }
            String pCode = line[0];
            String name;
            // If the ext_name is not empty, use it
            if (!PojoUtil.isEmpty(line[2])) {
                LOGGER.info("Using ext_name {} for pCode {}", new Object[] { line[2], pCode });
                name = line[2];
            } else {
                // Log the value that is actually used (column 1), not the empty ext_name
                LOGGER.info("Using name {} for pCode {}", new Object[] { line[1], pCode });
                name = line[1];
            }
            // get the timestamp; stays null when it cannot be parsed
            Date timestamp = null;
            try {
                timestamp = getDateFromTimestamp(line[8]);
            } catch (ParseException p) {
                LOGGER.warn("Unable to parse timestamp {} for p_code {}", new Object[] { line[8], pCode });
            }
            Date existing = altNamesTimestamps.get(pCode);
            if (altNamesMap.containsKey(pCode) && existing != null) {
                // A name with a known timestamp already exists: replace it only with a
                // strictly newer one. A null timestamp cannot be compared, so the old
                // entry is kept in that case instead of failing on existing.before(null).
                if (timestamp != null && existing.before(timestamp)) {
                    LOGGER.info("Replacing old name {}({}) with {}({})", new Object[] { altNamesMap.get(pCode), existing, name, timestamp });
                    altNamesMap.put(pCode, name);
                    altNamesTimestamps.put(pCode, timestamp);
                } else {
                    LOGGER.info("Leaving old name {}({}) instead of using this one {}({})", new Object[] { altNamesMap.get(pCode), existing, name, timestamp });
                }
            } else {
                // First name for this pCode, or the previous one had no timestamp:
                // just store the new one
                altNamesMap.put(pCode, name);
                altNamesTimestamps.put(pCode, timestamp);
            }
        }
    } finally {
        LOGGER.info("Number added={}, number updated={}, number unchanged={}, num skipped={}, total={}", new Object[] { numAdded, numUpdated, numUnchanged, numSkipped, getTotal() });
    }
    return altNamesMap;
}
Also used : HashMap(java.util.HashMap) CSVReader(au.com.bytecode.opencsv.CSVReader) ParseException(java.text.ParseException) Date(java.util.Date)

Example 3 with CSVReader

use of au.com.bytecode.opencsv.CSVReader in project h2o-3 by h2oai.

the class GlmMojoBenchHelper method readData.

/**
 * Reads up to {@code out.length} CSV rows from a compressed file into {@code out}.
 * <p>
 * A file whose name ends with {@code .zip} is read as a zip archive whose first entry must
 * be a {@code .csv} file; any other file is read as gzip. For CSV column {@code i},
 * {@code mapping[i]} is the index in the output row to fill; a negative value skips the
 * column. The literal {@code "NA"} becomes {@link Double#NaN}. When
 * {@code mojo.getDomainValues(target)} returns a domain, the cell is stored as its index in
 * that domain; otherwise it is parsed as a double.
 *
 * @param f            compressed input file
 * @param mapping      CSV column index to output column index (negative = ignore)
 * @param firstColName expected name of the first header column, or {@code null} when the
 *                     file has no header row
 * @param out          destination rows; its length bounds the number of rows read
 * @param mojo         model supplying the categorical domains
 * @throws IOException           if the file cannot be read
 * @throws IllegalStateException if the archive is empty or has no CSV entry, the header
 *                               does not match, a row is too short, or a categorical value
 *                               is not in its domain
 */
static void readData(File f, int[] mapping, String firstColName, double[][] out, MojoModel mojo) throws IOException {
    try (InputStream is = new FileInputStream(f)) {
        InputStream source;
        if (f.getName().endsWith(".zip")) {
            ZipInputStream zis = new ZipInputStream(is);
            ZipEntry entry = zis.getNextEntry();
            // getNextEntry() returns null for an archive without entries
            if (entry == null)
                throw new IllegalStateException("CSV file expected, but the zip archive is empty");
            if (!entry.getName().endsWith(".csv"))
                throw new IllegalStateException("CSV file expected, name " + entry.getName());
            source = zis;
        } else {
            source = new GZIPInputStream(is);
        }
        // Closing `is` (the innermost stream) releases the file; the wrappers hold no other
        // system resources.
        CSVReader r = new CSVReader(new InputStreamReader(source));
        if (firstColName != null) {
            String[] header = r.readNext();
            if (header == null)
                throw new IllegalStateException("File empty");
            if (!firstColName.equals(header[0]))
                throw new IllegalStateException("Header expected");
        }
        int rowIdx = 0;
        String[] row;
        while ((rowIdx < out.length) && ((row = r.readNext()) != null)) {
            double[] outRow = out[rowIdx++];
            if (row.length < mapping.length)
                throw new IllegalStateException("Row too short: " + Arrays.toString(row));
            for (int i = 0; i < mapping.length; i++) {
                int target = mapping[i];
                if (target < 0)
                    continue;
                if ("NA".equals(row[i])) {
                    outRow[target] = Double.NaN;
                    continue;
                }
                String[] domain = mojo.getDomainValues(target);
                if (domain == null) {
                    outRow[target] = Double.parseDouble(row[i]);
                } else {
                    // -1 marks "not found" until a matching domain level is located
                    outRow[target] = -1;
                    for (int d = 0; d < domain.length; d++) {
                        if (domain[d].equals(row[i])) {
                            outRow[target] = d;
                            break;
                        }
                    }
                    if (outRow[target] < 0)
                        throw new IllegalStateException("Value " + row[i] + " not found in domain " + Arrays.toString(domain));
                }
            }
        }
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) ZipInputStream(java.util.zip.ZipInputStream) CSVReader(au.com.bytecode.opencsv.CSVReader) GZIPInputStream(java.util.zip.GZIPInputStream) ZipInputStream(java.util.zip.ZipInputStream) ZipEntry(java.util.zip.ZipEntry)

Example 4 with CSVReader

use of au.com.bytecode.opencsv.CSVReader in project h2o-3 by h2oai.

the class PredictCsv method run.

/**
 * Scores every data row of {@code inputCSVFileName} with {@code model} and writes one
 * prediction line per row to {@code outputCSVFileName}.
 * <p>
 * The first input line is treated as the header providing the column names. The output
 * starts with a header line that depends on the model category. If scoring a row fails,
 * the line number and stack trace are printed and the JVM exits with status 1.
 *
 * @throws Exception if the model category is unknown or the files cannot be opened,
 *                   written or closed
 */
private void run() throws Exception {
    ModelCategory category = model.getModelCategory();
    // Both files are closed even if emitting the header fails. Resources are closed in
    // reverse declaration order, so the output is closed (and flushed) before the reader.
    try (CSVReader reader = new CSVReader(new FileReader(inputCSVFileName));
         BufferedWriter output = new BufferedWriter(new FileWriter(outputCSVFileName))) {
        // Emit outputCSV column names.
        switch (category) {
            case AutoEncoder:
                output.write(model.getHeader());
                break;
            case Binomial:
            case Multinomial:
                output.write("predict");
                String[] responseDomainValues = model.getResponseDomainValues();
                for (String s : responseDomainValues) {
                    output.write(",");
                    output.write(s);
                }
                break;
            case Clustering:
                output.write("cluster");
                break;
            case Regression:
                output.write("predict");
                break;
            default:
                throw new Exception("Unknown model category " + category);
        }
        output.write("\n");
        // Loop over inputCSV one row at a time.
        //
        // TODO: performance of scoring can be considerably improved if instead of scoring each row at a time we passed
        //       all the rows to the score function, in which case it can evaluate each tree for each row, avoiding
        //       multiple rounds of fetching each tree from the filesystem.
        //
        int lineNum = 0;
        try {
            String[] inputColumnNames = null;
            String[] splitLine;
            while ((splitLine = reader.readNext()) != null) {
                lineNum++;
                // Handle the header.
                if (lineNum == 1) {
                    inputColumnNames = splitLine;
                    continue;
                }
                // Pair the values of this line with the header's column names.
                RowData row = formatDataRow(splitLine, inputColumnNames);
                // Emit the result to the output file.
                switch (category) {
                    case AutoEncoder: {
                        throw new UnsupportedOperationException();
                        // AutoEncoderModelPrediction p = model.predictAutoEncoder(row);
                        // break;
                    }
                    case Binomial: {
                        BinomialModelPrediction p = model.predictBinomial(row);
                        output.write(p.label);
                        output.write(",");
                        for (int i = 0; i < p.classProbabilities.length; i++) {
                            if (i > 0) {
                                output.write(",");
                            }
                            output.write(myDoubleToString(p.classProbabilities[i]));
                        }
                        break;
                    }
                    case Multinomial: {
                        MultinomialModelPrediction p = model.predictMultinomial(row);
                        output.write(p.label);
                        output.write(",");
                        for (int i = 0; i < p.classProbabilities.length; i++) {
                            if (i > 0) {
                                output.write(",");
                            }
                            output.write(myDoubleToString(p.classProbabilities[i]));
                        }
                        break;
                    }
                    case Clustering: {
                        ClusteringModelPrediction p = model.predictClustering(row);
                        output.write(myDoubleToString(p.cluster));
                        break;
                    }
                    case Regression: {
                        RegressionModelPrediction p = model.predictRegression(row);
                        output.write(myDoubleToString(p.value));
                        break;
                    }
                    default:
                        throw new Exception("Unknown model category " + category);
                }
                output.write("\n");
            }
        } catch (Exception e) {
            // Any failure while scoring is fatal: report where it happened and terminate.
            System.out.println("Caught exception on line " + lineNum);
            System.out.println("");
            e.printStackTrace();
            System.exit(1);
        }
    }
}
Also used : CSVReader(au.com.bytecode.opencsv.CSVReader) FileWriter(java.io.FileWriter) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter) RowData(hex.genmodel.easy.RowData) FileReader(java.io.FileReader) ModelCategory(hex.ModelCategory)

Example 5 with CSVReader

use of au.com.bytecode.opencsv.CSVReader in project epadd by ePADD.

the class LabelManager method readObjectFromStream.

/**
 * Creates a {@link LabelManager} from the two files stored in {@code dirname}: the JSON
 * file {@code JSONFILENAME}, which populates {@code labelInfoMap}, and the CSV file
 * {@code CSVFILENAME}, whose rows (after the header line) populate {@code docToLabelID}
 * with column 0 as key and column 1 as value.
 * <p>
 * Read failures are logged as warnings and do not abort loading; the returned manager
 * then simply lacks the data that could not be read.
 *
 * @param dirname directory containing the label files
 * @return the label manager, never {@code null}
 */
public static LabelManager readObjectFromStream(String dirname) {
    LabelManager lm = new LabelManager();
    // reading labelinfo map from json format
    FileReader reader = null;
    try {
        reader = new FileReader(dirname + File.separator + JSONFILENAME);
        Type type = new TypeToken<Map<String, Label>>() {
        }.getType();
        lm.labelInfoMap = new Gson().fromJson(reader, type);
    } catch (IOException e) {
        log.warn("Unable to read labelinfo file");
    } finally {
        // reader is still null when the file could not be opened
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                log.warn("Unable to close labelinfo file");
            }
        }
    }
    // reading docToLabelIDmap from csv; both readers are closed even when reading fails
    try (FileReader fr = new FileReader(dirname + File.separator + CSVFILENAME);
         CSVReader csvreader = new CSVReader(fr, ',', '"', '\n')) {
        // skip the first line, which is the header
        csvreader.readNext();
        String[] record;
        while ((record = csvreader.readNext()) != null) {
            // A blank or truncated line has fewer than two columns and cannot be mapped
            if (record.length < 2) {
                log.warn("Skipping malformed record in docid to label map csv file");
                continue;
            }
            lm.docToLabelID.put(record[0], record[1]);
        }
    } catch (IOException e) {
        log.warn("Unable to read docid to label map from csv file");
    }
    return lm;
}
Also used : Type(java.lang.reflect.Type) CSVReader(au.com.bytecode.opencsv.CSVReader) Gson(com.google.gson.Gson)

Aggregations

CSVReader (au.com.bytecode.opencsv.CSVReader)83 IOException (java.io.IOException)30 InputStreamReader (java.io.InputStreamReader)28 ArrayList (java.util.ArrayList)17 FileReader (java.io.FileReader)12 StringReader (java.io.StringReader)11 HashMap (java.util.HashMap)9 BufferedReader (java.io.BufferedReader)8 InputStream (java.io.InputStream)6 File (java.io.File)5 Reader (java.io.Reader)5 HttpClient (org.apache.commons.httpclient.HttpClient)5 GetMethod (org.apache.commons.httpclient.methods.GetMethod)5 Test (org.junit.Test)5 LinkedHashMap (java.util.LinkedHashMap)4 DBException (org.jkiss.dbeaver.DBException)4 Query (au.org.ala.spatial.util.Query)3 TransformationExample (eu.esdihumboldt.cst.test.TransformationExample)3 Date (java.util.Date)3 JSONArray (org.json.simple.JSONArray)3