use of org.supercsv.io.CsvMapReader in project apex-malhar by apache.
the class CsvParser method setup.
/**
 * Prepares the CSV parsing state from the configured {@code schema}.
 *
 * <p>Builds the {@link DelimitedSchema}, derives the Super CSV preference
 * (quote char, delimiter char, line delimiter), the field-name mapping, the
 * delimiter-joined header string and the cell processors, then creates a map
 * reader and a bean reader that both read from the same
 * {@link ReusableStringReader} instance.
 *
 * @param context operator context; not read by this method
 */
@Override
public void setup(OperatorContext context) {
    // Parse the schema once and derive everything else from this handle.
    final DelimitedSchema parsedSchema = new DelimitedSchema(schema);
    delimitedParserSchema = parsedSchema;

    CsvPreference.Builder preferenceBuilder = new CsvPreference.Builder(
        parsedSchema.getQuoteChar(),
        parsedSchema.getDelimiterChar(),
        parsedSchema.getLineDelimiter());
    preference = preferenceBuilder.build();

    // Column names, in schema order, as the array form the CSV readers expect.
    nameMapping = parsedSchema.getFieldNames().toArray(new String[0]);

    // Header line: the field names joined by the single delimiter character.
    final String delimiter = String.valueOf((char) parsedSchema.getDelimiterChar());
    header = StringUtils.join(nameMapping, delimiter);

    processors = getProcessor(parsedSchema.getFields());

    // Both readers share one underlying string reader.
    csvStringReader = new ReusableStringReader();
    csvMapReader = new CsvMapReader(csvStringReader, preference);
    csvBeanReader = new CsvBeanReader(csvStringReader, preference);
}
use of org.supercsv.io.CsvMapReader in project apex-malhar by apache.
the class DelimitedFSLoader method init.
/**
 * Initializes the delimited-record parsing state from the configured
 * {@code schema}: the parsed schema, the Super CSV preference, the field-name
 * mapping, the delimiter-joined header string, the cell processors and a
 * {@link CsvMapReader} backed by a fresh {@link ReusableStringReader}.
 */
private void init() {
    final DelimitedSchema parsedSchema = new DelimitedSchema(schema);
    delimitedParserSchema = parsedSchema;

    // Preference mirrors the schema's quote char, delimiter char and line delimiter.
    CsvPreference.Builder preferenceBuilder = new CsvPreference.Builder(
        parsedSchema.getQuoteChar(),
        parsedSchema.getDelimiterChar(),
        parsedSchema.getLineDelimiter());
    preference = preferenceBuilder.build();

    nameMapping = parsedSchema.getFieldNames().toArray(new String[0]);

    // The header is the field names separated by the one-character delimiter.
    final String separator = Character.toString((char) parsedSchema.getDelimiterChar());
    header = StringUtils.join(nameMapping, separator);

    processors = getProcessor(parsedSchema.getFields());

    csvStringReader = new ReusableStringReader();
    csvMapReader = new CsvMapReader(csvStringReader, preference);
}
use of org.supercsv.io.CsvMapReader in project Xponents by OpenSextant.
the class GeonamesUtility method loadAdmin1Metadata.
/**
 * Loads ADM1 (state/province) metadata from the classpath resource
 * {@code /country-adm1-codes.csv} and appends one {@code Place} per CSV row to
 * {@code admin1Metadata}.
 *
 * <pre>
 * TODO: This is mildly informed by geonames.org, however even there we are still missing
 * a mapping between ADM1 FIPS/ISO codes for a state and the Postal codes/abbreviations.
 *
 * Aliases for the same US province:
 * "US.25" = "MA" = "US.MA" = "Massachusetts" = "the Bay State"
 *
 * Easily mapping the coded data (e.g., 'MA' = '25') worldwide would be helpful.
 *
 * TODO: Make use of geonames.org or other sources for ADM1 postal code listings at top level.
 * </pre>
 *
 * @throws IOException if the CSV file is not found in the classpath, or if any
 *                     error occurs while reading or interpreting its rows
 */
public void loadAdmin1Metadata() throws IOException {
    String uri = "/country-adm1-codes.csv";

    // getResourceAsStream returns null for a missing resource; report that
    // explicitly instead of letting it surface as a wrapped NullPointerException.
    java.io.InputStream stream = GeonamesUtility.class.getResourceAsStream(uri);
    if (stream == null) {
        throw new IOException("Could not find US State data on classpath: " + uri);
    }

    // Decode with an explicit charset so parsing does not depend on the platform
    // default. NOTE(review): assumes the bundled CSV is UTF-8 (or plain ASCII) - confirm.
    // Both the reader and the CSV reader are closed on every exit path.
    try (Reader fio = new InputStreamReader(stream, java.nio.charset.StandardCharsets.UTF_8);
            CsvMapReader adm1CSV = new CsvMapReader(fio, CsvPreference.EXCEL_PREFERENCE)) {
        String[] columns = adm1CSV.getHeader(true);
        Map<String, String> stateRow = null;

        while ((stateRow = adm1CSV.read(columns)) != null) {
            // Rough identifier of the form "<ISO2_CC>.<POSTAL_CODE>".
            String roughID = String.format("%s.%s", stateRow.get("ISO2_CC"), stateRow.get("POSTAL_CODE"));
            Place s = new Place(roughID, stateRow.get("STATE"));
            s.setFeatureClass("A");
            s.setFeatureCode("ADM1");
            // Drops the first two characters of ADM1_CODE (presumably a
            // country-code prefix - confirm against the CSV contents).
            s.setAdmin1(stateRow.get("ADM1_CODE").substring(2));
            s.setCountryCode(stateRow.get("ISO2_CC"));
            s.defaultHierarchicalPath();
            LatLon yx = GeodeticUtility.parseLatLon(stateRow.get("LAT"), stateRow.get("LON"));
            s.setLatLon(yx);
            s.setAdmin1PostalCode(stateRow.get("POSTAL_CODE"));
            admin1Metadata.add(s);
        }
    } catch (Exception err) {
        // Any parsing or data problem is reported to callers as an IOException.
        throw new IOException("Could not load US State data from " + uri, err);
    }
}
use of org.supercsv.io.CsvMapReader in project Xponents by OpenSextant.
the class GeonamesUtility method loadCountryTimezones.
/**
 * Parses the geonames.org time zone table bundled at
 * {@code /geonames.org/timeZones.txt} (tab-separated, with a header row),
 * attaches each time zone to its country, and then registers every country's
 * time zones and UTC/DST offsets in the lookup tables.
 *
 * @throws IOException
 *             if timeZones.txt is not found or has an issue.
 */
private void loadCountryTimezones() throws IOException {
    final String uri = "/geonames.org/timeZones.txt";

    // A missing resource yields null; fail with the documented IOException
    // rather than a NullPointerException from the reader constructor.
    java.io.InputStream io = getClass().getResourceAsStream(uri);
    if (io == null) {
        throw new IOException("Could not find time zone table on classpath: " + uri);
    }

    // try-with-resources closes the CSV reader and the underlying stream even when
    // header or row parsing throws. The charset is explicit so decoding does not
    // depend on the platform default (GeoNames publishes its dumps as UTF-8).
    try (java.io.Reader tzReader = new InputStreamReader(io, java.nio.charset.StandardCharsets.UTF_8);
            CsvMapReader tzMap = new CsvMapReader(tzReader, CsvPreference.TAB_PREFERENCE)) {
        String[] columns = tzMap.getHeader(true);
        Map<String, String> tzdata = null;
        while ((tzdata = tzMap.read(columns)) != null) {
            String cc = tzdata.get("CountryCode");
            // Skip rows with no country code (empty cells are read as null)
            // and rows whose country code cell marks a comment.
            if (cc == null || cc.trim().startsWith("#")) {
                continue;
            }
            Country C = getCountry(cc);
            if (C == null) {
                // No country is known for this code; ignore the row.
                continue;
            }
            Country.TZ tz = new Country.TZ(tzdata.get("TimeZoneId"), tzdata.get("GMT offset 1. Jan 2016"), tzdata.get("DST offset 1. Jul 2016"), tzdata.get("rawOffset (independant of DST)"));
            C.addTimezone(tz);
        }
    }

    // Add all TZ to countries;
    for (String cc : isoCountries.keySet()) {
        // Only keys of at most two characters are processed
        // (presumably the ISO 2-letter codes - confirm how isoCountries is keyed).
        if (cc.length() > 2) {
            continue;
        }
        Country C = isoCountries.get(cc);
        for (String tmzn : C.getAllTimezones().keySet()) {
            addTimezone(tmzn, cc);
        }
        for (Country.TZ tz : C.getTZDatabase().values()) {
            addTZOffset(utc2cc, tz.utcOffset, cc);
            addTZOffset(dst2cc, tz.dstOffset, cc);
        }
    }
}
use of org.supercsv.io.CsvMapReader in project Xponents by OpenSextant.
the class TagFilter method loadExclusions.
/**
 * Reads exclusion terms from a CSV stream that has a header row and the two
 * columns 'exclusion' and 'category'. Only the 'exclusion' column is read here.
 *
 * A value that is blank or starts with "#" is skipped ("#" marks a comment).
 * Every remaining value is lower-cased and trimmed before being collected.
 * The caller is responsible for obtaining the I/O stream.
 *
 * @param filestream
 *            stream over the CSV content with exclusion terms
 * @return set of lower-cased, trimmed filter terms
 * @throws ConfigException
 *             if the stream cannot be read or parsed; the underlying error
 *             is attached as the cause
 */
public static Set<String> loadExclusions(InputStream filestream) throws ConfigException {
    /*
     * Exclusion names are terms that match gazetteer entries, e.g.,
     * gazetteer.name = <exclusion term>; per the original note such entries
     * will be marked as search_only = true.
     */
    try (Reader csvInput = new InputStreamReader(filestream)) {
        CsvMapReader csv = new CsvMapReader(csvInput, CsvPreference.EXCEL_PREFERENCE);
        String[] headerColumns = csv.getHeader(true);
        HashSet<String> excluded = new HashSet<String>();

        for (Map<String, String> row = csv.read(headerColumns); row != null; row = csv.read(headerColumns)) {
            String value = row.get("exclusion");
            // Keep only real terms: neither blank nor a "#" comment.
            boolean usable = !StringUtils.isBlank(value) && !value.startsWith("#");
            if (usable) {
                excluded.add(value.toLowerCase().trim());
            }
        }

        csv.close();
        return excluded;
    } catch (Exception err) {
        throw new ConfigException("Could not load exclusions.", err);
    }
}
Aggregations