use of au.com.bytecode.opencsv.CSVReader in project epadd by ePADD.
the class CorrespondentAuthorityMapper method readObjectFromStream.
/**
 * Reads authority records from csv files and sets up a new
 * {@code CorrespondentAuthorityMapper}.
 *
 * <p>Two files are read from {@code dirname}: the one named by
 * {@code CANDIDATE_AUTHORITIES_FILE_NAME} fills {@code cnameToFastIdCandidates}, and the one
 * named by {@code CONFIRMED_AUTHORITIES_FILE_NAME} fills {@code cnameToAuthority}. In both
 * files the first row is treated as a header and skipped. Malformed rows are skipped; a file
 * that cannot be read is logged and leaves the corresponding map as the constructor left it.
 *
 * @param dirname directory that contains the two csv files
 * @return the mapper populated with whatever rows could be read
 */
public static CorrespondentAuthorityMapper readObjectFromStream(String dirname) {
    CorrespondentAuthorityMapper cauthorityMapper = new CorrespondentAuthorityMapper();

    // read candidate authorities from csv file and fill in cnameToFastIdCandidates multimap
    // NOTE(review): the 4th constructor argument is a char, which presumably selects the
    // "escape character" overload in opencsv 2.x -- confirm that '\n' is intended here.
    String candidatesPath = dirname + File.separator + CANDIDATE_AUTHORITIES_FILE_NAME;
    // try-with-resources closes both readers even when reading fails half way through
    try (FileReader fr = new FileReader(candidatesPath);
         CSVReader csvreader = new CSVReader(fr, ',', '"', '\n')) {
        // skip the first line, which is the header
        csvreader.readNext();
        String[] record;
        while ((record = csvreader.readNext()) != null) {
            if (record.length != 2)
                continue;
            long fastid;
            try {
                fastid = Long.parseLong(record[1].trim());
            } catch (NumberFormatException e) {
                // fastid was not parseable from string.. skip it
                continue;
            }
            // -1 has always been treated as "no usable id", so keep skipping it
            if (fastid == -1)
                continue;
            cauthorityMapper.cnameToFastIdCandidates.put(record[0], fastid);
        }
    } catch (IOException e) {
        log.warn("Unable to read candidateAuthorities from csv file", e);
    }

    // read confirmed authorities from csv file and fill in cnameToAuthority map
    // columns 0..9: canonical name, preferred label, fast id, viaf id, wikipedia id,
    // lcsh id, lcnaf id, local id, extent, is-manually-assigned (y/n)
    final int requiredFields = 10;
    String confirmedPath = dirname + File.separator + CONFIRMED_AUTHORITIES_FILE_NAME;
    try (FileReader fr = new FileReader(confirmedPath);
         CSVReader csvreader = new CSVReader(fr, ',', '"', '\n')) {
        // skip the first line, which is the header
        csvreader.readNext();
        String[] record;
        while ((record = csvreader.readNext()) != null) {
            if (record.length <= 1)
                // to handle the case when an empty line is present in the csv file.
                continue;
            if (record.length < requiredFields) {
                // a truncated row used to raise ArrayIndexOutOfBoundsException and abort the
                // whole load; skip just that row instead
                log.warn("Too few fields (" + record.length + ") in a row of the correspondent confirmed authority csv file");
                continue;
            }
            AuthorityRecord ar = new AuthorityRecord();
            String canonname = record[0].trim();
            ar.preferredLabel = record[1];
            try {
                ar.fastId = Long.parseLong(record[2].trim());
            } catch (NumberFormatException e) {
                log.warn("Invalid integer found in the correspondent confirmed authority csv file");
                // because some string to number formatting failed.
                continue;
            }
            ar.viafId = record[3].trim();
            ar.wikipediaId = record[4].trim();
            ar.lcshId = record[5].trim();
            ar.lcnafId = record[6].trim();
            ar.localId = record[7].trim();
            ar.extent = record[8].trim();
            String ismanual = record[9].trim();
            if (ismanual.equalsIgnoreCase("y"))
                ar.isManuallyAssigned = true;
            else if (ismanual.equalsIgnoreCase("n"))
                ar.isManuallyAssigned = false;
            else {
                log.warn("Invalid character found for isManuallyAssigned field in the correspondent confirmed authority csv file");
                continue;
            }
            cauthorityMapper.cnameToAuthority.put(canonname, ar);
        }
    } catch (IOException e) {
        log.warn("Unable to read confirmed authorities from csv file", e);
    }
    return cauthorityMapper;
}
use of au.com.bytecode.opencsv.CSVReader in project epadd by ePADD.
the class AnnotationManager method readObjectFromStream.
/**
 * Reads an annotation manager from a human readable csv file.
 *
 * <p>The first row is treated as a header and skipped. For every following row, column 0 is
 * used as the document id and column 1 as its annotation; any further columns are ignored
 * for the time being. Rows with fewer than two columns (for example blank lines) are skipped.
 * If the file does not exist, or cannot be read, the manager is returned with whatever had
 * been loaded up to that point.
 *
 * @param filepath path of the csv file to read
 * @return a new {@code AnnotationManager} holding the annotations that could be read
 */
public static AnnotationManager readObjectFromStream(String filepath) {
    File annotationfile = new File(filepath);
    AnnotationManager annotationManager = new AnnotationManager();
    if (!annotationfile.exists())
        return annotationManager;

    // read the annotations and assign them to a document in archive (based on unique id)
    // NOTE(review): the 4th constructor argument is a char, which presumably selects the
    // "escape character" overload in opencsv 2.x -- confirm that ' ' is intended here.
    // try-with-resources closes both readers even when reading fails half way through
    try (FileReader fr = new FileReader(annotationfile);
         CSVReader csvreader = new CSVReader(fr, ',', '"', ' ')) {
        // skip the first line, which is the header
        csvreader.readNext();
        String[] record;
        while ((record = csvreader.readNext()) != null) {
            if (record.length < 2)
                // a blank or truncated row has no annotation column; reading record[1]
                // would throw ArrayIndexOutOfBoundsException
                continue;
            String docid = record[0];
            String annotation = record[1];
            annotationManager.docToAnnotation.put(docid, annotation);
        }
    } catch (IOException e) {
        log.warn("Unable to read docid to label map from csv file", e);
    }
    return annotationManager;
}
use of au.com.bytecode.opencsv.CSVReader in project epadd by ePADD.
the class BlobStore method setNormalizationMap.
/**
 * Loads blob normalization info from a csv file into {@code normalizationMap}.
 *
 * <p>The map is created if it is still {@code null}; existing entries are kept and entries
 * with the same file name are overwritten. The first row of the file is treated as a header
 * and skipped. Every other row contributes {@code column 0 -> (column 1, column 2)}, i.e.
 * file name to (cleaned-up name, normalized name). Rows with fewer than three columns are
 * skipped. A read failure is logged and leaves the entries loaded so far in place.
 *
 * @param blobNormalizationMapPath path of the csv file to read
 */
public void setNormalizationMap(String blobNormalizationMapPath) {
    if (normalizationMap == null)
        normalizationMap = new LinkedHashMap<>();

    // read the normalization info file and put it in a map
    // try-with-resources closes both readers even when reading fails half way through
    try (FileReader fr = new FileReader(blobNormalizationMapPath);
         CSVReader csvreader = new CSVReader(fr, ',', '"')) {
        // skip the first line, which is the header
        csvreader.readNext();
        String[] record;
        while ((record = csvreader.readNext()) != null) {
            if (record.length < 3)
                // a blank or truncated row would throw ArrayIndexOutOfBoundsException below
                continue;
            String filename = record[0];
            String cleanedupname = record[1];
            String normalizedname = record[2];
            this.normalizationMap.put(filename, new Pair<String, String>(cleanedupname, normalizedname));
        }
    } catch (IOException e) {
        // the previous message ("docid to label map") was copied from another reader
        log.warn("Unable to read blob normalization map from csv file " + blobNormalizationMapPath, e);
    }
}
use of au.com.bytecode.opencsv.CSVReader in project epadd by ePADD.
the class SearchResult method selectDocsForBulkFlags.
/**
 * Narrows {@code inputSet.matchedDocs} to the documents selected for bulk flagging.
 * This method was moved from bulk-flags.jsp so that all types of result set creation happens here.
 *
 * <p>Two query parameters are consulted:
 * <ul>
 * <li>{@code allDocs} equal to {@code "1"}: the matched docs are retained against all docs of
 * the archive;</li>
 * <li>otherwise {@code filePath}, if it names an existing file: the first column of every csv
 * row is trimmed and collected, {@code EmailUtils.getDocsForEAs} is asked for the matching
 * documents, and the matched docs are retained against the union of those.</li>
 * </ul>
 * If neither applies the input is returned untouched. Any exception raised while handling the
 * file is reported through {@code Util.print_exception} and the input is returned as it is at
 * that point.
 *
 * @param inputSet the search result to narrow; modified in place
 * @return the same {@code inputSet} instance
 */
public static SearchResult selectDocsForBulkFlags(SearchResult inputSet) {
    String filePath = JSPHelper.getParam(inputSet.queryParams, "filePath");
    String allDocsParam = JSPHelper.getParam(inputSet.queryParams, "allDocs");
    boolean allDocs = "1".equals(allDocsParam);

    if (allDocs) {
        inputSet.matchedDocs.keySet().retainAll(inputSet.archive.getAllDocsAsSet());
        return inputSet;
    }
    if (filePath == null || !new File(filePath).exists())
        return inputSet;

    Set<String> eas = new LinkedHashSet<>();
    try {
        // read the entries in the file; the reader used to be left open, so close it as soon
        // as the rows have been consumed
        try (CSVReader reader = new CSVReader(new FileReader(filePath))) {
            String[] line;
            while ((line = reader.readNext()) != null) {
                eas.add(line[0].trim());
            }
        }

        Map<String, Set<Document>> matchesWithNumHits = EmailUtils.getDocsForEAs(inputSet.archive.getAllDocsAsSet(), eas);
        Set<Document> matchedDocs = new LinkedHashSet<>();
        if (matchesWithNumHits != null) {
            for (Set<Document> docs : matchesWithNumHits.values()) {
                if (docs != null)
                    matchedDocs.addAll(docs);
            }
        }
        inputSet.matchedDocs.keySet().retainAll(matchedDocs);
    } catch (Exception e) {
        Util.print_exception("Exception while fetching messages for: " + eas, e, JSPHelper.log);
    }
    return inputSet;
}
use of au.com.bytecode.opencsv.CSVReader in project data-prep by Talend.
the class CSVFastHeaderAndTypeAnalyzer method readLine.
/**
 * Extracts fields from a line, using CSVReader with this analyzer's separator.
 *
 * <p>If the last field is empty it is dropped from the result. When the line yields no
 * fields, or cannot be read, the shared immutable empty list is returned.
 *
 * @param line line as it's in the CSV raw file
 * @return a list of ordered fields
 */
private List<String> readLine(String line) {
    List<String> result = Collections.emptyList();
    // Read straight from the String: going through bytes and back with the platform charset
    // can corrupt characters that charset cannot represent. Fully qualified so that no new
    // import is needed.
    try (CSVReader csvReader = new CSVReader(new java.io.StringReader(line), separator.getSeparator())) {
        String[] fields = csvReader.readNext();
        if (fields != null && fields.length != 0) {
            // explicitly mutable copy: the trailing empty field may be removed below
            result = new java.util.ArrayList<>(Arrays.asList(fields));
        }
    } catch (IOException e) {
        // "{}" is the SLF4J placeholder; the former "{i}" was never substituted
        LOGGER.info("Unable to read line {} of sample", line, e);
    }
    // remove last fields if it is empty
    int size = result.size();
    if (size > 0 && StringUtils.isEmpty(result.get(size - 1))) {
        result.remove(size - 1);
    }
    return result;
}
Aggregations