use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForINaturalist method retrieveDataParseResults.
/**
 * Loads the ignored-type list and the interaction type mapping, then requests
 * iNaturalist observation field value pages (100 entries per page, research grade only)
 * until a page yields a result count of zero.
 *
 * @return the sum of the per-page counts returned by {@code parseJSON}
 * @throws StudyImporterException if either lookup resource cannot be loaded, or if a page
 *                                cannot be retrieved or parsed; the underlying exception is
 *                                attached as the cause
 */
private int retrieveDataParseResults() throws StudyImporterException {
    List<Integer> typesIgnored;
    try {
        typesIgnored = buildTypesIgnored(parserFactory.createParser(getTypeIgnoredURI(), CharsetConstant.UTF8));
    } catch (IOException e) {
        // pass the IOException along as the cause so the root failure stays visible
        throw new StudyImporterException("failed to load ignored interaction types from [" + getTypeIgnoredURI() + "]", e);
    }

    Map<Integer, InteractType> typeMap;
    try {
        LabeledCSVParser labeledCSVParser = parserFactory.createParser(getTypeMapURI(), CharsetConstant.UTF8);
        typeMap = buildTypeMap(getTypeMapURI(), labeledCSVParser);
    } catch (IOException e) {
        // pass the IOException along as the cause so the root failure stays visible
        throw new StudyImporterException("failed to load interaction mapping from [" + getTypeMapURI() + "]", e);
    }

    int totalInteractions = 0;
    int previousResultCount = 0;
    int pageNumber = 1;
    do {
        String uri = INATURALIST_URL + "/observation_field_values.json?type=taxon&page=" + pageNumber + "&per_page=100&quality_grade=research";
        try {
            previousResultCount = parseJSON(getDataset().getResource(uri), typesIgnored, typeMap);
            pageNumber++;
            totalInteractions += previousResultCount;
        } catch (IOException | StudyImporterException e) {
            throw new StudyImporterException("failed to import iNaturalist at [" + uri + "]", e);
        }
        // a page that contributes nothing ends the paging loop
    } while (previousResultCount > 0);
    return totalInteractions;
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForINaturalist method buildTypeMap.
/**
 * Builds a lookup from numeric observation field id to interaction type by reading the
 * {@code observation_field_id} and {@code interaction_type_id} columns of every row.
 * <p>
 * A row is skipped with a warning when its observation field id does not start with
 * {@code PREFIX_OBSERVATION_FIELD}, when the remainder after that prefix is not a valid
 * integer, or when {@code InteractType.typeOf} returns {@code null} for its interaction
 * type id.
 *
 * @param resource         name of the resource being parsed; used only in warning messages
 * @param labeledCSVParser parser supplying the rows
 * @return map from numeric observation field id to interaction type, sorted by id
 * @throws IOException if reading from the parser fails
 */
public static Map<Integer, InteractType> buildTypeMap(String resource, LabeledCSVParser labeledCSVParser) throws IOException {
    Map<Integer, InteractType> typeMap = new TreeMap<Integer, InteractType>();
    while (labeledCSVParser.getLine() != null) {
        String inatIdString = labeledCSVParser.getValueByLabel("observation_field_id");
        Integer inatId = null;
        if (StringUtils.startsWith(inatIdString, PREFIX_OBSERVATION_FIELD)) {
            try {
                // strip only the leading prefix that startsWith just confirmed
                inatId = Integer.parseInt(inatIdString.substring(PREFIX_OBSERVATION_FIELD.length()));
            } catch (NumberFormatException ignored) {
                // leave inatId null so the malformed row is warned about and skipped below
                // instead of aborting the whole load
            }
        }
        if (inatId == null) {
            LOG.warn("failed to map observation field id [" + inatIdString + "] in line [" + resource + ":" + labeledCSVParser.lastLineNumber() + "]");
        } else {
            String interactionTypeId = labeledCSVParser.getValueByLabel("interaction_type_id");
            InteractType interactType = InteractType.typeOf(interactionTypeId);
            if (interactType == null) {
                LOG.warn("failed to map interaction type [" + interactionTypeId + "] in line [" + resource + ":" + labeledCSVParser.lastLineNumber() + "]");
            } else {
                typeMap.put(inatId, interactType);
            }
        }
    }
    return typeMap;
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForINaturalist method buildTypesIgnored.
/**
 * Collects the numeric observation field ids found in the {@code observation_field_id}
 * column of the given parser, in row order.
 * <p>
 * Rows whose value does not start with {@code PREFIX_OBSERVATION_FIELD} are skipped.
 *
 * @param labeledCSVParser parser supplying the rows
 * @return the ids with {@code PREFIX_OBSERVATION_FIELD} removed and parsed as integers
 * @throws IOException if reading from the parser fails
 */
public static List<Integer> buildTypesIgnored(LabeledCSVParser labeledCSVParser) throws IOException {
    final List<Integer> ignoredIds = new ArrayList<Integer>();
    while (labeledCSVParser.getLine() != null) {
        final String fieldId = labeledCSVParser.getValueByLabel("observation_field_id");
        if (!StringUtils.startsWith(fieldId, PREFIX_OBSERVATION_FIELD)) {
            continue;
        }
        final String numericPart = fieldId.replace(PREFIX_OBSERVATION_FIELD, "");
        ignoredIds.add(Integer.parseInt(numericPart));
    }
    return ignoredIds;
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForGoMexSI2Test method importSinglePreyMissingDatabaseName.
/**
 * Parses a single prey row whose PREY_DATABASE_NAME column is empty and checks that the
 * reported "name" property equals the PREY_SOURCE_NAME value, while the database name is
 * reported as an empty string.
 */
@Test
public void importSinglePreyMissingDatabaseName() throws IOException, StudyImporterException {
    final String header = "DATA_ID,PRED_ID,PREY_SOURCE_NAME,PREY_DATABASE_NAME,PHYSIOLOG_STATE,SED_ORIGIN,PREY_PARTS,PREY_LIFE_HIST_STAGE,PREY_COND_INDEX,PREY_SEX,PREY_SEX_RATIO,PREY_LEN_TYPE,PREY_MIN_LEN,PREY_MAX_LEN,PREY_MN_LEN,PREY_MIN_WIDTH,PREY_MAX_WIDTH,PREY_MN_WIDTH,BIOMASS,BIOMASS_QUALIFIER,PCT_BIOMASS,PCT_BIOMASS_QUALIFIER,N_CONS,N_CONS_QUALIFIER,PCT_N_CONS,PCT_N_CONS_QUALIFIER,VOL_CONS,VOL_CONS_QUALIFIER,PCT_VOL_CONS,PCT_VOL_CONS_QUALIFIER,FREQ_OCC,FREQ_OCC_QUALIFIER,PCT_FREQ_OCC,PCT_FREQ_OCC_QUALIFIER,IRI,PCT_IRI,IRIa,IIR,E,PREY_NOTES,ENTRY_DATE,ENTRY_PERSON,EDITED_DATE,DATA_EDITOR,MODIFY_DATE,DATA_MODIFIER\n";
    // fourth column (PREY_DATABASE_NAME) is intentionally left empty
    final String row = "ACT_16R,Cchr.1,Crustacea,,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.245,NA,0.15,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,Jim Simons,NA,Jim Simons,27/06/2016,Theresa Mitchell\n";

    // gathers every property map handed to the callback
    final Map<String, String> captured = new HashMap<String, String>();
    final ParseEventHandler capturingHandler = new ParseEventHandler() {
        @Override
        public void onSpecimen(String predatorUID, Map<String, String> properties) {
            captured.putAll(properties);
        }
    };
    final LabeledCSVParser singleRowParser = new LabeledCSVParser(new CSVParser(new StringReader(header + row)));

    StudyImporterForGoMexSI2.parseSpecimen("test.txt", "PREY_", capturingHandler, singleRowParser);

    assertThat(captured.get("name"), is("Crustacea"));
    assertThat(captured.get("GOMEXSI:PREY_SOURCE_NAME"), is("Crustacea"));
    assertThat(captured.get("GOMEXSI:PREY_DATABASE_NAME"), is(""));
}
use of com.Ostermiller.util.LabeledCSVParser in project eol-globi-data by jhpoelen.
the class StudyImporterForMetaTableIT method importNHMStatic.
/**
 * Calls importAll with the globi.json of the natural-history-museum-london-interactions-bank
 * repository and a table parser factory that serves a fixed, inline CSV sample, then checks
 * that four interactions were reported and that each one carries an interaction type name
 * plus source and target taxon id and name.
 */
@Test
public void importNHMStatic() throws IOException, StudyImporterException {
// collects every property map passed to the interaction listener
final List<Map<String, String>> links = new ArrayList<Map<String, String>>();
final InteractionListener interactionListener = properties -> links.add(properties);
// the factory ignores its config and dataset arguments and always returns a parser over the inline sample
final StudyImporterForMetaTable.TableParserFactory tableFactory = (config, dataset) -> {
// NOTE(review): in this view the string literal below is physically split across two lines
// inside the "Notes" field of the third data record — confirm which character the original source has there
String firstFewLines = "\"InteractionID\",\"InteractionURL\",\"Species1UUID\",\"Species1Name\",\"Species1LifeCycleStage\",\"Species1OrganismPart\",\"Species1Status\",\"InteractionType\",\"InteractionOntologyURL\",\"Species2UUID\",\"Species2Name\",\"Species2LifeCycleStage\",\"Species2OrganismPart\",\"Species2Status\",\"LocationUUID\",\"LocationName\",\"LocationCountryName\",\"ISO2\",\"Importance\",\"InteractionRecordType\",\"Reference\",\"ReferenceDOI\",\"Reference Page\",\"Notes\"\n" + "\"4bee827f-c9f5-4c0e-9db3-e40a6e4d8008\",\"http://phthiraptera.info/node/94209\",\"c8faa033-237b-40b9-9b76-d9e7fcff9238\",\"Menacanthus alaudae\",\"\",\"\",\"\",\"ectoparasite of\",\"http://purl.obolibrary.org/obo/RO_0002632\",\"e275d77c-e993-4de0-981f-b3f39fd4da9b\",\"Acanthis flavirostris\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"310\",\"[REF: Palma, Price & Hellenthal, 1998:310]\"\n" + "\"80e66e7c-75db-467f-9a89-a11f94d58eb3\",\"http://phthiraptera.info/node/94210\",\"fe5b2e50-b414-41d9-840d-189e732b2ea5\",\"Ricinus fringillae flammeae\",\"\",\"\",\"\",\"ectoparasite of\",\"http://purl.obolibrary.org/obo/RO_0002632\",\"f26a1199-c0bb-4d7c-a511-2fe6284c5378\",\"Acanthis flammea flammea\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"Self citation to checklist added. Requires page number.\"\n" + "\"001ee8aa-dbab-43b8-9137-a61565ccf41b\",\"http://phthiraptera.info/node/94211\",\"ee17d179-9f60-4198-ac49-dc9dab3ae529\",\"Brueelia sibirica\",\"\",\"\",\"\",\"ectoparasite of\",\"http://purl.obolibrary.org/obo/RO_0002632\",\"f26a1199-c0bb-4d7c-a511-2fe6284c5378\",\"Acanthis flammea flammea\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"Self citation to checklist added. 
Requires page number.\"\n" + "\"d0929673-2f4c-49ec-877f-116e74ea360e\",\"http://phthiraptera.info/node/94212\",\"46084bc3-cfbf-4e01-96f8-5ecb50bc5ff9\",\"Ricinus fringillae\",\"\",\"\",\"\",\"ectoparasite of\",\"http://purl.obolibrary.org/obo/RO_0002632\",\"2027cf09-f15d-4c2b-be28-9cb00fabf308\",\"Acanthis flammea\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"204\",\"[REF: Rheinwald, 1968:204]\"\n";
// NOTE(review): no charset is passed here — presumably the platform default applies; confirm
return new LabeledCSVParser(new CSVParser(IOUtils.toInputStream(firstFewLines)));
};
final String baseUrl = "https://raw.githubusercontent.com/globalbioticinteractions/natural-history-museum-london-interactions-bank/master";
final String resource = baseUrl + "/globi.json";
importAll(interactionListener, tableFactory, baseUrl, resource);
// one listener call is expected per data record in the inline sample (header excluded)
assertThat(links.size(), is(4));
// despite its name, firstLine is each collected interaction in turn
for (Map<String, String> firstLine : links) {
assertNotNull(firstLine.get(StudyImporterForTSV.INTERACTION_TYPE_NAME));
assertNotNull(firstLine.get(StudyImporterForTSV.TARGET_TAXON_ID));
assertNotNull(firstLine.get(StudyImporterForTSV.TARGET_TAXON_NAME));
assertNotNull(firstLine.get(StudyImporterForTSV.SOURCE_TAXON_ID));
assertNotNull(firstLine.get(StudyImporterForTSV.SOURCE_TAXON_NAME));
}
}
Aggregations