Use of org.mapdb.DB in the project invesdwin-context-persistence by subes:
class ADelegateMapDB, method newDelegate().
/**
 * Creates the backing {@link ConcurrentMap} by building a MapDB database and
 * opening (or creating) the named hash map inside it, using the key/value
 * serializers supplied by subclass hooks.
 */
protected ConcurrentMap<K, V> newDelegate() {
    // Build and configure the database in one step.
    final DB db = configureDB(createDB()).make();
    // Open the named map with subclass-provided serializers; createOrOpen()
    // reuses an existing map of the same name if present.
    final HashMapMaker<K, V> hashMapMaker =
            db.hashMap(name, newKeySerializier(), newValueSerializer());
    return configureHashMap(hashMapMaker).createOrOpen();
}
Use of org.mapdb.DB in the project eol-globi-data by jhpoelen:
class StudyImporterForSeltmann, method importStudy().
/**
 * Imports a Seltmann-style Darwin Core archive: extracts the associatedTaxa
 * and occurrences TSV resources from the dataset's "archive" zip, indexes the
 * association records by core id in an off-heap MapDB map, then walks the
 * occurrence records and creates one interaction per matched association.
 *
 * @throws StudyImporterException if the dataset declares no archive, the
 *         archive lacks an expected resource, or reading/graph creation fails
 */
@Override
public void importStudy() throws StudyImporterException {
    // Fail fast if the dataset does not declare an archive location.
    final String archiveURL = DatasetUtil.getNamedResourceURI(getDataset(), "archive");
    if (org.apache.commons.lang.StringUtils.isBlank(archiveURL)) {
        throw new StudyImporterException("failed to import [" + getDataset().getNamespace() + "]: no [archiveURL] specified");
    }
    // Off-heap, non-transactional scratch store for the association index.
    DB db = DBMaker.newMemoryDirectDB().compressionEnable().transactionDisable().make();
    final HTreeMap<String, Map<String, String>> assocMap = db.createHashMap("assocMap").make();
    try {
        InputStream inputStream = DatasetUtil.getNamedResourceStream(getDataset(), "archive");
        ZipInputStream zipInputStream = new ZipInputStream(inputStream);
        ZipEntry entry;
        File assocTempFile = null;
        File occTempFile = null;
        // Extract the two expected TSV resources; drain every other entry so
        // the zip stream stays positioned correctly.
        while ((entry = zipInputStream.getNextEntry()) != null) {
            if (entry.getName().matches("(^|(.*/))associatedTaxa.tsv$")) {
                assocTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
            } else if (entry.getName().matches("(^|(.*/))occurrences.tsv$")) {
                occTempFile = FileUtils.saveToTmpFile(zipInputStream, entry);
            } else {
                IOUtils.copy(zipInputStream, new NullOutputStream());
            }
        }
        IOUtils.closeQuietly(zipInputStream);
        if (assocTempFile == null) {
            throw new StudyImporterException("failed to find expected [associatedTaxa.tsv] resource");
        }
        if (occTempFile == null) {
            throw new StudyImporterException("failed to find expected [occurrences.tsv] resource");
        }
        // First pass: index association records by their core id.
        BufferedReader assocReader = FileUtils.getUncompressedBufferedReader(new FileInputStream(assocTempFile), CharsetConstant.UTF8);
        LabeledCSVParser parser = CSVTSVUtil.createLabeledCSVParser(assocReader);
        parser.changeDelimiter('\t');
        while (parser.getLine() != null) {
            Map<String, String> prop = new HashMap<String, String>();
            addKeyValue(parser, prop, "dwc:coreid");
            addKeyValue(parser, prop, "dwc:basisOfRecord");
            addKeyValue(parser, prop, FIELD_IDIGBIO_RECORD_ID);
            addKeyValue(parser, prop, FIELD_ASSOCIATED_GENUS);
            addKeyValue(parser, prop, FIELD_ASSOCIATED_SPECIFIC_EPITHET);
            addKeyValue(parser, prop, FIELD_ASSOCIATED_SCIENTIFIC_NAME);
            addKeyValue(parser, prop, "aec:associatedRelationshipTerm");
            addKeyValue(parser, prop, "aec:associatedRelationshipURI");
            addKeyValue(parser, prop, "aec:associatedLocationOnHost");
            addKeyValue(parser, prop, "aec:associatedEmergenceVerbatimDate");
            String coreId = parser.getValueByLabel("dwc:coreid");
            if (StringUtils.isBlank(coreId)) {
                // Parentheses required: without them the "+ 1" is string-concatenated
                // (line 5 would be reported as "51").
                LOG.warn("no coreid for line [" + (parser.getLastLineNumber() + 1) + "]");
            } else {
                assocMap.put(coreId, prop);
            }
        }
        // Second pass: walk occurrence records and create interactions for
        // those with a matching association record.
        LabeledCSVParser occurrence = CSVTSVUtil.createLabeledCSVParser(new FileInputStream(occTempFile));
        occurrence.changeDelimiter('\t');
        while (occurrence.getLine() != null) {
            String references = occurrence.getValueByLabel("dcterms:references");
            Study study = nodeFactory.getOrCreateStudy(new StudyImpl("seltmann" + references, CitationUtil.sourceCitationLastAccessed(this.getDataset(), references), null, references));
            String recordId = occurrence.getValueByLabel(FIELD_IDIGBIO_RECORD_ID);
            Map<String, String> assoc = assocMap.get(recordId);
            if (assoc != null) {
                String targetName = getTargetNameFromAssocMap(assoc);
                String sourceName = occurrence.getValueByLabel("scientificName");
                String eventDate = occurrence.getValueByLabel("eventDate");
                Date date = null;
                // Blank and placeholder ("0000-00-00") dates are warned about
                // and left as null; only the first part of a date range is parsed.
                if (StringUtils.equals(eventDate, "0000-00-00") || StringUtils.isBlank(eventDate)) {
                    getLogger().warn(study, "found suspicious event date [" + eventDate + "]" + getLineMsg(occurrence));
                } else {
                    DateTimeFormatter fmtDateTime1 = DateTimeFormat.forPattern("yyyy-MM-dd").withZoneUTC();
                    String dateString = eventDate.split("/")[0];
                    try {
                        date = fmtDateTime1.parseDateTime(dateString).toDate();
                    } catch (IllegalArgumentException e) {
                        getLogger().warn(study, "invalid date [" + dateString + "] " + getLineMsg(occurrence));
                    }
                }
                if (StringUtils.isBlank(sourceName)) {
                    getLogger().warn(study, "found blank source taxon name" + getLineMsg(occurrence));
                }
                if (StringUtils.isBlank(targetName)) {
                    getLogger().warn(study, "found blank associated target taxon name" + getLineMsg(occurrence));
                }
                InteractType interactType = parseInteractType(occurrence, assoc);
                if (interactType != null && StringUtils.isNotBlank(sourceName) && StringUtils.isNotBlank(targetName)) {
                    try {
                        createInteraction(occurrence, study, assoc, targetName, sourceName, date, interactType);
                    } catch (NodeFactoryException ex) {
                        // Log and continue: one bad record should not abort the import.
                        String message = "failed to import interaction because of [" + ex.getMessage() + "]" + getLineMsg(occurrence);
                        LOG.warn(message);
                        getLogger().warn(study, message);
                    }
                }
            }
        }
    } catch (IOException | NodeFactoryException e) {
        throw new StudyImporterException(e);
    } finally {
        // Close in finally so the off-heap store is released even on failure
        // (previously leaked whenever an exception propagated).
        db.close();
    }
}
Use of org.mapdb.DB in the project eol-globi-data by jhpoelen:
class TaxonCacheService, method initTaxonCache().
/**
 * Initializes the id-to-taxon tree map, reusing a pre-existing on-disk cache
 * when one is found, otherwise bulk-loading it from {@code taxonCacheResource}
 * via MapDB's data pump.
 *
 * @throws PropertyEnricherException if reading the taxon cache resource fails
 */
private void initTaxonCache() throws PropertyEnricherException {
    final DB db = initDb("taxonCache");
    final String mapName = "taxonCacheById";
    // Fast path: a previously built cache can be reopened as-is.
    if (db.exists(mapName)) {
        LOG.info("re-using pre-existing cache");
        resolvedIdToTaxonMap = db.getTreeMap(mapName);
        return;
    }
    LOG.info("no pre-existing cache found, rebuilding...");
    LOG.info("taxon cache loading [" + taxonCacheResource + "]...");
    final StopWatch watch = new StopWatch();
    watch.start();
    // Rows whose taxon has no path are skipped during the bulk load.
    final LineSkipper blankPathSkipper = new LineSkipper() {
        @Override
        public boolean shouldSkipLine(LabeledCSVParser parser) {
            return StringUtils.isBlank(TaxonCacheParser.parseLine(parser).getPath());
        }
    };
    try {
        resolvedIdToTaxonMap = db.createTreeMap(mapName)
                .pumpPresort(100000)
                .pumpIgnoreDuplicates()
                .pumpSource(taxonCacheIterator(taxonCacheResource, blankPathSkipper))
                .keySerializer(BTreeKeySerializer.STRING)
                .make();
    } catch (IOException e) {
        throw new PropertyEnricherException("failed to instantiate taxonCache: [" + e.getMessage() + "]", e);
    }
    watch.stop();
    LOG.info("taxon cache loading [" + taxonCacheResource + "] done.");
    logCacheLoadStats(watch.getTime(), resolvedIdToTaxonMap.size());
    watch.reset();
}
Use of org.mapdb.DB in the project eol-globi-data by jhpoelen:
class DOIResolverCache, method init().
/**
 * Builds the citation-to-DOI cache by streaming tab-separated (citation, doi)
 * rows from {@code reader} into a MapDB tree map via the data pump.
 * Expected row layout: column 0 = DOI, column 1 = citation.
 *
 * @param reader source of tab-separated doi/citation rows
 * @throws PropertyEnricherException if the cache database cannot be initialized
 * @throws IOException if reading the input fails during setup
 */
void init(final Reader reader) throws PropertyEnricherException, IOException {
    DB db = initDb("doiCache");
    StopWatch watch = new StopWatch();
    watch.start();
    final CSVParser parser = CSVTSVUtil.createTSVParser(reader);
    LOG.info("doi cache building...");
    doiCitationMap = db.createTreeMap("doiCache").pumpPresort(300000).pumpIgnoreDuplicates().pumpSource(new Iterator<Fun.Tuple2<String, String>>() {
        private String[] line;

        String getCitation(String[] line) {
            return line[1];
        }

        String getDOI(String[] line) {
            return line[0];
        }

        @Override
        public boolean hasNext() {
            try {
                // Skip rows where either the citation or the DOI is blank.
                // NOTE(review): a row with fewer than 2 columns also terminates
                // iteration here rather than being skipped — confirm that short
                // or blank trailing lines are meant to end the load.
                do {
                    line = parser.getLine();
                } while (line != null && line.length > 1 && !StringUtils.isNoneBlank(getCitation(line), getDOI(line)));
                boolean hasNext = line != null && line.length > 1 && StringUtils.isNoneBlank(getCitation(line), getDOI(line));
                if (!hasNext) {
                    // Route through the logger instead of System.out so this
                    // message obeys the configured log level/format.
                    LOG.info("[no more]");
                }
                return hasNext;
            } catch (IOException e) {
                LOG.error("problem reading", e);
                return false;
            }
        }

        @Override
        public Fun.Tuple2<String, String> next() {
            // Use the shared accessors so the column layout is defined in one place.
            String citationString = StringUtils.defaultString(getCitation(line));
            String doi = StringUtils.defaultString(getDOI(line));
            return new Fun.Tuple2<>(citationString, doi);
        }
    }).make();
    watch.stop();
    LOG.info("doi cache built in [" + watch.getTime() / 1000 + "] s.");
}
Use of org.mapdb.DB in the project eol-globi-data by jhpoelen:
class OpenTreeTaxonIndex, method buildMap().
/**
 * Builds a lookup from external taxon ids to Open Tree of Life (OTT) ids by
 * streaming the taxonomy at {@code openTreeTaxonomyUrl}. Also records each
 * external id prefix (e.g. "ncbi:") into {@code prefix} as a side effect.
 * On any failure the error is logged and a (possibly empty) map is returned.
 *
 * @return map of external id to OTT id, backed by an off-heap MapDB store
 *         that is deliberately left open (closing it would invalidate the map)
 */
protected HTreeMap<String, Long> buildMap() {
    final DB db = DBMaker.newMemoryDirectDB().compressionEnable().transactionDisable().make();
    final HTreeMap<String, Long> ottIdByExternalId = db.createHashMap("ottIdMap").make();
    final OpenTreeListener listener = new OpenTreeListener() {
        @Override
        public void taxonSameAs(String ottId, String nonOttId) {
            ottIdByExternalId.put(nonOttId, Long.parseLong(ottId));
            // Remember the namespace prefix of the external id, if any.
            final String[] parts = nonOttId.split(":");
            if (parts.length > 1) {
                prefix.add(parts[0] + ":");
            }
        }
    };
    try {
        if (openTreeTaxonomyUrl == null) {
            LOG.error("failed to import open tree taxonomy: no taxonomy input stream");
        } else {
            OpenTreeUtil.readTaxonomy(listener, openTreeTaxonomyUrl.openStream());
        }
    } catch (IOException e) {
        LOG.error("failed to build open tree taxon map map", e);
    }
    return ottIdByExternalId;
}
Aggregations