use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
the class ExportRecordsProcessor method process.
@Override
public String process(HarvestedRecordUniqueId recordId) throws Exception {
    HarvestedRecord record = harvestedRecordDao.get(recordId);
    try {
        if (record != null && record.getRawRecord() != null && record.getRawRecord().length != 0) {
            InputStream is = new ByteArrayInputStream(record.getRawRecord());
            progressLogger.incrementAndLogProgress();
            switch (record.getFormat()) {
            case Constants.METADATA_FORMAT_MARC21:
                MarcRecord marcRecord = marcXmlParser.parseRecord(is);
                if (marcRecord.getDataFields(OAI_FIELD).isEmpty()) {
                    marcRecord.addDataField(OAI_FIELD, ' ', ' ', "a", record.getUniqueId().getRecordId());
                }
                return marcRecord.export(iOFormat);
            case Constants.METADATA_FORMAT_DUBLIN_CORE:
            case Constants.METADATA_FORMAT_ESE:
                DublinCoreRecord dcRecord = dcParser.parseRecord(is);
                return dcRecord.export(iOFormat);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}
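The processor turns a single record id into an exported string, or null when the record is missing, has no raw data, or is in an unsupported format. A minimal caller sketch, assuming an already wired exportRecordsProcessor bean and an idsToExport list (both hypothetical, not part of the project code):

// hypothetical driver loop; exportRecordsProcessor is assumed to be an injected, fully wired bean
List<String> exportedLines = new ArrayList<>();
for (HarvestedRecordUniqueId recordId : idsToExport) {
    String exported = exportRecordsProcessor.process(recordId);
    if (exported != null) {
        // null means the record was missing, empty, or in an unsupported format
        exportedLines.add(exported);
    }
}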
use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
the class InspirationDeleteWriter method write.
@Override
public void write(List<? extends Long> items) throws Exception {
    for (Long id : items) {
        Inspiration ins = insDao.get(id);
        if (ins == null)
            continue;
        HarvestedRecord hr = hrDao.get(ins.getHarvestedRecordId());
        if (hr == null)
            continue;
        List<Inspiration> inspirations = hr.getInspiration();
        inspirations.remove(ins);
        hr.setInspiration(inspirations);
        hr.setUpdated(new Date());
        hrDao.persist(hr);
        insDao.delete(ins);
    }
}
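Each item is an Inspiration id; the writer detaches the Inspiration from its HarvestedRecord, touches the record's updated timestamp (presumably so downstream jobs pick up the change), and deletes the Inspiration. A minimal invocation sketch, with made-up ids for illustration:

// hypothetical invocation; inspirationDeleteWriter is assumed to be an injected, fully wired bean
List<Long> inspirationIdsToDelete = Arrays.asList(11L, 12L, 13L);
inspirationDeleteWriter.write(inspirationIdsToDelete);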
use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
the class DedupSimpleKeysStepProcessor method process.
@Override
public List<HarvestedRecord> process(List<Long> idList) throws Exception {
    List<HarvestedRecord> hrList = new ArrayList<>();
    // counts of each DedupRecord's occurrences in the current batch
    Multiset<DedupRecord> dedupMap = HashMultiset.create();
    // map of records that should be updated after the batch is processed,
    // used when merging two different DedupRecords into one
    Map<DedupRecord, Set<DedupRecord>> updateDedupRecordsMap = new HashMap<>();
    for (Long id : idList) {
        HarvestedRecord currentHr = harvestedRecordDao.get(id);
        if (currentHr == null) {
            logger.warn("Missing record with id: " + id);
            continue;
        }
        DedupRecord currentDr = currentHr.getDedupRecord();
        if (currentDr != null) {
            dedupMap.add(currentDr);
        }
        hrList.add(currentHr);
    }
    for (int i = 0; i < hrList.size(); i++) {
        HarvestedRecord outerRec = hrList.get(i);
        for (int j = i + 1; j < hrList.size(); j++) {
            HarvestedRecord innerRec = hrList.get(j);
            if (matchRecords(outerRec, innerRec)) {
                // merge records, both already have an assigned DedupRecord
                if (outerRec.getDedupRecord() != null && innerRec.getDedupRecord() != null) {
                    if (sameDedupRecords(outerRec.getDedupRecord(), innerRec.getDedupRecord())) {
                        // equal DedupRecords, nothing to do
                    } else {
                        DedupRecord moreFrequented = dedupMap.count(outerRec.getDedupRecord()) >= dedupMap.count(innerRec.getDedupRecord()) ? outerRec.getDedupRecord() : innerRec.getDedupRecord();
                        DedupRecord lessFrequented = sameDedupRecords(moreFrequented, outerRec.getDedupRecord()) ? innerRec.getDedupRecord() : outerRec.getDedupRecord();
                        outerRec.setDedupRecord(moreFrequented);
                        innerRec.setDedupRecord(moreFrequented);
                        lessFrequented.setUpdated(new Date());
                        dedupMap.add(moreFrequented);
                        dedupMap.remove(lessFrequented);
                        // all occurrences of lessFrequented in the database should be updated to moreFrequented later
                        if (harvestedRecordDao.existsByDedupRecord(lessFrequented)) {
                            updateDedupRecordsMap.computeIfAbsent(moreFrequented, key -> new HashSet<>()).add(lessFrequented);
                        }
                    }
                    continue;
                }
                // neither record has an assigned DedupRecord yet
                if (outerRec.getDedupRecord() == null && innerRec.getDedupRecord() == null) {
                    DedupRecord newDr = new DedupRecord();
                    newDr.setUpdated(new Date());
                    newDr = dedupRecordDAO.persist(newDr);
                    outerRec.setDedupRecord(newDr);
                    innerRec.setDedupRecord(newDr);
                    dedupMap.setCount(newDr, 2);
                    continue;
                }
                // if we got this far, exactly one of the records has an assigned DedupRecord
                DedupRecord dr = outerRec.getDedupRecord() != null ? outerRec.getDedupRecord() : innerRec.getDedupRecord();
                dr.setUpdated(new Date());
                outerRec.setDedupRecord(dr);
                innerRec.setDedupRecord(dr);
                dedupMap.add(dr);
            }
        }
    }
    // walk through the map and update references
    for (Map.Entry<DedupRecord, Set<DedupRecord>> entry : updateDedupRecordsMap.entrySet()) {
        for (DedupRecord updatedDR : entry.getValue()) {
            for (HarvestedRecord toBeUpdated : harvestedRecordDao.getByDedupRecord(updatedDR)) {
                toBeUpdated.setDedupRecord(entry.getKey());
            }
        }
    }
    return hrList;
}
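The Multiset is what decides which cluster survives a merge: the DedupRecord seen more often in the current batch wins. A standalone sketch of that bookkeeping, using Guava's HashMultiset with Strings standing in for DedupRecord instances:

// Strings stand in for DedupRecords; only the counting logic from the processor above is illustrated
Multiset<String> dedupMap = HashMultiset.create();
dedupMap.add("drA");    // first record already belongs to cluster A
dedupMap.add("drA");    // second record also belongs to cluster A
dedupMap.add("drB");    // third record belongs to cluster B
// the second and third records match, so the rarer cluster (B) is merged into the more frequent one (A)
String moreFrequented = dedupMap.count("drA") >= dedupMap.count("drB") ? "drA" : "drB";
String lessFrequented = moreFrequented.equals("drA") ? "drB" : "drA";
dedupMap.add(moreFrequented);     // the re-assigned record now counts toward A
dedupMap.remove(lessFrequented);  // and one occurrence of B is dropped
// dedupMap.count("drA") == 3, dedupMap.count("drB") == 0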
use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
the class KrameriusHarvester method downloadRecord.
/*
 * Returns an unparsed(!) HarvestedRecord (most of its fields are not set yet).
 */
public HarvestedRecord downloadRecord(String uuid) {
    String recordId = uuid;
    HarvestedRecordUniqueId id = new HarvestedRecordUniqueId(harvestedFrom, recordId);
    HarvestedRecord unparsedHr = new HarvestedRecord(id);
    String url = createUrl(uuid);
    logger.trace("Harvesting record from: {}", url);
    try (InputStream is = httpClient.executeGet(url)) {
        if (is.markSupported()) {
            is.mark(Integer.MAX_VALUE);
            is.reset();
        }
        unparsedHr.setRawRecord(IOUtils.toByteArray(is));
    } catch (IOException ioe) {
        logger.error("Harvesting record from: " + url + " caused IOException!");
        logger.error(ioe.getMessage());
        return null;
    }
    return unparsedHr;
}
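downloadRecord fetches the raw bytes for one Kramerius uuid and returns null on I/O failure, so callers need to filter out failed downloads. A hypothetical caller sketch (the uuids list and the harvester variable are assumptions made for illustration):

// hypothetical harvesting loop; harvester is assumed to be a configured KrameriusHarvester
List<HarvestedRecord> downloaded = new ArrayList<>();
for (String uuid : uuids) {
    HarvestedRecord hr = harvester.downloadRecord(uuid);
    if (hr != null) {
        // null signals that the HTTP request failed; the error is already logged inside downloadRecord
        downloaded.add(hr);
    }
}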
use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.
the class HarvestedRecordRowMapper method mapRow.
@Override
public HarvestedRecord mapRow(ResultSet rs, int rowNum) throws SQLException {
    ImportConfiguration importConfig = importConfDao.load(rs.getLong("import_conf_id"));
    HarvestedRecordUniqueId id = new HarvestedRecordUniqueId(importConfig, rs.getString("record_id"));
    HarvestedRecord record = new HarvestedRecord(id);
    record.setId(rs.getLong("id"));
    record.setHarvestedFrom(importConfig);
    record.setUpdated(rs.getDate("updated"));
    record.setDeleted(rs.getDate("deleted"));
    record.setRawRecord(rs.getBytes("raw_record"));
    record.setFormat(rs.getString("format"));
    return record;
}
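The mapper can be handed to Spring's JdbcTemplate. A sketch of such a query; the column names come from the mapper itself, while the table name, the query, and the jdbcTemplate and importConfId variables are assumptions made for illustration:

// hypothetical query; harvestedRecordRowMapper is assumed to be a wired bean with its importConfDao set
List<HarvestedRecord> records = jdbcTemplate.query(
        "SELECT id, import_conf_id, record_id, updated, deleted, raw_record, format"
                + " FROM harvested_record WHERE import_conf_id = ?",
        harvestedRecordRowMapper,
        importConfId);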