Search in sources :

Example 6 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class ExportRecordsProcessor method process.

/**
 * Exports a single harvested record as a string in the configured output format.
 *
 * <p>The record is looked up by its unique id. MARC21 records get the {@code OAI_FIELD}
 * data field added (carrying the record id) when it is missing; Dublin Core and ESE
 * records are parsed and exported as they are.
 *
 * @param recordId unique identifier of the record to export
 * @return the exported record, or {@code null} when the record does not exist, has no raw
 *         data, has a format not handled here, or any exception occurs during processing
 * @throws Exception declared by the overridden method; failures raised while parsing or
 *         exporting are printed to standard error and turned into a {@code null} result
 */
@Override
public String process(HarvestedRecordUniqueId recordId) throws Exception {
    HarvestedRecord harvested = harvestedRecordDao.get(recordId);
    if (harvested == null) {
        return null;
    }
    String exported = null;
    try {
        byte[] rawData = harvested.getRawRecord();
        if (rawData != null && rawData.length != 0) {
            InputStream rawStream = new ByteArrayInputStream(rawData);
            // progress is counted for every record with content, even if its format is not exported
            progressLogger.incrementAndLogProgress();
            switch (harvested.getFormat()) {
                case Constants.METADATA_FORMAT_MARC21:
                    MarcRecord marc = marcXmlParser.parseRecord(rawStream);
                    boolean oaiFieldMissing = marc.getDataFields(OAI_FIELD).isEmpty();
                    if (oaiFieldMissing) {
                        marc.addDataField(OAI_FIELD, ' ', ' ', "a", harvested.getUniqueId().getRecordId());
                    }
                    exported = marc.export(iOFormat);
                    break;
                case Constants.METADATA_FORMAT_DUBLIN_CORE:
                case Constants.METADATA_FORMAT_ESE:
                    DublinCoreRecord dublinCore = dcParser.parseRecord(rawStream);
                    exported = dublinCore.export(iOFormat);
                    break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return exported;
}
Also used : DublinCoreRecord(cz.mzk.recordmanager.server.dc.DublinCoreRecord) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 7 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class InspirationDeleteWriter method write.

/**
 * Deletes the inspirations with the given ids and detaches each of them from its
 * harvested record.
 *
 * <p>Ids for which no inspiration exists, or whose harvested record cannot be found, are
 * skipped. For every other id the inspiration is removed from the record's inspiration
 * list, the record's update time is set to now, the record is persisted and the
 * inspiration is deleted.
 *
 * @param items ids of the inspirations to delete
 * @throws Exception declared by the overridden method
 */
@Override
public void write(List<? extends Long> items) throws Exception {
    for (Long inspirationId : items) {
        Inspiration inspiration = insDao.get(inspirationId);
        HarvestedRecord owner = (inspiration == null)
                ? null
                : hrDao.get(inspiration.getHarvestedRecordId());
        if (owner == null) {
            // either the inspiration or its harvested record is missing
            continue;
        }
        List<Inspiration> remaining = owner.getInspiration();
        remaining.remove(inspiration);
        owner.setInspiration(remaining);
        owner.setUpdated(new Date());
        hrDao.persist(owner);
        insDao.delete(inspiration);
    }
}
Also used : Inspiration(cz.mzk.recordmanager.server.model.Inspiration) Date(java.util.Date) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 8 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class DedupSimpleKeysStepProcessor method process.

/**
 * Deduplicates one batch of harvested records by comparing every pair of them.
 *
 * <p>Records are loaded by id (missing ids are logged and skipped). Each pair for which
 * {@code matchRecords} returns true ends up sharing a single {@link DedupRecord}:
 * <ul>
 *   <li>both already have one: the DedupRecord with the higher count in this batch is
 *       assigned to both (nothing happens when {@code sameDedupRecords} says they are
 *       already the same);</li>
 *   <li>neither has one: a new DedupRecord is persisted and assigned to both;</li>
 *   <li>exactly one has one: it is assigned to the other record as well.</li>
 * </ul>
 * Finally, records returned by {@code harvestedRecordDao.getByDedupRecord} for a replaced
 * DedupRecord are re-pointed to its replacement.
 *
 * @param idList ids of the harvested records forming the batch
 * @return the loaded records of the batch, with their DedupRecord references updated
 * @throws Exception declared by the overridden method
 */
@Override
public List<HarvestedRecord> process(List<Long> idList) throws Exception {
    List<HarvestedRecord> hrList = new ArrayList<>();
    // count of DedupRecord in current batch
    Multiset<DedupRecord> dedupMap = HashMultiset.create();
    // Map of records that should be updated after processing of batch
    // used in merging two different DedupRecords into one
    // (key = surviving DedupRecord, value = DedupRecords replaced by it)
    Map<DedupRecord, Set<DedupRecord>> updateDedupRecordsMap = new HashMap<>();
    // first pass: load the records and count how often each DedupRecord occurs
    for (Long id : idList) {
        HarvestedRecord currentHr = harvestedRecordDao.get(id);
        if (currentHr == null) {
            logger.warn("Missing record with id: " + id);
            continue;
        }
        DedupRecord currentDr = currentHr.getDedupRecord();
        if (currentDr != null) {
            dedupMap.add(currentDr);
        }
        hrList.add(currentHr);
    }
    // second pass: compare every unordered pair (i < j) of loaded records
    for (int i = 0; i < hrList.size(); i++) {
        HarvestedRecord outerRec = hrList.get(i);
        for (int j = i + 1; j < hrList.size(); j++) {
            HarvestedRecord innerRec = hrList.get(j);
            if (matchRecords(outerRec, innerRec)) {
                // merge records, both already have assigned DedupRecord
                if (outerRec.getDedupRecord() != null && innerRec.getDedupRecord() != null) {
                    if (sameDedupRecords(outerRec.getDedupRecord(), innerRec.getDedupRecord())) {
                    // equal dedupRecord, nothing to do
                    } else {
                        // keep the DedupRecord seen more often in this batch (ties favour the outer record's)
                        DedupRecord moreFrequented = dedupMap.count(outerRec.getDedupRecord()) >= dedupMap.count(innerRec.getDedupRecord()) ? outerRec.getDedupRecord() : innerRec.getDedupRecord();
                        DedupRecord lessFrequented = sameDedupRecords(moreFrequented, outerRec.getDedupRecord()) ? innerRec.getDedupRecord() : outerRec.getDedupRecord();
                        outerRec.setDedupRecord(moreFrequented);
                        innerRec.setDedupRecord(moreFrequented);
                        lessFrequented.setUpdated(new Date());
                        // move one occurrence from the replaced DedupRecord to the surviving one
                        dedupMap.add(moreFrequented);
                        dedupMap.remove(lessFrequented);
                        // all occurrences of lessFrequented in database should be updated to moreFrequented later
                        if (harvestedRecordDao.existsByDedupRecord(lessFrequented)) {
                            updateDedupRecordsMap.computeIfAbsent(moreFrequented, key -> new HashSet<>()).add(lessFrequented);
                        }
                    }
                    continue;
                }
                // neither of the records has an assigned DedupRecord
                if (outerRec.getDedupRecord() == null && innerRec.getDedupRecord() == null) {
                    DedupRecord newDr = new DedupRecord();
                    newDr.setUpdated(new Date());
                    newDr = dedupRecordDAO.persist(newDr);
                    outerRec.setDedupRecord(newDr);
                    innerRec.setDedupRecord(newDr);
                    dedupMap.setCount(newDr, 2);
                    continue;
                }
                // if we got this far, exactly one of records has assigned DedupRecord
                DedupRecord dr = outerRec.getDedupRecord() != null ? outerRec.getDedupRecord() : innerRec.getDedupRecord();
                dr.setUpdated(new Date());
                outerRec.setDedupRecord(dr);
                innerRec.setDedupRecord(dr);
                dedupMap.add(dr);
            }
        }
    }
    // walk through map and update references
    for (Map.Entry<DedupRecord, Set<DedupRecord>> entry : updateDedupRecordsMap.entrySet()) {
        for (DedupRecord updatedDR : entry.getValue()) {
            for (HarvestedRecord toBeUpdated : harvestedRecordDao.getByDedupRecord(updatedDR)) {
                toBeUpdated.setDedupRecord(entry.getKey());
            }
        }
    }
    return hrList;
}
Also used : Logger(org.slf4j.Logger) Date(java.util.Date) Multiset(com.google.common.collect.Multiset) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) Autowired(org.springframework.beans.factory.annotation.Autowired) HashMap(java.util.HashMap) ItemProcessor(org.springframework.batch.item.ItemProcessor) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) List(java.util.List) Component(org.springframework.stereotype.Component) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) DedupRecordDAO(cz.mzk.recordmanager.server.oai.dao.DedupRecordDAO) HashMultiset(com.google.common.collect.HashMultiset) DedupRecord(cz.mzk.recordmanager.server.model.DedupRecord) Map(java.util.Map) HarvestedRecordDAO(cz.mzk.recordmanager.server.oai.dao.HarvestedRecordDAO) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DedupRecord(cz.mzk.recordmanager.server.model.DedupRecord) Date(java.util.Date) HashMap(java.util.HashMap) Map(java.util.Map) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) HashSet(java.util.HashSet)

Example 9 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class KrameriusHarvester method downloadRecord.

/**
 * Downloads the raw content of one record and wraps it in a {@link HarvestedRecord}.
 *
 * <p>The returned record is unparsed(!): only its unique id (built from
 * {@code harvestedFrom} and the given uuid) and its raw content are set, most other
 * fields are not populated yet.
 *
 * @param uuid identifier of the record; used both as the record id and to build the
 *        download URL
 * @return the unparsed record, or {@code null} when downloading fails with an
 *         {@link IOException}
 */
public HarvestedRecord downloadRecord(String uuid) {
    HarvestedRecordUniqueId id = new HarvestedRecordUniqueId(harvestedFrom, uuid);
    HarvestedRecord unparsedHr = new HarvestedRecord(id);
    String url = createUrl(uuid);
    logger.trace("Harvesting record from: {}", url);
    try (InputStream is = httpClient.executeGet(url)) {
        // The stream is read exactly once from its current position, so no mark/reset
        // is needed; marking with an unbounded read limit could make a buffering stream
        // keep everything read in memory in addition to the resulting byte array.
        unparsedHr.setRawRecord(IOUtils.toByteArray(is));
    } catch (IOException ioe) {
        // pass the exception itself so that the stack trace and cause are not lost
        logger.error("Harvesting record from: " + url + " caused IOException!", ioe);
        return null;
    }
    return unparsedHr;
}
Also used : HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) InputStream(java.io.InputStream) IOException(java.io.IOException) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 10 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class HarvestedRecordRowMapper method mapRow.

/**
 * Maps the current row of the result set to a {@link HarvestedRecord}.
 *
 * <p>The import configuration is loaded through {@code importConfDao} from the
 * {@code import_conf_id} column and, together with {@code record_id}, forms the record's
 * unique id. The columns {@code id}, {@code updated}, {@code deleted}, {@code raw_record}
 * and {@code format} are copied to the record.
 *
 * <p>{@code updated} and {@code deleted} are read with {@code getTimestamp} rather than
 * {@code getDate}, because {@code java.sql.Date} carries no time of day.
 * NOTE(review): this assumes both columns hold date-time values — confirm against the
 * table definition.
 *
 * @param rs result set positioned on the row to map
 * @param rowNum number of the current row (not used)
 * @return the record built from the current row
 * @throws SQLException if a column cannot be read from the result set
 */
@Override
public HarvestedRecord mapRow(ResultSet rs, int rowNum) throws SQLException {
    ImportConfiguration importConfig = importConfDao.load(rs.getLong("import_conf_id"));
    HarvestedRecordUniqueId id = new HarvestedRecordUniqueId(importConfig, rs.getString("record_id"));
    HarvestedRecord record = new HarvestedRecord(id);
    record.setId(rs.getLong("id"));
    record.setHarvestedFrom(importConfig);
    // getTimestamp keeps the time of day; it returns null for SQL NULL just like getDate
    record.setUpdated(rs.getTimestamp("updated"));
    record.setDeleted(rs.getTimestamp("deleted"));
    record.setRawRecord(rs.getBytes("raw_record"));
    record.setFormat(rs.getString("format"));
    return record;
}
Also used : HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) ImportConfiguration(cz.mzk.recordmanager.server.model.ImportConfiguration) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Aggregations

HarvestedRecord (cz.mzk.recordmanager.server.model.HarvestedRecord)60 Test (org.testng.annotations.Test)20 InputStream (java.io.InputStream)19 ByteArrayInputStream (java.io.ByteArrayInputStream)18 AbstractTest (cz.mzk.recordmanager.server.AbstractTest)16 HashMap (java.util.HashMap)16 MarcRecord (cz.mzk.recordmanager.server.marc.MarcRecord)15 Date (java.util.Date)14 JobParameters (org.springframework.batch.core.JobParameters)14 HarvestedRecordUniqueId (cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId)12 JobParameter (org.springframework.batch.core.JobParameter)12 MarcRecordImpl (cz.mzk.recordmanager.server.marc.MarcRecordImpl)9 Record (org.marc4j.marc.Record)9 JobExecution (org.springframework.batch.core.JobExecution)8 Job (org.springframework.batch.core.Job)6 OAIHarvestConfiguration (cz.mzk.recordmanager.server.model.OAIHarvestConfiguration)5 ArrayList (java.util.ArrayList)5 List (java.util.List)5 DedupRecord (cz.mzk.recordmanager.server.model.DedupRecord)4 FulltextKramerius (cz.mzk.recordmanager.server.model.FulltextKramerius)3