Search in sources :

Example 46 with MarcRecord

Use of cz.mzk.recordmanager.server.marc.MarcRecord in the project RecordManager2 by moravianlibrary.

From the class GenerateSkatKeysProcessor, method process:

/**
 * Computes the SKAT keys of one harvested record that are not stored yet.
 *
 * <p>The record's raw content is parsed as MARC XML. Every 996 data field that
 * carries both subfield {@code e} (taken as the sigla) and subfield {@code w}
 * (taken as the record id) yields one candidate key; fields whose sigla is
 * longer than 20 characters or whose record id is longer than 100 characters
 * are skipped. Keys already returned by {@code skatKeyDao.getSkatKeysForRecord}
 * for the same id are removed from the result.
 *
 * @param item id of the harvested record to load
 * @return the candidate keys that are not stored yet; an empty set when the
 *         record is missing, has no raw content, cannot be parsed, or yields
 *         fewer than two candidate keys
 * @throws Exception declared by the overridden method; failures of the MARC
 *         parser are caught here and are not propagated
 */
@Override
public Set<SkatKey> process(Long item) throws Exception {
    HarvestedRecord hr = harvestedRecordDao.get(item);
    // A record that cannot be loaded is handled like one without raw content
    // instead of failing with a NullPointerException.
    if (hr == null) {
        return new HashSet<>();
    }
    byte[] rawRecord = hr.getRawRecord();
    if (rawRecord == null) {
        return new HashSet<>();
    }

    MarcRecord marc;
    try {
        marc = marcXmlParser.parseRecord(new ByteArrayInputStream(rawRecord));
    } catch (Exception e) {
        // Deliberate best effort: a record that cannot be parsed contributes no keys.
        return new HashSet<>();
    }

    Set<SkatKey> parsedKeys = new HashSet<>();
    for (DataField df : marc.getDataFields("996")) {
        // Both subfields are required to build a key.
        if (df.getSubfield('e') == null || df.getSubfield('w') == null) {
            continue;
        }
        String sigla = df.getSubfield('e').getData();
        String recordId = df.getSubfield('w').getData();
        if (recordId.length() > 100 || sigla.length() > 20) {
            // ignore garbage
            continue;
        }
        parsedKeys.add(new SkatKey(new SkatKeyCompositeId(hr.getId(), sigla, recordId)));
    }

    // ignore records having not enough information
    if (parsedKeys.size() < 2) {
        return Collections.emptySet();
    }

    // Keep only the keys that are not stored yet.
    Set<SkatKey> existingKeys = new HashSet<>(skatKeyDao.getSkatKeysForRecord(item));
    Set<SkatKey> newKeys = new HashSet<>(parsedKeys);
    newKeys.removeAll(existingKeys);
    return newKeys;
}
Also used : DataField(org.marc4j.marc.DataField) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) SkatKeyCompositeId(cz.mzk.recordmanager.server.model.SkatKey.SkatKeyCompositeId) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) SkatKey(cz.mzk.recordmanager.server.model.SkatKey) HashSet(java.util.HashSet) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Aggregations

MarcRecord (cz.mzk.recordmanager.server.marc.MarcRecord)46 ByteArrayInputStream (java.io.ByteArrayInputStream)22 MarcRecordImpl (cz.mzk.recordmanager.server.marc.MarcRecordImpl)21 InputStream (java.io.InputStream)19 Record (org.marc4j.marc.Record)19 AbstractTest (cz.mzk.recordmanager.server.AbstractTest)15 HarvestedRecord (cz.mzk.recordmanager.server.model.HarvestedRecord)15 DataField (org.marc4j.marc.DataField)15 Test (org.testng.annotations.Test)15 List (java.util.List)13 ArrayList (java.util.ArrayList)11 TreeSet (java.util.TreeSet)11 ControlField (org.marc4j.marc.ControlField)11 RecordImpl (cz.mzk.recordmanager.server.marc.marc4j.RecordImpl)10 MarcFunctionContext (cz.mzk.recordmanager.server.scripting.marc.MarcFunctionContext)9 JobParameters (org.springframework.batch.core.JobParameters)8 HashMap (java.util.HashMap)7 JobParameter (org.springframework.batch.core.JobParameter)7 MarcFactoryImpl (cz.mzk.recordmanager.server.marc.marc4j.MarcFactoryImpl)5 MarcFactory (org.marc4j.marc.MarcFactory)5