Search in sources :

Example 56 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class GenerateSkatKeysProcessor method process.

/**
 * Parses the MARC record stored for the given harvested record and returns the SKAT
 * keys from its 996 fields that are not yet stored for that record.
 *
 * <p>A key is built from subfield {@code e} (sigla) and subfield {@code w} (record id) of
 * each 996 field; fields missing either subfield, or carrying over-long values, are skipped.
 *
 * @param item id of the harvested record to process
 * @return the parsed keys that are not among the keys returned by
 *         {@code skatKeyDao.getSkatKeysForRecord(item)}; an empty set when the record
 *         cannot be found, has no raw content, cannot be parsed, or yields fewer than
 *         two keys
 * @throws Exception any failure outside MARC parsing is propagated to the caller
 */
@Override
public Set<SkatKey> process(Long item) throws Exception {
    Set<SkatKey> parsedKeys = new HashSet<>();
    HarvestedRecord hr = harvestedRecordDao.get(item);
    // an id that does not resolve to a record is treated like a record without content
    if (hr == null || hr.getRawRecord() == null) {
        return parsedKeys;
    }
    MarcRecord marc;
    try (InputStream is = new ByteArrayInputStream(hr.getRawRecord())) {
        marc = marcXmlParser.parseRecord(is);
    } catch (Exception ignored) {
        // unparsable records are skipped on purpose: they simply contribute no keys
        return parsedKeys;
    }
    for (DataField df : marc.getDataFields("996")) {
        if (df.getSubfield('e') == null || df.getSubfield('w') == null) {
            continue;
        }
        String sigla = df.getSubfield('e').getData();
        String recordId = df.getSubfield('w').getData();
        if (recordId.length() > 100 || sigla.length() > 20) {
            // ignore garbage
            continue;
        }
        parsedKeys.add(new SkatKey(new SkatKeyCompositeId(hr.getId(), sigla, recordId)));
    }
    // ignore records having not enough information
    if (parsedKeys.size() < 2) {
        return Collections.emptySet();
    }
    // keep only the keys that are not stored yet
    Set<SkatKey> existingKeys = new HashSet<>(skatKeyDao.getSkatKeysForRecord(item));
    Set<SkatKey> newKeys = new HashSet<>(parsedKeys);
    newKeys.removeAll(existingKeys);
    return newKeys;
}
Also used : DataField(org.marc4j.marc.DataField) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) SkatKeyCompositeId(cz.mzk.recordmanager.server.model.SkatKey.SkatKeyCompositeId) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) SkatKey(cz.mzk.recordmanager.server.model.SkatKey) HashSet(java.util.HashSet) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 57 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class HarvestedRecordDAOHibernate method deleteUpvApplicationRecord.

/**
 * Flags the UPV application record with the given record id as deleted.
 *
 * <p>The record is looked up by harvest configuration {@code Constants.IMPORT_CONF_ID_UPV}
 * and {@code appId}. When found, its {@code updated} and {@code deleted} timestamps are set
 * to the same instant and the record is persisted; when not found, nothing happens.
 *
 * @param appId record id of the application record to flag
 */
@Override
public void deleteUpvApplicationRecord(String appId) {
    Session session = sessionFactory.getCurrentSession();
    HarvestedRecord hr = (HarvestedRecord) session
            .createQuery("from HarvestedRecord where uniqueId.harvestedFromId = ? and uniqueId.recordId = ?")
            .setParameter(0, Constants.IMPORT_CONF_ID_UPV)
            .setParameter(1, appId)
            .uniqueResult();
    if (hr == null) {
        return;
    }
    // one instant for both fields so "updated" and "deleted" cannot drift apart;
    // separate Date objects because Date is mutable
    long now = System.currentTimeMillis();
    hr.setUpdated(new Date(now));
    hr.setDeleted(new Date(now));
    // reuse the session obtained above instead of looking it up a second time
    session.persist(hr);
}
Also used : Date(java.util.Date) Session(org.hibernate.Session) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 58 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class HarvestedRecordDAOHibernate method findBySolrId.

/**
 * Finds a harvested record by its Solr id.
 *
 * <p>An id without a dot is delegated to {@code findByRecordId}. Otherwise the text before
 * the first dot is matched against {@code harvestedFrom.idPrefix} and the text after it
 * against {@code uniqueId.recordId}.
 *
 * @param solrId Solr id, either a bare record id or {@code prefix.recordId}
 * @return the result of the lookup as returned by the underlying query or by
 *         {@code findByRecordId}
 */
public HarvestedRecord findBySolrId(String solrId) {
    int dot = solrId.indexOf('.');
    if (dot < 0) {
        return this.findByRecordId(solrId);
    }
    // split on the first dot only: everything after it belongs to the record id
    String prefix = solrId.substring(0, dot);
    String id = solrId.substring(dot + 1);
    Session session = sessionFactory.getCurrentSession();
    Object match = session
            .createQuery("from HarvestedRecord where uniqueId.recordId = ? and harvestedFrom.idPrefix = ?")
            .setParameter(0, id)
            .setParameter(1, prefix)
            .uniqueResult();
    return (HarvestedRecord) match;
}
Also used : Session(org.hibernate.Session) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 59 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class KrameriusFulltextWriter method write.

/**
 * Persists every harvested record in {@code items}; when a record has a dedup record,
 * that dedup record's {@code updated} timestamp is refreshed and persisted first.
 *
 * <p>Processing stops at the first exception, which is printed and not rethrown.
 * NOTE(review): swallowing the exception hides write failures from the caller even though
 * the method declares {@code throws Exception} - confirm this best-effort behaviour is
 * intended.
 *
 * @param items records to persist
 * @throws Exception declared by the signature; the current body does not propagate any
 */
@Override
public void write(List<? extends HarvestedRecord> items) throws Exception {
    try {
        for (HarvestedRecord record : items) {
            touchDedupRecord(record.getDedupRecord());
            recordDao.persist(record);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/** Stamps the given dedup record with the current time and persists it; no-op for null. */
private void touchDedupRecord(DedupRecord dedup) throws Exception {
    if (dedup == null) {
        return;
    }
    dedup.setUpdated(new Date());
    dedupDao.persist(dedup);
}
Also used : DedupRecord(cz.mzk.recordmanager.server.model.DedupRecord) Date(java.util.Date) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 60 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class ManuallyMergedSkatDedupKeysReader method read.

/**
 * Reads the SKAT keys belonging to the records listed for the day held in
 * {@code counterDate}, flags them as manually merged and returns them.
 *
 * <p>The loop body always ends with a {@code return}, so each call handles at most one
 * day: it advances {@code counterDate} by one day and returns that day's keys. Once
 * {@code counterDate} is after {@code toDate}, the method returns {@code null}.
 *
 * @return the keys found for the ids collected in {@code downloadedKeys} (possibly empty),
 *         or {@code null} when {@code counterDate} is already after {@code toDate}
 * @throws Exception anything thrown by the fetch, sleep or DAO calls is propagated
 */
@Override
public Set<SkatKey> read() throws Exception {
    Matcher matcher;
    // default the upper bound to "now" when none was configured
    if (toDate == null) {
        toDate = new Date();
    }
    // NOTE(review): presumably drops the time-of-day part so dates compare per day - confirm against DateUtils
    toDate = DateUtils.truncate(toDate, Calendar.DAY_OF_MONTH);
    while (!counterDate.after(toDate)) {
        // start each day with an empty id list
        downloadedKeys.clear();
        // step 1: fetch the page for the current day and look for PATTERN in it
        String get = IOUtils.toString(harvest(prepareAlephBaseUrl(DATE_FORMAT.format(counterDate))));
        if (!(matcher = PATTERN.matcher(get)).find()) {
            logger.info("Session not found!!!");
        } else {
            // wait 20 - 30 seconds
            sleep(20000, 30000);
            // step 2: use the first group captured by PATTERN to fetch the next page
            get = IOUtils.toString(harvest(prepareAlephMailUrl(matcher.group(1))));
            if (!(matcher = PATTERN2.matcher(get)).find()) {
                logger.info("File with results not found!!!");
            } else {
                // wait 20 - 30 seconds
                sleep(20000, 30000);
                // step 3: fetch the location captured by PATTERN2 and collect every SYSNO match
                matcher = SYSNO.matcher(IOUtils.toString(harvest(matcher.group(1))));
                while (matcher.find()) {
                    if (matcher.group(1) != null) {
                        downloadedKeys.add(SKC_ID_PREFIX + matcher.group(1));
                    }
                }
            }
        }
        // next day
        counterDate = DateUtils.addDays(counterDate, 1);
        // wait 2-3 minutes
        // (only while the already advanced counterDate is still before toDate)
        if (counterDate.before(toDate))
            sleep(120000, 180000);
        Set<SkatKey> results = new HashSet<>();
        // get skat keys
        downloadedKeys.forEach(key -> {
            HarvestedRecord hr = hrDao.findByIdAndHarvestConfiguration(key, Constants.IMPORT_CONF_ID_CASLIN);
            // ids without a matching record are skipped
            if (hr != null) {
                List<SkatKey> skatkeyList = skatKeyDao.findSkatKeysBySkatId(hr.getId());
                if (skatkeyList != null)
                    results.addAll(skatkeyList);
            }
        });
        results.forEach(key -> key.setManuallyMerged(true));
        // update caslin record
        pushToDatabase();
        // update skatKeys, local records from skatKeys
        // returning here ends the call after a single day, even when more days remain
        return results;
    }
    // counterDate is after toDate: no more days to process
    return null;
}
Also used : Matcher(java.util.regex.Matcher) SkatKey(cz.mzk.recordmanager.server.model.SkatKey) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Aggregations

HarvestedRecord (cz.mzk.recordmanager.server.model.HarvestedRecord)60 Test (org.testng.annotations.Test)20 InputStream (java.io.InputStream)19 ByteArrayInputStream (java.io.ByteArrayInputStream)18 AbstractTest (cz.mzk.recordmanager.server.AbstractTest)16 HashMap (java.util.HashMap)16 MarcRecord (cz.mzk.recordmanager.server.marc.MarcRecord)15 Date (java.util.Date)14 JobParameters (org.springframework.batch.core.JobParameters)14 HarvestedRecordUniqueId (cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId)12 JobParameter (org.springframework.batch.core.JobParameter)12 MarcRecordImpl (cz.mzk.recordmanager.server.marc.MarcRecordImpl)9 Record (org.marc4j.marc.Record)9 JobExecution (org.springframework.batch.core.JobExecution)8 Job (org.springframework.batch.core.Job)6 OAIHarvestConfiguration (cz.mzk.recordmanager.server.model.OAIHarvestConfiguration)5 ArrayList (java.util.ArrayList)5 List (java.util.List)5 DedupRecord (cz.mzk.recordmanager.server.model.DedupRecord)4 FulltextKramerius (cz.mzk.recordmanager.server.model.FulltextKramerius)3