Search in sources :

Example 1 with SkatKey

use of cz.mzk.recordmanager.server.model.SkatKey in project RecordManager2 by moravianlibrary.

the class GenerateSkatKeysProcessor method process.

/**
 * Derives the SKAT keys of one harvested record that are not stored yet.
 *
 * <p>Each 996 data field that carries both subfield {@code e} (used as the
 * sigla) and subfield {@code w} (used as the local record id) yields one
 * candidate key bound to the harvested record's id.
 *
 * @param item id passed to {@code harvestedRecordDao.get} and to
 *             {@code skatKeyDao.getSkatKeysForRecord}
 * @return the candidate keys that are not among the keys already returned by
 *         the DAO; an empty set when the record has no raw data, cannot be
 *         parsed, or yields fewer than two candidate keys
 * @throws Exception declared by the overridden method
 */
@Override
public Set<SkatKey> process(Long item) throws Exception {
    Set<SkatKey> candidates = new HashSet<>();
    HarvestedRecord harvested = harvestedRecordDao.get(item);
    if (harvested.getRawRecord() == null) {
        return candidates;
    }
    MarcRecord parsed;
    try {
        parsed = marcXmlParser.parseRecord(new ByteArrayInputStream(harvested.getRawRecord()));
    } catch (Exception e) {
        // an unparseable record simply produces no keys
        return candidates;
    }
    for (DataField field : parsed.getDataFields("996")) {
        // both subfields are required to build a key
        if (field.getSubfield('e') == null || field.getSubfield('w') == null) {
            continue;
        }
        String sigla = field.getSubfield('e').getData();
        String recordId = field.getSubfield('w').getData();
        // over-long values are treated as garbage and skipped
        if (recordId.length() <= 100 && sigla.length() <= 20) {
            candidates.add(new SkatKey(new SkatKeyCompositeId(harvested.getId(), sigla, recordId)));
        }
    }
    // fewer than two keys means the record does not carry enough information
    if (candidates.size() < 2) {
        return Collections.emptySet();
    }
    // keep only the keys that are not stored for this record yet
    Set<SkatKey> stored = new HashSet<>(skatKeyDao.getSkatKeysForRecord(item));
    Set<SkatKey> fresh = new HashSet<>();
    for (SkatKey candidate : candidates) {
        if (!stored.contains(candidate)) {
            fresh.add(candidate);
        }
    }
    return fresh;
}
Also used : DataField(org.marc4j.marc.DataField) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) SkatKeyCompositeId(cz.mzk.recordmanager.server.model.SkatKey.SkatKeyCompositeId) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) SkatKey(cz.mzk.recordmanager.server.model.SkatKey) HashSet(java.util.HashSet) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 2 with SkatKey

use of cz.mzk.recordmanager.server.model.SkatKey in project RecordManager2 by moravianlibrary.

the class SkatKeyDAOHibernate method getSkatKeysForRecord.

/**
 * Loads the rows of {@code skat_keys} whose {@code skat_record_id} equals the
 * given id and maps each row to a {@link SkatKey}.
 *
 * <p>The numeric id column is read through {@link Number}, so the mapping does
 * not depend on which numeric class the driver returns for it. A SQL NULL in
 * {@code manually_merged} is treated as {@code false}.
 *
 * @param skatRecordId value bound to the {@code skat_record_id} condition
 * @return one {@link SkatKey} per matching row, as returned by the query
 */
@SuppressWarnings({ "unchecked", "serial" })
@Override
public List<SkatKey> getSkatKeysForRecord(Long skatRecordId) {
    Session session = sessionFactory.getCurrentSession();
    return (List<SkatKey>) session.createSQLQuery("SELECT skat_record_id,sigla,local_record_id,manually_merged" + " FROM skat_keys" + " WHERE skat_record_id = ?").setResultTransformer(new ResultTransformer() {

        @Override
        public Object transformTuple(Object[] tuple, String[] aliases) {
            SkatKeyCompositeId compositeId = new SkatKeyCompositeId();
            boolean manuallyMerged = false;
            for (int i = 0; i < tuple.length; i++) {
                switch(aliases[i]) {
                    case "skat_record_id":
                        // Number instead of BigDecimal: tolerate any numeric class from the driver
                        compositeId.setSkatHarvestedRecordId(((Number) tuple[i]).longValue());
                        break;
                    case "sigla":
                        compositeId.setSigla((String) tuple[i]);
                        break;
                    case "local_record_id":
                        compositeId.setRecordId((String) tuple[i]);
                        break;
                    case "manually_merged":
                        // a NULL column value keeps the default instead of failing on unboxing
                        manuallyMerged = tuple[i] != null && (Boolean) tuple[i];
                        break;
                    default:
                        // columns other than the four selected ones are ignored
                        break;
                }
            }
            SkatKey key = new SkatKey(compositeId);
            key.setManuallyMerged(manuallyMerged);
            return key;
        }

        @SuppressWarnings("rawtypes")
        @Override
        public List transformList(List collection) {
            // rows are already fully mapped by transformTuple
            return collection;
        }
    }).setParameter(0, skatRecordId).list();
}
Also used : SkatKeyCompositeId(cz.mzk.recordmanager.server.model.SkatKey.SkatKeyCompositeId) ResultTransformer(org.hibernate.transform.ResultTransformer) List(java.util.List) SkatKey(cz.mzk.recordmanager.server.model.SkatKey) Session(org.hibernate.Session)

Example 3 with SkatKey

use of cz.mzk.recordmanager.server.model.SkatKey in project RecordManager2 by moravianlibrary.

the class GenerateSkatKeysWriter method write.

/**
 * Persists every SKAT key and flags local harvested records for the next
 * deduplication run.
 *
 * <p>For each key, the import configurations registered in {@code siglas} for
 * the key's sigla are tried in order; the first one whose UPDATE touches at
 * least one row ends the search. The SKAT record id of that key is remembered,
 * and following keys with the same SKAT record id are only persisted.
 *
 * @param items sets of keys to store
 * @throws Exception declared by the overridden method
 */
@Override
public void write(List<? extends Set<SkatKey>> items) throws Exception {
    final String flagForDedupSql = "UPDATE harvested_record SET next_dedup_flag=true WHERE import_conf_id = ? AND raw_001_id = ? ";
    Long flaggedSkatId = -1L;
    for (Set<SkatKey> keys : items) {
        for (SkatKey skatKey : keys) {
            skatKeyDao.persist(skatKey);
            if (!flaggedSkatId.equals(skatKey.getSkatKeyId().getSkatHarvestedRecordId())) {
                List<Long> importConfIds = siglas.get(skatKey.getSkatKeyId().getSigla());
                if (importConfIds != null) {
                    for (Long importConfId : importConfIds) {
                        Session session = sessionFactory.getCurrentSession();
                        int updatedRows = session.createSQLQuery(flagForDedupSql)
                                .setLong(0, importConfId)
                                .setString(1, skatKey.getSkatKeyId().getRecordId())
                                .executeUpdate();
                        if (updatedRows > 0) {
                            // a local record was flagged; remember the SKAT record and stop searching
                            flaggedSkatId = skatKey.getSkatKeyId().getSkatHarvestedRecordId();
                            break;
                        }
                    }
                }
            }
        }
    }
}
Also used : SkatKey(cz.mzk.recordmanager.server.model.SkatKey) Session(org.hibernate.Session)

Example 4 with SkatKey

use of cz.mzk.recordmanager.server.model.SkatKey in project RecordManager2 by moravianlibrary.

the class ManuallyMergedSkatDedupKeysReader method read.

/**
 * Processes one day per call: downloads the record numbers reported for
 * {@code counterDate}, advances {@code counterDate} by one day and returns the
 * SKAT keys of the matching records, each marked as manually merged.
 *
 * <p>The download takes up to three requests (base URL, mail URL, result
 * file), each located by a regular expression in the previous response, with a
 * pause between them.
 *
 * @return the SKAT keys found for the processed day (possibly empty), or
 *         {@code null} once {@code counterDate} is after {@code toDate}
 * @throws Exception declared by the overridden method
 */
@Override
public Set<SkatKey> read() throws Exception {
    if (toDate == null) {
        toDate = new Date();
    }
    toDate = DateUtils.truncate(toDate, Calendar.DAY_OF_MONTH);
    // nothing left to process once the counter has passed the end date
    if (counterDate.after(toDate)) {
        return null;
    }
    downloadedKeys.clear();
    String response = IOUtils.toString(harvest(prepareAlephBaseUrl(DATE_FORMAT.format(counterDate))));
    Matcher sessionMatcher = PATTERN.matcher(response);
    if (sessionMatcher.find()) {
        // wait 20 - 30 seconds
        sleep(20_000, 30_000);
        response = IOUtils.toString(harvest(prepareAlephMailUrl(sessionMatcher.group(1))));
        Matcher fileMatcher = PATTERN2.matcher(response);
        if (fileMatcher.find()) {
            // wait 20 - 30 seconds
            sleep(20_000, 30_000);
            Matcher sysnoMatcher = SYSNO.matcher(IOUtils.toString(harvest(fileMatcher.group(1))));
            while (sysnoMatcher.find()) {
                if (sysnoMatcher.group(1) != null) {
                    downloadedKeys.add(SKC_ID_PREFIX + sysnoMatcher.group(1));
                }
            }
        } else {
            logger.info("File with results not found!!!");
        }
    } else {
        logger.info("Session not found!!!");
    }
    // move on to the next day
    counterDate = DateUtils.addDays(counterDate, 1);
    // wait 2-3 minutes while more days remain
    if (counterDate.before(toDate)) {
        sleep(120_000, 180_000);
    }
    Set<SkatKey> results = new HashSet<>();
    // collect the SKAT keys of every downloaded record that is known locally
    downloadedKeys.forEach(downloadedId -> {
        HarvestedRecord caslinRecord = hrDao.findByIdAndHarvestConfiguration(downloadedId, Constants.IMPORT_CONF_ID_CASLIN);
        if (caslinRecord == null) {
            return;
        }
        List<SkatKey> recordKeys = skatKeyDao.findSkatKeysBySkatId(caslinRecord.getId());
        if (recordKeys != null) {
            results.addAll(recordKeys);
        }
    });
    for (SkatKey mergedKey : results) {
        mergedKey.setManuallyMerged(true);
    }
    // update caslin record
    pushToDatabase();
    // the returned keys are handed on for updating skatKeys and their local records
    return results;
}
Also used : Matcher(java.util.regex.Matcher) SkatKey(cz.mzk.recordmanager.server.model.SkatKey) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Aggregations

SkatKey (cz.mzk.recordmanager.server.model.SkatKey)4 HarvestedRecord (cz.mzk.recordmanager.server.model.HarvestedRecord)2 SkatKeyCompositeId (cz.mzk.recordmanager.server.model.SkatKey.SkatKeyCompositeId)2 Session (org.hibernate.Session)2 MarcRecord (cz.mzk.recordmanager.server.marc.MarcRecord)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 InputStream (java.io.InputStream)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Matcher (java.util.regex.Matcher)1 ResultTransformer (org.hibernate.transform.ResultTransformer)1 DataField (org.marc4j.marc.DataField)1