Search in sources :

Example 16 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class KrameriusFulltextJobConfig method missingReader.

/**
 * Creates the step-scoped paging reader that selects harvested records of one
 * import configuration which have no matching row in {@code fulltext_kramerius}.
 *
 * <p>The selection can be narrowed by an update-time window and by a
 * {@code record_id} range. Each bound that is {@code null} is simply left out
 * of the WHERE clause. Results are sorted (and paged) by {@code record_id}.
 *
 * @param configId value bound to {@code hr.import_conf_id}
 * @param firstId  inclusive lower bound for {@code hr.record_id}, or {@code null}
 * @param lastId   inclusive upper bound for {@code hr.record_id}, or {@code null}
 * @param from     inclusive lower bound for {@code hr.updated}, or {@code null};
 *                 taken from the step execution context, falling back to the job parameters
 * @param to       inclusive upper bound for {@code hr.updated}, or {@code null};
 *                 taken from the step execution context, falling back to the job parameters
 * @return a reader on which {@code afterPropertiesSet()} has already been called
 * @throws Exception if the query provider or the reader cannot be initialised
 */
@Bean(name = Constants.JOB_ID_MISSING_FULLTEXT_KRAMERIUS + ":reader")
@StepScope
public ItemReader<HarvestedRecord> missingReader(
        @Value("#{jobParameters[" + Constants.JOB_PARAM_CONF_ID + "]}") Long configId,
        @Value("#{jobParameters[" + Constants.JOB_PARAM_FULLTEXT_FIRST + "]}") String firstId,
        @Value("#{jobParameters[" + Constants.JOB_PARAM_FULLTEXT_LAST + "]}") String lastId,
        @Value("#{stepExecutionContext[" + Constants.JOB_PARAM_FROM_DATE + "] " + "?:jobParameters[ " + Constants.JOB_PARAM_FROM_DATE + "]}") Date from,
        @Value("#{stepExecutionContext[" + Constants.JOB_PARAM_UNTIL_DATE + "]" + "?:jobParameters[" + Constants.JOB_PARAM_UNTIL_DATE + "]}") Date to) throws Exception {
    // 1) Assemble the WHERE clause together with its named parameters.
    Map<String, Object> queryParams = new HashMap<>();
    StringBuilder where = new StringBuilder("WHERE hr.import_conf_id = :configId AND NOT EXISTS (" + "SELECT 1 FROM fulltext_kramerius fk WHERE hr.id = fk.harvested_record_id)");
    queryParams.put("configId", configId);
    if (from != null) {
        where.append(" AND hr.updated >= :from");
        queryParams.put("from", new Timestamp(from.getTime()));
    }
    if (to != null) {
        where.append(" AND hr.updated <= :to");
        queryParams.put("to", new Timestamp(to.getTime()));
    }
    if (firstId != null) {
        where.append(" AND hr.record_id >= :firstId");
        queryParams.put("firstId", firstId);
    }
    if (lastId != null) {
        where.append(" AND hr.record_id <= :lastId");
        queryParams.put("lastId", lastId);
    }

    // 2) Describe the paging query.
    SqlPagingQueryProviderFactoryBean providerFactory = new SqlPagingQueryProviderFactoryBean();
    providerFactory.setDataSource(dataSource);
    providerFactory.setSelectClause("SELECT *");
    providerFactory.setFromClause("FROM harvested_record hr");
    providerFactory.setWhereClause(where.toString());
    providerFactory.setSortKey("record_id");

    // 3) Wire the reader and initialise it before handing it out.
    JdbcPagingItemReader<HarvestedRecord> pagingReader = new JdbcPagingItemReader<>();
    pagingReader.setDataSource(dataSource);
    pagingReader.setQueryProvider(providerFactory.getObject());
    pagingReader.setParameterValues(queryParams);
    pagingReader.setRowMapper(harvestedRecordRowMapper);
    pagingReader.setPageSize(PAGE_SIZE);
    pagingReader.afterPropertiesSet();
    return pagingReader;
}
Also used : SqlPagingQueryProviderFactoryBean(org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean) HashMap(java.util.HashMap) JdbcPagingItemReader(org.springframework.batch.item.database.JdbcPagingItemReader) Timestamp(java.sql.Timestamp) Date(java.util.Date) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) StepScope(org.springframework.batch.core.configuration.annotation.StepScope) SqlPagingQueryProviderFactoryBean(org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean) Bean(org.springframework.context.annotation.Bean)

Example 17 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class RegenerateDedupKeysWriter method write.

/**
 * Regenerates dedup keys for the harvested records with the given ids.
 *
 * <p>A record is skipped when it cannot be loaded, when it already has a
 * dedup keys hash, when its harvest configuration reports
 * {@code isGenerateDedupKeys()} as {@code false}, or when its raw record is
 * {@code null} or empty. Failures while parsing or persisting a single record
 * are logged and do not stop processing of the remaining ids.
 *
 * @param ids ids of the harvested records to process
 */
@Override
public void write(List<? extends Long> ids) {
    for (Long id : ids) {
        HarvestedRecord rec = harvestedRecordDao.get(id);
        // NOTE(review): other callers of the DAO null-check the lookup result, so a
        // missing record is assumed possible here too; skip it instead of letting an
        // NPE (thrown outside the try block below) fail the whole chunk.
        if (rec == null) {
            continue;
        }
        if (rec.getDedupKeysHash() != null || !rec.getHarvestedFrom().isGenerateDedupKeys() || rec.getRawRecord() == null || rec.getRawRecord().length == 0) {
            continue;
        }
        try {
            rec = dedupKeysParser.parse(rec);
            harvestedRecordDao.persist(rec);
            ++totalCount;
            logProgress();
        } catch (InvalidMarcException ime) {
            logger.warn("Invalid Marc in record: " + rec.getId());
        } catch (Exception e) {
            logger.warn("Skipping record due to error: " + e.toString());
        }
    }
}
Also used : InvalidMarcException(cz.mzk.recordmanager.server.marc.InvalidMarcException) InvalidMarcException(cz.mzk.recordmanager.server.marc.InvalidMarcException) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 18 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class KrameriusFulltextProcessor method process.

/**
 * Loads the complete harvested record for {@code item} and, when permitted,
 * replaces its Kramerius fulltext pages.
 *
 * <p>The policy and model are read from the record's Dublin Core metadata.
 * Fulltext is fetched only when the policy is {@code "public"} or
 * {@code downloadPrivateFulltexts} is set. Records whose model is
 * {@code "periodical"} use the "for root" fulltexter call; every other model,
 * including a missing one, uses the default call. If no pages are returned the
 * record is left untouched; otherwise the stored fulltext is deleted and the
 * new pages are set on the record.
 *
 * @param item record handed over by the reader; only its unique id is used
 * @return the record loaded via the DAO, with fulltext pages replaced when applicable
 * @throws Exception propagated from the DAO or the fulltexter
 */
@Override
public HarvestedRecord process(HarvestedRecord item) throws Exception {
    logger.debug("Processing Harvested Record: " + item.toString() + " uniqueId: " + item.getUniqueId());
    String policy;
    String model;
    // read complete HarvestedRecord using DAO
    HarvestedRecord rec = recordDao.findByIdAndHarvestConfiguration(item.getUniqueId().getRecordId(), confId);
    InputStream is = new ByteArrayInputStream(rec.getRawRecord());
    // get Kramerius policy from record
    try {
        DublinCoreRecord dcRecord = parser.parseRecord(is);
        MetadataDublinCoreRecord mdrc = new MetadataDublinCoreRecord(dcRecord);
        policy = mdrc.getPolicyKramerius();
        model = mdrc.getModelKramerius();
    } catch (InvalidDcException e) {
        logger.warn("InvalidDcException for record with id:" + item.getUniqueId());
        logger.warn(e.getMessage());
        // doesn't do anything, just returns rec from DAO and writes a message into log
        return rec;
    }
    // modify read HarvestedRecord only if following condition is fulfilled;
    // constant-first equals keeps a record without a policy from causing an NPE
    // (it is then treated as non-public)
    if ("public".equals(policy) || downloadPrivateFulltexts) {
        logger.debug("Processor: privacy condition fulfilled, reading pages");
        String rootUuid = rec.getUniqueId().getRecordId();
        List<FulltextKramerius> pages;
        // a missing model falls through to the default (monograph) branch instead of throwing
        if ("periodical".equals(model)) {
            logger.info("Using (periodical) fultexter \"for root\" for uuid " + rootUuid + ".");
            pages = fulltexter.getFulltextForRoot(rootUuid);
        } else {
            logger.info("Using (monograph/default) fultexter \"for parent\" for uuid " + rootUuid + ".");
            pages = fulltexter.getFulltextObjects(rootUuid);
        }
        // if we got empty list in pages => do nothing, return original record
        if (pages.isEmpty()) {
            return rec;
        }
        // delete old FulltextKramerius from database before adding new ones
        fmDao.deleteFulltext(rec.getId());
        rec.setFulltextKramerius(pages);
    } else {
        logger.debug("Processor: privacy condition is NOT fulfilled, skipping record");
    }
    return rec;
}
Also used : DublinCoreRecord(cz.mzk.recordmanager.server.dc.DublinCoreRecord) MetadataDublinCoreRecord(cz.mzk.recordmanager.server.metadata.MetadataDublinCoreRecord) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) InvalidDcException(cz.mzk.recordmanager.server.dc.InvalidDcException) FulltextKramerius(cz.mzk.recordmanager.server.model.FulltextKramerius) MetadataDublinCoreRecord(cz.mzk.recordmanager.server.metadata.MetadataDublinCoreRecord) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 19 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class ExportRecordsForClassifierProcessor method process.

/**
 * Exports one harvested record for the classifier.
 *
 * <p>The record's raw content is parsed as MARC. If the parsed record has no
 * {@code OAI_FIELD} data field, one is added carrying the record id in
 * subfield {@code a}. The record is exported in {@code iOFormat} only when it
 * contains at least one {@code 080} data field.
 *
 * @param recordId unique id of the harvested record to export
 * @return the exported record, or {@code null} when the record does not exist,
 *         has no raw content, or has no {@code 080} field
 * @throws Exception propagated from the parser or the export
 */
@Override
public String process(HarvestedRecord.HarvestedRecordUniqueId recordId) throws Exception {
    HarvestedRecord record = harvestedRecordDao.get(recordId);
    if (record == null) {
        return null;
    }
    // a record without raw content (null or empty) has nothing to export;
    // checking for null avoids an NPE on ".length"
    byte[] rawRecord = record.getRawRecord();
    if (rawRecord == null || rawRecord.length == 0) {
        return null;
    }
    InputStream is = new ByteArrayInputStream(rawRecord);
    MarcRecord marcRecord = marcXmlParser.parseRecord(is);
    if (marcRecord.getDataFields(OAI_FIELD).isEmpty()) {
        marcRecord.addDataField(OAI_FIELD, ' ', ' ', "a", record.getUniqueId().getRecordId());
    }
    return marcRecord.getDataFields("080").isEmpty() ? null : marcRecord.export(iOFormat);
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 20 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class FilterCaslinRecordsWriter method write.

/**
 * Re-filters the harvested records identified by {@code items}.
 *
 * <p>Each record is rebuilt from its leader, control fields and data fields
 * (data fields visited in ascending tag order). Every {@code 996} field whose
 * subfield {@code e} is accepted by {@code caslinFilter} and which does not yet
 * carry {@code $q} equal to {@code "0"} gets a {@code $q0} subfield appended.
 * The rebuilt MARC XML is written back to the record. A record that is not yet
 * marked deleted and no longer matches the metadata filter is marked deleted.
 * Only records changed in one of these two ways get a new update timestamp and
 * are persisted.
 *
 * <p>Any exception raised for a single record is logged and processing
 * continues with the next id.
 *
 * @param items unique ids of the harvested records to process
 * @throws Exception declared by the writer contract
 */
@Override
public void write(List<? extends HarvestedRecordUniqueId> items) throws Exception {
    for (HarvestedRecordUniqueId uniqueId : items) {
        try {
            HarvestedRecord harvested = hrDao.get(uniqueId);
            if (harvested == null || harvested.getRawRecord().length == 0) {
                continue;
            }
            MarcRecord parsedMarc = marcXmlParser.parseRecord(new ByteArrayInputStream(harvested.getRawRecord()));
            Record sourceRecord = marcXmlParser.parseUnderlyingRecord(new ByteArrayInputStream(harvested.getRawRecord()));
            boolean changed = false;

            // start the rebuilt record with the leader and control fields of the source
            Record rebuilt = new RecordImpl();
            MarcFactory subfieldFactory = new MarcFactoryImpl();
            rebuilt.setLeader(sourceRecord.getLeader());
            for (ControlField controlField : sourceRecord.getControlFields()) {
                rebuilt.addVariableField(controlField);
            }

            // copy data fields in ascending tag order
            Map<String, List<DataField>> fieldsByTag = parsedMarc.getAllFields();
            for (String tag : new TreeSet<String>(fieldsByTag.keySet())) {
                for (DataField field : fieldsByTag.get(tag)) {
                    // add $q0 when sigla is in db
                    if (field.getTag().equals("996")
                            && caslinFilter.filter(field.getSubfield('e').getData())
                            && (field.getSubfield('q') == null || !field.getSubfield('q').getData().equals("0"))) {
                        field.addSubfield(subfieldFactory.newSubfield('q', "0"));
                        changed = true;
                    }
                    rebuilt.addVariableField(field);
                }
            }

            String exportedXml = new MarcRecordImpl(rebuilt).export(IOFormat.XML_MARC);
            harvested.setRawRecord(exportedXml.getBytes(StandardCharsets.UTF_8));
            if (harvested.getDeleted() == null && !mrFactory.getMetadataRecord(harvested).matchFilter()) {
                harvested.setDeleted(new Date());
                changed = true;
            }
            if (changed) {
                harvested.setUpdated(new Date());
                hrDao.persist(harvested);
            }
        } catch (Exception ex) {
            logger.error(String.format("Exception thrown when filtering harvested_record with id=%s", uniqueId), ex);
        }
    }
}
Also used : HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) MarcFactory(org.marc4j.marc.MarcFactory) MarcRecordImpl(cz.mzk.recordmanager.server.marc.MarcRecordImpl) RecordImpl(cz.mzk.recordmanager.server.marc.marc4j.RecordImpl) Date(java.util.Date) ControlField(org.marc4j.marc.ControlField) DataField(org.marc4j.marc.DataField) MarcRecordImpl(cz.mzk.recordmanager.server.marc.MarcRecordImpl) ByteArrayInputStream(java.io.ByteArrayInputStream) TreeSet(java.util.TreeSet) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) Record(org.marc4j.marc.Record) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) List(java.util.List) MarcFactoryImpl(cz.mzk.recordmanager.server.marc.marc4j.MarcFactoryImpl) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Aggregations

HarvestedRecord (cz.mzk.recordmanager.server.model.HarvestedRecord)60 Test (org.testng.annotations.Test)20 InputStream (java.io.InputStream)19 ByteArrayInputStream (java.io.ByteArrayInputStream)18 AbstractTest (cz.mzk.recordmanager.server.AbstractTest)16 HashMap (java.util.HashMap)16 MarcRecord (cz.mzk.recordmanager.server.marc.MarcRecord)15 Date (java.util.Date)14 JobParameters (org.springframework.batch.core.JobParameters)14 HarvestedRecordUniqueId (cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId)12 JobParameter (org.springframework.batch.core.JobParameter)12 MarcRecordImpl (cz.mzk.recordmanager.server.marc.MarcRecordImpl)9 Record (org.marc4j.marc.Record)9 JobExecution (org.springframework.batch.core.JobExecution)8 Job (org.springframework.batch.core.Job)6 OAIHarvestConfiguration (cz.mzk.recordmanager.server.model.OAIHarvestConfiguration)5 ArrayList (java.util.ArrayList)5 List (java.util.List)5 DedupRecord (cz.mzk.recordmanager.server.model.DedupRecord)4 FulltextKramerius (cz.mzk.recordmanager.server.model.FulltextKramerius)3