Search in sources :

Example 46 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class SolrInputDocumentFactoryImpl method create.

/**
 * Builds the merged Solr document for a deduplicated record and attaches one
 * child document per harvested record.
 *
 * @param dedupRecord deduplicated record whose id becomes the id of the merged document
 * @param records     harvested records belonging to {@code dedupRecord}
 * @return {@code null} when {@code records} is empty; otherwise a singleton list
 *         holding the merged document with its child documents attached
 */
public List<SolrInputDocument> create(DedupRecord dedupRecord, List<HarvestedRecord> records) {
    if (records.isEmpty()) {
        // Callers see null (not an empty list) for a record without any harvested records.
        return null;
    }
    // A mutable list is required because the children are sorted in place below.
    List<SolrInputDocument> childs = records.stream().map(rec -> create(rec)).collect(Collectors.toCollection(ArrayList::new));
    SolrUtils.sortByWeight(childs);
    // The merged weight is taken from the first harvested record of the input list,
    // not from the (re-sorted) child documents.
    HarvestedRecord record = records.get(0);
    SolrInputDocument mergedDocument = asSolrDocument(mapper.map(dedupRecord, records));
    mergedDocument.addField(SolrFieldConstants.ID_FIELD, dedupRecord.getId());
    mergedDocument.addField(SolrFieldConstants.MERGED_FIELD, 1);
    mergedDocument.addField(SolrFieldConstants.WEIGHT, record.getWeight());
    mergedDocument.addField(SolrFieldConstants.LOCAL_IDS_FIELD, getLocalIds(childs));
    if (childs.size() > 1) {
        mergedDocument.addField(SolrFieldConstants.MERGED_RECORDS, 1);
    }
    mergedDocument.addField(SolrFieldConstants.INSPIRATION, getInspirations(records));
    dedupRecordEnrichers.forEach(enricher -> enricher.enrich(dedupRecord, mergedDocument, childs));
    mergedDocument.addChildDocuments(childs);
    // Previously logged at INFO inside an isTraceEnabled() guard; the level now matches
    // the guard's intent. Parameterized logging makes an explicit guard unnecessary.
    logger.trace("Mapping of dedupRecord with id = {} finished", dedupRecord.getId());
    return Collections.singletonList(mergedDocument);
}
Also used : Arrays(java.util.Arrays) Logger(org.slf4j.Logger) Inspiration(cz.mzk.recordmanager.server.model.Inspiration) SolrUtils(cz.mzk.recordmanager.server.util.SolrUtils) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) Autowired(org.springframework.beans.factory.annotation.Autowired) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors) InitializingBean(org.springframework.beans.factory.InitializingBean) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) IndexingUtils(cz.mzk.recordmanager.server.util.IndexingUtils) List(java.util.List) Component(org.springframework.stereotype.Component) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) DedupRecord(cz.mzk.recordmanager.server.model.DedupRecord) Map(java.util.Map) Entry(java.util.Map.Entry) HarvestedRecordEnricher(cz.mzk.recordmanager.server.index.enrich.HarvestedRecordEnricher) Collections(java.util.Collections) DedupRecordEnricher(cz.mzk.recordmanager.server.index.enrich.DedupRecordEnricher) SolrInputDocument(org.apache.solr.common.SolrInputDocument) SolrInputDocument(org.apache.solr.common.SolrInputDocument) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 47 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class AbstractAuthorityVizFields method getEnrichingValues.

/**
 * Returns the enriching values for {@code key}, consulting the per-field cache first.
 *
 * On a cache miss the harvested record is looked up via
 * {@code findByHarvestConfAndRaw001Id(400L, key)}, its raw record is parsed and the
 * subfields a, b, c and d of {@code enrichingField} are read. Non-empty results are
 * cached as an unmodifiable list.
 *
 * @param key            lookup key, used both for the cache and for the record lookup
 * @param enrichingField field tag; also selects which cache is used
 * @return a fresh mutable copy of the values, or {@code null} when no record is found
 *         or the record yields no values
 */
@Override
protected List<String> getEnrichingValues(String key, String enrichingField) {
    Map<String, List<String>> fieldCache = cacheMap.get(enrichingField);

    // Cache hit: hand out a copy so callers cannot modify the cached list.
    if (fieldCache.containsKey(key)) {
        List<String> cached = fieldCache.get(key);
        return new ArrayList<>(cached);
    }

    HarvestedRecord harvested = hrdao.findByHarvestConfAndRaw001Id(400L, key);
    if (harvested == null) {
        return null;
    }

    ByteArrayInputStream rawStream = new ByteArrayInputStream(harvested.getRawRecord());
    MarcRecord parsed = marcXmlParser.parseRecord(rawStream);
    List<String> values = Collections.unmodifiableList(parsed.getFields(enrichingField, " ", 'a', 'b', 'c', 'd'));

    // Empty results are deliberately not cached.
    if (values.isEmpty()) {
        return null;
    }

    fieldCache.put(key, values);
    return new ArrayList<>(values);
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) ArrayList(java.util.ArrayList) MarcRecord(cz.mzk.recordmanager.server.marc.MarcRecord) ArrayList(java.util.ArrayList) List(java.util.List) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 48 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class KrameriusFulltextJobConfig method reader.

/**
 * Creates the paging reader of {@code HarvestedRecord}s for the Kramerius fulltext job.
 *
 * Rows are selected from {@code harvested_record}, ordered by {@code import_conf_id}
 * and {@code record_id}. When {@code configId} is present the query is restricted to
 * that configuration and, if given, to the {@code from}/{@code to} update dates.
 * When {@code configId} is {@code null} no WHERE clause and no parameters are set.
 *
 * @param configId import configuration id, may be {@code null}
 * @param from     lower bound (inclusive) on {@code updated}, may be {@code null}
 * @param to       upper bound (inclusive) on {@code updated}, may be {@code null}
 * @return the initialized paging reader
 * @throws Exception if the query provider or the reader cannot be initialized
 */
@Bean(name = "krameriusFulltextJob:reader")
@StepScope
public ItemReader<HarvestedRecord> reader(@Value("#{jobParameters[" + Constants.JOB_PARAM_CONF_ID + "]}") Long configId, @Value("#{stepExecutionContext[" + Constants.JOB_PARAM_FROM_DATE + "] " + "?:jobParameters[ " + Constants.JOB_PARAM_FROM_DATE + "]}") Date from, @Value("#{stepExecutionContext[" + Constants.JOB_PARAM_UNTIL_DATE + "]" + "?:jobParameters[" + Constants.JOB_PARAM_UNTIL_DATE + "]}") Date to) throws Exception {
    final boolean restrictToConfig = configId != null;
    final Timestamp fromStamp = (from == null) ? null : new Timestamp(from.getTime());
    final Timestamp toStamp = (to == null) ? null : new Timestamp(to.getTime());

    // Assemble the paging query.
    SqlPagingQueryProviderFactoryBean queryProviderFactory = new SqlPagingQueryProviderFactoryBean();
    queryProviderFactory.setDataSource(dataSource);
    queryProviderFactory.setSelectClause("SELECT *");
    queryProviderFactory.setFromClause("FROM harvested_record");
    if (restrictToConfig) {
        StringBuilder where = new StringBuilder("WHERE import_conf_id = :configId");
        if (fromStamp != null) {
            where.append(" AND updated >= :from");
        }
        if (toStamp != null) {
            where.append(" AND updated <= :to");
        }
        queryProviderFactory.setWhereClause(where.toString());
    }
    queryProviderFactory.setSortKeys(ImmutableMap.of("import_conf_id", Order.ASCENDING, "record_id", Order.ASCENDING));

    // Wire the reader itself.
    JdbcPagingItemReader<HarvestedRecord> pagingReader = new JdbcPagingItemReader<>();
    pagingReader.setRowMapper(harvestedRecordRowMapper);
    pagingReader.setPageSize(PAGE_SIZE);
    pagingReader.setQueryProvider(queryProviderFactory.getObject());
    pagingReader.setDataSource(dataSource);
    if (restrictToConfig) {
        // "from"/"to" are always bound, with a null value when the date was not supplied.
        Map<String, Object> queryParams = new HashMap<>();
        queryParams.put("configId", configId);
        queryParams.put("from", fromStamp);
        queryParams.put("to", toStamp);
        pagingReader.setParameterValues(queryParams);
    }
    pagingReader.afterPropertiesSet();
    return pagingReader;
}
Also used : SqlPagingQueryProviderFactoryBean(org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean) HashMap(java.util.HashMap) JdbcPagingItemReader(org.springframework.batch.item.database.JdbcPagingItemReader) Timestamp(java.sql.Timestamp) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) StepScope(org.springframework.batch.core.configuration.annotation.StepScope) SqlPagingQueryProviderFactoryBean(org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean) Bean(org.springframework.context.annotation.Bean)

Example 49 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class ManuscriptoriumFulltextWriter method write.

/**
 * Stores the fulltext and refreshes the TEI element of each given record that has
 * no fulltext yet.
 *
 * For every such record the fulltext is read from {@code fulltextReader}. If it is
 * non-empty, it is stored as a single {@code FulltextKramerius} page, the TEI element
 * in the raw record is replaced by the one parsed from {@code teiReader}, and the
 * record is persisted. Records that already have fulltext are skipped. The Hibernate
 * session is flushed and cleared once after all items are processed.
 *
 * @param items unique ids of the harvested records to process
 * @throws Exception if fetching, XML parsing/transformation or persisting fails
 */
@Override
public void write(List<? extends HarvestedRecordUniqueId> items) throws Exception {
    for (HarvestedRecordUniqueId uniqueId : items) {
        HarvestedRecord hr = harvestedRecordDao.get(uniqueId);
        if (!hr.getFulltextKramerius().isEmpty()) {
            continue;
        }
        // NOTE(review): presumably this (re)initialises client, fulltextReader and
        // teiReader for the record — confirm against getNextFulltext.
        getNextFulltext(uniqueId.getRecordId());
        try {
            FulltextKramerius fk = new FulltextKramerius();
            String fulltext = fulltextReader.next();
            if (fulltext.isEmpty()) {
                logger.warn("Fulltext from " + FULLTEXT_URL + uniqueId.getRecordId() + " is empty.");
            } else {
                // NOTE(review): getBytes() uses the platform default charset — confirm
                // this matches how the stored fulltext is decoded elsewhere.
                fk.setFulltext(fulltext.getBytes());
                fk.setUuidPage(uniqueId.getRecordId());
                fk.setPage("1");
                fk.setOrder(1L);
                hr.setFulltextKramerius(Collections.singletonList(fk));
                hr.setUpdated(new Date());
                InputStream is = new ByteArrayInputStream(hr.getRawRecord());
                Document doc = documentBuilder.parse(removeFormating(is));
                // remove old TEI element from DC
                NodeList tei = doc.getElementsByTagName(TEI);
                if (tei != null && tei.getLength() > 0) {
                    Node oldTei = tei.item(0);
                    oldTei.getParentNode().removeChild(oldTei);
                }
                // get new TEI element from source document
                Document teiDoc = documentBuilder.parse(removeFormating(teiReader));
                Node newNode = teiDoc.getElementsByTagName(TEI).item(0).cloneNode(true);
                // the clone still belongs to teiDoc; adopt it before inserting into doc
                doc.adoptNode(newNode);
                // add TEI element to DC
                Node root = doc.getFirstChild();
                root.appendChild(newNode);
                DOMSource source = new DOMSource(doc.getDocumentElement());
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                StreamResult result = new StreamResult(bos);
                transformer.transform(source, result);
                hr.setRawRecord(bos.toByteArray());
                harvestedRecordDao.persist(hr);
            }
        } finally {
            // Close the client even when reading, parsing or persisting fails;
            // previously an exception skipped the close and leaked the client.
            client.close();
        }
    }
    sessionFactory.getCurrentSession().flush();
    sessionFactory.getCurrentSession().clear();
}
Also used : DOMSource(javax.xml.transform.dom.DOMSource) HarvestedRecordUniqueId(cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId) StreamResult(javax.xml.transform.stream.StreamResult) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) NodeList(org.w3c.dom.NodeList) Node(org.w3c.dom.Node) FulltextKramerius(cz.mzk.recordmanager.server.model.FulltextKramerius) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Document(org.w3c.dom.Document) Date(java.util.Date) ByteArrayInputStream(java.io.ByteArrayInputStream) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 50 with HarvestedRecord

use of cz.mzk.recordmanager.server.model.HarvestedRecord in project RecordManager2 by moravianlibrary.

the class HarvestedRecordDAOTest method findBySolrId.

/**
 * Looks up a harvested record by the Solr id "MZK.MZK01-000000117" and checks that
 * the result exists and carries the expected record id and source id prefix.
 */
@Test
public void findBySolrId() {
    HarvestedRecord found = harvestedRecordDao.findBySolrId("MZK.MZK01-000000117");
    Assert.assertNotNull(found);

    String actualRecordId = found.getUniqueId().getRecordId();
    Assert.assertEquals(actualRecordId, "MZK01-000000117");

    String actualIdPrefix = found.getHarvestedFrom().getIdPrefix();
    Assert.assertEquals(actualIdPrefix, "MZK");
}
Also used : HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Aggregations

HarvestedRecord (cz.mzk.recordmanager.server.model.HarvestedRecord)60 Test (org.testng.annotations.Test)20 InputStream (java.io.InputStream)19 ByteArrayInputStream (java.io.ByteArrayInputStream)18 AbstractTest (cz.mzk.recordmanager.server.AbstractTest)16 HashMap (java.util.HashMap)16 MarcRecord (cz.mzk.recordmanager.server.marc.MarcRecord)15 Date (java.util.Date)14 JobParameters (org.springframework.batch.core.JobParameters)14 HarvestedRecordUniqueId (cz.mzk.recordmanager.server.model.HarvestedRecord.HarvestedRecordUniqueId)12 JobParameter (org.springframework.batch.core.JobParameter)12 MarcRecordImpl (cz.mzk.recordmanager.server.marc.MarcRecordImpl)9 Record (org.marc4j.marc.Record)9 JobExecution (org.springframework.batch.core.JobExecution)8 Job (org.springframework.batch.core.Job)6 OAIHarvestConfiguration (cz.mzk.recordmanager.server.model.OAIHarvestConfiguration)5 ArrayList (java.util.ArrayList)5 List (java.util.List)5 DedupRecord (cz.mzk.recordmanager.server.model.DedupRecord)4 FulltextKramerius (cz.mzk.recordmanager.server.model.FulltextKramerius)3