Search in sources:

Example 6 with DedupRecord

use of cz.mzk.recordmanager.server.model.DedupRecord in project RecordManager2 by moravianlibrary.

the class UrlEnricherTest method unknownUrlTest.

/**
 * Runs {@link UrlDedupRecordEnricher} over five local documents built from the
 * {@code *_UNKNOWN_*} URL constants and asserts that the merged document's URL
 * field holds exactly the three expected values, in the listed order.
 */
@Test
public void unknownUrlTest() {
    // Values the merged URL field is expected to contain after enrichment.
    List<String> expectedUrls = new ArrayList<>();
    expectedUrls.add(MZK_UNKNOWN_TRE_URL);
    expectedUrls.add(UNKNOWN_MZK_TEXT_URL);
    expectedUrls.add(UNKNOWN_BRNO_URL);

    // Local (per-source) documents handed to the enricher.
    List<SolrInputDocument> localDocs = new ArrayList<>();
    localDocs.add(newField(MZK_UNKNOWN_MZK_TEXT_URL));
    localDocs.add(newField(TRE_UNKNOWN_MZK_TEXT_URL));
    localDocs.add(newField(MZK_UNKNOWN_BRNO_URL));
    localDocs.add(newField(TRE_UNKNOWN_BRNO_URL));
    localDocs.add(newField(MZK_UNKNOWN_TRE_URL));

    DedupRecord dedupRecord = new DedupRecord();
    SolrInputDocument mergedDoc = new SolrInputDocument();
    UrlDedupRecordEnricher enricher = new UrlDedupRecordEnricher();
    enricher.enrich(dedupRecord, mergedDoc, localDocs);

    Object[] actualUrls = mergedDoc.getFieldValues(SolrFieldConstants.URL).toArray();
    Assert.assertEquals(actualUrls, expectedUrls.toArray());
}
Also used : SolrInputDocument(org.apache.solr.common.SolrInputDocument) UrlDedupRecordEnricher(cz.mzk.recordmanager.server.index.enrich.UrlDedupRecordEnricher) ArrayList(java.util.ArrayList) DedupRecord(cz.mzk.recordmanager.server.model.DedupRecord) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Example 7 with DedupRecord

use of cz.mzk.recordmanager.server.model.DedupRecord in project RecordManager2 by moravianlibrary.

the class UrlEnricherTest method onlineUrlTest.

/**
 * Runs {@link UrlDedupRecordEnricher} over four local documents (two built from
 * {@code *_ONLINE_*} constants, one {@code *_UNKNOWN_*}, one {@code *_PROTECTED_*})
 * and asserts that only the two {@code *_ONLINE_*} values end up in the merged
 * document's URL field.
 */
@Test
public void onlineUrlTest() {
    // Values the merged URL field is expected to contain after enrichment.
    List<String> expectedUrls = new ArrayList<>();
    expectedUrls.add(MZK_ONLINE_MZK_URL);
    expectedUrls.add(TRE_ONLINE_MZK_URL);

    // Local (per-source) documents handed to the enricher.
    List<SolrInputDocument> localDocs = new ArrayList<>();
    localDocs.add(newField(MZK_ONLINE_MZK_URL));
    localDocs.add(newField(TRE_ONLINE_MZK_URL));
    localDocs.add(newField(MZK_UNKNOWN_MZK_URL));
    localDocs.add(newField(MZK_PROTECTED_MZK_URL));

    DedupRecord dedupRecord = new DedupRecord();
    SolrInputDocument mergedDoc = new SolrInputDocument();
    UrlDedupRecordEnricher enricher = new UrlDedupRecordEnricher();
    enricher.enrich(dedupRecord, mergedDoc, localDocs);

    Object[] actualUrls = mergedDoc.getFieldValues(SolrFieldConstants.URL).toArray();
    Assert.assertEquals(actualUrls, expectedUrls.toArray());
}
Also used : SolrInputDocument(org.apache.solr.common.SolrInputDocument) UrlDedupRecordEnricher(cz.mzk.recordmanager.server.index.enrich.UrlDedupRecordEnricher) ArrayList(java.util.ArrayList) DedupRecord(cz.mzk.recordmanager.server.model.DedupRecord) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Example 8 with DedupRecord

use of cz.mzk.recordmanager.server.model.DedupRecord in project RecordManager2 by moravianlibrary.

the class UrlEnricherTest method unknownProtectedUrlTest.

/**
 * Runs {@link UrlDedupRecordEnricher} over two local documents (one built from
 * {@code TRE_UNKNOWN_MZK_URL}, one from {@code MZK_PROTECTED_MZK_URL}) and asserts
 * that the merged document's URL field holds only {@code MZK_PROTECTED_MZK_URL}.
 */
@Test
public void unknownProtectedUrlTest() {
    // Value the merged URL field is expected to contain after enrichment.
    List<String> expectedUrls = new ArrayList<>();
    expectedUrls.add(MZK_PROTECTED_MZK_URL);

    // Local (per-source) documents handed to the enricher.
    List<SolrInputDocument> localDocs = new ArrayList<>();
    localDocs.add(newField(TRE_UNKNOWN_MZK_URL));
    localDocs.add(newField(MZK_PROTECTED_MZK_URL));

    DedupRecord dedupRecord = new DedupRecord();
    SolrInputDocument mergedDoc = new SolrInputDocument();
    UrlDedupRecordEnricher enricher = new UrlDedupRecordEnricher();
    enricher.enrich(dedupRecord, mergedDoc, localDocs);

    Object[] actualUrls = mergedDoc.getFieldValues(SolrFieldConstants.URL).toArray();
    Assert.assertEquals(actualUrls, expectedUrls.toArray());
}
Also used : SolrInputDocument(org.apache.solr.common.SolrInputDocument) UrlDedupRecordEnricher(cz.mzk.recordmanager.server.index.enrich.UrlDedupRecordEnricher) ArrayList(java.util.ArrayList) DedupRecord(cz.mzk.recordmanager.server.model.DedupRecord) Test(org.testng.annotations.Test) AbstractTest(cz.mzk.recordmanager.server.AbstractTest)

Example 9 with DedupRecord

use of cz.mzk.recordmanager.server.model.DedupRecord in project RecordManager2 by moravianlibrary.

the class IndexIndividualRecordsTasklet method execute.

/**
 * Indexes every record named in {@code recordIds} into the Solr server at {@code solrUrl}.
 *
 * <p>For each Solr id the harvested record is looked up, its dedup record is resolved,
 * all harvested records sharing that dedup record are turned into Solr documents,
 * hidden fields are removed, and the documents are added with {@code commitWithinMs}.
 * A single explicit commit is issued after the loop.
 *
 * @param contribution not used by this method
 * @param chunkContext not used by this method
 * @return always {@link RepeatStatus#FINISHED}
 * @throws IllegalArgumentException if a Solr id has no harvested record, or the
 *         harvested record has no dedup record
 * @throws Exception propagated from the DAO, document factory, or Solr calls
 */
@Override
public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception {
    SolrServerFacade server = solrServerFactory.create(solrUrl, null, LoggingSolrIndexingExceptionHandler.INSTANCE);
    for (String solrId : recordIds) {
        HarvestedRecord harvested = harvestedRecordDao.findBySolrId(solrId);
        if (harvested == null) {
            throw new IllegalArgumentException(String.format("Harvested record %s not found", solrId));
        }
        DedupRecord dedup = harvested.getDedupRecord();
        if (dedup == null) {
            throw new IllegalArgumentException(String.format("Harvested record %s is not deduplicated, run dedup first.", solrId));
        }
        // Index the whole dedup group, not just the requested record.
        List<HarvestedRecord> groupMembers = harvestedRecordDao.getByDedupRecord(dedup);
        List<SolrInputDocument> visibleDocs = SolrUtils.removeHiddenFields(solrInputDocumentFactory.create(dedup, groupMembers));
        server.add(visibleDocs, commitWithinMs);
    }
    server.commit();
    return RepeatStatus.FINISHED;
}
Also used : SolrInputDocument(org.apache.solr.common.SolrInputDocument) DedupRecord(cz.mzk.recordmanager.server.model.DedupRecord) SolrServerFacade(cz.mzk.recordmanager.server.solr.SolrServerFacade) HarvestedRecord(cz.mzk.recordmanager.server.model.HarvestedRecord)

Example 10 with DedupRecord

use of cz.mzk.recordmanager.server.model.DedupRecord in project RecordManager2 by moravianlibrary.

the class IndexRecordsToSolrJobConfig method updatedRecordsReader.

/**
 * Creates the step-scoped paging reader that selects {@code dedup_record_id} values
 * from {@code dedup_record_last_update}, sorted by {@code dedup_record_id}.
 *
 * <p>Date filtering on {@code last_update}:
 * <ul>
 *   <li>both dates given: {@code BETWEEN :from AND :to};</li>
 *   <li>only {@code from} given: the upper bound defaults to the current time;</li>
 *   <li>only {@code to} given: {@code last_update <= :to} (previously the upper bound
 *       was silently ignored in this case and every row was read);</li>
 *   <li>neither given: no filter.</li>
 * </ul>
 *
 * @param from lower bound from the job parameters, may be {@code null}
 * @param to upper bound from the job parameters, may be {@code null}
 * @return an initialized reader ({@code afterPropertiesSet()} already called) with state saving enabled
 * @throws Exception if the query provider or the reader cannot be initialized
 */
@Bean(name = "indexRecordsToSolrJob:updatedRecordsReader")
@StepScope
public JdbcPagingItemReader<DedupRecord> updatedRecordsReader(@Value("#{jobParameters[" + Constants.JOB_PARAM_FROM_DATE + "]}") Date from, @Value("#{jobParameters[" + Constants.JOB_PARAM_UNTIL_DATE + "]}") Date to) throws Exception {
    // An interval that is open at the top is closed at "now"; the parameter itself is left untouched.
    final Date until = (from != null && to == null) ? new Date() : to;

    // Decide the filter and its named parameters in one place so they cannot drift apart.
    final Map<String, Object> parameterValues = new HashMap<>();
    String whereClause = null;
    if (from != null) {
        // 'until' is never null on this branch because of the default above.
        whereClause = "WHERE last_update BETWEEN :from AND :to";
        parameterValues.put("from", from);
        parameterValues.put("to", until);
    } else if (until != null) {
        // Only an upper bound was supplied; BETWEEN is inclusive, so use <= to match.
        whereClause = "WHERE last_update <= :to";
        parameterValues.put("to", until);
    }

    SqlPagingQueryProviderFactoryBean pqpf = new SqlPagingQueryProviderFactoryBean();
    pqpf.setDataSource(dataSource);
    pqpf.setSelectClause("SELECT dedup_record_id");
    pqpf.setFromClause("FROM dedup_record_last_update");
    if (whereClause != null) {
        pqpf.setWhereClause(whereClause);
    }
    pqpf.setSortKey("dedup_record_id");

    JdbcPagingItemReader<DedupRecord> reader = new JdbcPagingItemReader<>();
    reader.setRowMapper(new DedupRecordRowMapper("dedup_record_id"));
    reader.setPageSize(PAGE_SIZE);
    reader.setQueryProvider(pqpf.getObject());
    reader.setDataSource(dataSource);
    if (!parameterValues.isEmpty()) {
        reader.setParameterValues(parameterValues);
    }
    reader.setSaveState(true);
    reader.afterPropertiesSet();
    return reader;
}
Also used : DedupRecordRowMapper(cz.mzk.recordmanager.server.jdbc.DedupRecordRowMapper) SqlPagingQueryProviderFactoryBean(org.springframework.batch.item.database.support.SqlPagingQueryProviderFactoryBean) HashMap(java.util.HashMap) JdbcPagingItemReader(org.springframework.batch.item.database.JdbcPagingItemReader) DedupRecord(cz.mzk.recordmanager.server.model.DedupRecord) Date(java.util.Date) StepScope(org.springframework.batch.core.configuration.annotation.StepScope) Bean(org.springframework.context.annotation.Bean)

Aggregations

DedupRecord (cz.mzk.recordmanager.server.model.DedupRecord): 14; AbstractTest (cz.mzk.recordmanager.server.AbstractTest): 7; ArrayList (java.util.ArrayList): 7; SolrInputDocument (org.apache.solr.common.SolrInputDocument): 7; Test (org.testng.annotations.Test): 7; UrlDedupRecordEnricher (cz.mzk.recordmanager.server.index.enrich.UrlDedupRecordEnricher): 5; HarvestedRecord (cz.mzk.recordmanager.server.model.HarvestedRecord): 4; Date (java.util.Date): 4; HashMap (java.util.HashMap): 3; HashSet (java.util.HashSet): 2; List (java.util.List): 2; Map (java.util.Map): 2; Set (java.util.Set): 2; Logger (org.slf4j.Logger): 2; LoggerFactory (org.slf4j.LoggerFactory): 2; Autowired (org.springframework.beans.factory.annotation.Autowired): 2; Component (org.springframework.stereotype.Component): 2; HashMultiset (com.google.common.collect.HashMultiset): 1; Multiset (com.google.common.collect.Multiset): 1; DedupRecordEnricher (cz.mzk.recordmanager.server.index.enrich.DedupRecordEnricher): 1