/*
 * Decompiled with CFR 0.152.
 */
package eu.europa.ec.jrc.qcs.engine.rule.crossrecord;

import eu.europa.ec.jrc.qcs.Configuration;
import eu.europa.ec.jrc.qcs.dao.datasource.FileConnection;
import eu.europa.ec.jrc.qcs.dao.model.config.Property;
import eu.europa.ec.jrc.qcs.dao.model.input.DataRecord;
import eu.europa.ec.jrc.qcs.dao.model.output.RuleOutput;
import eu.europa.ec.jrc.qcs.dao.model.protocol.RuleDefinition;
import eu.europa.ec.jrc.qcs.dao.model.protocol.RuleType;
import eu.europa.ec.jrc.qcs.engine.DataSetReader;
import eu.europa.ec.jrc.qcs.engine.ValidationEngine;
import eu.europa.ec.jrc.qcs.engine.preset.DefaultFieldIDencr2014;
import eu.europa.ec.jrc.qcs.engine.rule.RuleParameter;
import eu.europa.ec.jrc.qcs.engine.rule.RuleRuntimeException;
import eu.europa.ec.jrc.qcs.engine.rule.crossrecord.BasicDuplicatesRule;
import eu.europa.ec.jrc.qcs.engine.rule.crossrecord.WhereCondition;
import eu.europa.ec.jrc.qcs.engine.rule.crossrecord.encr2014.CrossRecordRule;
import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class BunchDuplicatesRule
extends CrossRecordRule {
    protected static Logger logger = LoggerFactory.getLogger(BunchDuplicatesRule.class);

    public BunchDuplicatesRule(RuleDefinition ruleDefinition) {
        super(ruleDefinition);
        this.setLongDescription("Checks that the input dataset does not contain duplicate records, where being duplicates means having exactly the same values in all fields of the composite key. Such composite key is composed by concatenation of the fields defined in the WhereCondition property.\nImplementation: input records are fetched and compared using the \"load bunch\" feature.");
    }

    @Override
    public boolean initAcceptableFile() {
        logger.warn("initAcceptableFile() - Not required by this cross-record rule");
        return false;
    }

    @Override
    public List<RuleOutput> applyCheck() {
        int chunckSize;
        ValidationEngine engine = this.getValidationEngine();
        if (logger.isDebugEnabled()) {
            logger.debug(this.toShortString() + " - Using validation engine: " + String.valueOf(engine));
        }
        FileConnection connection = this.getFileConnection();
        File file = connection.getFile();
        String originalFile = file.getAbsolutePath();
        String targetFile = this.whereCondition.getFullFileName();
        boolean debug = logger.isDebugEnabled();
        ArrayList<RuleOutput> duplicates = new ArrayList<RuleOutput>();
        if (logger.isDebugEnabled()) {
            logger.debug(this.toShortString() + " (Using target file: " + targetFile);
        }
        if (targetFile == null) {
            String message = "WhereCondition used by " + this.toShortString() + " has a NULL target file";
            throw new RuleRuntimeException(message);
        }
        File targetFileObj = new File(targetFile);
        if (!targetFileObj.exists()) {
            logger.error("applyCheck() - FAILED CHECKING DUPLICATES RECORDS !!!");
            logger.error("applyCheck() - Missing sorted file -> skipping this check");
            return duplicates;
        }
        if (debug) {
            logger.info(this.toShortString() + " (where condition " + String.valueOf(this.whereCondition) + ") -> applying on: " + targetFile);
        }
        if (debug) {
            logger.info(this.toShortString() + " original file: " + originalFile);
        }
        this.dataSetReader = new DataSetReader();
        this.dataSetReader.setMaxRows(0);
        this.dataSetReader.setFilePath(targetFile);
        this.dataSetReader.resetDataset();
        int keyIndex = this.getIndexOfFirstField();
        if (debug) {
            logger.info("applyCheck() - " + this.getModelName() + " (ruleID=" + this.getId() + ")");
            logger.info("applyCheck() - Reading bunch on column's index " + keyIndex);
        }
        if ((chunckSize = Configuration.getInstance().getIntegerProperty(Property.MONITOR_BUNCH_SIZE)) <= 0) {
            int defaultSize = 50000;
            logger.error(this.toShortString() + "Wrong chunck size: " + chunckSize + " -> using default value: " + defaultSize);
            chunckSize = defaultSize;
        }
        engine.notifyMessageToWorker("\nStarting check on perfect duplicates");
        int counter = 0;
        while (this.dataSetReader.hasNext()) {
            this.bunch = this.dataSetReader.getNextBunch(keyIndex);
            if (debug) {
                logger.info("");
                logger.info(this.toShortString() + " - CYCLING on bunch for: " + this.dataSetReader.getBunchInformation());
            }
            List<RuleOutput> duplicatesInBunch = this.findDuplicates(this.bunch);
            duplicates.addAll(duplicatesInBunch);
            if (counter++ > 1 && counter % chunckSize == 0) {
                String label = counter + " records";
                engine.notifyMessageToWorker("Perfect duplicates - checked: " + label);
            }
            if (!debug) continue;
            logger.info("");
            logger.info(this.toShortString() + " Total duplicates after current bunch: " + duplicates.size());
        }
        if (debug) {
            logger.info(this.toShortString() + " analyzed whole dataset. Found " + duplicates.size() + " duplicates");
        }
        return duplicates;
    }

    public List<RuleOutput> findDuplicates(List<DataRecord> bunch) {
        ArrayList<RuleOutput> result = new ArrayList<RuleOutput>();
        HashSet<String> uniqueKeys = new HashSet<String>();
        HashSet<String> duplicateKeys = new HashSet<String>();
        boolean debug = logger.isDebugEnabled();
        for (DataRecord record : bunch) {
            String compositeKey = this.getCompositeKey(record);
            if (uniqueKeys.add(compositeKey)) continue;
            duplicateKeys.add(compositeKey);
            if (!debug) continue;
            logger.info("filterNonUniqueCompositeKeys() - Found duplicate composite key: " + compositeKey);
        }
        int lineNumberLastRecord = this.dataSetReader.getCounter();
        int lineNumberFirstRecord = lineNumberLastRecord - bunch.size();
        for (DataRecord record : bunch) {
            String compositeKey = this.getCompositeKey(record);
            if (duplicateKeys.contains(compositeKey)) {
                int originalLineNumber = this.getOriginalLineNumber(lineNumberFirstRecord, record);
                if (debug) {
                    String message = "- compositeKey = " + compositeKey + " (Original line: " + originalLineNumber + ")";
                    logger.info("filterNonUniqueCompositeKeys() - " + lineNumberFirstRecord + ") " + message);
                }
                RuleOutput ruleOutput = this.produceRuleOutput(record);
                ruleOutput.setLineNumber(originalLineNumber);
                result.add(ruleOutput);
                this.addMatchedRecord(ruleOutput, record);
            }
            ++lineNumberFirstRecord;
        }
        return result;
    }

    @Override
    public void setDefaultConfiguration() {
        super.setDefaultConfiguration();
        if (logger.isDebugEnabled()) {
            logger.debug("setDefaultConfiguration() for rule ID " + this.getId() + ". Default where condition: " + String.valueOf(this.whereCondition));
        }
        ArrayList<RuleParameter> defaultRuleParameters = new ArrayList<RuleParameter>();
        RuleParameter param_1 = new RuleParameter(1, DefaultFieldIDencr2014.INCIDENCE_PATIENT_ID.id, false);
        RuleParameter param_2 = new RuleParameter(2, DefaultFieldIDencr2014.INCIDENCE_TUMOUR_ID.id, false);
        defaultRuleParameters.add(param_1);
        defaultRuleParameters.add(param_2);
        this.ruleConfiguration.setDefaultRuleParameters(defaultRuleParameters);
        this.ruleConfiguration.setRuleType(RuleType.DUPLICATES);
        int[] fieldIDs = new int[]{DefaultFieldIDencr2014.INCIDENCE_PATIENT_ID.id, DefaultFieldIDencr2014.INCIDENCE_TUMOUR_ID.id};
        WhereCondition whereCondition = new WhereCondition(fieldIDs);
        this.setWhereCondition(whereCondition);
        if (logger.isDebugEnabled()) {
            logger.debug("setDefaultConfiguration() for rule ID " + this.getId() + " -> Set where condition: " + String.valueOf(this.whereCondition));
        }
    }

    @Override
    public Logger getLogger() {
        return BasicDuplicatesRule.logger;
    }
}

