/*
 * Decompiled with CFR 0.152.
 */
package org.pentaho.di.trans.steps.fuzzymatch;

import com.wcohen.ss.Jaro;
import com.wcohen.ss.JaroWinkler;
import com.wcohen.ss.NeedlemanWunsch;
import java.util.Iterator;
import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.commons.codec.language.Metaphone;
import org.apache.commons.codec.language.RefinedSoundex;
import org.apache.commons.codec.language.Soundex;
import org.apache.commons.lang.StringUtils;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.RowSet;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.pentaho.di.trans.steps.fuzzymatch.FuzzyMatchData;
import org.pentaho.di.trans.steps.fuzzymatch.FuzzyMatchMeta;
import org.pentaho.di.trans.steps.fuzzymatch.LetterPairSimilarity;

/**
 * Step that appends, to every row of the main stream, the value(s) of a lookup
 * stream that fuzzily match one main-stream field.
 *
 * <p>The lookup stream is read completely into {@code data.look} the first time
 * {@link #processRow} runs. Each main-stream row is then compared against every
 * cached lookup row with the algorithm selected by
 * {@code meta.getAlgorithmType()}:
 * <ul>
 *   <li>types 0-2: edit distances, handled by {@code doDistance};</li>
 *   <li>types 3-5: similarity scores, handled by {@code doSimilarity};</li>
 *   <li>types 6-9: phonetic codes, handled by {@code doPhonetic}.</li>
 * </ul>
 */
public class FuzzyMatch
extends BaseStep
implements StepInterface {
    private static Class<?> PKG = FuzzyMatchMeta.class;

    // Algorithm type codes, named after the comparison this class runs for each of them.
    private static final int ALGORITHM_LEVENSHTEIN = 0;
    private static final int ALGORITHM_DAMERAU_LEVENSHTEIN = 1;
    private static final int ALGORITHM_NEEDLEMAN_WUNSCH = 2;
    private static final int ALGORITHM_JARO = 3;
    private static final int ALGORITHM_JARO_WINKLER = 4;
    private static final int ALGORITHM_PAIR_SIMILARITY = 5;
    private static final int ALGORITHM_METAPHONE = 6;
    private static final int ALGORITHM_DOUBLE_METAPHONE = 7;
    private static final int ALGORITHM_SOUNDEX = 8;
    private static final int ALGORITHM_REFINED_SOUNDEX = 9;

    private FuzzyMatchMeta meta;
    private FuzzyMatchData data;

    public FuzzyMatch(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta, Trans trans) {
        super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
    }

    /**
     * Reads every row of the lookup (info) stream and stores the lookup field,
     * plus the configured additional fields, in the in-memory cache.
     *
     * @return {@code false} when no lookup step is configured, {@code true} otherwise
     * @throws KettleException when a configured field is missing from the lookup
     *         stream or a row cannot be added to the cache
     */
    private boolean readLookupValues() throws KettleException {
        this.data.infoStream = this.meta.getStepIOMeta().getInfoStreams().get(0);
        if (this.data.infoStream.getStepMeta() == null) {
            this.logError(BaseMessages.getString(PKG, "FuzzyMatch.Log.NoLookupStepSpecified"));
            return false;
        }
        if (this.isDetailed()) {
            this.logDetailed(BaseMessages.getString(PKG, "FuzzyMatch.Log.ReadingFromStream") + this.data.infoStream.getStepname() + "]");
        }

        RowSet rowSet = this.findInputRowSet(this.data.infoStream.getStepname());
        boolean firstRow = true;
        for (Object[] rowData = this.getRowFrom(rowSet); rowData != null; rowData = this.getRowFrom(rowSet)) {
            if (firstRow) {
                // The row layout is resolved only after the first row has been fetched.
                firstRow = false;
                this.prepareLookupCache(rowSet);
            }
            this.cacheLookupRow(rowSet.getRowMeta(), rowData);
        }
        return true;
    }

    /**
     * Resolves the positions of the lookup field and of the additional fields in the
     * lookup stream and builds the row metadata describing a cached row.
     */
    private void prepareLookupCache(RowSet rowSet) throws KettleException {
        this.data.infoMeta = rowSet.getRowMeta().clone();
        int indexOfLookupField = this.data.infoMeta.indexOfValue(this.environmentSubstitute(this.meta.getLookupField()));
        if (indexOfLookupField < 0) {
            throw new KettleException(BaseMessages.getString(PKG, "FuzzyMatch.Exception.CouldnotFindLookField", this.meta.getLookupField()));
        }

        this.data.infoCache = new RowMeta();
        ValueMetaInterface keyValueMeta = this.data.infoMeta.getValueMeta(indexOfLookupField);
        // Cached values are converted to normal storage, so their metadata must say so too.
        keyValueMeta.setStorageType(ValueMetaInterface.STORAGE_TYPE_NORMAL);
        this.data.infoCache.addValueMeta(keyValueMeta);
        this.data.indexOfCachedFields[0] = indexOfLookupField;

        if (!this.data.addAdditionalFields) {
            return;
        }
        String[] additionalFields = this.meta.getValue();
        for (int i = 0; i < additionalFields.length; ++i) {
            // Slot 0 is the lookup field itself; additional fields follow it.
            int slot = i + 1;
            this.data.indexOfCachedFields[slot] = this.data.infoMeta.indexOfValue(additionalFields[i]);
            if (this.data.indexOfCachedFields[slot] < 0) {
                throw new KettleException(BaseMessages.getString(PKG, "FuzzyMatch.Exception.CouldnotFindLookField", additionalFields[i]));
            }
            ValueMetaInterface additionalFieldValueMeta = this.data.infoMeta.getValueMeta(this.data.indexOfCachedFields[slot]);
            additionalFieldValueMeta.setStorageType(ValueMetaInterface.STORAGE_TYPE_NORMAL);
            this.data.infoCache.addValueMeta(additionalFieldValueMeta);
        }
        this.data.nrCachedFields += additionalFields.length;
    }

    /**
     * Extracts the cached fields from one lookup row and adds them to the cache.
     * A null lookup key is stored as the empty string.
     */
    private void cacheLookupRow(RowMetaInterface streamMeta, Object[] rowData) throws KettleException {
        if (this.log.isRowLevel()) {
            this.logRowlevel(BaseMessages.getString(PKG, "FuzzyMatch.Log.ReadLookupRow") + streamMeta.getString(rowData));
        }

        Object[] storeData = new Object[this.data.nrCachedFields];
        int keyIndex = this.data.indexOfCachedFields[0];
        storeData[0] = rowData[keyIndex] == null
            ? ""
            : this.toNormalStorage(streamMeta.getValueMeta(keyIndex), rowData[keyIndex]);
        for (int i = 1; i < this.data.nrCachedFields; ++i) {
            int fieldIndex = this.data.indexOfCachedFields[i];
            storeData[i] = this.toNormalStorage(streamMeta.getValueMeta(fieldIndex), rowData[fieldIndex]);
        }

        if (this.isDebug()) {
            this.logDebug(BaseMessages.getString(PKG, "FuzzyMatch.Log.AddingValueToCache", this.data.infoCache.getString(storeData)));
        }
        this.addToCache(storeData);
    }

    /** Converts a value held in binary-string storage to normal storage; other values pass through. */
    private Object toNormalStorage(ValueMetaInterface valueMeta, Object value) throws KettleException {
        return valueMeta.isStorageBinaryString() ? valueMeta.convertToNormalStorageType(value) : value;
    }

    /**
     * Builds the output row for one main-stream row: the input row followed by the
     * match columns. On the first call the output row layout and the index of the
     * main-stream field are resolved.
     *
     * @param rowMeta layout of {@code row}
     * @param row     main-stream row to match
     * @return the input row extended with the match columns
     * @throws KettleException when the main field is missing or the comparison fails
     */
    private Object[] lookupValues(RowMetaInterface rowMeta, Object[] row) throws KettleException {
        if (this.first) {
            this.first = false;
            this.data.outputRowMeta = this.getInputRowMeta().clone();
            this.meta.getFields(this.data.outputRowMeta, this.getStepname(), new RowMetaInterface[]{this.data.infoMeta}, null, this, this.repository, this.metaStore);
            this.data.indexOfMainField = this.getInputRowMeta().indexOfValue(this.environmentSubstitute(this.meta.getMainStreamField()));
            if (this.data.indexOfMainField < 0) {
                throw new KettleException(BaseMessages.getString(PKG, "FuzzyMatch.Exception.CouldnotFindMainField", this.meta.getMainStreamField()));
            }
        }

        Object[] add;
        if (row[this.data.indexOfMainField] == null) {
            // Nothing to compare: emit empty match columns.
            add = this.buildEmptyRow();
        } else {
            try {
                add = this.getFromCache(row);
            }
            catch (Exception e) {
                // Runtime failures of the comparison libraries are reported as step errors.
                throw new KettleStepException(e);
            }
        }
        return RowDataUtil.addRowData(row, rowMeta.size(), add);
    }

    /**
     * Adds one lookup row to the cache.
     *
     * @throws KettleException when the JVM runs out of heap while caching
     */
    private void addToCache(Object[] value) throws KettleException {
        try {
            this.data.look.add(value);
        }
        catch (OutOfMemoryError o) {
            throw new KettleException(BaseMessages.getString(PKG, "FuzzyMatch.Error.JavaHeap", o.toString()));
        }
    }

    /**
     * Dispatches a main-stream row to the comparison family selected by the
     * algorithm type.
     *
     * @return the match columns; an empty row when the algorithm type is unknown
     */
    private Object[] getFromCache(Object[] keyRow) throws KettleValueException {
        if (this.isDebug()) {
            this.logDebug(BaseMessages.getString(PKG, "FuzzyMatch.Log.ReadingMainStreamRow", this.getInputRowMeta().getString(keyRow)));
        }
        switch (this.meta.getAlgorithmType()) {
            case ALGORITHM_LEVENSHTEIN:
            case ALGORITHM_DAMERAU_LEVENSHTEIN:
            case ALGORITHM_NEEDLEMAN_WUNSCH: {
                return this.doDistance(keyRow);
            }
            case ALGORITHM_METAPHONE:
            case ALGORITHM_DOUBLE_METAPHONE:
            case ALGORITHM_SOUNDEX:
            case ALGORITHM_REFINED_SOUNDEX: {
                return this.doPhonetic(keyRow);
            }
            case ALGORITHM_JARO:
            case ALGORITHM_JARO_WINKLER:
            case ALGORITHM_PAIR_SIMILARITY: {
                return this.doSimilarity(keyRow);
            }
            default: {
                // Never hand null to the caller: it would fail when the columns are
                // appended to the input row. An unknown type simply yields "no match".
                return this.buildEmptyRow();
            }
        }
    }

    /**
     * Matches by edit distance. Cached values whose distance lies within
     * {@code [data.minimalDistance, data.maximalDistance]} are candidates. In
     * "closer value" mode only the candidate with the smallest distance is kept
     * (the first one seen wins ties); otherwise all candidates are joined with
     * {@code data.valueSeparator}.
     */
    private Object[] doDistance(Object[] row) throws KettleValueException {
        Object[] rowData = this.buildEmptyRow();
        String lookupValue = this.getInputRowMeta().getString(row, this.data.indexOfMainField);

        // Loop invariants, resolved once instead of once per cached row.
        int algorithm = this.meta.getAlgorithmType();
        boolean caseSensitive = this.meta.isCaseSensitive();
        boolean closerOnly = this.meta.isGetCloserValue();
        String comparableLookup = (caseSensitive || lookupValue == null) ? lookupValue : lookupValue.toLowerCase();

        long bestDistance = -1L; // -1 means "no candidate accepted yet"
        for (Object[] cachedData : this.data.look) {
            String cacheValue = (String) cachedData[0];
            String comparableCache = caseSensitive ? cacheValue : cacheValue.toLowerCase();

            int distance;
            switch (algorithm) {
                case ALGORITHM_DAMERAU_LEVENSHTEIN: {
                    distance = Utils.getDamerauLevenshteinDistance(comparableCache, comparableLookup);
                    break;
                }
                case ALGORITHM_NEEDLEMAN_WUNSCH: {
                    distance = Math.abs((int) new NeedlemanWunsch().score(comparableCache, comparableLookup));
                    break;
                }
                default: {
                    distance = StringUtils.getLevenshteinDistance(comparableCache, comparableLookup);
                }
            }
            if (distance < this.data.minimalDistance || distance > this.data.maximalDistance) {
                continue;
            }

            if (!closerOnly) {
                this.appendMatch(rowData, cacheValue);
                continue;
            }
            if (bestDistance != -1L && distance >= bestDistance) {
                continue;
            }
            bestDistance = distance;
            int index = 0;
            rowData[index++] = cacheValue;
            if (this.data.addValueFieldName) {
                rowData[index++] = bestDistance;
            }
            if (this.data.addAdditionalFields) {
                this.copyAdditionalFields(cachedData, rowData, index);
            }
        }
        return rowData;
    }

    /**
     * Matches by phonetic code: a cached value matches when its code equals the code
     * of the main-stream value. When several cached values match, the one iterated
     * last overwrites the earlier ones.
     */
    private Object[] doPhonetic(Object[] row) {
        Object[] rowData = this.buildEmptyRow();
        int algorithm = this.meta.getAlgorithmType();
        String lookupValue = (String) row[this.data.indexOfMainField];
        String lookupCode = this.getEncodedMF(lookupValue, algorithm);

        for (Object[] cachedData : this.data.look) {
            String cacheValue = (String) cachedData[0];
            String cacheCode = this.getEncodedMF(cacheValue, algorithm);
            if (!lookupCode.equals(cacheCode)) {
                continue;
            }
            int index = 0;
            rowData[index++] = cacheValue;
            if (this.data.addValueFieldName) {
                rowData[index++] = cacheCode;
            }
            if (this.data.addAdditionalFields) {
                this.copyAdditionalFields(cachedData, rowData, index);
            }
        }
        return rowData;
    }

    /**
     * Encodes a value with the phonetic algorithm identified by {@code algorithmType}.
     *
     * @return the phonetic code, never {@code null}; the empty string for an
     *         unsupported algorithm type or when the encoder yields no code
     */
    private String getEncodedMF(String value, Integer algorithmType) {
        String encodedValueMF;
        switch (algorithmType) {
            case ALGORITHM_METAPHONE: {
                encodedValueMF = new Metaphone().metaphone(value);
                break;
            }
            case ALGORITHM_DOUBLE_METAPHONE: {
                encodedValueMF = new DoubleMetaphone().doubleMetaphone(value);
                break;
            }
            case ALGORITHM_SOUNDEX: {
                encodedValueMF = new Soundex().encode(value);
                break;
            }
            case ALGORITHM_REFINED_SOUNDEX: {
                encodedValueMF = new RefinedSoundex().encode(value);
                break;
            }
            default: {
                encodedValueMF = "";
            }
        }
        // DoubleMetaphone returns null for blank input; normalise to "" so callers can
        // compare codes without a null check.
        return encodedValueMF == null ? "" : encodedValueMF;
    }

    /**
     * Matches by similarity score. Cached values whose score lies within
     * {@code [data.minimalSimilarity, data.maximalSimilarity]} are candidates. In
     * "closer value" mode only the candidate with the highest score is kept;
     * otherwise all candidates are joined with {@code data.valueSeparator}.
     */
    private Object[] doSimilarity(Object[] row) {
        Object[] rowData = this.buildEmptyRow();
        Object mainValue = row[this.data.indexOfMainField];
        String lookupValue = mainValue == null ? "" : (String) mainValue;

        int algorithm = this.meta.getAlgorithmType();
        boolean closerOnly = this.meta.isGetCloserValue();

        double bestSimilarity = 0.0;
        for (Object[] cachedData : this.data.look) {
            String cacheValue = (String) cachedData[0];

            double similarity;
            switch (algorithm) {
                case ALGORITHM_JARO: {
                    similarity = new Jaro().score(cacheValue, lookupValue);
                    break;
                }
                case ALGORITHM_JARO_WINKLER: {
                    similarity = new JaroWinkler().score(cacheValue, lookupValue);
                    break;
                }
                default: {
                    similarity = LetterPairSimilarity.getSimiliarity(cacheValue, lookupValue);
                }
            }
            boolean inRange = this.data.minimalSimilarity <= similarity && similarity <= this.data.maximalSimilarity;
            if (!inRange) {
                continue;
            }

            if (!closerOnly) {
                this.appendMatch(rowData, cacheValue);
                continue;
            }
            // Keep strictly better scores; also keep a zero-score candidate when the
            // two strings are equal.
            boolean better = similarity > bestSimilarity
                || (similarity == 0.0 && cacheValue.equals(lookupValue));
            if (!better) {
                continue;
            }
            bestSimilarity = similarity;
            int index = 0;
            rowData[index++] = cacheValue;
            if (this.data.addValueFieldName) {
                rowData[index++] = Double.valueOf(bestSimilarity);
            }
            if (this.data.addAdditionalFields) {
                this.copyAdditionalFields(cachedData, rowData, index);
            }
        }
        return rowData;
    }

    /** Copies the cached additional fields (cache slots 1..n) into {@code rowData} starting at {@code offset}. */
    private void copyAdditionalFields(Object[] cachedData, Object[] rowData, int offset) {
        int count = this.meta.getValue().length;
        for (int i = 0; i < count; ++i) {
            rowData[offset + i] = cachedData[i + 1];
        }
    }

    /** Stores {@code cacheValue} in column 0, joining it to any previous match with the value separator. */
    private void appendMatch(Object[] rowData, String cacheValue) {
        if (rowData[0] == null) {
            rowData[0] = cacheValue;
        } else {
            rowData[0] = (String) rowData[0] + this.data.valueSeparator + cacheValue;
        }
    }

    /** Allocates a row with all columns null, sized after the output row layout. */
    private Object[] buildEmptyRow() {
        return RowDataUtil.allocateRowData(this.data.outputRowMeta.size());
    }

    /**
     * Processes one main-stream row. On the first call the lookup stream is read into
     * memory first.
     *
     * @return {@code true} when a row was handled (written to the output or to the
     *         error stream); {@code false} when the input is exhausted or the step
     *         stopped because of an error
     */
    @Override
    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
        this.meta = (FuzzyMatchMeta) smi;
        this.data = (FuzzyMatchData) sdi;

        if (this.data.readLookupValues) {
            this.data.readLookupValues = false;
            if (!this.readLookupValues()) {
                this.logError(BaseMessages.getString(PKG, "FuzzyMatch.Log.UnableToReadDataFromLookupStream"));
                this.setErrors(1L);
                this.stopAll();
                return false;
            }
            if (this.isDetailed()) {
                this.logDetailed(BaseMessages.getString(PKG, "FuzzyMatch.Log.ReadValuesInMemory", this.data.look.size()));
            }
        }

        Object[] r = this.getRow();
        if (r == null) {
            if (this.isDetailed()) {
                this.logDetailed(BaseMessages.getString(PKG, "FuzzyMatch.Log.StoppedProcessingWithEmpty", this.getLinesRead()));
            }
            this.setOutputDone();
            return false;
        }

        try {
            Object[] outputRow = this.lookupValues(this.getInputRowMeta(), r);
            if (outputRow == null) {
                this.setOutputDone();
                return false;
            }
            this.putRow(this.data.outputRowMeta, outputRow);
            if (this.checkFeedback(this.getLinesRead()) && this.log.isBasic()) {
                this.logBasic(BaseMessages.getString(PKG, "FuzzyMatch.Log.LineNumber") + this.getLinesRead());
            }
        }
        catch (KettleException e) {
            if (!this.getStepMeta().isDoingErrorHandling()) {
                this.logError(BaseMessages.getString(PKG, "FuzzyMatch.Log.ErrorInStepRunning") + e.getMessage());
                this.setErrors(1L);
                this.stopAll();
                this.setOutputDone();
                return false;
            }
            // Error handling is enabled: divert the offending row and keep going.
            this.putError(this.getInputRowMeta(), r, 1L, e.toString(), this.meta.getMainStreamField(), "FuzzyMatch001");
        }
        return true;
    }

    /**
     * Validates the step configuration and resolves the algorithm-specific
     * thresholds and the value separator.
     *
     * @return {@code false} when the base initialisation fails or a mandatory field
     *         name (main stream field, lookup field, output match field) is empty
     */
    @Override
    public boolean init(StepMetaInterface smi, StepDataInterface sdi) {
        this.meta = (FuzzyMatchMeta) smi;
        this.data = (FuzzyMatchData) sdi;
        if (!super.init(smi, sdi)) {
            return false;
        }

        if (Utils.isEmpty(this.meta.getMainStreamField())) {
            this.logError(BaseMessages.getString(PKG, "FuzzyMatch.Error.MainStreamFieldMissing"));
            return false;
        }
        if (Utils.isEmpty(this.meta.getLookupField())) {
            this.logError(BaseMessages.getString(PKG, "FuzzyMatch.Error.LookupStreamFieldMissing"));
            return false;
        }
        String matchField = this.environmentSubstitute(this.meta.getOutputMatchField());
        if (Utils.isEmpty(matchField)) {
            this.logError(BaseMessages.getString(PKG, "FuzzyMatch.Error.OutputMatchFieldMissing"));
            return false;
        }

        int algorithm = this.meta.getAlgorithmType();
        this.data.addValueFieldName = !Utils.isEmpty(this.environmentSubstitute(this.meta.getOutputValueField())) && this.meta.isGetCloserValue();

        // Additional fields are only returned when a single match is produced:
        // "closer value" mode or one of the phonetic algorithms.
        int nrFields = 1;
        boolean hasAdditionalFields = this.meta.getValue() != null && this.meta.getValue().length > 0;
        if (hasAdditionalFields && (this.meta.isGetCloserValue() || FuzzyMatch.isPhonetic(algorithm))) {
            this.data.addAdditionalFields = true;
            nrFields += this.meta.getValue().length;
        }
        this.data.indexOfCachedFields = new int[nrFields];

        switch (algorithm) {
            case ALGORITHM_LEVENSHTEIN:
            case ALGORITHM_DAMERAU_LEVENSHTEIN:
            case ALGORITHM_NEEDLEMAN_WUNSCH: {
                this.data.minimalDistance = Const.toInt(this.environmentSubstitute(this.meta.getMinimalValue()), 0);
                if (this.isDetailed()) {
                    this.logDetailed(BaseMessages.getString(PKG, "FuzzyMatch.Log.MinimalDistance", this.data.minimalDistance));
                }
                this.data.maximalDistance = Const.toInt(this.environmentSubstitute(this.meta.getMaximalValue()), 5);
                if (this.isDetailed()) {
                    this.logDetailed(BaseMessages.getString(PKG, "FuzzyMatch.Log.MaximalDistance", this.data.maximalDistance));
                }
                this.initValueSeparator();
                break;
            }
            case ALGORITHM_JARO:
            case ALGORITHM_JARO_WINKLER:
            case ALGORITHM_PAIR_SIMILARITY: {
                this.data.minimalSimilarity = Const.toDouble(this.environmentSubstitute(this.meta.getMinimalValue()), 0.0);
                if (this.isDetailed()) {
                    this.logDetailed(BaseMessages.getString(PKG, "FuzzyMatch.Log.MinimalSimilarity", this.data.minimalSimilarity));
                }
                this.data.maximalSimilarity = Const.toDouble(this.environmentSubstitute(this.meta.getMaximalValue()), 1.0);
                if (this.isDetailed()) {
                    this.logDetailed(BaseMessages.getString(PKG, "FuzzyMatch.Log.MaximalSimilarity", this.data.maximalSimilarity));
                }
                this.initValueSeparator();
                break;
            }
            default: {
                // Phonetic algorithms need neither thresholds nor a separator.
            }
        }
        this.data.readLookupValues = true;
        return true;
    }

    /** Tells whether the algorithm type is one of the phonetic encoders. */
    private static boolean isPhonetic(int algorithmType) {
        return algorithmType == ALGORITHM_METAPHONE
            || algorithmType == ALGORITHM_DOUBLE_METAPHONE
            || algorithmType == ALGORITHM_SOUNDEX
            || algorithmType == ALGORITHM_REFINED_SOUNDEX;
    }

    /** Resolves the separator used to join several matches; not needed in "closer value" mode. */
    private void initValueSeparator() {
        if (this.meta.isGetCloserValue()) {
            return;
        }
        this.data.valueSeparator = this.environmentSubstitute(this.meta.getSeparator());
        if (this.isDetailed()) {
            this.logDetailed(BaseMessages.getString(PKG, "FuzzyMatch.Log.Separator", this.data.valueSeparator));
        }
    }

    /** Releases the cached lookup rows before delegating to the base class. */
    @Override
    public void dispose(StepMetaInterface smi, StepDataInterface sdi) {
        this.meta = (FuzzyMatchMeta) smi;
        this.data = (FuzzyMatchData) sdi;
        // Guard against a data object that carries no cache.
        if (this.data != null && this.data.look != null) {
            this.data.look.clear();
        }
        super.dispose(smi, sdi);
    }
}

