package com.top_logic.knowledge.analyze.lucene;

import com.top_logic.basic.IdentifierUtil;
import com.top_logic.basic.Logger;
import com.top_logic.basic.TLID;
import com.top_logic.basic.config.InstantiationContext;
import com.top_logic.basic.io.StreamUtilities;
import com.top_logic.basic.mime.MimeTypesModule;
import com.top_logic.basic.module.ServiceDependencies;
import com.top_logic.convert.FormatConverterFactory;
import com.top_logic.convert.converters.FormatConverter;
import com.top_logic.convert.converters.FormatConverterException;
import com.top_logic.dob.ex.NoSuchAttributeException;
import com.top_logic.dsa.DataAccessProxy;
import com.top_logic.dsa.DatabaseAccessException;
import com.top_logic.dsa.ex.UnknownDBException;
import com.top_logic.dsa.util.MimeTypes;
import com.top_logic.knowledge.analyze.AnalyzeException;
import com.top_logic.knowledge.analyze.DefaultAnalyzeService;
import com.top_logic.knowledge.analyze.KnowledgeObjectResult;
import com.top_logic.knowledge.analyze.KnowledgeObjectResultImpl;
import com.top_logic.knowledge.indexing.lucene.LuceneIndex;
import com.top_logic.knowledge.objects.KnowledgeObject;
import com.top_logic.knowledge.service.KBUtils;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;

@ServiceDependencies({LuceneIndex.Module.class, FormatConverterFactory.Module.class, MimeTypesModule.Module.class})
/* loaded from: input_file:com/top_logic/knowledge/analyze/lucene/LuceneAnalyzeService.class */
public class LuceneAnalyzeService extends DefaultAnalyzeService<Config> {
    /** Cached debug switch; all debug log statements of this class are guarded by it. */
    private static final boolean DEBUG = Logger.isDebugEnabled(LuceneAnalyzeService.class);

    // Property names; presumably the legacy configuration keys matching the Config getters
    // (not referenced by the code visible in this class) -- TODO confirm.
    public static final String PROP_DF_MAXCOUNT = "DefaultMaxCountDocFinder";
    public static final String PROP_DF_THRESHOLD = "DefaultThresholdDocFinder";
    public static final String PROP_FE_MAXCOUNT = "DefaultMaxCountFeatureExtractor";
    public static final String PROP_FE_THRESHOLD = "DefaultThresholdFeatureExtractor";

    /** Token type that a token must have to be counted as keyword in findKeywordsCounted. */
    public static final String WORD_TOKENTYPE = "<ALPHANUM>";

    /** Same value as the minimum token length (2) checked in findKeywordsCounted. */
    public static final int WORD_MINSIZE = 2;

    /** Flag value for getQueryForExpression: join the terms with " AND ". */
    public static final boolean USE_AND = true;

    /** Flag value for getQueryForExpression: join the terms with " OR ". */
    public static final boolean USE_OR = false;

    /** Same value as the limit (10) on keywords that findSimilar searches for (identifier spelling is historical). */
    public static final int MAX_SIMLIAR_TERMS = 10;

    /** The Lucene index obtained in the constructor; provides the searcher used by this service. */
    private final LuceneIndex index;

    /* loaded from: input_file:com/top_logic/knowledge/analyze/lucene/LuceneAnalyzeService$Config.class */
    /**
     * Configuration of the {@link LuceneAnalyzeService}.
     */
    public interface Config extends DefaultAnalyzeService.Config<LuceneAnalyzeService> {
        /** Upper bound for the number of keywords returned by the keyword extraction. */
        int getDefaultMaxCountFeatureExtractor();

        /**
         * Threshold the keyword extraction compares against a keyword's occurrence count
         * divided by the total count of all keywords.
         */
        double getDefaultThresholdFeatureExtractor();

        /** Upper bound for the number of documents returned by the similarity searches. */
        int getDefaultMaxCountDocFinder();

        /** Threshold for the document finder; not read by the code visible in this class. */
        double getDefaultThresholdDocFinder();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:com/top_logic/knowledge/analyze/lucene/LuceneAnalyzeService$DescendingIntegerHolderComparator.class */
    /**
     * Orders map entries by their integer value, largest value first.
     * Entries with equal values compare as equal; the keys are ignored.
     */
    public static class DescendingIntegerHolderComparator implements Comparator<Map.Entry<String, Integer>> {
        /** Shared stateless instance. */
        public static final DescendingIntegerHolderComparator INSTANCE = new DescendingIntegerHolderComparator();

        private DescendingIntegerHolderComparator() {
            // Singleton, use INSTANCE.
        }

        /**
         * @return 1 if the first value is smaller, -1 if it is larger, 0 if both values are equal.
         */
        @Override
        public int compare(Map.Entry<String, Integer> first, Map.Entry<String, Integer> second) {
            int firstCount = first.getValue().intValue();
            int secondCount = second.getValue().intValue();
            // Operands are swapped on purpose to obtain the descending order.
            return Integer.compare(secondCount, firstCount);
        }
    }

    /* loaded from: input_file:com/top_logic/knowledge/analyze/lucene/LuceneAnalyzeService$KnowledgeObjectComparator.class */
    /**
     * Orders knowledge objects by their object names, delegating the comparison to
     * {@link KBUtils#compareIds}.
     */
    private static final class KnowledgeObjectComparator implements Comparator<KnowledgeObject> {
        /** Shared stateless instance. */
        public static final KnowledgeObjectComparator INSTANCE = new KnowledgeObjectComparator();

        private KnowledgeObjectComparator() {
            // Singleton, use INSTANCE.
        }

        @Override
        public int compare(KnowledgeObject left, KnowledgeObject right) {
            return KBUtils.compareIds(
                left.getObjectName(),
                right.getObjectName());
        }
    }

    /**
     * Creates the service from its configuration and remembers the {@link LuceneIndex}
     * instance returned by {@link LuceneIndex#getInstance()}.
     *
     * @param instantiationContext context passed on to the super class constructor
     * @param config configuration of this service
     */
    public LuceneAnalyzeService(InstantiationContext instantiationContext, Config config) {
        super(instantiationContext, config);
        this.index = LuceneIndex.getInstance();
    }

    /**
     * Reports this service as available.
     *
     * @return always {@code true}
     */
    public boolean serviceAvailable() {
        return true;
    }

    /**
     * Finds documents similar to the given one.
     *
     * <p>
     * The keywords of the document are extracted and each of the first
     * {@link #MAX_SIMLIAR_TERMS} keywords is searched for separately; the union of all hits is
     * returned, cut down to the configured maximum number of documents.
     * </p>
     *
     * @param knowledgeObject the document to find similar documents for
     * @return the found documents, at most as many as configured for the document finder
     * @throws AnalyzeException if extracting the keywords or searching fails
     */
    public Collection<? extends KnowledgeObject> findSimilar(KnowledgeObject knowledgeObject) throws AnalyzeException {
        if (DEBUG) {
            Logger.debug("Finding documents similar to: " + knowledgeObject.getObjectName(), LuceneAnalyzeService.class);
        }
        int maxCount = getConfig().getDefaultMaxCountDocFinder();
        List<String> keywords = m3extractKeywords(knowledgeObject);

        HashSet<KnowledgeObject> similar = new HashSet<>();
        int usedTerms = 0;
        for (String keyword : keywords) {
            if (usedTerms >= MAX_SIMLIAR_TERMS) {
                break;
            }
            similar.addAll(search(keyword, maxCount));
            usedTerms++;
        }

        if (similar.size() > maxCount) {
            // Too many hits: keep the first maxCount in the set's iteration order.
            HashSet<KnowledgeObject> truncated = new HashSet<>(maxCount);
            for (KnowledgeObject candidate : similar) {
                if (truncated.size() >= maxCount) {
                    break;
                }
                truncated.add(candidate);
            }
            similar = truncated;
        }

        if (DEBUG) {
            Logger.debug("Similar documents: " + similar, LuceneAnalyzeService.class);
        }
        return similar;
    }

    /**
     * Finds documents similar to the given one and assigns each a ranking.
     *
     * <p>
     * The search starts with all extracted keywords combined and drops the last keyword of
     * the list after each round, until enough documents were found or no keyword is left.
     * The hits are collected in a set ordered by {@link KnowledgeObjectComparator}; the ranking
     * decreases with the position in that set, starting at 1.0 for the first result.
     * </p>
     *
     * @param knowledgeObject the document to find similar documents for; it is excluded from
     *        the result
     * @return the ranked results
     * @throws AnalyzeException if extracting the keywords fails
     */
    public Collection<? extends KnowledgeObjectResult> findSimilarRanked(KnowledgeObject knowledgeObject) throws AnalyzeException {
        if (DEBUG) {
            Logger.debug("Finding documents similar ranked as: " + String.valueOf(knowledgeObject.getObjectName()), LuceneAnalyzeService.class);
        }
        int maxCount = getConfig().getDefaultMaxCountDocFinder();
        List<String> keywords = m3extractKeywords(knowledgeObject);
        TreeSet<KnowledgeObject> matches = new TreeSet<>(KnowledgeObjectComparator.INSTANCE);
        IndexSearcher searcher = this.index.getSearcher();
        while (matches.size() < maxCount && !keywords.isEmpty()) {
            matches.addAll(searchWordList(keywords, maxCount, searcher));
            // Relax the query for the next round by dropping the last keyword of the list.
            keywords.remove(keywords.size() - 1);
        }

        int total = matches.size();
        // Separate counter: dividing the decremented value by itself (as before) always gave 1.
        int remaining = total;
        List<KnowledgeObjectResult> ranked = new ArrayList<>();
        for (KnowledgeObject candidate : matches) {
            // Identity comparison is kept from the original code to skip the source document.
            if (candidate != knowledgeObject) {
                // Floating point division, total is > 0 whenever this loop body runs.
                ranked.add(new KnowledgeObjectResultImpl(candidate, (double) remaining / total));
                remaining--;
            }
        }
        if (DEBUG) {
            Logger.debug("Similar ranked documents are: " + String.valueOf(ranked), LuceneAnalyzeService.class);
        }
        return ranked;
    }

    /**
     * Searches for documents containing all of the given words.
     *
     * @param list the words to search for; they are joined into one expression, each word
     *        followed by a blank
     * @param i the maximum number of documents to return
     * @param indexSearcher the searcher to run the query with
     * @return the found documents, or an empty list if building or running the query failed
     *         (the failure is logged, not propagated)
     */
    private List<KnowledgeObject> searchWordList(List<?> list, int i, IndexSearcher indexSearcher) {
        if (DEBUG) {
            Logger.debug("Searching for " + i + " documents containing the words: " + list, LuceneAnalyzeService.class);
        }
        StringBuilder expression = new StringBuilder();
        for (Object word : list) {
            expression.append(word).append(' ');
        }
        try {
            Query query = getQueryForExpression(expression.toString(), USE_AND);
            List<KnowledgeObject> hits = search(query, i, indexSearcher);
            if (DEBUG) {
                Logger.debug("Search results: " + hits, LuceneAnalyzeService.class);
            }
            return hits;
        } catch (Exception ex) {
            Logger.error("Unable to search for " + expression, ex, this);
            return Collections.emptyList();
        }
    }

    /* renamed from: extractKeywords, reason: merged with bridge method [inline-methods] */
    /**
     * Extracts the most frequent keywords from the content of the given document.
     *
     * <p>
     * The content referenced by the document's physical resource is tokenized and the
     * occurrences of each keyword are counted. Keywords are returned in descending order of
     * frequency, limited to the configured maximum count. Extraction stops at the first
     * keyword whose share of all counted occurrences is below the configured threshold.
     * </p>
     *
     * @param knowledgeObject the document to extract the keywords from
     * @return a new, modifiable list of keywords, most frequent first; may be empty
     * @throws AnalyzeException if the content cannot be accessed or tokenized
     */
    public List<String> m3extractKeywords(KnowledgeObject knowledgeObject) throws AnalyzeException {
        if (DEBUG) {
            Logger.debug("Extracting keywords from document: " + String.valueOf(knowledgeObject.getObjectName()), LuceneAnalyzeService.class);
        }
        int maxCount = getConfig().getDefaultMaxCountFeatureExtractor();
        double threshold = getConfig().getDefaultThresholdFeatureExtractor();
        HashMap<String, Integer> counts = findKeywordsCounted(new StringReader(getFilteredContent(getPhysicalResource(knowledgeObject))));

        int total = 0;
        for (Integer count : counts.values()) {
            total += count.intValue();
        }

        List<Map.Entry<String, Integer>> byFrequency = new ArrayList<>(counts.entrySet());
        Collections.sort(byFrequency, DescendingIntegerHolderComparator.INSTANCE);
        int limit = Math.min(byFrequency.size(), maxCount);

        // Must stay a modifiable list: findSimilarRanked removes entries from it.
        List<String> keywords = new ArrayList<>();
        for (int pos = 0; pos < limit; pos++) {
            Map.Entry<String, Integer> entry = byFrequency.get(pos);
            // Floating point division: the former integer division yielded 0 for every keyword
            // that did not account for all occurrences, so the threshold was never met.
            // total is > 0 here, because each counted keyword occurs at least once.
            double share = (double) entry.getValue().intValue() / total;
            if (share < threshold) {
                break;
            }
            keywords.add(entry.getKey());
        }
        if (DEBUG) {
            Logger.debug("Extracted keywords from document " + String.valueOf(knowledgeObject.getObjectName()) + " are: " + String.valueOf(keywords), LuceneAnalyzeService.class);
        }
        return keywords;
    }

    /**
     * Tokenizes the given text with the default document analyzer of the index and counts
     * how often each keyword occurs.
     *
     * <p>
     * Only tokens of type {@link #WORD_TOKENTYPE} with at least {@link #WORD_MINSIZE}
     * characters are counted.
     * </p>
     *
     * @param reader the text to analyze
     * @return a map from keyword to its number of occurrences; empty if no keyword was found
     * @throws AnalyzeException if reading or tokenizing the text fails
     */
    protected HashMap<String, Integer> findKeywordsCounted(Reader reader) throws AnalyzeException {
        HashMap<String, Integer> counts = new HashMap<>();
        Analyzer analyzer = LuceneIndex.getInstance().getDefaultDocumentAnalyzer();
        // try-with-resources closes the token stream even if tokenizing fails half way.
        try (TokenStream tokens = analyzer.tokenStream(LuceneIndex.FIELD_CONTENTS, reader)) {
            tokens.reset();
            while (tokens.incrementToken()) {
                String term = tokens.getAttribute(CharTermAttribute.class).toString();
                String type = tokens.getAttribute(TypeAttribute.class).type();
                if (WORD_TOKENTYPE.equals(type) && term.length() >= WORD_MINSIZE) {
                    counts.merge(term, 1, Integer::sum);
                }
            }
            tokens.end();
            return counts;
        } catch (IOException e) {
            throw new AnalyzeException(e.toString());
        }
    }

    /**
     * Searches for documents matching at least one term of the given expression.
     *
     * @param str the search expression; its terms are combined with OR
     * @param i the maximum number of documents to return
     * @return the found documents
     * @throws AnalyzeException if the expression cannot be tokenized or parsed, or if the
     *         search itself fails
     */
    protected Collection<? extends KnowledgeObject> search(String str, int i) throws AnalyzeException {
        try {
            if (DEBUG) {
                Logger.debug("Finding " + i + " documents with expression: " + str, LuceneAnalyzeService.class);
            }
            List<KnowledgeObject> found = search(getQueryForExpression(str, USE_OR), i);
            if (DEBUG) {
                Logger.debug("Documents found: " + String.valueOf(found), LuceneAnalyzeService.class);
            }
            return found;
        } catch (IOException | ParseException e) {
            // Both failures are reported alike; the log message formerly contained a typo.
            Logger.error("Could not parse the search expression.", e, this);
            throw new AnalyzeException(e.toString());
        }
    }

    /**
     * Searches like {@link #search(String, int)} and wraps each hit into a result with the
     * constant ranking 1.0.
     *
     * @param str the search expression
     * @param i the maximum number of documents to return
     * @return one result per found document
     * @throws AnalyzeException if the search fails
     */
    protected Collection<? extends KnowledgeObjectResult> searchRanked(String str, int i) throws AnalyzeException {
        List<KnowledgeObjectResult> ranked = new ArrayList<>();
        for (KnowledgeObject hit : search(str, i)) {
            ranked.add(new KnowledgeObjectResultImpl(hit, 1.0d));
        }
        return ranked;
    }

    /**
     * Reads the "physicalResource" attribute of the given object.
     *
     * @param knowledgeObject the object to read the attribute from
     * @return the attribute value, cast to a string
     * @throws AnalyzeException if the object has no such attribute; the failure is logged
     */
    private String getPhysicalResource(KnowledgeObject knowledgeObject) throws AnalyzeException {
        Object resource;
        try {
            resource = knowledgeObject.getAttributeValue("physicalResource");
        } catch (NoSuchAttributeException ex) {
            String typeName = knowledgeObject.getClass().getName();
            AnalyzeException failure = new AnalyzeException("physicalResource is not an attribute of " + typeName);
            Logger.error("getPhysicalResource: " + failure.toString(), ex, this);
            throw failure;
        }
        return (String) resource;
    }

    /**
     * Loads the textual content of the given resource.
     *
     * <p>
     * If a format converter is registered for the MIME type of the resource, the converted
     * content is used; when no converter exists or the conversion fails (logged as warning),
     * the raw content is read instead. The text is finally passed through
     * {@link #parseStringContent(String)}.
     * </p>
     *
     * @param str name of the resource to read
     * @return the filtered content, or the empty string if no content could be obtained
     * @throws AnalyzeException if reading the raw content fails
     */
    private String getFilteredContent(String str) throws AnalyzeException {
        String mimeType = MimeTypes.getInstance().getMimeType(str);
        FormatConverter converter = FormatConverterFactory.getInstance().getFormatConverter(mimeType);

        String content;
        if (converter == null) {
            content = getSimpleContent(str);
        } else {
            try {
                content = getConvertedContent(str, converter, mimeType);
            } catch (Exception ex) {
                String message = "Unable to filter content of resource '" + str + "'";
                if (ex instanceof FormatConverterException) {
                    // Converter problems are reported without stack trace.
                    Logger.warn(message + " (reason is: " + ex.getMessage() + ")!", this);
                } else {
                    Logger.warn(message + "!", ex, this);
                }
                // Fall back to the unconverted content.
                content = getSimpleContent(str);
            }
        }

        if (content == null) {
            Logger.warn("FileContent was null, returning empty String...", this);
            return "";
        }
        return parseStringContent(content);
    }

    /**
     * Reads the given resource and converts it to text with the given format converter.
     *
     * @param str name of the resource to read
     * @param formatConverter converter to apply to the resource's stream
     * @param str2 MIME type handed to the converter
     * @return the complete converted content
     * @throws DatabaseAccessException if the resource cannot be accessed
     * @throws UnknownDBException if the resource cannot be accessed
     * @throws IOException if reading or converting the content fails
     */
    private String getConvertedContent(String str, FormatConverter formatConverter, String str2) throws DatabaseAccessException, UnknownDBException, IOException {
        // Both the raw stream and the converting reader are closed on every path,
        // including failures while copying (the reader formerly stayed open then).
        try (InputStream entry = new DataAccessProxy(str).getEntry();
                Reader converted = formatConverter.convert(entry, str2)) {
            // A StringWriter holds no system resources, so it needs no closing.
            StringWriter buffer = new StringWriter();
            StreamUtilities.copyReaderWriterContents(converted, buffer);
            return buffer.getBuffer().toString();
        }
    }

    /**
     * Reads the raw content of the given resource without any format conversion.
     *
     * @param str name of the resource to read
     * @return the content as produced by {@link #readFileContent(InputStream)}
     * @throws AnalyzeException if accessing or reading the resource fails; the failure is
     *         logged as warning before
     */
    private String getSimpleContent(String str) {
        if (DEBUG) {
            Logger.debug("Using simple filter for " + str, LuceneAnalyzeService.class);
        }
        // try-with-resources closes the stream on every path instead of relying on the callee.
        try (InputStream entry = new DataAccessProxy(str).getEntry()) {
            return readFileContent(entry);
        } catch (Exception e) {
            Logger.warn("getFilteredContent (simple filter): " + String.valueOf(e), LuceneAnalyzeService.class);
            throw new AnalyzeException("getSimpleContent: " + String.valueOf(e));
        }
    }

    /**
     * Reads the given stream completely as text, replacing every character outside the
     * ranges 32..127 and 160..255 by a blank.
     *
     * <p>
     * The stream is decoded with the platform default charset. The reader wrapping the stream
     * is closed before returning; failures while closing are only logged.
     * </p>
     *
     * @param inputStream the stream to read
     * @return the filtered content
     * @throws AnalyzeException if reading fails; the failure is logged before
     */
    private String readFileContent(InputStream inputStream) throws AnalyzeException {
        InputStreamReader source = null;
        StringWriter buffer = new StringWriter();
        try {
            source = new InputStreamReader(inputStream);
            int code;
            while ((code = source.read()) != -1) {
                boolean ascii = code >= 32 && code < 128;
                boolean latin1 = code >= 160 && code < 256;
                buffer.write((ascii || latin1) ? (char) code : ' ');
            }
            buffer.flush();
            return buffer.toString();
        } catch (Exception ex) {
            Logger.error("Couldn't read : " + ex, ex, this);
            throw new AnalyzeException("Couldn't read " + ex);
        } finally {
            if (source != null) {
                try {
                    source.close();
                } catch (IOException ex) {
                    Logger.error("Could not close the InputStreamReader.", ex, this);
                }
            }
            try {
                buffer.close();
            } catch (IOException ex) {
                Logger.error("Could not close the StringWriter.", ex, this);
            }
        }
    }

    /**
     * Replaces every character outside the ranges 32..127 and 160..255 by a blank.
     *
     * @param str the text to clean
     * @return the cleaned text; the given string itself if nothing had to be replaced
     */
    private String parseStringContent(String str) {
        // Copy lazily: most strings need no replacement at all.
        char[] cleaned = null;
        for (int pos = 0, end = str.length(); pos < end; pos++) {
            char current = str.charAt(pos);
            boolean ascii = current >= ' ' && current < 128;
            boolean latin1 = current >= 160 && current < 256;
            if (ascii || latin1) {
                continue;
            }
            if (cleaned == null) {
                cleaned = str.toCharArray();
            }
            cleaned[pos] = ' ';
        }
        return cleaned == null ? str : new String(cleaned);
    }

    /**
     * Runs the given query with the current searcher of the index.
     *
     * @param query the query to execute
     * @param i the maximum number of documents to return
     * @return the found documents
     * @throws AnalyzeException if the search fails
     */
    private synchronized List<KnowledgeObject> search(Query query, int i) throws AnalyzeException {
        IndexSearcher searcher = this.index.getSearcher();
        return search(query, i, searcher);
    }

    /**
     * Runs the given query and resolves the hits to knowledge objects.
     *
     * <p>
     * All hit documents are read from the index first; afterwards each hit is resolved through
     * {@link LuceneIndex#getKO} with its own ID and document type. Hits that cannot be
     * resolved are skipped.
     * </p>
     *
     * @param query the query to execute
     * @param i the maximum number of documents to return
     * @param indexSearcher the searcher to run the query with
     * @return the resolved knowledge objects, in the order of the hits
     * @throws AnalyzeException if accessing the index fails; the failure is logged before
     */
    private synchronized List<KnowledgeObject> search(Query query, int i, IndexSearcher indexSearcher) {
        List<KnowledgeObject> result = new ArrayList<>();
        try {
            ScoreDoc[] hits = indexSearcher.search(query, i).scoreDocs;
            int count = Math.min(hits.length, i);
            TLID[] ids = new TLID[count];
            // The type is recorded per hit; formerly the type of the last document read
            // was used to resolve every hit.
            String[] docTypes = new String[count];
            for (int pos = 0; pos < count; pos++) {
                Document document = indexSearcher.doc(hits[pos].doc);
                ids[pos] = IdentifierUtil.fromExternalForm(document.get(LuceneIndex.FIELD_KO_ID));
                docTypes[pos] = document.get(LuceneIndex.FIELD_DOCTYPE);
            }
            for (int pos = 0; pos < count; pos++) {
                KnowledgeObject ko = LuceneIndex.getKO(ids[pos], docTypes[pos], this.kbase);
                if (ko != null) {
                    result.add(ko);
                }
            }
            return result;
        } catch (IOException e) {
            Logger.error("Could not find expression in Lucene index.", e, this);
            throw new AnalyzeException(e.toString());
        }
    }

    /**
     * Builds a Lucene query from a free text expression.
     *
     * <p>
     * The expression is tokenized with the default document analyzer of the index. Each token
     * gets a trailing {@code *} appended and the tokens are joined with {@code AND} or
     * {@code OR}; the resulting text is parsed with the query parser for the contents field.
     * </p>
     *
     * @param str the expression to build the query for
     * @param z {@link #USE_AND} to join the terms with AND, {@link #USE_OR} to join them with OR
     * @return the parsed query
     * @throws IOException if tokenizing the expression fails
     * @throws ParseException if the query parser rejects the assembled query text
     */
    private Query getQueryForExpression(String str, boolean z) throws AnalyzeException, IOException, ParseException {
        String operator = z ? " AND " : " OR ";
        Analyzer analyzer = LuceneIndex.getInstance().getDefaultDocumentAnalyzer();
        StringBuilder queryText = new StringBuilder();
        // try-with-resources closes stream and reader even if tokenizing fails half way.
        try (StringReader source = new StringReader(str);
                TokenStream tokens = analyzer.tokenStream(LuceneIndex.FIELD_CONTENTS, source)) {
            tokens.reset();
            boolean first = true;
            while (tokens.incrementToken()) {
                if (!first) {
                    queryText.append(operator);
                }
                first = false;
                queryText.append((CharSequence) tokens.getAttribute(CharTermAttribute.class));
                queryText.append('*');
            }
            tokens.end();
        }
        return LuceneIndex.getInstance().getQueryParser(LuceneIndex.FIELD_CONTENTS).parse(queryText.toString());
    }
}
