/*
 * Decompiled with CFR 0.152.
 */
package com.mathworks.mlwidgets.help.search.lucene;

import com.mathworks.mlwidgets.help.search.RefEntityType;
import com.mathworks.mlwidgets.help.search.ResultType;
import com.mathworks.mlwidgets.help.search.lucene.DocumentHandler;
import com.mathworks.mlwidgets.help.search.lucene.DocumentHandlerException;
import com.mathworks.mlwidgets.help.search.lucene.DocumentInfo;
import com.mathworks.mlwidgets.help.search.lucene.LuceneIndexUtils;
import com.mathworks.mlwidgets.html.HTMLUtils;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.xerces.parsers.AbstractSAXParser;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLParserConfiguration;
import org.cyberneko.html.HTMLConfiguration;
import org.cyberneko.html.HTMLEntities;
import org.xml.sax.InputSource;

/**
 * {@link DocumentHandler} that parses an HTML stream with the NekoHTML configuration and turns it
 * into one or more {@link DocumentInfo} entries (title, section, anchor, body text, section
 * headings and page metadata read from {@code <meta>} tags).
 *
 * <p>When constructed with {@code true}, a page that is not a reference page is split into several
 * entries: a new entry is started at each anchored H1-H3 heading once the current entry has a title.
 */
public class NekoHtmlSaxDocumentHandler
implements DocumentHandler {
    /**
     * Matches a single entity reference; group 1 is the text between '&' and ';'.
     * '&' and ';' are excluded from the name so that adjacent references such as
     * "&lt;b&gt;" are matched one at a time instead of as one unknown entity.
     */
    private static final Pattern ENTITIES_PATTERN = Pattern.compile("&([^\\s&;]*);");
    private static final Pattern CHARSET_PATTERN = Pattern.compile("text/html;\\s*charset=(.*)");
    /** Any run of whitespace; compiled once because it is applied to every text chunk. */
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
    /** One or more sentence-break markers (U+001A), each with an optional whitespace char on either side. */
    private static final Pattern SENTENCE_BREAK_PATTERN = Pattern.compile("(\\s?\u001a\\s?)+");
    private final boolean fAllowMultipleSections;
    /** Placeholder written into the body at sentence-break tags; turned into a newline by getBody(). */
    private static final String SUBSTITUTION_CHAR = "\u001a";
    private static final String HEADER = "head";
    private static final String BODY = "body";
    private static final String TITLE = "title";
    private static final String ANCHOR = "a";
    private static final String TABLE = "table";
    private static final String IMAGE = "img";
    private static final String FRAMESET = "frameset";
    private static final String PARAGRAPH = "p";
    private static final String FONT = "font";
    private static final String IMAGE_SOURCE = "src";
    private static final String NAME = "name";
    private static final String ID = "id";
    private static final String SEE_ALSO = "See Also";
    private static final String SCRIPT = "script";
    private static final char NBSP = '\u00a0';

    /**
     * @param bl whether a single page may be split into several {@link DocumentInfo} entries
     */
    NekoHtmlSaxDocumentHandler(boolean bl) {
        this.fAllowMultipleSections = bl;
    }

    /**
     * Parses the given HTML stream and returns the entries collected by the parser.
     *
     * @param inputStream the HTML content to parse; it is not closed by this method
     * @return the entries produced for the page
     * @throws DocumentHandlerException wrapping any exception raised while parsing
     */
    @Override
    public List<DocumentInfo> getDocumentInfo(InputStream inputStream) throws DocumentHandlerException {
        try {
            NekoHtmlSaxParser nekoHtmlSaxParser = new NekoHtmlSaxParser();
            nekoHtmlSaxParser.parse(new InputSource(inputStream));
            return nekoHtmlSaxParser.getDocumentInfo();
        }
        catch (Exception exception) {
            // The cause is chained into the rethrown exception, so it is not also dumped to stderr here.
            throw new DocumentHandlerException("An error occurred while parsing an HTML document", exception);
        }
    }

    /**
     * Replaces entity references in {@code string} with the characters they denote.
     * Named references are looked up through {@link HTMLEntities}; numeric references may be
     * decimal ("&#65;") or hexadecimal ("&#x41;"). References that cannot be resolved are copied
     * to the result unchanged.
     *
     * @param string the text to decode; must not be null
     * @return the decoded text
     */
    private static String fixEntities(String string) {
        Matcher matcher = ENTITIES_PATTERN.matcher(string);
        StringBuilder decoded = new StringBuilder(string.length());
        int copiedUpTo = 0;
        while (matcher.find()) {
            decoded.append(string, copiedUpTo, matcher.start());
            String name = matcher.group(1);
            int codePoint = name.startsWith("#") ? NekoHtmlSaxDocumentHandler.parseNumericReference(name) : HTMLEntities.get((String)name);
            if (codePoint > -1) {
                // appendCodePoint also handles values above U+FFFF, which a (char) cast would truncate.
                decoded.appendCodePoint(codePoint);
            } else {
                decoded.append(matcher.group());
            }
            copiedUpTo = matcher.end();
        }
        decoded.append(string, copiedUpTo, string.length());
        return decoded.toString();
    }

    /**
     * Parses the body of a numeric character reference.
     *
     * @param reference the reference without '&' and ';', starting with '#' (for example "#65" or "#x41")
     * @return the code point, or -1 if the digits are malformed or do not form a valid code point
     */
    private static int parseNumericReference(String reference) {
        try {
            boolean hex = reference.length() > 1 && (reference.charAt(1) == 'x' || reference.charAt(1) == 'X');
            int codePoint = hex ? Integer.parseInt(reference.substring(2), 16) : Integer.parseInt(reference.substring(1));
            return Character.isValidCodePoint(codePoint) ? codePoint : -1;
        }
        catch (NumberFormatException numberFormatException) {
            return -1;
        }
    }

    /**
     * Command-line driver: parses every argument as a document and prints the extracted entries.
     * Arguments starting with "jar:file:" are loaded through {@link HTMLUtils#getSource}; all other
     * arguments are opened as local files.
     *
     * @param stringArray the documents to parse
     * @throws Exception if a document cannot be read or parsed
     */
    public static void main(String[] stringArray) throws Exception {
        NekoHtmlSaxDocumentHandler nekoHtmlSaxDocumentHandler = new NekoHtmlSaxDocumentHandler(true);
        for (String string : stringArray) {
            System.out.println("*** Indexing document: " + string + " ***");
            InputStream inputStream = null;
            try {
                // getBytes() without an argument encodes with the platform default charset.
                inputStream = string.startsWith("jar:file:") ? new ByteArrayInputStream(HTMLUtils.getSource(string).getBytes()) : new FileInputStream(new File(string));
                List<DocumentInfo> list = nekoHtmlSaxDocumentHandler.getDocumentInfo(inputStream);
                for (DocumentInfo documentInfo : list) {
                    System.out.println("Title: " + documentInfo.getTitle());
                    System.out.println("Section: " + documentInfo.getSection());
                    System.out.println("Anchor: " + documentInfo.getAnchor());
                    System.out.println("Ref page? " + documentInfo.isReferencePage());
                    System.out.println("Content: " + documentInfo.getBody());
                    System.out.println("Section headings: " + documentInfo.getSectionHeadings());
                    System.out.println("------------------------------------------------");
                }
            }
            finally {
                if (inputStream != null) {
                    inputStream.close();
                }
            }
        }
    }

    /**
     * Parser that tracks where it is in the page (head, body, title, table, script, heading) and
     * accumulates the text and metadata of the entries being built.
     */
    private class NekoHtmlSaxParser
    extends AbstractSAXParser {
        private boolean fInHeader;
        private boolean fInBody;
        private boolean fInTitle;
        /** Set by a "See Also" heading (or a "syntax" heading on reference pages); cleared by the next heading tag. */
        private boolean fSkipSection;
        private boolean fInScript;
        /** Nesting depth of the tables currently open inside the body. */
        private int fTableDepth;
        /** Set when the current outermost table was recognized as one whose content must be dropped. */
        private boolean fSkipTable;
        private boolean fInCopyright;
        /** True when the current heading was opened by a comment rather than by a heading tag. */
        private boolean fIsSectionComment;
        /** False until the first heading of the current entry has been closed. */
        private boolean fTitleComplete;
        private SectionHeadingTag fHeadingTag;
        /** Text accumulated from the title element in the head. */
        private String fTitleTag;
        /** Buffer for the text of the outermost open table; null when no table is buffered. */
        private StringBuilder fTableContent;
        private StringBuilder fBodyContent;
        private String fCurrentAnchor;
        private boolean fRefTitle;
        private boolean fInRefPageSummary;
        private boolean fReference;
        private String fCharset;
        private ResultType fResultType;
        private Map<String, RefEntityType> fRefEntities;
        private boolean fStudent;
        private List<DocumentInfo> fDocInfos;
        private DocumentInfo fCurrentDocInfo;
        /** Set for framesets and for pages whose title marks them as a table of contents or an index. */
        private boolean fDoNotIndex;

        private NekoHtmlSaxParser() {
            super((XMLParserConfiguration)new HTMLConfiguration());
            this.fInHeader = false;
            this.fInBody = false;
            this.fInTitle = false;
            this.fSkipSection = false;
            this.fInScript = false;
            this.fTableDepth = 0;
            this.fSkipTable = false;
            this.fInCopyright = false;
            this.fIsSectionComment = false;
            this.fTitleComplete = false;
            this.fHeadingTag = null;
            this.fTableContent = null;
            this.fBodyContent = null;
            this.fCurrentAnchor = null;
            this.fRefTitle = false;
            this.fReference = false;
            this.fCharset = null;
            this.fResultType = null;
            this.fRefEntities = new HashMap<String, RefEntityType>();
            this.fStudent = false;
            this.fDocInfos = new ArrayList<DocumentInfo>();
            this.fCurrentDocInfo = null;
            this.fDoNotIndex = false;
            this.startNewDocument(false);
        }

        /**
         * Handles a chunk of text: feeds heading text into the title or section headings, checks
         * for tables to skip, collects the title-tag text, and appends body text.
         */
        public void characters(XMLString xMLString, Augmentations augmentations) throws XNIException {
            // Non-breaking spaces count as ordinary spaces; whitespace runs collapse to one space.
            String string = WHITESPACE_PATTERN.matcher(xMLString.toString().replace(NBSP, ' ')).replaceAll(" ");
            if (string.length() == 0) {
                return;
            }
            if (this.fHeadingTag != null) {
                // Both comparisons use the trimmed text so that surrounding whitespace in the
                // heading markup does not defeat the match.
                String heading = string.trim();
                if (heading.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.SEE_ALSO) || this.isReferencePage() && heading.equalsIgnoreCase("syntax")) {
                    this.fSkipSection = true;
                }
                if (this.allowMultipleSections() && this.fHeadingTag.startsNewDocument() && this.fCurrentDocInfo.getTitle() != null && this.fCurrentAnchor != null) {
                    this.startNewDocument(true);
                }
                if (!this.fTitleComplete) {
                    this.fCurrentDocInfo.appendToTitle(NekoHtmlSaxDocumentHandler.fixEntities(string));
                } else {
                    this.fCurrentDocInfo.addSectionHeading(NekoHtmlSaxDocumentHandler.fixEntities(string));
                }
            }
            if (this.fTableDepth > 0) {
                this.checkSkipTable(string);
            }
            if (this.fInTitle) {
                this.fTitleTag = this.fTitleTag == null ? string : this.fTitleTag + string;
                String upperCased = string.toUpperCase(Locale.ENGLISH);
                if (upperCased.contains(": TABLE OF CONTENTS") || upperCased.contains(": INDEX")) {
                    this.fDoNotIndex = true;
                }
            } else if (this.includeText()) {
                this.appendContent(string, true);
                if (this.fInRefPageSummary) {
                    this.fCurrentDocInfo.appendToRefPageSummary(string);
                }
            }
            // An anchor only applies to the text that immediately follows it.
            this.fCurrentAnchor = null;
        }

        /**
         * Appends text to the table buffer (inside a buffered table) or to the body, unless the
         * current table is being skipped or the text is inside a copyright paragraph.
         *
         * @param string the text to append; null is ignored
         * @param bl whether sentence-break markers occurring in {@code string} are replaced by spaces first
         */
        private void appendContent(String string, boolean bl) {
            if (string == null) {
                return;
            }
            String content = bl ? string.replace(NekoHtmlSaxDocumentHandler.SUBSTITUTION_CHAR, " ") : string;
            if (this.fSkipTable || this.fInCopyright) {
                return;
            }
            if (this.fTableDepth > 0 && this.fTableContent != null) {
                this.fTableContent.append(content);
            } else if (this.fBodyContent == null) {
                this.fBodyContent = new StringBuilder(content);
            } else {
                this.fBodyContent.append(content);
            }
        }

        /** Appends a sentence-break marker to the collected content. */
        private void appendSubstitutionCharacter() {
            this.appendContent(NekoHtmlSaxDocumentHandler.SUBSTITUTION_CHAR, false);
        }

        /** @return true when text belongs to the body and is in neither a skipped section nor a script */
        private boolean includeText() {
            return this.fInBody && !this.fSkipSection && !this.fInScript;
        }

        /**
         * Reads page metadata from {@code <meta>} tags, tracks entry into head, body and title,
         * marks framesets as not indexable, and delegates other tags inside the body to
         * {@link #startBodyElement}.
         */
        public void startElement(QName qName, XMLAttributes xMLAttributes, Augmentations augmentations) {
            if (qName.rawname.equalsIgnoreCase("meta")) {
                Matcher matcher;
                String name = xMLAttributes.getValue(NekoHtmlSaxDocumentHandler.NAME);
                String content = xMLAttributes.getValue("content");
                if (name != null && content != null) {
                    if (name.equalsIgnoreCase("chunktype") && (content.equalsIgnoreCase("refpage") || content.equalsIgnoreCase("classrefpage"))) {
                        this.fReference = true;
                    } else if (name.equalsIgnoreCase("sw-edition") && content.equalsIgnoreCase("student")) {
                        this.fStudent = true;
                    } else if (name.equalsIgnoreCase("refentity")) {
                        // Expected form is "<type>:<entity>"; anything else is ignored.
                        if (content.matches(".+:.+")) {
                            String[] stringArray = content.split(":");
                            RefEntityType refEntityType = RefEntityType.resolve(stringArray[0]);
                            this.fRefEntities.put(stringArray[1], refEntityType);
                        }
                    } else if (name.equalsIgnoreCase("toctype")) {
                        this.fResultType = ResultType.fromKey(content);
                    }
                } else if (content != null && (matcher = CHARSET_PATTERN.matcher(content)).find()) {
                    // A meta tag without a name attribute may carry the content type and charset.
                    this.fCharset = matcher.group(1).trim();
                }
            } else if (qName.rawname.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.HEADER)) {
                this.fInHeader = true;
            } else if (qName.rawname.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.BODY)) {
                this.fInBody = true;
            } else if (this.fInHeader && qName.rawname.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.TITLE)) {
                this.fInTitle = true;
            } else if (qName.rawname.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.FRAMESET)) {
                this.fDoNotIndex = true;
            } else if (this.fInBody) {
                this.startBodyElement(qName, xMLAttributes);
            }
            super.startElement(qName, xMLAttributes, augmentations);
        }

        /**
         * Handles the start of a tag inside the body: inserts sentence/word breaks and updates the
         * anchor, script, heading, table, image and paragraph state.
         */
        private void startBodyElement(QName qName, XMLAttributes xMLAttributes) {
            String tag = qName.rawname;
            if (LuceneIndexUtils.isSentenceBreakTag(tag)) {
                this.appendSubstitutionCharacter();
            } else if (LuceneIndexUtils.isWordBreakTag(tag)) {
                this.appendContent(" ", false);
            }
            if (tag.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.ANCHOR)) {
                this.fCurrentAnchor = xMLAttributes.getValue(NekoHtmlSaxDocumentHandler.NAME);
            } else if (tag.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.SCRIPT)) {
                this.fInScript = true;
            } else if (SectionHeadingTag.isSectionHeadingTag(tag)) {
                this.fHeadingTag = SectionHeadingTag.valueOf(tag.toUpperCase(Locale.ENGLISH));
                this.fIsSectionComment = false;
                this.fSkipSection = false;
                // A heading's own id attribute serves as its anchor.
                this.fCurrentAnchor = xMLAttributes.getValue(NekoHtmlSaxDocumentHandler.ID);
                String cssClass = xMLAttributes.getValue("class");
                if (cssClass != null && cssClass.equalsIgnoreCase("reftitle")) {
                    this.fRefTitle = true;
                }
            } else if (tag.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.TABLE)) {
                // Only the outermost table gets a buffer; nested tables write into it.
                if (this.fTableDepth == 0) {
                    this.fTableContent = new StringBuilder();
                }
                ++this.fTableDepth;
            } else if (tag.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.IMAGE)) {
                String source = xMLAttributes.getValue(NekoHtmlSaxDocumentHandler.IMAGE_SOURCE);
                if (source != null) {
                    this.checkSkipTable(source);
                }
            } else if (tag.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.PARAGRAPH)) {
                String cssClass = xMLAttributes.getValue("class");
                if (cssClass != null && cssClass.equalsIgnoreCase("copy")) {
                    this.fInCopyright = true;
                }
                if ("purpose".equalsIgnoreCase(cssClass)) {
                    this.fInRefPageSummary = true;
                    this.fCurrentDocInfo.clearRefPageSummary();
                } else {
                    this.fInRefPageSummary = this.fRefTitle;
                }
            }
        }

        /**
         * Marks the current table as skipped and discards its buffer when {@code string} is the
         * image name "b_prev.gif" or contains "on this page" (both compared case-insensitively).
         */
        private void checkSkipTable(String string) {
            if (string.equalsIgnoreCase("b_prev.gif") || string.toLowerCase(Locale.ENGLISH).contains("on this page")) {
                this.fSkipTable = true;
                this.fTableContent = null;
            }
        }

        /** @return true when splitting was requested at construction and this is not a reference page */
        private boolean allowMultipleSections() {
            return NekoHtmlSaxDocumentHandler.this.fAllowMultipleSections && !this.isReferencePage();
        }

        /**
         * Saves the entry built so far and starts a fresh one.
         *
         * @param bl whether the new entry takes the current anchor
         */
        private void startNewDocument(boolean bl) {
            this.saveDocument();
            this.fBodyContent = null;
            this.fCurrentDocInfo = new DocumentInfo();
            this.fTitleComplete = false;
            if (bl) {
                this.fCurrentDocInfo.setAnchor(this.fCurrentAnchor);
            }
        }

        /** Stores charset and body on the current entry and adds it to the result list, unless indexing is disabled. */
        private void saveDocument() {
            if (this.fCurrentDocInfo == null || this.fDoNotIndex) {
                return;
            }
            this.fCurrentDocInfo.setCharset(this.fCharset);
            String body = this.getBody();
            this.fCurrentDocInfo.setBody(body != null ? body.trim() : null);
            this.fDocInfos.add(this.fCurrentDocInfo);
        }

        /**
         * Handles the end of a tag: inserts sentence/word breaks inside the body and unwinds the
         * state set up by the matching start tag.
         */
        public void endElement(QName qName, Augmentations augmentations) throws XNIException {
            String tag = qName.rawname;
            if (this.fInBody) {
                if (LuceneIndexUtils.isSentenceBreakTag(tag)) {
                    this.appendSubstitutionCharacter();
                } else if (LuceneIndexUtils.isWordBreakTag(tag)) {
                    this.appendContent(" ", false);
                }
            }
            if (tag.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.HEADER)) {
                this.fInHeader = false;
            } else if (tag.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.BODY)) {
                this.fInBody = false;
            } else if (this.fInHeader && tag.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.TITLE)) {
                this.fInTitle = false;
            } else if (tag.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.SCRIPT)) {
                this.fInScript = false;
            } else if (tag.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.TABLE)) {
                // Table starts are only counted inside the body, but ends arrive here regardless.
                // Never let the depth go negative, or later tables would no longer be buffered
                // and fSkipTable would never be reset.
                if (this.fTableDepth > 0) {
                    --this.fTableDepth;
                    if (this.fTableDepth == 0) {
                        if (!this.fSkipTable && this.fTableContent != null) {
                            this.appendContent(this.fTableContent.toString(), false);
                        }
                        this.fSkipTable = false;
                        this.fTableContent = null;
                    }
                }
            } else if (tag.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.PARAGRAPH)) {
                this.fInCopyright = false;
                this.fRefTitle = false;
                this.fInRefPageSummary = false;
            } else if (SectionHeadingTag.isSectionHeadingTag(tag)) {
                this.fHeadingTag = null;
                this.fTitleComplete = true;
            } else if (tag.equalsIgnoreCase(NekoHtmlSaxDocumentHandler.FONT) && this.fIsSectionComment) {
                // A heading opened by a comment is closed by the end of the following font element.
                this.fHeadingTag = null;
                this.fTitleComplete = true;
            }
        }

        /** Treats a comment whose trimmed text is a heading name (H1-H4, any case) as the start of a heading. */
        public void comment(XMLString xMLString, Augmentations augmentations) {
            String text = xMLString.toString().trim().toUpperCase(Locale.ENGLISH);
            if (SectionHeadingTag.isSectionHeadingTag(text)) {
                this.fHeadingTag = SectionHeadingTag.valueOf(text);
                this.fIsSectionComment = true;
            }
            super.comment(xMLString, augmentations);
        }

        /**
         * Saves the last entry, then applies the section name, the page-level flags and the
         * fallback title derived from the title tag to every collected entry.
         */
        public void endDocument(Augmentations augmentations) throws XNIException {
            super.endDocument(augmentations);
            this.saveDocument();
            String[] titleParts = this.parseTitleTag();
            String title = titleParts[0];
            // Section preference: text after "::", then the trailing parenthesized text, then the title.
            String section = titleParts[1];
            if (section == null || section.equals(title)) {
                section = titleParts[2];
            }
            if (section == null) {
                section = title;
            }
            // NOTE(review): when the page has no title tag nothing below runs, so the entries get
            // no reference-page/result-type/student/ref-entity metadata - confirm this is intended.
            if (section != null) {
                // Decode once; a second pass would turn a literal "&amp;lt;" into "<".
                section = NekoHtmlSaxDocumentHandler.fixEntities(section.trim());
                String decodedTitle = title != null ? NekoHtmlSaxDocumentHandler.fixEntities(title) : null;
                for (DocumentInfo documentInfo : this.fDocInfos) {
                    documentInfo.setSection(section);
                    documentInfo.setReferencePage(this.isReferencePage());
                    documentInfo.setResultType(this.getResultType());
                    documentInfo.setStudent(this.isStudentPage());
                    if ((documentInfo.getTitle() == null || this.isReferencePage()) && decodedTitle != null) {
                        documentInfo.setTitle(decodedTitle);
                    }
                    for (Map.Entry<String, RefEntityType> entry : this.getRefPageEntities().entrySet()) {
                        documentInfo.addRefPageEntity(entry.getKey(), entry.getValue());
                    }
                }
            }
        }

        /**
         * Splits the title-tag text of the form {@code "title :: section (parenthesized)"}.
         *
         * @return a three-element array: [0] the title part, [1] the text after "::", [2] the
         *         text inside the last pair of parentheses; missing parts are null
         */
        private String[] parseTitleTag() {
            String[] parts = new String[3];
            String text = this.fTitleTag;
            if (text != null) {
                text = text.trim();
                int open = text.lastIndexOf('(');
                int close = text.lastIndexOf(')');
                if (open >= 0 && close > open) {
                    parts[2] = text.substring(open + 1, close);
                    text = text.substring(0, open);
                }
                int separator = text.indexOf("::");
                // The title part is trimmed like the section part so that endDocument's
                // "section equals title" comparison is not defeated by trailing whitespace.
                if (separator > -1) {
                    parts[1] = text.substring(separator + 2).trim();
                    parts[0] = text.substring(0, separator).trim();
                } else {
                    parts[0] = text.trim();
                }
            }
            return parts;
        }

        private List<DocumentInfo> getDocumentInfo() {
            return this.fDocInfos;
        }

        /**
         * @return the collected body text with whitespace runs collapsed to single spaces and
         *         each run of sentence-break markers replaced by a newline, or null if no body
         *         text was collected
         */
        private String getBody() {
            if (this.fBodyContent == null) {
                return null;
            }
            String collapsed = WHITESPACE_PATTERN.matcher(this.fBodyContent).replaceAll(" ");
            return SENTENCE_BREAK_PATTERN.matcher(collapsed).replaceAll("\n");
        }

        private boolean isReferencePage() {
            return this.fReference;
        }

        private ResultType getResultType() {
            return this.fResultType;
        }

        private Map<String, RefEntityType> getRefPageEntities() {
            return this.fRefEntities;
        }

        private boolean isStudentPage() {
            return this.fStudent;
        }
    }

    /** The heading levels that are recognized as section headings. */
    private static enum SectionHeadingTag {
        H1,
        H2,
        H3,
        H4;


        /** @return true for every level except H4 */
        private boolean startsNewDocument() {
            return this != H4;
        }

        /** @return true if {@code string} equals the name of one of the constants, ignoring case */
        private static boolean isSectionHeadingTag(String string) {
            for (SectionHeadingTag sectionHeadingTag : SectionHeadingTag.values()) {
                if (!string.equalsIgnoreCase(sectionHeadingTag.toString())) continue;
                return true;
            }
            return false;
        }
    }
}

