/*
 * Decompiled with CFR 0.152.
 */
package wiki.xsx.core.pdf.doc;

import java.awt.Rectangle;
import java.awt.geom.Rectangle2D;
import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import wiki.xsx.core.pdf.doc.XEasyPdfDocument;

public class XEasyPdfDocumentExtractor {
    private final PDDocument document;
    private final XEasyPdfDocument pdfDocument;
    private final SimpleExtractor simpleExtractor;
    private final RegionExtractor regionExtractor;

    XEasyPdfDocumentExtractor(XEasyPdfDocument pdfDocument) {
        this.pdfDocument = pdfDocument;
        this.document = this.pdfDocument.getTarget();
        this.simpleExtractor = new SimpleExtractor(this.document);
        this.regionExtractor = new RegionExtractor();
    }

    public XEasyPdfDocumentExtractor addRegion(String regionName, Rectangle rectangle) {
        this.regionExtractor.addRegion(regionName, rectangle);
        return this;
    }

    public XEasyPdfDocumentExtractor extractByRegions(List<Map<String, String>> dataList, int ... pageIndex) throws IOException {
        if (pageIndex != null && pageIndex.length > 0) {
            for (int index : pageIndex) {
                this.addData(dataList, index);
            }
        } else {
            int count = this.document.getNumberOfPages() - 1;
            for (int index = 0; index < count; ++index) {
                this.addData(dataList, index);
            }
        }
        return this;
    }

    public XEasyPdfDocumentExtractor extract(List<String> textList, int ... pageIndex) throws IOException {
        this.extract(textList, null, pageIndex);
        return this;
    }

    public XEasyPdfDocumentExtractor extract(List<String> textList, String regex, int ... pageIndex) throws IOException {
        this.simpleExtractor.extract(textList, regex, pageIndex);
        return this;
    }

    public XEasyPdfDocumentExtractor extractBySimpleTable(List<List<String>> textList, int pageIndex) throws IOException {
        ArrayList<String> sourceList = new ArrayList<String>(1024);
        this.extract(sourceList, "(\\S[^\\n&]+)", pageIndex);
        for (String rowText : sourceList) {
            textList.add(Arrays.asList(rowText.split("\\s")));
        }
        return this;
    }

    public XEasyPdfDocument finish() {
        return this.pdfDocument;
    }

    private void addData(List<Map<String, String>> dataList, int pageIndex) throws IOException {
        Map<String, String> data = this.regionExtractor.extract(this.document.getPage(pageIndex));
        if (!data.isEmpty()) {
            dataList.add(data);
        }
    }

    private static class RegionExtractor
    extends PDFTextStripper {
        private final Map<String, ArrayList<List<TextPosition>>> regionCharacterList = new HashMap<String, ArrayList<List<TextPosition>>>();
        private final Map<String, StringWriter> regionText = new HashMap<String, StringWriter>(256);
        private final Map<String, Rectangle> regionArea = new HashMap<String, Rectangle>(32);

        RegionExtractor() throws IOException {
            super.setSortByPosition(true);
        }

        void addRegion(String regionName, Rectangle rectangle) {
            this.regionArea.put(regionName, rectangle);
        }

        Map<String, String> extract(PDPage page) throws IOException {
            HashMap<String, String> data;
            if (this.regionArea.isEmpty()) {
                data = new HashMap<String, String>(0);
            } else {
                Set<String> keySet = this.regionArea.keySet();
                data = new HashMap(keySet.size());
                for (String region : keySet) {
                    this.setStartPage(this.getCurrentPageNo());
                    this.setEndPage(this.getCurrentPageNo());
                    ArrayList regionCharactersByArticle = new ArrayList(256);
                    regionCharactersByArticle.add(new ArrayList(256));
                    this.regionCharacterList.put(region, regionCharactersByArticle);
                    this.regionText.put(region, new StringWriter());
                }
                if (page.hasContents()) {
                    this.processPage(page);
                }
                for (String region : keySet) {
                    data.put(region, this.regionText.get(region).toString());
                }
            }
            return data;
        }

        protected void processTextPosition(TextPosition text) {
            Set<Map.Entry<String, Rectangle>> entrySet = this.regionArea.entrySet();
            for (Map.Entry<String, Rectangle> regionAreaEntry : entrySet) {
                Rectangle2D rect = regionAreaEntry.getValue();
                if (!rect.contains(text.getX(), text.getY())) continue;
                this.charactersByArticle = this.regionCharacterList.get(regionAreaEntry.getKey());
                super.processTextPosition(text);
            }
        }

        protected void writePage() throws IOException {
            Set<String> keySet = this.regionArea.keySet();
            for (String region : keySet) {
                this.charactersByArticle = this.regionCharacterList.get(region);
                this.output = this.regionText.get(region);
                super.writePage();
            }
        }
    }

    private static class SimpleExtractor
    extends PDFTextStripper {
        private final PDDocument document;

        public SimpleExtractor(PDDocument document) throws IOException {
            this.document = document;
        }

        void extract(List<String> textList, String regex, int ... pageIndex) throws IOException {
            if (pageIndex != null && pageIndex.length > 0) {
                for (int index : pageIndex) {
                    this.setStartPage(index + 1);
                    this.setEndPage(index + 1);
                    this.extract(textList, regex);
                }
            } else {
                this.extract(textList, regex);
            }
        }

        private void extract(List<String> textList, String regex) throws IOException {
            String text = this.getText(this.document);
            if (regex != null && regex.trim().length() > 0) {
                Matcher matcher = Pattern.compile(regex).matcher(text);
                while (matcher.find()) {
                    textList.add(matcher.group());
                }
            } else {
                textList.add(text);
            }
        }
    }
}

