/*
 * Decompiled with CFR 0.152.
 */
package ambience.etl.pdf;

import ambience.etl.BuildError;
import ambience.etl.Builder;
import ambience.etl.ETLStep;
import ambience.etl.ETLStepType$;
import ambience.etl.pdf.PdfCategory$;
import java.io.Serializable;
import monix.reactive.Observable;
import monix.reactive.Observable$;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.bson.BsonBinary;
import org.bson.BsonDocument;
import org.bson.BsonInt32;
import org.mongodb.scala.bson.BsonArray$;
import org.mongodb.scala.bson.BsonDocument$;
import org.mongodb.scala.bson.BsonInt32$;
import org.mongodb.scala.bson.BsonMagnets;
import org.mongodb.scala.bson.BsonMagnets$;
import org.mongodb.scala.bson.BsonTransformer;
import org.mongodb.scala.bson.BsonTransformer$;
import org.mongodb.scala.bson.DefaultHelper;
import org.mongodb.scala.bson.collection.BaseDocument;
import org.mongodb.scala.bson.collection.immutable.Document;
import scala.Function1;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.collection.ArrayOps$;
import scala.collection.Iterable;
import scala.collection.StringOps$;
import scala.collection.immutable.Seq;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.runtime.ModuleSerializationProxy;
import scala.runtime.ScalaRunTime$;
import scala.util.Either;
import scala.util.Right$;

/**
 * ETL step builder registered under the step type {@code "pdf.TextPageExtractor"}.
 *
 * <p>For every incoming document that carries a binary {@code "bytes"} field, the bytes are loaded
 * as a PDF, its text is extracted page by page, and the {@code "bytes"} field is replaced by a
 * {@code "lines"} array. Each array element is a document with the fields {@code "page"}
 * (1-based page number), {@code "line"} (1-based index among the non-blank lines of that page)
 * and {@code "text"} (the trimmed line). Documents without a {@code "bytes"} field pass through
 * unchanged.
 *
 * <p>This is the decompiled form of a Scala singleton object; the only instance is
 * {@link #MODULE$}.
 */
public final class TextPageExtractor$ implements Builder, Serializable {
    /** Step type identifier, created from {@code "pdf.TextPageExtractor"}. */
    private static final String stepType;
    /** Marker the text stripper appends after each page; used afterwards to split the text into pages. */
    private static final String PageEndMarker;
    /** The singleton instance. */
    public static final TextPageExtractor$ MODULE$;

    private TextPageExtractor$() {
    }

    static {
        // MODULE$ is assigned first, matching the original initialisation order.
        MODULE$ = new TextPageExtractor$();
        stepType = ETLStepType$.MODULE$.apply("pdf.TextPageExtractor");
        PageEndMarker = "<elx-page-end>";
    }

    /** Serialization hook: serializes a module proxy instead of the singleton itself. */
    private Object writeReplace() {
        return new ModuleSerializationProxy(TextPageExtractor$.class);
    }

    /** Returns the step type identifier of this builder. */
    public String stepType() {
        return stepType;
    }

    /** Returns the page-end marker string ({@code "<elx-page-end>"}). */
    public String PageEndMarker() {
        return PageEndMarker;
    }

    /**
     * Builds the stream transformation for this step.
     *
     * @param cxt  build context; not consulted by this builder
     * @param step step definition; not consulted by this builder
     * @return always a {@code Right} holding a function that flat-maps every document of the
     *         input observable through {@link #toText(Document)}
     */
    public Either<BuildError, Function1<Observable<Document>, Observable<Document>>> build(Builder.Context cxt, ETLStep step) {
        return Right$.MODULE$.apply((Function1 & Serializable)source -> ((Observable)source).flatMap((Function1 & Serializable)doc -> MODULE$.toText((Document)doc)));
    }

    /**
     * Converts the PDF stored under {@code "bytes"} into a {@code "lines"} array.
     *
     * @param doc input document
     * @return an observable emitting exactly one document: {@code doc} itself when it has no
     *         binary {@code "bytes"} field, otherwise {@code doc} without {@code "bytes"} and with
     *         the extracted {@code "lines"} array added
     */
    public Observable<Document> toText(Document doc) {
        Option option = doc.get("bytes", DefaultHelper.DefaultsTo$.MODULE$.overrideDefault(), ClassTag$.MODULE$.apply(BsonBinary.class)).map((Function1 & Serializable)binary -> ((BsonBinary)binary).getData());
        if (option instanceof Some) {
            byte[] bytes = (byte[])((Some)option).value();
            Object[] pageTexts = this.extractPageTexts(bytes);
            Object pages = Predef$.MODULE$.refArrayOps(pageTexts);
            Object numberedPages = Predef$.MODULE$.refArrayOps((Object[])ArrayOps$.MODULE$.zipWithIndex$extension(pages));
            // Flatten the per-page line documents into one array, preserving page order.
            Object[] lines = (BsonDocument[])ArrayOps$.MODULE$.flatMap$extension(numberedPages, (Function1 & Serializable)pageWithIndex -> this.linesOfPage((Tuple2)pageWithIndex), (Function1 & Serializable)xs -> Predef$.MODULE$.wrapRefArray((Object[])xs), ClassTag$.MODULE$.apply(BsonDocument.class));
            Object[] linesField = new BsonMagnets.CanBeBsonElement[1];
            String linesKey = (String)Predef$.MODULE$.ArrowAssoc((Object)"lines");
            linesField[0] = BsonMagnets$.MODULE$.tupleToCanBeBsonElement(Predef.ArrowAssoc$.MODULE$.$minus$greater$extension((Object)linesKey, (Object)BsonArray$.MODULE$.fromIterable((Iterable)Predef$.MODULE$.wrapRefArray(lines))), (BsonTransformer)BsonTransformer$.MODULE$.TransformBsonValue());
            // Drop the raw PDF payload and attach the extracted lines in its place.
            return Observable$.MODULE$.now(((BaseDocument)doc.$minus((Seq)ScalaRunTime$.MODULE$.wrapRefArray((Object[])new String[]{"bytes"}))).$plus((Seq)ScalaRunTime$.MODULE$.wrapRefArray(linesField)));
        }
        if (None$.MODULE$.equals(option)) {
            return Observable$.MODULE$.now((Object)doc);
        }
        throw new MatchError((Object)option);
    }

    /**
     * Loads {@code bytes} as a PDF and returns the extracted text split into one string per page.
     *
     * <p>The PDF document is closed when this method exits, including when loading the stripper
     * or extracting the text throws.
     */
    private Object[] extractPageTexts(byte[] bytes) {
        String pdfFileInText;
        try (PDDocument document = PdfCategory$.MODULE$.loadDocument(bytes)) {
            PDFTextStripper stripper = new PDFTextStripper();
            // Same constant as the split below, so marker and separator cannot drift apart.
            stripper.setPageEnd(this.PageEndMarker());
            stripper.setSortByPosition(true);
            pdfFileInText = stripper.getText(document);
        }
        // Remove the marker that follows the last page before splitting on the remaining ones.
        return StringOps$.MODULE$.stripSuffix$extension(Predef$.MODULE$.augmentString(pdfFileInText), this.PageEndMarker()).split(this.PageEndMarker());
    }

    /**
     * Turns one {@code (pageText, zeroBasedPageIndex)} pair into the line documents of that page.
     * Lines that are empty after trimming are dropped before the lines are numbered.
     */
    private BsonDocument[] linesOfPage(Tuple2 pageWithIndex) {
        if (pageWithIndex == null) {
            throw new MatchError((Object)pageWithIndex);
        }
        String page = (String)pageWithIndex._1();
        int pageIdx = BoxesRunTime.unboxToInt((Object)pageWithIndex._2());
        BsonInt32 pageNo = BsonInt32$.MODULE$.apply(pageIdx + 1);
        Object rawLines = Predef$.MODULE$.refArrayOps((Object[])page.split("\\r?\\n"));
        Object nonBlankLines = Predef$.MODULE$.refArrayOps((Object[])ArrayOps$.MODULE$.filter$extension(rawLines, (Function1 & Serializable)candidate -> StringOps$.MODULE$.nonEmpty$extension(Predef$.MODULE$.augmentString(((String)candidate).trim()))));
        Object numberedLines = Predef$.MODULE$.refArrayOps((Object[])ArrayOps$.MODULE$.zipWithIndex$extension(nonBlankLines));
        return (BsonDocument[])ArrayOps$.MODULE$.map$extension(numberedLines, (Function1 & Serializable)lineWithIndex -> this.lineDocument(pageNo, (Tuple2)lineWithIndex), ClassTag$.MODULE$.apply(BsonDocument.class));
    }

    /**
     * Builds the {@code {page, line, text}} document for one {@code (lineText, zeroBasedLineIndex)}
     * pair; the stored line number is 1-based and the text is trimmed.
     */
    private BsonDocument lineDocument(BsonInt32 pageNo, Tuple2 lineWithIndex) {
        if (lineWithIndex == null) {
            throw new MatchError((Object)lineWithIndex);
        }
        String line = (String)lineWithIndex._1();
        int lineIdx = BoxesRunTime.unboxToInt((Object)lineWithIndex._2());
        Object[] fields = new BsonMagnets.CanBeBsonElement[3];
        String pageKey = (String)Predef$.MODULE$.ArrowAssoc((Object)"page");
        fields[0] = BsonMagnets$.MODULE$.tupleToCanBeBsonElement(Predef.ArrowAssoc$.MODULE$.$minus$greater$extension((Object)pageKey, (Object)pageNo), (BsonTransformer)BsonTransformer$.MODULE$.TransformBsonValue());
        String lineKey = (String)Predef$.MODULE$.ArrowAssoc((Object)"line");
        fields[1] = BsonMagnets$.MODULE$.tupleToCanBeBsonElement(Predef.ArrowAssoc$.MODULE$.$minus$greater$extension((Object)lineKey, (Object)BsonInt32$.MODULE$.apply(lineIdx + 1)), (BsonTransformer)BsonTransformer$.MODULE$.TransformBsonValue());
        String textKey = (String)Predef$.MODULE$.ArrowAssoc((Object)"text");
        fields[2] = BsonMagnets$.MODULE$.tupleToCanBeBsonElement(Predef.ArrowAssoc$.MODULE$.$minus$greater$extension((Object)textKey, (Object)line.trim()), (BsonTransformer)BsonTransformer$.MODULE$.TransformString());
        return BsonDocument$.MODULE$.apply((Seq)ScalaRunTime$.MODULE$.wrapRefArray(fields));
    }
}

