/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.formats.avro;

import com.dataiku.dip.coremodel.SchemaValidator;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.formats.avro.AvroDSSRowReader;
import com.dataiku.dip.formats.avro.AvroFormatConfig;
import com.dataiku.dip.formats.avro.SchemaConverter;
import com.dataiku.dip.input.formats.ArchiveCapableFormatExtractor;
import com.dataiku.dip.input.formats.FixedSchemaExtractor;
import com.dataiku.dip.plugin.InputStreamWithContextInfo;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.JSON;
import com.google.gson.JsonObject;
import java.io.InputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.io.DatumReader;

/**
 * Format extractor that reads Avro container files and emits their records as DSS rows.
 *
 * <p>Records are decoded by an {@link AvroDSSRowReader}. The DSS schema used for decoding is
 * the one returned by {@code getSchema()}; when that is {@code null}, a schema is derived
 * from the Avro writer schema embedded in the file.
 */
public class AvroFormatExtractor
extends ArchiveCapableFormatExtractor
implements FixedSchemaExtractor {
    private static final DKULogger LOG = DKULogger.getLogger("dku.format.avro");

    /** Number of emitted records between two {@code observer.onInterval} notifications. */
    private static final int PROGRESS_INTERVAL = 500;

    private final AvroFormatConfig params;

    /**
     * @param params format configuration, handed to the row reader when it is initialized
     */
    public AvroFormatExtractor(AvroFormatConfig params) {
        this.params = params;
    }

    /**
     * Pretty-prints a JSON document for logging.
     *
     * <p>This helper only feeds debug log messages, so it must never make extraction fail:
     * if the text cannot be parsed as a JSON object (for instance a schema whose JSON form
     * is not an object), the input is returned unchanged instead of propagating the error.
     *
     * @param jsonStr JSON text to format
     * @return the formatted JSON, or {@code jsonStr} as-is when it cannot be formatted
     */
    private static String prettify(String jsonStr) {
        try {
            // Typed as Object so the same JSON.pretty(Object) overload keeps being selected.
            Object parsed = JSON.parse(jsonStr, JsonObject.class);
            return JSON.pretty(parsed);
        } catch (RuntimeException e) {
            // Best effort: a log-formatting problem is not worth aborting the read for.
            return jsonStr;
        }
    }

    /**
     * @return always {@code true}
     */
    @Override
    public boolean canSetSchemaForExtractor() {
        return true;
    }

    /**
     * Stores the DSS schema to use for subsequent extractions.
     *
     * @param schema DSS schema to store
     * @param allowExtraColumns not used by this implementation
     */
    @Override
    public void setSchema(com.dataiku.dip.coremodel.Schema schema, boolean allowExtraColumns) {
        this.schema = schema;
    }

    /**
     * Reads every record of an Avro stream and emits it to {@code out}.
     *
     * <p>The stream is closed when this method returns, whether normally, on limit, or on
     * error. {@code observer.onInterval} is called every {@value #PROGRESS_INTERVAL} records
     * and {@code observer.onEnd} once the stream is exhausted; {@code onEnd} is not called
     * when the read stops early because {@code observer.checkLimit} returned {@code false}.
     *
     * @param isn input stream with its context information
     * @param out receiver of the decoded rows
     * @param cf column factory handed to the row reader
     * @param rf row factory handed to the row reader
     * @param observer limit checker and progress listener
     * @return {@code true} if the whole stream was read, {@code false} if reading stopped
     *     because {@code observer.checkLimit} returned {@code false}
     * @throws Exception if reading, schema validation or row emission fails
     */
    @Override
    protected boolean doExtractStream(InputStreamWithContextInfo isn, ProcessorOutput out, ColumnFactory cf, RowFactory rf, ArchiveCapableFormatExtractor.ArchiveCapableObserver observer) throws Exception {
        AvroDSSRowReader avroDecoder = new AvroDSSRowReader();
        InputStream is = isn.getInputStream();
        // Raw DataFileStream kept on purpose: the type arguments of AvroDSSRowReader are not
        // visible from this file, hence the explicit cast on next() below.
        try (DataFileStream avroFile = new DataFileStream(is, avroDecoder)) {
            Schema avroWriterSchema = avroFile.getSchema();
            com.dataiku.dip.coremodel.Schema dssSchema = this.getSchema();
            if (dssSchema == null) {
                LOG.info("No DSS schema has been provided. A new one will be generated from the Avro file.");
                dssSchema = SchemaConverter.convertSchema(avroWriterSchema);
            }
            LOG.debug("Avro file original schema is :\n" + AvroFormatExtractor.prettify(avroWriterSchema.toString()));
            LOG.debug("DSS schema is :\n" + JSON.prettyLog((Object)dssSchema));
            new SchemaValidator().validate(dssSchema);
            // The decoder must be initialized before the first call to next().
            avroDecoder.initialize(cf, rf, dssSchema, this.params);

            int nbRecords = 0;
            while (avroFile.hasNext()) {
                if (!observer.checkLimit(nbRecords)) {
                    return false;
                }
                Row row = (Row)avroFile.next();
                isn.fillRowContext(row.getRowContext());
                // Source record numbers are 1-based.
                row.getRowContext().sourceRecord = nbRecords + 1;
                out.emitRow(row);
                nbRecords++;
                if (nbRecords % PROGRESS_INTERVAL == 0) {
                    observer.onInterval(nbRecords);
                }
            }
            observer.onEnd(nbRecords);
        }
        return true;
    }

    /**
     * Derives a DSS schema from the writer schema stored in an Avro stream.
     *
     * <p>No record is read. The stream is closed when this method returns.
     *
     * @param stream Avro container stream
     * @return the DSS schema converted from the Avro writer schema
     * @throws Exception if the stream cannot be opened as Avro or the schema cannot be converted
     */
    @Override
    public com.dataiku.dip.coremodel.Schema detectSchema(InputStream stream) throws Exception {
        GenericDatumReader<Object> recordReader = new GenericDatumReader<>();
        try (DataFileStream<Object> avroFile = new DataFileStream<>(stream, recordReader)) {
            return SchemaConverter.convertSchema(avroFile.getSchema());
        }
    }
}

