/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.analysis.ml;

import com.dataiku.dip.analysis.model.preprocessing.CatFeaturePreprocessingParams;
import com.dataiku.dip.analysis.model.preprocessing.FeaturePreprocessingParams;
import com.dataiku.dip.analysis.model.preprocessing.PreprocessingParams;
import com.dataiku.dip.analysis.model.preprocessing.TextFeaturePreprocessingParams;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * Static helpers that scan per-feature preprocessing settings and return the names of
 * features whose configured handling falls into one of two size bands, labelled
 * "very sparse" and "quite sparse" by the method names.
 *
 * <p>Features whose role is {@code REJECT} are never reported. Only {@code TEXT} and
 * {@code CATEGORY} features are examined; every other feature type is ignored.
 *
 * <p>NOTE(review): a hashing size of exactly 10000 matches neither band (the "very" band
 * needs a size strictly above 10000, the "quite" band strictly below it). Confirm whether
 * that gap is intentional.
 */
public class FeatureUtils {
    /** Hashing sizes must be strictly greater than this to count as "quite sparse". */
    private static final int QUITE_SPARSE_LOWER_BOUND = 200;

    /**
     * Hashing sizes strictly greater than this count as "very sparse"; "quite sparse"
     * sizes must be strictly smaller than it.
     */
    private static final int VERY_SPARSE_LOWER_BOUND = 10000;

    /**
     * Lists the non-rejected features of {@code params} that use hashing with more than
     * 10000 buckets.
     *
     * @param params preprocessing settings; the values of its {@code per_feature} map are scanned
     * @return a new list of feature names, in the iteration order of {@code per_feature.values()}
     */
    public static List<String> getVerySparseFeatures(PreprocessingParams params) {
        return FeatureUtils.getVerySparseFeatures(params.per_feature.values());
    }

    /**
     * Lists the non-rejected features of {@code params} that either use hashing with a
     * bucket count strictly between 200 and 10000, or are text features handled by
     * token counts / TF-IDF with {@code maxWords} equal to 0.
     *
     * @param params preprocessing settings; the values of its {@code per_feature} map are scanned
     * @return a new list of feature names, in the iteration order of {@code per_feature.values()}
     */
    public static List<String> getQuiteSparseFeatures(PreprocessingParams params) {
        return FeatureUtils.getQuiteSparseFeatures(params.per_feature.values());
    }

    /**
     * Collects the names of all non-rejected features accepted by {@link #isVerySparse}.
     *
     * @param fp feature settings to scan
     * @return a new, mutable list of matching feature names
     */
    private static List<String> getVerySparseFeatures(Collection<FeaturePreprocessingParams> fp) {
        List<String> names = new ArrayList<String>();
        for (FeaturePreprocessingParams feature : fp) {
            if (feature.role != FeaturePreprocessingParams.Role.REJECT && FeatureUtils.isVerySparse(feature)) {
                names.add(feature.name);
            }
        }
        return names;
    }

    /**
     * Collects the names of all non-rejected features accepted by {@link #isQuiteSparse}.
     *
     * @param fp feature settings to scan
     * @return a new, mutable list of matching feature names
     */
    private static List<String> getQuiteSparseFeatures(Collection<FeaturePreprocessingParams> fp) {
        List<String> names = new ArrayList<String>();
        for (FeaturePreprocessingParams feature : fp) {
            if (feature.role != FeaturePreprocessingParams.Role.REJECT && FeatureUtils.isQuiteSparse(feature)) {
                names.add(feature.name);
            }
        }
        return names;
    }

    /**
     * Tells whether a feature is hashed into more than 10000 buckets.
     *
     * @param feature the feature settings; cast to the text or category subtype according to its {@code type}
     * @return {@code true} for a TEXT feature using TOKENIZE_HASHING with {@code hashSize > 10000},
     *         or a CATEGORY feature using HASHING with {@code nb_bins_hashing > 10000}
     */
    private static boolean isVerySparse(FeaturePreprocessingParams feature) {
        if (feature.type == FeaturePreprocessingParams.FeatureType.TEXT) {
            // The cast is performed for every TEXT feature, before the handling method is inspected.
            TextFeaturePreprocessingParams text = (TextFeaturePreprocessingParams)feature;
            return text.text_handling == TextFeaturePreprocessingParams.TextHandlingMethod.TOKENIZE_HASHING
                    && text.hashSize > VERY_SPARSE_LOWER_BOUND;
        }
        if (feature.type == FeaturePreprocessingParams.FeatureType.CATEGORY) {
            CatFeaturePreprocessingParams category = (CatFeaturePreprocessingParams)feature;
            return category.category_handling == CatFeaturePreprocessingParams.CategoryHandlingMethod.HASHING
                    && category.nb_bins_hashing > VERY_SPARSE_LOWER_BOUND;
        }
        return false;
    }

    /**
     * Tells whether a feature falls into the intermediate band.
     *
     * @param feature the feature settings; cast to the text or category subtype according to its {@code type}
     * @return {@code true} for a TEXT feature using TOKENIZE_COUNTS or TOKENIZE_TFIDF with
     *         {@code maxWords == 0}, a TEXT feature using TOKENIZE_HASHING with
     *         {@code 200 < hashSize < 10000}, or a CATEGORY feature using HASHING with
     *         {@code 200 < nb_bins_hashing < 10000}
     */
    private static boolean isQuiteSparse(FeaturePreprocessingParams feature) {
        if (feature.type == FeaturePreprocessingParams.FeatureType.TEXT) {
            TextFeaturePreprocessingParams text = (TextFeaturePreprocessingParams)feature;
            boolean countBased = text.text_handling == TextFeaturePreprocessingParams.TextHandlingMethod.TOKENIZE_COUNTS
                    || text.text_handling == TextFeaturePreprocessingParams.TextHandlingMethod.TOKENIZE_TFIDF;
            // maxWords is only read for count-based handling, as in the original short-circuit.
            if (countBased && text.maxWords == 0) {
                return true;
            }
            return text.text_handling == TextFeaturePreprocessingParams.TextHandlingMethod.TOKENIZE_HASHING
                    && text.hashSize > QUITE_SPARSE_LOWER_BOUND
                    && text.hashSize < VERY_SPARSE_LOWER_BOUND;
        }
        if (feature.type == FeaturePreprocessingParams.FeatureType.CATEGORY) {
            CatFeaturePreprocessingParams category = (CatFeaturePreprocessingParams)feature;
            return category.category_handling == CatFeaturePreprocessingParams.CategoryHandlingMethod.HASHING
                    && category.nb_bins_hashing > QUITE_SPARSE_LOWER_BOUND
                    && category.nb_bins_hashing < VERY_SPARSE_LOWER_BOUND;
        }
        return false;
    }
}

