package owltools.gaf.species;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.jena.atlas.lib.Chars;
import org.apache.log4j.Logger;
import owltools.gaf.io.ResourceLoader;

/* loaded from: input_file:owltools/gaf/species/TaxonFinder.class */
/**
 * Static lookup between taxon names / five-letter codes and NCBI taxon ids.
 *
 * <p>Two in-memory indexes are built lazily on first use from the bundled
 * resources {@code speclist.txt} and {@code ncbi_taxa_ids.txt}
 * (optionally gzipped): one keyed by name/code, one keyed by taxon id.
 * Every public lookup triggers the load if it has not happened yet.
 *
 * <p>No synchronization is performed around the lazy load.
 */
public class TaxonFinder {
    public static final String TAXON_PREFIX = "taxon:";
    private static Map<String, Species> taxa2IDs;
    private static Map<String, Species> IDs2taxa;
    private static final String NCBI_TAXA = "ncbi_taxa_ids.txt";
    private static final String UNIPROT_TAXA = "speclist.txt";
    private static final Logger log = Logger.getLogger(TaxonFinder.class);

    /** Compiled once; the loaders test one id per input line. */
    private static final Pattern NUMERIC = Pattern.compile("-?\\d+(\\.\\d+)?");

    /** Exact-match aliases (lower-cased input name -> name used as lookup key). */
    private static final Map<String, String> NAME_ALIASES = buildNameAliases();

    /**
     * Resolves a taxon name or five-letter code to a prefixed taxon id.
     *
     * <p>Unless the input is exactly {@code "root"}, its first character is
     * upper-cased. The lookup then tries, in order: the alias-translated name,
     * the capitalized name, and the fully lower-cased name.
     *
     * @param str taxon name or code; may be {@code null} or empty
     * @return {@code "taxon:" + id} when a match is found; otherwise the bare
     *         string {@code "1"} (note: without the {@code taxon:} prefix)
     */
    public static String getTaxonID(String str) {
        ensureLoaded();
        Species species = null;
        if (str != null && str.length() > 0) {
            if (!str.equals("root")) {
                // Locale.ROOT keeps the casing independent of the JVM default locale.
                str = str.substring(0, 1).toUpperCase(Locale.ROOT) + str.substring(1);
            }
            species = taxa2IDs.get(speciesNameHack(str));
            if (species == null) {
                species = taxa2IDs.get(str);
            }
            if (species == null) {
                species = taxa2IDs.get(str.toLowerCase(Locale.ROOT));
            }
        }
        return species != null ? TAXON_PREFIX + species.getNcbi_taxon_id() : "1";
    }

    /**
     * Returns the label of the species registered under the given taxon id.
     *
     * <p>The id is first looked up verbatim; if that fails and the id contains
     * a colon, the text after the first colon is looked up instead.
     *
     * @param str taxon id, with or without a {@code prefix:}; may be {@code null}
     * @return the species' label, or {@code ""} when the id is {@code null} or unknown
     */
    public static String getSpecies(String str) {
        if (str == null) {
            return "";
        }
        ensureLoaded();
        Species species = IDs2taxa.get(str);
        if (species == null) {
            int colon = str.indexOf(':');
            if (colon >= 0) {
                species = IDs2taxa.get(str.substring(colon + 1));
            }
        }
        return species != null ? species.getLabel() : "";
    }

    /**
     * Returns the five-letter code of the species registered under the given taxon id.
     *
     * @param str taxon id, optionally prefixed with {@code "taxon:"}; may be {@code null}
     * @return the species' five-letter code, or {@code ""} when the id is
     *         {@code null} or unknown
     */
    public static String getCode(String str) {
        if (str == null) {
            return "";
        }
        ensureLoaded();
        if (str.startsWith(TAXON_PREFIX)) {
            str = str.substring(TAXON_PREFIX.length());
        }
        Species species = IDs2taxa.get(str);
        return species != null ? species.getFive_code() : "";
    }

    /** Builds both indexes on first use so every public lookup is safe to call first. */
    private static void ensureLoaded() {
        if (taxa2IDs == null || IDs2taxa == null) {
            loadTaxaMapping();
        }
    }

    /**
     * Creates both indexes, seeds them with the synthetic "LUCA" entry
     * (taxon id 1), then loads the UniProt list followed by the NCBI list.
     */
    private static void loadTaxaMapping() {
        taxa2IDs = new HashMap<String, Species>();
        IDs2taxa = new HashMap<String, Species>();
        Species luca = new Species();
        luca.setLabel("LUCA");
        luca.setNcbi_taxon_id("1");
        luca.setSpecies("LUCA");
        taxa2IDs.put("LUCA", luca);
        IDs2taxa.put("1", luca);
        loadUniProtTaxa();
        loadNCBITaxa();
    }

    /**
     * Loads {@code |}-separated NCBI name records, preferring the gzipped
     * resource and falling back to the plain one. Only lines containing
     * "scientific name" are registered; lines containing "authority" are
     * ignored. NCBI entries overwrite any earlier entry with the same key.
     */
    private static void loadNCBITaxa() {
        BufferedReader reader = ResourceLoader.inst().loadResource("ncbi_taxa_ids.txt.gz", true);
        if (reader == null) {
            reader = ResourceLoader.inst().loadResource(NCBI_TAXA);
        }
        if (reader == null) {
            return;
        }
        try {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (line.contains("authority")) {
                    continue;
                }
                String flat = line.replace('\t', ' ');
                String[] fields = flat.split("\\|");
                if (fields.length < 3) {
                    // A short line must not abort the rest of the file.
                    continue;
                }
                String id = fields[0].trim();
                String name = fields[1].trim();
                String third = fields[2].trim();
                // Combine the second and third columns: append the third when it
                // does not already contain the name, or take it when it is longer.
                if (!fields[2].contains(name)) {
                    name = (name + " " + third).trim();
                } else if (third.length() > name.length()) {
                    name = third;
                }
                if (!isNumeric(id)) {
                    log.warn("Skipping " + NCBI_TAXA + " line with non-numeric taxon id: " + line);
                    continue;
                }
                if (flat.contains("scientific name")) {
                    Species species = getSpecies(id, name);
                    species.setScientificName(name);
                    species.setNcbi_taxon_id(id);
                    IDs2taxa.put(id, species);
                    taxa2IDs.put(name, species);
                }
            }
        } catch (Exception e) {
            log.error("Unable to read ncbi_taxa_ids.txt exception=" + e.getMessage(), e);
        } finally {
            closeQuietly(reader, NCBI_TAXA);
        }
    }

    /**
     * Returns the species already registered under the given id or, failing
     * that, under the given name; otherwise a fresh, unregistered instance.
     */
    private static Species getSpecies(String str, String str2) {
        Species species = IDs2taxa.get(str);
        if (species == null) {
            species = taxa2IDs.get(str2);
        }
        if (species == null) {
            species = new Species();
        }
        return species;
    }

    /** Tells whether the text is an optionally signed integer or decimal number. */
    private static boolean isNumeric(String str) {
        return NUMERIC.matcher(str).matches();
    }

    /**
     * Loads the UniProt species list. Relevant lines contain {@code "N="} and
     * not {@code "Official"}; they start with the five-letter code, carry the
     * taxon id as the last token before the first colon, and the name after
     * {@code "N="}. Existing index entries are never overwritten here.
     */
    private static void loadUniProtTaxa() {
        BufferedReader reader = ResourceLoader.inst().loadResource(UNIPROT_TAXA);
        if (reader == null) {
            return;
        }
        try {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (!line.contains("N=") || line.contains("Official")) {
                    continue;
                }
                int firstSpace = line.indexOf(' ');
                // Limit 2: only the first colon separates id from name, so a
                // name that itself contains ':' is kept intact.
                String[] parts = line.split(Chars.S_COLON, 2);
                if (firstSpace < 0 || parts.length < 2) {
                    continue;
                }
                int nameStart = parts[1].indexOf("N=");
                if (nameStart < 0) {
                    continue;
                }
                String code = line.substring(0, firstSpace);
                String id = parts[0].substring(parts[0].lastIndexOf(' ') + 1).trim();
                String name = parts[1].substring(nameStart + 2).trim();
                if (!isNumeric(id)) {
                    // The entry is still registered, as before; only the report channel changed.
                    log.warn("Non-numeric taxon id '" + id + "' in " + UNIPROT_TAXA + " line: " + line);
                }
                Species species = getSpecies(id, name);
                species.setNcbi_taxon_id(id);
                species.setFive_code(code);
                species.setScientificName(name);
                if (!IDs2taxa.containsKey(id)) {
                    IDs2taxa.put(id, species);
                }
                if (!taxa2IDs.containsKey(name)) {
                    taxa2IDs.put(name, species);
                }
                if (!taxa2IDs.containsKey(code)) {
                    taxa2IDs.put(code, species);
                }
            }
        } catch (Exception e) {
            log.error("Unable to read speclist.txt exception=" + e.getMessage(), e);
        } finally {
            closeQuietly(reader, UNIPROT_TAXA);
        }
    }

    /** Closes the reader; a failure to close is logged rather than propagated. */
    private static void closeQuietly(BufferedReader reader, String resourceName) {
        try {
            reader.close();
        } catch (IOException e) {
            log.warn("Unable to close " + resourceName, e);
        }
    }

    /**
     * Translates a handful of legacy names, five-letter codes and ad-hoc clade
     * names into the name used as lookup key. Matching is case-insensitive;
     * an input with no alias is returned unchanged.
     */
    private static String speciesNameHack(String str) {
        String lowerCase = str.toLowerCase(Locale.ROOT);
        String alias = NAME_ALIASES.get(lowerCase);
        if (alias != null) {
            return alias;
        }
        // Prefix rules. No exact alias key starts with any of these prefixes,
        // so checking them after the table gives the same result as the
        // original interleaved chain.
        if (lowerCase.startsWith("schizosaccharomyces pombe 927")) {
            return "Schizosaccharomyces pombe";
        }
        if (lowerCase.startsWith("schpo")) {
            return "SCHPM";
        }
        if (lowerCase.startsWith("artiodactyla")) {
            return "Cetartiodactyla";
        }
        return str;
    }

    /** Builds the exact-match alias table; every key must be lower case. */
    private static Map<String, String> buildNameAliases() {
        Map<String, String> aliases = new HashMap<String, String>();
        aliases.put("human", "Homo sapiens");
        aliases.put("pantr", "Pan troglodytes");
        aliases.put("homo-pan", "Homininae");
        aliases.put("mouse", "Mus musculus");
        aliases.put("rat", "Rattus norvegicus");
        aliases.put("bovin", "Bos taurus");
        aliases.put("canis familiaris", "Canis lupus familiaris");
        aliases.put("canfa", "Canis lupus familiaris");
        aliases.put("mondo", "Monodelphis domestica");
        aliases.put("ornan", "Ornithorhynchus anatinus");
        aliases.put("chick", "Gallus gallus");
        aliases.put("xentr", "Xenopus (Silurana) tropicalis");
        aliases.put("fugu rubripes", "Takifugu rubripes");
        aliases.put("fugru", "Takifugu rubripes");
        aliases.put("brachydanio rerio", "Danio rerio");
        aliases.put("danre", "Danio rerio");
        aliases.put("cioin", "Ciona intestinalis");
        aliases.put("strpu", "Strongylocentrotus purpuratus");
        aliases.put("caenorhabditis", "Caenorhabditis elegans");
        aliases.put("briggsae", "Caenorhabditis briggsae");
        aliases.put("caebr", "Caenorhabditis briggsae");
        aliases.put("drome", "Drosophila melanogaster");
        aliases.put("anopheles gambiae str. pest", "Anopheles gambiae");
        aliases.put("anoga", "Anopheles gambiae");
        aliases.put("yeast", "Saccharomyces cerevisiae");
        aliases.put("ashbya gossypii", "Eremothecium gossypii");
        aliases.put("ashgo", "Eremothecium gossypii");
        aliases.put("neucr", "Neurospora crassa");
        aliases.put("dicdi", "Dictyostelium discoideum");
        aliases.put("aspergillus nidulans", "Emericella nidulans");
        aliases.put("chlre", "Chlamydomonas reinhardtii");
        aliases.put("orysj", "Oryza sativa");
        aliases.put("arath", "Arabidopsis thaliana");
        aliases.put("metac", "Methanosarcina acetivorans");
        aliases.put("methanosarcina acetivorans c2a", "Methanosarcina acetivorans");
        aliases.put("strco", "Streptomyces coelicolor");
        aliases.put("glovi", "Gloeobacter violaceus");
        aliases.put("lepin", "Leptospira interrogans");
        aliases.put("braja", "Bradyrhizobium japonicum");
        // Key is lower case so it can match the lower-cased input; the previous
        // mixed-case literal could never be equal to it.
        aliases.put("escherichia coli coli str. k-12 substr. mg1655", "Escherichia coli");
        aliases.put("ecoli", "Escherichia coli");
        aliases.put("enthi", "Entamoeba histolytica");
        aliases.put("bacsu", "Bacillus subtilis");
        aliases.put("deira", "Deinococcus radiodurans");
        aliases.put("thema", "Thermotoga maritima");
        aliases.put("opisthokonts", "Opisthokonta");
        aliases.put("bactn", "Bacteroides thetaiotaomicron");
        aliases.put("leima", "Leishmania major");
        aliases.put("eubacteria", "Bacteria <prokaryote>");
        aliases.put("theria", "Theria <Mammalia>");
        aliases.put("geobacter sufurreducens", "Geobacter sulfurreducens");
        aliases.put("geosl", "Geobacter sulfurreducens");
        aliases.put("psea7", "Pseudomonas aeruginosa");
        aliases.put("aquae", "Aquifex aeolicus");
        aliases.put("aquifex aeolicus vf5", "Aquifex aeolicus");
        aliases.put("sulso", "Sulfolobus solfataricus");
        aliases.put("sulfolobus solfataricus p2", "Sulfolobus solfataricus");
        aliases.put("saccharomycetaceae-candida", "mitosporic Nakaseomyces");
        aliases.put("sordariomycetes-leotiomycetes", "Leotiomycetes");
        aliases.put("excavates", "Excavarus");
        aliases.put("metazoa-choanoflagellida", "Opisthokonta");
        aliases.put("alveolata-stramenopiles", "Eukaryota");
        aliases.put("pezizomycotina-saccharomycotina", "saccharomyceta");
        aliases.put("unikonts", "Eukaryota");
        aliases.put("archaea-eukaryota", "cellular organisms");
        aliases.put("osteichthyes", "Euteleostomi");
        aliases.put("luca", "root");
        aliases.put("craniata-cephalochordata", "Chordata");
        aliases.put("hexapoda-crustacea", "Pancrustacea");
        aliases.put("rhabditida-chromadorea", "Chromadorea");
        return aliases;
    }
}
