package org.biopax.validator.utils;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeSet;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.biopax.miriam.MiriamLink;
import org.biopax.paxtools.controller.ModelUtils;
import org.biopax.paxtools.controller.ShallowCopy;
import org.biopax.paxtools.converter.LevelUpgrader;
import org.biopax.paxtools.io.SimpleIOHandler;
import org.biopax.paxtools.model.BioPAXElement;
import org.biopax.paxtools.model.BioPAXLevel;
import org.biopax.paxtools.model.Model;
import org.biopax.paxtools.model.level3.BioSource;
import org.biopax.paxtools.model.level3.ControlledVocabulary;
import org.biopax.paxtools.model.level3.EntityReference;
import org.biopax.paxtools.model.level3.Named;
import org.biopax.paxtools.model.level3.NucleicAcidReference;
import org.biopax.paxtools.model.level3.NucleicAcidRegionReference;
import org.biopax.paxtools.model.level3.ProteinReference;
import org.biopax.paxtools.model.level3.Provenance;
import org.biopax.paxtools.model.level3.PublicationXref;
import org.biopax.paxtools.model.level3.RelationshipTypeVocabulary;
import org.biopax.paxtools.model.level3.RelationshipXref;
import org.biopax.paxtools.model.level3.SimplePhysicalEntity;
import org.biopax.paxtools.model.level3.SmallMoleculeReference;
import org.biopax.paxtools.model.level3.UnificationXref;
import org.biopax.paxtools.model.level3.UtilityClass;
import org.biopax.paxtools.model.level3.XReferrable;
import org.biopax.paxtools.model.level3.Xref;
import org.biopax.paxtools.util.BPCollections;
import org.biopax.paxtools.util.ClassFilterSet;

/* loaded from: input_file:org/biopax/validator/utils/Normalizer.class */
public final class Normalizer {
    /** Logger shared by the normalizer and its nested {@code NormalizerMap}. */
    private static final Log log = LogFactory.getLog(Normalizer.class);
    /** When true, {@link #normalize(Model)} runs the displayName auto-fix step. */
    private boolean fixDisplayName;
    /** When true, {@link #normalize(Model)} ends by inferring the "organism" property from parent elements. */
    private boolean inferPropertyOrganism;
    /** When true, {@link #normalize(Model)} ends by inferring the "dataSource" property from parent elements. */
    private boolean inferPropertyDataSource;
    /** Prefix put in front of the URIs generated by {@link #uri(String, String, String, Class)}; the constructor sets it to "". */
    private String xmlBase;
    /** Name of the JVM system property that {@code uri(..)} reads to pick the URI generation strategy. */
    public static final String PROPERTY_NORMALIZER_URI_STRATEGY = "biopax.normalizer.uri.strategy";
    /** Strategy value: the URI suffix is the db/id string with every character other than '-' and word characters replaced by '_'. */
    public static final String VALUE_NORMALIZER_URI_STRATEGY_SIMPLE = "simple";
    /** Strategy value (used when the system property is not set): the URI suffix is the MD5 hex digest of the db/id string. */
    public static final String VALUE_NORMALIZER_URI_STRATEGY_MD5 = "md5";
    /** Free-text context appended to log and exception messages. */
    private String description = "";
    /** Level 3 reader used by {@link #normalize(String)} to parse the input; the constructor enables mergeDuplicates on it. */
    private SimpleIOHandler biopaxReader = new SimpleIOHandler(BioPAXLevel.L3);

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/biopax/validator/utils/Normalizer$NormalizerMap.class */
    /**
     * Collects "original element -> replacement element" pairs for one model
     * and applies them all at once in {@link #doSubs()}.
     *
     * A replacement is looked up by its target URI: first in the model, then
     * among the replacements registered so far, and only if both fail a
     * shallow copy of the original is made under the new URI.
     */
    public static class NormalizerMap {
        final Model model;
        final Map<BioPAXElement, BioPAXElement> subs = BPCollections.I.createMap();
        final Map<String, BioPAXElement> uriToSub = BPCollections.I.createMap();
        final ShallowCopy copier = new ShallowCopy();

        NormalizerMap(Model model) {
            this.model = model;
        }

        /**
         * Registers a replacement whose URI is the identifiers.org URI built
         * from the db and id of the given unification xref. Nothing is
         * registered when that URI is null or cannot be built (the failure is
         * logged).
         *
         * @throws IllegalArgumentException if the first argument is itself an Xref
         */
        void put(UtilityClass utilityClass, UnificationXref unificationXref) {
            if (utilityClass instanceof Xref) {
                throw new IllegalArgumentException("put(bpe,xref): the first arg was Xref.");
            }
            final String db = unificationXref.getDb();
            final String id = unificationXref.getId();
            try {
                final String standardUri = MiriamLink.getIdentifiersOrgURI(db, id);
                if (standardUri == null) {
                    return;
                }
                put(utilityClass, standardUri);
            } catch (Exception e) {
                Normalizer.log.error("Cannot get a Miriam standard ID for " + utilityClass + " (" + utilityClass.getModelInterface().getSimpleName() + ") , using " + db + ":" + id + ". " + e + ". ");
            }
        }

        /**
         * Registers the element that is to take the place of the given one
         * under the given URI.
         */
        void put(BioPAXElement bioPAXElement, String str) {
            final BioPAXElement replacement;
            if (model.containsID(str)) {
                // an element with the target URI is already in the model
                replacement = model.getByID(str);
            } else if (containsNewUri(str)) {
                // a replacement with that URI was registered earlier
                replacement = uriToSub.get(str);
            } else {
                replacement = copier.copy(bioPAXElement, str);
            }
            map(bioPAXElement, replacement);
        }

        /**
         * Applies all registered replacements to the model: removes the
         * originals, re-wires object properties, adds the replacements that
         * are not in the model yet, and finally fixes dangling inverse
         * properties of every model object. A failure after the removal step
         * is only logged.
         */
        void doSubs() {
            for (BioPAXElement original : subs.keySet()) {
                model.remove(original);
            }
            try {
                ModelUtils.replace(model, subs);
                for (BioPAXElement replacement : subs.values()) {
                    if (model.contains(replacement)) {
                        continue;
                    }
                    model.add(replacement);
                }
                for (BioPAXElement element : model.getObjects()) {
                    ModelUtils.fixDanglingInverseProperties(element, model);
                }
            } catch (Exception e) {
                Normalizer.log.error("Failed to replace BioPAX elements.", e);
            }
        }

        /** Tells whether a replacement with this URI has been registered already. */
        private boolean containsNewUri(String str) {
            return uriToSub.containsKey(str);
        }

        /** Records the pair and indexes the replacement by its URI. */
        private void map(BioPAXElement bioPAXElement, BioPAXElement bioPAXElement2) {
            uriToSub.put(bioPAXElement2.getRDFId(), bioPAXElement2);
            subs.put(bioPAXElement, bioPAXElement2);
        }
    }

    /**
     * Creates a normalizer with an empty xml base and with the displayName
     * fix and both property inference steps switched on; the internal reader
     * is set to merge duplicates.
     */
    public Normalizer() {
        xmlBase = "";
        fixDisplayName = true;
        inferPropertyDataSource = true;
        inferPropertyOrganism = true;
        biopaxReader.mergeDuplicates(true);
    }

    /**
     * Normalizes BioPAX data given as an OWL (RDF/XML) string.
     *
     * Every occurrence of "taxonXref" in the text is replaced with "xref"
     * before parsing; a model that is not Level 3 is upgraded first.
     *
     * @param str the BioPAX OWL content
     * @return the normalized model serialized back to OWL
     * @throws IllegalArgumentException if the input is null or empty, or no
     *         model could be created from it
     */
    public String normalize(String str) {
        if (str == null || str.isEmpty()) {
            throw new IllegalArgumentException("no data. " + this.description);
        }

        final byte[] owlBytes;
        try {
            owlBytes = str.replaceAll("taxonXref", "xref").getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new IllegalArgumentException("Failed! " + this.description, e);
        }

        Model model = this.biopaxReader.convertFromOWL(new ByteArrayInputStream(owlBytes));
        if (model == null) {
            throw new IllegalArgumentException("Failed to create Model! " + this.description);
        }

        if (BioPAXLevel.L3 != model.getLevel()) {
            log.info("Converting model to BioPAX Level3...");
            model = new LevelUpgrader().filter(model);
        }

        normalize(model);
        return convertToOWL(model);
    }

    /**
     * Gives the xrefs of the model normalized URIs (built by
     * {@link #uri(String, String, String, Class)} from the xml base, db, id
     * and, where present, id version) and replaces the original xrefs.
     *
     * Per xref kind:
     * <ul>
     * <li>PublicationXref - skipped unless both db and id are set;</li>
     * <li>RelationshipXref - processed only when its current URI starts with
     *     "http://identifiers.org/"; the relationship type terms are appended
     *     to the id part. NOTE(review): this condition is kept exactly as
     *     found; confirm that skipping all other relationship xrefs is
     *     intended;</li>
     * <li>UnificationXref - the db name is replaced with the MIRIAM preferred
     *     name when one is found, and UniProt isoform ids are detected;</li>
     * <li>any other xref - processed as is.</li>
     * </ul>
     * An xref that has neither a db nor an id part is skipped with a warning,
     * because no URI can be built for it.
     */
    private void normalizeXrefs(Model model) {
        NormalizerMap map = new NormalizerMap(model);
        String base = getXmlBase(model);

        // Work on a copy of the xref set: db/id of the xrefs are modified in the loop.
        for (Xref xref : new HashSet<Xref>(model.getObjects(Xref.class))) {
            String idPart = xref.getId();
            if (idPart != null && xref.getIdVersion() != null) {
                idPart = idPart + "_" + xref.getIdVersion();
            }

            if (xref instanceof PublicationXref) {
                if (xref.getDb() == null || xref.getId() == null) {
                    continue;
                }
            } else if (xref instanceof RelationshipXref) {
                if (!xref.getRDFId().startsWith("http://identifiers.org/")) {
                    continue;
                }
                idPart = appendRelationshipType((RelationshipXref) xref, idPart);
            } else if (xref instanceof UnificationXref && xref.getDb() != null && xref.getId() != null) {
                idPart = standardizeUnificationXref(xref, idPart, base);
            }

            if (xref.getDb() == null && idPart == null) {
                // uri(..) would throw and abort the normalization of all the other xrefs
                log.warn("Won't normalize the xref having neither 'db' nor 'id': " + xref.getRDFId() + ". " + this.description);
                continue;
            }

            map.put(xref, uri(base, xref.getDb(), idPart, xref.getModelInterface()));
        }

        map.doSubs();
    }

    /** Appends the lower-cased, '_'-joined relationship type terms of the xref, if any, to the id part. */
    private static String appendRelationshipType(RelationshipXref xref, String idPart) {
        RelationshipTypeVocabulary type = xref.getRelationshipType();
        if (type == null || type.getTerm().isEmpty()) {
            return idPart;
        }
        String terms = StringUtils.join(type.getTerm(), '_').toLowerCase(Locale.ROOT);
        return idPart != null ? idPart + "_" + terms : terms;
    }

    /** Sets the MIRIAM preferred db name on the xref, switches it to "UniProt Isoform" where the id fits, and returns the id part to use. */
    private static String standardizeUnificationXref(Xref xref, String idPart, String base) {
        final String isoformDb = "UniProt Isoform";
        final String isoformUriPrefix = "http://identifiers.org/uniprot.isoform/";

        String db = xref.getDb();
        try {
            String preferred = MiriamLink.getName(db);
            if (preferred != null) {
                db = preferred;
                xref.setDb(preferred);
            }
        } catch (IllegalArgumentException ignored) {
            // not a known db name or synonym: keep the db value as it is
        }

        // Locale.ROOT: the match must not depend on the JVM default locale
        if (!db.toUpperCase(Locale.ROOT).startsWith("UNIPROT")) {
            return idPart;
        }

        if (uri(base, isoformDb, xref.getId(), ProteinReference.class).startsWith(isoformUriPrefix)) {
            // the id alone is an isoform id; the id version is not part of the URI
            xref.setDb(isoformDb);
            return xref.getId();
        }

        if (xref.getIdVersion() != null) {
            // try "id-version" as the isoform id
            String isoformId = xref.getId() + "-" + xref.getIdVersion();
            if (uri(base, isoformDb, isoformId, ProteinReference.class).startsWith(isoformUriPrefix)) {
                xref.setDb(isoformDb);
                xref.setId(isoformId);
                xref.setIdVersion((String) null);
                return isoformId;
            }
        }

        return idPart;
    }

    /**
     * Builds the URI for a (to be) normalized BioPAX element.
     *
     * For PublicationXref, RelationshipTypeVocabulary and any EntityReference
     * type the identifiers.org URI is returned, provided the db name is known
     * to MIRIAM and the id part is set and accepted. In every other case the
     * result is {@code str + <type simple name> + "_" + <suffix>}, where the
     * suffix is derived from "db_id" (db in lower case): sanitized text for
     * Xref types or when the system property
     * {@link #PROPERTY_NORMALIZER_URI_STRATEGY} is "simple", otherwise its
     * MD5 hex digest.
     *
     * @param str  xml base to prefix generated URIs with; null is treated as ""
     * @param str2 database name or synonym; may be null if str3 is not
     * @param str3 id part; may be null if str2 is not
     * @param cls  BioPAX type of the element the URI is for
     * @return the URI
     * @throws IllegalArgumentException if cls is null, or both str2 and str3 are null
     */
    public static String uri(String str, String str2, String str3, Class<? extends BioPAXElement> cls) {
        if (cls == null || (str2 == null && str3 == null)) {
            throw new IllegalArgumentException("'Either type' is null, or both dbName and idPart are nulls.");
        }

        if (str2 != null) {
            try {
                // switch to the preferred db name; on failure the given name stays
                str2 = MiriamLink.getName(str2);
                boolean standardUriType = cls.equals(PublicationXref.class)
                        || cls.equals(RelationshipTypeVocabulary.class)
                        || EntityReference.class.isAssignableFrom(cls);
                // without an id part there is nothing to resolve at identifiers.org
                if (standardUriType && str3 != null) {
                    return MiriamLink.getIdentifiersOrgURI(str2, str3);
                }
            } catch (IllegalArgumentException e) {
                log.debug("uri: not a standard db name or synonym: " + str2, e);
            }
        }

        StringBuilder local = new StringBuilder();
        if (str2 != null) {
            // Locale.ROOT keeps the generated URI (and its MD5) independent of the JVM default locale
            local.append(str2.toLowerCase(Locale.ROOT));
        }
        if (str3 != null) {
            if (str2 != null) {
                local.append("_");
            }
            local.append(str3);
        }
        String localPart = local.toString();

        boolean readable = VALUE_NORMALIZER_URI_STRATEGY_SIMPLE.equals(
                System.getProperty(PROPERTY_NORMALIZER_URI_STRATEGY, VALUE_NORMALIZER_URI_STRATEGY_MD5))
                || Xref.class.isAssignableFrom(cls);
        String suffix = readable ? localPart.replaceAll("[^-\\w]", "_") : ModelUtils.md5hex(localPart);

        return (str != null ? str : "") + cls.getSimpleName() + "_" + suffix;
    }

    /**
     * @return the context text that is appended to log and exception messages
     */
    public String getDescription() {
        return description;
    }

    /**
     * @param str the context text to append to log and exception messages
     */
    public void setDescription(String str) {
        description = str;
    }

    /**
     * Fills in missing display names.
     *
     * First pass: a Named object whose displayName is null gets its
     * standardName, or else the shortest of its names (the first such one in
     * iteration order). Second pass: a simple physical entity whose
     * displayName is null or blank gets the non-blank displayName of its
     * entity reference.
     */
    private void fixDisplayName(Model model) {
        log.info("Trying to auto-fix 'null' displayName...");

        for (Named named : model.getObjects(Named.class)) {
            if (named.getDisplayName() != null) {
                continue;
            }
            if (named.getStandardName() != null) {
                named.setDisplayName(named.getStandardName());
                log.info(named + " displayName auto-fix: " + named.getDisplayName() + ". " + this.description);
                continue;
            }
            if (named.getName().isEmpty()) {
                continue;
            }
            Iterator<String> names = named.getName().iterator();
            String shortest = names.next();
            while (names.hasNext()) {
                String candidate = names.next();
                if (candidate.length() < shortest.length()) {
                    shortest = candidate;
                }
            }
            named.setDisplayName(shortest);
            log.info(named + " displayName auto-fix: " + shortest + ". " + this.description);
        }

        for (EntityReference entityReference : model.getObjects(EntityReference.class)) {
            for (SimplePhysicalEntity entity : entityReference.getEntityReferenceOf()) {
                String entityName = entity.getDisplayName();
                boolean entityUnnamed = entityName == null || entityName.trim().length() == 0;
                if (!entityUnnamed) {
                    continue;
                }
                String referenceName = entityReference.getDisplayName();
                if (referenceName != null && referenceName.trim().length() > 0) {
                    entity.setDisplayName(referenceName);
                }
            }
        }
    }

    /**
     * Serializes the model to a BioPAX OWL string.
     *
     * The bytes are decoded as UTF-8, the same encoding
     * {@link #normalize(String)} uses on the input side, so the result no
     * longer depends on the platform default charset.
     * NOTE(review): this presumes SimpleIOHandler writes UTF-8 - confirm.
     *
     * @param model the model to write
     * @return the OWL text
     */
    private String convertToOWL(Model model) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        new SimpleIOHandler(model.getLevel()).convertToOWL(model, out);
        try {
            return out.toString("UTF-8");
        } catch (UnsupportedEncodingException e) {
            // every Java platform is required to support UTF-8
            throw new IllegalStateException("UTF-8 is not supported. " + this.description, e);
        }
    }

    /**
     * Lists the unification xrefs of the object that have both db and id set,
     * ordered by the concatenation db + id. Xrefs lacking db or id are left
     * out with a warning.
     *
     * @param xReferrable the object to take the xrefs from
     * @return a new sorted list; empty if there is no usable unification xref
     */
    private List<UnificationXref> getUnificationXrefsSorted(XReferrable xReferrable) {
        // the filter set is used as a raw type here, hence the unchecked copy
        ArrayList<UnificationXref> usable = new ArrayList<UnificationXref>(
                (Collection) new ClassFilterSet(xReferrable.getXref(), UnificationXref.class));

        for (Iterator<UnificationXref> it = usable.iterator(); it.hasNext();) {
            UnificationXref candidate = it.next();
            if (candidate.getDb() != null && candidate.getId() != null) {
                continue;
            }
            log.warn("Won't consider the UnificationXref having NULL 'db' or 'id' property: " + candidate + ", " + candidate.getRDFId() + ". " + this.description);
            it.remove();
        }

        Collections.sort(usable, new Comparator<UnificationXref>() {
            @Override
            public int compare(UnificationXref left, UnificationXref right) {
                String leftKey = left.getDb() + left.getId();
                String rightKey = right.getDb() + right.getId();
                return leftKey.compareTo(rightKey);
            }
        });
        return usable;
    }

    /**
     * Picks the unification xref to base the normalized URI of the object on.
     *
     * The candidates come sorted from
     * {@link #getUnificationXrefsSorted(XReferrable)}. The first one whose db
     * name starts (case-insensitively) with the prefix preferred for the
     * object type wins: "uniprot" for ProteinReference, "chebi" for
     * SmallMoleculeReference, "entrez" for NucleicAcidReference and
     * NucleicAcidRegionReference. For a ProteinReference without a match,
     * "refseq" is tried next. Objects of any other type simply get the first
     * candidate.
     *
     * @param xReferrable the object to inspect
     * @return the chosen xref, or null if there is none
     */
    private UnificationXref getFirstUnificationXref(XReferrable xReferrable) {
        List<UnificationXref> sorted = getUnificationXrefsSorted(xReferrable);
        if (sorted.isEmpty()) {
            return null;
        }

        String preferredDb = null;
        if (xReferrable instanceof ProteinReference) {
            preferredDb = "uniprot";
        } else if (xReferrable instanceof SmallMoleculeReference) {
            preferredDb = "chebi";
        } else if ((xReferrable instanceof NucleicAcidReference) || (xReferrable instanceof NucleicAcidRegionReference)) {
            preferredDb = "entrez";
        }

        if (preferredDb == null) {
            return sorted.get(0);
        }

        UnificationXref chosen = firstWithDbPrefix(sorted, preferredDb);
        if (chosen == null && (xReferrable instanceof ProteinReference)) {
            chosen = firstWithDbPrefix(sorted, "refseq");
        }
        return chosen;
    }

    /** Returns the first xref whose lower-cased db name starts with the prefix, or null if none does. */
    private static UnificationXref firstWithDbPrefix(List<UnificationXref> xrefs, String prefix) {
        for (UnificationXref xref : xrefs) {
            // Locale.ROOT: the match must not depend on the JVM default locale
            if (xref.getDb().toLowerCase(Locale.ROOT).startsWith(prefix)) {
                return xref;
            }
        }
        return null;
    }

    /**
     * Normalizes a BioPAX Level 3 model in place.
     *
     * Steps, in this order: xrefs, display names (optional), controlled
     * vocabularies, organisms, entity references, model repair, and finally
     * the optional inference of "dataSource" and "organism" from parent
     * elements.
     *
     * @param model the model to normalize
     * @throws IllegalArgumentException if the model is not Level 3
     */
    public void normalize(Model model) {
        if (BioPAXLevel.L3 != model.getLevel()) {
            throw new IllegalArgumentException("Not Level3 model. Consider converting it first (e.g., with the PaxTools).");
        }

        final String context = this.description;

        log.info("Normalizing xrefs..." + context);
        normalizeXrefs(model);

        if (this.fixDisplayName) {
            log.info("Normalizing display names..." + context);
            fixDisplayName(model);
        }

        log.info("Normalizing CVs..." + context);
        normalizeCVs(model);

        log.info("Normalizing organisms..." + context);
        normalizeBioSources(model);

        log.info("Normalizing entity references..." + context);
        normalizeERs(model);

        log.info("Repairing..." + context);
        model.repair();

        log.info("Optional tasks (reasoning)..." + context);
        if (this.inferPropertyDataSource) {
            ModelUtils.inferPropertyFromParent(model, "dataSource", new Class[0]);
        }
        if (this.inferPropertyOrganism) {
            ModelUtils.inferPropertyFromParent(model, "organism", new Class[0]);
        }
    }

    /**
     * Replaces controlled vocabularies with copies that have normalized URIs.
     *
     * The URI is built from the db and id of the vocabulary's first
     * unification xref or, if it has none, from one of its terms (whichever
     * the term set's iterator returns first). A vocabulary with neither is
     * left as is.
     *
     * The xml base comes from {@link #getXmlBase(Model)}, as it does for
     * xrefs, so the model's own xml base is used when none was set on this
     * normalizer.
     */
    private void normalizeCVs(Model model) {
        NormalizerMap map = new NormalizerMap(model);
        String base = getXmlBase(model);

        for (ControlledVocabulary cv : model.getObjects(ControlledVocabulary.class)) {
            Class<? extends BioPAXElement> type = cv.getModelInterface();
            UnificationXref first = getFirstUnificationXref(cv);
            if (first != null) {
                map.put(cv, uri(base, first.getDb(), first.getId(), type));
            } else if (!cv.getTerm().isEmpty()) {
                map.put(cv, uri(base, null, cv.getTerm().iterator().next(), type));
            } else {
                log.info("Cannot normalize " + type.getSimpleName() + " : no unification xrefs nor terms found in " + cv.getRDFId() + ". " + this.description);
            }
        }

        map.doSubs();
    }

    /**
     * Replaces BioSource objects with copies that have normalized URIs.
     *
     * Only a BioSource whose first unification xref has a db name containing
     * "taxonomy" or equal to "newt" (ignoring case) is processed. The id part
     * of the URI is the xref id followed, where present, by a tissue term and
     * a cell type term.
     *
     * The xml base comes from {@link #getXmlBase(Model)}, as it does for
     * xrefs, so the model's own xml base is used when none was set on this
     * normalizer.
     */
    private void normalizeBioSources(Model model) {
        NormalizerMap map = new NormalizerMap(model);
        String base = getXmlBase(model);

        for (BioSource bioSource : model.getObjects(BioSource.class)) {
            UnificationXref first = getFirstUnificationXref(bioSource);
            // Locale.ROOT: the match must not depend on the JVM default locale
            boolean taxonomyXref = first != null
                    && (first.getDb().toLowerCase(Locale.ROOT).contains("taxonomy")
                            || first.getDb().equalsIgnoreCase("newt"));
            if (!taxonomyXref) {
                log.debug("Won't normalize BioSource : no taxonomy unification xref found in " + bioSource.getRDFId() + ". " + this.description);
                continue;
            }

            StringBuilder idPart = new StringBuilder(first.getId());
            if (bioSource.getTissue() != null && !bioSource.getTissue().getTerm().isEmpty()) {
                idPart.append("_").append(bioSource.getTissue().getTerm().iterator().next());
            }
            if (bioSource.getCellType() != null && !bioSource.getCellType().getTerm().isEmpty()) {
                idPart.append("_").append(bioSource.getCellType().getTerm().iterator().next());
            }

            map.put(bioSource, uri(base, first.getDb(), idPart.toString(), BioSource.class));
        }

        map.doSubs();
    }

    /**
     * Replaces entity references with copies whose URI is the
     * identifiers.org URI of their first unification xref. References whose
     * URI already starts with "http://identifiers.org/" and references
     * without a usable unification xref are left as they are.
     */
    private void normalizeERs(Model model) {
        final NormalizerMap replacements = new NormalizerMap(model);

        for (EntityReference entityReference : model.getObjects(EntityReference.class)) {
            final String currentUri = entityReference.getRDFId();
            if (currentUri.startsWith("http://identifiers.org/")) {
                log.info("Skip already normalized: " + currentUri);
                continue;
            }

            UnificationXref first = getFirstUnificationXref(entityReference);
            if (first == null) {
                log.info("Cannot normalize EntityReference: no unification xrefs found in " + entityReference.getRDFId() + ". " + this.description);
                continue;
            }
            replacements.put((UtilityClass) entityReference, first);
        }

        replacements.doSubs();
    }

    /**
     * Completes the names of a Provenance from the MIRIAM registry.
     *
     * The lookup key is the object's URI when it starts with "urn:miriam:" or
     * "http://identifiers.org/", otherwise its standardName, otherwise its
     * displayName. When the key yields no names, each of the existing names
     * is tried instead. All names found are added to the object, the
     * standardName is set to the preferred name, and a null displayName is
     * set to the standardName. Lookups that fail with
     * IllegalArgumentException are ignored on purpose.
     *
     * Nothing is done for an object that has neither such a URI nor any name.
     *
     * @param provenance the object to update
     */
    public static void autoName(Provenance provenance) {
        final String uri = provenance.getRDFId();
        final boolean standardUri = uri.startsWith("urn:miriam:") || uri.startsWith("http://identifiers.org/");

        if (!standardUri && provenance.getName().isEmpty()) {
            log.info("Skipping: cannot normalize Provenance: " + uri);
            return;
        }

        final String key;
        if (standardUri) {
            key = uri;
        } else if (provenance.getStandardName() != null) {
            key = provenance.getStandardName();
        } else {
            key = provenance.getDisplayName();
        }

        TreeSet<String> found = new TreeSet<String>();
        if (key != null) {
            try {
                found.addAll(Arrays.asList(MiriamLink.getNames(key)));
                provenance.setStandardName(MiriamLink.getName(key));
                provenance.addComment(MiriamLink.getDataTypeDef(provenance.getStandardName()));
            } catch (IllegalArgumentException ignored) {
                // the key is not known to the registry; fall through to the names
            }
        }

        if (found.isEmpty()) {
            for (String existingName : provenance.getName()) {
                try {
                    found.addAll(Arrays.asList(MiriamLink.getNames(existingName)));
                } catch (IllegalArgumentException ignored) {
                    // this name is not known to the registry; try the next one
                }
            }
            if (!found.isEmpty()) {
                provenance.setStandardName(MiriamLink.getName(found.first()));
            }
        }

        for (String name : found) {
            provenance.addName(name);
        }

        if (provenance.getDisplayName() == null) {
            provenance.setDisplayName(provenance.getStandardName());
        }
    }

    /**
     * Converts BioPAX OWL data of a lower level to Level 3.
     *
     * The text is encoded and the result decoded as UTF-8 (as in
     * {@link #normalize(String)}) instead of the platform default charset,
     * so non-ASCII content does not depend on the JVM settings.
     *
     * @param str the BioPAX OWL content
     * @return the given string itself if the data is Level 3 already; the
     *         upgraded OWL text otherwise; "" if the upgrade produced no model
     * @throws RuntimeException if reading, upgrading or writing fails
     *         (including a null argument)
     */
    public static String convertToLevel3(String str) {
        try {
            SimpleIOHandler io = new SimpleIOHandler();
            io.mergeDuplicates(true);
            Model model = io.convertFromOWL(new ByteArrayInputStream(str.getBytes("UTF-8")));

            if (model.getLevel() == BioPAXLevel.L3) {
                return str;
            }

            log.info("Converting to BioPAX Level3... " + model.getXmlBase());
            Model upgraded = new LevelUpgrader().filter(model);
            if (upgraded == null) {
                return "";
            }

            // the handler must create/write objects of the upgraded model's level
            io.setFactory(upgraded.getLevel().getDefaultFactory());
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            io.convertToOWL(upgraded, out);
            return out.toString("UTF-8");
        } catch (Exception e) {
            throw new RuntimeException("Cannot convert to BioPAX Level3", e);
        }
    }

    /**
     * Resolves the xml base to use for the model: the one set on this
     * normalizer if it is not null or empty, else the model's own, else "".
     */
    private String getXmlBase(Model model) {
        if (this.xmlBase != null && !this.xmlBase.isEmpty()) {
            return this.xmlBase;
        }
        if (model.getXmlBase() != null) {
            return model.getXmlBase();
        }
        return "";
    }

    /**
     * @return true if the displayName auto-fix step is enabled
     */
    public boolean isFixDisplayName() {
        return fixDisplayName;
    }

    /**
     * @param z true to enable the displayName auto-fix step
     */
    public void setFixDisplayName(boolean z) {
        fixDisplayName = z;
    }

    /**
     * @return true if the "organism" property is to be inferred from parent elements
     */
    public boolean isInferPropertyOrganism() {
        return inferPropertyOrganism;
    }

    /**
     * @param z true to infer the "organism" property from parent elements
     */
    public void setInferPropertyOrganism(boolean z) {
        inferPropertyOrganism = z;
    }

    /**
     * @return true if the "dataSource" property is to be inferred from parent elements
     */
    public boolean isInferPropertyDataSource() {
        return inferPropertyDataSource;
    }

    /**
     * @param z true to infer the "dataSource" property from parent elements
     */
    public void setInferPropertyDataSource(boolean z) {
        inferPropertyDataSource = z;
    }

    /**
     * @return the xml base set on this normalizer (the constructor sets "")
     */
    public String getXmlBase() {
        return xmlBase;
    }

    /**
     * @param str the xml base to prefix generated URIs with
     */
    public void setXmlBase(String str) {
        xmlBase = str;
    }
}
