From ebdc9dc74c0354466c1004a9f9e34af8f3057afc Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Mon, 20 Mar 2017 09:30:26 -0700 Subject: [PATCH] Implemented a simple boolean matcher. This can be thought of as a simple boolean limit case of other matchers; e.g. Jaccard with a floor function. Although this is not anticipated to be useful for the general phenotype search use case, it can be useful to leverage owlsim services for doing basic queries of the form: find all genes with phenotypes A and B but not C --- .../owlsim/compute/classmatch/ClassMatcher.java | 15 +++- .../matcher/impl/BooleanProfileMatcher.java | 90 ++++++++++++++++++++++ .../owlsim/eval/ProfileMatchEvaluator.java | 2 + .../monarchinitiative/owlsim/eval/TestQuery.java | 10 +++ .../owlsim/model/match/impl/MatchSetImpl.java | 9 ++- .../matcher/AbstractProfileMatcherTest.java | 11 +++ .../compute/matcher/BooleanProfileMatcherTest.java | 73 ++++++++++++++++++ 7 files changed, 205 insertions(+), 5 deletions(-) create mode 100644 owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BooleanProfileMatcher.java create mode 100644 owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/BooleanProfileMatcherTest.java diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcher.java index f3c784a..6997919 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcher.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/classmatch/ClassMatcher.java @@ -27,8 +27,8 @@ public ClassMatcher(BMKnowledgeBase kb) { /** * Find best match for every class in ont1, where the best match is in ont2 * - * @param qOnt - * @param tOnt + * @param qOnt - ontology prefix + * @param tOnt - ontology prefix * @return list of matches */ public List matchOntologies(String qOnt, String tOnt) { @@ -37,8 +37,17 @@ 
public ClassMatcher(BMKnowledgeBase kb) { return matchClassSets(qids, tids); } + /** + * Find best matches for all class combos {qid1, ...} x {tid1, ...} + * + * @param qids - classes + * @param tids - classes + * @return list of matches + */ public List matchClassSets(Set qids, Set tids) { ArrayList matches = new ArrayList<>(); + + // TODO: consider optimization, by first grouping by system for (String q : qids) { matches.add(getBestMatch(q, tids)); } @@ -58,6 +67,8 @@ private SimpleClassMatch getBestMatch(String q, Set tids) { bestEqScore = eqScore; best = t; } + if (bestEqScore >= 1.0) + break; } EWAHCompressedBitmap tbm = kb.getSuperClassesBM(best); diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BooleanProfileMatcher.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BooleanProfileMatcher.java new file mode 100644 index 0000000..bff1411 --- /dev/null +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/compute/matcher/impl/BooleanProfileMatcher.java @@ -0,0 +1,90 @@ +package org.monarchinitiative.owlsim.compute.matcher.impl; + +import java.util.List; + +import javax.inject.Inject; + +import org.apache.log4j.Logger; +import org.monarchinitiative.owlsim.compute.matcher.NegationAwareProfileMatcher; +import org.monarchinitiative.owlsim.compute.matcher.ProfileMatcher; +import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; +import org.monarchinitiative.owlsim.kb.filter.UnknownFilterException; +import org.monarchinitiative.owlsim.model.match.MatchSet; +import org.monarchinitiative.owlsim.model.match.ProfileQuery; +import org.monarchinitiative.owlsim.model.match.QueryWithNegation; +import org.monarchinitiative.owlsim.model.match.impl.MatchSetImpl; + +import com.googlecode.javaewah.EWAHCompressedBitmap; + +/** + * Implements a standard boolean query + * + * @author cjm + * + */ +public class BooleanProfileMatcher extends AbstractProfileMatcher implements NegationAwareProfileMatcher { + + 
private Logger LOG = Logger.getLogger(BooleanProfileMatcher.class); + + /** + * @param kb + */ + @Inject + public BooleanProfileMatcher(BMKnowledgeBase kb) { + super(kb); + } + + + /** + * @param kb + * @return new instance + */ + public static ProfileMatcher create(BMKnowledgeBase kb) { + return new BooleanProfileMatcher(kb); + } + + @Override + public String getShortName() { + return "boolean"; + } + + /** + * @param q + * @return match profile containing probabilities of each individual + * @throws UnknownFilterException + */ + public MatchSet findMatchProfileImpl(ProfileQuery q) throws UnknownFilterException { + + EWAHCompressedBitmap queryProfileBM = getDirectProfileBM(q); + boolean hasNegationQuery = false; + EWAHCompressedBitmap negatedQueryProfileBM = null; + if (q instanceof QueryWithNegation) { + negatedQueryProfileBM = getDirectNegatedProfileBM((QueryWithNegation) q); + hasNegationQuery = negatedQueryProfileBM.cardinality() > 0; + } + + // TODO + MatchSet mp = MatchSetImpl.create(q); + int qcard = queryProfileBM.cardinality(); + List indIds = getFilteredIndividualIds(q.getFilter()); + for (String itemId : indIds) { + EWAHCompressedBitmap targetProfileBM = knowledgeBase.getTypesBM(itemId); + int numInQueryAndInTarget = queryProfileBM.andCardinality(targetProfileBM); + if (numInQueryAndInTarget == qcard) { + if (!hasNegationQuery || + negatedQueryProfileBM.andCardinality(targetProfileBM) == 0) { + String label = knowledgeBase.getLabelMapper().getArbitraryLabel(itemId); + mp.add(createMatch(itemId, label, 1)); + + } + } + } + mp.sortMatches(); + return mp; + } + + + + + +} diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/eval/ProfileMatchEvaluator.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/eval/ProfileMatchEvaluator.java index eb9691d..9c8fdff 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/eval/ProfileMatchEvaluator.java +++ 
b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/eval/ProfileMatchEvaluator.java @@ -78,6 +78,8 @@ public boolean evaluateTestQuery(ProfileMatcher profileMatcher, TestQuery tq) th if (jsonWriter != null) { LOG.info("Writing MatchSet using "+jsonWriter+" results will appear in "+jsonWriter); + jsonWriter.write(mp.getMatches().get(0)); + LOG.info("MATCHES:"+mp); jsonWriter.write(mp); } diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/eval/TestQuery.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/eval/TestQuery.java index fdf5e2c..fb613c0 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/eval/TestQuery.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/eval/TestQuery.java @@ -27,4 +27,14 @@ public TestQuery(ProfileQuery query, String expectedId, int maxRank) { this.expectedId = expectedId; this.maxRank = maxRank; } + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "TestQuery [query=" + query + ", expectedId=" + expectedId + + ", maxRank=" + maxRank + ", matchSet=" + matchSet + "]"; + } + + } \ No newline at end of file diff --git a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/model/match/impl/MatchSetImpl.java b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/model/match/impl/MatchSetImpl.java index 1c1f5ff..dd5e015 100644 --- a/owlsim-core/src/main/java/org/monarchinitiative/owlsim/model/match/impl/MatchSetImpl.java +++ b/owlsim-core/src/main/java/org/monarchinitiative/owlsim/model/match/impl/MatchSetImpl.java @@ -23,7 +23,8 @@ * */ public class MatchSetImpl implements MatchSet { - private ProfileQuery query; + + private ProfileQuery query; private List matches; // TODO - make this neutral ExecutionMetadata executionMetadata; MethodMetadata methodMetadata; @@ -210,8 +211,10 @@ public String toString() { public void calculateMatchSignificance(DescriptiveStatistics background) { for (Match m : this.matches) { - double p 
= TestUtils.tTest(m.getScore(), background); - m.setSignificance(p); + if (background.getN() > 1) { + double p = TestUtils.tTest(m.getScore(), background); + m.setSignificance(p); + } } } diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java index 041b20a..8d372cc 100644 --- a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/AbstractProfileMatcherTest.java @@ -192,6 +192,17 @@ protected boolean isRankedLast(String matchId, MatchSet matchSet) { return true; } + protected boolean isNotInMatchSet(String matchId, MatchSet matchSet) { + for (Match m : matchSet.getMatches()) { + if (m.getMatchId().equals(matchId)) { + return false; + } + } + return true; + } + + + protected boolean isRankedAt(String matchId, MatchSet matchSet, int expectedRank) { int matchRank = 0; for (Match m : matchSet.getMatches()) { diff --git a/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/BooleanProfileMatcherTest.java b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/BooleanProfileMatcherTest.java new file mode 100644 index 0000000..4b7cd4a --- /dev/null +++ b/owlsim-core/src/test/java/org/monarchinitiative/owlsim/compute/matcher/BooleanProfileMatcherTest.java @@ -0,0 +1,73 @@ +package org.monarchinitiative.owlsim.compute.matcher; + +import org.apache.log4j.Logger; +import org.junit.Assert; +import org.junit.Test; +import org.monarchinitiative.owlsim.compute.matcher.impl.BooleanProfileMatcher; +import org.monarchinitiative.owlsim.eval.TestQuery; +import org.monarchinitiative.owlsim.kb.BMKnowledgeBase; +import org.monarchinitiative.owlsim.model.match.ProfileQuery; + +public class BooleanProfileMatcherTest extends AbstractProfileMatcherTest { + + private 
Logger LOG = Logger.getLogger(BooleanProfileMatcherTest.class); + + protected ProfileMatcher createProfileMatcher(BMKnowledgeBase kb) { + return BooleanProfileMatcher.create(kb); + } + @Test + public void testBoolean() throws Exception { + loadSimplePhenoWithNegation(); + //LOG.info("INDS="+kb.getIndividualIdsInSignature()); + ProfileMatcher profileMatcher = createProfileMatcher(kb); + + int nOk = 0; + for (String i : kb.getIndividualIdsInSignature()) { + LOG.info("I: "+i); + if (i.equals("http://x.org/ind-no-brain-phenotype")) { + continue; + } + if (i.equals("http://x.org/ind-unstated-phenotype")) { + continue; + } + ProfileQuery pq = profileMatcher.createProfileQuery(i); + TestQuery tq = new TestQuery(pq, i, 1); // self should always be ranked first + String fn = i.replaceAll(".*/", ""); + eval.writeJsonTo("target/boolean-test-results-"+fn+".json"); + + LOG.info("Evaluating for "+i); + eval.evaluateTestQuery(profileMatcher, tq); + //Assert.assertTrue(eval.evaluateTestQuery(profileMatcher, tq)); + + if (i.equals("http://x.org/ind-dec-all")) { + Assert.assertTrue(isNotInMatchSet("http://x.org/ind-unstated-phenotype", tq.matchSet)); + nOk++; + } + if (i.equals("http://x.org/ind-small-heart-big-brain")) { + Assert.assertTrue(isNotInMatchSet("http://x.org/ind-bone", tq.matchSet)); + nOk++; + } + + } + Assert.assertEquals(2, nOk); + } + + @Test + public void testExampleWithNegation() throws Exception { + loadSimplePhenoWithNegation(); + //LOG.info("INDS="+kb.getIndividualIdsInSignature()); + ProfileMatcher profileMatcher = createProfileMatcher(kb); + + int nOk = 0; + String i = "http://x.org/ind-small-heart-big-brain"; + + ProfileQuery pq = profileMatcher.createProfileQuery(i); + TestQuery tq = new TestQuery(pq, i, 1); // self should always be ranked first + String fn = i.replaceAll(".*/", ""); + eval.writeJsonTo("target/boolean-extra-test-results-"+fn+".json"); + Assert.assertTrue(eval.evaluateTestQuery(profileMatcher, tq)); + + 
Assert.assertTrue(isNotInMatchSet("http://x.org/ind-no-brain-phenotype", tq.matchSet)); + + } +}