
intersection query for relevant_pages and corresp test #402

Closed · wants to merge 1 commit
19 changes: 11 additions & 8 deletions nlp.py
@@ -293,14 +293,17 @@ def expand_pages( pages ):
     return expanded
 
 def relevant_pages(query):
-    """Relevant pages are pages that contain the query in its entireity.
-    If a page's content contains the query it is returned by the function."""
-    relevant = {}
-    print("pagesContent in function: ", pagesContent)
-    for addr, page in pagesIndex.items():
-        if query.lower() in pagesContent[addr].lower():
-            relevant[addr] = page
-    return relevant
+    """Relevant pages are pages that contain all of the query words. They are
+    obtained by intersecting the hit lists of the query words."""
+    intersection = set()
+    query_words = query.split()
+    for query_word in query_words:
+        hit_list = set()
+        for addr in pagesIndex:
+            if query_word.lower() in pagesContent[addr].lower():
+                hit_list.add(addr)
+        intersection = hit_list if not intersection else intersection.intersection(hit_list)
Contributor:
This would lead to problems when the intersection is an empty set. When the set becomes empty, it is replaced by hit_list instead of staying empty. I've fixed this and the merge conflict in #509.
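
For illustration, a minimal sketch of one possible fix, using None rather than set truthiness as the "not yet initialized" sentinel (a sketch against the same module-level pagesIndex and pagesContent globals, not the actual patch in #509):

    def relevant_pages(query):
        """Pages whose content contains every word of the query."""
        intersection = None  # None: no word processed yet; set(): genuinely no match
        for query_word in query.split():
            hit_list = {addr for addr in pagesIndex
                        if query_word.lower() in pagesContent[addr].lower()}
            # An empty intersection now stays empty instead of being
            # overwritten by the next word's hit list.
            intersection = hit_list if intersection is None else intersection & hit_list
        return {addr: pagesIndex[addr] for addr in (intersection or set())}

With the truthiness guard, a query such as "philosophy mom" returns every page containing "mom", because the empty hit list produced by "philosophy" is silently replaced; with the None sentinel it correctly returns an empty dict.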

+    return {addr: pagesIndex[addr] for addr in intersection}

 def normalize( pages ):
     """From the pseudocode: Normalize divides each page's score by the sum of
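
To make the intersection semantics concrete, a toy run with hypothetical stand-ins for the module-level globals (the real pagesIndex maps addresses to Page objects; plain strings are used here only for readability, and nlp.py is assumed to import cleanly):

    import nlp

    # Hypothetical two-page corpus standing in for the crawled data.
    nlp.pagesIndex = {"A": "pageA", "B": "pageB"}
    nlp.pagesContent = {"A": "his mom and his dad", "B": "only a dad here"}

    # Hit lists: "mom" -> {"A"}, "dad" -> {"A", "B"}; intersecting them
    # leaves {"A"}, so only page A contains every query word.
    print(nlp.relevant_pages("mom dad"))   # {'A': 'pageA'}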
10 changes: 7 additions & 3 deletions tests/test_nlp.py
@@ -26,7 +26,7 @@ def test_lexicon():
     href="https://google.com.au"
     < href="/wiki/TestThing" > href="/wiki/TestBoy"
     href="/wiki/TestLiving" href="/wiki/TestMan" >"""
-testHTML2 = "Nothing"
+testHTML2 = "a mom and a dad"
 
 pA = Page("A", 1, 6, ["B", "C", "E"], ["D"])
 pB = Page("B", 2, 5, ["E"], ["A", "C", "D"])
@@ -87,9 +87,13 @@ def test_expand_pages():


 def test_relevant_pages():
-    pages = relevant_pages("male")
-    assert all((x in pages.keys()) for x in ['A', 'C', 'E'])
+    pages = relevant_pages("his dad")
+    assert all((x in pages) for x in ['A', 'C', 'E'])
+    assert all((x not in pages) for x in ['B', 'D', 'F'])
+    pages = relevant_pages("mom and dad")
+    assert all((x in pages) for x in ['A', 'B', 'C', 'D', 'E', 'F'])
+    pages = relevant_pages("philosophy")
+    assert all((x not in pages) for x in ['A', 'B', 'C', 'D', 'E', 'F'])
 
 
 def test_normalize():
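
Assuming the repository's standard pytest setup, the updated assertions can be run in isolation with:

    pytest tests/test_nlp.py -k test_relevant_pages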