Skip to content

Commit 2c29a90

Browse files
Chipe1 authored and norvig committed
Fixed mistake in HITS and add test to NLP (aimacode#441)
* Add test for determineInlinks()
* Add test for HITS()
* Fix premature update
* Refactor code to match pseudocode
1 parent d3155eb commit 2c29a90

File tree

2 files changed

+18
-13
lines changed

2 files changed

+18
-13
lines changed

nlp.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -356,13 +356,13 @@ def detect(self):
356356
def getInlinks(page):
357357
if not page.inlinks:
358358
page.inlinks = determineInlinks(page)
359-
return [p for addr, p in pagesIndex.items() if addr in page.inlinks]
359+
return [addr for addr, p in pagesIndex.items() if addr in page.inlinks]
360360

361361

362362
def getOutlinks(page):
363363
if not page.outlinks:
364364
page.outlinks = findOutlinks(page)
365-
return [p for addr, p in pagesIndex.items() if addr in page.outlinks]
365+
return [addr for addr, p in pagesIndex.items() if addr in page.outlinks]
366366

367367

368368
# ______________________________________________________________________________
@@ -389,9 +389,11 @@ def HITS(query):
389389
p.authority = 1
390390
p.hub = 1
391391
while True: # repeat until... convergence
392-
for p in pages.values():
393-
p.authority = sum(x.hub for x in getInlinks(p)) # p.authority ← ∑i Inlinki(p).Hub
394-
p.hub = sum(x.authority for x in getOutlinks(p)) # p.hub ← ∑i Outlinki(p).Authority
392+
authority = {p: pages[p].authority for p in pages}
393+
hub = {p: pages[p].hub for p in pages}
394+
for p in pages:
395+
pages[p].authority = sum(hub[x] for x in getInlinks(pages[p])) # p.authority ← ∑i Inlinki(p).Hub
396+
pages[p].hub = sum(authority[x] for x in getOutlinks(pages[p])) # p.hub ← ∑i Outlinki(p).Authority
395397
normalize(pages)
396398
if convergence():
397399
break

tests/test_nlp.py

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
from nlp import loadPageHTML, stripRawHTML, findOutlinks, onlyWikipediaURLS
55
from nlp import expand_pages, relevant_pages, normalize, ConvergenceDetector, getInlinks
6-
from nlp import getOutlinks, Page
6+
from nlp import getOutlinks, Page, determineInlinks, HITS
77
from nlp import Rules, Lexicon
88
# Clumsy imports because we want to access certain nlp.py globals explicitly, because
99
# they are accessed by functions within nlp.py
@@ -80,9 +80,9 @@ def test_stripRawHTML(html_mock):
8080

8181

8282
def test_determineInlinks():
83-
# TODO
84-
assert True
85-
83+
assert set(determineInlinks(pA)) == set(['B', 'C', 'E'])
84+
assert set(determineInlinks(pE)) == set([])
85+
assert set(determineInlinks(pF)) == set(['E'])
8686

8787
def test_findOutlinks_wiki():
8888
testPage = pageDict[pA.address]
@@ -141,17 +141,20 @@ def test_detectConvergence():
141141

142142
def test_getInlinks():
143143
inlnks = getInlinks(pageDict['A'])
144-
assert sorted([page.address for page in inlnks]) == pageDict['A'].inlinks
144+
assert sorted(inlnks) == pageDict['A'].inlinks
145145

146146

147147
def test_getOutlinks():
148148
outlnks = getOutlinks(pageDict['A'])
149-
assert sorted([page.address for page in outlnks]) == pageDict['A'].outlinks
149+
assert sorted(outlnks) == pageDict['A'].outlinks
150150

151151

152152
def test_HITS():
153-
# TODO
154-
assert True # leave for now
153+
HITS('inherit')
154+
auth_list = [pA.authority, pB.authority, pC.authority, pD.authority, pE.authority, pF.authority]
155+
hub_list = [pA.hub, pB.hub, pC.hub, pD.hub, pE.hub, pF.hub]
156+
assert max(auth_list) == pD.authority
157+
assert max(hub_list) == pE.hub
155158

156159

157160
if __name__ == '__main__':

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy