Content-Length: 562443 | pFad | http://github.com/internetarchive/openlibrary/commit/62d2243131a9c7e6aee00d1e9c5660fd5b594e89

Aggregate want to read counts by author (#9545) · internetarchive/openlibrary@62d2243 · GitHub
Skip to content

Commit 62d2243

Browse files
authored
Aggregate want to read counts by author (#9545)
* First attempt at updating Solr's author updater to use the new facet API
* Fixed divide-by-zero issue when there are no recorded ratings
* Adjusted test to reflect the new way in which Solr handles the updating of authors
* Added default case to build ratings
1 parent e0e34eb commit 62d2243

File tree

3 files changed

+86
-33
lines changed

3 files changed

+86
-33
lines changed

openlibrary/core/ratings.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,13 @@ def work_ratings_summary_from_counts(
114114
cls, rating_counts: list[int]
115115
) -> WorkRatingsSummary:
116116
total_count = sum(rating_counts, 0)
117+
ratings_average = (
118+
(sum((k * n_k for k, n_k in enumerate(rating_counts, 1)), 0) / total_count)
119+
if total_count != 0
120+
else 0
121+
)
117122
return {
118-
'ratings_average': sum(
119-
(k * n_k for k, n_k in enumerate(rating_counts, 1)), 0
120-
)
121-
/ total_count,
123+
'ratings_average': ratings_average,
122124
'ratings_sortable': cls.compute_sortable_rating(rating_counts),
123125
'ratings_count': total_count,
124126
'ratings_count_1': rating_counts[0],

openlibrary/solr/updater/author.py

Lines changed: 69 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
1+
from typing import cast
2+
import typing
13
import httpx
4+
from openlibrary.solr.solr_types import SolrDocument
25
from openlibrary.solr.updater.abstract import AbstractSolrBuilder, AbstractSolrUpdater
36
from openlibrary.solr.utils import SolrUpdateRequest, get_solr_base_url
7+
from openlibrary.solr.data_provider import WorkReadingLogSolrSummary
8+
from openlibrary.core.ratings import WorkRatingsSummary, Ratings
9+
10+
11+
SUBJECT_FACETS = ['subject_facet', 'time_facet', 'person_facet', 'place_facet']
412

513

614
class AuthorSolrUpdater(AbstractSolrUpdater):
@@ -9,24 +17,34 @@ class AuthorSolrUpdater(AbstractSolrUpdater):
917

1018
async def update_key(self, author: dict) -> tuple[SolrUpdateRequest, list[str]]:
1119
author_id = author['key'].split("/")[-1]
12-
facet_fields = ['subject', 'time', 'person', 'place']
13-
base_url = get_solr_base_url() + '/select'
14-
20+
base_url = get_solr_base_url() + '/query'
21+
22+
json: dict[str, typing.Any] = {
23+
"params": {
24+
"json.nl": "arrarr",
25+
"q": "author_key:%s " % author_id,
26+
"fl": "title, subtitle",
27+
"sort": "edition_count desc",
28+
},
29+
'facet': {
30+
"ratings_count_1": "sum(ratings_count_1)",
31+
"ratings_count_2": "sum(ratings_count_2)",
32+
"ratings_count_3": "sum(ratings_count_3)",
33+
"ratings_count_4": "sum(ratings_count_4)",
34+
"ratings_count_5": "sum(ratings_count_5)",
35+
"readinglog_count": "sum(readinglog_count)",
36+
"want_to_read_count": "sum(want_to_read_count)",
37+
"currently_reading_count": "sum(currently_reading_count)",
38+
"already_read_count": "sum(already_read_count)",
39+
},
40+
}
41+
for field in SUBJECT_FACETS:
42+
json["facet"][field] = {
43+
"type": "terms",
44+
"field": field,
45+
}
1546
async with httpx.AsyncClient() as client:
16-
response = await client.get(
17-
base_url,
18-
params=[ # type: ignore[arg-type]
19-
('wt', 'json'),
20-
('json.nl', 'arrarr'),
21-
('q', 'author_key:%s' % author_id),
22-
('sort', 'edition_count desc'),
23-
('rows', 1),
24-
('fl', 'title,subtitle'),
25-
('facet', 'true'),
26-
('facet.mincount', 1),
27-
]
28-
+ [('facet.field', '%s_facet' % field) for field in facet_fields],
29-
)
47+
response = await client.post(base_url, json=json)
3048
reply = response.json()
3149

3250
doc = AuthorSolrBuilder(author, reply).build()
@@ -85,8 +103,38 @@ def work_count(self) -> int:
85103
@property
86104
def top_subjects(self) -> list[str]:
87105
all_subjects = []
88-
for counts in self._solr_reply['facet_counts']['facet_fields'].values():
89-
for s, num in counts:
90-
all_subjects.append((num, s))
106+
for field in SUBJECT_FACETS:
107+
if facet := self._solr_reply['facets'].get(field):
108+
for bucket in facet['buckets']:
109+
all_subjects.append((bucket.count, bucket.val))
91110
all_subjects.sort(reverse=True)
92-
return [s for num, s in all_subjects[:10]]
111+
return [top_facets for num, top_facets in all_subjects[:10]]
112+
113+
def build(self) -> SolrDocument:
114+
doc = cast(dict, super().build())
115+
doc |= self.build_ratings()
116+
doc |= self.build_reading_log()
117+
return cast(SolrDocument, doc)
118+
119+
def build_ratings(self) -> WorkRatingsSummary:
120+
return Ratings.work_ratings_summary_from_counts(
121+
[
122+
self._solr_reply["facets"].get(f"ratings_count_{index}", 0)
123+
for index in range(1, 6)
124+
]
125+
)
126+
127+
def build_reading_log(self) -> WorkReadingLogSolrSummary:
128+
reading_log = {
129+
"want_to_read_count": self._solr_reply["facets"].get(
130+
"want_to_read_count", 0.0
131+
),
132+
"already_read_count": self._solr_reply["facets"].get(
133+
"already_read_count", 0.0
134+
),
135+
"currently_reading_count": self._solr_reply["facets"].get(
136+
"currently_reading_count", 0.0
137+
),
138+
"readinglog_count": self._solr_reply["facets"].get("readinglog_count", 0.0),
139+
}
140+
return cast(WorkReadingLogSolrSummary, reading_log)

openlibrary/tests/solr/updater/test_author.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,19 @@ async def __aenter__(self):
2323
async def __aexit__(self, exc_type, exc_val, exc_tb):
2424
pass
2525

26-
async def get(self, url, params):
26+
async def post(self, url, json):
2727
return MockResponse(
2828
{
29-
"facet_counts": {
30-
"facet_fields": {
31-
"place_facet": [],
32-
"person_facet": [],
33-
"subject_facet": [],
34-
"time_facet": [],
35-
}
29+
"facets": {
30+
"ratings_count_1": 0.0,
31+
"ratings_count_2": 0.0,
32+
"ratings_count_3": 0.0,
33+
"ratings_count_4": 0.0,
34+
"ratings_count_5": 0.0,
35+
"subject_facet": {"buckets": []},
36+
"place_facet": {"buckets": []},
37+
"time_facet": {"buckets": []},
38+
"person_facet": {"buckets": []},
3639
},
3740
"response": {"numFound": 0},
3841
}

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/internetarchive/openlibrary/commit/62d2243131a9c7e6aee00d1e9c5660fd5b594e89

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy