|
7 | 7 | import java.util.function.Function;
|
8 | 8 | import java.util.stream.Collectors;
|
9 | 9 |
|
10 |
/**
 * MapReduce is a programming model for processing and generating large data sets
 * using a parallel, distributed algorithm on a cluster.
 * It consists of two main phases:
 *  - Map: the input data is split into smaller chunks and processed in parallel.
 *  - Reduce: the results from the Map phase are aggregated to produce the final output.
 *
 * This class is a small, single-threaded illustration of that model built on the
 * Java Stream API; it does not distribute any work.
 *
 * See also: https://en.wikipedia.org/wiki/MapReduce
 */
public final class MapReduce {

    private MapReduce() {
        // Utility class: not meant to be instantiated.
    }

    /**
     * Counts the frequency of each word in a given sentence using a simple MapReduce-style approach.
     *
     * Words are the maximal runs of non-whitespace characters; leading and trailing whitespace is
     * ignored. Matching is case-sensitive, and words are reported in order of first appearance.
     *
     * @param sentence the input sentence; must not be null
     * @return the word frequencies in the format "word: count,word: count,...", or an empty string
     *         when the sentence contains no words
     * @throws NullPointerException if {@code sentence} is null
     */
    public static String countWordFrequencies(String sentence) {
        if (sentence == null) {
            throw new NullPointerException("sentence must not be null");
        }

        // Map phase: split the sentence into words.
        // Splitting an empty string yields a single empty token, so empty tokens are dropped;
        // otherwise blank input would be reported as ": 1".
        List<String> words = Arrays.stream(sentence.trim().split("\\s+")).filter(word -> !word.isEmpty()).toList();

        // Group and count occurrences of each word; LinkedHashMap keeps first-appearance order.
        Map<String, Long> wordCounts = words.stream().collect(Collectors.groupingBy(Function.identity(), LinkedHashMap::new, Collectors.counting()));

        // Reduce phase: format the result. Joining an empty stream yields "".
        return wordCounts.entrySet().stream().map(entry -> entry.getKey() + ": " + entry.getValue()).collect(Collectors.joining(","));
    }
}
|
0 commit comments