Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456
{-
Google's MapReduce programming model revisited
(C) Ralf Laemmel, 2006
An exectutable specification of MapReduce's key abstraction.
The example "counting word occurrences" is also demonstrated.
-}
module Step7 where
import qualified Data.List -- A library for advanced list-processing
import qualified Data.Map -- A library type for dictionaries
type Dict k v = Data.Map.Map k v -- An alias for Haskell's dictionary type
mapReduce :: Ord k2 -- Needed for grouping
=> (k1 -> v1 -> [(k2,v2)]) -- The map function
-> (k2 -> [v2] -> Maybe v3) -- The reduce function
-> Dict k1 v1 -- A key to input-value mapping
-> Dict k2 v3 -- A key to output-value mapping
mapReduce map reduce
= reducePerKey reduce
. groupByKey
. mapPerKey map
mapPerKey :: (k1 -> v1 -> [(k2,v2)]) -- The map function
-> Dict k1 v1 -- A key to input-value mapping
-> [(k2,v2)] -- The intermediate key/value pairs
mapPerKey f = concat -- Concatenate per-key lists
. map (uncurry f) -- Map map over list of pairs
. Data.Map.toList -- Turn dictionary into list
groupByKey :: Ord k2 -- Needed for grouping
=> [(k2,v2)] -- The intermediate key/value pairs
-> Dict k2 [v2] -- The grouped intermediate values
groupByKey = foldl insert Data.Map.empty
where
insert m (k2,v2) = Data.Map.insertWith (++) k2 [v2] m
reducePerKey :: Ord k2 -- Potentially needed for fromList
=> (k2 -> [v2] -> Maybe v3) -- The reduce function
-> Dict k2 [v2] -- The grouped intermediate values
-> Dict k2 v3 -- A key to output-value mapping
reducePerKey f = Data.Map.mapWithKey h -- 3. Eliminate Justs
. Data.Map.filterWithKey g -- 2. Filter non-Nothings
. Data.Map.mapWithKey f -- 1. Apply reduce per key
where
g k2 Nothing = False
g k2 (Just _) = True
h k2 (Just x) = x
-- The 4 liner for word-occurrence counting
wordOccurrenceCount = mapReduce myMap myReduce
where
myMap = const (map (flip (,) 1) . words)
myReduce = const (Just . sum) -- ... or use length!
-- Test harness
main = print
$ wordOccurrenceCount
$ Data.Map.insert "doc2" "appreciate the unfold"
$ Data.Map.insert "doc1" "fold the fold"
$ Data.Map.empty
{-
Main> main
{"appreciate":=1,"fold":=2,"the":=2,"unfold":=1}
-}