Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456
{- Google's MapReduce programming model revisited (C) Ralf Laemmel, 2006 An exectutable specification of MapReduce's key abstraction. The example "counting word occurrences" is also demonstrated. -} module Step7 where import qualified Data.List -- A library for advanced list-processing import qualified Data.Map -- A library type for dictionaries type Dict k v = Data.Map.Map k v -- An alias for Haskell's dictionary type mapReduce :: Ord k2 -- Needed for grouping => (k1 -> v1 -> [(k2,v2)]) -- The map function -> (k2 -> [v2] -> Maybe v3) -- The reduce function -> Dict k1 v1 -- A key to input-value mapping -> Dict k2 v3 -- A key to output-value mapping mapReduce map reduce = reducePerKey reduce . groupByKey . mapPerKey map mapPerKey :: (k1 -> v1 -> [(k2,v2)]) -- The map function -> Dict k1 v1 -- A key to input-value mapping -> [(k2,v2)] -- The intermediate key/value pairs mapPerKey f = concat -- Concatenate per-key lists . map (uncurry f) -- Map map over list of pairs . Data.Map.toList -- Turn dictionary into list groupByKey :: Ord k2 -- Needed for grouping => [(k2,v2)] -- The intermediate key/value pairs -> Dict k2 [v2] -- The grouped intermediate values groupByKey = foldl insert Data.Map.empty where insert m (k2,v2) = Data.Map.insertWith (++) k2 [v2] m reducePerKey :: Ord k2 -- Potentially needed for fromList => (k2 -> [v2] -> Maybe v3) -- The reduce function -> Dict k2 [v2] -- The grouped intermediate values -> Dict k2 v3 -- A key to output-value mapping reducePerKey f = Data.Map.mapWithKey h -- 3. Eliminate Justs . Data.Map.filterWithKey g -- 2. Filter non-Nothings . Data.Map.mapWithKey f -- 1. Apply reduce per key where g k2 Nothing = False g k2 (Just _) = True h k2 (Just x) = x -- The 4 liner for word-occurrence counting wordOccurrenceCount = mapReduce myMap myReduce where myMap = const (map (flip (,) 1) . words) myReduce = const (Just . sum) -- ... or use length! -- Test harness main = print $ wordOccurrenceCount $ Data.Map.insert "doc2" "appreciate the unfold" $ Data.Map.insert "doc1" "fold the fold" $ Data.Map.empty {- Main> main {"appreciate":=1,"fold":=2,"the":=2,"unfold":=1} -}