Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456
{-# OPTIONS -fscoped-type-variables #-}
{-# OPTIONS -fglasgow-exts #-}
{-
Google's MapReduce programming model revisited
(C) Ralf Laemmel, 2006
Replace explicit by implicit parallelism.
-}
module Step9 where
import Prelude hiding (map,foldl,fst,snd,concat)
import qualified Prelude (map,foldl,fst,snd,concat)
import qualified Data.List
import qualified Data.Map
type Dict k v = Data.Map.Map k v
-- Interface for determining size of values
class Size x
where
size :: x -> Int
instance Size String
where
size = const 1
instance Size Int
where
size = const 1
-- System-wide definition of split size
split = 3
-- Interface for partitioning
class Ord x => Keycode x
where
keycode :: Int -> x -> Int
instance Keycode String
where
keycode max key =
((Prelude.fromEnum (Prelude.head key)) `mod` max) + 1
-- The key abstraction
mapReduce :: forall k1 k2 v1 v2 v3 v4.
(Size v1, Size v2, Ord k1, Keycode k2)
=> (k1 -> v1 -> [(k2,v2)]) -- The map function
-> (k2 -> [v2] -> Maybe v3) -- The combiner function
-> (k2 -> [v3] -> Maybe v4) -- The reduce function
-> Dict k1 v1 -- Input data
-> Dict k2 v4 -- Output data
mapReduce map combiner reduce =
concatReducts -- 9. Concatenate results
. Prelude.map (
reducePerKey -- 8. Apply reduce to each partition
. groupByKey ) -- 7. Group intermediates per key
. mergeParts -- 6. Merge scattered partitions
. Prelude.map (
Prelude.map (
combinePerKey -- 5. Apply combiner locally
. groupByKey ) ) -- 4. Group local intermediate data
. partition -- 3. Partition local intermediate data
. Prelude.map mapPerKey -- 2. Apply map locally to each piece
. splitInput -- 1. Split up input data into pieces
where
splitInput :: Dict k1 v1 -> [Dict k1 v1]
splitInput =
Prelude.map Data.Map.fromList -- 4. Turn list of pairs into dictionary
. Prelude.fst -- 3. Project away size of last piece
. Prelude.foldl splitHelper ([[]],0) -- 2. Splitting as a list fold
. Data.Map.toList -- 1. Access dictionary as list of pairs
where
splitHelper :: ([[(k1,v1)]],Int) -- Pieces so far with size of head
-> (k1,v1) -- The key/value pair to be considered
-> ([[(k1,v1)]],Int) -- New set of pieces and size of head
splitHelper (ps,s) x@(k1,v1) =
if size v1 + s < split || Prelude.null (Prelude.head ps)
then (((x:Prelude.head ps):Prelude.tail ps), size v1 + s)
else ([x]:ps,size v1)
mapPerKey :: Dict k1 v1 -> [(k2,v2)]
mapPerKey = Prelude.concat
. Prelude.map (uncurry map)
. Data.Map.toList
partition :: [[(k2,v2)]] -> [[[(k2,v2)]]]
partition all = Prelude.map partition' all
where
partition' task = Prelude.map partition'' [1..parts]
where
parts = flip div split
$ Prelude.sum
$ Prelude.concat
$ Prelude.map (Prelude.map (size . Prelude.snd)) all
partition'' :: Int -> [(k2,v2)]
partition'' p = Prelude.filter
( (==) p
. keycode parts
. Prelude.fst) task
groupByKey :: [(k2,v2or3)] -> Dict k2 [v2or3]
groupByKey = Prelude.foldl
groupHelper
Data.Map.empty
where
groupHelper :: Dict k2 [v2or3] -> (k2,v2or3) -> Dict k2 [v2or3]
groupHelper m (k2,v2) = Data.Map.insertWith (++) k2 [v2] m
combinePerKey :: Dict k2 [v2] -> Dict k2 v3
combinePerKey = Data.Map.mapWithKey h -- 3. Eliminate Maybe
. Data.Map.filterWithKey g -- 2. Filter Justs
. Data.Map.mapWithKey combiner -- 1. Apply combiner per key
where
g k2 Nothing = False
g k2 (Just _) = True
h k2 (Just x) = x
mergeParts :: [[Dict k2 v3]] -> [[(k2,v3)]]
mergeParts =
Prelude.map ( Prelude.concat -- 3. Unite partition
. Prelude.map Data.Map.toList) -- 2. Access dictionaries
. Data.List.transpose -- 1. Transpose grouping
reducePerKey :: Dict k2 [v3] -> Dict k2 v4
reducePerKey = Data.Map.mapWithKey h -- 3. Eliminate Maybe
. Data.Map.filterWithKey g -- 2. Filter Justs
. Data.Map.mapWithKey reduce -- 1. Apply reduce per key
where
g k2 Nothing = False
g k2 (Just _) = True
h k2 (Just x) = x
concatReducts :: [Dict k2 v4] -> Dict k2 v4
concatReducts = Data.Map.fromList
. Prelude.concat
. Prelude.map Data.Map.toList
-- The encoding of word-occurrence counting
wordOccurrenceCount
= mapReduce
myMap
myReduce
myReduce
where
myHash key = ((Prelude.fromEnum (Prelude.head key)) `mod` 7) + 1
myMap = const (Prelude.map (flip (,) (1::Int)) . words)
myReduce = const (Just . sum)
-- Test harness
main = print
$ wordOccurrenceCount
$ Data.Map.insert "a" "1 3 2"
$ Data.Map.insert "b" "3 2 3 4 4 4 4"
$ Data.Map.insert "c" "5 7 5 5 5 6 6 6 6 6 6"
$ Data.Map.insert "d" "7 5 7 7 7 7 7 8 8 8 9 8 8 8 8"
$ Data.Map.insert "e" "9 8 9 9 9 9 9 9 9 10 10 10 10 11 10 10 10 10 10"
$ Data.Map.insert "f" "11 11 11 11 11 11 11 11 11 11 12"
$ Data.Map.insert "g" "12 12 12 12 12 12 12 12 12 12 13 12"
$ Data.Map.insert "h" "13 13 13 13 13 13 13 13 13 13 10 13 13"
$ Data.Map.empty