
Both sections relate to the case study: Index for a document of text. SECTION A: Given the attached Haskell code which produces an index of words, make the following alterations by modifying existing functions and including new functions where necessary : 3) Treat a capitalised word (one or more capitals) as being different from the word in all lower case (but they should still be sorted alphabetically) – unless it is at the start of a sentence with only the initial letter capitalised. A sentence is terminated by a ‘.’, ‘?’ or ‘!’. import Prelude type Doc = String type Line = String type Word = String makeIndex :: Doc -> [ ([Int], Word) ] makeIndex = shorten . -- [([Int], Word)] -> [([Int], Word)] amalgamate .-- [([Int], Word)] -> [([Int], Word)] makeLists . -- [(Int, Word)] -> [([Int], Word)] sortLs . -- [(Int, Word)] -> [(Int, Word)] allNumWords .-- [(Int, Line)] -> [(Int, Word)] numLines . -- [Line] -> [(Int, Line)] splitUp -- Doc -> [Line] splitUp :: Doc -> [Line] splitUp [] = [] splitUp text = takeWhile (/='\n') text : -- first line (splitUp . -- splitup other lines dropWhile (==’\n’) . -- delete 1st newline(s) dropWhile (/='\n')) text -- other lines numLines :: [Line] -> [(Int, Line)] numLines lines -- list of pairs of = zip [1 .. length lines] lines -- line no. & line -- for each line -- a) split into words -- b) attach line no. to each word splitWords :: Line -> [Word] -- a) splitWords [] = [] splitWords line = takeWhile isLetter line : -- first word in line (splitWords . -- split other words dropWhile (not.isLetter) . -- delete separators dropWhile isLetter) line -- other words where isLetter ch = (‘a’<=ch) && (ch<=’z’) (‘A’<=ch) && (ch<=’Z’) numWords :: (Int, Line) -> [(Int, Word)] -- b) numWords (number, line) = map addLineNum ( splitWords line) -- all line pairs where addLineNum word = (number, word) -- a pair allNumWords :: [(Int, Line)] -> [(Int, Word)] allNumWords = concat . 
map numWords -- doc pairs sortLs :: [(Int, Word)] -> [(Int, Word)] sortLs [ ] = [ ] sortLs (a:x) = sortLs [b | b <- x, compare b a] -- sort 1st half ++ [a] ++ -- 1st in middle sortLs [b | b <- x, compare a b] -- sort 2nd half where compare (n1, w1) (n2, w2) = (w1 < w2) -- 1st word less (w1 == w2 && n1 < n2) -- check no. makeLists :: [(Int, Word)] -> [([Int], Word)] makeLists = map mk -- all pairs where mk (num, word) = ([num], word) -- list of single no. amalgamate :: [([Int], Word)] -> [([Int], Word)] amalgamate [ ] = [ ] amalgamate [a] = [a] amalgamate ((n1, w1) : (n2, w2) : rest)-- pairs of pairs | w1 /= w2 = (n1, w1) : amalgamate ((n2, w2) : rest) | otherwise = amalgamate ((n1 ++ n2, w1) : rest) -- if words are same grow list of numbers shorten :: [([Int], Word)] -> [([Int], Word)] shorten = filter long -- keep pairs >4 where long (num, word) = length word > 4 -- check word >4

http://www.haskell.org/haskellwiki/Homework_help On Fri, Mar 11, 2011 at 09:26:59PM +0000, Chatura Roche wrote:
Both sections relate to the case study: Index for a document of text.
SECTION A:
Given the attached Haskell code which produces an index of words, make the following alterations by modifying existing functions and including new functions where necessary:
3) Treat a capitalised word (one or more capitals) as being different from the word in all lower case (but they should still be sorted alphabetically) – unless it is at the start of a sentence with only the initial letter capitalised. A sentence is terminated by a ‘.’, ‘?’ or ‘!’.
-- Recent versions of base export a numeric 'Word' type from the Prelude;
-- hide it so the local 'Word' alias below is not ambiguous.
import Prelude hiding (Word)

-- | The complete text of a document, newlines included.
type Doc = String

-- | One line of a document, without its terminating newline.
type Line = String

-- | A single word taken from a line.
type Word = String
-- | Build the index of a document: every word longer than four letters,
-- paired with the list of line numbers on which it occurs, in sorted order.
--
-- The pipeline is read bottom-up; the comment on each stage gives its type.
makeIndex :: Doc -> [([Int], Word)]
makeIndex =
  shorten          -- [([Int], Word)] -> [([Int], Word)]
    . amalgamate   -- [([Int], Word)] -> [([Int], Word)]
    . makeLists    -- [(Int, Word)]   -> [([Int], Word)]
    . sortLs       -- [(Int, Word)]   -> [(Int, Word)]
    . allNumWords  -- [(Int, Line)]   -> [(Int, Word)]
    . numLines     -- [Line]          -> [(Int, Line)]
    . splitUp      -- Doc             -> [Line]
-- | Split a document into its lines at newline characters.
--
-- NOTE: a run of consecutive newlines is treated as a single separator, so
-- blank lines inside the document are not returned (and therefore are not
-- counted when the lines are numbered afterwards).
splitUp :: Doc -> [Line]
splitUp []   = []
splitUp text = firstLine : splitUp (dropWhile (== '\n') rest)
  where
    -- Everything up to the first newline, and the remainder starting at it.
    (firstLine, rest) = break (== '\n') text
-- | Pair every line with its 1-based position in the list.
--
-- 'zip' stops at the end of the shorter list, so the infinite enumeration
-- yields exactly one number per line without measuring the list first.
numLines :: [Line] -> [(Int, Line)]
numLines = zip [1 ..]
-- For each line:
--   a) split it into words             (splitWords)
--   b) attach the line no. to each word (numWords)

-- | Split a line into its words.  A word is a maximal run of ASCII letters;
-- every other character is a separator.
--
-- Leading separators are skipped before a word is taken, so a line that
-- starts with a space or punctuation does not produce an empty word.
splitWords :: Line -> [Word]                                   -- a)
splitWords line =
  case dropWhile (not . isLetter) line of
    []   -> []
    rest ->
      let (word, others) = span isLetter rest
       in word : splitWords others
  where
    -- ASCII letters only, either case.
    isLetter ch = ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')
-- | Split a numbered line into its words, tagging each word with the
-- number of the line it came from.
numWords :: (Int, Line) -> [(Int, Word)]                       -- b)
numWords (number, line) = [(number, word) | word <- splitWords line]
-- | Apply 'numWords' to every numbered line and flatten the results into a
-- single list of (line number, word) pairs for the whole document.
allNumWords :: [(Int, Line)] -> [(Int, Word)]
allNumWords = concatMap numWords
-- | Sort (line number, word) pairs by word, and by line number for equal
-- words, using a quicksort around the first element.
--
-- Both partitions use a strict ordering, so an entry equal to the pivot
-- (same word on the same line) lands in neither half and is dropped: each
-- (line number, word) pair appears once in the result.
sortLs :: [(Int, Word)] -> [(Int, Word)]
sortLs []             = []
sortLs (pivot : rest) =
  sortLs [e | e <- rest, e `before` pivot]      -- entries ordered before the pivot
    ++ [pivot]
    ++ sortLs [e | e <- rest, pivot `before` e] -- entries ordered after the pivot
  where
    -- Strict ordering: word first, line number as the tie-breaker.
    before (n1, w1) (n2, w2) = w1 < w2 || (w1 == w2 && n1 < n2)
-- | Wrap each line number in a singleton list, so that the numbers of
-- entries for the same word can later be appended together.
makeLists :: [(Int, Word)] -> [([Int], Word)]
makeLists pairs = [([num], word) | (num, word) <- pairs]
-- | Merge adjacent entries that carry the same word by appending their
-- line-number lists.  Only neighbouring entries are compared, so equal
-- words must already be next to each other in the input.
amalgamate :: [([Int], Word)] -> [([Int], Word)]
amalgamate ((n1, w1) : (n2, w2) : rest)
  -- Same word: grow the number list and compare again with what follows.
  | w1 == w2  = amalgamate ((n1 ++ n2, w1) : rest)
  | otherwise = (n1, w1) : amalgamate ((n2, w2) : rest)
-- Zero or one entry left: nothing to merge.
amalgamate entries = entries
-- | Keep only the entries whose word has more than four characters.
shorten :: [([Int], Word)] -> [([Int], Word)]
shorten = filter isLong
  where
    -- The line numbers play no part in the decision.
    isLong (_, word) = length word > 4
_______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe
participants (2)
-
Brent Yorgey
-
Chatura Roche