Both sections relate to the case study:         Index for a document of text.

 

SECTION  A:

 

Given the attached Haskell code which produces an index of words, make the following alterations by modifying existing functions and including new functions where necessary – parts 1) to 5):

 

1)                  Where a word occurs N times on the same line, ensure that the line number occurs N times in the index entry for that word.

 

2)                  Allow words to be hyphenated and treat a hyphenated word as a single word.  However, for those words which are split over two lines, treat a split word as a single word without the hyphen.            

 

3)                  Treat a capitalised word (one or more capitals) as being different from the word in all lower case (but they should still be sorted alphabetically) – unless it is at the start of a sentence with only the initial letter capitalised.  A sentence is terminated by a ‘.’, ‘?’ or ‘!’.

 

4)                  Make the output more readable in the form of an index table in columns with appropriate spacing and without brackets. 

 

5)                  Include a user-friendly menu, so that the user can choose input/output file names or default files, and choose to rerun or exit.   

 

Parts 1) to 5) may be developed in any order. 

 

 

SECTION B:

 

6)                  For your version of the function makeIndex (only), show how an alternative ordering of the composed functions would provide a more efficient execution of makeIndex.  Justify your answer.

 

7)                  For the parts 1) to 5) above that you have attempted, discuss the use you have made of a) higher-order functions, b) list comprehension, c) monadic input/output, d) functional composition, and/or e) partial parameterisation (or Curried functions).  Include an evaluation of how useful your use of these concepts has been.                                  

 

                                                                                                                 


-- Modern versions of base export their own 'Word' (an unsigned integer
-- type) from the Prelude.  It is hidden here so that the 'Word' alias
-- declared below can be used without an "ambiguous occurrence" error.
import Prelude hiding (Word)

-- | The complete text of a document, newline characters included.
type Doc  = String

-- | One line of a document, without its newline.
type Line = String

-- | One word taken from a line.
type Word = String

 

-- | Build the index of a document: each word longer than four letters,
-- in ascending word order, paired with the numbers of the lines on which
-- it occurs.
makeIndex :: Doc -> [([Int], Word)]
makeIndex doc = shorten mergedEntries
  where
    -- Doc -> [Line] -> [(Int, Line)]
    numberedLines = numLines (splitUp doc)
    -- [(Int, Line)] -> [(Int, Word)], then ordered by word and line number
    sortedWords   = sortLs (allNumWords numberedLines)
    -- [(Int, Word)] -> [([Int], Word)], adjacent equal words merged
    mergedEntries = amalgamate (makeLists sortedWords)

 

-- | Split a document into lines at newline characters.
--
-- A run of consecutive newlines is treated as a single separator, so blank
-- lines in the middle or at the end of the text produce no entry.  A newline
-- at the very start of the text does produce one leading empty line.
splitUp :: Doc -> [Line]
splitUp [] = []
splitUp text = firstLine : splitUp (dropWhile (== '\n') rest)
  where
    -- 'rest' starts at the first newline, or is empty when there is none.
    (firstLine, rest) = break (== '\n') text

 

-- | Pair every line with its line number, counting from 1.
--
-- 'zip' stops at the end of the shorter list, so the numbering can be an
-- open-ended enumeration; there is no need to measure the input first.
numLines :: [Line] -> [(Int, Line)]
numLines = zip [1 ..]

 

--   for each line

--   a)   split into words

--   b)   attach line no. to each word

 

-- | Split a line into its words.                              a)
--
-- A word is a maximal run of the ASCII letters a-z and A-Z; every other
-- character is a separator.  Separators at the start of the line are
-- skipped, so no empty word is produced for them (nor for a line that
-- contains separators only).
splitWords :: Line -> [Word]
splitWords line =
  case dropWhile (not . isAsciiLetter) line of
    []   -> []
    rest ->
      let (word, remainder) = span isAsciiLetter rest
      in  word : splitWords remainder
  where
    isAsciiLetter ch =
      ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')

 

-- | Tag every word of a numbered line with that line's number.   b)
numWords :: (Int, Line) -> [(Int, Word)]
numWords (lineNo, text) = [(lineNo, word) | word <- splitWords text]

 

-- | Number the words of every line and gather them into one list, keeping
-- the lines in their original order.
allNumWords :: [(Int, Line)] -> [(Int, Word)]
allNumWords numberedLines = concatMap numWords numberedLines

 

-- | Quicksort the (line number, word) pairs by word, and by line number
-- for equal words.
--
-- Pairs that are identical to the pivot fall into neither partition, so
-- repeated pairs are dropped: each (line number, word) combination appears
-- at most once in the result.
sortLs :: [(Int, Word)] -> [(Int, Word)]
sortLs [] = []
sortLs (pivot : others) = sortLs smaller ++ pivot : sortLs larger
  where
    smaller = filter (`precedes` pivot) others
    larger  = filter (pivot `precedes`) others
    -- Strict ordering: word first, line number as tie-breaker.
    precedes (n1, w1) (n2, w2) = w1 < w2 || (w1 == w2 && n1 < n2)

 

-- | Wrap the line number of every pair in a singleton list, so that the
-- numbers of equal words can later be joined together.
makeLists :: [(Int, Word)] -> [([Int], Word)]
makeLists pairs = [([num], word) | (num, word) <- pairs]

 

-- | Merge adjacent entries that carry the same word into a single entry
-- whose number list is the concatenation, in order, of the merged lists.
--
-- Only neighbouring entries are compared, so equal words must already be
-- next to each other in the input for them to be merged.
amalgamate :: [([Int], Word)] -> [([Int], Word)]
amalgamate [] = []
amalgamate ((nums, word) : rest) =
  (nums ++ concatMap fst sameWord, word) : amalgamate others
  where
    -- Take the whole run of entries for this word in one step; the appends
    -- then nest to the right instead of re-walking a growing prefix.
    (sameWord, others) = span ((== word) . snd) rest

 

-- | Keep only the entries whose word has more than four characters.
shorten :: [([Int], Word)] -> [([Int], Word)]
shorten entries = [entry | entry@(_, word) <- entries, length word > 4]