-- PARSING AND INTERPRETING A MORE REALISTIC ENGLISH FRAGMENT
--
-- This is a GHCi walkthrough: every non-comment line is either a GHCi command
-- (:l, :i, :t, :{ ... :}) or an expression to evaluate at the prompt. The
-- definitions being discussed are quoted inside {- ... -} block comments for
-- reference only; they are not (re)defined by this script.

-- defining a more realistic lexicon
:l Lexicon

-- we define a set of agreement features
:i Feat
features

-- agreement morphology consists of feature bundles
:i Agreement

-- we also define grammatically relevant subsets of these features
:t gender
gender features
number features
person features
gcase features
pronType features
tense features
prepType features

-- finally, we define a function that eliminates the underspecified gender
-- feature MascOrFem whenever one of the fully specified gender features Masc
-- or Fem is present in the feature bundle
{-
prune :: Agreement -> Agreement
prune fs = if (Masc `elem` fs || Fem `elem` fs)
             then (delete MascOrFem fs)
             else fs
-}
gender features
prune $ gender features
-- prune only ever deletes MascOrFem, so a bundle without it comes back unchanged
number features
prune $ number features

-- we can now define syntactic categories as four-part values consisting of a
-- phonological representation, a category label, an agreement feature bundle
-- and a subcategorization list
:i Cat
:i Phon
:i CatLabel
:i Agreement

-- here are a couple of examples of categories
:t Cat
Cat "goldilocks" "NP" [Thrd,Fem,Sg] []
Cat "" "NP" [Thrd,Fem,Sg] []
Cat "littlemook" "NP" [Thrd,Masc,Sg] []
Cat "every" "DET" [Sg] []
Cat "all" "DET" [Pl] []
Cat "some" "DET" [] []
Cat "several" "DET" [Pl] []
Cat "a" "DET" [Sg] []
Cat "did" "AUX" [] []
Cat "helped" "VP" [Tense] [Cat "" "NP" [AccOrDat] []]
Cat "and" "CONJ" [] []

-- we define 4 functions that enable us to extract the individual components
-- of the categories
phon $ Cat "helped" "VP" [Tense] [Cat "" "NP" [AccOrDat] []]
catLabel $ Cat "helped" "VP" [Tense] [Cat "" "NP" [AccOrDat] []]
fs $ Cat "helped" "VP" [Tense] [Cat "" "NP" [AccOrDat] []]
subcatList $ Cat "helped" "VP" [Tense] [Cat "" "NP" [AccOrDat] []]

-- we are now ready to define our lexicon as a function from strings (the words
-- themselves) to lists of categories (lists b/c some words might be ambiguous)
-- see the Lexicon.hs file for many examples

-- we define a way to combine the feature bundles of 2 categories (the empty
-- list [] indicates failure to combine): the merged, pruned bundle is returned
-- only if it contains at most one feature of each kind
{-
combine :: Cat -> Cat -> [Agreement]
combine cat1 cat2 =
  [feats | length (gender feats) <= 1,
           length (number feats) <= 1,
           length (person feats) <= 1,
           length (gcase feats) <= 1,
           length (pronType feats) <= 1,
           length (tense feats) <= 1,
           length (prepType feats) <= 1]
  where feats = prune . nub . sort $ fs cat1 ++ fs cat2
-}
:{
let cat1 = Cat "goldilocks" "NP" [Thrd,Fem,Sg] []
    cat2 = Cat "runs" "VP" [Tense,Sg] []
    cat3 = Cat "run" "VP" [Tense,Pl] []
:}
combine cat1 cat2
-- cat1 carries Sg and cat3 carries Pl, so this pair presumably fails on the
-- number check (assuming `number` selects both Sg and Pl -- see Lexicon.hs)
combine cat1 cat3

-- we can determine whether 2 categories agree this way: they agree if we
-- combine them and we don't get an empty list
{-
agree :: Cat -> Cat -> Bool
agree cat1 cat2 = not . null $ combine cat1 cat2
-}
agree cat1 cat2
agree cat1 cat3

-- finally, we define a function in which a particular agreement feature is
-- assigned to a category; the result is [] if the feature cannot be combined
-- with the features the category already has
{-
assign :: Feat -> Cat -> [Cat]
assign f c@(Cat phon label fs subcatlist) =
  [Cat phon label fs' subcatlist | fs' <- combine c (Cat "" "" [f] [])]
-}
assign Tense $ Cat "run" "VP" [Pl] []

-- we are now ready to take an incoming string, identify the lexical items it
-- contains (after some preprocessing -- see the definitions of preproc and
-- scan in the Lexicon.hs file) and extract their categories from the lexicon
{-
type Words = [String]

lexer :: String -> Words
lexer = preproc . words . (map toLower) . scan
-}
lexer "I loved her."
lexer "She despised me."

-- collectCats looks every word up in the lexicon and raises an error listing
-- all words that have no category; otherwise initCats enumerates every way of
-- picking one category per word (one inner list per combination of choices)
{-
lookupWord :: (String -> [Cat]) -> String -> [Cat]
lookupWord db w = [c | c <- db w]

collectCats :: (String -> [Cat]) -> Words -> [[Cat]]
collectCats db words =
  let listing = map (\ x -> (x,lookupWord db x)) words
      unknown = map fst (filter (null.snd) listing)
  in if unknown /= [] then error ("unknown words: " ++ show unknown)
     else initCats (map snd listing)

initCats :: [[Cat]] -> [[Cat]]
initCats [] = [[]]
initCats (cs:rests) = [c:rest | c <- cs, rest <- initCats rests]
-}
collectCats lexicon $ lexer "I loved her."
collectCats lexicon $ lexer "She despised me."