From 41cccc064268526b2a2b8c2399791f8ef39d5242 Mon Sep 17 00:00:00 2001 From: Anthony Marakis Date: Fri, 4 Aug 2017 19:22:28 +0300 Subject: [PATCH 1/4] add cnf_rules to grammar --- nlp.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/nlp.py b/nlp.py index 51007a985..2810d9910 100644 --- a/nlp.py +++ b/nlp.py @@ -52,6 +52,16 @@ def isa(self, word, cat): """Return True iff word is of category cat""" return cat in self.categories[word] + def cnf_rules(self): + """Returns the tuple (X, Y, Z) for rules in the form: + X -> Y Z""" + cnf = [] + for X, rules in self.rules.items(): + for (Y, Z) in rules: + cnf.append((X, Y, Z)) + + return cnf + def generate_random(self, S='S'): """Replace each token in S by a random entry in grammar (recursively).""" import random @@ -229,6 +239,21 @@ def __repr__(self): Digit="0 [0.35] | 1 [0.35] | 2 [0.3]" )) + + +E_Chomsky = Grammar('E_Prob_Chomsky', # A Grammar in Chomsky Normal Form + Rules( + S='NP VP', + NP='Article Noun | Adjective Noun', + VP='Verb NP | Verb Adjective', + ), + Lexicon( + Article='the | a | an', + Noun='robot | sheep | fence', + Adjective='good | new | sad', + Verb='is | say | are' + )) + E_Prob_Chomsky = ProbGrammar('E_Prob_Chomsky', # A Probabilistic Grammar in CNF ProbRules( S='NP VP [1]', From 79a83136d04d5a43de4a44d2c21b43548a96febe Mon Sep 17 00:00:00 2001 From: Anthony Marakis Date: Fri, 4 Aug 2017 19:22:54 +0300 Subject: [PATCH 2/4] Update nlp.ipynb --- nlp.ipynb | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/nlp.ipynb b/nlp.ipynb index 4f79afe75..853d16092 100644 --- a/nlp.ipynb +++ b/nlp.ipynb @@ -81,6 +81,23 @@ "Now we know it is more likely for `S` to be replaced by `aSb` than by `e`." 
] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Chomsky Normal Form\n", + "\n", + "A grammar is in Chomsky Normal Form (or **CNF**, not to be confused with *Conjunctive Normal Form*) if every one of its rules has one of the following three forms:\n", + "\n", + "* `X -> Y Z`\n", + "* `A -> a`\n", + "* `S -> ε`\n", + "\n", + "Where *X*, *Y*, *Z*, *A* are non-terminals, *a* is a terminal, *ε* is the empty string and *S* is the start symbol (the start symbol should not be appearing on the right hand side of rules). Note that there can be multiple rules for each left hand side non-terminal, as long they follow the above. For example, a rule for *X* might be: `X -> Y Z | A B | a | b`.\n", + "\n", + "Of course, we can also have a *CNF* with probabilities." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -275,6 +292,52 @@ "print(\"Is 'here' a noun?\", grammar.isa('here', 'Noun'))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If the grammar is in Chomsky Normal Form, we can call the `cnf_rules` method to get a list containing a tuple `(X, Y, Z)` for each `X -> Y Z` rule. Since the grammar above is not in *CNF*, though, we have to create a new one." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "E_Chomsky = Grammar('E_Prob_Chomsky', # A Grammar in Chomsky Normal Form\n", + " Rules(\n", + " S='NP VP',\n", + " NP='Article Noun | Adjective Noun',\n", + " VP='Verb NP | Verb Adjective',\n", + " ),\n", + " Lexicon(\n", + " Article='the | a | an',\n", + " Noun='robot | sheep | fence',\n", + " Adjective='good | new | sad',\n", + " Verb='is | say | are'\n", + " ))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('NP', 'Article', 'Noun'), ('NP', 'Adjective', 'Noun'), ('VP', 'Verb', 'NP'), ('VP', 'Verb', 'Adjective'), ('S', 'NP', 'VP')]\n" + ] + } + ], + "source": [ + "print(E_Chomsky.cnf_rules())" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -428,6 +491,52 @@ "print(\"Is 'here' a noun?\", grammar.isa('here', 'Noun'))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we have a grammar in *CNF*, we can get a list of all the rules. 
Let's create a grammar in the form and print the *CNF* rules:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "E_Prob_Chomsky = ProbGrammar('E_Prob_Chomsky', # A Probabilistic Grammar in CNF\n", + " ProbRules(\n", + " S='NP VP [1]',\n", + " NP='Article Noun [0.6] | Adjective Noun [0.4]',\n", + " VP='Verb NP [0.5] | Verb Adjective [0.5]',\n", + " ),\n", + " ProbLexicon(\n", + " Article='the [0.5] | a [0.25] | an [0.25]',\n", + " Noun='robot [0.4] | sheep [0.4] | fence [0.2]',\n", + " Adjective='good [0.5] | new [0.2] | sad [0.3]',\n", + " Verb='is [0.5] | say [0.3] | are [0.2]'\n", + " ))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('NP', 'Article', 'Noun', 0.6), ('NP', 'Adjective', 'Noun', 0.4), ('VP', 'Verb', 'NP', 0.5), ('VP', 'Verb', 'Adjective', 0.5), ('S', 'NP', 'VP', 1.0)]\n" + ] + } + ], + "source": [ + "print(E_Prob_Chomsky.cnf_rules())" + ] + }, { "cell_type": "markdown", "metadata": {}, From 12752d6e0132fd2d8a39c009bf6bd8b3a37f5c46 Mon Sep 17 00:00:00 2001 From: Anthony Marakis Date: Fri, 4 Aug 2017 19:23:20 +0300 Subject: [PATCH 3/4] Update test_nlp.py --- tests/test_nlp.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_nlp.py b/tests/test_nlp.py index 030469f46..ae7c52822 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -32,6 +32,10 @@ def test_grammar(): assert grammar.rewrites_for('A') == [['B', 'C'], ['D', 'E']] assert grammar.isa('the', 'Article') + grammar = nlp.E_Chomsky + for rule in grammar.cnf_rules(): + assert len(rule) == 3 + def test_generation(): lexicon = Lexicon(Article="the | a | an", @@ -77,6 +81,10 @@ def test_prob_grammar(): assert grammar.rewrites_for('A') == [(['B', 'C'], 0.3), (['D', 'E'], 0.7)] assert grammar.isa('the', 'Article') + grammar = nlp.E_Prob_Chomsky + for rule in grammar.cnf_rules(): + 
assert len(rule) == 4 + def test_prob_generation(): lexicon = ProbLexicon(Verb="am [0.5] | are [0.25] | is [0.25]", From cffa5d1db01c1fd03b6ae96ad778d2f1e9066260 Mon Sep 17 00:00:00 2001 From: Anthony Marakis Date: Sat, 5 Aug 2017 20:13:20 +0300 Subject: [PATCH 4/4] add more to CNF section --- nlp.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nlp.ipynb b/nlp.ipynb index 853d16092..9370271e2 100644 --- a/nlp.ipynb +++ b/nlp.ipynb @@ -95,7 +95,9 @@ "\n", "Where *X*, *Y*, *Z*, *A* are non-terminals, *a* is a terminal, *ε* is the empty string and *S* is the start symbol (the start symbol should not be appearing on the right hand side of rules). Note that there can be multiple rules for each left hand side non-terminal, as long they follow the above. For example, a rule for *X* might be: `X -> Y Z | A B | a | b`.\n", "\n", - "Of course, we can also have a *CNF* with probabilities." + "Of course, we can also have a *CNF* with probabilities.\n", + "\n", + "This type of grammar may seem restrictive, but it can be proven that any context-free grammar can be converted to CNF." ] }, { pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy