0% found this document useful (0 votes)
22 views

Co302 Lab File CD

Uploaded by

manthansingh0414
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
22 views

Co302 Lab File CD

Uploaded by

manthansingh0414
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 34

DELHI TECHNOLOGICAL UNIVERSITY

(Formerly Delhi College of Engineering)


Shahbad Daulatpur, Bawana Road, Delhi- 110042

DEPARTMENT OF COMPUTER SCIENCE AND ENGINEERING

Compiler Design Laboratory


Subject Code: CO-302

SUBMITTED TO: SUBMITTED BY:


Mr. Sanchit Agarwal Madhav Gupta
(2K21/CO/262)
INDEX

S No. Experiment Date Remarks

1 Write a program to convert NFA to DFA 15/1/2024

Program to build a DFA to accept strings that start and end with same
2 9/2/2024
character (given a string of characters a & b)

3 Program to detect tokens in a Program 23/2/2024

4 Write a program to implement lexical analyzer 1/3/2024

5 Program To implement the recursive descent parser 5/4/2024

Write a program to compute FIRST and FOLLOW for a given


6 5/4/2024
grammar

7 Program to eliminate left factoring in the given grammar 6/4/2024

8 Write a program to remove left recursion in given grammar 6/4/2024


Experiment 1
AIM
Write a program to convert NFA to DFA.

THEORY
A Deterministic Finite Automaton (DFA) is a computational model designed to process input strings by
transitioning between states based on the input symbols. The transitions in a DFA are deterministic,
meaning that for a given input symbol string and a specific DFA, the machine follows a unique path
through its states. Each state in a DFA has only one transition for a given input symbol, ensuring a singular
and predictable path for a given input.

On the other hand, a Non-deterministic Finite Automaton (NFA) allows for non-unique transitions. In a
state, an NFA can transition to multiple states for a single input symbol, and it can also transition using an
empty input.

ALGORITHM
Suppose there is an NFA N: {Q, ∑, q0, δ, F} which recognizes a language L.
Then the DFA D: { Q’, ∑, q0, δ’, F’ } can be constructed for language L as:

1. Initially Q’ = ɸ.
2. Add q0 to Q’.
3. For each state in Q’, find the possible set of states for each input symbol using transition function
of NFA. If this set of states is not in Q’, add it to Q’.
4. The final states of the DFA are all states in Q’ that contain at least one final state of the NFA (a member of F).

CODE
#include<bits/stdc++.h>
using namespace std;
// Prints the NFA transition table: one row per state, one column per input
// symbol ('a', 'b', ...) and a last column for epsilon ('^') moves.
//
// table[state][column] holds the target states; the last column of each row
// is the epsilon column. An empty table prints only the header (the column
// count is read from the first row, so it must not be accessed when absent).
void print(const vector<vector<vector<int>>> &table) {
    cout << " STATE/INPUT |";
    // Each row carries one extra column for epsilon, hence the "- 1".
    const size_t symbolCount =
        (table.empty() || table[0].empty()) ? 0 : table[0].size() - 1;
    for (size_t s = 0; s < symbolCount; ++s) {
        cout << " " << static_cast<char>('a' + s) << " |";
    }
    cout << " ^ " << endl << endl;

    for (size_t state = 0; state < table.size(); ++state) {
        cout << " " << state << " ";
        for (const vector<int> &targets : table[state]) {
            cout << " | ";
            for (int target : targets) {
                cout << target << " ";
            }
        }
        cout << endl;
    }
}

// Prints the DFA transition table.
//
// states[k] is the set of NFA states that forms DFA state k, and dfa[k] is
// that state's row: one target set per input symbol. An empty set is shown
// as '^'. The number of symbol columns is taken from the first row of dfa.
void printdfa(const vector<vector<int>> &states, const vector<vector<vector<int>>> &dfa) {
    cout << " STATE/INPUT ";
    const auto headerEnd = 'a' + dfa[0].size();
    for (char symbol = 'a'; symbol < headerEnd; ++symbol) {
        cout << "| " << symbol << " ";
    }
    cout << endl;

    for (size_t k = 0; k < states.size(); ++k) {
        const vector<int> &subset = states[k];
        cout << "{ ";
        if (subset.empty()) {
            cout << "^ ";
        } else {
            for (int member : subset) {
                cout << member << " ";
            }
        }
        cout << "} ";

        // Row k of dfa belongs to states[k].
        for (const vector<int> &targets : dfa[k]) {
            cout << " | ";
            if (targets.empty()) {
                cout << "^ ";
            } else {
                for (int member : targets) {
                    cout << member << " ";
                }
            }
        }
        cout << endl;
    }
}

// Returns the epsilon-closure of NFA state s: s itself plus every state
// reachable from it through epsilon moves only.
//
// v[state].back() is the epsilon column of the transition table. States that
// are out of range or have no columns contribute no further moves. The result
// is sorted and free of duplicates so that equal sets compare equal.
vector<int> closure(int s, const vector<vector<vector<int>>> &v) {
    vector<int> reached;
    reached.push_back(s);
    queue<int> pending;
    pending.push(s);

    // Breadth-first walk: every newly discovered state is queued itself, so
    // chains such as 0 -^-> 1 -^-> 2 are followed to the end.
    while (!pending.empty()) {
        const int current = pending.front();
        pending.pop();
        if (current < 0 || static_cast<size_t>(current) >= v.size() || v[current].empty()) {
            continue;
        }
        for (int next : v[current].back()) {
            if (find(reached.begin(), reached.end(), next) == reached.end()) {
                reached.push_back(next);
                pending.push(next);
            }
        }
    }

    sort(reached.begin(), reached.end());
    return reached;
}

// Reads an NFA (state count, alphabet size, then per state the targets for
// every symbol and for epsilon '^'), prints its table, converts it to a DFA
// by subset construction starting from the closure of state 0, and prints
// the resulting DFA table.
int main() {
    int n = 0, alpha = 0;
    cout << "************************* NFA to DFA *************************" << endl << endl;
    cout << "Enter total number of states in NFA : ";
    cin >> n;
    cout << "Enter number of elements in alphabet : ";
    cin >> alpha;
    if (!cin || n <= 0 || alpha < 0) {
        cout << "Invalid input: expected a positive state count and a non-negative alphabet size." << endl;
        return 1;
    }

    // Reads "<count> <state>..." for one table cell. Targets outside [0, n)
    // are dropped because they would later be used as indices into the table.
    auto readTargets = [n]() -> vector<int> {
        vector<int> targets;
        int count = 0;
        cin >> count;
        cout << "Enter output states :" << endl;
        for (int k = 0; k < count; ++k) {
            int target = 0;
            if (!(cin >> target)) {
                break;
            }
            if (target < 0 || target >= n) {
                cout << "Ignoring state " << target << ": valid states are 0 to " << n - 1 << endl;
                continue;
            }
            targets.push_back(target);
        }
        return targets;
    };

    vector<vector<vector<int>>> table;
    for (int i = 0; i < n; ++i) {
        cout << "For state " << i << endl;
        vector<vector<int>> row;
        char symbol = 'a';
        for (int j = 0; j < alpha; ++j) {
            cout << "Enter no. of output states for input " << symbol++ << " : ";
            row.push_back(readTargets());
        }
        // The epsilon column is always stored last.
        cout << "Enter no. of output states for input ^ : ";
        row.push_back(readTargets());
        table.push_back(row);
    }

    cout << "***** TRANSITION TABLE OF NFA *****" << endl;
    print(table);
    cout << endl << "***** TRANSITION TABLE OF DFA *****" << endl;

    vector<vector<vector<int>>> dfa;
    vector<vector<int>> states;

    // Canonical (sorted, duplicate-free) start set, so it compares equal to
    // the sorted sets produced below and is not added a second time.
    vector<int> start = closure(0, table);
    sort(start.begin(), start.end());
    start.erase(unique(start.begin(), start.end()), start.end());
    states.push_back(start);

    queue<vector<int>> q;
    q.push(start);
    while (!q.empty()) {
        const vector<int> current = q.front();
        q.pop();
        vector<vector<int>> row;
        for (int sym = 0; sym < alpha; ++sym) {
            set<int> reach;
            for (int state : current) {
                for (int target : table[state][sym]) {
                    const vector<int> cl = closure(target, table);
                    reach.insert(cl.begin(), cl.end());
                }
            }
            const vector<int> next(reach.begin(), reach.end());
            row.push_back(next);
            if (find(states.begin(), states.end(), next) == states.end()) {
                states.push_back(next);
                q.push(next);
            }
        }
        // Rows are appended in the same order in which states were queued,
        // so dfa[k] is the row of states[k].
        dfa.push_back(row);
    }
    printdfa(states, dfa);
    return 0;
}

LEARNING OUTCOMES
We have learnt how to convert a given NFA to a DFA. It is hard for a computer program to simulate an
NFA because the transition function is multivalued. An algorithm called the subset construction can
convert an NFA for any language into a DFA that recognizes the same language. This algorithm is closely
related to an algorithm for constructing LR parsers.
Experiment 2

AIM
Program to build a DFA to accept strings that start and end with same character (given a string of
characters a & b)

THEORY
A Deterministic Finite Automaton (DFA) is a mathematical model that recognizes patterns in strings of
symbols. It consists of a set of states, a set of input symbols, a transition function, a start state, and a set
of final or accepting states.

In this experiment, we aim to build a DFA that accepts strings that start and end with the same character,
given a string of characters 'a' and 'b'.

ALGORITHM
1. Define the set of states: q0, q1, q2, q3, q4.
2. Define the input symbols: 'a' and 'b'.
3. Define the transition functions:
• q0: If the input is 'a', transition to q1; if the input is 'b', transition to q3.
• q1: If the input is 'a', transition to q1; if the input is 'b', transition to q2.
• q2: If the input is 'a', transition to q1; if the input is 'b', transition to q2.
• q3: If the input is 'a', transition to q4; if the input is 'b', transition to q3.
• q4: If the input is 'a', transition to q4; if the input is 'b', transition to q3.
4. Define the start state: q0.
5. Define the final or accepting states: q1 and q3.
6. Implement the DFA using recursive functions for each state.
7. In the main function, read the input string and check if it contains only 'a' and 'b' characters.
8. If the input string is valid, call the q0 function with the input string and index 0.
9. Print "Accepted" if the string is accepted by the DFA, and "Not Accepted" otherwise.

CODE
#include <bits/stdc++.h>
using namespace std;

// Forward declarations: the state functions call one another before all of
// them have been defined.
void q1(string, int);
void q2(string, int);
void q3(string, int);
void q4(string, int);
// State q1 (accepting): entered from q0 on 'a' and re-entered whenever an
// 'a' is read in q1 or q2. Ending the input here prints "Accepted".
void q1(string s, int i) {
    if (i != s.length()) {
        // 'a' stays in q1; any other character moves to q2.
        if (s[i] != 'a')
            q2(s, i + 1);
        else
            q1(s, i + 1);
        return;
    }
    cout << "Accepted";
}
// State q2 (rejecting): reached from q1 or q2 when the character read is
// not 'a'. Ending the input here prints "Not Accepted".
void q2(string s, int i) {
    if (i != s.length()) {
        // 'a' returns to q1; any other character stays in q2.
        if (s[i] != 'a')
            q2(s, i + 1);
        else
            q1(s, i + 1);
        return;
    }
    cout << "Not Accepted";
}
// State q3 (accepting): entered from q0 on a character other than 'a' and
// re-entered whenever such a character is read in q3 or q4. Ending the input
// here prints "Accepted".
void q3(string s, int i) {
    if (i != s.length()) {
        // 'a' moves to q4; any other character stays in q3.
        if (s[i] != 'a')
            q3(s, i + 1);
        else
            q4(s, i + 1);
        return;
    }
    cout << "Accepted";
}
// State q4 (rejecting): reached from q3 or q4 when an 'a' is read. Ending
// the input here prints "Not Accepted".
void q4(string s, int i) {
    if (i != s.length()) {
        // 'a' stays in q4; any other character returns to q3.
        if (s[i] != 'a')
            q3(s, i + 1);
        else
            q4(s, i + 1);
        return;
    }
    cout << "Not Accepted";
}
// Start state q0: dispatches on the character at index i. 'a' continues in
// q1, anything else in q3. If the input is already exhausted here, it prints
// "Not Accepted".
void q0(string s, int i) {
    if (i != s.length()) {
        if (s[i] != 'a')
            q3(s, i + 1);
        else
            q1(s, i + 1);
        return;
    }
    cout << "Not Accepted";
}
// Driver Code
// Repeatedly prompts for a word, rejects words containing characters other
// than 'a' and 'b', and otherwise runs the DFA from q0. The loop ends when
// the input stream is exhausted or fails, so the program terminates on EOF
// instead of re-prompting forever.
int main() {
    while (true) {
        cout << "Enter Input String (with a and b): ";
        string s;
        if (!(cin >> s))
            break;
        if (s.find_first_not_of("ab") != string::npos)
            cout << "Invalid String";
        else
            q0(s, 0);
        cout << endl;
    }
    return 0;
}
OUTPUT

LEARNING OUTCOMES
Understanding Deterministic Finite Automata (DFA) involved grasping its core components like states,
input symbols, transition function, start state, and accepting states. Skills were developed in handling user
input and validation, problem-solving, and algorithm design. This experience reinforced comprehension
of control flow and decision-making in programming, contributing to a comprehensive understanding of
computer science concepts and their applications.
Experiment 3

AIM
Program to detect tokens in a Program

THEORY
Lexical Analysis is the initial phase of the compiler, also known as scanning. It converts the input program
into a sequence of Tokens. In a C++ program, tokens can be keywords, identifiers, constants, string
literals, or symbols. Tokens are the smallest meaningful elements of a program to the compiler and can be
classified into:

1. Keywords
2. Identifiers
3. Constants
4. Strings
5. Special Symbols
6. Operators

ALGORITHM
1. Define functions to check for delimiters, operators, valid identifiers, keywords, integers, real numbers,
and substring extraction.
2. Define a function 'parse' to analyze the input string:
1. Initialize 'left' and 'right' pointers to the start of the string.
2. Loop until 'right' pointer reaches the end of the string.
3. If the character at 'right' is not a delimiter, move 'right' pointer forward.
4. If the character at 'right' is a delimiter:
a. If 'left' equals 'right', and it's an operator, print it.
b. If 'left' is not equal to 'right', or 'right' has reached the end:
- Extract the substring from 'left' to 'right - 1'.
- Check if the substring is a keyword, integer, real number, or valid identifier, and print
accordingly.
c. Update 'left' to 'right'.
3. In the 'main' function:
1. Define a character array to store the input string.
2. Prompt the user to enter a string.
3. Read the string using 'fgets'.
4. Remove the newline character added by 'fgets'.
5. Call the 'parse' function with the input string.

CODE
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

// Returns 'true' if the character is a DELIMITER: a blank, an arithmetic or
// comparison symbol, a comma, a semicolon, or any kind of bracket.
bool isDelimiter(char ch) {
    switch (ch) {
    case ' ':
    case '+': case '-': case '*': case '/':
    case ',': case ';':
    case '>': case '<': case '=':
    case '(': case ')':
    case '[': case ']':
    case '{': case '}':
        return true;
    default:
        return false;
    }
}

// Returns 'true' if the character is an OPERATOR: one of + - * / > < =.
bool isOperator(char ch) {
    switch (ch) {
    case '+': case '-': case '*': case '/':
    case '>': case '<': case '=':
        return true;
    default:
        return false;
    }
}

// Returns 'true' if the string is a VALID IDENTIFIER: non-empty, starting
// with a letter or underscore, and containing only letters, digits and
// underscores. NULL and the empty string are rejected.
//
// Every character is checked, not just the first, so tokens such as "a#b"
// are no longer reported as valid.
bool validIdentifier(char *str) {
    int i;

    if (str == NULL || str[0] == '\0')
        return false;
    for (i = 0; str[i] != '\0'; i++) {
        char c = str[i];
        bool letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
        bool digit = (c >= '0' && c <= '9');

        // A digit is allowed anywhere except in the first position.
        if (!(letter || (digit && i > 0)))
            return false;
    }
    return true;
}

// Returns 'true' if the string is a KEYWORD, i.e. exactly equal to one of the
// 32 C keywords listed below. The comparison is case-sensitive; NULL yields
// false.
bool isKeyword(char *str) {
    static const char *const keywords[] = {
        "auto",     "break",  "case",    "char",   "const",    "continue",
        "default",  "do",     "double",  "else",   "enum",     "extern",
        "float",    "for",    "goto",    "if",     "int",      "long",
        "register", "return", "short",   "signed", "sizeof",   "static",
        "struct",   "switch", "typedef", "union",  "unsigned", "void",
        "volatile", "while"};
    size_t k;

    if (str == NULL)
        return false;
    for (k = 0; k < sizeof(keywords) / sizeof(keywords[0]); k++) {
        if (strcmp(str, keywords[k]) == 0)
            return true;
    }
    return false;
}

// Returns 'true' if the string is an INTEGER: at least one character, all of
// them decimal digits. A sign is not accepted.
bool isInteger(char *str) {
    const char *p = str;

    if (*p == '\0')
        return false;
    for (; *p != '\0'; ++p) {
        if (*p < '0' || *p > '9')
            return false;
    }
    return true;
}

// Returns 'true' if the string is a REAL NUMBER: decimal digits with exactly
// one '.' and at least one digit (e.g. "1.5", "3.", ".5"). Strings with no
// decimal point, with several of them, with no digit at all, or with any
// other character (including a sign) are rejected, as is NULL.
bool isRealNumber(char *str) {
    int digits = 0, points = 0;
    const char *p;

    if (str == NULL)
        return false;
    for (p = str; *p != '\0'; ++p) {
        if (*p == '.')
            points++;
        else if (*p >= '0' && *p <= '9')
            digits++;
        else
            return false;
    }
    return points == 1 && digits > 0;
}

// Extracts the SUBSTRING str[left..right] (both ends inclusive) into a newly
// allocated, NUL-terminated buffer that the caller must release with free().
//
// A reversed range (right < left) yields an empty string. Returns NULL if
// the allocation fails. The caller must ensure that the requested range lies
// inside str.
char *subString(char *str, int left, int right) {
    int count = (right >= left) ? right - left + 1 : 0;
    char *subStr = (char *)malloc((size_t)count + 1);

    if (subStr == NULL)
        return NULL;
    if (count > 0)
        memcpy(subStr, str + left, (size_t)count);
    subStr[count] = '\0';
    return subStr;
}

// Parsing the input STRING.
// Scans str from left to right, splitting it at delimiters, and prints one
// line per token: operators are reported as they are met; every other token
// is classified as keyword, integer, real number, or valid / invalid
// identifier. Delimiters that are not operators are skipped silently.
//
// 'left' marks the start of the current token and 'right' the scan position.
// The scan position never moves past the terminating NUL, and each extracted
// token is freed after it has been reported.
void parse(char *str) {
    int left = 0, right = 0;
    int len = (int)strlen(str);

    while (right <= len && left <= right) {
        bool atEnd, atDelimiter;

        // Advance by one character while inside a token.
        if (right < len && isDelimiter(str[right]) == false)
            right++;

        atEnd = (right >= len);
        atDelimiter = !atEnd && isDelimiter(str[right]) == true;

        if (atDelimiter && left == right) {
            // A delimiter with no pending token in front of it.
            if (isOperator(str[right]) == true)
                printf("'%c' IS AN OPERATOR\n", str[right]);

            right++;
            left = right;
        } else if ((atDelimiter || atEnd) && left != right) {
            // str[left..right-1] is a complete token.
            char *subStr = subString(str, left, right - 1);

            if (subStr == NULL)
                return;

            if (isKeyword(subStr) == true)
                printf("'%s' IS A KEYWORD\n", subStr);
            else if (isInteger(subStr) == true)
                printf("'%s' IS AN INTEGER\n", subStr);
            else if (isRealNumber(subStr) == true)
                printf("'%s' IS A REAL NUMBER\n", subStr);
            else if (validIdentifier(subStr) == true)
                printf("'%s' IS A VALID IDENTIFIER\n", subStr);
            else
                printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);

            free(subStr);
            left = right;
        } else if (atEnd) {
            // End of input with nothing pending.
            break;
        }
    }
    return;
}

// DRIVER FUNCTION
// Reads one line (at most 99 characters) from standard input and tokenizes
// it. Returns 1 without parsing if no line could be read.
int main() {
    // maximum length of string is 100 here
    char str[100];

    printf("Enter a string: ");

    // On EOF or a read error the buffer contents are not usable.
    if (fgets(str, sizeof(str), stdin) == NULL)
        return 1;

    // Remove the newline character added by fgets; strcspn returns the
    // string length when there is none, so this is safe for any input.
    str[strcspn(str, "\n")] = '\0';

    parse(str); // calling the parse function
    return 0;
}

OUTPUT

LEARNING OUTCOMES
We have learnt how to detect tokens in a given program. To do so, we need the list of keywords of the
language (C++ in the above program) and the rules that govern how identifiers may be named in that
language.
Experiment 4

AIM
Write a program to implement lexical analyzer.

THEORY
The lexical analysis phase is the initial step in the compilation process for C programming. During lexical
analysis, the code is broken into tokens, which are the smallest units in programming. This process
involves scanning the input string, removing whitespace, and categorizing tokens such as keywords,
identifiers, operators, literals, and punctuation. The main objective is to simplify subsequent stages;
syntax errors are not checked in this phase. The aim is to implement a program for lexical analysis.

ALGORITHM
1. Open the program file using ifstream.
2. If file has been opened successfully do :
3. While it is not the end of file do :
a. Parse the file line by line
b. If it is a comment
i. Output “COMMENT”
ii. Break;
c. Check if the word in buffer currently is
i. Keyword
ii. Operator
iii. Identifier
d. Output the shorthand word for that particular token.
4. Close the file

CODE
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <iostream>
using namespace std;
#define MAX_LENGTH 100
// This function checks for a delimiter: a character that separates one
// token from the next (a blank, an arithmetic or comparison symbol, a comma,
// a semicolon, or any kind of bracket).
bool isDelimiter(char chr)
{
    switch (chr)
    {
    case ' ':
    case '+': case '-': case '*': case '/': case '%':
    case ',': case ';':
    case '>': case '<': case '=':
    case '(': case ')':
    case '[': case ']':
    case '{': case '}':
        return true;
    default:
        return false;
    }
}

// This function checks for a valid operator: one of + - * / % > < =.
// '%' is included because isDelimiter() splits tokens at it; without it the
// modulus sign would be consumed without being reported.
bool isOperator(char chr)
{
    switch (chr)
    {
    case '+': case '-': case '*': case '/': case '%':
    case '>': case '<': case '=':
        return true;
    default:
        return false;
    }
}

// This function checks for a valid identifier: a non-empty string that
// starts with a letter or underscore and contains only letters, digits and
// underscores. A null pointer and the empty string are rejected.
//
// Every character is inspected, not just the first, so a token such as
// "a#b" is not accepted.
bool isValidIdentifier(const char *str)
{
    if (str == nullptr || *str == '\0')
        return false;

    for (int i = 0; str[i] != '\0'; i++)
    {
        // The <cctype> classifiers require a value representable as
        // unsigned char.
        const unsigned char c = static_cast<unsigned char>(str[i]);
        const bool letter = isalpha(c) || c == '_';
        if (!(letter || (i > 0 && isdigit(c))))
            return false;
    }
    return true;
}

// Reports whether str is exactly one of the 32 keywords in the table below
// (case-sensitive comparison).
bool isKeyword(const char *str)
{
    static const char *const kReserved[] = {
        "auto",     "break",  "case",    "char",   "const",    "continue",
        "default",  "do",     "double",  "else",   "enum",     "extern",
        "float",    "for",    "goto",    "if",     "int",      "long",
        "register", "return", "short",   "signed", "sizeof",   "static",
        "struct",   "switch", "typedef", "union",  "unsigned", "void",
        "volatile", "while"};
    const size_t total = sizeof(kReserved) / sizeof(kReserved[0]);

    // Linear scan; stops at the first match.
    size_t idx = 0;
    while (idx < total && strcmp(str, kReserved[idx]) != 0)
        ++idx;
    return idx < total;
}
// Checks for an integer value: returns true when str is non-null, non-empty
// and consists solely of decimal digits. A sign is not accepted.
bool isInteger(const char *str)
{
    if (str == nullptr || *str == '\0')
        return false;
    int i = 0;
    // isdigit() requires a value representable as unsigned char; passing a
    // negative plain char is undefined behaviour.
    while (isdigit(static_cast<unsigned char>(str[i])))
        i++;
    return str[i] == '\0';
}

// Copies str[start..end] (both ends inclusive) into a new NUL-terminated
// buffer allocated with new[]; the caller must release it with delete[].
//
// The range is clamped to the string, so a start below 0 or an end past the
// last character is tolerated; a reversed or fully out-of-range request
// yields an empty string.
char *getSubstring(const char *str, int start, int end)
{
    const int length = static_cast<int>(strlen(str));
    if (start < 0)
        start = 0;
    if (end >= length)
        end = length - 1;

    const int subLength = (end >= start) ? end - start + 1 : 0;
    char *subStr = new char[subLength + 1];
    if (subLength > 0)
        memcpy(subStr, str + start, static_cast<size_t>(subLength));
    subStr[subLength] = '\0';
    return subStr;
}

// This function parses the input


int lexicalAnalyzer(const char *input)
{
int left = 0, right = 0;
int len = strlen(input);
while (right <= len && left <= right)
{
if (!isDelimiter(input[right]))
right++;
if (isDelimiter(input[right]) && left == right)
{
if (isOperator(input[right]))
cout << "Token: Operator, Value: " << input[right] << "\n";
right++;
left = right;
}
else if (isDelimiter(input[right]) && left != right ||
(right == len && left != right))
{
char *subStr = getSubstring(input, left, right - 1);
if (isKeyword(subStr))
cout << "Token: Keyword, Value: " << subStr << "\n";
else if (isInteger(subStr))
cout << "Token: Integer, Value: " << subStr << "\n";
else if (isValidIdentifier(subStr) &&
!isDelimiter(input[right - 1]))
cout << "Token: Identifier, Value: " << subStr << "\n";
else if (!isValidIdentifier(subStr) &&
!isDelimiter(input[right - 1]))
cout << "Token: Unidentified, Value: " << subStr << "\n";

delete[] subStr;
left = right;
}
}
return 0;
}
// Demo driver: tokenizes one fixed expression and prints the tokens found.
int main()
{
    char lex_input[MAX_LENGTH] = "int a = b + c";

    cout << "For Expression \"";
    cout << lex_input;
    cout << "\":\n";

    // The analyzer's return value carries no information and is ignored.
    lexicalAnalyzer(lex_input);

    cout << "\n";
    return 0;
}

OUTPUT

LEARNING OUTCOMES
We have learnt how to tokenize a given string using C++.
Experiment 5

AIM
Program To implement the recursive descent parser.

THEORY
Recursive Descent Parsing is a top-down parsing technique that constructs a parse tree for a given input
string based on the grammar rules of the language. The key aspects of a Recursive Descent Parser are:

1. Grammar Definition: The first step is to define the grammar of the language to be parsed. The
grammar consists of a set of production rules that define the syntax of the language.

2. Parsing Functions: For each nonterminal symbol in the grammar, a corresponding parsing
function is created. These functions are responsible for recognizing and parsing the linguistic
constructs represented by the nonterminal.

3. Recursive Descent: The parsing process begins by invoking the parsing function for the start
symbol of the grammar. This function then recursively calls the appropriate parsing functions
based on the input symbols and the grammar rules.

4. Top-down Approach: The parser starts with the top-level rule of the grammar and recursively
expands the nonterminal symbols until it reaches the terminal symbols that match the input.

5. Backtracking: If the current parsing function is unable to match the input with the corresponding
grammar rule, it backtracks and tries alternative rules.

6. Error Handling: Recursive Descent Parsers can provide detailed error messages by identifying
the point where the parsing process failed and the expected input.

The advantages of Recursive Descent Parsing include its simplicity, readability, and ability to produce
informative error messages.

CODE
#include<stdio.h>
#include<string.h>
#include<ctype.h>
// Expression to be parsed (at most 9 characters plus the terminating NUL).
char input[10];
// i: index of the next unread character of input.
// error: set to 1 by F() when no alternative of F matches.
int i, error;
// Parsing functions of the recursive descent parser.
void E();
void T();
void Eprime();
void Tprime();
void F();
// Reads one expression from standard input, runs the parser from E(), and
// prints whether the whole line was consumed without an error.
//
// The line is read with fgets() so that input longer than the buffer cannot
// overflow it; a line that does not fit is reported instead of being parsed
// in truncated form.
int main(void) {
    i = 0;
    error = 0;
    printf("Enter an arithmetic expression : ");

    if (fgets(input, sizeof(input), stdin) == NULL)
        input[0] = '\0'; // no input at all: treated as an empty expression

    // No newline and not at end of file means the line was cut off.
    if (strchr(input, '\n') == NULL && !feof(stdin)) {
        printf("\nExpression too long (maximum %d characters)\n", (int)sizeof(input) - 1);
        return 1;
    }
    input[strcspn(input, "\n")] = '\0';

    E();
    // Accept only if every character was consumed and F() raised no error.
    if (strlen(input) == (size_t)i && error == 0)
        printf("\nAccepted..!!!\n");
    else
        printf("\nRejected..!!!\n");
    return 0;
}

// E -> T E'
void E() {
T();
Eprime();
}
// E' -> + T E' | (empty)
// Consumes "+ T" for as long as the next character is '+'.
void Eprime() {
    while (input[i] == '+') {
        i++;
        T();
    }
}
// T -> F T'
void T() {
F();
Tprime();
}
// T' -> * F T' | (empty)
// Consumes "* F" for as long as the next character is '*'.
void Tprime() {
    while (input[i] == '*') {
        i++;
        F();
    }
}
void F() {
if (isalnum(input[i])) i++;
else if (input[i] == '(') {
i++;
E();
if (input[i] == ')')
i++;
else error = 1;
} else error = 1;
}
OUTPUT

LEARNING OUTCOMES
Through this exercise, we gained a deeper understanding of recursive descent parsing, learned to design
and implement a parser for a simple language, and developed problem-solving skills.
Experiment 6

AIM
Write a program to compute FIRST and FOLLOW for a given grammar.

THEORY
We need to find FIRST and FOLLOW sets for a given grammar, so that the parser can properly apply
the needed rule at the correct position.
1) First(α) is a set of terminal symbols that begin in strings derived from α. Rules for calculating FIRST
:
i) For a production rule X → ∈
First(X) = { ∈ }
ii) For any terminal symbol ‘a’
First(a) = { a }
iii) For a production rule X → Y1Y2Y3, First(X) =
a) If ∈ ∉ First(Y1), then First(X) = First(Y1)
b) If ∈ ∈ First(Y1), then First(X) = { First(Y1) – ∈ } ∪ First(Y2Y3)

2) Follow(α) is a set of terminal symbols that appear immediately to the right of α. Rules for calculating
FOLLOW :
i) For the start symbol S, place $ in Follow(S).
ii) For any production rule A → αB
Follow(B) = Follow(A)
iii) For any production rule A → αBβ
a) If ∈ ∉ First(β), then Follow(B) = First(β)
b) If ∈ ∈ First(β), then Follow(B) = { First(β) – ∈ } ∪ Follow(A)

ALGORITHM :
1. Take all productions as input and store them in hashmap.
2. Iterate through hashmap(char, vector<string>) :
a. // Calculating FIRST
b. For each of the char :
i. Iterate through its all productions :
1. If 1st element is terminal or null :
a. Add it to first, continue.
2. Else
a. Compute FIRST of this element.
b. Add it to FIRST.
c. If FIRST contains NULL :
i. Compute first of next element
ii. Add it to FIRST.
ii. Print FIRST for this char.
3. Iterate through hashmap(char, vector<string>) :
a. //Calculating FOLLOW
b. For each char ch:
i. Iterate through its all productions :
1. If RHS contains ch :
a. Index = index of ch in RHS.
b. Compute FIRST of substring starting at index.
c. Add it to FOLLOW.
d. If FIRST contains NULL :
i. Add FOLLOW of LHS to FOLLOW.
ii. Print FOLLOW for this ch.

CODE
#include <bits/stdc++.h>
using namespace std;

// Adds FIRST(symbol) to 'out' (each character at most once).
// Uppercase letters are nonterminals; every other character, including the
// epsilon marker '^', is a terminal whose FIRST set is itself.
// 'inProgress' holds the nonterminals currently being expanded so that a
// left-recursive grammar cannot recurse forever; such a nested occurrence
// contributes nothing.
static void collectFirst(const map<char,vector<string>> &grammar, char symbol,
                         set<char> &inProgress, string &out){
    const bool isNonTerminal = symbol>='A' && symbol<='Z';
    if(!isNonTerminal){
        if(out.find(symbol)==string::npos) out+=symbol;
        return;
    }
    const auto rules = grammar.find(symbol);
    if(rules==grammar.end() || !inProgress.insert(symbol).second) return;

    for(const string &rhs : rules->second){
        bool allNullable = true;
        for(char x : rhs){
            string sub;
            collectFirst(grammar, x, inProgress, sub);
            bool nullable = false;
            for(char y : sub){
                if(y=='^') nullable = true;
                else if(out.find(y)==string::npos) out+=y;
            }
            // Only look at the next symbol if this one can derive epsilon.
            if(!nullable){ allNullable = false; break; }
        }
        // Every symbol of the alternative is nullable (or it is empty / "^").
        if(allNullable && out.find('^')==string::npos) out+='^';
    }
    inProgress.erase(symbol);
}

// Returns FIRST(c) for grammar m as a string of distinct characters; '^'
// in the result means c can derive the empty string.
//   m : productions, keyed by nonterminal (an uppercase letter)
//   c : grammar symbol; a terminal or '^' yields just that character
string first(map<char,vector<string>>m,char c){
    string ans;
    set<char> inProgress;
    collectFirst(m, c, inProgress, ans);
    return ans;
}

// firstofstring() is defined further down in this file; it is declared here
// because follow() uses it.
string firstofstring(map<char,vector<string>>m,string s);

// Adds FOLLOW(target) to 'out' (each character at most once).
// 'visited' records the nonterminals whose contribution has already been
// collected, which keeps mutually dependent FOLLOW sets from recursing
// forever.
static void collectFollow(const map<char,vector<string>> &grammar, char start,
                          char target, set<char> &visited, string &out){
    if(!visited.insert(target).second) return;
    if(target==start && out.find('$')==string::npos) out+='$';

    for(const auto &rule : grammar){
        for(const string &rhs : rule.second){
            // Every occurrence of target in the alternative counts.
            for(size_t pos=rhs.find(target); pos!=string::npos; pos=rhs.find(target,pos+1)){
                const string firstOfRest = firstofstring(grammar, rhs.substr(pos+1));
                bool restNullable = false;
                for(char x : firstOfRest){
                    if(x=='^') restNullable = true;
                    else if(out.find(x)==string::npos) out+=x;
                }
                // Nothing (or only nullable symbols) after target: whatever
                // follows the left-hand side also follows target.
                if(restNullable && rule.first!=target)
                    collectFollow(grammar, start, rule.first, visited, out);
            }
        }
    }
}

// Returns FOLLOW(c) for grammar p as a string of distinct characters.
//   p : productions, keyed by nonterminal
//   s : start symbol; '$' is added to its FOLLOW set
//   c : nonterminal whose FOLLOW set is wanted
string follow(map<char,vector<string>>p,char s,char c){
    string ans;
    set<char> visited;
    collectFollow(p, s, c, visited, ans);
    return ans;
}

// Returns FIRST of the symbol string s for grammar m, as a string of
// distinct characters. Symbols are examined left to right, and the scan
// stops at the first symbol that cannot derive epsilon. '^' is appended when
// every symbol is nullable, which includes the empty string.
string firstofstring(map<char,vector<string>>m,string s){
    string ans;
    for(char x : s){
        // Terminals (and '^') are their own FIRST set; only nonterminals
        // need the grammar.
        const bool isNonTerminal = x>='A' && x<='Z';
        const string t = isNonTerminal ? first(m,x) : string(1,x);

        bool nullable = false;
        for(char y : t){
            if(y=='^') nullable = true;
            else if(ans.find(y)==string::npos) ans+=y;
        }
        if(!nullable) return ans;
    }
    ans+='^';
    return ans;
}

// Reads n productions of the form "<nonterminal> <right-hand side>", prints
// the grammar, then prints FIRST and FOLLOW for every nonterminal. The
// left-hand side of the first production is the start symbol.
int main(){
    map<char,vector<string>>p;
    int n=0;
    cout<<"Enter number of productions: ";
    cin>>n;
    cout<<"Enter productions:\n";
    char s='\0';   // start symbol; stays '\0' until a production is read
    for(int i=0;i<n;i++){
        char c;
        string t;
        if(!(cin>>c>>t))break;   // stop on malformed or missing input
        if(s=='\0')s=c;
        p[c].push_back(t);
    }
    if(p.empty()){
        // Without a production there is no start symbol to pass to follow().
        cout<<"No productions were entered.\n";
        return 1;
    }
    cout<<"Given grammar is:\n\n";
    for(const auto &rule:p){
        cout<<rule.first<<" -> ";
        for(size_t j=0;j<rule.second.size();j++)
            cout<<rule.second[j]<<(j+1!=rule.second.size()?" | ":"");
        cout<<"\n";
    }
    cout<<"FIRST:\n\n";
    for(const auto &rule:p)
        cout<<"FIRST("<<rule.first<<") = { "<<first(p,rule.first)<<" }\n";
    cout<<"FOLLOW:\n\n";
    for(const auto &rule:p)
        cout<<"FOLLOW("<<rule.first<<") = { "<<follow(p,s,rule.first)<<" }\n";
    return 0;
}
OUTPUT

LEARNING OUTCOMES
We have learnt how to compute FIRST and FOLLOW for a given grammar. They are useful in the further
process of designing a parser.
Experiment 7

AIM
Program to eliminate left factoring in the given grammar

THEORY
A grammar needs left factoring when it has productions of the form –

A -> αβ1 | αβ2 | αβ3 | …… | αβn | γ

i.e., several productions of A start with the same prefix α (a string of grammar symbols). On seeing the
input α we cannot immediately tell which production to choose to expand A. Left factoring is a grammar
transformation that is useful for producing a grammar suitable for predictive or top-down parsing. When the
choice between two alternative A-productions is not clear, we may be able to rewrite the productions to defer
the decision until enough of the input has been seen to make the right choice. Left factoring removes the
common left factor that appears in two or more productions of the same non-terminal. It is done to avoid
backtracking by the parser.

For the grammar A -> αβ1 | αβ2 | αβ3 | …… | αβn | γ


The equivalent left factored grammar will be –
A -> αA’ | γ
A’ -> β1 | β2 | β3 | …… | βn

ALGORITHM

1. Take all productions as input and store them in hashmap.


2. Iterate through hash map :
a. For each RHS of production:
i. Find the common prefix.
ii. Replace all the RHS’s having that prefix by modified RHS.
iii. Add the new state to the production hashmap.
3. Print the modified grammar
4. This is the grammar after removing left factoring.

CODE
#include<bits/stdc++.h>
#include<iostream>
using namespace std;
// Prints every nonterminal with its alternatives as "A -> x | y".
static void printGrammar(const map<char, vector<string> > &prod){
    for(map<char, vector<string> >::const_iterator it=prod.begin(); it!=prod.end(); ++it){
        cout<<it->first<<" -> ";
        for(size_t j=0;j<it->second.size();j++){
            cout<<it->second[j];
            if(j+1!=it->second.size())
                cout<<" | ";
        }
        cout<<endl;
    }
}

// Returns an uppercase letter that is neither a left-hand side nor used in
// any right-hand side, or '\0' when all 26 letters are taken.
static char freshNonTerminal(const map<char, vector<string> > &prod){
    for(char c='A'; c<='Z'; ++c){
        if(prod.count(c))
            continue;
        bool used=false;
        for(map<char, vector<string> >::const_iterator it=prod.begin(); it!=prod.end() && !used; ++it)
            for(size_t j=0;j<it->second.size() && !used;j++)
                if(it->second[j].find(c)!=string::npos)
                    used=true;
        if(!used)
            return c;
    }
    return '\0';
}

// Left-factors the grammar in place. For each nonterminal, as long as two or
// more alternatives start with the same symbol, their longest common prefix
// is pulled out:  A -> pX  and  X -> (the remainders), where X is a fresh
// nonterminal and an empty remainder is written as "^". New nonterminals are
// processed as well, because their remainders may share prefixes too.
static void leftFactor(map<char, vector<string> > &prod){
    vector<char> pending;
    for(map<char, vector<string> >::const_iterator it=prod.begin(); it!=prod.end(); ++it)
        pending.push_back(it->first);

    for(size_t next=0; next<pending.size(); ++next){
        const char lhs=pending[next];
        for(;;){
            const vector<string> alts=prod[lhs];

            // First alternative that shares its first symbol with a later one.
            size_t lead=alts.size();
            for(size_t a=0; a<alts.size() && lead==alts.size(); ++a){
                if(alts[a].empty() || alts[a]=="^")
                    continue;
                for(size_t b=a+1; b<alts.size(); ++b){
                    if(!alts[b].empty() && alts[b]!="^" && alts[b][0]==alts[a][0]){
                        lead=a;
                        break;
                    }
                }
            }
            if(lead==alts.size())
                break;   // nothing left to factor for this nonterminal

            // Longest prefix common to all alternatives of that group.
            string prefix=alts[lead];
            for(size_t b=lead+1; b<alts.size(); ++b){
                if(alts[b].empty() || alts[b]=="^" || alts[b][0]!=prefix[0])
                    continue;
                size_t k=0;
                while(k<prefix.size() && k<alts[b].size() && prefix[k]==alts[b][k])
                    ++k;
                prefix.erase(k);
            }

            const char fresh=freshNonTerminal(prod);
            if(fresh=='\0'){
                cout<<"No unused nonterminal letter is left; stopping."<<endl;
                return;
            }

            vector<string> kept, remainders;
            for(size_t a=0; a<alts.size(); ++a){
                const bool inGroup = alts[a]!="^" && alts[a].compare(0,prefix.size(),prefix)==0;
                if(!inGroup){
                    kept.push_back(alts[a]);
                    continue;
                }
                // The factored alternative takes the place of the group's
                // first member.
                if(remainders.empty())
                    kept.push_back(prefix+fresh);
                // The remainder is taken from the ORIGINAL alternative.
                string rest=alts[a].substr(prefix.size());
                if(rest.empty())
                    rest="^";
                if(find(remainders.begin(),remainders.end(),rest)==remainders.end())
                    remainders.push_back(rest);
            }
            prod[lhs]=kept;
            prod[fresh]=remainders;
            pending.push_back(fresh);
        }
    }
}

// Reads n productions ("<nonterminal> <right-hand side>"), prints the
// grammar, left-factors it and prints the result.
int main(){
    map < char, vector<string> > prod;
    int n=0;
    cout<<"Enter number of productions : ";
    cin>>n;
    cout<<"Enter productions : "<<endl;
    for(int i=0;i<n;i++){
        char ch;
        string s;
        if(!(cin>>ch))
            break;
        cout<<" -> ";
        if(!(cin>>s))
            break;
        prod[ch].push_back(s);
    }

    cout<<"Given grammar is :"<<endl<<endl;
    printGrammar(prod);

    leftFactor(prod);

    cout<<"Grammar after removing left factoring is :"<<endl<<endl;
    printGrammar(prod);
    return 0;
}
OUTPUT

LEARNING OUTCOMES
We have learnt how to remove left factoring from a given grammar. Left factoring is a process by which
the grammar with common prefixes is transformed to make it useful for Top-down parsers. In left
factoring, we make one production for each common prefixes and rest of the derivation is added by new
productions. The grammar obtained after the process of left factoring is called as left factored grammar.
Experiment 8

AIM
Write a program to remove left recursion in given grammar.

THEORY
A production of grammar is said to have left recursion if the leftmost variable of its RHS is same as
variable of its LHS. A grammar containing a production having left recursion is called as Left Recursive
Grammar. For example:

S → Sa / ∈

Left recursion is considered to be a problematic situation for Top-down parsers. Therefore, left recursion
has to be eliminated from the grammar. Left recursion is eliminated by converting the grammar into a
right recursive grammar. If we have the left-recursive pair of productions (where β does not begin with an
A.) -

A → Aα / β

Then, we can eliminate left recursion by replacing the pair of productions with-

A→ βA’

A’ → αA’ / ∈

This right recursive grammar functions same as left recursive grammar.

ALGORITHM
1. Take all productions as input and store them in hash map.
2. Iterate through hashmap:
a. For each of the production:
i. Initialize alpha and beta as empty vectors
ii. If the first char in RHS equals LHS
1. Store RHS starting from 2nd char in alpha.
2. Store remaining into beta.
3. Delete this production from hashmap.
4. Make new productions starting with a new character using alpha.
3. Print the modified grammar
4. This is the grammar after removing left recursion.
CODE
#include<bits/stdc++.h>
#include<iostream>
using namespace std;
// Prints every nonterminal with its alternatives as "A -> x | y".
static void showGrammar(const map<char, vector<string> > &prod){
    for(map<char, vector<string> >::const_iterator it=prod.begin(); it!=prod.end(); ++it){
        cout<<it->first<<" -> ";
        for(size_t j=0;j<it->second.size();j++){
            cout<<it->second[j];
            if(j+1!=it->second.size())
                cout<<" | ";
        }
        cout<<endl;
    }
}

// Returns an uppercase letter that is neither a left-hand side nor used in
// any right-hand side, or '\0' when all 26 letters are taken.
static char unusedNonTerminal(const map<char, vector<string> > &prod){
    for(char c='A'; c<='Z'; ++c){
        if(prod.count(c))
            continue;
        bool used=false;
        for(map<char, vector<string> >::const_iterator it=prod.begin(); it!=prod.end() && !used; ++it)
            for(size_t j=0;j<it->second.size() && !used;j++)
                if(it->second[j].find(c)!=string::npos)
                    used=true;
        if(!used)
            return c;
    }
    return '\0';
}

// Removes immediate left recursion in place. For a nonterminal A with
// alternatives  A -> A a1 | ... | b1 | ...  the result is
//   A  -> b1 X | ...
//   X  -> a1 X | ... | ^
// where X is a fresh nonterminal. An epsilon alternative "^" among the b's
// becomes just X, and a useless alternative "A -> A" is dropped.
// Indirect left recursion is not handled.
static void removeLeftRecursion(map<char, vector<string> > &prod){
    // Work on a snapshot of the keys: the nonterminals added below are never
    // left-recursive, so they need no visit.
    vector<char> heads;
    for(map<char, vector<string> >::const_iterator it=prod.begin(); it!=prod.end(); ++it)
        heads.push_back(it->first);

    for(size_t h=0; h<heads.size(); ++h){
        const char lhs=heads[h];
        const vector<string> alts=prod[lhs];
        vector<string> alpha,beta;
        bool recursive=false;
        for(size_t i=0;i<alts.size();i++){
            if(!alts[i].empty() && alts[i][0]==lhs){
                recursive=true;
                const string tail=alts[i].substr(1);
                if(!tail.empty())
                    alpha.push_back(tail);
            }
            else{
                beta.push_back(alts[i]);
            }
        }
        if(!recursive)
            continue;
        if(alpha.empty()){
            // Only "A -> A" was recursive; removing it is all that is needed.
            prod[lhs]=beta;
            continue;
        }

        const char fresh=unusedNonTerminal(prod);
        if(fresh=='\0'){
            cout<<"No unused nonterminal letter is left; stopping."<<endl;
            return;
        }

        vector<string> rewritten, tails;
        for(size_t i=0;i<beta.size();i++)
            rewritten.push_back((beta[i]=="^" ? string() : beta[i]) + fresh);
        for(size_t i=0;i<alpha.size();i++)
            tails.push_back(alpha[i]+fresh);
        tails.push_back("^");

        prod[lhs]=rewritten;
        prod[fresh]=tails;
    }
}

// Reads n productions ("<nonterminal> <right-hand side>"), prints the
// grammar, removes immediate left recursion and prints the result.
int main(){
    map < char, vector<string> > prod;
    int n=0;
    cout<<"Enter number of productions : ";
    cin>>n;
    cout<<"Enter productions : "<<endl;
    for(int i=0;i<n;i++){
        char ch;
        string s;
        if(!(cin>>ch))
            break;
        cout<<" -> ";
        if(!(cin>>s))
            break;
        prod[ch].push_back(s);
    }
    cout<<"Given grammar is :"<<endl<<endl;
    showGrammar(prod);

    removeLeftRecursion(prod);

    cout<<"Grammar after removing left recursion is :"<<endl<<endl;
    showGrammar(prod);
    return 0;
}

OUTPUT

LEARNING OUTCOMES
We have learnt how to remove left recursion from a given grammar. Left recursion often poses problems
for parsers because it leads them into infinite recursion (as in the case of most top-down parsers) .
Therefore, a grammar is often preprocessed to eliminate the left recursion.

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy