Final Document 22brs1044
Final Document 22brs1044
ENGINEERING
2
ASSESSMENT – 1
Experiment-1
Implementation of Deterministic Finite Automaton (DFA) from regular grammar using C
language.
Aim: To implement a Deterministic Finite Automaton (DFA) using a transition table input in
C++.
Algorithm:
1. Start:
o Read the DFA transition table provided by the user.
2. Input Transition Table:
o Accept states, alphabet, transitions, start state, and accepting states as input.
3. Menu Options:
o Display a menu to allow the user to define the DFA, display the DFA, test a string, or exit.
3
#include <vector>
using namespace std;
class DFA {
private:
set<string> states;
set<char> alphabet;
map<pair<string, char>, string> transitionTable;
string startState;
set<string> acceptingStates;
public:
void defineDFA() {
int numStates, numAlphabets, numTransitions, numAccepting;
string state, fromState, toState;
char input;
4
}
void displayDFA() {
cout << "\nStates: ";
for (const auto& state : states) cout << state << " ";
return acceptingStates.find(currentState) !=
acceptingStates.end();
}
};
/**
 * Menu-driven front end for the DFA simulator.
 *
 * Shows the menu repeatedly and dispatches to DFA::defineDFA,
 * DFA::displayDFA or DFA::testString until the user selects option 4.
 * A menu entry that is not a number is discarded and reported as an invalid
 * choice; if the input stream is exhausted the loop ends instead of spinning.
 *
 * @return 0 on normal termination.
 */
int main() {
    DFA dfa;
    int choice = 0;
    string inputString;
    do {
        cout << "\nMenu:\n";
        cout << "1. Define DFA\n";
        cout << "2. Display DFA\n";
        cout << "3. Test String\n";
        cout << "4. Exit\n";
        cout << "Enter your choice: ";
        if (!(cin >> choice)) {
            // A failed extraction leaves cin in a fail state; without
            // recovery every later read fails and the menu loops forever.
            cin.clear();
            string rejected;
            if (!(cin >> rejected)) {
                break;  // nothing left to read (EOF or unrecoverable error)
            }
            choice = 0;  // falls through to the "Invalid choice" branch
        }
        switch (choice) {
            case 1:
                dfa.defineDFA();
                break;
            case 2:
                dfa.displayDFA();
                break;
            case 3:
                cout << "Enter string to test: ";
                cin >> inputString;
                if (dfa.testString(inputString)) {
                    cout << "String is accepted by the DFA.\n";
                } else {
                    cout << "String is rejected by the DFA.\n";
                }
                break;
            case 4:
                cout << "Exiting program.\n";
                break;
            default:
                cout << "Invalid choice. Please try again.\n";
        }
    } while (choice != 4);
    return 0;
}
Input/Output:
PS D:\Coding & Others> cd "d:\Coding & Others\C++\Compiler\" ; if
($?) { g++ exp1.cpp -o exp1 } ; if ($?) { .\exp1 }
Menu:
1. Define DFA
2. Display DFA
3. Test String
4. Exit
Enter your choice: 1
Enter the number of states: 3
Enter the states: a b c
Enter the number of input symbols: 2
Enter the input symbols: 0 1
Enter the number of transitions: 6
Enter the transitions (Format: fromState input toState):
a 0 a
6
a 1 b
b 0 c
b 1 a
c 0 b
c 1 c
Enter the start state: a
Enter the number of accepting states: 1
Enter the accepting states: c
Menu:
1. Define DFA
2. Display DFA
3. Test String
4. Exit
Enter your choice: 3
Enter string to test: 101
String is accepted by the DFA.
Menu:
1. Define DFA
2. Display DFA
3. Test String
4. Exit
Enter your choice: 2
States: a b c
Alphabet: 0 1
Transitions:
a -- 0 --> a
a -- 1 --> b
b -- 0 --> c
b -- 1 --> a
c -- 0 --> b
7
c -- 1 --> c
Start State: a
Accepting States: c
Menu:
1. Define DFA
2. Display DFA
3. Test String
4. Exit
Enter your choice: 4
Exiting program.
PS D:\Coding & Others\C++\Compiler>
Experiment-2
Implementation of Deterministic Finite Automaton (DFA) from Non-deterministic Finite
Automata (NFA) without ε-edges using C++ language.
Aim:
To implement a Deterministic Finite Automaton (DFA) from a Non-deterministic Finite
Automaton (NFA) without ε-edges using C++.
Algorithm:
1. Start:
● Define the NFA states, input symbols, start state, transitions, and
accepting states.
2. Input NFA:
● Accept the NFA transitions, states, start state, and accepting states from
the user.
3. DFA Construction:
8
● Use a subset construction method to create the DFA.
● Create a DFA state for every subset of NFA states.
● Identify transitions for each subset and input symbol by finding the union
of possible transitions.
● Mark a DFA state as accepting if it contains any NFA accepting state.
4. Menu Options:
● Display a menu for the user to:
● Define the NFA.
● Convert the NFA to DFA.
● Display the DFA.
● Test if a string is accepted by the DFA.
5. DFA String Acceptance:
● Simulate the DFA by traversing transitions based on the input string.
● Check if the final state is an accepting state.
6. Output:
● Display the NFA, DFA, and results of string acceptance tests.
7. End.
Source Code:
#include <iostream>
#include <map>
#include <set>
#include <queue>
#include <vector>
#include <string>
class NFAtoDFA {
private:
set<string> nfaStates;
set<string> dfaStates;
set<char> alphabet;
map<pair<string, char>, set<string>> nfaTransitions;
map<pair<string, char>, string> dfaTransitions;
string startState;
set<string> nfaAcceptStates;
set<string> dfaAcceptStates;
9
public:
void defineNFA() {
int numStates, numTransitions;
cout << "Enter the number of states in NFA: ";
cin >> numStates;
int numAcceptStates;
cout << "Enter the number of accepting states in NFA: ";
cin >> numAcceptStates;
cout << "Enter NFA accepting states:\n";
for (int i = 0; i < numAcceptStates; ++i) {
string state;
cin >> state;
nfaAcceptStates.insert(state);
}
cout << "Enter the alphabet (space-separated, end with #): ";
char symbol;
while (cin >> symbol && symbol != '#') {
alphabet.insert(symbol);
}
void convertToDFA() {
queue<set<string>> unprocessedStates;
map<set<string>, string> stateMapping;
int stateCounter = 0;
10
dfaStates.insert(stateMapping[startSet]);
if (isAcceptingState(startSet)) {
dfaAcceptStates.insert(stateMapping[startSet]);
}
while (!unprocessedStates.empty()) {
set<string> currentSet = unprocessedStates.front();
unprocessedStates.pop();
string currentState = stateMapping[currentSet];
if (!newSet.empty()) {
if (stateMapping.find(newSet) == stateMapping.end()) {
stateMapping[newSet] = "Q" +
to_string(stateCounter++);
dfaStates.insert(stateMapping[newSet]);
unprocessedStates.push(newSet);
if (isAcceptingState(newSet)) {
dfaAcceptStates.insert(stateMapping[newSet]);
}
}
dfaTransitions[{currentState, symbol}] =
stateMapping[newSet];
}
}
}
}
void displayDFA() {
cout << "\nDFA States: ";
for (const auto& state : dfaStates) {
cout << state << " ";
}
11
cout << it->first.first << " -- " << it->first.second
<< " --> " << it->second << "\n";
}
if (dfaTransitions.find({currentState, symbol}) ==
dfaTransitions.end()) {
cout << "No transition for (" << currentState << ", " << symbol <<
").\n";
cout << "String is rejected by the DFA.\n";
return;
}
if (dfaAcceptStates.find(currentState) != dfaAcceptStates.end()) {
cout << "String is accepted by the DFA.\n";
} else {
cout << "String is rejected by the DFA.\n";
}
}
private:
// Reports whether the given set of NFA state names contains at least one
// name that is also present in nfaAcceptStates.
bool isAcceptingState(const set<string>& stateSet) {
    auto member = stateSet.begin();
    while (member != stateSet.end()) {
        if (nfaAcceptStates.count(*member) > 0) {
            return true;
        }
        ++member;
    }
    return false;
}
12
};
/**
 * Menu-driven front end for the NFA-to-DFA converter.
 *
 * Shows the menu repeatedly and dispatches to the NFAtoDFA member functions
 * (defineNFA, convertToDFA, displayDFA, testString) until the user selects
 * option 5. A menu entry that is not a number is discarded and reported as
 * an invalid choice; if the input stream is exhausted the loop ends instead
 * of spinning.
 *
 * @return 0 on normal termination.
 */
int main() {
    NFAtoDFA automaton;
    int choice = 0;
    do {
        cout << "\nMenu:\n";
        cout << "1. Define NFA\n";
        cout << "2. Convert NFA to DFA\n";
        cout << "3. Display DFA\n";
        cout << "4. Test String\n";
        cout << "5. Exit\n";
        cout << "Enter your choice: ";
        if (!(cin >> choice)) {
            // A failed extraction leaves cin in a fail state; without
            // recovery every later read fails and the menu loops forever.
            cin.clear();
            string rejected;
            if (!(cin >> rejected)) {
                break;  // nothing left to read (EOF or unrecoverable error)
            }
            choice = 0;  // falls through to the "Invalid choice" branch
        }
        switch (choice) {
            case 1:
                automaton.defineNFA();
                break;
            case 2:
                automaton.convertToDFA();
                break;
            case 3:
                automaton.displayDFA();
                break;
            case 4: {
                string input;
                cout << "Enter a string to test: ";
                cin >> input;
                automaton.testString(input);
                break;
            }
            case 5:
                cout << "Exiting program.\n";
                break;
            default:
                cout << "Invalid choice. Please try again.\n";
        }
    } while (choice != 5);
    return 0;
}
Input/Output:
PS D:\Coding & Others> cd "d:\Coding & Others\C++\Compiler\" ; if
($?) { g++ exp2.cpp -o exp2 } ; if ($?) { .\exp2 }
Menu:
1. Define NFA
13
2. Convert NFA to DFA
3. Display DFA
4. Test String
5. Exit
Enter your choice: 1
Enter the number of states in NFA: 3
Enter NFA states:
A B C
Enter the start state of NFA: A
Enter the number of accepting states in NFA: 1
Enter NFA accepting states:
C
Enter the alphabet (space-separated, end with #): 0 1 #
Enter the number of transitions in NFA: 4
Enter transitions (Format: FromState InputSymbol ToState):
A 0 B
A 1 C
B 0 C
C 1 C
Menu:
1. Define NFA
2. Convert NFA to DFA
3. Display DFA
4. Test String
5. Exit
Enter your choice: 2
Menu:
1. Define NFA
2. Convert NFA to DFA
3. Display DFA
4. Test String
14
5. Exit
Enter your choice: 3
DFA States: Q0 Q1 Q2
Alphabet: 0 1
DFA Transitions:
Q0 -- 0 --> Q1
Q0 -- 1 --> Q2
Q1 -- 0 --> Q2
Q2 -- 1 --> Q2
Start State: Q0
Accepting States: Q2
Menu:
1. Define NFA
2. Convert NFA to DFA
3. Display DFA
4. Test String
5. Exit
Enter your choice: 4
Enter a string to test: 010
Testing string: 010
Current State: Q0, Symbol: 0
Moved to State: Q1
Current State: Q1, Symbol: 1
No transition for (Q1, 1).
String is rejected by the DFA.
Menu:
1. Define NFA
2. Convert NFA to DFA
3. Display DFA
4. Test String
15
5. Exit
Enter your choice: 4
Enter a string to test: 01
Testing string: 01
Current State: Q0, Symbol: 0
Moved to State: Q1
Current State: Q1, Symbol: 1
No transition for (Q1, 1).
String is rejected by the DFA.
Menu:
1. Define NFA
2. Convert NFA to DFA
3. Display DFA
4. Test String
5. Exit
Enter your choice: 4
Enter a string to test: 001
Testing string: 001
Current State: Q0, Symbol: 0
Moved to State: Q1
Current State: Q1, Symbol: 0
Moved to State: Q2
Current State: Q2, Symbol: 1
Moved to State: Q2
String is accepted by the DFA.
Menu:
1. Define NFA
2. Convert NFA to DFA
3. Display DFA
4. Test String
5. Exit
16
Enter your choice: 4
Enter a string to test: 00
Testing string: 00
Current State: Q0, Symbol: 0
Moved to State: Q1
Current State: Q1, Symbol: 0
Moved to State: Q2
String is accepted by the DFA.
Conclusion:
The program successfully implements the conversion of a Non-deterministic Finite
Automaton (NFA) without ε-edges to a Deterministic Finite Automaton (DFA). It
demonstrates the practical application of subset construction and automata theory
principles.
Experiment-3
a) Implement a DFA in LEX code which accepts odd number of 1’s and even number of 0’s.
b) Implement a DFA in LEX code which accepts strings over {0, 1, 2} having 231 as substring.
Aim:
1. Part A: To implement a DFA in LEX code that accepts strings with an odd number of
1s and an even number of 0s.
2. Part B: To implement a DFA in LEX code that accepts strings over {0, 1, 2} having 231
as a substring.
Algorithm:
For Part A:
1. Define states for the DFA: INITIAL, A, B, C, and DEAD.
o INITIAL: Start state.
o A: Odd number of 1s and even number of 0s.
o B: Even number of 1s and even number of 0s.
o C: Odd number of 1s and odd number of 0s.
o DEAD: Invalid state for invalid input.
2. Define transitions:
o Use regular expressions to handle input transitions between states.
17
o Transition to DEAD state for invalid inputs.
3. Print whether the string is Accepted, Not Accepted, or Invalid based on the DFA
rules.
For Part B:
1. Define states: INITIAL, S1, S2, S3, and DEAD.
o INITIAL: Start state.
o S1: Match for 2.
o S2: Match for 23.
o S3: Match for 231.
o DEAD: Invalid state.
2. Define transitions:
o Move between states based on input characters.
o Print Accepted if the string reaches state S3.
Source Code:
Part A: Odd number of 1s and even number of 0s
%{
%}
%s S1 S2 S3 S4 DEAD
%%
18
<S1>\n BEGIN INITIAL; {printf("Accepted\n");}
%%
/* Entry point for the Part A scanner: prompts once, then lets yylex()
 * consume the input. */
int main(void)
{
    printf("Enter String:\n");
    yylex();

    return 0;
}
Part B: DFA for strings having 231 as a substring
19
%{
%}
%s S1 S2 S3 DEAD
%%
%%
20
/* Entry point for the Part B scanner: prompts once, then lets yylex()
 * consume the input. */
int main(void)
{
    printf("Enter String:\n");
    yylex();

    return 0;
}
Input/Output:
Part A:
Part B:
Conclusion:
● DFA implemented using LEX successfully identifies strings with an odd number of 1s
and an even number of 0s.
● DFA implemented using LEX successfully identifies strings over {0, 1, 2} containing
231 as a substring.
21
ASSESSMENT – 2
EXPERIMENT-1
Construct a lexical analyser
1. Identify the tokens from simple statement as input stored in a linear array
2. Identify the tokens from small program (not exceeding 5 lines) as input stored in a
text file
3. Identify the tokens from a small program (not exceeding 5 lines) entered by the user and
stored in a text file
Aim: Construct a simple lexical analyser in C++ that:
● Identifies tokens from a simple statement stored in a linear array.
● Identifies tokens from a small program (≤5 lines) stored in a text file.
● Accepts a small program (≤5 lines) from the user, stores it in a text file, and then
identifies its tokens.
22
Algorithm:
1. Input Selection:
● Display a menu with three options.
● Based on user choice, either use a predefined string, read from a file, or accept
user input and store it in a file.
2. Tokenization Process:
● Traverse the input string character by character.
● For Identifiers/Keywords:
o If a letter is encountered, collect subsequent alphanumeric characters
(and underscores) to form a token.
o Check if the token is a keyword (from a predefined list); if yes, classify as
Keyword, else as Identifier.
● For Numbers:
o If a digit is encountered, collect all consecutive digits to form a numeric
token.
● For Operators:
o Check if the character is one of the operator symbols (e.g., +, -, *, /, =, %);
if so, classify as Operator.
● For Delimiters:
o Identify punctuation symbols (e.g., ;, ,, (, ), {, }, [, ]) and classify as
Delimiter.
● Skip Whitespaces:
o Ignore spaces and newlines during tokenization.
3. Output:
● Display each token along with its identified type.
Source Code:
#include <iostream>
#include <fstream>
#include <sstream>
#include <cctype>
#include <string>
using namespace std;
23
/**
 * Checks whether a lexeme is one of the reserved words recognised by the
 * analyser.
 *
 * The comparison is exact and case-sensitive.
 *
 * @param str  Lexeme to classify.
 * @return true if str equals one of the listed keywords, false otherwise.
 */
bool isKeyword(const string &str) {
    // Built once on first use rather than on every call.
    static const string keywords[] = {"int", "float", "if", "else", "while", "for", "do",
                                      "return", "void", "char", "double"};
    for (const string &keyword : keywords) {
        if (str == keyword)
            return true;
    }
    return false;
}
/**
 * Splits the input text into tokens and prints each one with its category.
 *
 * Categories, tried in this order for the character at the current position:
 *   - whitespace: skipped;
 *   - letter: start of an identifier/keyword made of letters, digits and '_';
 *   - digit: start of a run of digits, printed as Number;
 *   - isOperator(c): single-character Operator;
 *   - isDelimiter(c): single-character Delimiter;
 *   - anything else: printed as Unknown.
 *
 * @param input  Text to tokenize; may be empty and may contain newlines.
 */
void tokenize(const string &input) {
    const size_t len = input.length();
    size_t i = 0;
    while (i < len) {
        // The <cctype> classifiers require a value representable as
        // unsigned char; passing a negative plain char is undefined.
        const unsigned char current = static_cast<unsigned char>(input[i]);

        // Skip whitespace
        if (isspace(current)) {
            ++i;
            continue;
        }

        // Identifiers or Keywords
        if (isalpha(current)) {
            string token;
            while (i < len &&
                   (isalnum(static_cast<unsigned char>(input[i])) || input[i] == '_')) {
                token.push_back(input[i]);
                ++i;
            }
            if (isKeyword(token))
                cout << token << " : Keyword" << endl;
            else
                cout << token << " : Identifier" << endl;
        }
        // Numbers
        else if (isdigit(current)) {
            string token;
            while (i < len && isdigit(static_cast<unsigned char>(input[i]))) {
                token.push_back(input[i]);
                ++i;
            }
            cout << token << " : Number" << endl;
        }
        // Operators
        else if (isOperator(input[i])) {
            cout << input[i] << " : Operator" << endl;
            ++i;
        }
        // Delimiters (whitespace never reaches this branch; it is skipped above)
        else if (isDelimiter(input[i])) {
            cout << input[i] << " : Delimiter" << endl;
            ++i;
        }
        // Unknown characters
        else {
            cout << input[i] << " : Unknown" << endl;
            ++i;
        }
    }
}
/**
 * Entry point of the lexical analyser.
 *
 * Asks the user for one of three input modes and tokenizes the resulting text:
 *   1. a single statement typed by the user;
 *   2. the contents of input.txt;
 *   3. up to five lines typed by the user, which are first saved to
 *      user_program.txt.
 *
 * @return 0 on success or invalid choice, 1 if a required file cannot be opened.
 */
int main() {
    int choice = 0;
    cout << "Lexical Analyzer Options:" << endl;
    cout << "1. Analyze tokens from a linear array input" << endl;
    cout << "2. Analyze tokens from a text file" << endl;
    cout << "3. Input program from user, store in file, and analyze tokens" << endl;
    cout << "Enter your choice: ";
    cin >> choice;
    // Drop everything left on the choice line (not just one character) so
    // trailing spaces are not mistaken for the first line of input.
    string restOfLine;
    getline(cin, restOfLine);

    if (choice == 1) {
        // Option 1: User inputs a simple statement
        string input;
        cout << "\nEnter a simple statement: ";
        getline(cin, input);
        cout << "\nTokens:" << endl;
        tokenize(input);
    }
    else if (choice == 2) {
        // Option 2: Read input from a text file (input.txt)
        ifstream file("input.txt");
        if (!file) {
            cout << "\nUnable to open file input.txt" << endl;
            return 1;
        }
        string line, programText = "";
        while (getline(file, line)) {
            programText += line + "\n";
        }
        file.close();
        cout << "\nProgram from file:" << endl << programText << "\nTokens:" << endl;
        tokenize(programText);
    }
    else if (choice == 3) {
        // Option 3: Get program from user, store in file, and analyze tokens
        cout << "\nEnter your program (max 5 lines). Enter an empty line to finish:" << endl;
        string programText = "", line;
        int count = 0;
        while (count < 5 && getline(cin, line)) {
            if (line.empty())
                break;
            programText += line + "\n";
            count++;
        }
        // Store the user program in a text file
        ofstream file("user_program.txt");
        if (!file) {
            // Do not claim the program was stored when the file could not be created.
            cout << "\nUnable to open file user_program.txt" << endl;
            return 1;
        }
        file << programText;
        file.close();
        cout << "\nProgram stored in user_program.txt" << endl;
        cout << "Tokens:" << endl;
        tokenize(programText);
    }
    else {
        cout << "\nInvalid choice." << endl;
    }
    return 0;
}
Input/Output:
26
27
Conclusion:
● The program accepts input typed by the user or read from a text file, ensuring flexibility.
● It handles three distinct input methods, tokenizing the content appropriately.
● This modular approach serves as a foundation for a lexical analyser in C++.
EXPERIMENT-2
Construct a lexical analyser using LEX tool.
Aim: Construct a lexical analyser using the LEX tool to tokenize input such as keywords,
identifiers, numbers, and operators.
Algorithm:
1. Read the input character stream.
2. Use regular expressions to match patterns:
a. Match specific keywords (e.g., if, else).
b. Recognize identifiers (letters followed by letters/digits).
c. Identify numbers (sequences of digits).
28
d. Detect arithmetic operators and other symbols
3. Print the token type along with the matched lexeme.
4. Continue processing until the end of input.
Source Code:
%{
#include <stdio.h>
#include <string.h>
int line_num = 1;
%}
DIGIT [0-9]
LETTER [a-zA-Z]
ID {LETTER}({LETTER}|{DIGIT})*
NUMBER {DIGIT}+(\.{DIGIT}+)?
WHITESPACE [ \t]+
%%
{WHITESPACE} ;
\n {line_num++;}
"if" {printf("Keyword: IF\n");}
"else" {printf("Keyword: ELSE\n");}
"while" {printf("Keyword: WHILE\n");}
"int" {printf("Keyword: INT\n");}
"float" {printf("Keyword: FLOAT\n");}
"return" {printf("Keyword: RETURN\n");}
{ID} {printf("Identifier: %s\n", yytext);}
{NUMBER} {printf("Number: %s\n", yytext);}
"+" {printf("Operator: PLUS\n");}
"-" {printf("Operator: MINUS\n");}
"*" {printf("Operator: MULTIPLY\n");}
"/" {printf("Operator: DIVIDE\n");}
"=" {printf("Operator: ASSIGN\n");}
29
"==" {printf("Operator: EQUAL\n");}
"!=" {printf("Operator: NOT_EQUAL\n");}
"<" {printf("Operator: LESS_THAN\n");}
">" {printf("Operator: GREATER_THAN\n");}
"(" {printf("Delimiter: LEFT_PAREN\n");}
")" {printf("Delimiter: RIGHT_PAREN\n");}
"{" {printf("Delimiter: LEFT_BRACE\n");}
"}" {printf("Delimiter: RIGHT_BRACE\n");}
";" {printf("Delimiter: SEMICOLON\n");}
"," {printf("Delimiter: COMMA\n");}
"//"[^\n]* {printf("Comment: %s\n", yytext);}
. {printf("Error: Unexpected character %s at line %d\n", yytext, line_num);}
%%
/* End-of-input hook for the generated scanner: always returns 1, which
 * signals that there is no further input to switch to. */
int yywrap() {
    const int no_more_input = 1;
    return no_more_input;
}
/* Entry point of the LEX-based analyser: prints a banner, runs the scanner
 * over the input, then reports the final value of line_num. */
int main(void) {
    printf("Lexical Analysis Started...\n");

    yylex();

    printf("\nLexical Analysis Completed.\n");
    printf("Total lines processed: %d\n", line_num);

    return 0;
}
Input/Output:
30
Conclusion:
● The lex code efficiently tokenizes various components of the input.
● It demonstrates how to use regular expressions in LEX for constructing a simple
lexical analyser.
● The approach can be expanded to cover additional tokens and more complex
grammars as needed.
31
EXPERIMENT-3
Construct Predictive parse table using C language.
Hint: Consider the input grammar without left recursion, find FIRST and FOLLOW for each
non-terminal and then construct the parse table.
Aim: Construct a predictive parsing table from a given grammar (assumed free from left
recursion) by computing the FIRST and FOLLOW sets for each non-terminal.
Algorithm:
1. Input Grammar:
● Read the number of productions.
● Input each production in the format A->α (use # to denote epsilon).
2. Compute FIRST Sets:
● For each production A → α, scan symbols in α from left to right.
● If a symbol is terminal (or not an uppercase letter), add it to FIRST(A); if it’s #
(epsilon), mark nullable.
● If the symbol is a non-terminal, add FIRST (symbol) (excluding epsilon) to FIRST(A); if
the non-terminal is nullable, continue to the next symbol.
● If all symbols are nullable, add epsilon (#) to FIRST(A).
3. Compute FOLLOW Sets:
● Add $ to FOLLOW (start symbol).
● For each production A → αBβ, add all terminals from FIRST(β) (excluding epsilon) to
FOLLOW(B).
● If β is nullable (or if B is the last symbol), add FOLLOW(A) to FOLLOW(B).
4. Construct Predictive Parsing Table:
● For each production A → α:
o For every terminal t in FIRST(α) (except epsilon), set table entry [A][t] = A -> α.
o If FIRST(α) contains epsilon, for every terminal in FOLLOW(A) (including $), set
table entry [A][terminal] = A -> α.
Source Code:
#include <stdio.h>
#include <ctype.h>
#include <string.h>
32
#include <stdlib.h>
/* count: loop bound used when walking production[] (number of productions);
 * n: number of symbols currently stored in first[]. */
int count, n = 0;
/* Per non-terminal FIRST sets: cell 0 holds the non-terminal, the following
 * cells hold its FIRST symbols; readers stop at the first '!' cell. */
char calc_first[10][100];
/* Per non-terminal FOLLOW sets, same layout as calc_first; every cell is
 * pre-filled with '!' before the sets are computed. */
char calc_follow[10][100];
/* Number of symbols currently stored in f[]. */
int m = 0;
/* production[i][0] is the left-hand side and the right-hand side starts at
 * index 2 (index 1 is presumably the '=' separator - confirm against the
 * input code). first[] is the scratch buffer of FIRST symbols. */
char production[10][10], first[10];
/* Scratch buffer of FOLLOW symbols, filled by follow(). */
char f[10];
/* Shared loop index. */
int k;
/* Left-hand side of the production currently being processed. */
char ck;
/* Index of the production currently being processed. */
int e;
33
findfirst(c, 0, 0);
ptr += 1;
done[ptr] = c;
printf("First(%c)= { ", c);
calc_first[point1][point2++] = c;
for (i = 0 + jm; i < n; i++)
{
int lark = 0, chk = 0;
for (lark = 0; lark < point2; lark++)
{
if (first[i] == calc_first[point1][lark])
{
chk = 1;
break;
}
}
if (chk == 0)
{
printf("%c, ", first[i]);
calc_first[point1][point2++] = first[i];
}
}
printf("}\n");
jm = n;
point1++;
}
printf("\n");
printf("-----------------------------------------------\n\n");
char donee[count];
ptr = -1;
for (k = 0; k < count; k++)
{
for (kay = 0; kay < 100; kay++)
{
calc_follow[k][kay] = '!';
}
}
point1 = 0;
int land = 0;
for (e = 0; e < count; e++)
{
ck = production[e][0];
point2 = 0;
xxx = 0;
for (kay = 0; kay <= ptr; kay++)
if (ck == donee[kay])
xxx = 1;
if (xxx == 1)
continue;
land += 1;
follow(ck);
ptr += 1;
34
donee[ptr] = ck;
printf("Follow(%c) = { ", ck);
calc_follow[point1][point2++] = ck;
for (i = 0 + km; i < m; i++)
{
int lark = 0, chk = 0;
for (lark = 0; lark < point2; lark++)
{
if (f[i] == calc_follow[point1][lark])
{
chk = 1;
break;
}
}
if (chk == 0)
{
printf("%c, ", f[i]);
calc_follow[point1][point2++] = f[i];
}
}
printf(" }\n");
km = m;
point1++;
}
char ter[10];
for (k = 0; k < 10; k++)
{
ter[k] = '!';
}
int ap, vp, sid = 0;
for (k = 0; k < count; k++)
{
for (kay = 0; kay < count; kay++)
{
if (!isupper(production[k][kay]) && production[k][kay] != '#' &&
production[k][kay] != '=' && production[k][kay] != '\0')
{
vp = 0;
for (ap = 0; ap < sid; ap++)
{
if (production[k][kay] == ter[ap])
{
vp = 1;
break;
}
}
if (vp == 0)
{
ter[sid] = production[k][kay];
sid++;
}
}
35
}
}
ter[sid] = '$';
sid++;
printf("\nParsing Table:");
printf("\n====================================================================
=================================================\n");
printf("\t|\t");
for (ap = 0; ap < sid; ap++)
{
printf("%c\t\t", ter[ap]);
}
printf("\n====================================================================
=================================================\n");
char first_prod[count][sid];
for (ap = 0; ap < count; ap++)
{
int destiny = 0;
k = 2;
int ct = 0;
char tem[100];
while (production[ap][k] != '\0')
{
if (!isupper(production[ap][k]))
{
tem[ct++] = production[ap][k];
tem[ct++] = '_';
tem[ct++] = '\0';
k++;
break;
}
else
{
int zap = 0;
int tuna = 0;
for (zap = 0; zap < count; zap++)
{
if (calc_first[zap][0] == production[ap][k])
{
for (tuna = 1; tuna < 100; tuna++)
{
if (calc_first[zap][tuna] != '!')
{
tem[ct++] = calc_first[zap][tuna];
}
else
break;
}
break;
}
36
}
tem[ct++] = '_';
}
k++;
}
int zap = 0, tuna;
for (tuna = 0; tuna < ct; tuna++)
{
if (tem[tuna] == '#')
{
zap = 1;
}
else if (tem[tuna] == '_')
{
if (zap == 1)
{
zap = 0;
}
else
break;
}
else
{
first_prod[ap][destiny++] = tem[tuna];
}
}
}
char table[land][sid + 1];
ptr = -1;
for (ap = 0; ap < land; ap++)
{
for (kay = 0; kay < (sid + 1); kay++)
{
table[ap][kay] = '!';
}
}
for (ap = 0; ap < count; ap++)
{
ck = production[ap][0];
xxx = 0;
for (kay = 0; kay <= ptr; kay++)
if (ck == table[kay][0])
xxx = 1;
if (xxx == 1)
continue;
else
{
ptr = ptr + 1;
table[ptr][0] = ck;
}
}
for (ap = 0; ap < count; ap++)
37
{
int tuna = 0;
while (first_prod[ap][tuna] != '\0')
{
int to, ni = 0;
for (to = 0; to < sid; to++)
{
if (first_prod[ap][tuna] == ter[to])
{
ni = 1;
}
}
if (ni == 1)
{
char xz = production[ap][0];
int cz = 0;
while (table[cz][0] != xz)
{
cz = cz + 1;
}
int vz = 0;
while (ter[vz] != first_prod[ap][tuna])
{
vz = vz + 1;
}
table[cz][vz + 1] = (char)(ap + 65);
}
tuna++;
}
}
for (k = 0; k < sid; k++)
{
for (kay = 0; kay < 100; kay++)
{
if (calc_first[k][kay] == '!')
{
break;
}
else if (calc_first[k][kay] == '#')
{
int fz = 1;
while (calc_follow[k][fz] != '!')
{
char xz = production[k][0];
int cz = 0;
while (table[cz][0] != xz)
{
cz = cz + 1;
}
int vz = 0;
while (ter[vz] != calc_follow[k][fz])
{
38
vz = vz + 1;
}
table[k][vz + 1] = '#';
fz++;
}
break;
}
}
}
for (ap = 0; ap < land; ap++)
{
printf(" %c\t|\t", table[ap][0]);
for (kay = 1; kay < (sid + 1); kay++)
{
if (table[ap][kay] == '!')
printf("\t\t");
else if (table[ap][kay] == '#')
printf("%c=#\t\t", table[ap][0]);
else
{
int mum = (int)(table[ap][kay]);
mum -= 65;
printf("%s\t\t", production[mum]);
}
}
printf("\n");
printf("----------------------------------------------------------------------
-----------------------------------------------");
printf("\n");
}
/*
 * follow - append the FOLLOW symbols of non-terminal c to the global buffer f.
 *
 * '$' is added when c is the left-hand side of the first production. Then,
 * for every occurrence of c on a right-hand side (positions >= 2):
 *   - if a symbol follows it, followfirst() is called for that symbol;
 *   - if it is the last symbol and differs from the production's left-hand
 *     side, the FOLLOW of that left-hand side is added recursively.
 *
 * A per-symbol "in progress" flag stops the recursion when FOLLOW sets depend
 * on each other cyclically (e.g. A=aB together with B=bA), which would
 * otherwise recurse without end.
 */
void follow(char c)
{
    /* One flag per possible character value, set while follow(c) is active. */
    static char in_progress[256];
    const int rows = (int)(sizeof production / sizeof production[0]);
    const int cols = (int)(sizeof production[0] / sizeof production[0][0]);
    const unsigned char slot = (unsigned char)c;
    int i, j;

    if (in_progress[slot])
    {
        return;
    }
    in_progress[slot] = 1;

    if (production[0][0] == c)
    {
        /* Never write past the end of f. */
        if (m < (int)(sizeof f / sizeof f[0]))
        {
            f[m++] = '$';
        }
    }
    for (i = 0; i < rows; i++)
    {
        /* Stop one short of the row end so production[i][j + 1] stays in bounds. */
        for (j = 2; j + 1 < cols; j++)
        {
            if (production[i][j] == c)
            {
                if (production[i][j + 1] != '\0')
                {
                    followfirst(production[i][j + 1], i, (j + 2));
                }
                if (production[i][j + 1] == '\0' && c != production[i][0])
                {
                    follow(production[i][0]);
                }
            }
        }
    }

    in_progress[slot] = 0;
}
40
if (calc_first[i][0] == c)
break;
}
while (calc_first[i][j] != '!')
{
if (calc_first[i][j] != '#')
{
f[m++] = calc_first[i][j];
}
else
{
if (production[c1][c2] == '\0')
{
follow(production[c1][0]);
}
else
{
followfirst(production[c1][c2], c1, c2 + 1);
}
}
j++;
}
}
}
Input/Output:
41
Conclusion: The program reads an input grammar, computes the FIRST and FOLLOW sets,
and successfully constructs a predictive parsing table based on these sets.
EXPERIMENT-4
Implement the predictive parsing algorithm, taking the parse table and the input string as inputs. Use C
language for implementation.
Aim:
● Implement a predictive parsing algorithm in C.
● Simulate parsing using a stack.
● Display each parsing step in a bordered table with columns: Stack, Input String, and
Action.
Algorithm:
1. Initialize Resources:
▪ Input a grammar
▪ Read the input string (tokens separated by spaces) and ensure it ends
with a terminal symbol "$".
▪ Initialize a stack with "$" (bottom) and the start symbol "E" (top).
2. Parsing Process:
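▪ If the top of the stack and current input token are both "$": accept the string and stop.
42
▪ Else if the top of the stack is a terminal: pop it and advance the input when it matches the current token; otherwise reject the string.
▪ Else (the top of the stack is a non-terminal): look up the parse table entry for that non-terminal and the current token.
▪ If a rule exists: pop the non-terminal and push the rule's right-hand side onto the stack in reverse order.
▪ Else: reject the string.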
▪ Save each step (current stack, input string, and action) as a row in a
table.
▪ After processing, print the table with borders around each cell.
Source Code:
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
/* count: loop bound used when walking production[] (number of productions);
 * n: number of symbols currently stored in first[]. */
int count, n = 0;
/* Per non-terminal FIRST sets: cell 0 holds the non-terminal, the following
 * cells hold its FIRST symbols; readers stop at the first '!' cell. */
char calc_first[10][100];
/* Per non-terminal FOLLOW sets, same layout as calc_first; every cell is
 * pre-filled with '!' before the sets are computed. */
char calc_follow[10][100];
43
/* Number of symbols currently stored in f[]. */
int m = 0;
/* production[i][0] is the left-hand side and the right-hand side starts at
 * index 2 (index 1 is presumably the '=' separator - confirm against the
 * input code). first[] is the scratch buffer of FIRST symbols. */
char production[10][10], first[10];
/* Scratch buffer of FOLLOW symbols, filled by follow(). */
char f[10];
/* Shared loop index. */
int k;
/* Left-hand side of the production currently being processed. */
char ck;
/* Index of the production currently being processed. */
int e;
44
{
if (first[i] == calc_first[point1][lark])
{
chk = 1;
break;
}
}
if (chk == 0)
{
printf("%c, ", first[i]);
calc_first[point1][point2++] = first[i];
}
}
printf("}\n");
jm = n;
point1++;
}
printf("\n");
printf("-----------------------------------------------\n\n");
char donee[count];
ptr = -1;
for (k = 0; k < count; k++)
{
for (kay = 0; kay < 100; kay++)
{
calc_follow[k][kay] = '!';
}
}
point1 = 0;
int land = 0;
for (e = 0; e < count; e++)
{
ck = production[e][0];
point2 = 0;
xxx = 0;
for (kay = 0; kay <= ptr; kay++)
if (ck == donee[kay])
xxx = 1;
if (xxx == 1)
continue;
land += 1;
follow(ck);
ptr += 1;
donee[ptr] = ck;
printf("Follow(%c) = { ", ck);
calc_follow[point1][point2++] = ck;
for (i = 0 + km; i < m; i++)
{
int lark = 0, chk = 0;
for (lark = 0; lark < point2; lark++)
{
if (f[i] == calc_follow[point1][lark])
45
{
chk = 1;
break;
}
}
if (chk == 0)
{
printf("%c, ", f[i]);
calc_follow[point1][point2++] = f[i];
}
}
printf(" }\n");
km = m;
point1++;
}
char ter[10];
for (k = 0; k < 10; k++)
{
ter[k] = '!';
}
int ap, vp, sid = 0;
for (k = 0; k < count; k++)
{
for (kay = 0; kay < count; kay++)
{
if (!isupper(production[k][kay]) && production[k][kay] != '#' &&
production[k][kay] != '=' && production[k][kay] != '\0')
{
vp = 0;
for (ap = 0; ap < sid; ap++)
{
if (production[k][kay] == ter[ap])
{
vp = 1;
break;
}
}
if (vp == 0)
{
ter[sid] = production[k][kay];
sid++;
}
}
}
}
ter[sid] = '$';
sid++;
printf("\nParsing Table:");
printf("\n====================================================================
=================================================\n");
printf("\t|\t");
46
for (ap = 0; ap < sid; ap++)
{
printf("%c\t\t", ter[ap]);
}
printf("\n====================================================================
=================================================\n");
char first_prod[count][sid];
for (ap = 0; ap < count; ap++)
{
int destiny = 0;
k = 2;
int ct = 0;
char tem[100];
while (production[ap][k] != '\0')
{
if (!isupper(production[ap][k]))
{
tem[ct++] = production[ap][k];
tem[ct++] = '_';
tem[ct++] = '\0';
k++;
break;
}
else
{
int zap = 0;
int tuna = 0;
for (zap = 0; zap < count; zap++)
{
if (calc_first[zap][0] == production[ap][k])
{
for (tuna = 1; tuna < 100; tuna++)
{
if (calc_first[zap][tuna] != '!')
{
tem[ct++] = calc_first[zap][tuna];
}
else
break;
}
break;
}
}
tem[ct++] = '_';
}
k++;
}
int zap = 0, tuna;
for (tuna = 0; tuna < ct; tuna++)
{
if (tem[tuna] == '#')
47
{
zap = 1;
}
else if (tem[tuna] == '_')
{
if (zap == 1)
{
zap = 0;
}
else
break;
}
else
{
first_prod[ap][destiny++] = tem[tuna];
}
}
}
char table[land][sid + 1];
ptr = -1;
for (ap = 0; ap < land; ap++)
{
for (kay = 0; kay < (sid + 1); kay++)
{
table[ap][kay] = '!';
}
}
for (ap = 0; ap < count; ap++)
{
ck = production[ap][0];
xxx = 0;
for (kay = 0; kay <= ptr; kay++)
if (ck == table[kay][0])
xxx = 1;
if (xxx == 1)
continue;
else
{
ptr = ptr + 1;
table[ptr][0] = ck;
}
}
for (ap = 0; ap < count; ap++)
{
int tuna = 0;
while (first_prod[ap][tuna] != '\0')
{
int to, ni = 0;
for (to = 0; to < sid; to++)
{
if (first_prod[ap][tuna] == ter[to])
{
48
ni = 1;
}
}
if (ni == 1)
{
char xz = production[ap][0];
int cz = 0;
while (table[cz][0] != xz)
{
cz = cz + 1;
}
int vz = 0;
while (ter[vz] != first_prod[ap][tuna])
{
vz = vz + 1;
}
table[cz][vz + 1] = (char)(ap + 65);
}
tuna++;
}
}
for (k = 0; k < sid; k++)
{
for (kay = 0; kay < 100; kay++)
{
if (calc_first[k][kay] == '!')
{
break;
}
else if (calc_first[k][kay] == '#')
{
int fz = 1;
while (calc_follow[k][fz] != '!')
{
char xz = production[k][0];
int cz = 0;
while (table[cz][0] != xz)
{
cz = cz + 1;
}
int vz = 0;
while (ter[vz] != calc_follow[k][fz])
{
vz = vz + 1;
}
table[k][vz + 1] = '#';
fz++;
}
break;
}
}
}
49
for (ap = 0; ap < land; ap++)
{
printf(" %c\t|\t", table[ap][0]);
for (kay = 1; kay < (sid + 1); kay++)
{
if (table[ap][kay] == '!')
printf("\t\t");
else if (table[ap][kay] == '#')
printf("%c=#\t\t", table[ap][0]);
else
{
int mum = (int)(table[ap][kay]);
mum -= 65;
printf("%s\t\t", production[mum]);
}
}
printf("\n");
printf("----------------------------------------------------------------------
-----------------------------------------------");
printf("\n");
}
int j;
printf("\n\nEnter the Input String: ");
char input[100];
scanf("%s%c", input, &ch);
printf("\n====================================================================
=======\n");
printf("\tStack\t\t\tInput\t\t\tAction");
printf("\n====================================================================
=======\n");
int i_ptr = 0, s_ptr = 1;
char stack[100];
stack[0] = '$';
stack[1] = table[0][0];
while (s_ptr != -1)
{
printf("\t");
int vamp = 0;
for (vamp = 0; vamp <= s_ptr; vamp++)
{
printf("%c", stack[vamp]);
}
printf("\t\t\t");
vamp = i_ptr;
while (input[vamp] != '\0')
{
printf("%c", input[vamp]);
vamp++;
}
50
printf("\t\t\t");
char her = input[i_ptr];
char him = stack[s_ptr];
s_ptr--;
if (!isupper(him))
{
if (her == him)
{
i_ptr++;
printf("POP\n");
}
else
{
printf("\nString Not Accepted!\n");
exit(0);
}
}
else
{
for (i = 0; i < sid; i++)
{
if (ter[i] == her)
break;
}
char produ[100];
for (j = 0; j < land; j++)
{
if (him == table[j][0])
{
if (table[j][i + 1] == '#')
{
printf("%c=#\n", table[j][0]);
produ[0] = '#';
produ[1] = '\0';
}
else if (table[j][i + 1] != '!')
{
int mum = (int)(table[j][i + 1]);
mum -= 65;
strcpy(produ, production[mum]);
printf("%s\n", produ);
}
else
{
printf("\nString Not Accepted!\n");
exit(0);
}
}
}
int le = strlen(produ);
le = le - 1;
if (le == 0)
51
{
continue;
}
for (j = le; j >= 2; j--)
{
s_ptr++;
stack[s_ptr] = produ[j];
}
}
}
printf("\n");
if (input[i_ptr] == '\0')
{
printf("\tString Accepted!\n");
}
else
printf("\n\tString Rejected!\n");
printf("\n");
}
/*
 * follow - append the FOLLOW symbols of non-terminal c to the global buffer f.
 *
 * '$' is added when c is the left-hand side of the first production. Then,
 * for every occurrence of c on a right-hand side (positions >= 2):
 *   - if a symbol follows it, followfirst() is called for that symbol;
 *   - if it is the last symbol and differs from the production's left-hand
 *     side, the FOLLOW of that left-hand side is added recursively.
 *
 * A per-symbol "in progress" flag stops the recursion when FOLLOW sets depend
 * on each other cyclically (e.g. A=aB together with B=bA), which would
 * otherwise recurse without end.
 */
void follow(char c)
{
    /* One flag per possible character value, set while follow(c) is active. */
    static char in_progress[256];
    const int rows = (int)(sizeof production / sizeof production[0]);
    const int cols = (int)(sizeof production[0] / sizeof production[0][0]);
    const unsigned char slot = (unsigned char)c;
    int i, j;

    if (in_progress[slot])
    {
        return;
    }
    in_progress[slot] = 1;

    if (production[0][0] == c)
    {
        /* Never write past the end of f. */
        if (m < (int)(sizeof f / sizeof f[0]))
        {
            f[m++] = '$';
        }
    }
    for (i = 0; i < rows; i++)
    {
        /* Stop one short of the row end so production[i][j + 1] stays in bounds. */
        for (j = 2; j + 1 < cols; j++)
        {
            if (production[i][j] == c)
            {
                if (production[i][j + 1] != '\0')
                {
                    followfirst(production[i][j + 1], i, (j + 2));
                }
                if (production[i][j + 1] == '\0' && c != production[i][0])
                {
                    follow(production[i][0]);
                }
            }
        }
    }

    in_progress[slot] = 0;
}
52
}
for (j = 0; j < count; j++)
{
if (production[j][0] == c)
{
if (production[j][2] == '#')
{
if (production[q1][q2] == '\0')
first[n++] = '#';
else if (production[q1][q2] != '\0' && (q1 != 0 || q2 != 0))
{
findfirst(production[q1][q2], q1, (q2 + 1));
}
else
first[n++] = '#';
}
else if (!isupper(production[j][2]))
{
first[n++] = production[j][2];
}
else
{
findfirst(production[j][2], j, 3);
}
}
}
}
53
}
else
{
followfirst(production[c1][c2], c1, c2 + 1);
}
}
j++;
}
}
}
Input/Output:
54
Conclusion:
● The code implements a predictive parser using a stack and a pre-defined parse table.
● Each step of the parsing process is recorded and displayed in a neatly formatted
bordered table.
● The parser successfully accepts or rejects the input string based on the grammar
rules.
55
ASSESSMENT – 3
EXPERIMENT-1
a) Construct Simple LR (SLR) parse table using C language.
b) Implement the LR parsing algorithm, taking both the parse table and the input string as inputs.
Use C language for implementation.
Aim: To construct a Simple LR (SLR) parse table and implement the SLR parsing algorithm in
C++, taking both the grammar and input string as user inputs.
Algorithm:
Source Code:
#include <iostream>
#include <vector>
#include <map>
#include <set>
#include <stack>
#include <string>
#include <algorithm>
#include <iomanip>
56
using namespace std;
class SLRParser
{
private:
vector<Production> productions;
set<char> terminals;
set<char> non_terminals;
char start_symbol;
map<char, set<char>> first;
map<char, set<char>> follow;
vector<State> canonical_collection;
map<int, map<char, string>> action_table;
map<int, map<char, int>> goto_table;
// Helper functions
57
// Reports whether c is present in the `terminals` set.
bool is_terminal(char c)
{
    return terminals.count(c) != 0;
}
// Reports whether c is present in the `non_terminals` set.
bool is_non_terminal(char c)
{
    return non_terminals.count(c) != 0;
}
public:
// Default constructor. The body is intentionally empty: no member is
// assigned here, so start_symbol holds no meaningful value until it is set
// elsewhere (per the note below, from the grammar the user enters).
SLRParser()
{
// Don't pre-initialize start symbol - we'll determine it from input
}
void input_grammar()
{
cout << "Enter the number of productions: ";
int n;
cin >> n;
productions.push_back(Production(left, right));
non_terminals.insert(left);
58
augmented_start++; // Find an unused letter for the augmented
start
}
productions.insert(productions.begin(), Production(augmented_start,
string(1, start_symbol)));
non_terminals.insert(augmented_start);
start_symbol = augmented_start;
}
void compute_first_sets()
{
// Initialize FIRST sets
for (char nt : non_terminals)
{
first[nt] = set<char>();
}
if (alpha == "e")
{ // If X -> e is a production, add e to FIRST(X)
if (first[A].insert('e').second)
{
changed = true;
}
continue;
}
if (is_terminal(Yi))
{
// If Yi is terminal, add Yi to FIRST(X)
if (first[A].insert(Yi).second)
{
changed = true;
}
all_derive_epsilon = false;
break;
59
}
else
{
// If Yi is non-terminal
bool epsilon_in_first_Yi = false;
void compute_follow_sets()
{
// Initialize FOLLOW sets
for (char nt : non_terminals)
{
follow[nt] = set<char>();
}
60
// Add $ to FOLLOW(S) where S is the start symbol
follow[start_symbol].insert('$');
if (is_terminal(Y))
{
if (follow[B].insert(Y).second)
{
changed = true;
}
all_derive_epsilon = false;
break;
}
else
{
// Add FIRST(Y) - {e} to FOLLOW(B)
bool epsilon_in_first_Y = false;
for (char c : first[Y])
{
if (c == 'e')
{
epsilon_in_first_Y = true;
}
else
{
61
if (follow[B].insert(c).second)
{
changed = true;
}
}
}
if (!epsilon_in_first_Y)
{
all_derive_epsilon = false;
break;
}
}
}
}
62
// Collect symbols after dots
set<char> symbols;
for (const Item &item : current)
{
if (item.dot < item.right.length())
{
symbols.insert(item.right[item.dot]);
}
}
63
for (size_t j = 0; j < canonical_collection.size(); j++)
{
if (canonical_collection[j] == next)
{
next_state_index = j;
break;
}
}
if (next_state_index == -1)
{
// Add new state
next_state_index = canonical_collection.size();
canonical_collection.push_back(next);
}
64
// Add reduce actions for all terminals in
FOLLOW(item.left)
for (char t : follow[item.left])
{
// Only add reduce action if there isn't already a
shift action
if (action_table[i].find(t) == action_table[i].end())
{
action_table[i][t] = "r" + to_string(prod_num);
}
}
}
}
}
}
State closure(State I)
{
State result = I;
vector<Item> worklist(I.begin(), I.end());
while (!worklist.empty())
{
Item current = worklist.back();
worklist.pop_back();
return result;
}
65
{
State J;
void display_grammar()
{
cout << "\n=== Grammar ===\n";
for (int i = 0; i < productions.size(); i++)
{
cout << i << ": " << productions[i].left << " -> " <<
productions[i].right << endl;
}
void display_first_follow()
{
cout << "\n=== FIRST Sets ===\n";
for (char nt : non_terminals)
{
cout << "FIRST(" << nt << ") = { ";
66
for (char c : first[nt])
{
cout << c << " ";
}
cout << "}" << endl;
}
void display_canonical_collection()
{
cout << "\n=== Canonical Collection of LR(0) Items ===\n";
for (int i = 0; i < canonical_collection.size(); i++)
{
cout << "I_" << i << ":" << endl;
for (const Item &item : canonical_collection[i])
{
cout << " " << item.left << " -> ";
for (int j = 0; j < item.right.length(); j++)
{
if (j == item.dot)
cout << ". ";
cout << item.right[j] << " ";
}
if (item.dot == item.right.length())
cout << ". ";
cout << endl;
}
cout << endl;
}
}
void display_parsing_table()
{
cout << "\n=== SLR Parsing Table ===\n";
67
{
if (terminals.find(t) != terminals.end())
{
cout << setw(8) << t;
}
}
68
cout << setw(8) << "";
}
}
}
cout << endl;
}
}
stack<int> state_stack;
stack<char> symbol_stack;
int position = 0;
char current_input = str[position];
while (true)
{
int s = state_stack.top();
while (!temp_state.empty())
{
stack_content = to_string(temp_state.top()) + stack_content;
temp_state.pop();
if (!temp_symbol.empty())
{
stack_content = temp_symbol.top() + stack_content;
temp_symbol.pop();
}
}
if (action_table[s].find(current_input) == action_table[s].end())
{
cout << setw(20) << "Error: No action" << endl;
69
return false;
}
if (action[0] == 's')
{
// Shift
int t = stoi(action.substr(1));
state_stack.push(t);
symbol_stack.push(current_input);
position++;
current_input = str[position];
int t = state_stack.top();
symbol_stack.push(prod.left);
if (goto_table[t].find(prod.left) == goto_table[t].end())
{
cout << setw(20) << "Error: No goto" << endl;
return false;
}
state_stack.push(goto_table[t][prod.left]);
70
}
else
{
// Error
cout << setw(20) << "Error: Invalid action" << endl;
return false;
}
}
}
void run()
{
// Input the grammar
input_grammar();
if (parse_string(input))
{
cout << "\nString accepted by the grammar." << endl;
}
else
{
cout << "\nString rejected by the grammar." << endl;
}
}
71
cout << "Action table entries: " << endl;
for (const auto &state_entry : action_table)
{
for (const auto &symbol_entry : state_entry.second)
{
cout << "Action[" << state_entry.first << "][" <<
symbol_entry.first
<< "] = " << symbol_entry.second << endl;
}
}
int main()
{
SLRParser parser;
parser.run();
return 0;
}
Input/Output:
72
73
74
Conclusion: The SLR parser successfully constructs a parse table using LR(0) items and uses it
to parse an input string. However, SLR has limitations because it relies on FOLLOW sets,
leading to possible conflicts for complex grammars.
75
EXPERIMENT-2
a) Construct Canonical LR (CLR) parse table using C language.
b) Implement the LR parsing algorithm, taking both the parse table and the input string as inputs.
Use C language for implementation.
Aim: To construct a Canonical LR (CLR) parse table and implement the LR parsing algorithm
in C++, taking both the grammar and input string as user inputs.
Algorithm:
5. Perform LR Parsing:
Source Code:
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <map>
#include <set>
#include <stack>
#include <queue>
76
#include <algorithm>
#include <iomanip>
// Production class
class Production
{
public:
Symbol lhs;
vector<Symbol> rhs;
77
class LRItem
{
public:
int productionIdx;
int dotPosition;
mutable set<Symbol> lookaheads; // Make lookaheads mutable
// Compare lookaheads
vector<Symbol> thisLA(lookaheads.begin(), lookaheads.end());
vector<Symbol> otherLA(other.lookaheads.begin(),
other.lookaheads.end());
if (thisLA.size() != otherLA.size())
{
return thisLA.size() < otherLA.size();
}
return false;
}
78
public:
set<LRItem> items;
return true;
}
};
vector<ItemSet> canonicalCollection;
map<pair<int, Symbol>, int> gotoTable;
map<int, map<Symbol, pair<char, int>>> actionTable;
public:
CLRParser()
79
{
endSymbol = Symbol("$", true);
epsilonSymbol = Symbol("#", true);
}
void readGrammar()
{
cout << "Enter the number of non-terminals: ";
int numNonTerminals;
cin >> numNonTerminals;
cin.ignore();
terminals.push_back(endSymbol);
80
// Add augmented production S' -> S
Symbol augmentedStartSymbol(startSymbol.name + "'", false);
nonTerminals.insert(nonTerminals.begin(), augmentedStartSymbol);
productions.push_back(Production(augmentedStartSymbol,
{startSymbol}));
cout << "Enter productions in the format 'A -> B C | D' (use '#' for
epsilon):" << endl;
for (int i = 0; i < numProductions; i++)
{
getline(cin, line);
size_t arrowPos = line.find("->");
if (arrowPos != string::npos)
{
string lhsStr = line.substr(0, arrowPos);
string rhsStr = line.substr(arrowPos + 2);
// Trim whitespace
lhsStr.erase(0, lhsStr.find_first_not_of(" \t"));
lhsStr.erase(lhsStr.find_last_not_of(" \t") + 1);
Symbol lhs;
for (const auto &nt : nonTerminals)
{
if (nt.name == lhsStr)
{
lhs = nt;
break;
}
}
size_t pos = 0;
while (pos < rhsStr.length())
{
vector<Symbol> rhs;
size_t pipePos = rhsStr.find('|', pos);
string prod;
if (pipePos != string::npos)
{
prod = rhsStr.substr(pos, pipePos - pos);
pos = pipePos + 1;
}
else
{
prod = rhsStr.substr(pos);
81
pos = rhsStr.length();
}
// Trim whitespace
prod.erase(0, prod.find_first_not_of(" \t"));
prod.erase(prod.find_last_not_of(" \t") + 1);
istringstream prodStream(prod);
string symbolStr;
if (!found)
{
for (const auto &t : terminals)
{
if (t.name == symbolStr)
{
rhs.push_back(t);
found = true;
break;
}
}
}
if (!found)
{
cout << "Symbol " << symbolStr << " not found in
grammar!" << endl;
}
}
productions.push_back(Production(lhs, rhs));
}
}
82
}
}
set<Symbol> first;
if (symbol.isTerminal)
{
first.insert(symbol);
return first;
}
if (firstOfSymbol.find(epsilonSymbol) !=
firstOfSymbol.end())
{
continueToNext = true;
if (i == prod.rhs.size() - 1)
{
first.insert(epsilonSymbol);
83
}
}
i++;
}
}
}
}
firstSets[symbol] = first;
return first;
}
if (firstOfSymbol.find(epsilonSymbol) == firstOfSymbol.end())
{
allHaveEpsilon = false;
}
84
return result;
}
void computeFollowSets()
{
// Initialize FOLLOW sets
for (const auto &nt : nonTerminals)
{
followSets[nt] = set<Symbol>();
}
if (i < prod.rhs.size() - 1)
{
// Compute FIRST(β)
set<Symbol> firstOfBeta = computeFirst(prod.rhs, i
+ 1);
85
}
}
else
{
// If B is at the end of the production, add
FOLLOW(A) to FOLLOW(B)
for (const auto &s : followSets[prod.lhs])
{
followSets[prod.rhs[i]].insert(s);
}
}
if (followSets[prod.rhs[i]].size() >
followSetSizeBefore)
{
changed = true;
}
}
}
}
}
}
void computeFirstAndFollowSets()
{
// Compute FIRST sets
for (const auto &symbol : terminals)
{
firstSets[symbol] = {symbol};
}
while (changed)
{
changed = false;
set<LRItem> newItems = itemSet.items;
86
if (item.dotPosition <
productions[item.productionIdx].rhs.size())
{
Symbol symbolAfterDot =
productions[item.productionIdx].rhs[item.dotPosition];
if (!symbolAfterDot.isTerminal)
{
// For each production B -> γ where B is the symbol
after dot
for (size_t i = 0; i < productions.size(); i++)
{
if (productions[i].lhs.name ==
symbolAfterDot.name)
{
// Compute lookaheads
set<Symbol> lookaheads;
beta.push_back(productions[item.productionIdx].rhs[j]);
}
}
set<Symbol> firstOfBetaA =
computeFirst(betaA);
if (firstOfBetaA.find(epsilonSymbol) !=
firstOfBetaA.end() || beta.empty())
{
87
lookaheads.insert(la);
}
}
existingItem.lookaheads.insert(la);
}
if (existingItem.lookaheads.size() >
oldSize)
{
changed = true;
}
found = true;
break;
}
}
if (!found)
{
newItems.insert(newItem);
changed = true;
}
}
}
}
}
}
itemSet.items = newItems;
}
}
88
ItemSet goTo(const ItemSet &itemSet, const Symbol &symbol)
{
ItemSet result;
if (!result.items.empty())
{
closure(result);
}
return result;
}
void constructCanonicalCollection()
{
canonicalCollection.clear();
gotoTable.clear();
canonicalCollection.push_back(initialItemSet);
vector<bool> processed(1, false);
89
for (const auto &item : canonicalCollection[i].items)
{
if (item.dotPosition <
productions[item.productionIdx].rhs.size())
{
Symbol sym =
productions[item.productionIdx].rhs[item.dotPosition];
if (sym.isTerminal)
terminalSymbols.insert(sym);
else
nonTerminalSymbols.insert(sym);
}
}
if (!found)
{
canonicalCollection.push_back(gotoSet);
processed.push_back(false);
gotoTable[make_pair(i, symbol)] =
canonicalCollection.size() - 1;
}
else
{
gotoTable[make_pair(i, symbol)] = existingIndex;
90
}
}
}
if (!found)
{
canonicalCollection.push_back(gotoSet);
processed.push_back(false);
gotoTable[make_pair(i, symbol)] =
canonicalCollection.size() - 1;
}
else
{
gotoTable[make_pair(i, symbol)] = existingIndex;
}
}
}
}
}
void constructParsingTable()
{
actionTable.clear();
91
{
// Add goto actions for non-terminals
for (const auto &nt : nonTerminals)
{
if (nt.name != productions[0].lhs.name) // Skip augmented
start symbol
{
if (gotoTable.find(make_pair(i, nt)) != gotoTable.end())
{
int j = gotoTable[make_pair(i, nt)];
actionTable[i][nt] = make_pair('g', j);
}
}
}
productions[item.productionIdx].rhs[item.dotPosition].isTerminal)
{
Symbol a =
productions[item.productionIdx].rhs[item.dotPosition];
if (gotoTable.find(make_pair(i, a)) != gotoTable.end())
{
int j = gotoTable[make_pair(i, a)];
actionTable[i][a] = make_pair('s', j);
}
}
92
{
// Only add reduce action if there's no existing
action
if (actionTable[i].find(a) == actionTable[i].end())
{
actionTable[i][a] = make_pair('r',
item.productionIdx);
}
}
}
}
}
}
void printGrammar()
{
cout << "\nGrammar:\n";
for (size_t i = 0; i < productions.size(); i++)
{
cout << i << ": " << productions[i].toString() << endl;
}
}
void printFirstAndFollowSets()
{
cout << "\nFIRST Sets:\n";
for (const auto &nt : nonTerminals)
{
cout << "FIRST(" << nt.name << ") = { ";
bool first = true;
for (const auto &s : firstSets[nt])
{
if (!first)
cout << ", ";
cout << s.name;
first = false;
}
cout << " }" << endl;
}
93
cout << " }" << endl;
}
}
void printCanonicalCollection()
{
cout << "\nCanonical Collection of LR(1) Items:\n";
for (size_t i = 0; i < canonicalCollection.size(); i++)
{
cout << "I" << i << ":\n";
for (const auto &item : canonicalCollection[i].items)
{
cout << " [" << productions[item.productionIdx].lhs.name <<
" -> ";
if (item.dotPosition ==
productions[item.productionIdx].rhs.size())
{
cout << "."; // Using simple dot instead of special
character
}
// Print lookaheads
bool first = true;
for (const auto &la : item.lookaheads)
{
if (!first)
cout << "/";
cout << la.name;
first = false;
}
94
void printParsingTable()
{
cout << "\nCLR Parsing Table:\n";
// Print header
cout << "+-----+";
for (const auto &t : terminals)
{
cout << "--------+";
}
for (const auto &nt : nonTerminals)
{
if (nt.name != productions[0].lhs.name)
{ // Skip the augmented start symbol
cout << "--------+";
}
}
cout << "\n| Sta |";
// Print rows
for (size_t i = 0; i < canonicalCollection.size(); i++)
{
cout << "| " << setw(3) << i << " |";
95
{
char action = actionTable[i][t].first;
int value = actionTable[i][t].second;
if (action == 's')
{
cout << setw(7) << "s" + to_string(value) << " |";
}
else if (action == 'r')
{
cout << setw(7) << "r" + to_string(value) << " |";
}
else if (action == 'a')
{
cout << setw(7) << "acc" << " |";
}
else
{
cout << setw(7) << " " << " |";
}
}
else
{
cout << setw(7) << " " << " |";
}
}
96
{
if (nt.name != productions[0].lhs.name)
{ // Skip the augmented start symbol
cout << "--------+";
}
}
cout << endl;
}
vector<Symbol> inputSymbols;
istringstream iss(input);
string token;
if (!found)
{
cout << "Invalid token: " << token << endl;
return false;
}
}
inputSymbols.push_back(endSymbol);
size_t currentPos = 0;
97
cout <<
"+----------------------+------------------+-----------------+----------------
----------------+\n";
while (true)
{
int currentState = stateSymbolStack.top().first;
Symbol currentSymbol = inputSymbols[currentPos];
while (!tempStack.empty())
{
tempVec.push_back(tempStack.top());
tempStack.pop();
}
reverse(tempVec.begin(), tempVec.end());
if (actionTable[currentState].find(currentSymbol) ==
actionTable[currentState].end())
{
cout << "| " << setw(20) << left << stackStr << " | "
<< setw(16) << left << inputStr << " | "
<< setw(15) << left << "ERROR" << " | "
<< setw(30) << left << "No action defined" << " |\n";
cout <<
"+----------------------+------------------+-----------------+----------------
----------------+\n";
return false;
}
98
pair<char, int> action = actionTable[currentState][currentSymbol];
if (action.first == 's')
{
// Shift action
stateSymbolStack.push(make_pair(action.second,
currentSymbol));
currentPos++;
cout << "| " << setw(20) << left << stackStr << " | "
<< setw(16) << left << inputStr << " | "
<< setw(15) << left << "Shift " +
to_string(action.second) << " | "
<< setw(30) << left << "Shift and goto state " +
to_string(action.second) << " |\n";
}
else if (action.first == 'r')
{
// Reduce action
int prodIdx = action.second;
Production prod = productions[prodIdx];
cout << "| " << setw(20) << left << stackStr << " | "
<< setw(16) << left << inputStr << " | "
<< setw(15) << left << "Reduce by " +
to_string(prodIdx) << " | "
<< setw(30) << left << prod.toString() << " |\n";
}
else
{
cout << "| " << setw(20) << left << stackStr << " | "
<< setw(16) << left << inputStr << " | "
<< setw(15) << left << "ERROR" << " | "
<< setw(30) << left << "No goto action for " +
prod.lhs.name << " |\n";
99
cout <<
"+----------------------+------------------+-----------------+----------------
----------------+\n";
return false;
}
}
else if (action.first == 'a')
{
// Accept action
cout << "| " << setw(20) << left << stackStr << " | "
<< setw(16) << left << inputStr << " | "
<< setw(15) << left << "Accept" << " | "
<< setw(30) << left << "Input accepted" << " |\n";
cout <<
"+----------------------+------------------+-----------------+----------------
----------------+\n";
return true;
}
else
{
cout << "| " << setw(20) << left << stackStr << " | "
<< setw(16) << left << inputStr << " | "
<< setw(15) << left << "ERROR" << " | "
<< setw(30) << left << "Invalid action" << " |\n";
cout <<
"+----------------------+------------------+-----------------+----------------
----------------+\n";
return false;
}
}
return false;
}
void run()
{
readGrammar();
printGrammar();
computeFirstAndFollowSets();
printFirstAndFollowSets();
constructCanonicalCollection();
printCanonicalCollection();
constructParsingTable();
printParsingTable();
100
bool accepted = parse(input);
cout << "\nInput string " << (accepted ? "ACCEPTED" : "REJECTED") <<
endl;
}
};
int main()
{
CLRParser parser;
parser.run();
return 0;
}
Input/Output:
Test Case 1
101
Test Case 2
102
103
104
Conclusion: The CLR parser constructs a more powerful parse table using LR(1) items,
avoiding conflicts that occur in SLR parsing. This makes CLR more robust but computationally
expensive due to larger state sets.
105
ASSESSMENT – 4
EXPERIMENT-1
Implementation of a simple calculator using LEX and YACC tools.
Aim: To implement a simple calculator using LEX and YACC tools that can perform basic
arithmetic operations such as addition, subtraction, multiplication, and division.
Algorithm:
1. Define Tokens in LEX
o Identify tokens for numbers, operators (+, -, *, /), and parentheses.
o Write regular expressions to recognize these tokens.
2. Define Grammar in YACC
o Specify grammar rules for arithmetic expressions.
o Use precedence and associativity rules to resolve ambiguities.
3. Implement Actions
106
o Assign actions to perform calculations during parsing.
4. Compile and Execute
o Compile the LEX file using lex and generate lex.yy.c.
o Compile the YACC file using yacc and generate y.tab.c.
o Link both and run the executable to evaluate expressions.
Source Code:
Calc.l
%{
/* Scanner for the calculator. The token codes returned below (NUMBER, EOL,
   PLUS, ...) and yylval are expected to be declared by y.tab.h. */
#include "y.tab.h"
#include <stdlib.h>
%}
%%
 /* Rules, in order: a run of digits is converted with atoi, stored in yylval
    and returned as NUMBER; blanks and tabs are discarded; a newline yields
    EOL; each operator or parenthesis yields its own token; any other
    character is reported with printf and then skipped. */
[0-9]+ { yylval = atoi(yytext); return NUMBER; }
[ \t] { /* Ignore whitespace */ }
\n { return EOL; }
"+" { return PLUS; }
"-" { return MINUS; }
"*" { return MULTIPLY; }
"/" { return DIVIDE; }
"(" { return LPAREN; }
")" { return RPAREN; }
. { printf("Unexpected character: %s\n", yytext); }
%%
/* Returning 1 tells the scanner that no further input follows end-of-file. */
int yywrap(void) {
return 1;
}
107
Calc.y
%{
#include <stdio.h>
#include <stdlib.h>
%}
%%
input:
/* empty */
| input line
;
line:
EOL
| exp EOL { printf("Result: %d\n", $1); }
;
exp:
NUMBER { $$ = $1; }
| exp PLUS exp { $$ = $1 + $3; }
| exp MINUS exp { $$ = $1 - $3; }
| exp MULTIPLY exp { $$ = $1 * $3; }
| exp DIVIDE exp {
if($3 != 0)
$$ = $1 / $3;
else {
printf("Error: Division by zero\n");
108
$$ = 0;
}
}
| LPAREN exp RPAREN { $$ = $2; }
;
%%
int main(void) {
printf("Enter expression:\n");
yyparse();
return 0;
}
109
Conclusion: The implementation successfully demonstrates the use of LEX and YACC to build a
simple calculator that can evaluate arithmetic expressions based on operator precedence and
associativity.
EXPERIMENT-2
Implementation of Abstract syntax tree –Infix to postfix using the LEX and YACC tools.
Aim: To implement an Abstract Syntax Tree (AST) for converting an infix expression to a postfix
expression using LEX and YACC tools.
Algorithm:
1. Define Tokens in LEX
o Recognize numbers, operators (+, -, *, /), and parentheses.
2. Define Grammar in YACC
o Parse expressions following operator precedence rules.
o Construct an AST representing the expression.
3. Implement Postfix Conversion
o Traverse the AST using post-order traversal to generate postfix notation.
4. Compile and Execute
o Compile the LEX and YACC files, link them, and execute the program.
Source Code:
Lexer.l
%{
#include "y.tab.h"
#include <stdlib.h>
%}
digit [0-9]+
%%
{digit} { yylval.num = atoi(yytext); return NUM; }
[ \t\n]+ { /* ignore whitespace */ }
"+" { return PLUS; }
110
"-" { return MINUS; }
"*" { return MUL; }
"/" { return DIV; }
"(" { return '('; }
")" { return ')'; }
%%
int yywrap(void) {
return 1;
}
Parser.y
%{
#include <stdio.h>
#include <stdlib.h>
%union {
111
int num;
Node* node;
}
%%
input: expr { root = $1; }
;
112
%%
113
int main() {
yyparse();
printf("Postfix Expression: ");
printPostfix(root);
printf("\n");
return 0;
}
Conclusion: The experiment demonstrates the construction of an Abstract Syntax Tree and its
traversal to convert infix expressions into postfix notation using LEX and YACC.
EXPERIMENT-3
Using LEX and YACC tools to recognize the strings of the following context-free
languages:
1. $L(G) = \{\, a^n b^m \mid m \neq n \,\}$
2. $L(G) = \{\, ab\,(bbaa)^n\, bba\,(ba)^n \mid n \geq 0 \,\}$
Aim: To use LEX and YACC tools to recognize strings belonging to the given context-free languages:
1. $L(G) = \{\, a^n b^m \mid m \neq n \,\}$
2. $L(G) = \{\, ab\,(bbaa)^n\, bba\,(ba)^n \mid n \geq 0 \,\}$
Algorithm:
114
1. Define Tokens in LEX
o Recognize symbols (a, b) based on given language rules.
2. Define Grammar in YACC
o Define rules to parse valid strings belonging to the specified languages.
o Implement conditions to enforce constraints (m ≠ n, repetitions based on n).
3. Perform String Validation
o Accept strings if they belong to the language; reject otherwise.
4. Compile and Execute
o Use lex and yacc to generate and compile the parser.
o Run the program to check input strings.
Source Code:
Lang1.l
%{
/*
 * Scanner for Lang1: maps 'a' to ACHAR and 'b' to BCHAR (token codes are
 * expected to come from y.tab.h), skips whitespace, and aborts with exit
 * status 1 on any other character.
 */
#include <stdio.h>  /* printf */
#include <stdlib.h> /* exit */
#include "y.tab.h"
%}
%%
a { return ACHAR; }
b { return BCHAR; }
[ \t\n]+ { /* skip whitespace */ }
. { printf("Invalid character: %s\n", yytext); exit(1); }
%%
/*
 * Defined here, like the other scanners in this report, so the program links
 * without the lex support library. Returning 1 tells the scanner that no
 * further input follows end-of-file.
 */
int yywrap(void) {
return 1;
}
Lang1.y
%{
#include <stdio.h>
#include <stdlib.h>
int yyerror(char *s);
int yylex(void);
%}
%token ACHAR BCHAR
115
%%
/* S: everything matched by A followed by everything matched by B.
   $1 and $2 are the values computed by the A and B rules below;
   "Accepted" is printed only when the two values differ. */
S: A B { if($1 != $2)
printf("Accepted\n");
else
printf("Not Accepted\n");
}
;
/* A: value starts at 0 and grows by 1 for each ACHAR token consumed. */
A: /* empty */ { $$ = 0; }
| A ACHAR { $$ = $1 + 1; }
;
/* B: value starts at 0 and grows by 1 for each BCHAR token consumed. */
B: /* empty */ { $$ = 0; }
| B BCHAR { $$ = $1 + 1; }
;
%%
int main(void) {
yyparse();
return 0;
}
/*
 * Error callback for the parser: writes the message s followed by a newline
 * to stderr and returns 0.
 */
int yyerror(char *s)
{
    fputs(s, stderr);
    fputc('\n', stderr);
    return 0;
}
Lang2.l
%{
/*
 * Scanner for Lang2: recognises the fixed chunks "bbaa", "bba", "ab" and
 * "ba" as the tokens BBAA, BBA, AB and BA (codes expected from y.tab.h),
 * skips whitespace, and aborts with exit status 1 on any other character.
 * When both "bbaa" and "bba" could start at the same position, the longer
 * match ("bbaa") is the one returned.
 */
#include <stdio.h>  /* printf */
#include <stdlib.h> /* exit */
#include "y.tab.h"
%}
%%
bbaa { return BBAA; }
bba { return BBA; }
ab { return AB; }
ba { return BA; }
[ \t\n]+ { /* skip whitespace */ }
. { printf("Invalid character: %s\n", yytext); exit(1); }
%%
/*
 * Defined here, like the other scanners in this report, so the program links
 * without the lex support library. Returning 1 tells the scanner that no
 * further input follows end-of-file.
 */
int yywrap(void) {
return 1;
}
Lang2.y
%{
#include <stdio.h>
#include <stdlib.h>
int yyerror(char *s);
int yylex(void);
%}
%token AB BBAA BBA BA
%%
/* S: an AB token, whatever L matches, a BBA token, then whatever R matches.
   $2 and $4 are the values computed by the L and R rules below;
   "Accepted" is printed only when the two values are equal. */
S: AB L BBA R { if($2 == $4)
printf("Accepted\n");
else
printf("Not Accepted\n");
}
;
/* L: value starts at 0 and grows by 1 for each BBAA token consumed. */
L: /* empty */ { $$ = 0; }
| L BBAA { $$ = $1 + 1; }
;
/* R: value starts at 0 and grows by 1 for each BA token consumed. */
R: /* empty */ { $$ = 0; }
| R BA { $$ = $1 + 1; }
;
%%
int main(void) {
yyparse();
117
return 0;
}
/*
 * Error callback for the parser: writes the message s followed by a newline
 * to stderr and returns 0.
 */
int yyerror(char *s)
{
    fputs(s, stderr);
    fputc('\n', stderr);
    return 0;
}
Input/Output:
Lang1
Lang2
118
Conclusion: The experiment successfully implements a parser that recognizes strings
belonging to the given context-free languages, demonstrating how LEX and YACC can be
used for language recognition.
119
ASSESSMENT – 5
EXPERIMENT-1
Implementation of three address codes for a simple program using LEX and YACC tools.
Aim: To implement a program that generates three-address codes (TAC) using LEX and YACC.
Algorithm:
4. Execution:
Source Code:
Lexer.l
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.h"
120
};
%%
[0-9]+ { yylval.num = atoi(yytext); return NUMBER; }
[a-zA-Z][a-zA-Z0-9]* {
struct symtab *sp = lookup(yytext);
if (sp == NULL)
sp = install(yytext);
yylval.sym = sp;
return IDENTIFIER;
}
"=" { return ASSIGN; }
"+" { return PLUS; }
"-" { return MINUS; }
"*" { return MULTIPLY; }
"/" { return DIVIDE; }
"(" { return LPAREN; }
")" { return RPAREN; }
";" { return SEMICOLON; }
[ \t\n] { /* ignore whitespace */ }
. { printf("Unrecognized character: %s\n", yytext); }
%%
int yywrap() {
return 1;
121
}
Parser.y
/* parser.y - YACC file for syntax analysis and code generation */
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Global variables */
#define SYMTAB_SIZE 100
struct symtab symtab[SYMTAB_SIZE];
int symtab_index = 0;
122
int tac_index = 0;
int temp_var_count = 0;
/* Function declarations */
void emit(char op, char *arg1, char *arg2, char *result);
char *new_temp();
struct symtab *lookup(char *name);
struct symtab *install(char *name);
%}
%union {
int num;
struct symtab *sym;
char *code;
}
%%
program:
statement_list
;
statement_list:
123
statement
| statement_list statement
;
statement:
IDENTIFIER ASSIGN expr SEMICOLON {
emit('=', $3, "", $1->name);
}
;
expr:
expr PLUS term {
char *temp = new_temp();
emit('+', $1, $3, temp);
$$ = temp;
}
| expr MINUS term {
char *temp = new_temp();
emit('-', $1, $3, temp);
$$ = temp;
}
| term {
$$ = $1;
}
;
term:
term MULTIPLY factor {
char *temp = new_temp();
124
emit('*', $1, $3, temp);
$$ = temp;
}
| term DIVIDE factor {
char *temp = new_temp();
emit('/', $1, $3, temp);
$$ = temp;
}
| factor {
$$ = $1;
}
;
factor:
LPAREN expr RPAREN {
$$ = $2;
}
| NUMBER {
char buffer[20];
sprintf(buffer, "%d", $1);
$$ = strdup(buffer);
}
| IDENTIFIER {
$$ = $1->name;
}
;
%%
125
/*
 * Builds the name of the next temporary, "t<N>", where N is the current value
 * of the global temp_var_count (which is then incremented). The result is a
 * copy made with strdup.
 */
char *new_temp() {
    char name[20];
    int id = temp_var_count++;

    sprintf(name, "t%d", id);
    return strdup(name);
}
126
exit(1);
}
symtab[symtab_index].name = strdup(name);
symtab[symtab_index].val = 0;
return &symtab[symtab_index++];
}
switch (tac[i].op) {
case '=':
printf("%s = %s\n", tac[i].result, tac[i].arg1);
break;
case '+':
case '-':
case '*':
case '/':
printf("%s = %s %c %s\n", tac[i].result, tac[i].arg1, tac[i].op, tac[i].arg2);
break;
default:
printf("Unknown operation\n");
}
}
}
int main() {
127
printf("Enter expressions (end with Ctrl+D):\n");
yyparse();
printf("\nGenerated Three-Address Code:\n");
print_tac();
return 0;
}
128
129
Conclusion: The implementation successfully converts a simple program into its
three-address code representation using LEX and YACC, demonstrating syntax-directed
translation and intermediate code generation.
EXPERIMENT-2
Implement simple code optimization techniques (constant folding, strength reduction,
algebraic transformation, etc.).
Aim: To implement simple code optimization techniques such as constant folding, strength
reduction, and algebraic transformations.
Algorithm:
1. Constant Folding:
2. Strength Reduction:
3. Algebraic Transformations:
4. Implementation:
Source Code:
#include <iostream>
#include <vector>
#include <chrono>
#include <cmath>
#include <string>
#include <sstream>
#include <stack>
#include <map>
130
{
enum Type
{
CONSTANT,
VARIABLE,
BINARY_OP
};
Type type;
double value; // For constants
char var_name; // For variables
char op; // For binary operations
ExprNode *left; // Left operand
ExprNode *right; // Right operand
~ExprNode()
{
if (left)
delete left;
if (right)
delete right;
}
};
131
{
copy = new ExprNode(node->op, clone(node->left), clone(node->right));
}
return copy;
}
if (node->type == ExprNode::CONSTANT)
{
cout << node->value;
}
else if (node->type == ExprNode::VARIABLE)
{
cout << node->var_name;
}
else if (node->type == ExprNode::BINARY_OP)
{
cout << "(";
printExpr(node->left);
cout << " " << node->op << " ";
printExpr(node->right);
cout << ")";
}
}
// 1. Constant Folding
ExprNode *constantFolding(ExprNode *node)
{
if (!node)
return nullptr;
switch (node->op)
132
{
case '+':
result = left_val + right_val;
break;
case '-':
result = left_val - right_val;
break;
case '*':
result = left_val * right_val;
break;
case '/':
if (right_val != 0)
{
result = left_val / right_val;
}
else
{
// Division by zero - return the original node
return node;
}
break;
default:
return node; // Unknown operator
}
return folded;
}
}
return node;
}
// 2. Algebraic Simplification
ExprNode *algebraicSimplification(ExprNode *node)
{
if (!node)
return nullptr;
133
// x + 0 = x, x - 0 = x
if ((node->op == '+' || node->op == '-') &&
node->right->type == ExprNode::CONSTANT && node->right->value ==
0)
{
ExprNode *result = node->left;
node->left = nullptr; // Avoid double deletion
delete node;
return result;
}
// 0 + x = x
if (node->op == '+' &&
node->left->type == ExprNode::CONSTANT && node->left->value == 0)
{
ExprNode *result = node->right;
node->right = nullptr; // Avoid double deletion
delete node;
return result;
}
// x * 1 = x, x / 1 = x
if ((node->op == '*' || node->op == '/') &&
node->right->type == ExprNode::CONSTANT && node->right->value ==
1)
{
ExprNode *result = node->left;
node->left = nullptr; // Avoid double deletion
delete node;
return result;
}
// 1 * x = x
if (node->op == '*' &&
node->left->type == ExprNode::CONSTANT && node->left->value == 1)
{
ExprNode *result = node->right;
node->right = nullptr; // Avoid double deletion
delete node;
return result;
}
// x * 0 = 0, 0 * x = 0
if (node->op == '*' &&
((node->left->type == ExprNode::CONSTANT && node->left->value ==
0) ||
(node->right->type == ExprNode::CONSTANT && node->right->value ==
0)))
{
ExprNode *result = new ExprNode(static_cast<double>(0));
delete node;
return result;
134
}
return node;
}
// 3. Strength Reduction
ExprNode *strengthReduction(ExprNode *node)
{
if (!node)
return nullptr;
// We'll just update the node value and message rather than
creating a new operator
// In a real compiler, you'd replace this with a shift operator
node->right->value = power;
cout << "Replaced multiplication with shift left by " << power <<
endl;
}
135
(static_cast<int>(node->right->value) &
(static_cast<int>(node->right->value) - 1)) == 0)
{
// We'll just update the node value and message rather than
creating a new operator
// In a real compiler, you'd replace this with a shift operator
node->right->value = power;
cout << "Replaced division with shift right by " << power << endl;
}
}
return node;
}
return result;
}
136
// Evaluates the expression tree rooted at `node`, substituting x_value for
// the variable 'x'.
//   - null node            -> 0.0
//   - CONSTANT             -> the stored value
//   - VARIABLE             -> x_value for 'x', 0.0 for any other name
//   - BINARY_OP            -> left and right are evaluated (in that order) and
//                             combined with + - * /; division by zero and
//                             unknown operators both give 0.0
double evaluateExpr(ExprNode *node, double x_value)
{
    if (node == nullptr)
        return 0.0;

    switch (node->type)
    {
    case ExprNode::CONSTANT:
        return node->value;

    case ExprNode::VARIABLE:
        return node->var_name == 'x' ? x_value : 0.0;

    case ExprNode::BINARY_OP:
    {
        const double lhs = evaluateExpr(node->left, x_value);
        const double rhs = evaluateExpr(node->right, x_value);

        if (node->op == '+')
            return lhs + rhs;
        if (node->op == '-')
            return lhs - rhs;
        if (node->op == '*')
            return lhs * rhs;
        if (node->op == '/')
        {
            // Guarded division: a zero divisor yields 0.0 instead of inf/NaN.
            if (rhs != 0)
                return lhs / rhs;
            return 0.0;
        }
        return 0.0;
    }

    default:
        break;
    }

    return 0.0;
}
137
return duration.count();
}
if (expr[i] == '(')
{
ops.push(expr[i]);
}
else if (isdigit(expr[i]))
{
stringstream ss;
while (i < expr.length() && (isdigit(expr[i]) || expr[i] == '.'))
{
ss << expr[i++];
}
i--; // Back one step since loop will increment again
double val;
ss >> val;
values.push(new ExprNode(val));
}
else if (expr[i] == 'x')
{
values.push(new ExprNode('x'));
}
else if (expr[i] == ')')
{
while (!ops.empty() && ops.top() != '(')
{
char op = ops.top();
ops.pop();
138
}
if (!ops.empty())
ops.pop(); // Remove the '('
}
else if (expr[i] == '+' || expr[i] == '-' || expr[i] == '*' || expr[i]
== '/')
{
while (!ops.empty() && ops.top() != '(' &&
precedence[ops.top()] >= precedence[expr[i]])
{
char op = ops.top();
ops.pop();
ops.push(expr[i]);
}
}
while (!ops.empty())
{
char op = ops.top();
ops.pop();
// Function to run all optimizations individually and show the result of each
void showDetailedOptimization(ExprNode *expr)
{
cout << "\n--- Individual Optimization Effects ---\n";
// Original expression
cout << "\nOriginal expression: ";
printExpr(expr);
cout << endl;
139
// Just constant folding
ExprNode *after_cf = clone(expr);
after_cf = constantFolding(after_cf);
cout << "\nAfter ONLY Constant Folding: ";
printExpr(after_cf);
cout << endl;
delete after_cf;
int main()
{
string input;
cout << "Enter an expression (e.g., '(2*x+0)*(3+4)' or 'x*8/4+2*0'): ";
getline(cin, input);
140
cout << "Fully optimized expression: ";
printExpr(optimized);
cout << endl;
// Clean up
delete expr;
delete optimized;
return 0;
}
Input/Output:
141
142
Conclusion: The experiment demonstrates various code optimization techniques that reduce
computational overhead, making the generated code more efficient without altering its
functionality.
EXPERIMENT-3
Implement the back end of a compiler that takes three-address code as input and produces
8086 assembly language as output.
Aim: To implement a compiler backend that takes three-address code as input and generates
8086 assembly code.
Algorithm:
1. Input Parsing:
2. Register Allocation:
3. Instruction Translation:
4. Memory Management:
5. Code Generation:
Source Code:
#include <iostream>
#include <vector>
#include <string>
#include <sstream>
#include <fstream>
#include <unordered_set>
#include <unordered_map>
#include <algorithm>
#include <stdexcept>
143
std::vector<std::string> tokens;
std::stringstream ss(s);
std::string token;
while (ss >> token)
{
tokens.push_back(token);
}
return tokens;
}
class Backend8086
{
private:
std::vector<std::string> tacLines;
std::vector<std::string> assemblyCode;
std::unordered_set<std::string> variables; // To store all
variable names
std::unordered_map<std::string, std::string> labelMap; // Map TAC labels
to assembly labels
144
int labelCounter = 0; // For generating
unique assembly labels if needed
// Generates a unique label name (not strictly needed if TAC labels are
unique)
std::string generateNewLabel()
{
return "L_INTERNAL_" + std::to_string(labelCounter++);
}
if (isNumber(op1))
{
assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; " + tokens[0] +
" = " + tokens[2]);
}
else
{
145
variables.insert(op1);
assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load " + op1);
}
assemblyCode.push_back("\tMOV " + result + ", AX" + "\t\t; Store
result in " + result);
}
// Perform operation
if (op == "+")
{
assemblyCode.push_back(std::string("\tADD AX, BX") + "\t\t; AX = "
+ op1 + " + " + op2);
}
else if (op == "-")
{
146
assemblyCode.push_back(std::string("\tSUB AX, BX") + "\t\t; AX = "
+ op1 + " - " + op2);
}
else if (op == "*")
{
// 16-bit multiplication: AX = AX * BX. Result in DX:AX. Assume
fits in AX.
assemblyCode.push_back(std::string("\tMUL BX") + "\t\t; AX = AX *
BX (result in DX:AX)");
// assemblyCode.push_back("\t; Assuming result fits in AX for " +
result);
}
else if (op == "/")
{
// 16-bit division: AX = DX:AX / BX. Quotient in AX, Remainder in
DX.
assemblyCode.push_back(std::string("\tMOV DX, 0") + "\t\t; Clear
DX for division"); // Important! Assumes positive numbers or signed handled
correctly
assemblyCode.push_back(std::string("\tDIV BX") + "\t\t; AX = AX /
BX, Remainder in DX");
// assemblyCode.push_back("\t; Quotient stored in " + result);
}
else
{
assemblyCode.push_back("\t; Unsupported binary operator: " + op);
// Consider throwing an error
}
// Store result
assemblyCode.push_back("\tMOV " + result + ", AX" + "\t\t; Store
result in " + result);
}
variables.insert(result);
if (isNumber(op1))
{
// Negate constant directly
try
{
long val = std::stol(op1);
assemblyCode.push_back("\tMOV AX, " + std::to_string(-val) +
"\t\t; Load negated constant");
147
}
catch (...)
{
assemblyCode.push_back("\t; Error negating constant " + op1);
return;
}
}
else
{
variables.insert(op1);
assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load " + op1);
assemblyCode.push_back(std::string("\tNEG AX") + "\t\t; Negate
value in AX");
}
assemblyCode.push_back("\tMOV " + result + ", AX" + "\t\t; Store
result in " + result);
}
148
// Load op1 into AX
if (isNumber(op1))
{
assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load immediate
" + op1);
}
else
{
variables.insert(op1);
assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load " + op1);
}
149
// Emits the assembly for a TAC parameter instruction.
//
// tokens  The whitespace-split TAC line; expected format: "param op1".
//         Lines that do not have exactly two tokens are ignored.
//
// A numeric operand is loaded into AX first and AX is then pushed; a named
// operand is recorded in `variables` and pushed directly.
void generateParam(const std::vector<std::string> &tokens)
{
    // Format: param op1
    if (tokens.size() != 2)
        return;

    const std::string &op1 = tokens[1];
    if (isNumber(op1))
    {
        assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load immediate param");
        assemblyCode.push_back("\tPUSH AX\t\t; Push param onto stack");
    }
    else
    {
        // Remember the operand so it gets a declaration in the data section.
        variables.insert(op1);
        assemblyCode.push_back("\tPUSH " + op1 + "\t\t; Push param " + op1 + " onto stack");
    }
}
150
// Remove comma if present in funcName or paramCountStr (depends on
exact TAC format)
if (!funcName.empty() && funcName.back() == ',')
{
funcName.pop_back();
}
if (!paramCountStr.empty() && paramCountStr.back() == ',')
{
paramCountStr.pop_back();
}
// Ensure param count is valid number
try
{
paramCount = std::stoi(paramCountStr);
}
catch (...)
{
assemblyCode.push_back("\t; Invalid parameter count in call: " +
paramCountStr);
return;
}
151
{ // "return op1"
std::string op1 = tokens[1];
if (isNumber(op1))
{
assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load return
value");
}
else
{
variables.insert(op1);
assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load return
value from " + op1);
}
// The actual RET instruction is usually part of function epilogue
assemblyCode.push_back("\t; Return value placed in AX");
}
// Note: A real function would have a proper epilogue (MOV SP, BP; POP
BP; RET)
// We'll add a simple RET for now, assuming it's end of MAIN or a
simple function
assemblyCode.push_back("\tRET\t\t; Return from procedure");
}
if (isNumber(op1))
{
assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load immediate
value to print");
}
else
{
variables.insert(op1);
assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load variable "
+ op1 + " to print");
}
assemblyCode.push_back("\tCALL PRINT_NUM\t; Call procedure to print
number in AX");
// Add a newline after printing
assemblyCode.push_back("\tCALL PRINT_NEWLINE");
}
152
std::string op1 = tokens[1];
variables.insert(op1);
// Appends the .DATA segment to assemblyCode.
//
// Emits one uninitialized word ("DW ?") per name collected in `variables`,
// followed by the fixed data items declared below: NEWLINE, INPUT_MSG,
// OUTPUT_MSG and TEMP_STR.
void generateDataSection()
{
    assemblyCode.push_back(".DATA");

    // `variables` is an unordered_set, so its iteration order is unspecified.
    // Emit the declarations in sorted order to keep the generated assembly
    // identical from run to run and across standard-library implementations.
    std::vector<std::string> sortedVars(variables.begin(), variables.end());
    std::sort(sortedVars.begin(), sortedVars.end());

    // Declare all identified variables
    for (const std::string &var : sortedVars)
    {
        assemblyCode.push_back(var + "\tDW\t?"); // Define Word, initialized to undefined (?)
    }

    // Add any necessary data for helper procedures (like newline string)
    assemblyCode.push_back("NEWLINE\tDB\t0DH, 0AH, '$' ; Carriage return, Line feed, End of string");
    assemblyCode.push_back("INPUT_MSG\tDB\t'Enter number: $'");
    assemblyCode.push_back("OUTPUT_MSG\tDB\t'Output: $'"); // Optional output prefix
    assemblyCode.push_back("TEMP_STR\tDB\t6 DUP('$') ; Buffer for number conversion (max 5 digits + sign)");
}
void generateCodeSection()
{
assemblyCode.push_back(".CODE");
assemblyCode.push_back("MAIN\tPROC FAR"); // Assuming FAR proc for
.EXE entry point
153
// Determine instruction type based on tokens
if (tokens.size() >= 3 && tokens[1] == "=")
{
if (tokens.size() == 3)
{ // Assignment: x = y
generateAssignment(tokens);
}
else if (tokens.size() == 5 && (tokens[3] == "+" || tokens[3]
== "-" || tokens[3] == "*" || tokens[3] == "/"))
{ // Binary: x = y op z
generateBinaryOperation(tokens);
}
else if (tokens.size() == 4 && tokens[2] == "-")
{ // Unary: x = - y
generateUnaryOperation(tokens);
}
else if (tokens.size() == 5 && tokens[2] == "call")
{ // Call with result: x = call func, n
generateCall(tokens);
}
else
{
assemblyCode.push_back("\t; Unrecognized
assignment/expression format");
}
}
else if (tokens.size() == 1 && tokens[0].back() == ':')
{ // Label: L1:
generateLabel(tokens);
}
else if (tokens.size() == 2 && tokens[0] == "goto")
{ // Unconditional Jump: goto L1
generateGoto(tokens);
}
else if (tokens.size() == 6 && tokens[0] == "if" && tokens[4] ==
"goto")
{ // Conditional Jump: if x > y goto L1
generateConditionalJump(tokens);
}
else if (tokens.size() == 2 && tokens[0] == "param")
{ // Parameter: param z
generateParam(tokens);
}
else if (tokens.size() == 3 && tokens[0] == "call")
{ // Call without result: call func, n
generateCall(tokens);
}
else if (tokens.size() >= 1 && tokens[0] == "return")
{ // Return: return [x]
generateReturn(tokens);
}
154
else if (tokens.size() == 2 && tokens[0] == "print")
{ // Output: print x
generatePrint(tokens);
}
else if (tokens.size() == 2 && tokens[0] == "read")
{ // Input: read y
generateRead(tokens);
}
else
{
assemblyCode.push_back("\t; Unrecognized TAC instruction: " +
line);
}
assemblyCode.push_back(""); // Blank line between TAC translations
}
assemblyCode.push_back("MAIN\tENDP");
assemblyCode.push_back("");
}
155
assemblyCode.push_back("\tNEG AX\t\t; Make AX positive for
conversion");
assemblyCode.push_back("PN_POSITIVE:");
assemblyCode.push_back("\tCMP AX, 0 \t ;Handle zero case");
assemblyCode.push_back("\tJNE PN_CONVERT_LOOP");
assemblyCode.push_back("\tPUSH 0 \t\t ; Push '0' digit if AX is 0");
assemblyCode.push_back("\tINC CX");
assemblyCode.push_back("PN_CONVERT_LOOP:");
assemblyCode.push_back("\tCMP AX, 0");
assemblyCode.push_back("\tJE PN_PRINT_LOOP");
assemblyCode.push_back("\tMOV DX, 0\t\t; Clear DX for division");
assemblyCode.push_back("\tDIV BX\t\t; AX = AX / 10, DX = AX % 10");
assemblyCode.push_back("\tPUSH DX\t\t; Push remainder (digit) onto
stack");
assemblyCode.push_back("\tINC CX\t\t; Increment digit count");
assemblyCode.push_back("\tJMP PN_CONVERT_LOOP");
assemblyCode.push_back("");
assemblyCode.push_back("PN_PRINT_LOOP:");
assemblyCode.push_back("\tCMP CX, 0");
assemblyCode.push_back("\tJE PN_DONE");
assemblyCode.push_back("\tPOP DX\t\t; Pop digit from stack");
assemblyCode.push_back("\tADD DL, '0'\t\t; Convert digit to ASCII
char");
assemblyCode.push_back("\tMOV AH, 02H\t; DOS print char function");
assemblyCode.push_back("\tINT 21H\t\t; Print character");
assemblyCode.push_back("\tDEC CX\t\t; Decrement digit count");
assemblyCode.push_back("\tJMP PN_PRINT_LOOP");
assemblyCode.push_back("");
assemblyCode.push_back("PN_DONE:");
assemblyCode.push_back("\tPOP SI\t\t; Restore registers");
assemblyCode.push_back("\tPOP DX");
assemblyCode.push_back("\tPOP CX");
assemblyCode.push_back("\tPOP BX");
assemblyCode.push_back("\tPOP AX");
assemblyCode.push_back("\tRET");
assemblyCode.push_back("PRINT_NUM\tENDP");
assemblyCode.push_back("");
156
// READ_NUM: Reads a signed integer from input, stores in AX
// Basic implementation - reads char by char, simple conversion
// Does not handle backspace or extensive error checking
assemblyCode.push_back("READ_NUM\tPROC NEAR");
assemblyCode.push_back("\t; Reads signed number, stores in AX");
assemblyCode.push_back("\tPUSH BX\t\t; Save registers");
assemblyCode.push_back("\tPUSH CX");
assemblyCode.push_back("\tPUSH DX");
assemblyCode.push_back("\tPUSH SI");
assemblyCode.push_back("");
// Optional: Print prompt message
// assemblyCode.push_back("\tMOV AH, 09H");
// assemblyCode.push_back("\tLEA DX, INPUT_MSG");
// assemblyCode.push_back("\tINT 21H");
assemblyCode.push_back("RN_READ_CHAR:");
assemblyCode.push_back("\tMOV AH, 01H\t; DOS read char with echo");
assemblyCode.push_back("\tINT 21H\t\t; Character in AL");
assemblyCode.push_back("RN_CHECK_DIGIT:");
assemblyCode.push_back("\tCMP AL, '0'\t; Check if >= '0'");
assemblyCode.push_back("\tJL RN_INVALID_INPUT");
assemblyCode.push_back("\tCMP AL, '9'\t; Check if <= '9'");
assemblyCode.push_back("\tJG RN_INVALID_INPUT");
157
assemblyCode.push_back("\tIMUL WORD PTR [SP+2] ; AX = AX * Current
value (on stack below return addr)");
// This IMUL usage is incorrect. Need to multiply the accumulated
value.
assemblyCode.push_back("\t; Correction for accumulation:");
assemblyCode.push_back("\tPOP DX\t\t; DX = new digit value (0-9)");
assemblyCode.push_back("\tPUSH DX\t\t; Save it again temporarily");
assemblyCode.push_back("\tMOV CX, AX\t; Save current AX (accumulated
value)");
assemblyCode.push_back("\tMOV AX, 10\t; AX = 10");
assemblyCode.push_back("\tMUL CX\t\t; AX = Accumulated value * 10");
// Add error checking for overflow here if needed (check DX)
assemblyCode.push_back("\tPOP CX\t\t; CX = new digit value");
assemblyCode.push_back("\tADD AX, CX\t; AX = (Accumulated value * 10)
+ new digit");
assemblyCode.push_back("RN_INVALID_INPUT:");
assemblyCode.push_back("\t; Handle invalid input - for simplicity,
just finish");
// Could print an error message here
assemblyCode.push_back("\t; Fall through to RN_DONE_READING");
assemblyCode.push_back("RN_DONE_READING:");
// Apply sign
assemblyCode.push_back("\tCMP SI, 0");
assemblyCode.push_back("\tJGE RN_POSITIVE_NUM");
assemblyCode.push_back("\tNEG AX\t\t; Negate AX if SI is negative");
assemblyCode.push_back("RN_POSITIVE_NUM:");
// Print newline after input
assemblyCode.push_back("\tCALL PRINT_NEWLINE");
public:
// Constructs a backend over a copy of the given three-address-code lines.
// The remaining members keep their in-class defaults.
Backend8086(const std::vector<std::string> &inputTac)
    : tacLines(inputTac)
{
}
std::vector<std::string> generate()
{
assemblyCode.clear();
158
variables.clear();
labelMap.clear();
labelCounter = 0;
return assemblyCode;
}
};
159
"L1:",
"print t2",
"L2:",
"z = t1 - 5",
"print z"
// Add more complex TAC examples here:
// "param x",
// "param y",
// "t3 = call AddFunc, 2", // Assuming AddFunc exists elsewhere
// "print t3",
// "return z"
};
160
std::cerr << "Error during assembly generation: " << e.what() <<
std::endl;
return 1;
}
if (outFile)
{
std::cout << "\nAssembly code written to " << outputFilename <<
std::endl;
outFile.close();
}
return 0;
}
Input/Output:
TEST CASE 1
My_tac.txt
read count
LOOP_START:
if count < 1 goto LOOP_END
; Calculate negative and print
neg_val = - count
print neg_val
161
; Decrement count
count = count - 1
goto LOOP_START
LOOP_END:
; End of loop
print 999 ; Indicate loop finished
Output
162
163
164
TEST CASE 2
My_tac.txt
read num1
read num2
t_sum = num1 + num2
t_prod = num1 * num2
if t_sum > t_prod goto PRINT_SUM
; Product is greater or equal
print t_prod
goto END_COMPARE
PRINT_SUM:
; Sum is greater
print t_sum
165
END_COMPARE:
; Program continues or ends
Output
PS D:\Coding & Others\C++\Compiler> cd "d:\Coding & Others\C++\Compiler\" ; if ($?) { g++
8086.cpp -o 8086 } ; if ($?)
{ .\8086 my_tac.txt}
Read 13 TAC lines from my_tac.txt
.CODE
MAIN PROC FAR
MOV AX, @DATA ; Get data segment address
MOV DS, AX ; Initialize DS
MOV ES, AX ; Initialize ES (often needed for string ops, good practice)
166
; TAC: read num2
CALL READ_NUM ; Call procedure to read number into AX
MOV num2, AX ; Store read value in num2
167
; TAC: PRINT_SUM:
PRINT_SUM:
; TAC: END_COMPARE:
END_COMPARE:
EXIT_PROG:
MOV AH, 4CH ; DOS exit function
INT 21H ; Call DOS interrupt
MAIN ENDP
168
PUSH CX
PUSH DX
PUSH SI
169
PN_PRINT_LOOP:
CMP CX, 0
JE PN_DONE
POP DX ; Pop digit from stack
ADD DL, '0' ; Convert digit to ASCII char
MOV AH, 02H ; DOS print char function
INT 21H ; Print character
DEC CX ; Decrement digit count
JMP PN_PRINT_LOOP
PN_DONE:
POP SI ; Restore registers
POP DX
POP CX
POP BX
POP AX
RET
PRINT_NUM ENDP
170
READ_NUM PROC NEAR
; Reads signed number, stores in AX
PUSH BX ; Save registers
PUSH CX
PUSH DX
PUSH SI
171
AND AL, 0FH ; Convert ASCII digit to number (AL = AL - '0')
PUSH AX ; Save digit
MOV AX, BX ; AX = 10
POP DX ; DX = digit
PUSH DX ; Save digit again
IMUL WORD PTR [SP+2] ; AX = AX * Current value (on stack below return addr)
; Correction for accumulation:
POP DX ; DX = new digit value (0-9)
PUSH DX ; Save it again temporarily
MOV CX, AX ; Save current AX (accumulated value)
MOV AX, 10 ; AX = 10
MUL CX ; AX = Accumulated value * 10
POP CX ; CX = new digit value
ADD AX, CX ; AX = (Accumulated value * 10) + new digit
INC CX ; Increment digit count (now used for general count)
JMP RN_READ_CHAR
RN_INVALID_INPUT:
; Handle invalid input - for simplicity, just finish
; Fall through to RN_DONE_READING
RN_DONE_READING:
CMP SI, 0
JGE RN_POSITIVE_NUM
NEG AX ; Negate AX if SI is negative
RN_POSITIVE_NUM:
CALL PRINT_NEWLINE
POP SI ; Restore registers
POP DX
POP CX
POP BX
172
RET
READ_NUM ENDP
END MAIN
173