0% found this document useful (0 votes)
16 views173 pages

Final Document 22brs1044

The document outlines a laboratory report for the Compiler Design course, detailing various experiments related to the implementation of Deterministic Finite Automata (DFA) and Non-deterministic Finite Automata (NFA) using C++. It includes specific algorithms, source code, and user interaction menus for defining, displaying, and testing automata. The report demonstrates practical applications of automata theory through structured experiments and their outcomes.

Uploaded by

Sayandeep Das
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
16 views173 pages

Final Document 22brs1044

The document outlines a laboratory report for the Compiler Design course, detailing various experiments related to the implementation of Deterministic Finite Automata (DFA) and Non-deterministic Finite Automata (NFA) using C++. It includes specific algorithms, source code, and user interaction menus for defining, displaying, and testing automata. The report demonstrates practical applications of automata theory through structured experiments and their outcomes.

Uploaded by

Sayandeep Das
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 173

SCHOOL OF COMPUTER SCIENCE AND

ENGINEERING

BCSE307P – COMPILER DESIGN LABORATORY

REGISTER NUMBER: 22BRS1044

NAME OF STUDENT: SAYANDEEP DAS


Contents
ASSESSMENT – 1​ 3
Experiment-1​ 3
Experiment-2​ 9
Experiment-3​ 18
ASSESSMENT – 2​ 24
EXPERIMENT-1​ 24
EXPERIMENT-2​ 30
EXPERIMENT-3​ 33
EXPERIMENT-4​ 44
ASSESSMENT – 3​ 59
EXPERIMENT-1​ 59
EXPERIMENT-2​ 80
ASSESSMENT – 4​ 112
EXPERIMENT-1​ 112
EXPERIMENT-2​ 115
EXPERIMENT-3​ 120
ASSESSMENT – 5​ 125
EXPERIMENT-1​ 125
EXPERIMENT-2​ 135
EXPERIMENT-3​ 148

2
ASSESSMENT – 1
Experiment-1
Implementation of Deterministic Finite Automaton (DFA) from regular grammar using the C++
language.
Aim: To implement a Deterministic Finite Automaton (DFA) using a transition table input in
C++.
Algorithm:
1.​ Start:
o​ Read the DFA transition table provided by the user.
2.​ Input Transition Table:
o​ Accept states, alphabet, transitions, start state, and accepting states as input.
3.​ Menu Options:
o​ Display a menu to allow the user to:

▪​ Define the DFA using a transition table.

▪​ Display the DFA states and transitions.

▪​ Test if a string is accepted by the DFA.

4.​ Transition Table Representation:


o​ Store transitions in a table or map for efficient lookup.
5.​ String Acceptance:
o​ Simulate the DFA by starting from the initial state and traversing through
transitions based on the input string.
o​ Check if the final state reached is an accepting state.
6.​ Display Results:
o​ Display the DFA structure and indicate if the input string is accepted or
rejected.
7.​ End.
Source Code:
#include <iostream>
#include <map>
#include <set>
#include <string>

3
#include <vector>
using namespace std;

// Deterministic finite automaton built interactively from a transition table.
//
// The automaton is described by a set of state names, a set of single-character
// input symbols, a (state, symbol) -> state transition map, one start state and
// a set of accepting states. All of it is read from std::cin by defineDFA().
class DFA {
private:
    std::set<std::string> states;
    std::set<char> alphabet;
    std::map<std::pair<std::string, char>, std::string> transitionTable;
    std::string startState;
    std::set<std::string> acceptingStates;

public:
    // Prompts on std::cout and reads a complete DFA description from std::cin.
    // Any previously defined automaton is discarded first, so choosing
    // "Define DFA" twice yields the second automaton only rather than a merge
    // of both.
    void defineDFA() {
        states.clear();
        alphabet.clear();
        transitionTable.clear();
        startState.clear();
        acceptingStates.clear();

        int numStates = 0;
        int numAlphabets = 0;
        int numTransitions = 0;
        int numAccepting = 0;
        std::string state, fromState, toState;
        char input = '\0';

        std::cout << "Enter the number of states: ";
        std::cin >> numStates;
        std::cout << "Enter the states: ";
        for (int i = 0; i < numStates; ++i) {
            std::cin >> state;
            states.insert(state);
        }

        std::cout << "Enter the number of input symbols: ";
        std::cin >> numAlphabets;
        std::cout << "Enter the input symbols: ";
        for (int i = 0; i < numAlphabets; ++i) {
            std::cin >> input;
            alphabet.insert(input);
        }

        std::cout << "Enter the number of transitions: ";
        std::cin >> numTransitions;
        std::cout << "Enter the transitions (Format: fromState input toState):\n";
        for (int i = 0; i < numTransitions; ++i) {
            std::cin >> fromState >> input >> toState;
            // A later entry for the same (state, symbol) pair overwrites the
            // earlier one, which keeps the table deterministic.
            transitionTable[{fromState, input}] = toState;
        }

        std::cout << "Enter the start state: ";
        std::cin >> startState;

        std::cout << "Enter the number of accepting states: ";
        std::cin >> numAccepting;
        std::cout << "Enter the accepting states: ";
        for (int i = 0; i < numAccepting; ++i) {
            std::cin >> state;
            acceptingStates.insert(state);
        }
    }

    // Prints states, alphabet, transitions, start state and accepting states.
    void displayDFA() const {
        std::cout << "\nStates: ";
        for (const auto& state : states) std::cout << state << " ";

        std::cout << "\nAlphabet: ";
        for (const auto& symbol : alphabet) std::cout << symbol << " ";

        std::cout << "\nTransitions: \n";
        for (const auto& entry : transitionTable) {
            std::cout << entry.first.first << " -- " << entry.first.second
                      << " --> " << entry.second << "\n";
        }

        std::cout << "Start State: " << startState << "\n";

        std::cout << "Accepting States: ";
        for (const auto& state : acceptingStates) std::cout << state << " ";
        std::cout << std::endl;
    }

    // Runs the automaton over `input` starting from the start state.
    // Returns true when every symbol has a transition and the state reached
    // after the last symbol is accepting; returns false as soon as a
    // (state, symbol) pair has no transition.
    bool testString(const std::string& input) const {
        std::string currentState = startState;

        for (const char symbol : input) {
            // Single lookup: find() both tests for the entry and yields it.
            const auto step = transitionTable.find({currentState, symbol});
            if (step == transitionTable.end()) {
                return false; // No valid transition
            }
            currentState = step->second;
        }

        return acceptingStates.count(currentState) != 0;
    }
};

// Menu loop for the DFA experiment: define, display, test a string, or exit.
// Returns 0 on normal exit.
int main() {
    DFA dfa;
    int choice = 0;
    std::string inputString;

    do {
        std::cout << "\nMenu:\n";
        std::cout << "1. Define DFA\n";
        std::cout << "2. Display DFA\n";
        std::cout << "3. Test String\n";
        std::cout << "4. Exit\n";
        std::cout << "Enter your choice: ";

        if (!(std::cin >> choice)) {
            // A failed extraction leaves the stream in a failed state; without
            // this handling every later read fails too and the menu spins
            // forever.
            if (std::cin.eof() || std::cin.bad()) {
                std::cout << "\nExiting program.\n";
                break;
            }
            std::cin.clear();
            std::string discarded;
            std::getline(std::cin, discarded); // drop the offending line
            choice = 0;
            std::cout << "Invalid choice. Please try again.\n";
            continue;
        }

        switch (choice) {
        case 1:
            dfa.defineDFA();
            break;
        case 2:
            dfa.displayDFA();
            break;
        case 3:
            std::cout << "Enter string to test: ";
            std::cin >> inputString;
            if (dfa.testString(inputString)) {
                std::cout << "String is accepted by the DFA.\n";
            } else {
                std::cout << "String is rejected by the DFA.\n";
            }
            break;
        case 4:
            std::cout << "Exiting program.\n";
            break;
        default:
            std::cout << "Invalid choice. Please try again.\n";
        }
    } while (choice != 4);

    return 0;
}

Input/Output:
PS D:\Coding & Others> cd "d:\Coding & Others\C++\Compiler\" ; if
($?) { g++ exp1.cpp -o exp1 } ; if ($?) { .\exp1 }

Menu:
1. Define DFA
2. Display DFA
3. Test String
4. Exit
Enter your choice: 1
Enter the number of states: 3
Enter the states: a b c
Enter the number of input symbols: 2
Enter the input symbols: 0 1
Enter the number of transitions: 6
Enter the transitions (Format: fromState input toState):
a 0 a

6
a 1 b
b 0 c
b 1 a
c 0 b
c 1 c
Enter the start state: a
Enter the number of accepting states: 1
Enter the accepting states: c

Menu:
1. Define DFA
2. Display DFA
3. Test String
4. Exit
Enter your choice: 3
Enter string to test: 101
String is accepted by the DFA.

Menu:
1. Define DFA
2. Display DFA
3. Test String
4. Exit
Enter your choice: 2

States: a b c
Alphabet: 0 1
Transitions:
a -- 0 --> a
a -- 1 --> b
b -- 0 --> c
b -- 1 --> a
c -- 0 --> b

7
c -- 1 --> c
Start State: a
Accepting States: c

Menu:
1. Define DFA
2. Display DFA
3. Test String
4. Exit
Enter your choice: 4
Exiting program.
PS D:\Coding & Others\C++\Compiler>

Conclusion: The program successfully implements a Deterministic Finite Automaton (DFA)


using a transition table input. It can construct the DFA, display its components, and test the
acceptance of input strings effectively, demonstrating the practical application of automata
theory.

Experiment-2
Implementation of Deterministic Finite Automaton (DFA) from Non-deterministic Finite
Automata (NFA) without ε-edges using C++ language.
Aim:
To implement a Deterministic Finite Automaton (DFA) from a Non-deterministic Finite
Automaton (NFA) without ε-edges using C++.
Algorithm:

1.​ Start:
●​ Define the NFA states, input symbols, start state, transitions, and
accepting states.
2.​ Input NFA:
●​ Accept the NFA transitions, states, start state, and accepting states from
the user.
3.​ DFA Construction:

8
●​ Use a subset construction method to create the DFA.
●​ Create a DFA state for every subset of NFA states.
●​ Identify transitions for each subset and input symbol by finding the union
of possible transitions.
●​ Mark a DFA state as accepting if it contains any NFA accepting state.
4.​ Menu Options:
●​ Display a menu for the user to:
●​ Define the NFA.
●​ Convert the NFA to DFA.
●​ Display the DFA.
●​ Test if a string is accepted by the DFA.
5.​ DFA String Acceptance:
●​ Simulate the DFA by traversing transitions based on the input string.
●​ Check if the final state is an accepting state.
6.​ Output:
●​ Display the NFA, DFA, and results of string acceptance tests.
7.​ End.

Source Code:
#include <iostream>
#include <map>
#include <set>
#include <queue>
#include <vector>
#include <string>

using namespace std;

// Converts an NFA without epsilon edges into a DFA by subset construction.
//
// The NFA is read from std::cin by defineNFA(). convertToDFA() names each
// reachable subset of NFA states "Q0", "Q1", ... in discovery order, with "Q0"
// always being the subset that contains only the NFA start state.
class NFAtoDFA {
private:
    std::set<std::string> nfaStates;
    std::set<std::string> dfaStates;
    std::set<char> alphabet;
    std::map<std::pair<std::string, char>, std::set<std::string>> nfaTransitions;
    std::map<std::pair<std::string, char>, std::string> dfaTransitions;
    std::string startState;
    std::set<std::string> nfaAcceptStates;
    std::set<std::string> dfaAcceptStates;

    // Drops the result of any earlier conversion.
    void clearDFA() {
        dfaStates.clear();
        dfaTransitions.clear();
        dfaAcceptStates.clear();
    }

public:
    // Prompts on std::cout and reads states, start state, accepting states,
    // alphabet (terminated by '#') and transitions from std::cin.
    // Any earlier NFA and the DFA derived from it are discarded first, so a
    // second definition does not inherit states or transitions from the first.
    void defineNFA() {
        nfaStates.clear();
        alphabet.clear();
        nfaTransitions.clear();
        startState.clear();
        nfaAcceptStates.clear();
        clearDFA();

        int numStates = 0;
        int numTransitions = 0;
        std::cout << "Enter the number of states in NFA: ";
        std::cin >> numStates;

        std::cout << "Enter NFA states:\n";
        for (int i = 0; i < numStates; ++i) {
            std::string state;
            std::cin >> state;
            nfaStates.insert(state);
        }

        std::cout << "Enter the start state of NFA: ";
        std::cin >> startState;

        int numAcceptStates = 0;
        std::cout << "Enter the number of accepting states in NFA: ";
        std::cin >> numAcceptStates;
        std::cout << "Enter NFA accepting states:\n";
        for (int i = 0; i < numAcceptStates; ++i) {
            std::string state;
            std::cin >> state;
            nfaAcceptStates.insert(state);
        }

        std::cout << "Enter the alphabet (space-separated, end with #): ";
        char symbol = '\0';
        while (std::cin >> symbol && symbol != '#') {
            alphabet.insert(symbol);
        }

        std::cout << "Enter the number of transitions in NFA: ";
        std::cin >> numTransitions;
        std::cout << "Enter transitions (Format: FromState InputSymbol ToState):\n";
        for (int i = 0; i < numTransitions; ++i) {
            std::string fromState, toState;
            char inputSymbol = '\0';
            std::cin >> fromState >> inputSymbol >> toState;
            nfaTransitions[{fromState, inputSymbol}].insert(toState);
        }
    }

    // Builds the DFA from the current NFA using breadth-first subset
    // construction. The previous DFA (if any) is cleared first, so the result
    // always reflects only the NFA as currently defined. Subsets with no
    // outgoing NFA move on a symbol get no DFA transition for that symbol
    // (there is no explicit dead state).
    void convertToDFA() {
        clearDFA();

        std::queue<std::set<std::string>> unprocessedStates;
        std::map<std::set<std::string>, std::string> stateMapping;
        int stateCounter = 0;

        // Returns the DFA name of `subset`, creating and queueing it when it
        // is seen for the first time.
        const auto nameOf = [&](const std::set<std::string>& subset) -> std::string {
            auto found = stateMapping.find(subset);
            if (found == stateMapping.end()) {
                found = stateMapping.emplace(subset, "Q" + std::to_string(stateCounter++)).first;
                dfaStates.insert(found->second);
                unprocessedStates.push(subset);
                if (isAcceptingState(subset)) {
                    dfaAcceptStates.insert(found->second);
                }
            }
            return found->second;
        };

        nameOf(std::set<std::string>{startState});

        while (!unprocessedStates.empty()) {
            const std::set<std::string> currentSet = unprocessedStates.front();
            unprocessedStates.pop();
            const std::string currentState = stateMapping.at(currentSet);

            for (const char symbol : alphabet) {
                // Union of the NFA moves of every member state on `symbol`.
                std::set<std::string> newSet;
                for (const std::string& state : currentSet) {
                    const auto move = nfaTransitions.find({state, symbol});
                    if (move != nfaTransitions.end()) {
                        newSet.insert(move->second.begin(), move->second.end());
                    }
                }

                if (!newSet.empty()) {
                    dfaTransitions[{currentState, symbol}] = nameOf(newSet);
                }
            }
        }
    }

    // Prints the DFA produced by the last convertToDFA() call.
    void displayDFA() const {
        std::cout << "\nDFA States: ";
        for (const auto& state : dfaStates) {
            std::cout << state << " ";
        }

        std::cout << "\nAlphabet: ";
        for (const auto& symbol : alphabet) {
            std::cout << symbol << " ";
        }

        std::cout << "\nDFA Transitions:\n";
        for (const auto& entry : dfaTransitions) {
            std::cout << entry.first.first << " -- " << entry.first.second
                      << " --> " << entry.second << "\n";
        }

        std::cout << "Start State: Q0\n";

        std::cout << "Accepting States: ";
        for (const auto& state : dfaAcceptStates) {
            std::cout << state << " ";
        }
        std::cout << std::endl;
    }

    // Traces `input` through the DFA, printing every step, and reports on
    // std::cout whether the string is accepted or rejected.
    void testString(const std::string& input) const {
        // convertToDFA() always names the start subset "Q0".
        std::string currentState = "Q0";

        std::cout << "Testing string: " << input << "\n";

        for (const char symbol : input) {
            std::cout << "Current State: " << currentState << ", Symbol: " << symbol << "\n";

            const auto step = dfaTransitions.find({currentState, symbol});
            if (step == dfaTransitions.end()) {
                std::cout << "No transition for (" << currentState << ", " << symbol << ").\n";
                std::cout << "String is rejected by the DFA.\n";
                return;
            }

            currentState = step->second;
            std::cout << "Moved to State: " << currentState << "\n";
        }

        if (dfaAcceptStates.count(currentState) != 0) {
            std::cout << "String is accepted by the DFA.\n";
        } else {
            std::cout << "String is rejected by the DFA.\n";
        }
    }

private:
    // A DFA subset is accepting when it contains at least one accepting NFA state.
    bool isAcceptingState(const std::set<std::string>& stateSet) const {
        for (const auto& state : stateSet) {
            if (nfaAcceptStates.count(state) != 0) {
                return true;
            }
        }
        return false;
    }
};

// Menu loop for the NFA-to-DFA experiment: define the NFA, convert it,
// display the DFA, test a string, or exit. Returns 0 on normal exit.
int main() {
    NFAtoDFA automaton;
    int choice = 0;

    do {
        std::cout << "\nMenu:\n";
        std::cout << "1. Define NFA\n";
        std::cout << "2. Convert NFA to DFA\n";
        std::cout << "3. Display DFA\n";
        std::cout << "4. Test String\n";
        std::cout << "5. Exit\n";
        std::cout << "Enter your choice: ";

        if (!(std::cin >> choice)) {
            // Without this, a non-numeric entry or end of input leaves the
            // stream failed and the loop would print the menu forever.
            if (std::cin.eof() || std::cin.bad()) {
                std::cout << "\nExiting program.\n";
                break;
            }
            std::cin.clear();
            std::string discarded;
            std::getline(std::cin, discarded); // drop the offending line
            choice = 0;
            std::cout << "Invalid choice. Please try again.\n";
            continue;
        }

        switch (choice) {
        case 1:
            automaton.defineNFA();
            break;
        case 2:
            automaton.convertToDFA();
            break;
        case 3:
            automaton.displayDFA();
            break;
        case 4: {
            std::string input;
            std::cout << "Enter a string to test: ";
            std::cin >> input;
            automaton.testString(input);
            break;
        }
        case 5:
            std::cout << "Exiting program.\n";
            break;
        default:
            std::cout << "Invalid choice. Please try again.\n";
        }
    } while (choice != 5);

    return 0;
}

Input/Output:
PS D:\Coding & Others> cd "d:\Coding & Others\C++\Compiler\" ; if
($?) { g++ exp2.cpp -o exp2 } ; if ($?) { .\exp2 }

Menu:
1. Define NFA

13
2. Convert NFA to DFA
3. Display DFA
4. Test String
5. Exit
Enter your choice: 1
Enter the number of states in NFA: 3
Enter NFA states:
A B C
Enter the start state of NFA: A
Enter the number of accepting states in NFA: 1
Enter NFA accepting states:
C
Enter the alphabet (space-separated, end with #): 0 1 #
Enter the number of transitions in NFA: 4
Enter transitions (Format: FromState InputSymbol ToState):
A 0 B
A 1 C
B 0 C
C 1 C

Menu:
1. Define NFA
2. Convert NFA to DFA
3. Display DFA
4. Test String
5. Exit
Enter your choice: 2

Menu:
1. Define NFA
2. Convert NFA to DFA
3. Display DFA
4. Test String

14
5. Exit
Enter your choice: 3

DFA States: Q0 Q1 Q2
Alphabet: 0 1
DFA Transitions:
Q0 -- 0 --> Q1
Q0 -- 1 --> Q2
Q1 -- 0 --> Q2
Q2 -- 1 --> Q2
Start State: Q0
Accepting States: Q2

Menu:
1. Define NFA
2. Convert NFA to DFA
3. Display DFA
4. Test String
5. Exit
Enter your choice: 4
Enter a string to test: 010
Testing string: 010
Current State: Q0, Symbol: 0
Moved to State: Q1
Current State: Q1, Symbol: 1
No transition for (Q1, 1).
String is rejected by the DFA.

Menu:
1. Define NFA
2. Convert NFA to DFA
3. Display DFA
4. Test String

15
5. Exit
Enter your choice: 4
Enter a string to test: 01
Testing string: 01
Current State: Q0, Symbol: 0
Moved to State: Q1
Current State: Q1, Symbol: 1
No transition for (Q1, 1).
String is rejected by the DFA.

Menu:
1. Define NFA
2. Convert NFA to DFA
3. Display DFA
4. Test String
5. Exit
Enter your choice: 4
Enter a string to test: 001
Testing string: 001
Current State: Q0, Symbol: 0
Moved to State: Q1
Current State: Q1, Symbol: 0
Moved to State: Q2
Current State: Q2, Symbol: 1
Moved to State: Q2
String is accepted by the DFA.

Menu:
1. Define NFA
2. Convert NFA to DFA
3. Display DFA
4. Test String
5. Exit

16
Enter your choice: 4
Enter a string to test: 00
Testing string: 00
Current State: Q0, Symbol: 0
Moved to State: Q1
Current State: Q1, Symbol: 0
Moved to State: Q2
String is accepted by the DFA.
Conclusion:
The program successfully implements the conversion of a Non-deterministic Finite
Automaton (NFA) without ε-edges to a Deterministic Finite Automaton (DFA). It
demonstrates the practical application of subset construction and automata theory
principles.

Experiment-3
a) Implement a DFA in LEX code which accepts odd number of 1’s and even number of 0’s.
b) Implement a DFA in LEX code which accepts strings over {0, 1, 2} having 231 as substring.
Aim:
1.​ Part A: To implement a DFA in LEX code that accepts strings with an odd number of
1s and an even number of 0s.
2.​ Part B: To implement a DFA in LEX code that accepts strings over {0, 1, 2} having 231
as a substring.
Algorithm:
For Part A:
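1. Define states for the DFA: INITIAL, S1, S2, S3, S4, and DEAD.
o INITIAL: Start state (even number of 1s and even number of 0s).
o S1: Odd number of 1s and even number of 0s (the accepting state).
o S2: Even number of 1s and even number of 0s.
o S3: Odd number of 1s and odd number of 0s.
o S4: Even number of 1s and odd number of 0s.
o DEAD: Trap state entered on any character other than 0 or 1.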
2.​ Define transitions:
o​ Use regular expressions to handle input transitions between states.

17
o​ Transition to DEAD state for invalid inputs.
3.​ Print whether the string is Accepted, Not Accepted, or Invalid based on the DFA
rules.
For Part B:
1.​ Define states: INITIAL, S1, S2, S3, and DEAD.
o​ INITIAL: Start state.
o​ S1: Match for 2.
o​ S2: Match for 23.
o​ S3: Match for 231.
o​ DEAD: Invalid state.
2.​ Define transitions:
o​ Move between states based on input characters.
o​ Print Accepted if the string reaches state S3.
Source Code:
Part A: Odd number of 1s and even number of 0s
%{
/*
 * DFA accepting binary strings with an odd number of 1s and an even number
 * of 0s. One input line is one string; the verdict is printed at the newline
 * and the scanner returns to INITIAL for the next line.
 *
 * State meaning (parity of 1s / parity of 0s seen so far on the line):
 *   INITIAL, S2 : even 1s, even 0s
 *   S1          : odd 1s,  even 0s   (accepting)
 *   S3          : odd 1s,  odd 0s
 *   S4          : even 1s, odd 0s
 *   DEAD        : a character other than 0 or 1 was seen
 *
 * INITIAL must move to S4 on '0' (one 0 seen => odd 0s). Sending it to S2
 * would treat "0" as having an even number of 0s and wrongly accept "01".
 */
#include <stdio.h>
%}

%s S1 S2 S3 S4 DEAD

%%

<INITIAL>1          BEGIN S1;
<INITIAL>0          BEGIN S4;
<INITIAL>[^01\n]    BEGIN DEAD;
<INITIAL>\n         { printf("Not Accepted\n"); BEGIN INITIAL; }

<S1>1               BEGIN S2;
<S1>0               BEGIN S3;
<S1>[^01\n]         BEGIN DEAD;
<S1>\n              { printf("Accepted\n"); BEGIN INITIAL; }

<S2>1               BEGIN S1;
<S2>0               BEGIN S4;
<S2>[^01\n]         BEGIN DEAD;
<S2>\n              { printf("Not Accepted\n"); BEGIN INITIAL; }

<S3>1               BEGIN S4;
<S3>0               BEGIN S1;
<S3>[^01\n]         BEGIN DEAD;
<S3>\n              { printf("Not Accepted\n"); BEGIN INITIAL; }

<S4>1               BEGIN S3;
<S4>0               BEGIN S2;
<S4>[^01\n]         BEGIN DEAD;
<S4>\n              { printf("Not Accepted\n"); BEGIN INITIAL; }

<DEAD>[^\n]         BEGIN DEAD;
<DEAD>\n            { printf("Invalid Input\n"); BEGIN INITIAL; }

%%

/* Report end of input after the first stream; defined here so the scanner
   links without the lex support library. */
int yywrap(void)
{
    return 1;
}

int main(void)
{
    printf("Enter String:\n");
    yylex();
    return 0;
}
Part B: DFA for strings having 231 as a substring

19
%{
/*
 * DFA accepting strings that contain "231" as a substring. One input line is
 * one string; the verdict is printed at the newline.
 *
 * NOTE(review): the exercise text says the alphabet is {0, 1, 2}, but the
 * required substring contains '3', so this scanner treats {0, 1, 2, 3} as the
 * alphabet in every state -- confirm against the assignment.
 *
 * State meaning (longest suffix of the input that is a prefix of "231"):
 *   INITIAL : nothing matched
 *   S1      : "2"
 *   S2      : "23"
 *   S3      : "231" already seen (accepting, absorbing for valid symbols)
 *   DEAD    : a character outside the alphabet was seen
 *
 * A '2' read in S1 or S2 must lead to S1, because that '2' may begin a new
 * occurrence (e.g. "23231"). Every state has a rule for every character, so
 * nothing falls through to lex's default echo action.
 */
#include <stdio.h>
%}

%s S1 S2 S3 DEAD

%%

<INITIAL>2          BEGIN S1;
<INITIAL>[013]      BEGIN INITIAL;
<INITIAL>[^0123\n]  BEGIN DEAD;
<INITIAL>\n         { printf("Not Accepted\n"); BEGIN INITIAL; }

<S1>3               BEGIN S2;
<S1>2               BEGIN S1;
<S1>[01]            BEGIN INITIAL;
<S1>[^0123\n]       BEGIN DEAD;
<S1>\n              { printf("Not Accepted\n"); BEGIN INITIAL; }

<S2>1               BEGIN S3;
<S2>2               BEGIN S1;
<S2>[03]            BEGIN INITIAL;
<S2>[^0123\n]       BEGIN DEAD;
<S2>\n              { printf("Not Accepted\n"); BEGIN INITIAL; }

<S3>[0123]          BEGIN S3;
<S3>[^0123\n]       BEGIN DEAD;
<S3>\n              { printf("Accepted\n"); BEGIN INITIAL; }

<DEAD>[^\n]         BEGIN DEAD;
<DEAD>\n            { printf("Not Accepted\n"); BEGIN INITIAL; }

%%

/* Report end of input after the first stream; defined here so the scanner
   links without the lex support library. */
int yywrap(void)
{
    return 1;
}

int main(void)
{
    printf("Enter String:\n");
    yylex();
    return 0;
}
Input/Output:
Part A:

Part B:

Conclusion:
●​ DFA implemented using LEX successfully identifies strings with an odd number of 1s
and an even number of 0s.
●​ DFA implemented using LEX successfully identifies strings over {0, 1, 2} containing
231 as a substring.

21
ASSESSMENT – 2
EXPERIMENT-1
Construct a lexical analyser
1.​ Identify the tokens from simple statement as input stored in a linear array
2.​ Identify the tokens from small program (not exceeding 5 lines) as input stored in a
text file
3.​ Identify the tokens from small program (not exceeding 5 lines) as input get it from
the user and store it in a text file
Aim: Construct a simple lexical analyser in C++ that:
●​ Identifies tokens from a simple statement stored in a linear array.
●​ Identifies tokens from a small program (≤5 lines) stored in a text file.
●​ Accepts a small program (≤5 lines) from the user, stores it in a text file, and then
identifies its tokens.

22
Algorithm:
1.​ Input Selection:
●​ Display a menu with three options.
●​ Based on user choice, either use a predefined string, read from a file, or accept
user input and store it in a file.
2.​ Tokenization Process:
●​ Traverse the input string character by character.
●​ For Identifiers/Keywords:
o​ If a letter is encountered, collect subsequent alphanumeric characters
(and underscores) to form a token.
o​ Check if the token is a keyword (from a predefined list); if yes, classify as
Keyword, else as Identifier.
●​ For Numbers:
o​ If a digit is encountered, collect all consecutive digits to form a numeric
token.
●​ For Operators:
o​ Check if the character is one of the operator symbols (e.g., +, -, *, /, =, %);
if so, classify as Operator.
●​ For Delimiters:
o​ Identify punctuation symbols (e.g., ;, ,, (, ), {, }, [, ]) and classify as
Delimiter.
●​ Skip Whitespaces:
o​ Ignore spaces and newlines during tokenization.
3.​ Output:
●​ Display each token along with its identified type.

Source Code:
#include <iostream>
#include <fstream>
#include <sstream>
#include <cctype>
#include <string>
using namespace std;

23
// Reports whether `str` exactly matches one of the reserved words that the
// analyser classifies as a keyword (comparison is case-sensitive).
bool isKeyword(const std::string &str) {
    const char *const reservedWords[] = {"int",    "float", "if",   "else",
                                         "while",  "for",   "do",   "return",
                                         "void",   "char",  "double"};
    for (const char *word : reservedWords) {
        if (str == word) {
            return true;
        }
    }
    return false;
}

// Reports whether `ch` is one of the single-character operators
// + - * / = %.
bool isOperator(char ch) {
    switch (ch) {
    case '+':
    case '-':
    case '*':
    case '/':
    case '=':
    case '%':
        return true;
    default:
        return false;
    }
}

// Reports whether `ch` is a delimiter: a space or one of ; , ( ) { } [ ].
bool isDelimiter(char ch) {
    switch (ch) {
    case ' ':
    case ';':
    case ',':
    case '(':
    case ')':
    case '{':
    case '}':
    case '[':
    case ']':
        return true;
    default:
        return false;
    }
}

// Tokenization function
void tokenize(const string &input) {
int len = input.length();
int i = 0;
while (i < len) {
// Skip whitespace
if (isspace(input[i])) {
i++;
continue;
}
// Identifiers or Keywords
if (isalpha(input[i])) {
string token = "";
while (i < len && (isalnum(input[i]) || input[i] == '_')) {
token.push_back(input[i]);
i++;
}
if (isKeyword(token))
cout << token << " : Keyword" << endl;
else
cout << token << " : Identifier" << endl;
}
// Numbers
else if (isdigit(input[i])) {
string token = "";
while (i < len && isdigit(input[i])) {
token.push_back(input[i]);
i++;

24
}
cout << token << " : Number" << endl;
}
// Operators
else if (isOperator(input[i])) {
cout << input[i] << " : Operator" << endl;
i++;
}
// Delimiters
else if (isDelimiter(input[i])) {
// Avoid printing spaces as delimiters
if (!isspace(input[i]))
cout << input[i] << " : Delimiter" << endl;
i++;
}
// Unknown characters
else {
cout << input[i] << " : Unknown" << endl;
i++;
}
}
}

// Entry point of the lexical analyser. Offers three input sources:
//   1. a single statement typed by the user,
//   2. the file input.txt,
//   3. up to five lines typed by the user, which are saved to
//      user_program.txt before being analysed.
// Returns 0 on success and 1 when a required file cannot be opened.
int main() {
    int choice = 0;
    std::cout << "Lexical Analyzer Options:" << std::endl;
    std::cout << "1. Analyze tokens from a linear array input" << std::endl;
    std::cout << "2. Analyze tokens from a text file" << std::endl;
    std::cout << "3. Input program from user, store in file, and analyze tokens" << std::endl;
    std::cout << "Enter your choice: ";

    if (!(std::cin >> choice)) {
        // Non-numeric entry: recover the stream and fall through to the
        // "Invalid choice" branch below.
        std::cin.clear();
        choice = 0;
    }
    // Discard the rest of the choice line (not just one character) so that
    // trailing spaces are not mistaken for the first line of input.
    std::string restOfLine;
    std::getline(std::cin, restOfLine);

    if (choice == 1) {
        // Option 1: User inputs a simple statement
        std::string input;
        std::cout << "\nEnter a simple statement: ";
        std::getline(std::cin, input);
        std::cout << "\nTokens:" << std::endl;
        tokenize(input);
    }
    else if (choice == 2) {
        // Option 2: Read input from a text file (input.txt)
        std::ifstream file("input.txt");
        if (!file) {
            std::cout << "\nUnable to open file input.txt" << std::endl;
            return 1;
        }
        std::string line, programText;
        while (std::getline(file, line)) {
            programText += line + "\n";
        }
        file.close();
        std::cout << "\nProgram from file:" << std::endl << programText << "\nTokens:"
                  << std::endl;
        tokenize(programText);
    }
    else if (choice == 3) {
        // Option 3: Get program from user, store in file, and analyze tokens
        std::cout << "\nEnter your program (max 5 lines). Enter an empty line to finish:" << std::endl;
        std::string programText, line;
        int count = 0;
        while (count < 5 && std::getline(std::cin, line)) {
            if (line.empty())
                break;
            programText += line + "\n";
            count++;
        }
        // Store the user program in a text file; report failure instead of
        // claiming the program was stored when it was not.
        std::ofstream file("user_program.txt");
        if (!file) {
            std::cout << "\nUnable to open file user_program.txt" << std::endl;
            return 1;
        }
        file << programText;
        file.close();
        std::cout << "\nProgram stored in user_program.txt" << std::endl;
        std::cout << "Tokens:" << std::endl;
        tokenize(programText);
    }
    else {
        std::cout << "\nInvalid choice." << std::endl;
    }
    return 0;
}

Input/Output:

26
27
Conclusion:
● The program takes its input either interactively from the user (options 1 and 3) or from the text file input.txt (option 2).
●​ It handles three distinct input methods, tokenizing the content appropriately.
●​ This modular approach serves as a foundation for a lexical analyser in C++.

EXPERIMENT-2
Construct a lexical analyser using LEX tool.
Aim: Construct a lexical analyser using the LEX tool to tokenize input such as keywords,
identifiers, numbers, and operators.
Algorithm:
1.​ Read the input character stream.
2.​ Use regular expressions to match patterns:
a.​ Match specific keywords (e.g., if, else).
b.​ Recognize identifiers (letters followed by letters/digits).
c.​ Identify numbers (sequences of digits).

28
d.​ Detect arithmetic operators and other symbols
3.​ Print the token type along with the matched lexeme.
4.​ Continue processing until the end of input.
Source Code:
%{
/*
 * Lexical analyser: prints one line per token read from standard input.
 *
 * Recognised tokens: the keywords if/else/while/int/float/return,
 * identifiers, integer and decimal numbers, the operators + - * / = == != < >,
 * the delimiters ( ) { } ; , and "//" line comments. Any other character is
 * reported as an error together with the current line number.
 *
 * Keyword rules are listed before {ID}; when a keyword and {ID} match the
 * same text, lex picks the earlier rule, so keywords win.
 */
#include <stdio.h>
#include <string.h>
int line_num = 1; /* 1-based number of the line currently being scanned */
%}
DIGIT [0-9]
LETTER [a-zA-Z]
ID {LETTER}({LETTER}|{DIGIT})*
NUMBER {DIGIT}+(\.{DIGIT}+)?
WHITESPACE [ \t]+
%%
{WHITESPACE} ;
\n {line_num++;}
"if" {printf("Keyword: IF\n");}
"else" {printf("Keyword: ELSE\n");}
"while" {printf("Keyword: WHILE\n");}
"int" {printf("Keyword: INT\n");}
"float" {printf("Keyword: FLOAT\n");}
"return" {printf("Keyword: RETURN\n");}
{ID} {printf("Identifier: %s\n", yytext);}
{NUMBER} {printf("Number: %s\n", yytext);}
"+" {printf("Operator: PLUS\n");}
"-" {printf("Operator: MINUS\n");}
"*" {printf("Operator: MULTIPLY\n");}
"/" {printf("Operator: DIVIDE\n");}
"=" {printf("Operator: ASSIGN\n");}
"==" {printf("Operator: EQUAL\n");}
"!=" {printf("Operator: NOT_EQUAL\n");}
"<" {printf("Operator: LESS_THAN\n");}
">" {printf("Operator: GREATER_THAN\n");}
"(" {printf("Delimiter: LEFT_PAREN\n");}
")" {printf("Delimiter: RIGHT_PAREN\n");}
"{" {printf("Delimiter: LEFT_BRACE\n");}
"}" {printf("Delimiter: RIGHT_BRACE\n");}
";" {printf("Delimiter: SEMICOLON\n");}
"," {printf("Delimiter: COMMA\n");}
"//"[^\n]* {printf("Comment: %s\n", yytext);}
. {printf("Error: Unexpected character %s at line %d\n", yytext, line_num);}
%%
/* Tell the scanner there is no further input after the first stream ends. */
int yywrap(void) {
return 1;
}
/* Runs the scanner over standard input and prints a short summary.
   NOTE(review): line_num starts at 1 and is incremented per newline, so the
   reported total is one more than the number of newline characters read. */
int main(void) {
printf("Lexical Analysis Started...\n");
yylex();
printf("\nLexical Analysis Completed.\n");
printf("Total lines processed: %d\n", line_num);
return 0;
}
Input/Output:

30
Conclusion:
●​ The lex code efficiently tokenizes various components of the input.
●​ It demonstrates how to use regular expressions in LEX for constructing a simple
lexical analyser.
●​ The approach can be expanded to cover additional tokens and more complex
grammars as needed.

31
EXPERIMENT-3
Construct Predictive parse table using C language.
Hint: Consider the input grammar without left recursion, find FIRST and FOLLOW for each
non-terminal and then construct the parse table.
Aim: Construct a predictive parsing table from a given grammar (assumed free from left
recursion) by computing the FIRST and FOLLOW sets for each non-terminal.
Algorithm:
1. Input Grammar:
●​ Read the number of productions.
● Input each production in the format A=α, as the program's prompt requests (use # to denote epsilon).
2. Compute FIRST Sets:
●​ For each production A → α, scan symbols in α from left to right.
●​ If a symbol is terminal (or not an uppercase letter), add it to FIRST(A); if it’s #
(epsilon), mark nullable.
●​ If the symbol is a non-terminal, add FIRST (symbol) (excluding epsilon) to FIRST(A); if
the non-terminal is nullable, continue to the next symbol.
●​ If all symbols are nullable, add epsilon (#) to FIRST(A).
3. Compute FOLLOW Sets:
●​ Add $ to FOLLOW (start symbol).
●​ For each production A → αBβ, add all terminals from FIRST(β) (excluding epsilon) to
FOLLOW(B).
●​ If β is nullable (or if B is the last symbol), add FOLLOW(A) to FOLLOW(B).
4. Construct Predictive Parsing Table:
●​ For each production A → α:
o​ For every terminal t in FIRST(α) (except epsilon), set table entry [A][t] = A -> α.
o​ If FIRST(α) contains epsilon, for every terminal in FOLLOW(A) (including $), set
table entry [A][terminal] = A -> α.
Source Code:
#include <stdio.h>
#include <ctype.h>
#include <string.h>

32
#include <stdlib.h>

/* Forward declarations of the FIRST/FOLLOW helpers used by main(). */
void followfirst(char, int, int);
void findfirst(char, int, int);
void follow(char c);

/* count: number of productions read by main(); n: next free slot in first[]. */
int count, n = 0;
/* One row per distinct non-terminal: element 0 is the non-terminal, the
   following elements are its FIRST symbols, and unused slots hold '!'. */
char calc_first[10][100];
/* Same layout as calc_first, holding FOLLOW symbols. */
char calc_follow[10][100];
/* Next free slot in f[]. */
int m = 0;
/* production[i] is the i-th production as typed ("A=..."); first[] is the
   running list of FIRST symbols appended by findfirst(). It accumulates the
   symbols of every non-terminal (n is never reset), so it needs far more than
   10 slots. */
char production[10][10], first[100];
/* Running list of FOLLOW symbols appended by follow()/followfirst(); it also
   accumulates across all non-terminals, hence the larger size. */
char f[100];
/* Loop index shared by several loops in main(). */
int k;
/* Left-hand-side non-terminal currently being processed in main(). */
char ck;
/* Index of the production being processed in main()'s FOLLOW loop. */
int e;

int main(int argc, char **argv)


{
int jm = 0;
int km = 0;
int i, choice;
char c, ch;
printf("Enter number of Productions: ");
scanf("%d", &count);
printf("Enter %d productions in form A=B (use '#' for epsilon): \n",
count);
for (i = 0; i < count; i++)
{
scanf("%s%c", production[i], &ch);
}
printf("\n");
int kay;
char done[count];
int ptr = -1;
for (k = 0; k < count; k++)
{
for (kay = 0; kay < 100; kay++)
{
calc_first[k][kay] = '!';
}
}
int point1 = 0, point2, xxx;
for (k = 0; k < count; k++)
{
c = production[k][0];
point2 = 0;
xxx = 0;
for (kay = 0; kay <= ptr; kay++)
if (c == done[kay])
xxx = 1;
if (xxx == 1)
continue;

33
findfirst(c, 0, 0);
ptr += 1;
done[ptr] = c;
printf("First(%c)= { ", c);
calc_first[point1][point2++] = c;
for (i = 0 + jm; i < n; i++)
{
int lark = 0, chk = 0;
for (lark = 0; lark < point2; lark++)
{
if (first[i] == calc_first[point1][lark])
{
chk = 1;
break;
}
}
if (chk == 0)
{
printf("%c, ", first[i]);
calc_first[point1][point2++] = first[i];
}
}
printf("}\n");
jm = n;
point1++;
}
printf("\n");
printf("-----------------------------------------------\n\n");
char donee[count];
ptr = -1;
for (k = 0; k < count; k++)
{
for (kay = 0; kay < 100; kay++)
{
calc_follow[k][kay] = '!';
}
}
point1 = 0;
int land = 0;
for (e = 0; e < count; e++)
{
ck = production[e][0];
point2 = 0;
xxx = 0;
for (kay = 0; kay <= ptr; kay++)
if (ck == donee[kay])
xxx = 1;
if (xxx == 1)
continue;
land += 1;
follow(ck);
ptr += 1;

34
donee[ptr] = ck;
printf("Follow(%c) = { ", ck);
calc_follow[point1][point2++] = ck;
for (i = 0 + km; i < m; i++)
{
int lark = 0, chk = 0;
for (lark = 0; lark < point2; lark++)
{
if (f[i] == calc_follow[point1][lark])
{
chk = 1;
break;
}
}
if (chk == 0)
{
printf("%c, ", f[i]);
calc_follow[point1][point2++] = f[i];
}
}
printf(" }\n");
km = m;
point1++;
}
char ter[10];
for (k = 0; k < 10; k++)
{
ter[k] = '!';
}
int ap, vp, sid = 0;
for (k = 0; k < count; k++)
{
for (kay = 0; kay < count; kay++)
{
if (!isupper(production[k][kay]) && production[k][kay] != '#' &&
production[k][kay] != '=' && production[k][kay] != '\0')
{
vp = 0;
for (ap = 0; ap < sid; ap++)
{
if (production[k][kay] == ter[ap])
{
vp = 1;
break;
}
}
if (vp == 0)
{
ter[sid] = production[k][kay];
sid++;
}
}

35
}
}
ter[sid] = '$';
sid++;
printf("\nParsing Table:");

printf("\n====================================================================
=================================================\n");
printf("\t|\t");
for (ap = 0; ap < sid; ap++)
{
printf("%c\t\t", ter[ap]);
}

printf("\n====================================================================
=================================================\n");
char first_prod[count][sid];
for (ap = 0; ap < count; ap++)
{
int destiny = 0;
k = 2;
int ct = 0;
char tem[100];
while (production[ap][k] != '\0')
{
if (!isupper(production[ap][k]))
{
tem[ct++] = production[ap][k];
tem[ct++] = '_';
tem[ct++] = '\0';
k++;
break;
}
else
{
int zap = 0;
int tuna = 0;
for (zap = 0; zap < count; zap++)
{
if (calc_first[zap][0] == production[ap][k])
{
for (tuna = 1; tuna < 100; tuna++)
{
if (calc_first[zap][tuna] != '!')
{
tem[ct++] = calc_first[zap][tuna];
}
else
break;
}
break;
}

36
}
tem[ct++] = '_';
}
k++;
}
int zap = 0, tuna;
for (tuna = 0; tuna < ct; tuna++)
{
if (tem[tuna] == '#')
{
zap = 1;
}
else if (tem[tuna] == '_')
{
if (zap == 1)
{
zap = 0;
}
else
break;
}
else
{
first_prod[ap][destiny++] = tem[tuna];
}
}
}
char table[land][sid + 1];
ptr = -1;
for (ap = 0; ap < land; ap++)
{
for (kay = 0; kay < (sid + 1); kay++)
{
table[ap][kay] = '!';
}
}
for (ap = 0; ap < count; ap++)
{
ck = production[ap][0];
xxx = 0;
for (kay = 0; kay <= ptr; kay++)
if (ck == table[kay][0])
xxx = 1;
if (xxx == 1)
continue;
else
{
ptr = ptr + 1;
table[ptr][0] = ck;
}
}
for (ap = 0; ap < count; ap++)

37
{
int tuna = 0;
while (first_prod[ap][tuna] != '\0')
{
int to, ni = 0;
for (to = 0; to < sid; to++)
{
if (first_prod[ap][tuna] == ter[to])
{
ni = 1;
}
}
if (ni == 1)
{
char xz = production[ap][0];
int cz = 0;
while (table[cz][0] != xz)
{
cz = cz + 1;
}
int vz = 0;
while (ter[vz] != first_prod[ap][tuna])
{
vz = vz + 1;
}
table[cz][vz + 1] = (char)(ap + 65);
}
tuna++;
}
}
for (k = 0; k < sid; k++)
{
for (kay = 0; kay < 100; kay++)
{
if (calc_first[k][kay] == '!')
{
break;
}
else if (calc_first[k][kay] == '#')
{
int fz = 1;
while (calc_follow[k][fz] != '!')
{
char xz = production[k][0];
int cz = 0;
while (table[cz][0] != xz)
{
cz = cz + 1;
}
int vz = 0;
while (ter[vz] != calc_follow[k][fz])
{

38
vz = vz + 1;
}
table[k][vz + 1] = '#';
fz++;
}
break;
}
}
}
for (ap = 0; ap < land; ap++)
{
printf(" %c\t|\t", table[ap][0]);
for (kay = 1; kay < (sid + 1); kay++)
{
if (table[ap][kay] == '!')
printf("\t\t");
else if (table[ap][kay] == '#')
printf("%c=#\t\t", table[ap][0]);
else
{
int mum = (int)(table[ap][kay]);
mum -= 65;
printf("%s\t\t", production[mum]);
}
}
printf("\n");

printf("----------------------------------------------------------------------
-----------------------------------------------");
printf("\n");
}

void follow(char c)
{
int i, j;
if (production[0][0] == c)
{
f[m++] = '$';
}
for (i = 0; i < 10; i++)
{
for (j = 2; j < 10; j++)
{
if (production[i][j] == c)
{
if (production[i][j + 1] != '\0')
{
followfirst(production[i][j + 1], i, (j + 2));
}
if (production[i][j + 1] == '\0' && c != production[i][0])

39
{
follow(production[i][0]);
}
}
}
}
}

/*
 * Appends the symbols of FIRST(c) to the global buffer first[], advancing n.
 *
 * q1/q2 name the production row and column that follow the symbol being
 * expanded; they are consulted when c has an epsilon ('#') production, to
 * decide whether '#' itself is recorded or the next symbol is expanded.
 * Duplicates are not removed here.
 */
void findfirst(char c, int q1, int q2)
{
    int row;

    /* Anything that is not an upper-case letter is recorded as-is. */
    if (!(isupper(c)))
        first[n++] = c;

    for (row = 0; row < count; row++)
    {
        char lead;

        if (production[row][0] != c)
            continue;

        lead = production[row][2];
        if (lead != '#')
        {
            if (isupper(lead))
                findfirst(lead, row, 3); /* expand the leading non-terminal */
            else
                first[n++] = lead;
            continue;
        }

        /* Epsilon production: record '#' when nothing follows, or when the
         * call came from the top level (q1 == 0 and q2 == 0); otherwise expand
         * the symbol that follows in the calling production. */
        if (production[q1][q2] == '\0' || (q1 == 0 && q2 == 0))
            first[n++] = '#';
        else
            findfirst(production[q1][q2], q1, (q2 + 1));
    }
}

/*
 * Adds to f[] the symbols that can start the remainder of a production.
 *
 * c  - the symbol immediately after the non-terminal whose FOLLOW is wanted
 * c1 - row of the production being examined
 * c2 - column of the symbol after c in that production
 *
 * A non-upper-case c is added directly. Otherwise the stored FIRST row of c is
 * copied (minus '#'); for a '#' entry the search continues with the next symbol
 * of the production, or with FOLLOW of its left-hand side when none is left.
 */
void followfirst(char c, int c1, int c2)
{
    /* Matches the 100-column scan main() uses on calc_first -- TODO confirm
     * against the array declaration, which is outside this excerpt. */
    const int width = 100;
    int row, col;

    if (!(isupper(c)))
    {
        f[m++] = c;
        return;
    }

    for (row = 0; row < count; row++)
    {
        if (calc_first[row][0] == c)
            break;
    }
    /* A non-terminal that never appears on a left-hand side has no FIRST row;
     * the old code walked past the end of the table in that case. */
    if (row == count)
        return;

    for (col = 1; col < width && calc_first[row][col] != '!'; col++)
    {
        if (calc_first[row][col] != '#')
        {
            f[m++] = calc_first[row][col];
        }
        else if (production[c1][c2] == '\0')
        {
            follow(production[c1][0]);
        }
        else
        {
            followfirst(production[c1][c2], c1, c2 + 1);
        }
    }
}
Input/Output:

41
Conclusion: The program reads an input grammar, computes the FIRST and FOLLOW sets,
and successfully constructs a predictive parsing table based on these sets.

EXPERIMENT-4
Implement the predictive parsing algorithm, taking the parse table and the input string as
inputs. Use C language for implementation.
Aim:
●​ Implement a predictive parsing algorithm in C.
●​ Simulate parsing using a stack.
●​ Display each parsing step in a bordered table with columns: Stack, Input String, and
Action.
Algorithm:
1. Initialize Resources:

▪​ Input a grammar

▪​ Construct parse table

▪​ Read the input string (tokens separated by spaces) and ensure it ends
with a terminal symbol "$".

▪​ Initialize a stack with "$" (bottom) and the start symbol "E" (top).

2. Parsing Process:

▪​ Loop until the stack is empty:

▪​ Retrieve the current stack content and the remaining input tokens.

▪​ If the top of the stack and current input token are both "$":

▪​ Record the action "Accept" and exit the loop.

▪​ If the top is a terminal:

▪​ If it matches the current input token:

▪​ Pop the terminal and advance the input pointer.

▪​ Record the action "pop".

42
▪​ Else:

▪​ Record an error ("Error: mismatch") and


terminate parsing.

▪​ If the top is a non-terminal:

▪​ Look up the production rule from the parse table using


the non-terminal and current token.

▪​ If a rule exists:

▪​ Pop the non-terminal.

▪​ If the production is not ε, push its symbols in


reverse order.

▪​ Record the action as "NonTerminal ->


Production".

▪​ Else:

▪​ Record an error ("Error: no rule") and terminate


parsing.
3. Output:

▪​ Save each step (current stack, input string, and action) as a row in a
table.

▪​ After processing, print the table with borders around each cell.

▪​ Finally, display whether the input string is accepted or rejected.

Source Code:
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

void followfirst(char, int, int);


void findfirst(char, int, int);
void follow(char c);

/* Number of productions entered, and the next free slot in first[]. */
int count, n = 0;
/* Row r holds a non-terminal in column 0 followed by its FIRST symbols;
 * unused cells are filled with '!'. */
char calc_first[10][100];
/* Same layout as calc_first, holding FOLLOW symbols. */
char calc_follow[10][100];
/* Next free slot in f[]. */
int m = 0;
/* production[i] is one rule in the form "A=rhs".
 * first[] and f[] are scratch buffers that findfirst()/follow() append to for
 * EVERY non-terminal in turn (n and m are never reset, duplicates included),
 * so they need far more room than a single set: 10 bytes overflowed on
 * ordinary textbook grammars. */
char production[10][10], first[1000];
char f[1000];
int k;
char ck;
int e;

int main(int argc, char **argv)


{
int jm = 0;
int km = 0;
int i, choice;
char c, ch;
printf("Enter number of Productions: ");
scanf("%d", &count);
printf("Enter %d productions in form A=B (use '#' for epsilon): \n",
count);
for (i = 0; i < count; i++)
{
scanf("%s%c", production[i], &ch);
}
printf("\n");
int kay;
char done[count];
int ptr = -1;
for (k = 0; k < count; k++)
{
for (kay = 0; kay < 100; kay++)
{
calc_first[k][kay] = '!';
}
}
int point1 = 0, point2, xxx;
for (k = 0; k < count; k++)
{
c = production[k][0];
point2 = 0;
xxx = 0;
for (kay = 0; kay <= ptr; kay++)
if (c == done[kay])
xxx = 1;
if (xxx == 1)
continue;
findfirst(c, 0, 0);
ptr += 1;
done[ptr] = c;
printf("First(%c)= { ", c);
calc_first[point1][point2++] = c;
for (i = 0 + jm; i < n; i++)
{
int lark = 0, chk = 0;
for (lark = 0; lark < point2; lark++)

44
{
if (first[i] == calc_first[point1][lark])
{
chk = 1;
break;
}
}
if (chk == 0)
{
printf("%c, ", first[i]);
calc_first[point1][point2++] = first[i];
}
}
printf("}\n");
jm = n;
point1++;
}
printf("\n");
printf("-----------------------------------------------\n\n");
char donee[count];
ptr = -1;
for (k = 0; k < count; k++)
{
for (kay = 0; kay < 100; kay++)
{
calc_follow[k][kay] = '!';
}
}
point1 = 0;
int land = 0;
for (e = 0; e < count; e++)
{
ck = production[e][0];
point2 = 0;
xxx = 0;
for (kay = 0; kay <= ptr; kay++)
if (ck == donee[kay])
xxx = 1;
if (xxx == 1)
continue;
land += 1;
follow(ck);
ptr += 1;
donee[ptr] = ck;
printf("Follow(%c) = { ", ck);
calc_follow[point1][point2++] = ck;
for (i = 0 + km; i < m; i++)
{
int lark = 0, chk = 0;
for (lark = 0; lark < point2; lark++)
{
if (f[i] == calc_follow[point1][lark])

45
{
chk = 1;
break;
}
}
if (chk == 0)
{
printf("%c, ", f[i]);
calc_follow[point1][point2++] = f[i];
}
}
printf(" }\n");
km = m;
point1++;
}
char ter[10];
for (k = 0; k < 10; k++)
{
ter[k] = '!';
}
int ap, vp, sid = 0;
for (k = 0; k < count; k++)
{
for (kay = 0; kay < count; kay++)
{
if (!isupper(production[k][kay]) && production[k][kay] != '#' &&
production[k][kay] != '=' && production[k][kay] != '\0')
{
vp = 0;
for (ap = 0; ap < sid; ap++)
{
if (production[k][kay] == ter[ap])
{
vp = 1;
break;
}
}
if (vp == 0)
{
ter[sid] = production[k][kay];
sid++;
}
}
}
}
ter[sid] = '$';
sid++;
printf("\nParsing Table:");

printf("\n====================================================================
=================================================\n");
printf("\t|\t");

46
for (ap = 0; ap < sid; ap++)
{
printf("%c\t\t", ter[ap]);
}

printf("\n====================================================================
=================================================\n");
char first_prod[count][sid];
for (ap = 0; ap < count; ap++)
{
int destiny = 0;
k = 2;
int ct = 0;
char tem[100];
while (production[ap][k] != '\0')
{
if (!isupper(production[ap][k]))
{
tem[ct++] = production[ap][k];
tem[ct++] = '_';
tem[ct++] = '\0';
k++;
break;
}
else
{
int zap = 0;
int tuna = 0;
for (zap = 0; zap < count; zap++)
{
if (calc_first[zap][0] == production[ap][k])
{
for (tuna = 1; tuna < 100; tuna++)
{
if (calc_first[zap][tuna] != '!')
{
tem[ct++] = calc_first[zap][tuna];
}
else
break;
}
break;
}
}
tem[ct++] = '_';
}
k++;
}
int zap = 0, tuna;
for (tuna = 0; tuna < ct; tuna++)
{
if (tem[tuna] == '#')

47
{
zap = 1;
}
else if (tem[tuna] == '_')
{
if (zap == 1)
{
zap = 0;
}
else
break;
}
else
{
first_prod[ap][destiny++] = tem[tuna];
}
}
}
char table[land][sid + 1];
ptr = -1;
for (ap = 0; ap < land; ap++)
{
for (kay = 0; kay < (sid + 1); kay++)
{
table[ap][kay] = '!';
}
}
for (ap = 0; ap < count; ap++)
{
ck = production[ap][0];
xxx = 0;
for (kay = 0; kay <= ptr; kay++)
if (ck == table[kay][0])
xxx = 1;
if (xxx == 1)
continue;
else
{
ptr = ptr + 1;
table[ptr][0] = ck;
}
}
for (ap = 0; ap < count; ap++)
{
int tuna = 0;
while (first_prod[ap][tuna] != '\0')
{
int to, ni = 0;
for (to = 0; to < sid; to++)
{
if (first_prod[ap][tuna] == ter[to])
{

48
ni = 1;
}
}
if (ni == 1)
{
char xz = production[ap][0];
int cz = 0;
while (table[cz][0] != xz)
{
cz = cz + 1;
}
int vz = 0;
while (ter[vz] != first_prod[ap][tuna])
{
vz = vz + 1;
}
table[cz][vz + 1] = (char)(ap + 65);
}
tuna++;
}
}
for (k = 0; k < sid; k++)
{
for (kay = 0; kay < 100; kay++)
{
if (calc_first[k][kay] == '!')
{
break;
}
else if (calc_first[k][kay] == '#')
{
int fz = 1;
while (calc_follow[k][fz] != '!')
{
char xz = production[k][0];
int cz = 0;
while (table[cz][0] != xz)
{
cz = cz + 1;
}
int vz = 0;
while (ter[vz] != calc_follow[k][fz])
{
vz = vz + 1;
}
table[k][vz + 1] = '#';
fz++;
}
break;
}
}
}

49
for (ap = 0; ap < land; ap++)
{
printf(" %c\t|\t", table[ap][0]);
for (kay = 1; kay < (sid + 1); kay++)
{
if (table[ap][kay] == '!')
printf("\t\t");
else if (table[ap][kay] == '#')
printf("%c=#\t\t", table[ap][0]);
else
{
int mum = (int)(table[ap][kay]);
mum -= 65;
printf("%s\t\t", production[mum]);
}
}
printf("\n");

printf("----------------------------------------------------------------------
-----------------------------------------------");
printf("\n");
}
int j;
printf("\n\nEnter the Input String: ");
char input[100];
scanf("%s%c", input, &ch);

printf("\n====================================================================
=======\n");
printf("\tStack\t\t\tInput\t\t\tAction");

printf("\n====================================================================
=======\n");
int i_ptr = 0, s_ptr = 1;
char stack[100];
stack[0] = '$';
stack[1] = table[0][0];
while (s_ptr != -1)
{
printf("\t");
int vamp = 0;
for (vamp = 0; vamp <= s_ptr; vamp++)
{
printf("%c", stack[vamp]);
}
printf("\t\t\t");
vamp = i_ptr;
while (input[vamp] != '\0')
{
printf("%c", input[vamp]);
vamp++;
}

50
printf("\t\t\t");
char her = input[i_ptr];
char him = stack[s_ptr];
s_ptr--;
if (!isupper(him))
{
if (her == him)
{
i_ptr++;
printf("POP\n");
}
else
{
printf("\nString Not Accepted!\n");
exit(0);
}
}
else
{
for (i = 0; i < sid; i++)
{
if (ter[i] == her)
break;
}
char produ[100];
for (j = 0; j < land; j++)
{
if (him == table[j][0])
{
if (table[j][i + 1] == '#')
{
printf("%c=#\n", table[j][0]);
produ[0] = '#';
produ[1] = '\0';
}
else if (table[j][i + 1] != '!')
{
int mum = (int)(table[j][i + 1]);
mum -= 65;
strcpy(produ, production[mum]);
printf("%s\n", produ);
}
else
{
printf("\nString Not Accepted!\n");
exit(0);
}
}
}
int le = strlen(produ);
le = le - 1;
if (le == 0)

51
{
continue;
}
for (j = le; j >= 2; j--)
{
s_ptr++;
stack[s_ptr] = produ[j];
}
}
}
printf("\n");
if (input[i_ptr] == '\0')
{
printf("\tString Accepted!\n");
}
else
printf("\n\tString Rejected!\n");
printf("\n");
}

void follow(char c)
{
int i, j;
if (production[0][0] == c)
{
f[m++] = '$';
}
for (i = 0; i < 10; i++)
{
for (j = 2; j < 10; j++)
{
if (production[i][j] == c)
{
if (production[i][j + 1] != '\0')
{
followfirst(production[i][j + 1], i, (j + 2));
}
if (production[i][j + 1] == '\0' && c != production[i][0])
{
follow(production[i][0]);
}
}
}
}
}

/*
 * Appends the symbols of FIRST(c) to the global buffer first[], advancing n.
 *
 * q1/q2 name the production row and column that follow the symbol being
 * expanded; they are consulted when c has an epsilon ('#') production, to
 * decide whether '#' itself is recorded or the next symbol is expanded.
 * Duplicates are not removed here; main() filters them.
 */
void findfirst(char c, int q1, int q2)
{
    int row;

    /* Anything that is not an upper-case letter is recorded as-is. */
    if (!(isupper(c)))
        first[n++] = c;

    for (row = 0; row < count; row++)
    {
        char lead;

        if (production[row][0] != c)
            continue;

        lead = production[row][2];
        if (lead != '#')
        {
            if (isupper(lead))
                findfirst(lead, row, 3); /* expand the leading non-terminal */
            else
                first[n++] = lead;
            continue;
        }

        /* Epsilon production: record '#' when nothing follows, or when the
         * call came from the top level (q1 == 0 and q2 == 0); otherwise expand
         * the symbol that follows in the calling production. */
        if (production[q1][q2] == '\0' || (q1 == 0 && q2 == 0))
            first[n++] = '#';
        else
            findfirst(production[q1][q2], q1, (q2 + 1));
    }
}

/*
 * Adds to f[] the symbols that can start the remainder of a production.
 *
 * c  - the symbol immediately after the non-terminal whose FOLLOW is wanted
 * c1 - row of the production being examined
 * c2 - column of the symbol after c in that production
 *
 * A non-upper-case c is added directly. Otherwise the stored FIRST row of c is
 * copied (minus '#'); for a '#' entry the search continues with the next symbol
 * of the production, or with FOLLOW of its left-hand side when none is left.
 */
void followfirst(char c, int c1, int c2)
{
    const int width = (int)sizeof(calc_first[0]);
    int row, col;

    if (!(isupper(c)))
    {
        f[m++] = c;
        return;
    }

    for (row = 0; row < count; row++)
    {
        if (calc_first[row][0] == c)
            break;
    }
    /* A non-terminal that never appears on a left-hand side has no FIRST row;
     * the old code walked past the end of the table in that case. */
    if (row == count)
        return;

    for (col = 1; col < width && calc_first[row][col] != '!'; col++)
    {
        if (calc_first[row][col] != '#')
        {
            f[m++] = calc_first[row][col];
        }
        else if (production[c1][c2] == '\0')
        {
            follow(production[c1][0]);
        }
        else
        {
            followfirst(production[c1][c2], c1, c2 + 1);
        }
    }
}

Input/Output:

54
Conclusion:
●​ The code implements a predictive parser using a stack and a parse table that it builds from the grammar's FIRST and FOLLOW sets.
●​ Each step of the parsing process is recorded and displayed in a neatly formatted
bordered table.
●​ The parser successfully accepts or rejects the input string based on the grammar
rules.

55
ASSESSMENT – 3
EXPERIMENT-1
a) Construct Simple LR (SLR) parse table using C language.
b) Implement the LR parsing algorithm, taking both the parse table and the input string as inputs.
Use C language for implementation.
Aim: To construct a Simple LR (SLR) parse table and implement the SLR parsing algorithm in
C++, taking both the grammar and input string as user inputs.
Algorithm:

1. Input the Grammar:

●​ Read the augmented grammar, terminals, and non-terminals.

2. Compute FIRST and FOLLOW sets:

●​ Calculate FIRST sets for all non-terminals.


●​ Compute FOLLOW sets using FIRST and production rules.

3. Construct LR(0) Items:

●​ Generate LR(0) closure and Goto function.


●​ Construct canonical LR(0) item sets.

4. Build the SLR Parsing Table:

●​ Populate ACTION and GOTO tables using LR(0) items.


●​ Resolve shift/reduce conflicts using FOLLOW sets.

5. Perform SLR Parsing:

●​ Take the input string from the user.


●​ Simulate the LR parsing algorithm using stack operations.
●​ Print step-by-step parsing and the final result (accept/reject).

Source Code:
#include <algorithm>
#include <cctype>
#include <iomanip>
#include <iostream>
#include <map>
#include <set>
#include <stack>
#include <stdexcept>
#include <string>
#include <vector>

56
using namespace std;

// Structure to represent a production rule


struct Production
{
    char left;    // symbol on the left-hand side
    string right; // right-hand side, one character per grammar symbol

    // Builds the rule `l -> r`.
    Production(char l, string r)
        : left(l),
          right(r)
    {
    }
};

// Structure to represent an LR(0) item


struct Item
{
    char left;    // left-hand side of the underlying production
    string right; // right-hand side of the underlying production
    int dot;      // number of right-hand-side symbols already to the left of the dot

    Item(char l, string r, int d)
        : left(l),
          right(r),
          dot(d)
    {
    }

    // Two items are equal when production and dot position both match.
    bool operator==(const Item &other) const
    {
        return !(left != other.left || right != other.right || dot != other.dot);
    }

    // Strict ordering by left-hand side, then right-hand side, then dot, so
    // items can be stored in a std::set.
    bool operator<(const Item &other) const
    {
        if (left < other.left)
            return true;
        if (other.left < left)
            return false;

        const int by_rhs = right.compare(other.right);
        if (by_rhs != 0)
            return by_rhs < 0;

        return dot < other.dot;
    }
};

// Structure to represent a state in the canonical collection


typedef set<Item> State;

class SLRParser
{
private:
vector<Production> productions;
set<char> terminals;
set<char> non_terminals;
char start_symbol;
map<char, set<char>> first;
map<char, set<char>> follow;
vector<State> canonical_collection;
map<int, map<char, string>> action_table;
map<int, map<char, int>> goto_table;

// Helper functions

57
// True when `c` was recorded as a terminal of the grammar.
bool is_terminal(char c)
{
    return terminals.count(c) > 0;
}

// True when `c` was recorded as a non-terminal of the grammar.
bool is_non_terminal(char c)
{
    return non_terminals.count(c) > 0;
}

public:
SLRParser()
{
// Don't pre-initialize start symbol - we'll determine it from input
}

/**
 * Reads the grammar from standard input and augments it.
 *
 * Each production is typed as "A->xyz": one upper-case letter, "->", then a
 * non-empty right-hand side with one character per symbol. Upper-case letters
 * become non-terminals, 'e' stands for epsilon, every other character becomes
 * a terminal. '$' is added as the end marker.
 *
 * The left-hand side of the first production is the grammar's start symbol; a
 * new production `S' -> start` using an unused upper-case letter is inserted
 * at index 0 and becomes `start_symbol`.
 *
 * A malformed line is reported on stderr and must be re-entered.
 *
 * @throws std::runtime_error if the production count is not a positive
 *         integer, if input ends before all productions were read, or if no
 *         upper-case letter is left for the augmented start symbol.
 */
void input_grammar()
{
    cout << "Enter the number of productions: ";
    int n = 0;
    if (!(cin >> n) || n <= 0)
    {
        throw runtime_error("input_grammar: the number of productions must be a positive integer");
    }

    cout << "Enter the productions (format: A->BC or A->a):\n";

    const size_t first_new = productions.size();
    int accepted = 0;
    string prod;
    while (accepted < n && cin >> prod)
    {
        // "A->" plus at least one right-hand-side symbol; substr(3) below
        // would throw on anything shorter.
        const bool well_formed = prod.size() > 3 &&
                                 isupper(static_cast<unsigned char>(prod[0])) &&
                                 prod.compare(1, 2, "->") == 0;
        if (!well_formed)
        {
            cerr << "Ignoring malformed production '" << prod
                 << "' (expected A->...); please re-enter it.\n";
            continue;
        }

        const char left = prod[0];
        const string right = prod.substr(3); // skip "A->"

        productions.push_back(Production(left, right));
        non_terminals.insert(left);

        for (char c : right)
        {
            if (isupper(static_cast<unsigned char>(c)))
            {
                non_terminals.insert(c);
            }
            else if (c != 'e')
            { // epsilon is written as 'e' and is not a terminal
                terminals.insert(c);
            }
        }
        ++accepted;
    }

    if (accepted < n)
    {
        throw runtime_error("input_grammar: input ended before all productions were read");
    }

    terminals.insert('$'); // end marker

    // The start symbol is the left-hand side of the first production read.
    start_symbol = productions[first_new].left;

    // Pick an unused letter for the augmented start: 'S'..'Z' first (the
    // historical choice), then 'A'..'R', never leaving the upper-case range.
    const string candidates = "STUVWXYZABCDEFGHIJKLMNOPQR";
    char augmented_start = '\0';
    for (char candidate : candidates)
    {
        if (non_terminals.find(candidate) == non_terminals.end())
        {
            augmented_start = candidate;
            break;
        }
    }
    if (augmented_start == '\0')
    {
        throw runtime_error("input_grammar: no unused upper-case letter left for the augmented start symbol");
    }

    productions.insert(productions.begin(), Production(augmented_start,
                                                       string(1, start_symbol)));
    non_terminals.insert(augmented_start);
    start_symbol = augmented_start;
}

// Computes FIRST for every non-terminal by iterating over the productions
// until no set grows any more. 'e' in a set means "can derive epsilon".
void compute_first_sets()
{
    for (char nt : non_terminals)
    {
        first[nt] = set<char>();
    }

    bool changed;
    do
    {
        changed = false;

        for (const Production &rule : productions)
        {
            set<char> &target = first[rule.left];
            const string &rhs = rule.right;

            // X -> e contributes only epsilon.
            if (rhs == "e")
            {
                if (target.insert('e').second)
                    changed = true;
                continue;
            }

            // X -> Y1 Y2 ... Yk: walk the symbols while every one seen so far
            // can derive epsilon.
            bool prefix_nullable = true;
            for (size_t pos = 0; pos < rhs.length() && prefix_nullable; ++pos)
            {
                const char sym = rhs[pos];

                if (is_terminal(sym))
                {
                    if (target.insert(sym).second)
                        changed = true;
                    prefix_nullable = false;
                    continue;
                }

                // Anything that is not a terminal is looked up in `first`.
                bool sym_nullable = false;
                for (char c : first[sym])
                {
                    if (c == 'e')
                        sym_nullable = true;
                    else if (target.insert(c).second)
                        changed = true;
                }
                prefix_nullable = sym_nullable;
            }

            // Every Yi can vanish, so X can too.
            if (prefix_nullable && target.insert('e').second)
                changed = true;
        }
    } while (changed);
}

void compute_follow_sets()
{
// Initialize FOLLOW sets
for (char nt : non_terminals)
{
follow[nt] = set<char>();
}

60
// Add $ to FOLLOW(S) where S is the start symbol
follow[start_symbol].insert('$');

bool changed = true;


while (changed)
{
changed = false;

for (const Production &prod : productions)


{
char A = prod.left;
string beta = prod.right;

for (int i = 0; i < beta.length(); i++)


{
char B = beta[i];
if (!is_non_terminal(B))
continue;

bool all_derive_epsilon = true;

// For each production A -> αBβ, add FIRST(β) - {e} to


FOLLOW(B)
if (i < beta.length() - 1)
{
string remainder = beta.substr(i + 1);

for (int j = 0; j < remainder.length(); j++)


{
char Y = remainder[j];

if (is_terminal(Y))
{
if (follow[B].insert(Y).second)
{
changed = true;
}
all_derive_epsilon = false;
break;
}
else
{
// Add FIRST(Y) - {e} to FOLLOW(B)
bool epsilon_in_first_Y = false;
for (char c : first[Y])
{
if (c == 'e')
{
epsilon_in_first_Y = true;
}
else
{

61
if (follow[B].insert(c).second)
{
changed = true;
}
}
}

if (!epsilon_in_first_Y)
{
all_derive_epsilon = false;
break;
}
}
}
}

// For each production A -> αB or A -> αBβ where FIRST(β)


contains e
// Add FOLLOW(A) to FOLLOW(B)
if (i == beta.length() - 1 || all_derive_epsilon)
{
for (char c : follow[A])
{
if (follow[B].insert(c).second)
{
changed = true;
}
}
}
}
}
}
}

// Functions to generate LR(0) items and construct SLR parse table


void generate_canonical_collection()
{
canonical_collection.clear();
action_table.clear();
goto_table.clear();

// Create initial item for the augmented grammar


Item initial_item(productions[0].left, productions[0].right, 0);
State initial_state;
initial_state.insert(initial_item);
initial_state = closure(initial_state);
canonical_collection.push_back(initial_state);

// Process states until no new states are added


for (size_t i = 0; i < canonical_collection.size(); i++)
{
State current = canonical_collection[i];

62
// Collect symbols after dots
set<char> symbols;
for (const Item &item : current)
{
if (item.dot < item.right.length())
{
symbols.insert(item.right[item.dot]);
}
}

// Define fixed processing order for symbols to ensure consistent


state generation
vector<char> processed_symbols;

// Process non-terminals first in a specific order (E, T, F)


vector<char> nonterm_priority = {'E', 'T', 'F'};
for (char nt : nonterm_priority)
{
if (symbols.find(nt) != symbols.end())
{
processed_symbols.push_back(nt);
symbols.erase(nt);
}
}

// Process terminals next in a specific order


vector<char> term_priority = {'(', 'a', '+', '*', ')'};
for (char t : term_priority)
{
if (symbols.find(t) != symbols.end())
{
processed_symbols.push_back(t);
symbols.erase(t);
}
}

// Add any remaining symbols


for (char sym : symbols)
{
processed_symbols.push_back(sym);
}

// Process each symbol in the determined order


for (char X : processed_symbols)
{
State next = goto_operation(current, X);
if (next.empty())
continue;

// Check if this state already exists


int next_state_index = -1;

63
for (size_t j = 0; j < canonical_collection.size(); j++)
{
if (canonical_collection[j] == next)
{
next_state_index = j;
break;
}
}

if (next_state_index == -1)
{
// Add new state
next_state_index = canonical_collection.size();
canonical_collection.push_back(next);
}

// Add appropriate entry to action/goto table


if (is_terminal(X))
{
action_table[i][X] = "s" + to_string(next_state_index);
}
else
{
goto_table[i][X] = next_state_index;
}
}

// Add reduce actions for completed items


for (const Item &item : current)
{
if (item.dot == item.right.length())
{
// Accept action for augmented production
if (item.left == start_symbol && item.right == string(1,
productions[1].left))
{
action_table[i]['$'] = "acc";
continue;
}

// Find production number


int prod_num = 0;
for (size_t j = 0; j < productions.size(); j++)
{
if (productions[j].left == item.left &&
productions[j].right == item.right)
{
prod_num = j;
break;
}
}

64
// Add reduce actions for all terminals in
FOLLOW(item.left)
for (char t : follow[item.left])
{
// Only add reduce action if there isn't already a
shift action
if (action_table[i].find(t) == action_table[i].end())
{
action_table[i][t] = "r" + to_string(prod_num);
}
}
}
}
}
}

// Returns the LR(0) closure of `I`: for every item with a non-terminal B right
// after the dot, each production of B is added with the dot at position 0,
// repeatedly, until nothing new appears.
State closure(State I)
{
    State closed = I;
    vector<Item> pending(I.begin(), I.end());

    // `pending` grows while it is scanned; an item is appended only the first
    // time it enters `closed`, so the scan terminates.
    for (size_t at = 0; at < pending.size(); ++at)
    {
        const Item current = pending[at]; // copy: push_back may reallocate

        if (current.dot >= current.right.length())
            continue;

        const char B = current.right[current.dot];
        if (!is_non_terminal(B))
            continue;

        for (const Production &prod : productions)
        {
            if (prod.left != B)
                continue;

            const Item fresh(B, prod.right, 0);
            if (closed.insert(fresh).second)
                pending.push_back(fresh);
        }
    }

    return closed;
}

// GOTO(I, X): moves the dot over X in every item of I that has X right after
// the dot, then closes the result. Returns an empty state when no item matches.
State goto_operation(const State &I, char X)
{
    State kernel;

    for (const Item &item : I)
    {
        const bool dot_before_x = item.dot < item.right.length() &&
                                  item.right[item.dot] == X;
        if (dot_before_x)
            kernel.insert(Item(item.left, item.right, item.dot + 1));
    }

    return kernel.empty() ? kernel : closure(kernel);
}

void display_grammar()
{
cout << "\n=== Grammar ===\n";
for (int i = 0; i < productions.size(); i++)
{
cout << i << ": " << productions[i].left << " -> " <<
productions[i].right << endl;
}

cout << "\nTerminals: ";


for (char t : terminals)
{
cout << t << " ";
}

cout << "\nNon-terminals: ";


for (char nt : non_terminals)
{
cout << nt << " ";
}
cout << endl;
}

void display_first_follow()
{
cout << "\n=== FIRST Sets ===\n";
for (char nt : non_terminals)
{
cout << "FIRST(" << nt << ") = { ";

66
for (char c : first[nt])
{
cout << c << " ";
}
cout << "}" << endl;
}

cout << "\n=== FOLLOW Sets ===\n";


for (char nt : non_terminals)
{
cout << "FOLLOW(" << nt << ") = { ";
for (char c : follow[nt])
{
cout << c << " ";
}
cout << "}" << endl;
}
}

void display_canonical_collection()
{
cout << "\n=== Canonical Collection of LR(0) Items ===\n";
for (int i = 0; i < canonical_collection.size(); i++)
{
cout << "I_" << i << ":" << endl;
for (const Item &item : canonical_collection[i])
{
cout << " " << item.left << " -> ";
for (int j = 0; j < item.right.length(); j++)
{
if (j == item.dot)
cout << ". ";
cout << item.right[j] << " ";
}
if (item.dot == item.right.length())
cout << ". ";
cout << endl;
}
cout << endl;
}
}

void display_parsing_table()
{
cout << "\n=== SLR Parsing Table ===\n";

// Print the table header


cout << setw(5) << "State";

// ACTION columns - in specific order


vector<char> terminal_order = {'$', '(', ')', '*', '+', 'a'};
for (char t : terminal_order)

67
{
if (terminals.find(t) != terminals.end())
{
cout << setw(8) << t;
}
}

// GOTO columns - in specific order


vector<char> non_terminal_order = {'E', 'F', 'T'};
for (char nt : non_terminal_order)
{
if (non_terminals.find(nt) != non_terminals.end() && nt !=
start_symbol)
{
cout << setw(8) << nt;
}
}
cout << endl;

// Print the table rows


for (int i = 0; i < canonical_collection.size(); i++)
{
cout << setw(5) << i;

// ACTION columns - in specific order


for (char t : terminal_order)
{
if (terminals.find(t) != terminals.end())
{
if (action_table[i].find(t) != action_table[i].end())
{
cout << setw(8) << action_table[i][t];
}
else
{
cout << setw(8) << "";
}
}
}

// GOTO columns - in specific order


for (char nt : non_terminal_order)
{
if (non_terminals.find(nt) != non_terminals.end() && nt !=
start_symbol)
{
if (goto_table[i].find(nt) != goto_table[i].end())
{
cout << setw(8) << goto_table[i][nt];
}
else
{

68
cout << setw(8) << "";
}
}
}
cout << endl;
}
}

bool parse_string(const string &input)


{
string str = input + "$"; // Add end marker

stack<int> state_stack;
stack<char> symbol_stack;

state_stack.push(0); // Initial state


symbol_stack.push('$'); // Bottom marker

int position = 0;
char current_input = str[position];

cout << "\n=== Parsing Steps ===\n";


cout << setw(15) << "Stack" << setw(15) << "Input" << setw(20) <<
"Action" << endl;

while (true)
{
int s = state_stack.top();

// Print current configuration


string stack_content = "";
stack<int> temp_state = state_stack;
stack<char> temp_symbol = symbol_stack;

while (!temp_state.empty())
{
stack_content = to_string(temp_state.top()) + stack_content;
temp_state.pop();
if (!temp_symbol.empty())
{
stack_content = temp_symbol.top() + stack_content;
temp_symbol.pop();
}
}

string remaining_input = str.substr(position);

cout << setw(15) << stack_content << setw(15) << remaining_input;

if (action_table[s].find(current_input) == action_table[s].end())
{
cout << setw(20) << "Error: No action" << endl;

69
return false;
}

string action = action_table[s][current_input];

if (action[0] == 's')
{
// Shift
int t = stoi(action.substr(1));
state_stack.push(t);
symbol_stack.push(current_input);
position++;
current_input = str[position];

cout << setw(20) << "Shift " + to_string(t) << endl;


}
else if (action[0] == 'r')
{
// Reduce
int prod_num = stoi(action.substr(1));
Production prod = productions[prod_num];

// Pop |β| states from stack


int length = prod.right.size();
if (prod.right == "e")
length = 0; // Don't pop for epsilon production

for (int i = 0; i < length; i++)


{
state_stack.pop();
symbol_stack.pop();
}

int t = state_stack.top();
symbol_stack.push(prod.left);

if (goto_table[t].find(prod.left) == goto_table[t].end())
{
cout << setw(20) << "Error: No goto" << endl;
return false;
}

state_stack.push(goto_table[t][prod.left]);

cout << setw(20) << "Reduce " + to_string(prod_num) + ": " +


prod.left + "->" + prod.right << endl;
}
else if (action == "acc")
{
// Accept
cout << setw(20) << "Accept" << endl;
return true;

70
}
else
{
// Error
cout << setw(20) << "Error: Invalid action" << endl;
return false;
}
}
}

void run()
{
// Input the grammar
input_grammar();

// Compute FIRST and FOLLOW sets


compute_first_sets();
compute_follow_sets();

// Generate the canonical collection of LR(0) items and SLR parsing


table
generate_canonical_collection();

// Display grammar, sets, and parsing table


display_grammar();
display_first_follow();
display_canonical_collection();
display_parsing_table();

// Parse input string


cout << "\nEnter the input string to parse: ";
string input;
cin >> input;

if (parse_string(input))
{
cout << "\nString accepted by the grammar." << endl;
}
else
{
cout << "\nString rejected by the grammar." << endl;
}
}

// Add a debug function to help diagnose issues


// Diagnostic dump: sizes, the start symbol and every ACTION / GOTO entry.
void debug_info()
{
    cout << "\n=== Debug Information ===\n";
    cout << "Number of productions: " << productions.size() << endl;
    cout << "Start symbol: " << start_symbol << endl;
    cout << "Number of states: " << canonical_collection.size() << endl;

    cout << "Action table entries: " << endl;
    for (map<int, map<char, string>>::const_iterator row = action_table.begin();
         row != action_table.end(); ++row)
    {
        for (map<char, string>::const_iterator cell = row->second.begin();
             cell != row->second.end(); ++cell)
        {
            cout << "Action[" << row->first << "][" << cell->first
                 << "] = " << cell->second << endl;
        }
    }

    cout << "Goto table entries: " << endl;
    for (map<int, map<char, int>>::const_iterator row = goto_table.begin();
         row != goto_table.end(); ++row)
    {
        for (map<char, int>::const_iterator cell = row->second.begin();
             cell != row->second.end(); ++cell)
        {
            cout << "Goto[" << row->first << "][" << cell->first
                 << "] = " << cell->second << endl;
        }
    }
}
};

// Entry point: creates one parser and hands control to its interactive run().
int main()
{
SLRParser parser;
parser.run();
return 0;
}

Input/Output:

72
73
74
Conclusion: The SLR parser successfully constructs a parse table using LR(0) items and uses it
to parse an input string. However, SLR has limitations because it relies on FOLLOW sets,
leading to possible conflicts for complex grammars.

75
EXPERIMENT-2
a) Construct Canonical LR (CLR) parse table using C language.
b) Implement the LR parsing algorithm, taking both the parse table and the input string as inputs.
Use C language for implementation.
Aim: To construct a Canonical LR (CLR) parse table and implement the LR parsing algorithm
in C++, taking both the grammar and input string as user inputs.
Algorithm:

1. Input the Grammar:

●​ Read the augmented grammar, including terminals and non-terminals.

2. Compute FIRST and FOLLOW sets:

●​ Compute FIRST sets for each non-terminal.


●​ Compute FOLLOW sets using grammar rules.

3. Construct LR(1) Items:

●​ Generate LR(1) closure (items with lookaheads).


●​ Compute Goto function for LR(1) states.
●​ Construct canonical LR(1) item sets.

4. Build the CLR Parsing Table:

●​ Populate ACTION and GOTO tables using LR(1) items.


●​ CLR avoids conflicts by considering lookaheads in the reduce step.

5. Perform LR Parsing:

●​ Take an input string from the user.


●​ Simulate LR parsing using stack operations.
●​ Print step-by-step parsing and the final result (accept/reject).

Source Code:
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <map>
#include <set>
#include <stack>
#include <queue>

76
#include <algorithm>
#include <iomanip>

using namespace std;

// Grammar Symbol class (for terminals and non-terminals)


class Symbol
{
public:
    string name;     // text of the symbol
    bool isTerminal; // false for non-terminals

    Symbol(string name = "", bool isTerminal = true)
        : name(name),
          isTerminal(isTerminal)
    {
    }

    // Non-terminals sort before terminals; within a kind, by name.
    bool operator<(const Symbol &other) const
    {
        return (isTerminal == other.isTerminal) ? (name < other.name)
                                                : (isTerminal < other.isTerminal);
    }

    // Equal only when both the name and the kind match.
    bool operator==(const Symbol &other) const
    {
        return !(isTerminal != other.isTerminal || name != other.name);
    }
};

// Production class
class Production
{
public:
Symbol lhs;
vector<Symbol> rhs;

Production(Symbol lhs = Symbol(), vector<Symbol> rhs = {}) : lhs(lhs),


rhs(rhs) {}

string toString() const


{
string result = lhs.name + " -> ";
for (const auto &sym : rhs)
{
result += sym.name + " ";
}
return result;
}
};

// LR(1) Item class

77
class LRItem
{
public:
int productionIdx;
int dotPosition;
mutable set<Symbol> lookaheads; // Make lookaheads mutable

LRItem(int prodIdx = 0, int dotPos = 0) : productionIdx(prodIdx),


dotPosition(dotPos) {}

bool operator<(const LRItem &other) const


{
if (productionIdx != other.productionIdx)
{
return productionIdx < other.productionIdx;
}
if (dotPosition != other.dotPosition)
{
return dotPosition < other.dotPosition;
}

// Compare lookaheads
vector<Symbol> thisLA(lookaheads.begin(), lookaheads.end());
vector<Symbol> otherLA(other.lookaheads.begin(),
other.lookaheads.end());

if (thisLA.size() != otherLA.size())
{
return thisLA.size() < otherLA.size();
}

for (size_t i = 0; i < thisLA.size(); i++)


{
if (thisLA[i].name != otherLA[i].name)
{
return thisLA[i].name < otherLA[i].name;
}
}

return false;
}

bool hasSameCore(const LRItem &other) const


{
return productionIdx == other.productionIdx && dotPosition ==
other.dotPosition;
}
};

// Item Set (State) class


class ItemSet
{

78
public:
set<LRItem> items;

bool operator<(const ItemSet &other) const


{
return items < other.items;
}

bool operator==(const ItemSet &other) const


{
if (items.size() != other.items.size())
{
return false;
}

auto it1 = items.begin();


auto it2 = other.items.begin();

while (it1 != items.end())


{
if (!it1->hasSameCore(*it2) || it1->lookaheads != it2->lookaheads)
{
return false;
}
++it1;
++it2;
}

return true;
}
};

// CLR Parser class


// Interactive CLR(1) parser: reads a grammar, builds the canonical LR(1)
// collection and parsing table, then parses one input string.
class CLRParser
{
private:
// All productions; index 0 is the augmented production S' -> S.
vector<Production> productions;
// Declared terminals, with the end marker "$" appended by readGrammar().
vector<Symbol> terminals;
// Declared non-terminals; readGrammar() inserts the augmented start symbol at the front.
vector<Symbol> nonTerminals;
Symbol startSymbol;
Symbol endSymbol;     // "$", set in the constructor
Symbol epsilonSymbol; // "#", set in the constructor

// FIRST sets, filled (and used as a cache) by computeFirst().
map<Symbol, set<Symbol>> firstSets;


// FOLLOW sets, filled by computeFollowSets().
map<Symbol, set<Symbol>> followSets;

// LR(1) states; the index in this vector is the state number.
vector<ItemSet> canonicalCollection;
// (state, symbol) -> target state.
map<pair<int, Symbol>, int> gotoTable;
// state -> symbol -> (kind, value); kind is 's' shift, 'r' reduce, 'a' accept, 'g' goto.
map<int, map<Symbol, pair<char, int>>> actionTable;

public:
public:
CLRParser()

79
{
endSymbol = Symbol("$", true);
epsilonSymbol = Symbol("#", true);
}

void readGrammar()
{
cout << "Enter the number of non-terminals: ";
int numNonTerminals;
cin >> numNonTerminals;
cin.ignore();

cout << "Enter the non-terminals (space-separated): ";


string line;
getline(cin, line);
istringstream iss(line);
string token;

while (iss >> token && nonTerminals.size() < numNonTerminals)


{
nonTerminals.push_back(Symbol(token, false));
}

cout << "Enter the number of terminals: ";


int numTerminals;
cin >> numTerminals;
cin.ignore();

cout << "Enter the terminals (space-separated): ";


getline(cin, line);
iss = istringstream(line);

while (iss >> token && terminals.size() < numTerminals)


{
terminals.push_back(Symbol(token, true));
}

terminals.push_back(endSymbol);

cout << "Enter the start symbol: ";


string startSymbolName;
cin >> startSymbolName;

for (const auto &nt : nonTerminals)


{
if (nt.name == startSymbolName)
{
startSymbol = nt;
break;
}
}

80
// Add augmented production S' -> S
Symbol augmentedStartSymbol(startSymbol.name + "'", false);
nonTerminals.insert(nonTerminals.begin(), augmentedStartSymbol);
productions.push_back(Production(augmentedStartSymbol,
{startSymbol}));

cout << "Enter the number of productions: ";


int numProductions;
cin >> numProductions;
cin.ignore();

cout << "Enter productions in the format 'A -> B C | D' (use '#' for
epsilon):" << endl;
for (int i = 0; i < numProductions; i++)
{
getline(cin, line);
size_t arrowPos = line.find("->");

if (arrowPos != string::npos)
{
string lhsStr = line.substr(0, arrowPos);
string rhsStr = line.substr(arrowPos + 2);

// Trim whitespace
lhsStr.erase(0, lhsStr.find_first_not_of(" \t"));
lhsStr.erase(lhsStr.find_last_not_of(" \t") + 1);

Symbol lhs;
for (const auto &nt : nonTerminals)
{
if (nt.name == lhsStr)
{
lhs = nt;
break;
}
}

size_t pos = 0;
while (pos < rhsStr.length())
{
vector<Symbol> rhs;
size_t pipePos = rhsStr.find('|', pos);
string prod;

if (pipePos != string::npos)
{
prod = rhsStr.substr(pos, pipePos - pos);
pos = pipePos + 1;
}
else
{
prod = rhsStr.substr(pos);

81
pos = rhsStr.length();
}

// Trim whitespace
prod.erase(0, prod.find_first_not_of(" \t"));
prod.erase(prod.find_last_not_of(" \t") + 1);

istringstream prodStream(prod);
string symbolStr;

while (prodStream >> symbolStr)


{
if (symbolStr == "#")
{
// Epsilon production (empty RHS)
break;
}

bool found = false;


for (const auto &nt : nonTerminals)
{
if (nt.name == symbolStr)
{
rhs.push_back(nt);
found = true;
break;
}
}

if (!found)
{
for (const auto &t : terminals)
{
if (t.name == symbolStr)
{
rhs.push_back(t);
found = true;
break;
}
}
}

if (!found)
{
cout << "Symbol " << symbolStr << " not found in
grammar!" << endl;
}
}

productions.push_back(Production(lhs, rhs));
}
}

82
}
}

set<Symbol> computeFirst(const Symbol &symbol)


{
if (firstSets.find(symbol) != firstSets.end())
{
return firstSets[symbol];
}

set<Symbol> first;

if (symbol.isTerminal)
{
first.insert(symbol);
return first;
}

for (const auto &prod : productions)


{
if (prod.lhs.name == symbol.name)
{
if (prod.rhs.empty())
{
// Epsilon production
first.insert(epsilonSymbol);
}
else
{
int i = 0;
bool continueToNext = true;

while (i < prod.rhs.size() && continueToNext)


{
continueToNext = false;
set<Symbol> firstOfSymbol = computeFirst(prod.rhs[i]);

for (const auto &s : firstOfSymbol)


{
if (s.name != epsilonSymbol.name)
{
first.insert(s);
}
}

if (firstOfSymbol.find(epsilonSymbol) !=
firstOfSymbol.end())
{
continueToNext = true;
if (i == prod.rhs.size() - 1)
{
first.insert(epsilonSymbol);

83
}
}

i++;
}
}
}
}

firstSets[symbol] = first;
return first;
}

set<Symbol> computeFirst(const vector<Symbol> &symbols, int startPos = 0)


{
set<Symbol> result;

if (startPos >= symbols.size())


{
result.insert(epsilonSymbol);
return result;
}

set<Symbol> firstOfSymbol = computeFirst(symbols[startPos]);


bool allHaveEpsilon = true;

for (const auto &s : firstOfSymbol)


{
if (s.name != epsilonSymbol.name)
{
result.insert(s);
}
}

if (firstOfSymbol.find(epsilonSymbol) == firstOfSymbol.end())
{
allHaveEpsilon = false;
}

if (allHaveEpsilon && startPos < symbols.size() - 1)


{
set<Symbol> restFirst = computeFirst(symbols, startPos + 1);
for (const auto &s : restFirst)
{
result.insert(s);
}
}
else if (allHaveEpsilon && startPos == symbols.size() - 1)
{
result.insert(epsilonSymbol);
}

84
return result;
}

void computeFollowSets()
{
// Initialize FOLLOW sets
for (const auto &nt : nonTerminals)
{
followSets[nt] = set<Symbol>();
}

// Add $ to FOLLOW(S') where S' is the start symbol


followSets[productions[0].lhs].insert(endSymbol);

bool changed = true;


while (changed)
{
changed = false;

for (const auto &prod : productions)


{
for (size_t i = 0; i < prod.rhs.size(); i++)
{
if (!prod.rhs[i].isTerminal)
{
// For each non-terminal B in A -> αBβ
size_t followSetSizeBefore =
followSets[prod.rhs[i]].size();

if (i < prod.rhs.size() - 1)
{
// Compute FIRST(β)
set<Symbol> firstOfBeta = computeFirst(prod.rhs, i
+ 1);

// Add FIRST(β) - {ε} to FOLLOW(B)


for (const auto &s : firstOfBeta)
{
if (s.name != epsilonSymbol.name)
{
followSets[prod.rhs[i]].insert(s);
}
}

// If ε is in FIRST(β), add FOLLOW(A) to FOLLOW(B)


if (firstOfBeta.find(epsilonSymbol) !=
firstOfBeta.end())
{
for (const auto &s : followSets[prod.lhs])
{
followSets[prod.rhs[i]].insert(s);
}

85
}
}
else
{
// If B is at the end of the production, add
FOLLOW(A) to FOLLOW(B)
for (const auto &s : followSets[prod.lhs])
{
followSets[prod.rhs[i]].insert(s);
}
}

if (followSets[prod.rhs[i]].size() >
followSetSizeBefore)
{
changed = true;
}
}
}
}
}
}

void computeFirstAndFollowSets()
{
// Compute FIRST sets
for (const auto &symbol : terminals)
{
firstSets[symbol] = {symbol};
}

for (const auto &nt : nonTerminals)


{
computeFirst(nt);
}

// Compute FOLLOW sets


computeFollowSets();
}

// Expands itemSet in place to its LR(1) closure.
//
// For every item [A -> alpha . B beta, a] and every production B -> gamma,
// the item [B -> . gamma, b] is added for each b in FIRST(beta a). Items with
// the same core are kept as a single item whose lookahead set is the union.
//
// The work is done on a core -> lookaheads map and the std::set is rebuilt at
// the end, so no element of a std::set is modified while it is stored there.
void closure(ItemSet &itemSet)
{
    typedef pair<int, int> Core; // (production index, dot position)

    map<Core, set<Symbol>> merged;
    for (const auto &item : itemSet.items)
    {
        merged[Core(item.productionIdx, item.dotPosition)]
            .insert(item.lookaheads.begin(), item.lookaheads.end());
    }

    bool changed = true;
    while (changed)
    {
        changed = false;

        // Iterate over a snapshot so `merged` can grow during the pass.
        const map<Core, set<Symbol>> snapshot = merged;

        for (const auto &entry : snapshot)
        {
            const vector<Symbol> &body = productions[entry.first.first].rhs;
            const size_t dot = static_cast<size_t>(entry.first.second);

            if (dot >= body.size())
            {
                continue; // dot at the end: nothing to expand
            }

            const Symbol &symbolAfterDot = body[dot];
            if (symbolAfterDot.isTerminal)
            {
                continue;
            }

            // beta = everything after the non-terminal that follows the dot.
            const vector<Symbol> beta(body.begin() + dot + 1, body.end());

            // Union of FIRST(beta a) over all lookaheads a of this item.
            set<Symbol> lookaheads;
            for (const auto &la : entry.second)
            {
                vector<Symbol> betaA = beta;
                betaA.push_back(la);

                const set<Symbol> firstOfBetaA = computeFirst(betaA);
                for (const auto &s : firstOfBetaA)
                {
                    if (s.name != epsilonSymbol.name)
                    {
                        lookaheads.insert(s);
                    }
                }

                if (firstOfBetaA.find(epsilonSymbol) != firstOfBetaA.end() ||
                    beta.empty())
                {
                    lookaheads.insert(la);
                }
            }

            // Add or extend [B -> . gamma] for every production of B.
            for (size_t i = 0; i < productions.size(); i++)
            {
                if (productions[i].lhs.name != symbolAfterDot.name)
                {
                    continue;
                }

                const auto inserted = merged.insert(
                    make_pair(Core(static_cast<int>(i), 0), set<Symbol>()));
                set<Symbol> &target = inserted.first->second;
                const size_t sizeBefore = target.size();
                target.insert(lookaheads.begin(), lookaheads.end());

                if (inserted.second || target.size() != sizeBefore)
                {
                    changed = true;
                }
            }
        }
    }

    itemSet.items.clear();
    for (const auto &entry : merged)
    {
        LRItem item(entry.first.first, entry.first.second);
        item.lookaheads = entry.second;
        itemSet.items.insert(item);
    }
}

88
// Returns the state reached from itemSet on `symbol`: every item whose dot
// stands before a symbol with that name is advanced by one position (keeping
// its lookaheads) and the result is closed. The returned set is empty when no
// item can move.
ItemSet goTo(const ItemSet &itemSet, const Symbol &symbol)
{
    ItemSet advanced;

    for (auto it = itemSet.items.begin(); it != itemSet.items.end(); ++it)
    {
        const vector<Symbol> &body = productions[it->productionIdx].rhs;

        const bool dotAtEnd = !(it->dotPosition < body.size());
        if (dotAtEnd || body[it->dotPosition].name != symbol.name)
        {
            continue;
        }

        // Move dot one position to the right
        LRItem moved(it->productionIdx, it->dotPosition + 1);
        moved.lookaheads = it->lookaheads;
        advanced.items.insert(moved);
    }

    if (advanced.items.empty())
    {
        return advanced;
    }

    closure(advanced);
    return advanced;
}

void constructCanonicalCollection()
{
canonicalCollection.clear();
gotoTable.clear();

// Create initial item set with S' -> ·S, $


ItemSet initialItemSet;
LRItem initialItem(0, 0); // First production is S' -> S
initialItem.lookaheads.insert(endSymbol);
initialItemSet.items.insert(initialItem);
closure(initialItemSet);

canonicalCollection.push_back(initialItemSet);
vector<bool> processed(1, false);

// Process all item sets


for (size_t i = 0; i < canonicalCollection.size(); i++)
{
if (processed[i])
continue;
processed[i] = true;

// First handle non-terminals for goto


set<Symbol> nonTerminalSymbols;
set<Symbol> terminalSymbols;

89
for (const auto &item : canonicalCollection[i].items)
{
if (item.dotPosition <
productions[item.productionIdx].rhs.size())
{
Symbol sym =
productions[item.productionIdx].rhs[item.dotPosition];
if (sym.isTerminal)
terminalSymbols.insert(sym);
else
nonTerminalSymbols.insert(sym);
}
}

// Process non-terminals first, but in reverse order to get


correct state numbering
vector<Symbol> sortedNonTerminals(nonTerminalSymbols.begin(),
nonTerminalSymbols.end());
sort(sortedNonTerminals.begin(), sortedNonTerminals.end(),
[](const Symbol &a, const Symbol &b)
{ return b.name < a.name; });

for (const auto &symbol : sortedNonTerminals)


{
ItemSet gotoSet = goTo(canonicalCollection[i], symbol);
if (!gotoSet.items.empty())
{
bool found = false;
int existingIndex = -1;

// Check if this state already exists


for (size_t j = 0; j < canonicalCollection.size(); j++)
{
if (canonicalCollection[j] == gotoSet)
{
found = true;
existingIndex = j;
break;
}
}

if (!found)
{
canonicalCollection.push_back(gotoSet);
processed.push_back(false);
gotoTable[make_pair(i, symbol)] =
canonicalCollection.size() - 1;
}
else
{
gotoTable[make_pair(i, symbol)] = existingIndex;

90
}
}
}

// Then process terminals


for (const auto &symbol : terminalSymbols)
{
ItemSet gotoSet = goTo(canonicalCollection[i], symbol);
if (!gotoSet.items.empty())
{
bool found = false;
int existingIndex = -1;

// Check if this state already exists


for (size_t j = 0; j < canonicalCollection.size(); j++)
{
if (canonicalCollection[j] == gotoSet)
{
found = true;
existingIndex = j;
break;
}
}

if (!found)
{
canonicalCollection.push_back(gotoSet);
processed.push_back(false);
gotoTable[make_pair(i, symbol)] =
canonicalCollection.size() - 1;
}
else
{
gotoTable[make_pair(i, symbol)] = existingIndex;
}
}
}
}
}

void constructParsingTable()
{
actionTable.clear();

// Initialize all states with empty maps


for (size_t i = 0; i < canonicalCollection.size(); i++)
{
actionTable[i] = map<Symbol, pair<char, int>>();
}

// First pass: Add accept and goto actions


for (size_t i = 0; i < canonicalCollection.size(); i++)

91
{
// Add goto actions for non-terminals
for (const auto &nt : nonTerminals)
{
if (nt.name != productions[0].lhs.name) // Skip augmented
start symbol
{
if (gotoTable.find(make_pair(i, nt)) != gotoTable.end())
{
int j = gotoTable[make_pair(i, nt)];
actionTable[i][nt] = make_pair('g', j);
}
}
}

// Check for accept action


for (const auto &item : canonicalCollection[i].items)
{
if (item.productionIdx == 0 && item.dotPosition ==
productions[0].rhs.size())
{
actionTable[i][endSymbol] = make_pair('a', 0);
}
}
}

// Second pass: Add shift and reduce actions


for (size_t i = 0; i < canonicalCollection.size(); i++)
{
for (const auto &item : canonicalCollection[i].items)
{
// Case 1: [A -> α·aβ, b] => action[i, a] = shift j
if (item.dotPosition <
productions[item.productionIdx].rhs.size() &&

productions[item.productionIdx].rhs[item.dotPosition].isTerminal)
{
Symbol a =
productions[item.productionIdx].rhs[item.dotPosition];
if (gotoTable.find(make_pair(i, a)) != gotoTable.end())
{
int j = gotoTable[make_pair(i, a)];
actionTable[i][a] = make_pair('s', j);
}
}

// Case 2: [A -> α·, a] => action[i, a] = reduce A -> α


if (item.dotPosition ==
productions[item.productionIdx].rhs.size() &&
item.productionIdx != 0) // Skip the augmented production
{
for (const auto &a : item.lookaheads)

92
{
// Only add reduce action if there's no existing
action
if (actionTable[i].find(a) == actionTable[i].end())
{
actionTable[i][a] = make_pair('r',
item.productionIdx);
}
}
}
}
}
}

void printGrammar()
{
cout << "\nGrammar:\n";
for (size_t i = 0; i < productions.size(); i++)
{
cout << i << ": " << productions[i].toString() << endl;
}
}

void printFirstAndFollowSets()
{
cout << "\nFIRST Sets:\n";
for (const auto &nt : nonTerminals)
{
cout << "FIRST(" << nt.name << ") = { ";
bool first = true;
for (const auto &s : firstSets[nt])
{
if (!first)
cout << ", ";
cout << s.name;
first = false;
}
cout << " }" << endl;
}

cout << "\nFOLLOW Sets:\n";


for (const auto &nt : nonTerminals)
{
cout << "FOLLOW(" << nt.name << ") = { ";
bool first = true;
for (const auto &s : followSets[nt])
{
if (!first)
cout << ", ";
cout << s.name;
first = false;
}

93
cout << " }" << endl;
}
}

void printCanonicalCollection()
{
cout << "\nCanonical Collection of LR(1) Items:\n";
for (size_t i = 0; i < canonicalCollection.size(); i++)
{
cout << "I" << i << ":\n";
for (const auto &item : canonicalCollection[i].items)
{
cout << " [" << productions[item.productionIdx].lhs.name <<
" -> ";

// Print RHS with dot


for (size_t j = 0; j <
productions[item.productionIdx].rhs.size(); j++)
{
if (j == item.dotPosition)
{
cout << ". "; // Using simple dot instead of special
character
}
cout << productions[item.productionIdx].rhs[j].name << "
";
}

if (item.dotPosition ==
productions[item.productionIdx].rhs.size())
{
cout << "."; // Using simple dot instead of special
character
}

cout << ", ";

// Print lookaheads
bool first = true;
for (const auto &la : item.lookaheads)
{
if (!first)
cout << "/";
cout << la.name;
first = false;
}

cout << "]\n";


}
}
}

94
void printParsingTable()
{
cout << "\nCLR Parsing Table:\n";

// Print header
cout << "+-----+";
for (const auto &t : terminals)
{
cout << "--------+";
}
for (const auto &nt : nonTerminals)
{
if (nt.name != productions[0].lhs.name)
{ // Skip the augmented start symbol
cout << "--------+";
}
}
cout << "\n| Sta |";

for (const auto &t : terminals)


{
cout << setw(7) << t.name << " |";
}
for (const auto &nt : nonTerminals)
{
if (nt.name != productions[0].lhs.name)
{ // Skip the augmented start symbol
cout << setw(7) << nt.name << " |";
}
}
cout << "\n+-----+";
for (const auto &t : terminals)
{
cout << "--------+";
}
for (const auto &nt : nonTerminals)
{
if (nt.name != productions[0].lhs.name)
{ // Skip the augmented start symbol
cout << "--------+";
}
}
cout << endl;

// Print rows
for (size_t i = 0; i < canonicalCollection.size(); i++)
{
cout << "| " << setw(3) << i << " |";

for (const auto &t : terminals)


{
if (actionTable[i].find(t) != actionTable[i].end())

95
{
char action = actionTable[i][t].first;
int value = actionTable[i][t].second;

if (action == 's')
{
cout << setw(7) << "s" + to_string(value) << " |";
}
else if (action == 'r')
{
cout << setw(7) << "r" + to_string(value) << " |";
}
else if (action == 'a')
{
cout << setw(7) << "acc" << " |";
}
else
{
cout << setw(7) << " " << " |";
}
}
else
{
cout << setw(7) << " " << " |";
}
}

for (const auto &nt : nonTerminals)


{
if (nt.name != productions[0].lhs.name)
{ // Skip the augmented start symbol
if (actionTable[i].find(nt) != actionTable[i].end() &&
actionTable[i][nt].first == 'g')
{
cout << setw(7) << actionTable[i][nt].second << " |";
}
else
{
cout << setw(7) << " " << " |";
}
}
}

cout << endl;


}

cout << "+-----+";


for (const auto &t : terminals)
{
cout << "--------+";
}
for (const auto &nt : nonTerminals)

96
{
if (nt.name != productions[0].lhs.name)
{ // Skip the augmented start symbol
cout << "--------+";
}
}
cout << endl;
}

bool parse(const string &input)


{
cout << "\nParsing input: " << input << endl;

stack<pair<int, Symbol>> stateSymbolStack;


stateSymbolStack.push(make_pair(0, Symbol("", true))); // Initial
state

vector<Symbol> inputSymbols;
istringstream iss(input);
string token;

while (iss >> token)


{
bool found = false;
for (const auto &t : terminals)
{
if (t.name == token)
{
inputSymbols.push_back(t);
found = true;
break;
}
}

if (!found)
{
cout << "Invalid token: " << token << endl;
return false;
}
}

inputSymbols.push_back(endSymbol);

size_t currentPos = 0;

cout << "\nParsing Steps:\n";


cout <<
"+----------------------+------------------+-----------------+----------------
----------------+\n";
cout << "| Stack | Input | Action |
Explanation |\n";

97
cout <<
"+----------------------+------------------+-----------------+----------------
----------------+\n";

while (true)
{
int currentState = stateSymbolStack.top().first;
Symbol currentSymbol = inputSymbols[currentPos];

// Print current stack


string stackStr = "";
stack<pair<int, Symbol>> tempStack = stateSymbolStack;
vector<pair<int, Symbol>> tempVec;

while (!tempStack.empty())
{
tempVec.push_back(tempStack.top());
tempStack.pop();
}

reverse(tempVec.begin(), tempVec.end());

for (const auto &pair : tempVec)


{
stackStr += to_string(pair.first);
if (pair.second.name != "")
{
stackStr += " " + pair.second.name;
stackStr += " " + pair.second.name;
}
stackStr += " ";
}

// Print remaining input


string inputStr = "";
for (size_t i = currentPos; i < inputSymbols.size(); i++)
{
inputStr += inputSymbols[i].name + " ";
}

if (actionTable[currentState].find(currentSymbol) ==
actionTable[currentState].end())
{
cout << "| " << setw(20) << left << stackStr << " | "
<< setw(16) << left << inputStr << " | "
<< setw(15) << left << "ERROR" << " | "
<< setw(30) << left << "No action defined" << " |\n";
cout <<
"+----------------------+------------------+-----------------+----------------
----------------+\n";
return false;
}

98
pair<char, int> action = actionTable[currentState][currentSymbol];

if (action.first == 's')
{
// Shift action
stateSymbolStack.push(make_pair(action.second,
currentSymbol));
currentPos++;

cout << "| " << setw(20) << left << stackStr << " | "
<< setw(16) << left << inputStr << " | "
<< setw(15) << left << "Shift " +
to_string(action.second) << " | "
<< setw(30) << left << "Shift and goto state " +
to_string(action.second) << " |\n";
}
else if (action.first == 'r')
{
// Reduce action
int prodIdx = action.second;
Production prod = productions[prodIdx];

// Pop |β| symbols


for (size_t i = 0; i < prod.rhs.size(); i++)
{
stateSymbolStack.pop();
}

int topState = stateSymbolStack.top().first;

// Push A and goto[top, A]


if (actionTable[topState].find(prod.lhs) !=
actionTable[topState].end() &&
actionTable[topState][prod.lhs].first == 'g')
{
int gotoState = actionTable[topState][prod.lhs].second;
stateSymbolStack.push(make_pair(gotoState, prod.lhs));

cout << "| " << setw(20) << left << stackStr << " | "
<< setw(16) << left << inputStr << " | "
<< setw(15) << left << "Reduce by " +
to_string(prodIdx) << " | "
<< setw(30) << left << prod.toString() << " |\n";
}
else
{
cout << "| " << setw(20) << left << stackStr << " | "
<< setw(16) << left << inputStr << " | "
<< setw(15) << left << "ERROR" << " | "
<< setw(30) << left << "No goto action for " +
prod.lhs.name << " |\n";

99
cout <<
"+----------------------+------------------+-----------------+----------------
----------------+\n";
return false;
}
}
else if (action.first == 'a')
{
// Accept action
cout << "| " << setw(20) << left << stackStr << " | "
<< setw(16) << left << inputStr << " | "
<< setw(15) << left << "Accept" << " | "
<< setw(30) << left << "Input accepted" << " |\n";
cout <<
"+----------------------+------------------+-----------------+----------------
----------------+\n";
return true;
}
else
{
cout << "| " << setw(20) << left << stackStr << " | "
<< setw(16) << left << inputStr << " | "
<< setw(15) << left << "ERROR" << " | "
<< setw(30) << left << "Invalid action" << " |\n";
cout <<
"+----------------------+------------------+-----------------+----------------
----------------+\n";
return false;
}
}

return false;
}

void run()
{
readGrammar();
printGrammar();

computeFirstAndFollowSets();
printFirstAndFollowSets();

constructCanonicalCollection();
printCanonicalCollection();

constructParsingTable();
printParsingTable();

cout << "\nEnter input string to parse: ";


string input;
getline(cin, input);

100
bool accepted = parse(input);
cout << "\nInput string " << (accepted ? "ACCEPTED" : "REJECTED") <<
endl;
}
};

// Program entry point: runs one interactive CLR parser session.
int main()
{
    CLRParser().run();
    return 0;
}
Input/Output:
Test Case 1

101
Test Case 2

102
103
104
Conclusion: The CLR parser constructs a more powerful parse table using LR(1) items,
avoiding conflicts that occur in SLR parsing. This makes CLR more robust but computationally
expensive due to larger state sets.

105
ASSESSMENT – 4
EXPERIMENT-1
Implementation of a simple calculator using LEX and YACC tools.
Aim: To implement a simple calculator using LEX and YACC tools that can perform basic
arithmetic operations such as addition, subtraction, multiplication, and division.
Algorithm:
1.​ Define Tokens in LEX
o​ Identify tokens for numbers, operators (+, -, *, /), and parentheses.
o​ Write regular expressions to recognize these tokens.
2.​ Define Grammar in YACC
o​ Specify grammar rules for arithmetic expressions.
o​ Use precedence and associativity rules to resolve ambiguities.
3.​ Implement Actions

106
o​ Assign actions to perform calculations during parsing.
4.​ Compile and Execute
o​ Compile the LEX file using lex and generate lex.yy.c.
o​ Compile the YACC file using yacc and generate y.tab.c.
o​ Link both and run the executable to evaluate expressions.

Source Code:
Calc.l
%{
/* Calc.l - scanner for the calculator.
 * Returns NUMBER (value in yylval), one token per operator/parenthesis and
 * EOL for a newline. Blanks and tabs are skipped; any other character is
 * reported and skipped. Token codes come from the YACC-generated y.tab.h. */
#include "y.tab.h"
#include <stdlib.h>
%}

%%
[0-9]+ { yylval = atoi(yytext); return NUMBER; }
[ \t] { /* Ignore whitespace */ }
\n { return EOL; }
"+" { return PLUS; }
"-" { return MINUS; }
"*" { return MULTIPLY; }
"/" { return DIVIDE; }
"(" { return LPAREN; }
")" { return RPAREN; }
. { printf("Unexpected character: %s\n", yytext); }
%%

/* Called by the scanner at end of input; returning 1 means there is no
 * further input to scan. */
int yywrap(void) {
return 1;
}

107
Calc.y
%{
/* Calc.y - grammar and evaluator for a simple integer calculator.
 * Each input line holding an expression is evaluated and printed. */
#include <stdio.h>
#include <stdlib.h>

/* Supplied by the LEX-generated scanner. */
int yylex(void);
/* Error reporter called by the generated parser. */
int yyerror(const char *s);
%}

%token NUMBER PLUS MINUS MULTIPLY DIVIDE LPAREN RPAREN EOL

/* Lowest precedence first; all four operators are left-associative.
 * These declarations resolve the ambiguity of the "exp OP exp" rules so that
 * 2*3+4 is (2*3)+4 and 8-3-2 is (8-3)-2. */
%left PLUS MINUS
%left MULTIPLY DIVIDE

%%
input:
      /* empty */
    | input line
    ;

line:
      EOL
    | exp EOL               { printf("Result: %d\n", $1); }
    ;

exp:
      NUMBER                { $$ = $1; }
    | exp PLUS exp          { $$ = $1 + $3; }
    | exp MINUS exp         { $$ = $1 - $3; }
    | exp MULTIPLY exp      { $$ = $1 * $3; }
    | exp DIVIDE exp        {
                                /* Division by zero is reported and yields 0. */
                                if ($3 != 0)
                                    $$ = $1 / $3;
                                else {
                                    printf("Error: Division by zero\n");
                                    $$ = 0;
                                }
                            }
    | LPAREN exp RPAREN     { $$ = $2; }
    ;

%%

int main(void) {
    printf("Enter expression:\n");
    yyparse();
    return 0;
}

/* Prints the parser's error message on stderr. */
int yyerror(const char *s) {
    fprintf(stderr, "Error: %s\n", s);
    return 0;
}
Input/Output:

109
Conclusion: The implementation successfully demonstrates the use of LEX and YACC to build a
simple calculator that can evaluate arithmetic expressions based on operator precedence and
associativity.

EXPERIMENT-2
Implementation of Abstract syntax tree –Infix to postfix using the LEX and YACC tools.
Aim: To implement an Abstract Syntax Tree (AST) for converting an infix expression to a postfix
expression using LEX and YACC tools.

Algorithm:
1.​ Define Tokens in LEX
o​ Recognize numbers, operators (+, -, *, /), and parentheses.
2.​ Define Grammar in YACC
o​ Parse expressions following operator precedence rules.
o​ Construct an AST representing the expression.
3.​ Implement Postfix Conversion
o​ Traverse the AST using post-order traversal to generate postfix notation.
4.​ Compile and Execute
o​ Compile the LEX and YACC files, link them, and execute the program.
Source Code:
Lexer.l
%{
/* Lexer.l - scanner for the infix-to-postfix converter.
 * Returns NUM (value in yylval.num), PLUS/MINUS/MUL/DIV and the literal
 * characters '(' and ')'. Whitespace, including newlines, is skipped. */
#include <stdio.h>
#include <stdlib.h>

/* The %union in y.tab.h has a Node* member; the full definition lives in the
 * parser, so only the name has to be known here. */
typedef struct node Node;
#include "y.tab.h"
%}

digit [0-9]+

%%
{digit} { yylval.num = atoi(yytext); return NUM; }
[ \t\n]+ { /* ignore whitespace */ }
"+" { return PLUS; }
"-" { return MINUS; }
"*" { return MUL; }
"/" { return DIV; }
"(" { return '('; }
")" { return ')'; }
. { printf("Unexpected character: %s\n", yytext); }
%%

/* Called at end of input; returning 1 means there is nothing more to scan. */
int yywrap(void) {
    return 1;
}
Parser.y
%{
/* Parser.y - builds an abstract syntax tree for an arithmetic expression and
 * prints it in postfix order. */
#include <stdio.h>
#include <stdlib.h>

/* AST node: a number leaf (op == '\0') or a binary operator node. */
typedef struct node {
    int value;          /* For number nodes */
    char op;            /* For operator nodes */
    struct node *left;
    struct node *right;
} Node;

Node* createNode(char op, Node* left, Node* right);
Node* createNumNode(int value);
void printPostfix(Node* node);
int yylex(void);
void yyerror(const char *s);

Node* root; /* Global pointer to the AST root */
%}

/* "struct node *" rather than the typedef so that the generated y.tab.h is
 * usable by the scanner, which does not see the typedef above. */
%union {
    int num;
    struct node *node;
}

%token <num> NUM
%token PLUS MINUS MUL DIV

%left PLUS MINUS
%left MUL DIV

%type <node> expr term factor

%%
input: expr                 { root = $1; }
    ;

expr: term                  { $$ = $1; }
    | expr PLUS term        { $$ = createNode('+', $1, $3); }
    | expr MINUS term       { $$ = createNode('-', $1, $3); }
    ;

term: factor                { $$ = $1; }
    | term MUL factor       { $$ = createNode('*', $1, $3); }
    | term DIV factor       { $$ = createNode('/', $1, $3); }
    ;

factor: NUM                 { $$ = createNumNode($1); }
    | '(' expr ')'          { $$ = $2; }
    ;

%%

/* Allocates one node, terminating the program if memory is exhausted. */
static Node* allocNode(void) {
    Node* newNode = (Node*)malloc(sizeof(Node));
    if (newNode == NULL) {
        fprintf(stderr, "Error: out of memory\n");
        exit(1);
    }
    return newNode;
}

/* Creates an operator node with the given children. */
Node* createNode(char op, Node* left, Node* right) {
    Node* newNode = allocNode();
    newNode->op = op;
    newNode->left = left;
    newNode->right = right;
    newNode->value = 0;
    return newNode;
}

/* Creates a leaf node holding a number. */
Node* createNumNode(int value) {
    Node* newNode = allocNode();
    newNode->value = value;
    newNode->op = '\0';
    newNode->left = newNode->right = NULL;
    return newNode;
}

/* Post-order traversal: left subtree, right subtree, then the node itself. */
void printPostfix(Node* node) {
    if (node) {
        printPostfix(node->left);
        printPostfix(node->right);
        if (node->op != '\0')
            printf("%c ", node->op);
        else
            printf("%d ", node->value);
    }
}

int main() {
    /* yyparse() returns non-zero on a syntax error; there is no tree to print then. */
    if (yyparse() != 0)
        return 1;
    printf("Postfix Expression: ");
    printPostfix(root);
    printf("\n");
    return 0;
}

/* Prints the parser's error message on stderr. */
void yyerror(const char *s) {
    fprintf(stderr, "Error: %s\n", s);
}
Input/Output:

Conclusion: The experiment demonstrates the construction of an Abstract Syntax Tree and its
traversal to convert infix expressions into postfix notation using LEX and YACC.

EXPERIMENT-3
Using LEX and YACC tools to recognize the strings of the following context-free
languages:
1.​ L(G) = { a^n b^m | m ≠ n }
2.​ L(G) = { ab (bbaa)^n bba (ba)^n | n ≥ 0 }
Aim: To use LEX and YACC tools to recognize strings belonging to the given context-free languages:
1.​ L(G) = { a^n b^m | m ≠ n }
2.​ L(G) = { ab (bbaa)^n bba (ba)^n | n ≥ 0 }
Algorithm:

114
1.​ Define Tokens in LEX
o​ Recognize symbols (a, b) based on given language rules.
2.​ Define Grammar in YACC
o​ Define rules to parse valid strings belonging to the specified languages.
o​ Implement conditions to enforce constraints (m ≠ n, repetitions based on n).
3.​ Perform String Validation
o​ Accept strings if they belong to the language; reject otherwise.
4.​ Compile and Execute
o​ Use lex and yacc to generate and compile the parser.
o​ Run the program to check input strings.
Source Code:
Lang1.l
%{
/* Lang1.l - scanner for L = { a^n b^m | m != n }.
 * Returns ACHAR for 'a' and BCHAR for 'b'; whitespace is skipped and any
 * other character terminates the program. */
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"
%}
%%
a { return ACHAR; }
b { return BCHAR; }
[ \t\n]+ { /* skip whitespace */ }
. { printf("Invalid character: %s\n", yytext); exit(1); }
%%

/* Called at end of input; returning 1 means there is nothing more to scan. */
int yywrap(void) {
    return 1;
}
Lang1.y
%{
/* Lang1.y - recognizer for L = { a^n b^m | m != n }.
 * The grammar accepts a* b* and counts both runs; the semantic action then
 * compares the two counts. */
#include <stdio.h>
#include <stdlib.h>
int yyerror(char *s);
int yylex(void);
%}
%token ACHAR BCHAR
%%
S: A B          {   /* $1 = number of a's, $2 = number of b's */
                    if ($1 != $2)
                        printf("Accepted\n");
                    else
                        printf("Not Accepted\n");
                }
    ;
A: /* empty */  { $$ = 0; }
    | A ACHAR   { $$ = $1 + 1; }
    ;
B: /* empty */  { $$ = 0; }
    | B BCHAR   { $$ = $1 + 1; }
    ;
%%
int main(void) {
    /* A syntax error (for example an 'a' after a 'b') means the string is
     * not of the form a* b*, so it is not in the language either. */
    if (yyparse() != 0)
        printf("Not Accepted\n");
    return 0;
}

/* Prints the parser's error message on stderr. */
int yyerror(char *s) {
    fprintf(stderr, "%s\n", s);
    return 0;
}
Lang2.l
%{
/* Lang2.l - scanner for L = { ab (bbaa)^n bba (ba)^n | n >= 0 }.
 * Splits the input into the blocks "bbaa", "bba", "ab" and "ba"; whitespace
 * is skipped and any other character terminates the program. */
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"
%}
%%
bbaa { return BBAA; }
bba { return BBA; }
ab { return AB; }
ba { return BA; }
[ \t\n]+ { /* skip whitespace */ }
. { printf("Invalid character: %s\n", yytext); exit(1); }
%%

/* Called at end of input; returning 1 means there is nothing more to scan. */
int yywrap(void) {
    return 1;
}
Lang2.y
%{
/* Lang2.y - recognizer for L = { ab (bbaa)^n bba (ba)^n | n >= 0 }.
 * The grammar accepts AB BBAA* BBA BA* and counts both repetitions; the
 * semantic action then requires the two counts to be equal. */
#include <stdio.h>
#include <stdlib.h>
int yyerror(char *s);
int yylex(void);
%}
%token AB BBAA BBA BA
%%
S: AB L BBA R   {   /* $2 = number of "bbaa" blocks, $4 = number of "ba" blocks */
                    if ($2 == $4)
                        printf("Accepted\n");
                    else
                        printf("Not Accepted\n");
                }
    ;
L: /* empty */  { $$ = 0; }
    | L BBAA    { $$ = $1 + 1; }
    ;
R: /* empty */  { $$ = 0; }
    | R BA      { $$ = $1 + 1; }
    ;
%%
int main(void) {
    /* A syntax error means the block structure is wrong, so the string is
     * not in the language. */
    if (yyparse() != 0)
        printf("Not Accepted\n");
    return 0;
}

/* Prints the parser's error message on stderr. */
int yyerror(char *s) {
    fprintf(stderr, "%s\n", s);
    return 0;
}
Input/Output:
Lang1

Lang2

118
Conclusion: The experiment successfully implements a parser that recognizes strings
belonging to the given context-free languages, demonstrating how LEX and YACC can be
used for language recognition.

119
ASSESSMENT – 5
EXPERIMENT-1
Implementation of three address codes for a simple program using LEX and YACC tools.
Aim: To implement a program that generates three-address codes (TAC) using LEX and YACC.
Algorithm:

1. Lexical Analysis (LEX):

●​ Define token patterns for identifiers, operators, and keywords.


●​ Generate a lexical analyzer that converts input code into tokens.

2. Syntax Analysis (YACC):

●​ Define grammar rules to parse expressions and statements.


●​ Use syntax-directed translation to generate three-address code.

3. Three-Address Code Generation:

●​ Convert expressions into TAC using temporary variables.


●​ Handle assignment, arithmetic, and conditional expressions.

4. Execution:

●​ Provide sample input and observe the generated TAC.

Source Code:
Lexer.l
%{
/* Lexer.l - scanner for the three-address-code generator.
 * Returns NUMBER (value in yylval.num), IDENTIFIER (symbol-table entry in
 * yylval.sym) and one token per operator, parenthesis and semicolon. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.h"

/* Symbol table structure */
struct symtab {
    char *name;
    int val;
};

/* Defined in the parser file. */
extern struct symtab *lookup(char *);
extern struct symtab *install(char *);
%}

%%
[0-9]+ { yylval.num = atoi(yytext); return NUMBER; }
[a-zA-Z][a-zA-Z0-9]* {
    /* Reuse the existing entry for a known name, otherwise create one. */
    struct symtab *sp = lookup(yytext);
    if (sp == NULL)
        sp = install(yytext);
    yylval.sym = sp;
    return IDENTIFIER;
}
"=" { return ASSIGN; }
"+" { return PLUS; }
"-" { return MINUS; }
"*" { return MULTIPLY; }
"/" { return DIVIDE; }
"(" { return LPAREN; }
")" { return RPAREN; }
";" { return SEMICOLON; }
[ \t\n] { /* ignore whitespace */ }
. { printf("Unrecognized character: %s\n", yytext); }
%%

/* Called at end of input; returning 1 means there is nothing more to scan. */
int yywrap() {
    return 1;
}
Parser.y
/* parser.y - YACC file for syntax analysis and code generation */
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Symbol table handling */


struct symtab {
char *name;
int val;
};

/* Three-address code instruction structure */


typedef struct {
char op; /* '+', '-', '*', '/', '=' */
char *arg1; /* argument 1 */
char *arg2; /* argument 2 */
char *result; /* result */
} TAC;

/* Global variables */
#define SYMTAB_SIZE 100
struct symtab symtab[SYMTAB_SIZE];
int symtab_index = 0;

#define TAC_SIZE 1000


TAC tac[TAC_SIZE];

122
int tac_index = 0;

int temp_var_count = 0;

/* Function declarations */
void emit(char op, char *arg1, char *arg2, char *result);
char *new_temp();
struct symtab *lookup(char *name);
struct symtab *install(char *name);
%}

%union {
int num;
struct symtab *sym;
char *code;
}

%token <num> NUMBER


%token <sym> IDENTIFIER
%token ASSIGN PLUS MINUS MULTIPLY DIVIDE LPAREN RPAREN SEMICOLON

%type <code> expr term factor

%%
/* A program is one or more assignment statements. */
program:
    statement_list
    ;

statement_list:
    statement
    | statement_list statement
    ;

/* IDENTIFIER = expr ;  ->  emits "name = operand". */
statement:
    IDENTIFIER ASSIGN expr SEMICOLON {
        emit('=', $3, "", $1->name);
    }
    ;

/* Additive level: each operator emits one instruction into a fresh temporary. */
expr:
    expr PLUS term {
        char *temp = new_temp();
        emit('+', $1, $3, temp);
        $$ = temp;
    }
    | expr MINUS term {
        char *temp = new_temp();
        emit('-', $1, $3, temp);
        $$ = temp;
    }
    | term {
        $$ = $1;
    }
    ;

/* Multiplicative level: nested below expr, so it binds tighter. */
term:
    term MULTIPLY factor {
        char *temp = new_temp();
        emit('*', $1, $3, temp);
        $$ = temp;
    }
    | term DIVIDE factor {
        char *temp = new_temp();
        emit('/', $1, $3, temp);
        $$ = temp;
    }
    | factor {
        $$ = $1;
    }
    ;

/* Operands: parenthesised expression, literal, or variable. */
factor:
    LPAREN expr RPAREN {
        $$ = $2;
    }
    | NUMBER {
        /* The operand name of a literal is its decimal text. */
        char buffer[20];
        sprintf(buffer, "%d", $1);
        $$ = strdup(buffer);
    }
    | IDENTIFIER {
        $$ = $1->name;
    }
    ;
%%

/* Generate a new temporary variable name */

125
/* Returns a freshly allocated name "t<N>", where N is the current value of
   temp_var_count; the counter is advanced by one on every call. */
char *new_temp() {
    char name[20];
    const int id = temp_var_count;

    temp_var_count = id + 1;
    sprintf(name, "t%d", id);
    return strdup(name);
}

/* Emit a three-address code instruction */


void emit(char op, char *arg1, char *arg2, char *result) {
tac[tac_index].op = op;
tac[tac_index].arg1 = strdup(arg1);
tac[tac_index].arg2 = arg2 && *arg2 ? strdup(arg2) : NULL;
tac[tac_index].result = strdup(result);
tac_index++;
}

/* Look up a symbol in the symbol table */


struct symtab *lookup(char *name) {
for (int i = 0; i < symtab_index; i++) {
if (strcmp(symtab[i].name, name) == 0) {
return &symtab[i];
}
}
return NULL;
}

/* Install a new symbol in the symbol table */


/* Adds a new entry for `name` (copied) with val = 0 and returns it.
   Exits with status 1 when the table is full or the copy cannot be allocated.
   Does not check for duplicates; the scanner calls lookup() first. */
struct symtab *install(char *name) {
    char *copy;

    if (symtab_index >= SYMTAB_SIZE) {
        fprintf(stderr, "Symbol table full\n");
        exit(1);
    }
    copy = strdup(name);
    if (copy == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    symtab[symtab_index].name = copy;
    symtab[symtab_index].val = 0;
    return &symtab[symtab_index++];
}

/* Print the generated three-address code */


void print_tac() {
for (int i = 0; i < tac_index; i++) {
printf("%d: ", i);

switch (tac[i].op) {
case '=':
printf("%s = %s\n", tac[i].result, tac[i].arg1);
break;
case '+':
case '-':
case '*':
case '/':
printf("%s = %s %c %s\n", tac[i].result, tac[i].arg1, tac[i].op, tac[i].arg2);
break;
default:
printf("Unknown operation\n");
}
}
}

/* Parses statements from standard input, then prints the three-address code
   collected so far. Returns yyparse()'s status so that a failed parse is not
   reported as success. */
int main() {
    int status;

    printf("Enter expressions (end with Ctrl+D):\n");
    status = yyparse();
    printf("\nGenerated Three-Address Code:\n");
    print_tac();
    return status;
}

/* Error callback: writes the message to stderr with a "Parse error: " prefix.
   Always returns 0. */
int yyerror(char *s) {
    const char *message = s;

    fprintf(stderr, "Parse error: %s\n", message);
    return 0;
}
Input/Output:

128
129
Conclusion: The implementation successfully converts a simple program into its
three-address code representation using LEX and YACC, demonstrating syntax-directed
translation and intermediate code generation.

EXPERIMENT-2
Implement simple code optimization techniques (constant folding, strength reduction,
algebraic transformation, etc.).
Aim: To implement simple code optimization techniques such as constant folding, strength
reduction, and algebraic transformations.
Algorithm:

1. Constant Folding:

●​ Identify expressions with constant values.


●​ Compute constant expressions at compile time and replace them.

2. Strength Reduction:

●​ Replace costly operations (multiplication, division) with equivalent low-cost


operations (shift, add, subtract).
●​ Example: Replace x * 2 with x << 1.

3. Algebraic Transformations:

●​ Simplify expressions using algebraic identities.


●​ Example: Replace x + 0 with x, x * 1 with x, etc.

4. Implementation:

●​ Parse the given code and identify optimizable patterns.


●​ Apply transformations and output the optimized code.

Source Code:
#include <iostream>
#include <vector>
#include <chrono>
#include <cmath>
#include <string>
#include <sstream>
#include <stack>
#include <map>

using namespace std;

// A simple AST node structure for expressions


// AST node for arithmetic expressions: a numeric constant, a single-character
// variable, or a binary operation. A node owns its `left` and `right`
// subtrees and deletes them in its destructor.
struct ExprNode
{
    enum Type
    {
        CONSTANT,
        VARIABLE,
        BINARY_OP
    };

    Type type;
    double value = 0.0;        // For constants
    char var_name = '\0';      // For variables
    char op = '\0';            // For binary operations
    ExprNode *left = nullptr;  // Left operand (owned)
    ExprNode *right = nullptr; // Right operand (owned)

    // Constructor for constants
    ExprNode(double val) : type(CONSTANT), value(val) {}

    // Constructor for variables
    ExprNode(char name) : type(VARIABLE), var_name(name) {}

    // Constructor for binary operations; takes ownership of both operands
    ExprNode(char operation, ExprNode *l, ExprNode *r)
        : type(BINARY_OP), op(operation), left(l), right(r) {}

    // A copied node would share (and later double-delete) the same children,
    // so copying is disabled.
    ExprNode(const ExprNode &) = delete;
    ExprNode &operator=(const ExprNode &) = delete;

    ~ExprNode()
    {
        delete left; // deleting nullptr is a no-op
        delete right;
    }
};

// Function to create a deep copy of an AST


// Returns a newly allocated deep copy of the tree rooted at `node`
// (nullptr for a null input). The caller owns the returned tree.
ExprNode *clone(ExprNode *node)
{
    if (!node)
        return nullptr;

    switch (node->type)
    {
    case ExprNode::CONSTANT:
        return new ExprNode(node->value);
    case ExprNode::VARIABLE:
        return new ExprNode(node->var_name);
    case ExprNode::BINARY_OP:
        return new ExprNode(node->op, clone(node->left), clone(node->right));
    }

    return nullptr; // unknown node type
}

// Function to print an expression


// Writes the expression to cout in infix form; every binary operation is
// wrapped in parentheses. A null node prints nothing.
void printExpr(ExprNode *node)
{
    if (node == nullptr)
        return;

    switch (node->type)
    {
    case ExprNode::CONSTANT:
        cout << node->value;
        break;
    case ExprNode::VARIABLE:
        cout << node->var_name;
        break;
    case ExprNode::BINARY_OP:
        cout << "(";
        printExpr(node->left);
        cout << " " << node->op << " ";
        printExpr(node->right);
        cout << ")";
        break;
    }
}

// 1. Constant Folding
// Folds every binary operation whose two operands are constants into a single
// constant node, working bottom-up.
//
// Takes ownership of `node` and returns the (possibly new) root. Division by a
// constant zero and unknown operators are left unfolded.
ExprNode *constantFolding(ExprNode *node)
{
    if (!node)
        return nullptr;

    if (node->type != ExprNode::BINARY_OP)
        return node;

    // Fold the children first so nested constant expressions collapse.
    node->left = constantFolding(node->left);
    node->right = constantFolding(node->right);

    const bool bothConstant = node->left && node->right &&
                              node->left->type == ExprNode::CONSTANT &&
                              node->right->type == ExprNode::CONSTANT;
    if (!bothConstant)
        return node;

    const double left_val = node->left->value;
    const double right_val = node->right->value;
    double result = 0;

    switch (node->op)
    {
    case '+':
        result = left_val + right_val;
        break;
    case '-':
        result = left_val - right_val;
        break;
    case '*':
        result = left_val * right_val;
        break;
    case '/':
        if (right_val == 0)
            return node; // Division by zero - keep the original node
        result = left_val / right_val;
        break;
    default:
        return node; // Unknown operator
    }

    ExprNode *folded = new ExprNode(result);

    // The destructor releases both operand nodes along with the operator node.
    // (Nulling the children first, as before, leaked the two operands.)
    delete node;

    return folded;
}

// 2. Algebraic Simplification
// Applies algebraic identities bottom-up:
//   x + 0, x - 0, 0 + x  -> x
//   x * 1, x / 1, 1 * x  -> x
//   x * 0, 0 * x         -> 0
//   0 / x                -> 0   (assumes x != 0)
//
// Takes ownership of `node` and returns the (possibly different) root. At most
// one identity is applied per node, in the order listed above.
ExprNode *algebraicSimplification(ExprNode *node)
{
    if (!node)
        return nullptr;

    if (node->type != ExprNode::BINARY_OP)
        return node;

    node->left = algebraicSimplification(node->left);
    node->right = algebraicSimplification(node->right);

    // True when `n` is a constant node holding exactly `v`.
    const auto isConst = [](const ExprNode *n, double v) {
        return n && n->type == ExprNode::CONSTANT && n->value == v;
    };

    // Detaches the surviving operand, frees the operator node (and the other
    // operand through the destructor) and returns the survivor.
    const auto keepLeft = [](ExprNode *parent) {
        ExprNode *kept = parent->left;
        parent->left = nullptr; // Avoid double deletion
        delete parent;
        return kept;
    };
    const auto keepRight = [](ExprNode *parent) {
        ExprNode *kept = parent->right;
        parent->right = nullptr; // Avoid double deletion
        delete parent;
        return kept;
    };

    const char op = node->op;

    // x + 0 = x, x - 0 = x
    if ((op == '+' || op == '-') && isConst(node->right, 0))
        return keepLeft(node);

    // 0 + x = x
    if (op == '+' && isConst(node->left, 0))
        return keepRight(node);

    // x * 1 = x, x / 1 = x
    if ((op == '*' || op == '/') && isConst(node->right, 1))
        return keepLeft(node);

    // 1 * x = x
    if (op == '*' && isConst(node->left, 1))
        return keepRight(node);

    // x * 0 = 0, 0 * x = 0
    if (op == '*' && (isConst(node->left, 0) || isConst(node->right, 0)))
    {
        delete node;
        return new ExprNode(static_cast<double>(0));
    }

    // 0 / x = 0 (if x != 0, but we'll assume x != 0 for simplicity)
    if (op == '/' && isConst(node->left, 0))
    {
        delete node;
        return new ExprNode(0.0);
    }

    return node;
}

// 3. Strength Reduction
// Returns true and stores k in `power` when `value` is exactly 2^k with k >= 1.
// Non-integers (2.5), fractions (0.5), 1, zero, negatives, NaN and infinity
// are rejected.
static bool isShiftablePowerOfTwo(double value, int &power)
{
    if (!(value >= 2.0))
        return false;

    int exponent = 0;
    // frexp writes value as mantissa * 2^exponent with mantissa in [0.5, 1);
    // an exact power of two has mantissa 0.5.
    const double mantissa = std::frexp(value, &exponent);
    if (mantissa != 0.5)
        return false;

    power = exponent - 1;
    return true;
}

// Reports multiplications and divisions by a constant power of two, which can
// be implemented as a shift left / shift right by that power.
//
// The tree is returned unchanged. The AST has no shift operator and
// evaluateExpr only understands + - * /, so overwriting the constant with the
// shift amount (as this function used to do) turned `x * 8` into `x * 3` and
// changed the value of the expression.
ExprNode *strengthReduction(ExprNode *node)
{
    if (!node)
        return nullptr;

    if (node->type != ExprNode::BINARY_OP)
        return node;

    node->left = strengthReduction(node->left);
    node->right = strengthReduction(node->right);

    int power = 0;
    const bool mulOrDiv = node->op == '*' || node->op == '/';
    if (mulOrDiv && node->right &&
        node->right->type == ExprNode::CONSTANT &&
        isShiftablePowerOfTwo(node->right->value, power))
    {
        cout << "Strength reduction: " << node->right->value << " = 2^" << power << endl;
        if (node->op == '*')
            cout << "Multiplication can be replaced with shift left by " << power << endl;
        else
            cout << "Division can be replaced with shift right by " << power << endl;
    }

    return node;
}

// Apply all optimizations one by one with intermediate results


// Runs constant folding, algebraic simplification and strength reduction, in
// that order, on a clone of `node`, printing the tree after each step.
// The input tree is not modified; the caller owns the returned tree.
ExprNode *optimize(ExprNode *node)
{
    // Prints a label followed by the current tree and a line break.
    const auto show = [](const char *label, ExprNode *tree) {
        cout << label;
        printExpr(tree);
        cout << endl;
    };

    ExprNode *current = clone(node);

    cout << "\n--- Optimization Steps ---\n";
    show("\nStarting expression: ", current);

    current = constantFolding(current);
    show("\nAfter Constant Folding: ", current);

    current = algebraicSimplification(current);
    show("\nAfter Algebraic Simplification: ", current);

    current = strengthReduction(current);
    show("\nAfter Strength Reduction: ", current);

    return current;
}

// Function to evaluate an expression for a given value of x

136
// Evaluates the tree with the variable 'x' bound to `x_value`.
// Yields 0.0 for a null node, any variable other than 'x', division by zero
// and unknown operators.
double evaluateExpr(ExprNode *node, double x_value)
{
    if (node == nullptr)
        return 0.0;

    switch (node->type)
    {
    case ExprNode::CONSTANT:
        return node->value;

    case ExprNode::VARIABLE:
        return node->var_name == 'x' ? x_value : 0.0; // Default for other variables

    case ExprNode::BINARY_OP:
    {
        const double lhs = evaluateExpr(node->left, x_value);
        const double rhs = evaluateExpr(node->right, x_value);

        if (node->op == '+')
            return lhs + rhs;
        if (node->op == '-')
            return lhs - rhs;
        if (node->op == '*')
            return lhs * rhs;
        if (node->op == '/')
        {
            if (rhs != 0)
                return lhs / rhs;
            return 0.0;
        }
        return 0.0;
    }
    }

    return 0.0;
}

// Function to measure execution time


// Evaluates `node` `iterations` times, with x cycling through 0..9, and
// returns the elapsed wall-clock time in milliseconds.
double benchmarkExpr(ExprNode *node, int iterations)
{
    auto start = chrono::high_resolution_clock::now();

    double result = 0.0;
    for (int i = 0; i < iterations; i++)
    {
        result += evaluateExpr(node, i % 10);
    }

    auto end = chrono::high_resolution_clock::now();

    // Store the sum in a volatile so the timed loop has an observable effect
    // and cannot be discarded as dead code.
    volatile double sink = result;
    (void)sink;

    chrono::duration<double, milli> duration = end - start;
    return duration.count();
}

// Simple expression parser for basic expressions


// Supports +, -, *, / operators and variables (x), and numbers
ExprNode *parseExpression(const string &expr)
{
map<char, int> precedence = {
{'+', 1}, {'-', 1}, {'*', 2}, {'/', 2}};

stack<ExprNode *> values;


stack<char> ops;

for (size_t i = 0; i < expr.length(); i++)


{
if (expr[i] == ' ')
continue;

if (expr[i] == '(')
{
ops.push(expr[i]);
}
else if (isdigit(expr[i]))
{
stringstream ss;
while (i < expr.length() && (isdigit(expr[i]) || expr[i] == '.'))
{
ss << expr[i++];
}
i--; // Back one step since loop will increment again

double val;
ss >> val;
values.push(new ExprNode(val));
}
else if (expr[i] == 'x')
{
values.push(new ExprNode('x'));
}
else if (expr[i] == ')')
{
while (!ops.empty() && ops.top() != '(')
{
char op = ops.top();
ops.pop();

ExprNode *right = values.top();


values.pop();
ExprNode *left = values.top();
values.pop();

values.push(new ExprNode(op, left, right));

138
}

if (!ops.empty())
ops.pop(); // Remove the '('
}
else if (expr[i] == '+' || expr[i] == '-' || expr[i] == '*' || expr[i]
== '/')
{
while (!ops.empty() && ops.top() != '(' &&
precedence[ops.top()] >= precedence[expr[i]])
{
char op = ops.top();
ops.pop();

ExprNode *right = values.top();


values.pop();
ExprNode *left = values.top();
values.pop();

values.push(new ExprNode(op, left, right));


}

ops.push(expr[i]);
}
}

while (!ops.empty())
{
char op = ops.top();
ops.pop();

ExprNode *right = values.top();


values.pop();
ExprNode *left = values.top();
values.pop();

values.push(new ExprNode(op, left, right));


}

return values.empty() ? nullptr : values.top();


}

// Function to run all optimizations individually and show the result of each
// Prints the original expression and then the result of applying each
// optimization on its own. Every pass runs on a fresh clone that is deleted
// afterwards, so `expr` is not modified.
void showDetailedOptimization(ExprNode *expr)
{
    cout << "\n--- Individual Optimization Effects ---\n";

    // Original expression
    cout << "\nOriginal expression: ";
    printExpr(expr);
    cout << endl;

    // Just constant folding
    ExprNode *after_cf = constantFolding(clone(expr));
    cout << "\nAfter ONLY Constant Folding: ";
    printExpr(after_cf);
    cout << endl;
    delete after_cf;

    // Just algebraic simplification
    ExprNode *after_as = algebraicSimplification(clone(expr));
    cout << "\nAfter ONLY Algebraic Simplification: ";
    printExpr(after_as);
    cout << endl;
    delete after_as;

    // Just strength reduction
    ExprNode *after_sr = strengthReduction(clone(expr));
    cout << "\nAfter ONLY Strength Reduction: ";
    printExpr(after_sr);
    cout << endl;
    delete after_sr;
}

// Reads one expression from standard input, shows each optimization on its
// own and then all of them in sequence, and finally compares evaluation time
// of the original and the optimized tree. Returns 1 if the input cannot be
// parsed, 0 otherwise.
int main()
{
    string input;
    cout << "Enter an expression (e.g., '(2*x+0)*(3+4)' or 'x*8/4+2*0'): ";
    getline(cin, input);

    ExprNode *expr = parseExpression(input);
    if (!expr)
    {
        cout << "Failed to parse expression." << endl;
        return 1;
    }

    cout << "Original expression: ";
    printExpr(expr);
    cout << endl;

    // Show the effect of each optimization technique individually
    showDetailedOptimization(expr);

    // Apply all optimizations in sequence and show intermediate results
    ExprNode *optimized = optimize(expr);

    cout << "\n--- Final Results ---\n";
    cout << "Original expression: ";
    printExpr(expr);
    cout << endl;
    cout << "Fully optimized expression: ";
    printExpr(optimized);
    cout << endl;

    // Benchmark both expressions
    int iterations = 10000000;
    double original_time = benchmarkExpr(expr, iterations);
    double optimized_time = benchmarkExpr(optimized, iterations);

    cout << "\n--- Performance Analysis ---\n";
    cout << "Original expression time: " << original_time << " ms" << endl;
    cout << "Optimized expression time: " << optimized_time << " ms" << endl;
    if (optimized_time > 0.0)
    {
        cout << "Speedup: " << (original_time / optimized_time) << "x" << endl;
    }
    else
    {
        // A zero measurement would turn the ratio into inf or NaN.
        cout << "Speedup: n/a (optimized run too fast to measure)" << endl;
    }

    // Clean up
    delete expr;
    delete optimized;

    return 0;
}

Input/Output:

141
142
Conclusion: The experiment demonstrates various code optimization techniques that reduce
computational overhead, making the generated code more efficient without altering its
functionality.

EXPERIMENT-3
Implement Back-End of the compiler for which three address code is given as input and the
8086-assembly language is produced as output.
Aim: To implement a compiler backend that takes three-address code as input and generates
8086 assembly code.
Algorithm:

1. Input Parsing:

●​ Read the three-address code instructions.

2. Register Allocation:

●​ Assign registers for temporary and variable storage.

3. Instruction Translation:

●​ Convert TAC instructions into equivalent 8086 assembly instructions.


●​ Example: t1 = a + b → MOV AX, a; ADD AX, b; MOV t1, AX.

4. Memory Management:

●​ Allocate stack or memory locations for variables.

5. Code Generation:

●​ Output the final 8086 assembly code.

Source Code:
#include <iostream>
#include <vector>
#include <string>
#include <sstream>
#include <fstream>
#include <unordered_set>
#include <unordered_map>
#include <algorithm>
#include <stdexcept>

// Helper function to split a string by spaces


// Splits `s` on runs of whitespace and returns the non-empty pieces in order.
// An empty or all-whitespace string yields an empty vector.
std::vector<std::string> splitString(const std::string &s)
{
    std::vector<std::string> tokens;
    std::stringstream ss(s);
    for (std::string token; ss >> token;)
    {
        tokens.push_back(token);
    }
    return tokens;
}

// Helper to check if a string represents a number (constant)


// Returns true when `s` is a decimal integer constant: an optional leading
// '+' or '-' followed by one or more digits and nothing else.
//
// The check is done by hand rather than with strtol, so it needs no
// <cstdlib> (which this file does not include) and does not accept leading
// whitespace.
bool isNumber(const std::string &s)
{
    std::size_t pos = (!s.empty() && (s[0] == '+' || s[0] == '-')) ? 1 : 0;
    if (pos == s.size())
        return false; // empty string, or a sign with no digits

    for (; pos < s.size(); ++pos)
    {
        if (s[pos] < '0' || s[pos] > '9')
            return false;
    }
    return true;
}

// Helper to check if a string is a valid identifier (variable/temp)


// Basic check: starts with letter or '_', followed by letters, numbers, '_'
// Returns true when `s` is a valid variable/temporary name: a letter or '_'
// followed by letters, digits or '_', and not one of the upper-case register
// names AX, BX, CX, DX, SI, DI, SP, BP.
bool isIdentifier(const std::string &s)
{
    // The <cctype> classifiers require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    const auto isStart = [](char c) {
        return isalpha(static_cast<unsigned char>(c)) != 0 || c == '_';
    };
    const auto isRest = [](char c) {
        return isalnum(static_cast<unsigned char>(c)) != 0 || c == '_';
    };

    if (s.empty() || !isStart(s[0]))
    {
        return false;
    }
    for (size_t i = 1; i < s.length(); ++i)
    {
        if (!isRest(s[i]))
        {
            return false;
        }
    }
    // Avoid collision with register names (though unlikely in TAC)
    return s != "AX" && s != "BX" && s != "CX" && s != "DX" &&
           s != "SI" && s != "DI" && s != "SP" && s != "BP";
}

class Backend8086
{
private:
std::vector<std::string> tacLines;      // Input three-address code, one instruction per line
std::vector<std::string> assemblyCode;  // Generated assembly, one line per element
std::unordered_set<std::string> variables;             // To store all variable names
std::unordered_map<std::string, std::string> labelMap; // Map TAC labels to assembly labels
int labelCounter = 0;                   // For generating unique assembly labels if needed

// --- Helper Functions ---

// Get assembly representation for an operand (variable or constant)


std::string getOperand(const std::string &operand)
{
    // The operand text is returned unchanged in every case: numbers are used
    // as immediates, identifiers as memory variable names, and anything else
    // is assumed to be a label. Only identifiers are recorded in `variables`.
    const bool isVariable = !isNumber(operand) && isIdentifier(operand);
    if (isVariable)
    {
        variables.insert(operand); // Ensure variable is tracked
    }
    return operand;
}

// Generates a unique label name (not strictly needed if TAC labels are
unique)
std::string generateNewLabel()
{
return "L_INTERNAL_" + std::to_string(labelCounter++);
}

// --- Code Generation for specific TAC instructions ---

// Translates "result = op1" into a load of op1 into AX followed by a store of
// AX into result. Lines that do not have exactly three tokens are ignored.
void generateAssignment(const std::vector<std::string> &tokens)
{
    // Format: result = op1
    if (tokens.size() != 3)
        return; // Basic error check
    const std::string &result = tokens[0];
    const std::string &op1 = tokens[2];

    variables.insert(result); // Ensure result variable is declared

    if (isNumber(op1))
    {
        assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; " + tokens[0] + " = " + tokens[2]);
    }
    else
    {
        variables.insert(op1);
        assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load " + op1);
    }
    assemblyCode.push_back("\tMOV " + result + ", AX" + "\t\t; Store result in " + result);
}

// Translates "result = op1 op op2" (op is + - * /): op1 is loaded into AX,
// op2 into BX, the operation leaves its result in AX, and AX is stored into
// result. Lines that do not have exactly five tokens are ignored.
//
// Division is signed (CWD + IDIV), matching the signed jumps, NEG and the
// signed PRINT_NUM used elsewhere in this generator. The previous
// "MOV DX, 0 / DIV BX" gave wrong quotients for negative dividends.
void generateBinaryOperation(const std::vector<std::string> &tokens)
{
    // Format: result = op1 op op2
    if (tokens.size() != 5)
        return;
    const std::string &result = tokens[0];
    const std::string &op1 = tokens[2];
    const std::string &op = tokens[3];
    const std::string &op2 = tokens[4];

    variables.insert(result); // Ensure result variable is declared

    // Load op1 into AX
    if (isNumber(op1))
    {
        assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load immediate " + op1);
    }
    else
    {
        variables.insert(op1);
        assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load " + op1);
    }

    // Load op2 into BX
    if (isNumber(op2))
    {
        assemblyCode.push_back("\tMOV BX, " + op2 + "\t\t; Load immediate " + op2);
    }
    else
    {
        variables.insert(op2);
        assemblyCode.push_back("\tMOV BX, " + op2 + "\t\t; Load " + op2);
    }

    // Perform operation
    if (op == "+")
    {
        assemblyCode.push_back(std::string("\tADD AX, BX") + "\t\t; AX = " + op1 + " + " + op2);
    }
    else if (op == "-")
    {
        assemblyCode.push_back(std::string("\tSUB AX, BX") + "\t\t; AX = " + op1 + " - " + op2);
    }
    else if (op == "*")
    {
        // 16-bit multiplication: result in DX:AX; only AX is kept, so the
        // product is assumed to fit in 16 bits. The low word is the same for
        // signed and unsigned multiplication.
        assemblyCode.push_back(std::string("\tMUL BX") + "\t\t; AX = AX * BX (result in DX:AX)");
    }
    else if (op == "/")
    {
        // 16-bit signed division: DX:AX / BX. Quotient in AX, remainder in DX.
        assemblyCode.push_back(std::string("\tCWD") + "\t\t; Sign-extend AX into DX:AX for division");
        assemblyCode.push_back(std::string("\tIDIV BX") + "\t\t; AX = AX / BX, Remainder in DX");
    }
    else
    {
        assemblyCode.push_back("\t; Unsupported binary operator: " + op);
    }

    // Store result
    assemblyCode.push_back("\tMOV " + result + ", AX" + "\t\t; Store result in " + result);
}

// Translates the unary negation "result = - op1". A constant operand is
// negated at generation time; a variable is loaded and negated with NEG.
// Any other token layout is ignored.
void generateUnaryOperation(const std::vector<std::string> &tokens)
{
    // Format: result = op op1 (e.g., t1 = - a)
    if (tokens.size() != 4 || tokens[2] != "-")
        return; // Only handle negation for now
    const std::string &result = tokens[0];
    const std::string &op1 = tokens[3];

    variables.insert(result);

    if (isNumber(op1))
    {
        // Negate constant directly
        try
        {
            long val = std::stol(op1);
            assemblyCode.push_back("\tMOV AX, " + std::to_string(-val) + "\t\t; Load negated constant");
        }
        catch (...)
        {
            // std::stol throws for values that do not fit in a long.
            assemblyCode.push_back("\t; Error negating constant " + op1);
            return;
        }
    }
    else
    {
        variables.insert(op1);
        assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load " + op1);
        assemblyCode.push_back(std::string("\tNEG AX") + "\t\t; Negate value in AX");
    }
    assemblyCode.push_back("\tMOV " + result + ", AX" + "\t\t; Store result in " + result);
}

void generateLabel(const std::vector<std::string> &tokens)
{
    // Format: L1:
    // Emits the label unchanged (name followed by ':'); any other token
    // layout is ignored.
    const bool wellFormed = tokens.size() == 1 && tokens[0].back() == ':';
    if (!wellFormed)
        return;

    const std::string &text = tokens[0];
    const std::string name = text.substr(0, text.size() - 1);
    assemblyCode.push_back(name + ":");
}

void generateGoto(const std::vector<std::string> &tokens)
{
    // Format: goto L1
    // Emits an unconditional JMP to the second token; lines with any other
    // token count are ignored.
    if (tokens.size() == 2)
    {
        const std::string &target = tokens[1];
        assemblyCode.push_back("\tJMP " + target);
    }
}

// Translates "if op1 relop op2 goto label" into a compare followed by the
// matching signed conditional jump (JE, JNE, JG, JL, JGE, JLE).
//
// Token layout: [0]="if" [1]=op1 [2]=relop [3]=op2 [4]="goto" [5]=label.
// The guard previously tested tokens[1] against "if", which rejected every
// well-formed line, so no conditional jump was ever generated.
void generateConditionalJump(const std::vector<std::string> &tokens)
{
    // Format: if op1 relop op2 goto label
    if (tokens.size() != 6 || tokens[0] != "if" || tokens[4] != "goto")
        return;
    const std::string &op1 = tokens[1];
    const std::string &relop = tokens[2];
    const std::string &op2 = tokens[3];
    const std::string &label = tokens[5];

    // Load op1 into AX
    if (isNumber(op1))
    {
        assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load immediate " + op1);
    }
    else
    {
        variables.insert(op1);
        assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load " + op1);
    }

    // Compare AX with op2 (using BX or immediate)
    if (isNumber(op2))
    {
        assemblyCode.push_back("\tCMP AX, " + op2 + "\t\t; Compare AX with " + op2);
    }
    else
    {
        variables.insert(op2);
        assemblyCode.push_back("\tMOV BX, " + op2 + "\t\t; Load " + op2 + " into BX for compare");
        assemblyCode.push_back(std::string("\tCMP AX, BX") + "\t\t; Compare AX with BX");
    }

    // Generate appropriate jump instruction
    std::string jumpInstruction = "";
    if (relop == "==")
        jumpInstruction = "JE";
    else if (relop == "!=")
        jumpInstruction = "JNE";
    else if (relop == ">")
        jumpInstruction = "JG";
    else if (relop == "<")
        jumpInstruction = "JL";
    else if (relop == ">=")
        jumpInstruction = "JGE";
    else if (relop == "<=")
        jumpInstruction = "JLE";
    else
    {
        assemblyCode.push_back("\t; Unsupported relational operator: " + relop);
        return;
    }

    assemblyCode.push_back("\t" + jumpInstruction + " " + label + "\t\t; Jump if " + op1 + " " + relop + " " + op2);
}

149
// Translates "param op1" into a push of op1 onto the stack. A constant is
// first loaded into AX; a variable is pushed directly from memory.
// Lines that do not have exactly two tokens are ignored.
void generateParam(const std::vector<std::string> &tokens)
{
    // Format: param op1
    if (tokens.size() != 2)
        return;
    const std::string &op1 = tokens[1];

    if (isNumber(op1))
    {
        assemblyCode.push_back(std::string("\tMOV AX, ") + op1 + "\t\t; Load immediate param");
        assemblyCode.push_back(std::string("\tPUSH AX") + "\t\t; Push param onto stack");
    }
    else
    {
        variables.insert(op1);
        assemblyCode.push_back("\tPUSH " + op1 + "\t\t; Push param " + op1 + " onto stack");
    }
}

// Translates "call func, n" or "result = call func, n" into a CALL, followed
// by caller-side stack cleanup of n two-byte parameters and, for the second
// form, a store of AX into result. Malformed lines produce an assembly
// comment instead of code.
void generateCall(const std::vector<std::string> &tokens)
{
    // Format: result = call func, n OR call func, n
    if (tokens.size() < 3 || tokens.size() > 5)
        return;

    std::string resultVar = "";
    std::string funcName;
    std::string paramCountStr;
    int paramCount = 0;

    if (tokens.size() == 5 && tokens[1] == "=" && tokens[2] == "call")
    {
        // Format: result = call func, n
        resultVar = tokens[0];
        variables.insert(resultVar);
        funcName = tokens[3];
        paramCountStr = tokens[4];
    }
    else if (tokens.size() == 3 && tokens[0] == "call")
    {
        // Format: call func, n
        funcName = tokens[1];
        paramCountStr = tokens[2];
    }
    else
    {
        assemblyCode.push_back("\t; Malformed call instruction");
        return;
    }

    // Remove comma if present in funcName or paramCountStr (depends on exact
    // TAC format)
    if (!funcName.empty() && funcName.back() == ',')
    {
        funcName.pop_back();
    }
    if (!paramCountStr.empty() && paramCountStr.back() == ',')
    {
        paramCountStr.pop_back();
    }

    // Ensure param count is a valid number
    try
    {
        paramCount = std::stoi(paramCountStr);
    }
    catch (...)
    {
        assemblyCode.push_back("\t; Invalid parameter count in call: " + paramCountStr);
        return;
    }

    assemblyCode.push_back("\tCALL " + funcName + "\t\t; Call function " + funcName);

    // Caller stack cleanup (CDECL style assumed here)
    if (paramCount > 0)
    {
        assemblyCode.push_back("\tADD SP, " + std::to_string(paramCount * 2) + "\t; Clean up " +
                               std::to_string(paramCount) + " parameters (2 bytes each)");
    }

    // Store return value if needed (conventionally in AX)
    if (!resultVar.empty())
    {
        assemblyCode.push_back("\tMOV " + resultVar + ", AX" + "\t\t; Store return value in " + resultVar);
    }
}

void generateReturn(const std::vector<std::string> &tokens)


{
// Format: return [op1]
if (tokens.size() == 1)
{ // Just "return"
assemblyCode.push_back("\t; Preparing simple return (no value
specified)");
// Assuming the function epilogue handles RET
}
else if (tokens.size() == 2)

151
{ // "return op1"
std::string op1 = tokens[1];
if (isNumber(op1))
{
assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load return
value");
}
else
{
variables.insert(op1);
assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load return
value from " + op1);
}
// The actual RET instruction is usually part of function epilogue
assemblyCode.push_back("\t; Return value placed in AX");
}
// Note: A real function would have a proper epilogue (MOV SP, BP; POP
BP; RET)
// We'll add a simple RET for now, assuming it's end of MAIN or a
simple function
assemblyCode.push_back("\tRET\t\t; Return from procedure");
}

// Translates "print op1": loads op1 into AX, then calls PRINT_NUM and
// PRINT_NEWLINE. Lines that do not have exactly two tokens are ignored.
void generatePrint(const std::vector<std::string> &tokens)
{
    // Format: print op1
    if (tokens.size() != 2)
        return;
    const std::string &op1 = tokens[1];

    if (isNumber(op1))
    {
        assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load immediate value to print");
    }
    else
    {
        variables.insert(op1);
        assemblyCode.push_back("\tMOV AX, " + op1 + "\t\t; Load variable " + op1 + " to print");
    }
    assemblyCode.push_back("\tCALL PRINT_NUM\t; Call procedure to print number in AX");
    // Add a newline after printing
    assemblyCode.push_back("\tCALL PRINT_NEWLINE");
}

// Translates "read op1": calls READ_NUM and stores AX into op1.
// Ignored unless there are exactly two tokens and op1 is an identifier.
void generateRead(const std::vector<std::string> &tokens)
{
    // Format: read op1
    if (tokens.size() != 2 || !isIdentifier(tokens[1]))
        return;

    const std::string &op1 = tokens[1];
    variables.insert(op1);

    assemblyCode.push_back("\tCALL READ_NUM\t; Call procedure to read number into AX");
    assemblyCode.push_back("\tMOV " + op1 + ", AX\t\t; Store read value in " + op1);
}

// --- Main Generation Logic ---

// Emits the .DATA section: one uninitialised word per recorded variable,
// followed by the fixed data items declared for the helper procedures.
// Variables are emitted in sorted order so the output is the same on every
// run (iteration order of an unordered_set is unspecified).
void generateDataSection()
{
    assemblyCode.push_back(".DATA");

    // Declare all identified variables
    std::vector<std::string> sortedVars(variables.begin(), variables.end());
    std::sort(sortedVars.begin(), sortedVars.end());
    for (const std::string &var : sortedVars)
    {
        assemblyCode.push_back(var + "\tDW\t?"); // Define Word, initialized to undefined (?)
    }

    // Add any necessary data for helper procedures (like newline string)
    assemblyCode.push_back("NEWLINE\tDB\t0DH, 0AH, '$' ; Carriage return, Line feed, End of string");
    assemblyCode.push_back("INPUT_MSG\tDB\t'Enter number: $'");
    assemblyCode.push_back("OUTPUT_MSG\tDB\t'Output: $'"); // Optional output prefix
    assemblyCode.push_back("TEMP_STR\tDB\t6 DUP('$') ; Buffer for number conversion (max 5 digits + sign)");
}

void generateCodeSection()
{
assemblyCode.push_back(".CODE");
assemblyCode.push_back("MAIN\tPROC FAR"); // Assuming FAR proc for
.EXE entry point

// Standard setup for DS register


assemblyCode.push_back("\tMOV AX, @DATA\t; Get data segment address");
assemblyCode.push_back("\tMOV DS, AX\t\t; Initialize DS");
assemblyCode.push_back("\tMOV ES, AX\t\t; Initialize ES (often needed
for string ops, good practice)");
assemblyCode.push_back(""); // Blank line for readability

// Translate each TAC line


for (const std::string &line : tacLines)
{
std::vector<std::string> tokens = splitString(line);
if (tokens.empty())
continue; // Skip blank lines

assemblyCode.push_back("; TAC: " + line); // Add original TAC as


comment

153
// Determine instruction type based on tokens
if (tokens.size() >= 3 && tokens[1] == "=")
{
if (tokens.size() == 3)
{ // Assignment: x = y
generateAssignment(tokens);
}
else if (tokens.size() == 5 && (tokens[3] == "+" || tokens[3]
== "-" || tokens[3] == "*" || tokens[3] == "/"))
{ // Binary: x = y op z
generateBinaryOperation(tokens);
}
else if (tokens.size() == 4 && tokens[2] == "-")
{ // Unary: x = - y
generateUnaryOperation(tokens);
}
else if (tokens.size() == 5 && tokens[2] == "call")
{ // Call with result: x = call func, n
generateCall(tokens);
}
else
{
assemblyCode.push_back("\t; Unrecognized
assignment/expression format");
}
}
else if (tokens.size() == 1 && tokens[0].back() == ':')
{ // Label: L1:
generateLabel(tokens);
}
else if (tokens.size() == 2 && tokens[0] == "goto")
{ // Unconditional Jump: goto L1
generateGoto(tokens);
}
else if (tokens.size() == 6 && tokens[0] == "if" && tokens[4] ==
"goto")
{ // Conditional Jump: if x > y goto L1
generateConditionalJump(tokens);
}
else if (tokens.size() == 2 && tokens[0] == "param")
{ // Parameter: param z
generateParam(tokens);
}
else if (tokens.size() == 3 && tokens[0] == "call")
{ // Call without result: call func, n
generateCall(tokens);
}
else if (tokens.size() >= 1 && tokens[0] == "return")
{ // Return: return [x]
generateReturn(tokens);
}

154
else if (tokens.size() == 2 && tokens[0] == "print")
{ // Output: print x
generatePrint(tokens);
}
else if (tokens.size() == 2 && tokens[0] == "read")
{ // Input: read y
generateRead(tokens);
}
else
{
assemblyCode.push_back("\t; Unrecognized TAC instruction: " +
line);
}
assemblyCode.push_back(""); // Blank line between TAC translations
}

// Standard program exit


assemblyCode.push_back("EXIT_PROG:");
assemblyCode.push_back("\tMOV AH, 4CH\t\t; DOS exit function");
assemblyCode.push_back("\tINT 21H\t\t\t; Call DOS interrupt");

assemblyCode.push_back("MAIN\tENDP");
assemblyCode.push_back("");
}

// Basic I/O procedures (using DOS INT 21h)


// Appends the runtime helper procedures (PRINT_NUM, PRINT_NEWLINE, READ_NUM)
// to assemblyCode, one assembly source line per element.
//
// All I/O goes through DOS INT 21h services. Every emitted instruction is
// valid on a plain 8086: no push-immediate (that form needs a 186 or later)
// and no SP-based memory operands (16-bit addressing only allows BX, BP, SI
// and DI as base/index registers).
//
// Register contracts of the emitted procedures:
//   PRINT_NUM     - in: AX = signed 16-bit value; all registers preserved.
//   PRINT_NEWLINE - prints the '$'-terminated string labelled NEWLINE;
//                   AX and DX preserved.
//   READ_NUM      - out: AX = signed value typed by the user; BX, CX, DX and
//                   SI preserved. Reading stops at Enter or at the first
//                   non-digit character. Overflow is not detected.
void addHelperProcedures()
{
    static const char *const helperLines[] = {
        "; --- Helper Procedures ---",
        "",

        // PRINT_NUM: prints the signed number in AX.
        // The conversion loop is a do-while, so the value 0 still yields one
        // digit without needing a separate (and 8086-invalid) "PUSH 0".
        "PRINT_NUM\tPROC NEAR",
        "\t; Prints signed number in AX",
        "\tPUSH AX\t\t; Save registers used",
        "\tPUSH BX",
        "\tPUSH CX",
        "\tPUSH DX",
        "\tPUSH SI",
        "",
        "\tMOV CX, 0\t\t; Digit count",
        "\tMOV BX, 10\t\t; Base for division",
        "",
        "\tCMP AX, 0\t\t; Check if number is negative",
        "\tJGE PN_CONVERT_LOOP",
        "\tPUSH AX\t\t; Save AX",
        "\tMOV AH, 02H\t; DOS print char function",
        "\tMOV DL, '-'\t; Char to print",
        "\tINT 21H\t\t; Print '-'",
        "\tPOP AX\t\t; Restore AX",
        "\tNEG AX\t\t; Make AX positive for conversion",
        "PN_CONVERT_LOOP:",
        "\tMOV DX, 0\t\t; Clear DX for division",
        "\tDIV BX\t\t; AX = AX / 10, DX = AX % 10",
        "\tPUSH DX\t\t; Push remainder (digit) onto stack",
        "\tINC CX\t\t; Increment digit count",
        "\tCMP AX, 0\t\t; Runs at least once, so 0 prints as '0'",
        "\tJNE PN_CONVERT_LOOP",
        "",
        "PN_PRINT_LOOP:",
        "\tPOP DX\t\t; Pop digit from stack",
        "\tADD DL, '0'\t; Convert digit to ASCII char",
        "\tMOV AH, 02H\t; DOS print char function",
        "\tINT 21H\t\t; Print character",
        "\tDEC CX\t\t; Decrement digit count",
        "\tJNZ PN_PRINT_LOOP",
        "",
        "\tPOP SI\t\t; Restore registers",
        "\tPOP DX",
        "\tPOP CX",
        "\tPOP BX",
        "\tPOP AX",
        "\tRET",
        "PRINT_NUM\tENDP",
        "",

        // PRINT_NEWLINE: prints CRLF.
        "PRINT_NEWLINE\tPROC NEAR",
        "\tPUSH AX",
        "\tPUSH DX",
        "\tMOV AH, 09H\t; DOS print string function",
        "\tLEA DX, NEWLINE\t; Load address of newline string",
        "\tINT 21H",
        "\tPOP DX",
        "\tPOP AX",
        "\tRET",
        "PRINT_NEWLINE\tENDP",
        "",

        // READ_NUM: reads a signed integer from input, returns it in AX.
        // The running total lives in BX because every INT 21h / AH=01h call
        // overwrites AX (AH holds the function number, AL the character).
        // Does not handle backspace or report invalid input.
        "READ_NUM\tPROC NEAR",
        "\t; Reads signed number, stores in AX",
        "\tPUSH BX\t\t; Save registers",
        "\tPUSH CX",
        "\tPUSH DX",
        "\tPUSH SI",
        "",
        "\tMOV SI, 1\t\t; Sign flag (1 for positive, -1 for negative)",
        "\tMOV BX, 0\t\t; Accumulator for the number",
        "\tMOV CX, 0\t\t; Count of characters consumed",
        "RN_READ_CHAR:",
        "\tMOV AH, 01H\t; DOS read char with echo",
        "\tINT 21H\t\t; Character in AL",
        "\tCMP AL, 0DH\t; Check for Enter key",
        "\tJE RN_DONE_READING",
        "\tCMP CX, 0\t\t; Check if first character",
        "\tJNE RN_CHECK_DIGIT",
        "\tCMP AL, '-'\t; Check if it's a minus sign",
        "\tJNE RN_CHECK_DIGIT",
        "\tMOV SI, -1\t; Set sign flag to negative",
        "\tINC CX\t\t; Increment count (processed sign)",
        "\tJMP RN_READ_CHAR ; Read next char",
        "RN_CHECK_DIGIT:",
        "\tCMP AL, '0'\t; Check if >= '0'",
        "\tJB RN_INVALID_INPUT",
        "\tCMP AL, '9'\t; Check if <= '9'",
        "\tJA RN_INVALID_INPUT",
        "\tSUB AL, '0'\t; Convert ASCII digit to number",
        "\tMOV AH, 0\t\t; AX = digit value (0-9)",
        "\tPUSH AX\t\t; Save digit across the multiply",
        "\tMOV AX, 10",
        "\tMUL BX\t\t; DX:AX = accumulated value * 10",
        "\tPOP DX\t\t; DX = digit (high word of product discarded)",
        "\tADD AX, DX\t; AX = (accumulated value * 10) + digit",
        "\tMOV BX, AX\t; Keep the running total in BX",
        "\tINC CX\t\t; Increment character count",
        "\tJMP RN_READ_CHAR",
        "RN_INVALID_INPUT:",
        "\t; Handle invalid input - for simplicity, just finish",
        "\t; Fall through to RN_DONE_READING",
        "RN_DONE_READING:",
        "\tMOV AX, BX\t; Result goes back in AX",
        "\tCMP SI, 0",
        "\tJGE RN_POSITIVE_NUM",
        "\tNEG AX\t\t; Negate AX if SI is negative",
        "RN_POSITIVE_NUM:",
        "\tCALL PRINT_NEWLINE",
        "\tPOP SI\t\t; Restore registers",
        "\tPOP DX",
        "\tPOP CX",
        "\tPOP BX",
        "\tRET",
        "READ_NUM\tENDP",
        "",
    };

    for (const char *asmLine : helperLines)
    {
        assemblyCode.push_back(asmLine);
    }
}

public:
Backend8086(const std::vector<std::string> &inputTac) : tacLines(inputTac)
{}

std::vector<std::string> generate()
{
assemblyCode.clear();

158
variables.clear();
labelMap.clear();
labelCounter = 0;

// --- Pass 1 (Optional but good practice): Collect variables and


validate labels ---
// Our simple getOperand function adds variables dynamically, so a
strict first pass isn't mandatory here,
// but it's conceptually useful. We will rely on dynamic addition
during code generation pass.

// --- Generate Assembly Code ---


assemblyCode.push_back(".MODEL SMALL");
assemblyCode.push_back(".STACK 100h"); // Define stack size

// Generate .DATA section (needs variables collected *before* this)


// We need to generate code first to find all variables, then insert
.DATA at the beginning
std::vector<std::string> codeBody; // Temporary store for code while
finding vars

// Simulate code generation pass to collect variables


Backend8086 tempBackend(tacLines); // Use a temporary instance
to avoid polluting main state yet
tempBackend.generateCodeSection(); // This call populates
tempBackend.variables
this->variables = tempBackend.variables; // Copy discovered variables

// Now generate the final output in the correct order


generateDataSection(); // Uses the now populated 'this->variables'
assemblyCode.push_back("");
generateCodeSection(); // Generate the actual code section body
addHelperProcedures(); // Add I/O and other helpers

assemblyCode.push_back("END MAIN"); // Mark end of program

return assemblyCode;
}
};

int main(int argc, char *argv[])


{
// --- Input TAC ---
// Example TAC code
std::vector<std::string> tac = {
"read x",
"read y",
"t1 = x + y",
"t2 = x * y",
"if t1 > t2 goto L1",
"print t1",
"goto L2",

159
"L1:",
"print t2",
"L2:",
"z = t1 - 5",
"print z"
// Add more complex TAC examples here:
// "param x",
// "param y",
// "t3 = call AddFunc, 2", // Assuming AddFunc exists elsewhere
// "print t3",
// "return z"
};

// Or read from file if argument provided


if (argc > 1)
{
std::ifstream inFile(argv[1]);
if (!inFile)
{
std::cerr << "Error opening input file: " << argv[1] << std::endl;
return 1;
}
tac.clear();
std::string line;
while (std::getline(inFile, line))
{
// Basic cleanup: remove leading/trailing whitespace (optional)
line.erase(0, line.find_first_not_of(" \t\n\r"));
line.erase(line.find_last_not_of(" \t\n\r") + 1);
if (!line.empty())
{
tac.push_back(line);
}
}
std::cout << "Read " << tac.size() << " TAC lines from " << argv[1] <<
std::endl;
}
else
{
std::cout << "Using built-in example TAC." << std::endl;
}

// --- Generate Assembly ---


Backend8086 backend(tac);
std::vector<std::string> assembly;
try
{
assembly = backend.generate();
}
catch (const std::exception &e)
{

160
std::cerr << "Error during assembly generation: " << e.what() <<
std::endl;
return 1;
}

// --- Output Assembly ---


std::cout << "\n--- Generated 8086 Assembly ---" << std::endl;
std::string outputFilename = "output.asm";
std::ofstream outFile(outputFilename);
if (!outFile)
{
std::cerr << "Error creating output file: " << outputFilename <<
std::endl;
// Still print to console
}

for (const std::string &line : assembly)


{
std::cout << line << std::endl;
if (outFile)
{
outFile << line << std::endl;
}
}

if (outFile)
{
std::cout << "\nAssembly code written to " << outputFilename <<
std::endl;
outFile.close();
}

return 0;
}

Input/Output:
TEST CASE 1
My_tac.txt
read count
LOOP_START:
if count < 1 goto LOOP_END
; Calculate negative and print
neg_val = - count
print neg_val

161
; Decrement count
count = count - 1
goto LOOP_START
LOOP_END:
; End of loop
print 999 ; Indicate loop finished
Output

162
163
164
TEST CASE 2
My_tac.txt
read num1
read num2
t_sum = num1 + num2
t_prod = num1 * num2
if t_sum > t_prod goto PRINT_SUM
; Product is greater or equal
print t_prod
goto END_COMPARE
PRINT_SUM:
; Sum is greater
print t_sum

165
END_COMPARE:
; Program continues or ends
Output
PS D:\Coding & Others\C++\Compiler> cd "d:\Coding & Others\C++\Compiler\" ; if ($?) { g++
8086.cpp -o 8086 } ; if ($?)
{ .\8086 my_tac.txt}
Read 13 TAC lines from my_tac.txt

--- Generated 8086 Assembly ---


.MODEL SMALL
.STACK 100h
.DATA
t_prod DW ?
t_sum DW ?
num2 DW ?
num1 DW ?
NEWLINE DB 0DH, 0AH, '$' ; Carriage return, Line feed, End of string
INPUT_MSG DB 'Enter number: $'
OUTPUT_MSG DB 'Output: $'
TEMP_STR DB 6 DUP('$') ; Buffer for number conversion (max 5 digits + sign)

.CODE
MAIN PROC FAR
MOV AX, @DATA ; Get data segment address
MOV DS, AX ; Initialize DS
MOV ES, AX ; Initialize ES (often needed for string ops, good practice)

; TAC: read num1


CALL READ_NUM ; Call procedure to read number into AX
MOV num1, AX ; Store read value in num1

166
; TAC: read num2
CALL READ_NUM ; Call procedure to read number into AX
MOV num2, AX ; Store read value in num2

; TAC: t_sum = num1 + num2


MOV AX, num1 ; Load num1
MOV BX, num2 ; Load num2
ADD AX, BX ; AX = num1 + num2
MOV t_sum, AX ; Store result in t_sum

; TAC: t_prod = num1 * num2


MOV AX, num1 ; Load num1
MOV BX, num2 ; Load num2
MUL BX ; AX = AX * BX (result in DX:AX)
MOV t_prod, AX ; Store result in t_prod

; TAC: if t_sum > t_prod goto PRINT_SUM

; TAC: ; Product is greater or equal


; Unrecognized TAC instruction: ; Product is greater or equal

; TAC: print t_prod


MOV AX, t_prod ; Load variable t_prod to print
CALL PRINT_NUM ; Call procedure to print number in AX
CALL PRINT_NEWLINE

; TAC: goto END_COMPARE


JMP END_COMPARE

167
; TAC: PRINT_SUM:
PRINT_SUM:

; TAC: ; Sum is greater


; Unrecognized TAC instruction: ; Sum is greater

; TAC: print t_sum


MOV AX, t_sum ; Load variable t_sum to print
CALL PRINT_NUM ; Call procedure to print number in AX
CALL PRINT_NEWLINE

; TAC: END_COMPARE:
END_COMPARE:

; TAC: ; Program continues or ends


; Unrecognized TAC instruction: ; Program continues or ends

EXIT_PROG:
MOV AH, 4CH ; DOS exit function
INT 21H ; Call DOS interrupt
MAIN ENDP

; --- Helper Procedures ---

PRINT_NUM PROC NEAR


; Prints number in AX
PUSH AX ; Save registers used
PUSH BX

168
PUSH CX
PUSH DX
PUSH SI

MOV CX, 0 ; Digit count


MOV BX, 10 ; Base for division

CMP AX, 0 ; Check if number is negative


JGE PN_POSITIVE
PUSH AX ; Save AX
MOV AH, 02H ; DOS print char function
MOV DL, '-' ; Char to print
INT 21H ; Print '-'
POP AX ; Restore AX
NEG AX ; Make AX positive for conversion
PN_POSITIVE:
CMP AX, 0 ;Handle zero case
JNE PN_CONVERT_LOOP
PUSH 0 ; Push '0' digit if AX is 0
INC CX
PN_CONVERT_LOOP:
CMP AX, 0
JE PN_PRINT_LOOP
MOV DX, 0 ; Clear DX for division
DIV BX ; AX = AX / 10, DX = AX % 10
PUSH DX ; Push remainder (digit) onto stack
INC CX ; Increment digit count
JMP PN_CONVERT_LOOP

169
PN_PRINT_LOOP:
CMP CX, 0
JE PN_DONE
POP DX ; Pop digit from stack
ADD DL, '0' ; Convert digit to ASCII char
MOV AH, 02H ; DOS print char function
INT 21H ; Print character
DEC CX ; Decrement digit count
JMP PN_PRINT_LOOP

PN_DONE:
POP SI ; Restore registers
POP DX
POP CX
POP BX
POP AX
RET
PRINT_NUM ENDP

PRINT_NEWLINE PROC NEAR


PUSH AX
PUSH DX
MOV AH, 09H ; DOS print string function
LEA DX, NEWLINE ; Load address of newline string
INT 21H
POP DX
POP AX
RET
PRINT_NEWLINE ENDP

170
READ_NUM PROC NEAR
; Reads signed number, stores in AX
PUSH BX ; Save registers
PUSH CX
PUSH DX
PUSH SI

MOV SI, 1 ; Sign flag (1 for positive, -1 for negative)


MOV AX, 0 ; Accumulator for the number
MOV CX, 0 ; Digit count
MOV BX, 10 ; Base for multiplication
RN_READ_CHAR:
MOV AH, 01H ; DOS read char with echo
INT 21H ; Character in AL
CMP AL, 0DH ; Check for Enter key
JE RN_DONE_READING
CMP CX, 0 ; Check if first character
JNE RN_CHECK_DIGIT
CMP AL, '-' ; Check if it's a minus sign
JNE RN_CHECK_DIGIT
MOV SI, -1 ; Set sign flag to negative
INC CX ; Increment count (processed sign)
JMP RN_READ_CHAR ; Read next char
RN_CHECK_DIGIT:
CMP AL, '0' ; Check if >= '0'
JL RN_INVALID_INPUT
CMP AL, '9' ; Check if <= '9'
JG RN_INVALID_INPUT

171
AND AL, 0FH ; Convert ASCII digit to number (AL = AL - '0')
PUSH AX ; Save digit
MOV AX, BX ; AX = 10
POP DX ; DX = digit
PUSH DX ; Save digit again
IMUL WORD PTR [SP+2] ; AX = AX * Current value (on stack below return addr)
; Correction for accumulation:
POP DX ; DX = new digit value (0-9)
PUSH DX ; Save it again temporarily
MOV CX, AX ; Save current AX (accumulated value)
MOV AX, 10 ; AX = 10
MUL CX ; AX = Accumulated value * 10
POP CX ; CX = new digit value
ADD AX, CX ; AX = (Accumulated value * 10) + new digit
INC CX ; Increment digit count (now used for general count)
JMP RN_READ_CHAR
RN_INVALID_INPUT:
; Handle invalid input - for simplicity, just finish
; Fall through to RN_DONE_READING
RN_DONE_READING:
CMP SI, 0
JGE RN_POSITIVE_NUM
NEG AX ; Negate AX if SI is negative
RN_POSITIVE_NUM:
CALL PRINT_NEWLINE
POP SI ; Restore registers
POP DX
POP CX
POP BX

172
RET
READ_NUM ENDP

END MAIN

Assembly code written to output.asm


PS D:\Coding & Others\C++\Compiler>
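Conclusion: The implementation converts three-address code into 8086 assembly, demonstrating
compiler back-end functionality: data-segment generation for the variables, simple fixed register
usage (operands are loaded into AX and BX), and instruction translation. Note that in the output
above no instructions were emitted for the conditional jump (`if ... goto`).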

173

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy