AI Project
AI Project
Project Requirements
- Python 3.x
- Libraries: pandas, scikit-learn, nltk
Workflow
1. Install Python.
2. Install libraries: pip install pandas scikit-learn nltk
Complete Code Overview
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import nltk
from nltk.corpus import stopwords
# Download the NLTK stopword corpus (nltk.download is called on every run).
nltk.download('stopwords')
# Build a set of English stopwords; preprocess_text tests membership against it.
stop_words = set(stopwords.words('english'))
# 1. Load your dataset
# NOTE(review): absolute, machine-specific Windows path -- point it at your own
# copy of train.csv. The repeated "train.csv" looks like a folder that contains
# a file of the same name; confirm.
dataset_path = "C:\\Users\\Talha\\Downloads\\train.csv\\train.csv"
# Read the CSV into a DataFrame, decoding the file as ISO-8859-1.
df = pd.read_csv(dataset_path, encoding='ISO-8859-1')
# 2. Preprocess the data
def preprocess_text(text, stopword_set=None):
    """Lowercase *text* and remove stopwords from it.

    Args:
        text: The raw value to clean. Any value that is not a ``str`` is
            treated as empty.
        stopword_set: Optional collection of words to drop. When omitted,
            the module-level ``stop_words`` set is used.

    Returns:
        The remaining whitespace-separated words joined by single spaces,
        or ``""`` when *text* is not a string.
    """
    if not isinstance(text, str):
        return ""
    if stopword_set is None:
        # Fall back to the stopword set built at module level.
        stopword_set = stop_words
    # Lowercase before filtering so the membership test sees lowercase words.
    kept_words = [
        word for word in text.lower().split() if word not in stopword_set
    ]
    return " ".join(kept_words)
# Replace the 'selected_text' column with the result of applying
# preprocess_text to each of its values.
df['selected_text'] = df['selected_text'].apply(preprocess_text)
# 3. Vectorize the text data
# CountVectorizer is created with its default settings.
vectorizer = CountVectorizer()
# Fit the vectorizer on the cleaned text and transform that text into X.
X = vectorizer.fit_transform(df['selected_text'])
# y holds the 'sentiment' column (presumably the labels to predict -- confirm
# against the training code, which is not visible here).
y = df['sentiment']
# Interactive loop: print a predicted sentiment for each sentence the user
# types, until they enter 'exit' or the input stream ends.
# NOTE(review): predict_sentiment is not defined in the visible part of this
# file; it must be defined before this loop runs.
while True:
    try:
        user_input = input("Enter a sentence (or type 'exit' to quit): ")
    except EOFError:
        # Input stream closed (e.g. piped input ran out): stop cleanly
        # instead of ending with a traceback.
        break
    # Ignore surrounding whitespace and letter case for the quit command.
    if user_input.strip().lower() == 'exit':
        break
    sentiment = predict_sentiment(user_input)
    print(f"Predicted Sentiment: {sentiment}")
How to Run the Code