NLP Programs
1. How do you tokenize a sentence using NLTK?
Answer:
from nltk.tokenize import word_tokenize
sentence = "Hello, how are you?"
tokens = word_tokenize(sentence)
print(tokens) # Output: ['Hello', ',', 'how', 'are', 'you', '?']
2. How do you remove stop words from a sentence using NLTK?
Answer:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
stop_words = set(stopwords.words('english'))
sentence = "This is a sample sentence."
words = word_tokenize(sentence)
filtered_words = [word for word in words if word.lower() not in stop_words]
print(filtered_words) # Output: ['sample', 'sentence', '.']
3. How do you perform stemming using NLTK?
Answer:
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()
word = "running"
stemmed_word = stemmer.stem(word)
print(stemmed_word) # Output: 'run'
4. How do you perform lemmatization using NLTK?
Answer:
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
word = "better"
lemma = lemmatizer.lemmatize(word, pos='a') # 'a' for adjective
print(lemma) # Output: 'good'
5. How do you extract named entities from text using spaCy?
Answer:
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
for ent in doc.ents:
    print(ent.text, ent.label_) # Output: Apple ORG, U.K. GPE, $1 billion MONEY
6. How do you compute TF-IDF features using scikit-learn?
Answer:
from sklearn.feature_extraction.text import TfidfVectorizer
corpus = ["This is a sample sentence.", "This is another example sentence."]
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(corpus)
print(tfidf_matrix.toarray())
7. How do you train a Word2Vec model using Gensim?
Answer:
from gensim.models import Word2Vec
sentences = [["I", "love", "NLP"], ["NLP", "is", "fun"]]
model = Word2Vec(sentences, vector_size=100, window=5, min_count=1, workers=4)
print(model.wv["NLP"]) # Output: Word vector for "NLP"
8. How do you load pre-trained GloVe embeddings?
Answer:
import numpy as np
def load_glove(file):
    embeddings = {}
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            word = values[0]
            vector = np.asarray(values[1:], dtype='float32')
            embeddings[word] = vector
    return embeddings
glove_embeddings = load_glove("glove.6B.100d.txt")
print(glove_embeddings["the"])
9. How do you perform sentiment analysis using TextBlob?
Answer:
from textblob import TextBlob
text = "I love NLP!"
blob = TextBlob(text)
print(blob.sentiment) # Output: Sentiment(polarity=..., subjectivity=...); polarity > 0 indicates positive sentiment
10. How do you create a bag-of-words (BoW) model using scikit-learn?
Answer:
from sklearn.feature_extraction.text import CountVectorizer
corpus = ["This is a sample sentence.", "This is another example sentence."]
vectorizer = CountVectorizer()
bow_matrix = vectorizer.fit_transform(corpus)
print(bow_matrix.toarray())
11. How do you perform part-of-speech (POS) tagging using NLTK?
Answer:
from nltk import pos_tag
from nltk.tokenize import word_tokenize
sentence = "I love NLP."
tokens = word_tokenize(sentence)
tags = pos_tag(tokens)
print(tags) # Output: [('I', 'PRP'), ('love', 'VBP'), ('NLP', 'NNP'), ('.', '.')]
12. How do you perform dependency parsing using spaCy?
Answer:
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("I love NLP.")
for token in doc:
    print(token.text, token.dep_, token.head.text) # Output: I nsubj love, love ROOT love, NLP dobj love, . punct love
13. How do you generate n-grams using NLTK?
Answer:
from nltk import ngrams
sentence = "I love NLP."
tokens = sentence.split()
bigrams = list(ngrams(tokens, 2))
print(bigrams) # Output: [('I', 'love'), ('love', 'NLP.')]
14. How do you build a text classification pipeline using scikit-learn?
Answer:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
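The answer breaks off after the imports; a minimal sketch of how the pipeline could be assembled and used (the toy texts and labels are illustrative assumptions):
# Hypothetical toy training data
texts = ["I love NLP.", "Spam is bad.", "NLP is fun.", "I hate spam."]
labels = [1, 0, 1, 0]

# Chain TF-IDF features into a Naive Bayes classifier
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(texts, labels)
print(model.predict(["NLP is great."])) # e.g. [1]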
15. How do you generate a word cloud from text?
Answer:
from wordcloud import WordCloud
import matplotlib.pyplot as plt
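The rest of this answer is missing; a minimal sketch, assuming the goal is to render a word cloud for a short sample text:
# Hypothetical sample text
text = "NLP is fun and NLP is powerful"

# Build the word cloud image and display it with matplotlib
wordcloud = WordCloud(width=400, height=200, background_color="white").generate(text)
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.show()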
16. How do you preprocess text for NLP tasks?
Answer:
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
def preprocess(text):
    text = re.sub(r'\W', ' ', text) # Remove special characters
    text = text.lower() # Convert to lowercase
    tokens = word_tokenize(text) # Tokenize
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words] # Remove stop words
    stemmer = PorterStemmer()
    tokens = [stemmer.stem(word) for word in tokens] # Stemming
    return tokens
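For example:
print(preprocess("I love NLP!")) # ['love', 'nlp']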
17. How do you count word frequencies in a text?
Answer:
from collections import Counter
text = "I love NLP. NLP is fun."
words = text.split()
word_freq = Counter(words)
print(word_freq) # Output: Counter({'I': 1, 'love': 1, 'NLP.': 1, 'NLP': 1, 'is': 1, 'fun.': 1})
18. How do you split text into sentences using spaCy?
Answer:
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("I love NLP. It is fun.")
for sent in doc.sents:
    print(sent.text) # Output: "I love NLP." then "It is fun."
19. How do you perform topic modeling using LDA in Gensim?
Answer:
from gensim import corpora
from gensim.models import LdaModel
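The body of this answer is missing; a minimal sketch, assuming a small tokenized corpus:
# Hypothetical tokenized documents
texts = [["nlp", "is", "fun"], ["topic", "models", "find", "topics"], ["nlp", "topic", "models"]]

# Map tokens to ids and build a bag-of-words corpus
dictionary = corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]

# Train a 2-topic LDA model and inspect the topics
lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2, passes=10)
print(lda.print_topics())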
20. How do you evaluate a text classification model using scikit-learn?
Answer:
from sklearn.metrics import classification_report, accuracy_score
y_true = [1, 0, 1, 0]
y_pred = [1, 1, 0, 0]
print(classification_report(y_true, y_pred))
print("Accuracy:", accuracy_score(y_true, y_pred))
21. How do you fine-tune a pre-trained BERT model for text classification?
Answer:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
dataset = load_dataset("imdb")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)
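The answer stops at the tokenize function; a minimal sketch of the remaining fine-tuning steps with the Trainer API (the subset size and hyperparameters are illustrative assumptions):
# Tokenize the dataset and take a small subset so the sketch runs quickly
tokenized = dataset.map(tokenize_function, batched=True)
train_subset = tokenized["train"].shuffle(seed=42).select(range(1000))

# Standard Trainer setup; hyperparameters here are placeholders
training_args = TrainingArguments(output_dir="./results", num_train_epochs=1, per_device_train_batch_size=8)
trainer = Trainer(model=model, args=training_args, train_dataset=train_subset)
trainer.train()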
22. How do you use a pre-trained GPT-2 model for text generation?
Answer:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
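The generation step itself is missing from the answer; a minimal sketch, assuming a short prompt:
# Encode a prompt and generate a continuation
input_ids = tokenizer.encode("Natural language processing is", return_tensors="pt")
output = model.generate(input_ids, max_length=30, num_return_sequences=1)
print(tokenizer.decode(output[0], skip_special_tokens=True))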
23. How do you perform text summarization using Hugging Face Transformers?
Answer:
from transformers import pipeline
summarizer = pipeline("summarization")
text = "Natural Language Processing (NLP) is a field of AI focused on the interaction between computers a
nd humans using natural language."
summary = summarizer(text, max_length=30, min_length=10, do_sample=False)
print(summary[0]['summary_text'])
24. How do you perform question answering using a pre-trained BERT model?
Answer:
from transformers import pipeline
qa_pipeline = pipeline("question-answering")
context = "Natural Language Processing (NLP) is a field of AI focused on the interaction between computers and humans using natural language."
question = "What is NLP?"
result = qa_pipeline(question=question, context=context)
print(result['answer']) # Output: a field of AI
25. How do you perform named entity recognition (NER) using spaCy?
Answer:
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
for ent in doc.ents:
    print(ent.text, ent.label_) # Output: Apple ORG, U.K. GPE, $1 billion MONEY
26. How do you train a custom NER model using spaCy?
Answer:
import spacy
from spacy.training import Example
nlp = spacy.blank("en")
ner = nlp.add_pipe("ner")
ner.add_label("ORG")
train_data = [
    ("Apple is looking at buying U.K. startup for $1 billion", {"entities": [(0, 5, "ORG")]})
]
optimizer = nlp.initialize() # spaCy 3.x; replaces the deprecated begin_training()
for _ in range(10):
    for text, annotations in train_data:
        example = Example.from_dict(nlp.make_doc(text), annotations)
        nlp.update([example], sgd=optimizer)
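A quick check of what the toy model has learned (with a single example and ten passes the predictions will be unreliable; this only shows the call pattern):
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
print([(ent.text, ent.label_) for ent in doc.ents])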
27. How do you perform sentiment analysis using Hugging Face Transformers?
Answer:
from transformers import pipeline
sentiment_analyzer = pipeline("sentiment-analysis")
text = "I love NLP!"
result = sentiment_analyzer(text)
print(result) # Output: [{'label': 'POSITIVE', 'score': 0.9998}]
28. How do you perform machine translation using Hugging Face Transformers?
Answer:
from transformers import pipeline
translator = pipeline("translation_en_to_fr")
text = "Hello, how are you?"
translation = translator(text)
print(translation[0]['translation_text']) # Output: Bonjour, comment ça va ?
29. How do you visualize word embeddings using t-SNE?
Answer:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np

# Assumes `model` is a trained gensim Word2Vec model (see question 7)
words = list(model.wv.index_to_key)
word_vectors = np.array([model.wv[word] for word in words])

# Project the vectors to 2-D; perplexity must be smaller than the number of words
tsne = TSNE(n_components=2, perplexity=2, random_state=0)
word_vectors_2d = tsne.fit_transform(word_vectors)

# Plot
plt.scatter(word_vectors_2d[:, 0], word_vectors_2d[:, 1])
for i, word in enumerate(words):
    plt.annotate(word, xy=(word_vectors_2d[i, 0], word_vectors_2d[i, 1]))
plt.show()
30. How do you cluster documents using K-Means?
Answer:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
documents = ["I love NLP.", "I hate spam.", "NLP is fun.", "Spam is bad."]
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(documents)

# Cluster the TF-IDF vectors into two groups
kmeans = KMeans(n_clusters=2, random_state=0, n_init=10)
kmeans.fit(X)
print(kmeans.labels_) # Cluster id for each document
31. How do you perform text classification using a pre-trained BERT model?
Answer:
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
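The answer is cut off before the model is used; a minimal sketch wiring the loaded model into a pipeline (note that bert-base-uncased has a randomly initialized classification head, so the labels are meaningless until the model is fine-tuned):
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("I love NLP!")) # e.g. [{'label': 'LABEL_0', 'score': ...}]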
32. How do you compute cosine similarity between two documents?
Answer:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
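Only the imports survive here; a minimal sketch, assuming two short sample documents:
# Hypothetical documents
docs = ["I love NLP.", "NLP is fun."]

# Vectorize with TF-IDF, then compare the two vectors
tfidf = TfidfVectorizer().fit_transform(docs)
similarity = cosine_similarity(tfidf[0], tfidf[1])
print(similarity[0][0]) # Value in [0, 1]; higher means more similar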
Answer:
import spacy
nlp = spacy.load("en_core_web_sm")
text = "I love NLP! It's amazing."
doc = nlp(text)
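This answer ends after parsing the text, and its original question did not survive extraction; a minimal continuation, assuming the goal was to inspect the parsed tokens:
# Print each token with a couple of its annotations
for token in doc:
    print(token.text, token.pos_, token.is_punct)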
34. How do you build an LSTM model for text classification using Keras?
Answer:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Sample data
texts = ["I love NLP", "I hate spam"]
labels = [1, 0]
# Tokenization
tokenizer = Tokenizer(num_words=1000)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=10)
# LSTM model for binary classification
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(1000, 64),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.fit(padded_sequences, np.array(labels), epochs=10)
35. How do you generate text with a character-level LSTM in Keras?
Answer:
import tensorflow as tf
import numpy as np
# Sample text
text = "I love NLP"
chars = sorted(list(set(text)))
char_to_index = {c: i for i, c in enumerate(chars)}
# Prepare data
seq_length = 3
X = []
y = []
for i in range(len(text) - seq_length):
    X.append([char_to_index[c] for c in text[i:i+seq_length]])
    y.append(char_to_index[text[i+seq_length]])
X = np.array(X)
y = tf.keras.utils.to_categorical(y, num_classes=len(chars))
# LSTM model: an Embedding layer maps the integer character indices to vectors,
# so the model accepts inputs of shape (batch, seq_length)
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(len(chars), 8),
    tf.keras.layers.LSTM(128),
    tf.keras.layers.Dense(len(chars), activation="softmax")
])
model.compile(optimizer="adam", loss="categorical_crossentropy")
model.fit(X, y, epochs=100)
# Generate text
def generate_text(seed, length):
    for _ in range(length):
        seed_encoded = [char_to_index[c] for c in seed]
        seed_encoded = np.array(seed_encoded).reshape(1, seq_length)
        pred = model.predict(seed_encoded, verbose=0)
        next_char = chars[np.argmax(pred)]
        seed = seed[1:] + next_char
    return seed
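A quick usage example (the seed must be exactly seq_length characters long):
print(generate_text("I l", 5)) # Generates 5 characters from the toy model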
41. How do you use the OpenAI API for text generation?
Answer:
import openai
openai.api_key = "your-api-key"
# Legacy Completions API (openai<1.0)
response = openai.Completion.create(
    engine="davinci",
    prompt="Translate English to French: 'Hello, how are you?'",
    max_tokens=50
)
print(response.choices[0].text.strip())
42. How do you perform zero-shot text classification using Hugging Face Transformers?
Answer:
from transformers import pipeline
classifier = pipeline("zero-shot-classification")
result = classifier(
    "I love NLP!",
    candidate_labels=["positive", "negative"]
)
print(result) # Output: {'labels': ['positive', 'negative'], 'scores': [0.99, 0.01]}
Answer:
from transformers import pipeline
Answer:
from transformers import T5ForConditionalGeneration, T5Tokenizer
model = T5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")
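The task this answer addressed did not survive extraction; a minimal sketch of running the loaded T5 model on a text-to-text task (the translation prefix is an illustrative assumption):
# T5 frames every task as text-to-text; the task is selected via a prompt prefix
input_ids = tokenizer("translate English to German: Hello, how are you?", return_tensors="pt").input_ids
outputs = model.generate(input_ids, max_length=40)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))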