Skip to content

Commit ed1cc01

Browse files
authored
feat(tasks 2 & 3): Add deep learning and NLP scripts to src/
Includes the TensorFlow CNN script for MNIST and the spaCy script for NER and Sentiment Analysis.
1 parent 271db13 commit ed1cc01

File tree

2 files changed

+207
-0
lines changed

2 files changed

+207
-0
lines changed

src/task2_deep_learning_cnn.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Task 2: Deep Learning with TensorFlow/Keras
4+
# Objective: Build and train a CNN to classify handwritten digits (MNIST)
5+
# Framework: TensorFlow / Keras (Deep Learning)
6+
7+
import tensorflow as tf
8+
from tensorflow.keras.datasets import mnist
9+
from tensorflow.keras.models import Sequential
10+
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
11+
from tensorflow.keras.utils import to_categorical
12+
import numpy as np
13+
import matplotlib.pyplot as plt
14+
15+
# Announce which task is running before the dataset is fetched and training starts.
print("--- Task 2: CNN Image Classification (MNIST Dataset) ---")

# 1. Load and Preprocess Data
# Produces x_train/x_test (float32, channel-last), y_train/y_test (raw labels),
# y_train_encoded/y_test_encoded (one-hot) and num_classes for the steps below.
try:
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Reshape: Add channel dimension (28x28 -> 28x28x1)
    x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
    x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

    # Normalize: Scale pixel values from 0-255 to 0-1
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255

    # One-Hot Encoding: Convert labels to binary vectors
    num_classes = 10
    y_train_encoded = to_categorical(y_train, num_classes)
    y_test_encoded = to_categorical(y_test, num_classes)

    print(f"\nTraining data shape: {x_train.shape}")

except Exception as e:
    print(f"Error loading or preprocessing MNIST data: {e}")
    # Exit with a non-zero status so shells/CI see the failure. The bare
    # `exit()` helper is injected by the `site` module (absent under
    # `python -S`) and, called without arguments, reports success (status 0).
    raise SystemExit(1)
40+
41+
42+
# 2. Define the CNN Model Architecture
43+
# CNN: two convolution/pooling stages, dropout, then a dense classifier head.
model = Sequential([
    # Declare the input explicitly instead of passing `input_shape=` to the
    # first layer; the latter is the legacy form and triggers a warning on
    # Keras 3. Shape is one 28x28 single-channel image.
    tf.keras.Input(shape=(28, 28, 1)),

    # First Convolution
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Second Convolution
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Dropout(0.25),

    # Fully Connected Layers
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),

    # Output Layer: one softmax probability per class
    Dense(num_classes, activation='softmax'),
])
62+
63+
# 3. Compile the Model
64+
print("\n3. Compiling Model...")
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',  # labels are one-hot encoded, so use the categorical loss
    metrics=['accuracy'],
)

model.summary()


# 4. Train the Model
print("\n4. Training the CNN Model...")
history = model.fit(
    x_train, y_train_encoded,
    batch_size=128,
    epochs=10,
    verbose=1,
    # Monitor progress on a slice held out from the training data rather than
    # on the test set, so the test set stays unseen until the final
    # evaluation step and the reported test accuracy is a genuine hold-out score.
    validation_split=0.1,
)
print(" Training finished.")
84+
85+
86+
# 5. Evaluate Performance
87+
print("\n5. Evaluating Model Performance on Test Set...")
# evaluate() returns the loss first, followed by the compiled metrics (accuracy).
score = model.evaluate(x_test, y_test_encoded, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# Report whether the accuracy target (strictly above 95%) was reached.
target_met = test_accuracy > 0.95
print(
    " Success! Test Accuracy target (>95%) achieved."
    if target_met
    else " Note: Test Accuracy target not reached. Review model parameters."
)
97+
98+
99+
# 6. Visualize Predictions
100+
test_samples = x_test[:5]
test_labels = y_test[:5]
predictions = model.predict(test_samples)
# The predicted digit is the index of the highest class probability per row.
predicted_classes = np.argmax(predictions, axis=1)

print("\n6. Visualizing 5 Sample Predictions (will require manual display of the plot).")

# One row of five panels: each shows the image with its true and predicted label.
fig, axes = plt.subplots(1, 5, figsize=(12, 4))
for ax, image, actual, guessed in zip(axes, test_samples, test_labels, predicted_classes):
    # Drop the channel axis so imshow receives a plain 2-D grayscale image.
    ax.imshow(image.reshape(28, 28), cmap='gray')
    ax.set_title(f"True: {actual}\nPred: {guessed}", fontsize=10)
    ax.axis('off')
fig.suptitle("CNN Predictions on Sample MNIST Images")
# The figure is built but intentionally neither saved nor shown here.
# To persist it for a report:  plt.savefig('assets/cnn_predictions_sample.png')
# To display it interactively: plt.show()

print("\nTask 2 completed. CNN trained and evaluated.")

src/task3_nlp_spacy.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Task 3: NLP with spaCy
4+
# Objective: Perform Named Entity Recognition (NER) and rule-based Sentiment Analysis on user reviews.
5+
# Framework: spaCy (Natural Language Processing)
6+
7+
import spacy
8+
from spacy.lang.en.stop_words import STOP_WORDS
9+
10+
# Note: The model 'en_core_web_sm' must be downloaded separately
11+
# (e.g. `python -m spacy download en_core_web_sm`)
12+
13+
print("--- Task 3: NLP - NER and Rule-based Sentiment ---")

# 1. Sample Text Data (Reviews)
# Hard-coded sample reviews that the NER and sentiment steps below iterate over.
amazon_reviews = [
    "The new 'ChronoWatch X1' is absolutely fantastic! Battery life is great, and the 'Ogetec' brand delivered quickly.",
    "I was disappointed with the 'ZenBook 14'. It overheats constantly. I regret buying this laptop.",
    "This book, 'The AI Engineer', is highly informative and a must-read for any developer. Five stars!",
    "The customer service from 'EcoGoods' was terrible. My package arrived late and damaged.",
]

# 2. Load spaCy Model
try:
    # Load the English model
    nlp = spacy.load("en_core_web_sm")
    print("\nspaCy model 'en_core_web_sm' loaded successfully.")
except OSError:
    print("\n[ERROR] spaCy model 'en_core_web_sm' not found.")
    print("Please run: python -m spacy download en_core_web_sm")
    # Exit with a non-zero status so callers can detect the missing model.
    # The bare `exit()` helper comes from the `site` module (absent under
    # `python -S`) and, called without arguments, reports success (status 0).
    raise SystemExit(1)
33+
34+
# 3. Named Entity Recognition (NER)
35+
print("\n3. Named Entity Recognition (NER) Results:")


def perform_ner(text):
    """
    Run the loaded spaCy pipeline on ``text`` and collect its named entities.

    Returns a list of ``(entity_text, entity_label)`` tuples, one per entity
    in ``doc.ents``, in document order.
    """
    found = []
    for span in nlp(text).ents:
        found.append((span.text, span.label_))
    return found
44+
45+
for number, review in enumerate(amazon_reviews, start=1):
    entities = perform_ner(review)

    # Keep only the entity labels that can denote a product or a brand.
    product_brands = [
        pair for pair in entities
        if pair[1] in {'ORG', 'PRODUCT', 'WORK_OF_ART'}
    ]

    # Show a 50-character preview of the review as a heading.
    print(f"\nReview {number}: '{review[:50]}...'")
    if not product_brands:
        print(" No relevant entities found.")
        continue
    print(f" Extracted Entities (Product/Brand): {product_brands}")
56+
57+
58+
# 4. Rule-Based Sentiment Analysis
59+
# This is a simple, rule-based approach using keywords, as required by the task.
60+
61+
positive_words = {"fantastic", "great", "highly informative", "must-read", "five stars", "excellent", "love"}
negative_words = {"disappointed", "overheats", "regret", "terrible", "late", "damaged", "awful"}


def analyze_sentiment(text):
    """
    Classify ``text`` by counting positive and negative keywords.

    Single-word keywords are matched against the lower-cased, alphabetic,
    non-stop-word tokens produced by the spaCy pipeline. Keywords that contain
    spaces or hyphens (e.g. "five stars", "must-read") can never equal a single
    alphabetic token, so they are matched as whole phrases against the
    lower-cased text instead.

    Returns "Positive", "Negative", or "Neutral/Mixed" (when the counts tie,
    including when no keyword is found).
    """
    import re  # local import: only this function needs it

    lowered = text.lower()
    doc = nlp(lowered)

    # Tokenization and removing stop words
    tokens = [token.text for token in doc if token.text not in STOP_WORDS and token.is_alpha]

    def count_keywords(keywords):
        """Count every occurrence of each keyword or phrase in the review."""
        hits = 0
        for keyword in keywords:
            if keyword.isalpha():
                # Plain word: compare against the filtered tokens.
                hits += tokens.count(keyword)
            else:
                # Phrase: whole-phrase match, not embedded inside a longer word.
                pattern = r"(?<!\w)" + re.escape(keyword) + r"(?!\w)"
                hits += len(re.findall(pattern, lowered))
        return hits

    pos_count = count_keywords(positive_words)
    neg_count = count_keywords(negative_words)

    if pos_count > neg_count:
        return "Positive"
    elif neg_count > pos_count:
        return "Negative"
    else:
        return "Neutral/Mixed"
82+
83+
print("\n4. Rule-Based Sentiment Analysis Results:")
# Print one line per sample review, numbered from 1.
for number, review in enumerate(amazon_reviews, start=1):
    print(f"Review {number}: Sentiment: {analyze_sentiment(review)}")

print("\nTask 3 completed successfully. NER and Sentiment analysis performed.")

0 commit comments

Comments
 (0)