Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions social_media_analyzer/heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,37 @@
"bank transfer", "wire details", "account details", "iban", "swift code", "bic"
]

# --- Teenager Protection Heuristics ---
#
# NOTE(review): these lists are plain lowercase seed phrases consumed by
# teen_protection.analyze_text_for_teen_risks via case-insensitive matching.
# They are provisional — TODO: calibrate against labeled data / expert input.

# Keywords/phrases related to cyberbullying
# NOTE(review): short, common words here ("fat", "troll", "nerd", "go away")
# are high false-positive risks in benign contexts — confirm before shipping.
CYBERBULLYING_KEYWORDS = [
    "loser", "stupid", "idiot", "hate you", "ugly", "fat",
    "kill yourself", "kys", "go die", "nobody likes you", "freak",
    "weirdo", "everyone hates you", "you're worthless", "pathetic",
    "troll", "noob", "poser", "wannabe", "go away",
    "social reject", "outcast", "misfit", "dork", "nerd"
]

# Keywords/phrases related to inappropriate content (sexual, violent, etc.)
INAPPROPRIATE_CONTENT_KEYWORDS = [
    # Sexually suggestive
    "nude", "sexting", "send nudes", "horny", "slut", "whore", "dick", "pussy",
    "porn", "sexy pic", "private parts", "hook up",
    # Violence
    # NOTE(review): bare "kill"/"blood"/"gun" will match news or game chat —
    # substring-level terms like these need whole-word matching downstream.
    "kill", "murder", "blood", "gun", "knife", "fight me",
    "i will hurt you", "beat you up", "gonna get you",
    # Drugs/Alcohol
    "drugs", "weed", "cocaine", "pills", "get high", "drunk", "wasted"
]

# Keywords/phrases indicating oversharing of personal information
# NOTE(review): "i go to [school_name]" is a literal placeholder string and can
# never match real text — presumably meant as a template; verify intent.
PRIVACY_RISK_KEYWORDS = [
    "my address is", "i live at", "my phone number is", "call me at",
    "my full name is", "my school is", "i go to [school_name]",
    "my mom's name is", "my dad's name is",
    "i'm home alone", "parents are out", "my password is"
]


# --- Fake News Heuristics ---

Expand Down Expand Up @@ -240,6 +271,10 @@ def generate_suspicious_url_patterns(legitimate_domains):
"PHONE_NUMBER_UNSOLICITED": 1.0,
"SUSPICIOUS_URL_PATTERN": 3.0, # High weight for matching a suspicious URL pattern
"GOOGLE_SAFE_BROWSING_HIT": 10.0, # Very high weight for a positive Google Safe Browsing match
# Teenager Protection Weights
"CYBERBULLYING": 2.5,
"INAPPROPRIATE_CONTENT": 3.0,
"PRIVACY_RISK": 3.5,
}
Comment on lines 273 to 278
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Heuristic weights for teen protection categories may need calibration.

These weights appear arbitrary; please validate them against expert input or data to ensure appropriate risk prioritization.

Suggested change
"GOOGLE_SAFE_BROWSING_HIT": 10.0, # Very high weight for a positive Google Safe Browsing match
# Teenager Protection Weights
"CYBERBULLYING": 2.5,
"INAPPROPRIATE_CONTENT": 3.0,
"PRIVACY_RISK": 3.5,
}
"GOOGLE_SAFE_BROWSING_HIT": 10.0, # Very high weight for a positive Google Safe Browsing match
# Teenager Protection Weights
# NOTE: The following weights for teen protection categories are provisional.
# TODO: Validate these weights against expert input or empirical data to ensure appropriate risk prioritization.
"CYBERBULLYING": 2.5,
"INAPPROPRIATE_CONTENT": 3.0,
"PRIVACY_RISK": 3.5,
}


if __name__ == '__main__':
Expand Down
50 changes: 48 additions & 2 deletions social_media_analyzer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from . import fake_profile_detector
from . import scam_detector
from . import fake_news_detector
from . import teen_protection

def get_api_key():
"""Gets the Google API key from environment variables."""
Expand Down Expand Up @@ -130,6 +131,48 @@ def analyze_social_media(api_key):
except ValueError:
print("Invalid input. Please enter a number.")

def analyze_for_teen_risks():
    """Interactive console menu for the teenager-protection analyzers.

    Prompts for one of three analysis types, reads a block of text from
    stdin, runs the matching teen_protection analyzer, and prints the
    resulting score and any indicators found.
    """
    print("\n--- Teenager Protection Tools ---")
    print("Select the type of analysis you want to perform:")
    print("1. Analyze text for Cyberbullying")
    print("2. Analyze text for Inappropriate Content")
    print("3. Analyze text for Privacy Risks (Oversharing)")

    # Menu choice -> (section banner, analyzer callable).
    dispatch = {
        1: ("\n--- Analyzing for Cyberbullying ---",
            teen_protection.analyze_for_cyberbullying),
        2: ("\n--- Analyzing for Inappropriate Content ---",
            teen_protection.analyze_for_inappropriate_content),
        3: ("\n--- Analyzing for Privacy Risks ---",
            teen_protection.analyze_for_privacy_risks),
    }

    try:
        choice = int(input("Enter your choice (1-3): "))
    except ValueError:
        print("Invalid input. Please enter a number.")
        return
    if choice not in dispatch:
        print("Invalid choice. Please try again.")
        return

    text_to_analyze = input("Please paste the text you want to analyze: ").strip()
    if not text_to_analyze:
        print("No text entered.")
        return

    banner, analyzer = dispatch[choice]
    print(banner)
    result = analyzer(text_to_analyze)

    print(f"Score: {result['score']} (Higher is more suspicious)")
    if result['indicators_found']:
        print("Indicators Found:")
        for indicator in result['indicators_found']:
            print(f"- {indicator}")
    else:
        print("No specific risk indicators were found.")


def main():
"""Main function to run the security analyzer."""
api_key = get_api_key()
Expand All @@ -145,17 +188,20 @@ def main():
print("1. Analyze a Social Media Platform")
print("2. Analyze a Website URL for Scams")
print("3. Analyze a News URL for Fake News")
print("4. Exit")
print("4. Teenager Protection Tools")
print("5. Exit")

try:
choice = int(input("Enter your choice (1-4): "))
choice = int(input("Enter your choice (1-5): "))
if choice == 1:
analyze_social_media(api_key)
elif choice == 2:
analyze_website_url(api_key)
elif choice == 3:
analyze_news_url()
elif choice == 4:
analyze_for_teen_risks()
elif choice == 5:
print("Exiting. Stay safe!")
break
else:
Expand Down
58 changes: 58 additions & 0 deletions social_media_analyzer/teen_protection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from .heuristics import (
CYBERBULLYING_KEYWORDS,
INAPPROPRIATE_CONTENT_KEYWORDS,
PRIVACY_RISK_KEYWORDS,
HEURISTIC_WEIGHTS
)

def analyze_text_for_teen_risks(text, analysis_type):
    """
    Analyzes text for a specific type of risk to teenagers.

    Matching is case-insensitive and anchored at word edges, so a keyword
    only fires as a whole word/phrase — fixing the previous bare-substring
    check, where e.g. "kill" matched "skill" and "fat" matched "father".

    :param text: The text content to analyze.
    :param analysis_type: The type of analysis to perform ('cyberbullying',
                          'inappropriate_content', 'privacy_risk').
    :return: A dictionary with the score and indicators found, or a
             dictionary with an "error" key for an unknown analysis_type.
    """
    import re  # local import keeps this fix self-contained in the function

    if not text:
        return {"score": 0.0, "indicators_found": []}

    text_lower = text.lower()
    score = 0.0
    indicators_found = []

    keyword_map = {
        'cyberbullying': ('CYBERBULLYING', CYBERBULLYING_KEYWORDS),
        'inappropriate_content': ('INAPPROPRIATE_CONTENT', INAPPROPRIATE_CONTENT_KEYWORDS),
        'privacy_risk': ('PRIVACY_RISK', PRIVACY_RISK_KEYWORDS),
    }

    if analysis_type not in keyword_map:
        return {"error": "Invalid analysis type specified."}

    category, keywords = keyword_map[analysis_type]
    # Weight comes from the shared heuristics table; default to 1.0 so a
    # missing category degrades gracefully instead of raising KeyError.
    # (category is already uppercase, so no .upper() needed for the lookup.)
    weight = HEURISTIC_WEIGHTS.get(category, 1.0)

    for keyword in keywords:
        # Lookarounds instead of \b so phrases that begin/end with
        # punctuation still anchor correctly at word boundaries.
        pattern = r'(?<!\w)' + re.escape(keyword) + r'(?!\w)'
        if re.search(pattern, text_lower):
            message = f"Detected potential {category.replace('_', ' ').lower()} keyword: '{keyword}'"
            # Guard against duplicate entries in the keyword lists: each
            # distinct indicator is reported and scored at most once.
            if message not in indicators_found:
                indicators_found.append(message)
                score += weight

    return {
        "score": round(score, 2),
        "indicators_found": indicators_found
    }

def analyze_for_cyberbullying(text):
    """Convenience wrapper: score *text* against the cyberbullying heuristics."""
    return analyze_text_for_teen_risks(text, 'cyberbullying')

def analyze_for_inappropriate_content(text):
    """Convenience wrapper: score *text* against the inappropriate-content heuristics."""
    return analyze_text_for_teen_risks(text, 'inappropriate_content')

def analyze_for_privacy_risks(text):
    """Convenience wrapper: score *text* against the privacy-risk (oversharing) heuristics."""
    return analyze_text_for_teen_risks(text, 'privacy_risk')
13 changes: 8 additions & 5 deletions social_media_analyzer/test_runner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest
from unittest.mock import patch, Mock
from social_media_analyzer.scam_detector import analyze_text_for_scams
from .scam_detector import analyze_text_for_scams
from .test_teen_protection import TestTeenProtection

def run_manual_tests():
# Example Usage
Expand Down Expand Up @@ -91,11 +92,13 @@ def test_google_safe_browsing_clean(self, mock_post):
if __name__ == '__main__':
run_manual_tests()
# Run unit tests
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(TestScamDetector))
scam_suite = unittest.makeSuite(TestScamDetector)
teen_suite = unittest.makeSuite(TestTeenProtection)
all_tests = unittest.TestSuite([scam_suite, teen_suite])

runner = unittest.TextTestRunner()
print("\n--- Running Unit Tests for Google Safe Browsing Integration ---")
result = runner.run(suite)
print("\n--- Running All Unit Tests ---")
result = runner.run(all_tests)
if result.wasSuccessful():
print("All tests passed!")
else:
Expand Down
70 changes: 70 additions & 0 deletions social_media_analyzer/test_teen_protection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import unittest
from .teen_protection import (
analyze_for_cyberbullying,
analyze_for_inappropriate_content,
analyze_for_privacy_risks
)

class TestTeenProtection(unittest.TestCase):
    """Unit tests for the teen_protection keyword analyzers."""

    def test_cyberbullying(self):
        """Test the cyberbullying detection."""
        # Test case with bullying keywords
        text1 = "You are such a loser and an idiot."
        result1 = analyze_for_cyberbullying(text1)
        self.assertGreater(result1['score'], 0)
        self.assertIn("Detected potential cyberbullying keyword: 'loser'", result1['indicators_found'])
        self.assertIn("Detected potential cyberbullying keyword: 'idiot'", result1['indicators_found'])

        # Test case with no bullying keywords
        text2 = "Have a great day!"
        result2 = analyze_for_cyberbullying(text2)
        self.assertEqual(result2['score'], 0)
        self.assertEqual(len(result2['indicators_found']), 0)

    def test_repeated_keyword_counted_once(self):
        """A keyword occurring multiple times yields one indicator and one score increment."""
        once = analyze_for_cyberbullying("loser")
        repeated = analyze_for_cyberbullying("loser loser loser")
        self.assertEqual(repeated['score'], once['score'])
        self.assertEqual(len(repeated['indicators_found']), 1)

    def test_inappropriate_content(self):
        """Test the inappropriate content detection."""
        # Test case with inappropriate keywords
        text1 = "Don't send nudes or talk about drugs."
        result1 = analyze_for_inappropriate_content(text1)
        self.assertGreater(result1['score'], 0)
        self.assertIn("Detected potential inappropriate content keyword: 'send nudes'", result1['indicators_found'])
        self.assertIn("Detected potential inappropriate content keyword: 'drugs'", result1['indicators_found'])

        # Test case with no inappropriate keywords
        text2 = "This is a perfectly normal conversation."
        result2 = analyze_for_inappropriate_content(text2)
        self.assertEqual(result2['score'], 0)
        self.assertEqual(len(result2['indicators_found']), 0)

    def test_privacy_risks(self):
        """Test the privacy risk detection."""
        # Test case with privacy risk keywords
        text1 = "My address is 123 Main St and my phone number is 555-1234."
        result1 = analyze_for_privacy_risks(text1)
        self.assertGreater(result1['score'], 0)
        self.assertIn("Detected potential privacy risk keyword: 'my address is'", result1['indicators_found'])
        self.assertIn("Detected potential privacy risk keyword: 'my phone number is'", result1['indicators_found'])

        # Test case with no privacy risk keywords
        text2 = "I like to talk about my hobbies."
        result2 = analyze_for_privacy_risks(text2)
        self.assertEqual(result2['score'], 0)
        self.assertEqual(len(result2['indicators_found']), 0)

    def test_empty_input(self):
        """Test empty input for all analysis types."""
        result_cb = analyze_for_cyberbullying("")
        self.assertEqual(result_cb['score'], 0)
        self.assertEqual(len(result_cb['indicators_found']), 0)

        result_ic = analyze_for_inappropriate_content("")
        self.assertEqual(result_ic['score'], 0)
        self.assertEqual(len(result_ic['indicators_found']), 0)

        result_pr = analyze_for_privacy_risks("")
        self.assertEqual(result_pr['score'], 0)
        self.assertEqual(len(result_pr['indicators_found']), 0)

if __name__ == '__main__':
    unittest.main()
Loading