diff --git a/social_media_analyzer/fake_news_detector.py b/social_media_analyzer/fake_news_detector.py
index 8369574..8cc74ee 100644
--- a/social_media_analyzer/fake_news_detector.py
+++ b/social_media_analyzer/fake_news_detector.py
@@ -1,6 +1,7 @@
 import re
 import urllib.request
 from urllib.parse import urlparse
+import nltk
 from .heuristics import (
     FAKE_NEWS_DOMAINS,
     SENSATIONALIST_KEYWORDS,
@@ -11,6 +12,12 @@ def analyze_url_for_fake_news(url):
     """
     Analyzes a URL for indicators of fake news.
+
+    NOTE: This function requires the following NLTK data to be downloaded:
+    - 'punkt'
+    - 'averaged_perceptron_tagger'
+    - 'maxent_ne_chunker'
+    - 'words'
     """
     if not url.startswith(('http://', 'https://')):
         url = 'http://' + url
@@ -19,6 +26,10 @@ def analyze_url_for_fake_news(url):
     score = 0.0
     indicators_found = []
+    named_entities = {
+        "organizations": [],
+        "persons": [],
+    }

     # 1. Check against known fake news domains
     if domain in FAKE_NEWS_DOMAINS:
@@ -51,6 +62,21 @@ def analyze_url_for_fake_news(url):
                     score += HEURISTIC_WEIGHTS.get("CLICKBAIT_PATTERN", 1.5)
                     indicators_found.append(f"Found clickbait pattern: '{pattern}'")

+            # 5. Named Entity Recognition
+            tokens = nltk.word_tokenize(text_content)
+            tagged = nltk.pos_tag(tokens)
+            entities = nltk.ne_chunk(tagged)
+
+            for entity in entities:
+                if isinstance(entity, nltk.Tree):
+                    entity_text = " ".join([word for word, tag in entity.leaves()])
+                    if entity.label() == 'ORGANIZATION':
+                        if entity_text not in named_entities["organizations"]:
+                            named_entities["organizations"].append(entity_text)
+                    elif entity.label() == 'PERSON':
+                        if entity_text not in named_entities["persons"]:
+                            named_entities["persons"].append(entity_text)
+
         else:
             return {"error": f"Failed to fetch URL: HTTP status code {response.status}"}
     except Exception as e:
@@ -59,5 +85,6 @@ def analyze_url_for_fake_news(url):
     return {
         "url": url,
         "score": round(score, 2),
-        "indicators_found": indicators_found
+        "indicators_found": indicators_found,
+        "named_entities": named_entities
     }
diff --git a/social_media_analyzer/requirements.txt b/social_media_analyzer/requirements.txt
index f229360..16a8ce6 100644
--- a/social_media_analyzer/requirements.txt
+++ b/social_media_analyzer/requirements.txt
@@ -1 +1,3 @@
 requests
+nltk
+textblob
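A side note on the nltk requirement added above: analyze_url_for_fake_news now calls nltk.word_tokenize, nltk.pos_tag, and nltk.ne_chunk, and ne_chunk returns a tree whose labeled subtrees (PERSON, ORGANIZATION, and so on) are what the new loop collects. Those calls rely on the data packages listed in the new docstring. A minimal one-time setup sketch, with the package names taken from that docstring and a standard NLTK install assumed:

    import nltk

    # Tokenizer, POS tagger, and named-entity chunker models, plus the word list
    # the chunker depends on.
    for pkg in ('punkt', 'averaged_perceptron_tagger', 'maxent_ne_chunker', 'words'):
        nltk.download(pkg)
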
diff --git a/social_media_analyzer/scam_detector.py b/social_media_analyzer/scam_detector.py
index a73562a..ad66962 100644
--- a/social_media_analyzer/scam_detector.py
+++ b/social_media_analyzer/scam_detector.py
@@ -3,6 +3,7 @@
 import requests
 import os
 from urllib.parse import urlparse
+from textblob import TextBlob
 from .heuristics import (
     URGENCY_KEYWORDS,
     SENSITIVE_INFO_KEYWORDS,
@@ -127,7 +128,15 @@ def analyze_text_for_scams(text_content, platform=None, api_key=None):
     indicators_found = []
     urls_analyzed_details = []

-    # 1. Keyword-based checks
+    # 1. Sentiment Analysis
+    blob = TextBlob(text_content)
+    if blob.sentiment.polarity < -0.5:
+        message = "Strong negative sentiment detected in text."
+        if message not in indicators_found:
+            indicators_found.append(message)
+            score += HEURISTIC_WEIGHTS.get("NEGATIVE_SENTIMENT", 2.0)
+
+    # 2. Keyword-based checks
     keyword_checks = {
         "URGENCY": URGENCY_KEYWORDS,
         "SENSITIVE_INFO": SENSITIVE_INFO_KEYWORDS,
@@ -145,7 +154,7 @@ def analyze_text_for_scams(text_content, platform=None, api_key=None):
                     indicators_found.append(message)
                     score += HEURISTIC_WEIGHTS.get(category, 1.0)

-    # 2. Regex-based checks
+    # 3. Regex-based checks
     found_urls = URL_PATTERN.findall(text_content)
     for url_str in found_urls:
         is_susp, reason = is_url_suspicious(url_str, platform, api_key)
@@ -159,7 +168,7 @@ def analyze_text_for_scams(text_content, platform=None, api_key=None):
            indicators_found.append(f"Suspicious URL found: {url_str} (Reason: {reason})")
        urls_analyzed_details.append(url_analysis)

-    # 3. Financial Identifiers
+    # 4. Financial Identifiers
     for id_name, pattern in FINANCIAL_ADDRESS_PATTERNS.items():
         if pattern.search(text_content):
             message = f"Potential {id_name} identifier found."
@@ -167,7 +176,7 @@ def analyze_text_for_scams(text_content, platform=None, api_key=None):
                indicators_found.append(message)
                score += HEURISTIC_WEIGHTS.get(f"{id_name}_ADDRESS", 2.5)

-    # 4. Phone Numbers
+    # 5. Phone Numbers
     if PHONE_NUMBER_PATTERN.search(text_content):
         message = "Phone number detected in text."
         if message not in indicators_found:
diff --git a/social_media_analyzer/test_scam_detector.py b/social_media_analyzer/test_scam_detector.py
new file mode 100644
index 0000000..c148e14
--- /dev/null
+++ b/social_media_analyzer/test_scam_detector.py
@@ -0,0 +1,29 @@
+import unittest
+from .scam_detector import analyze_text_for_scams
+
+class TestScamDetector(unittest.TestCase):
+
+    def test_sentiment_analysis(self):
+        # Test case for negative sentiment
+        text_negative = "This is a terrible, awful, no good, very bad message."
+        result_negative = analyze_text_for_scams(text_negative)
+        self.assertIn("Strong negative sentiment detected in text.", result_negative["indicators_found"])
+
+        # Test case for positive sentiment
+        text_positive = "This is a wonderful, amazing, great message."
+        result_positive = analyze_text_for_scams(text_positive)
+        self.assertNotIn("Strong negative sentiment detected in text.", result_positive["indicators_found"])
+
+    def test_keyword_matching(self):
+        # Test case for a single urgency keyword
+        text_urgency = "URGENT: Your account has been compromised."
+        result_urgency = analyze_text_for_scams(text_urgency)
+        self.assertIn("Presence of 'Urgency' keyword: 'urgent'", result_urgency["indicators_found"])
+
+        # Test case for a multi-word sensitive-information phrase
+        text_phrase = "I need you to verify your account immediately."
+        result_phrase = analyze_text_for_scams(text_phrase)
+        self.assertIn("Presence of 'Sensitive Info' keyword: 'verify your account'", result_phrase["indicators_found"])
+
+if __name__ == '__main__':
+    unittest.main()
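For context on the sentiment check exercised by the first test above: TextBlob reports sentiment.polarity on a scale from -1.0 (most negative) to 1.0 (most positive), so the -0.5 threshold in analyze_text_for_scams only fires on strongly negative wording, which is why the test sentence stacks several negative adjectives. A quick sketch of the underlying call, assuming the textblob package from requirements.txt is installed:

    from textblob import TextBlob

    # Polarity below -0.5 is what analyze_text_for_scams treats as strong negative sentiment.
    polarity = TextBlob("This is a terrible, awful, no good, very bad message.").sentiment.polarity
    print(polarity)
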
diff --git a/src/App.jsx b/src/App.jsx
index 8b32d07..0a51996 100644
--- a/src/App.jsx
+++ b/src/App.jsx
@@ -1,27 +1,24 @@
+import React, { useState } from 'react';
 import './App.css';
+import ScamAnalyzer from './ScamAnalyzer';
+import FakeNewsAnalyzer from './FakeNewsAnalyzer';

 function App() {
+  const [view, setView] = useState('scam');
+
   return (
-    <div className="App">
-      <header className="App-header">
-        <img src={logo} className="App-logo" alt="logo" />
-        <p>
-          GitHub Codespaces ♥️ React
-        </p>
-        <p>
-          Edit <code>src/App.jsx</code> and save to reload.
-        </p>
-        <p>
-          <a className="App-link" href="https://reactjs.org" target="_blank" rel="noopener noreferrer">
-            Learn React
-          </a>
-        </p>
-      </header>
-    </div>
+    <div className="App">
+      <h1>Universal Security Analyzer</h1>
+      <button onClick={() => setView('scam')}>Scam Analyzer</button>
+      <button onClick={() => setView('fake-news')}>Fake News Analyzer</button>
+      {view === 'scam' && <ScamAnalyzer />}
+      {view === 'fake-news' && <FakeNewsAnalyzer />}
+    </div>
   );
 }
diff --git a/src/FakeNewsAnalyzer.jsx b/src/FakeNewsAnalyzer.jsx
new file mode 100644
index 0000000..53afa1a
--- /dev/null
+++ b/src/FakeNewsAnalyzer.jsx
@@ -0,0 +1,81 @@
+import React, { useState } from 'react';
+
+function FakeNewsAnalyzer() {
+  const [url, setUrl] = useState('');
+  const [result, setResult] = useState(null);
+  const [loading, setLoading] = useState(false);
+
+  const handleAnalyze = () => {
+    setLoading(true);
+    fetch('/analyze/fake-news', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({ url }),
+    })
+      .then((res) => res.json())
+      .then((data) => {
+        setResult(data);
+        setLoading(false);
+      })
+      .catch((error) => {
+        console.error('Error:', error);
+        setLoading(false);
+      });
+  };
+
+  return (
+    <div>
+      <h2>Fake News Analyzer</h2>
+      <input
+        type="text"
+        value={url}
+        onChange={(e) => setUrl(e.target.value)}
+        placeholder="Enter a news URL to analyze..."
+        size="50"
+      />
+      <button onClick={handleAnalyze} disabled={loading}>
+        {loading ? 'Analyzing...' : 'Analyze'}
+      </button>
+      {result && (
+        <div>
+          <h3>Analysis Results</h3>
+          {result.error ? (
+            <p>Error: {result.error}</p>
+          ) : (
+            <>
+              <p>Score: {result.score}</p>
+              <h4>Indicators Found:</h4>
+              <ul>
+                {result.indicators_found.map((indicator, index) => (
+                  <li key={index}>{indicator}</li>
+                ))}
+              </ul>
+              {result.named_entities && (
+                <>
+                  <h4>Named Entities Found:</h4>
+                  <h5>Organizations:</h5>
+                  <ul>
+                    {result.named_entities.organizations.map((org, index) => (
+                      <li key={index}>{org}</li>
+                    ))}
+                  </ul>
+                  <h5>Persons:</h5>
+                  <ul>
+                    {result.named_entities.persons.map((person, index) => (
+                      <li key={index}>{person}</li>
+                    ))}
+                  </ul>
+                </>
+              )}
+            </>
+          )}
+        </div>
+      )}
+    </div>
+  );
+}
+
+export default FakeNewsAnalyzer;
diff --git a/src/ScamAnalyzer.jsx b/src/ScamAnalyzer.jsx
new file mode 100644
index 0000000..1b74907
--- /dev/null
+++ b/src/ScamAnalyzer.jsx
@@ -0,0 +1,58 @@
+import React, { useState } from 'react';
+
+function ScamAnalyzer() {
+  const [text, setText] = useState('');
+  const [result, setResult] = useState(null);
+  const [loading, setLoading] = useState(false);
+
+  const handleAnalyze = () => {
+    setLoading(true);
+    fetch('/analyze/scam', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({ text }),
+    })
+      .then((res) => res.json())
+      .then((data) => {
+        setResult(data);
+        setLoading(false);
+      })
+      .catch((error) => {
+        console.error('Error:', error);
+        setLoading(false);
+      });
+  };
+
+  return (
+    <div>
+      <h2>Scam Analyzer</h2>
+