29 changes: 28 additions & 1 deletion social_media_analyzer/fake_news_detector.py
@@ -1,6 +1,7 @@
import re
import urllib.request
from urllib.parse import urlparse
import nltk
from .heuristics import (
FAKE_NEWS_DOMAINS,
SENSATIONALIST_KEYWORDS,
@@ -11,6 +12,12 @@
def analyze_url_for_fake_news(url):
"""
Analyzes a URL for indicators of fake news.

NOTE: This function requires the following NLTK data to be downloaded:
- 'punkt'
- 'averaged_perceptron_tagger'
- 'maxent_ne_chunker'
- 'words'
"""
if not url.startswith(('http://', 'https://')):
url = 'http://' + url
@@ -19,6 +26,10 @@ def analyze_url_for_fake_news(url):

score = 0.0
indicators_found = []
named_entities = {
"organizations": [],
"persons": [],
}
Comment on lines +29 to +32

issue: Named entity extraction does not handle cases where NLTK data is missing.

Catch NLTK exceptions and provide a user-friendly error message or guidance on downloading missing data.
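
A minimal sketch of the suggested guard, assuming NLTK's standard behavior of raising LookupError when a required resource is absent (the helper name is illustrative, not part of this PR):

import nltk

def safe_ne_chunk(text):
    # Illustrative helper: run the NER pipeline, but fail with guidance
    # instead of a bare LookupError when NLTK data is missing.
    try:
        tokens = nltk.word_tokenize(text)
        tagged = nltk.pos_tag(tokens)
        return nltk.ne_chunk(tagged)
    except LookupError as e:
        raise RuntimeError(
            "Missing NLTK data. Run nltk.download() for: 'punkt', "
            "'averaged_perceptron_tagger', 'maxent_ne_chunker', 'words'"
        ) from e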


# 1. Check against known fake news domains
if domain in FAKE_NEWS_DOMAINS:
@@ -51,6 +62,21 @@ def analyze_url_for_fake_news(url):
score += HEURISTIC_WEIGHTS.get("CLICKBAIT_PATTERN", 1.5)
indicators_found.append(f"Found clickbait pattern: '{pattern}'")

# 5. Named Entity Recognition
tokens = nltk.word_tokenize(text_content)
tagged = nltk.pos_tag(tokens)
entities = nltk.ne_chunk(tagged)

for entity in entities:
if isinstance(entity, nltk.Tree):
entity_text = " ".join([word for word, tag in entity.leaves()])
if entity.label() == 'ORGANIZATION':
if entity_text not in named_entities["organizations"]:
named_entities["organizations"].append(entity_text)
elif entity.label() == 'PERSON':
if entity_text not in named_entities["persons"]:
named_entities["persons"].append(entity_text)

else:
return {"error": f"Failed to fetch URL: HTTP status code {response.status}"}
except Exception as e:
@@ -59,5 +85,6 @@
return {
"url": url,
"score": round(score, 2),
"indicators_found": indicators_found
"indicators_found": indicators_found,
"named_entities": named_entities
}
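
For reference, the NLTK data listed in the new docstring can be fetched once with NLTK's standard downloader; a one-time setup sketch (not part of the diff):

import nltk

for pkg in ("punkt", "averaged_perceptron_tagger", "maxent_ne_chunker", "words"):
    nltk.download(pkg)  # skips the download if the package is already present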
2 changes: 2 additions & 0 deletions social_media_analyzer/requirements.txt
@@ -1 +1,3 @@
requests
nltk
textblob
17 changes: 13 additions & 4 deletions social_media_analyzer/scam_detector.py
@@ -3,6 +3,7 @@
import requests
import os
from urllib.parse import urlparse
from textblob import TextBlob
from .heuristics import (
URGENCY_KEYWORDS,
SENSITIVE_INFO_KEYWORDS,
@@ -127,7 +128,15 @@ def analyze_text_for_scams(text_content, platform=None, api_key=None):
indicators_found = []
urls_analyzed_details = []

# 1. Keyword-based checks
# 1. Sentiment Analysis
blob = TextBlob(text_content)
if blob.sentiment.polarity < -0.5:
message = "Strong negative sentiment detected in text."
if message not in indicators_found:
indicators_found.append(message)
score += HEURISTIC_WEIGHTS.get("NEGATIVE_SENTIMENT", 2.0)

# 2. Keyword-based checks
keyword_checks = {
"URGENCY": URGENCY_KEYWORDS,
"SENSITIVE_INFO": SENSITIVE_INFO_KEYWORDS,
@@ -145,7 +154,7 @@ def analyze_text_for_scams(text_content, platform=None, api_key=None):
indicators_found.append(message)
score += HEURISTIC_WEIGHTS.get(category, 1.0)

# 2. Regex-based checks
# 3. Regex-based checks
found_urls = URL_PATTERN.findall(text_content)
for url_str in found_urls:
is_susp, reason = is_url_suspicious(url_str, platform, api_key)
@@ -159,15 +168,15 @@ def analyze_text_for_scams(text_content, platform=None, api_key=None):
indicators_found.append(f"Suspicious URL found: {url_str} (Reason: {reason})")
urls_analyzed_details.append(url_analysis)

# 3. Financial Identifiers
# 4. Financial Identifiers
for id_name, pattern in FINANCIAL_ADDRESS_PATTERNS.items():
if pattern.search(text_content):
message = f"Potential {id_name} identifier found."
if message not in indicators_found:
indicators_found.append(message)
score += HEURISTIC_WEIGHTS.get(f"{id_name}_ADDRESS", 2.5)

# 4. Phone Numbers
# 5. Phone Numbers
if PHONE_NUMBER_PATTERN.search(text_content):
message = "Phone number detected in text."
if message not in indicators_found:
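For context on the new sentiment heuristic above: TextBlob's sentiment.polarity ranges from -1.0 (most negative) to 1.0 (most positive), so the -0.5 threshold flags only strongly negative text. A standalone sketch of the same check (illustrative, not part of the diff):

from textblob import TextBlob

text = "This is a terrible, awful, no good, very bad message."
polarity = TextBlob(text).sentiment.polarity  # float in [-1.0, 1.0]
if polarity < -0.5:
    # Mirrors the NEGATIVE_SENTIMENT heuristic added in this PR
    print(f"Strong negative sentiment detected (polarity={polarity:.2f})")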
29 changes: 29 additions & 0 deletions social_media_analyzer/test_scam_detector.py
@@ -0,0 +1,29 @@
import unittest
from .scam_detector import analyze_text_for_scams

class TestScamDetector(unittest.TestCase):

def test_sentiment_analysis(self):
# Test case for negative sentiment
text_negative = "This is a terrible, awful, no good, very bad message."
result_negative = analyze_text_for_scams(text_negative)
self.assertIn("Strong negative sentiment detected in text.", [indicator for indicator in result_negative["indicators_found"]])

# Test case for positive sentiment
text_positive = "This is a wonderful, amazing, great message."
result_positive = analyze_text_for_scams(text_positive)
self.assertNotIn("Strong negative sentiment detected in text.", [indicator for indicator in result_positive["indicators_found"]])
Comment on lines +10 to +15

issue (code-quality): Replace identity comprehension with call to collection constructor [×2] (identity-comprehension)

Explanation: Convert list/set/tuple comprehensions that do not change the input elements into calls to the corresponding collection constructor.

Before

# List comprehensions
[item for item in coll]
[item for item in friends.names()]

# Dict comprehensions
{k: v for k, v in coll}
{k: v for k, v in coll.items()}  # Only if we know coll is a `dict`

# Unneeded call to `.items()`
dict(coll.items())  # Only if we know coll is a `dict`

# Set comprehensions
{item for item in coll}

After

# List comprehensions
list(iter(coll))
list(iter(friends.names()))

# Dict comprehensions
dict(coll)
dict(coll)

# Unneeded call to `.items()`
dict(coll)

# Set comprehensions
set(coll)

All these comprehensions are just creating a copy of the original collection. They can all be simplified by constructing a new collection directly. The resulting code is easier to read and shows the intent more clearly.
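
Applied to the flagged lines, the suggestion reduces to passing the list itself, since assertIn and assertNotIn accept any container; for example:

self.assertIn(
    "Strong negative sentiment detected in text.",
    result_negative["indicators_found"],
)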


def test_keyword_matching(self):
# Test case for urgency keyword
text_urgency = "URGENT: Your account has been compromised."
result_urgency = analyze_text_for_scams(text_urgency)
self.assertIn("Presence of 'Urgency' keyword: 'urgent'", [indicator for indicator in result_urgency["indicators_found"]])

# Test case for stemming
text_stemming = "I need you to verify your account immediately."
result_stemming = analyze_text_for_scams(text_stemming)
self.assertIn("Presence of 'Sensitive Info' keyword: 'verify your account'", [indicator for indicator in result_stemming["indicators_found"]])

if __name__ == '__main__':
unittest.main()
31 changes: 14 additions & 17 deletions src/App.jsx
@@ -1,27 +1,24 @@
import React, { useState } from 'react';
import './App.css';
import ScamAnalyzer from './ScamAnalyzer';
import FakeNewsAnalyzer from './FakeNewsAnalyzer';

function App() {
const [view, setView] = useState('scam');

return (
<div className="App">
<header className="App-header">
<img src="Octocat.png" className="App-logo" alt="logo" />
<p>
GitHub Codespaces <span className="heart">♥️</span> React
</p>
<p className="small">
Edit <code>src/App.jsx</code> and save to reload.
</p>
<p>
<a
className="App-link"
href="https://reactjs.org"
target="_blank"
rel="noopener noreferrer"
>
Learn React
</a>
</p>
<h1>Universal Security Analyzer</h1>
<nav>
<button onClick={() => setView('scam')}>Scam Analyzer</button>
<button onClick={() => setView('fake-news')}>Fake News Analyzer</button>
</nav>
</header>
<main>
{view === 'scam' && <ScamAnalyzer />}
{view === 'fake-news' && <FakeNewsAnalyzer />}
</main>
</div>
);
}
81 changes: 81 additions & 0 deletions src/FakeNewsAnalyzer.jsx
@@ -0,0 +1,81 @@
import React, { useState } from 'react';

function FakeNewsAnalyzer() {
const [url, setUrl] = useState('');
const [result, setResult] = useState(null);
const [loading, setLoading] = useState(false);

const handleAnalyze = () => {
setLoading(true);
fetch('/analyze/fake-news', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ url }),
})
.then((res) => res.json())
.then((data) => {
setResult(data);
setLoading(false);
})
.catch((error) => {
console.error('Error:', error);
setLoading(false);
});
};

return (
<div>
<h2>Fake News Analyzer</h2>
<input
type="text"
value={url}
onChange={(e) => setUrl(e.target.value)}
placeholder="Enter a news URL to analyze..."
size="50"
/>
<br />
<button onClick={handleAnalyze} disabled={loading}>
{loading ? 'Analyzing...' : 'Analyze'}
</button>
{result && (
<div>
<h3>Analysis Results</h3>
{result.error ? (
<p>Error: {result.error}</p>
) : (
<>
<p>Score: {result.score}</p>
<h4>Indicators Found:</h4>
<ul>
{result.indicators_found.map((indicator, index) => (
<li key={index}>{indicator}</li>
))}
</ul>
{result.named_entities && (
<>
<h4>Named Entities Found:</h4>
<h5>Organizations:</h5>
<ul>
{result.named_entities.organizations.map((org, index) => (
<li key={index}>{org}</li>
))}
</ul>
<h5>Persons:</h5>
<ul>
{result.named_entities.persons.map((person, index) => (
<li key={index}>{person}</li>
))}
</ul>
</>
)}
</>
)}
</div>
)}
</div>
);
}

export default FakeNewsAnalyzer;
58 changes: 58 additions & 0 deletions src/ScamAnalyzer.jsx
@@ -0,0 +1,58 @@
import React, { useState } from 'react';

function ScamAnalyzer() {
const [text, setText] = useState('');
const [result, setResult] = useState(null);
const [loading, setLoading] = useState(false);

const handleAnalyze = () => {
setLoading(true);
fetch('/analyze/scam', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ text }),
})
.then((res) => res.json())
.then((data) => {
setResult(data);
setLoading(false);
})
.catch((error) => {
console.error('Error:', error);
setLoading(false);
});
};

return (
<div>
<h2>Scam Analyzer</h2>
<textarea
rows="10"
cols="50"
value={text}
onChange={(e) => setText(e.target.value)}
placeholder="Paste a message to analyze for scams..."
/>
<br />
<button onClick={handleAnalyze} disabled={loading}>
{loading ? 'Analyzing...' : 'Analyze'}
</button>
{result && (
<div>
<h3>Analysis Results</h3>
<p>Score: {result.score}</p>
<h4>Indicators Found:</h4>
<ul>
{result.indicators_found.map((indicator, index) => (
<li key={index}>{indicator}</li>
))}
</ul>
</div>
)}
</div>
);
}

export default ScamAnalyzer;
40 changes: 24 additions & 16 deletions text_message_analyzer/app.py
@@ -1,28 +1,36 @@
from flask import Flask, request, jsonify
from social_media_analyzer import scam_detector, fake_news_detector
import os

app = Flask(__name__)

@app.route("/")
def hello():
return "Hello, World!"
def get_api_key():
"""Gets the Google API key from environment variables."""
return os.environ.get("GOOGLE_API_KEY")

@app.route('/analyze', methods=['POST'])
def analyze():
@app.route('/analyze/scam', methods=['POST'])
def analyze_scam():
data = request.get_json()
if not data or 'text' not in data:
return jsonify({'error': 'Invalid input, "text" field is required.'}), 400
return jsonify({"error": "Missing 'text' in request body"}), 400

text_to_analyze = data['text']
api_key = get_api_key()

# Placeholder analysis logic
is_suspicious = 'phishing' in text_to_analyze.lower()
result = scam_detector.analyze_text_for_scams(text_to_analyze, api_key=api_key)
return jsonify(result)

@app.route('/analyze/fake-news', methods=['POST'])
def analyze_fake_news():
data = request.get_json()
if not data or 'url' not in data:
return jsonify({"error": "Missing 'url' in request body"}), 400

url_to_analyze = data['url']

result = fake_news_detector.analyze_url_for_fake_news(url_to_analyze)
return jsonify(result)

return jsonify({
'text': text_to_analyze,
'analysis': {
'is_suspicious': is_suspicious
}
})

if __name__ == "__main__":
app.run(host="0.0.0.0", port=8080)
if __name__ == '__main__':
app.run(debug=True)
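
Once the Flask app is running, both new endpoints can be exercised with the requests library already listed in requirements.txt; a quick sketch, assuming the dev server's default address of http://127.0.0.1:5000:

import requests

BASE = "http://127.0.0.1:5000"  # Flask dev-server default; adjust for other deployments

scam = requests.post(f"{BASE}/analyze/scam",
                     json={"text": "URGENT: verify your account now!"})
print(scam.json())  # score, indicators_found, ...

news = requests.post(f"{BASE}/analyze/fake-news",
                     json={"url": "example.com/article"})
print(news.json())  # score, indicators_found, named_entities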