Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
121 changes: 121 additions & 0 deletions tasks/infobox_football_biography/pseudocode_structure.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# Football Player Data Translation Bot - pseudocode structure

## Observer Pattern: Monitoring and Error Handling

```python
class ErrorObserver:
def update(self, message):
# Log error messages


class ProgressObserver:
def update(self, message):
# Log progress messages

```

## Pipeline Pattern: Structured Data Processing

```python
class DataExtractor:
def extract_data(self, article_url):
wikitext = self.fetch_wikitext(article_url)
parsed_data = self.parse_infobox_template(wikitext)
return parsed_data


class DataTranslator:
def __init__(self, translation_array):
self.translation_array = translation_array

def translate(self, english_data):
# Translation logic using translation_array
pass


class TemplateIntegrator:
def integrate(self, arabic_article_url, integrated_template):
arabic_wikitext = self.fetch_wikitext(arabic_article_url)
arabic_template = self.extract_arabic_template(arabic_wikitext)
# Integration logic using integrated_template
pass


class QualityAssurer:
def validate_translation(self, translation):
# Validation logic
pass

def validate_integration(self, integrated_template):
# Validation logic
pass
```

## Decorator Pattern: Quality Assurance

```python
class TranslationQualityChecker:
def __init__(self, translator):
self.translator = translator

def translate(self, english_data):
translated_data = self.translator.translate(english_data)
# Validate translated_data
return translated_data


class IntegrationQualityChecker:
def __init__(self, integrator):
self.integrator = integrator

def integrate(self, arabic_article_url, integrated_template):
self.integrator.integrate(arabic_article_url, integrated_template)
# Validate integrated_template
```

## Template Method Pattern: Bot Operation Structure

```python

class FootballPlayerBot:
def __init__(self):
self.error_observer = ErrorObserver()
self.progress_observer = ProgressObserver()
self.translation_array = load_custom_translation_array()

def notify_error(self, message):
self.error_observer.update(message)

def notify_progress(self, message):
self.progress_observer.update(message)

# ... (Other methods)

def bot_operation(self):
self.notify_progress("Starting bot operation...")

data_extractor = DataExtractor()
data_translator = DataTranslator(self.translation_array)
template_integrator = TemplateIntegrator()

quality_translator = TranslationQualityChecker(data_translator)
quality_integrator = IntegrationQualityChecker(template_integrator)

for each article in English Wikipedia:
english_data = data_extractor.extract_data(article.url)
translated_data = quality_translator.translate(english_data)

arabic_wikitext = self.fetch_wikitext(article.arabic_url)
integrated_template = quality_integrator.integrate(article.arabic_url, translated_data)

# ... (Other steps)

self.notify_progress("Bot operation completed.")
```

## Instantiate and run the bot

```python
bot = FootballPlayerBot()
bot.bot_operation()
```
22 changes: 22 additions & 0 deletions tasks/infobox_football_biography/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pywikibot

from tasks.infobox_football_biography.src.data_extraction.templates.infobox_football_biography import \
InfoboxFootballBiography
from tasks.infobox_football_biography.src.football_player_bot import FootballPlayerBot

def _entry_name(entry):
    """Sort key: the ``"name"`` field of one extracted parameter entry."""
    return entry["name"]


# Build the bot first; its constructor already writes a few log lines.
bot = FootballPlayerBot()

# Locate the English Wikipedia article used for this manual run.
page_title = "Paul_Abasolo"
site = pywikibot.Site("en", "wikipedia")
page = pywikibot.Page(site, page_title)

# A separate logger chain is requested from the bot for the extractor;
# it is obtained before the page text is read, as in the keyword order used
# when the extractor is constructed.
extraction_logger = bot.getChainOfLoggers()
template = InfoboxFootballBiography(
    logger=extraction_logger,
    text_page=page.text,
)

template.parse()
# The two calls below are made for their own sake; their results are unused.
template.template_name()
template.parameters_list()

# Only order the extracted parameters when something was actually found.
if template.check():
    template.list.sort(key=_entry_name)
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from abc import abstractmethod, ABC

import wikitextparser as wtp

from core.utils.helpers import prepare_str
from tasks.infobox_football_biography.src.logger.abstract_logger import AbstractLogger


class DataExtractor(ABC):
    """Base class that pulls selected parameters of one template out of wikitext.

    Subclasses say which template to look for (``template_name``) and which
    of its parameters to collect (``parameters_list``).  ``parse`` fills
    ``self.list`` with ``{"name": ..., "value": ...}`` dictionaries.
    """

    def __init__(self, text_page: str, logger: AbstractLogger):
        """Store the page wikitext and the logger used while parsing.

        :param text_page: wikitext of the page to inspect.
        :param logger: object whose ``logMessage(level, message)`` is called
            to report progress and problems.
        """
        self.text_page = text_page
        self.logger = logger
        # NOTE: the attribute name shadows the ``list`` builtin; it is kept
        # because it is part of this class's public surface.
        self.list = []

    @abstractmethod
    def template_name(self) -> str:
        """Return the name of the template to extract."""

    @abstractmethod
    def parameters_list(self) -> list:
        """Return the parameter names to collect from the template."""

    def check(self) -> bool:
        """Return ``True`` when at least one parameter has been extracted."""
        return len(self.list) > 0

    def parse(self):
        """Parse ``self.text_page`` and collect the wanted parameters.

        For every template whose normalised name equals the normalised
        ``template_name()``, each entry of ``parameters_list()`` is looked up
        among the template's arguments; the first argument whose normalised
        name matches is logged and appended to ``self.list``.

        Returns ``None``.  Nothing is collected (and a message is logged)
        when the page has no templates, ``template_name()`` is ``None`` or
        ``parameters_list()`` is ``None``/empty.
        """
        # Start from a clean slate so calling parse() again does not append
        # a second copy of every parameter.
        self.list = []
        log = self.logger.logMessage

        log(AbstractLogger.INFO, "start extract data")
        parsed = wtp.parse(self.text_page)
        log(AbstractLogger.INFO, "end extract data")

        log(AbstractLogger.INFO, "start extract template")
        templates = parsed.templates
        if not templates:
            log(AbstractLogger.WARNING, "no template found in page")
            return

        # Ask the subclass once instead of on every loop iteration.
        wanted_template = self.template_name()
        if wanted_template is None:
            log(AbstractLogger.ERROR, "no template name set in class you are using")
            return

        wanted_parameters = self.parameters_list()
        if not wanted_parameters:
            log(AbstractLogger.ERROR, "no parameters set in class you are using")
            return

        # Normalise the loop-invariant names a single time.
        # NOTE(review): assumes prepare_str is a pure normalisation helper
        # (same input -> same output) - confirm against core.utils.helpers.
        target_name = prepare_str(wanted_template)
        target_parameters = [prepare_str(name) for name in wanted_parameters]

        for template in templates:
            if prepare_str(template.name) != target_name:
                continue

            log(AbstractLogger.INFO, "start extract parameters")
            # Normalise each argument name once per template rather than
            # once per (parameter, argument) pair.
            normalised_arguments = [
                (prepare_str(argument.name), argument)
                for argument in template.arguments
            ]
            for wanted in target_parameters:
                # First matching argument wins; later duplicates are ignored.
                match = next(
                    (
                        argument
                        for normalised, argument in normalised_arguments
                        if normalised == wanted
                    ),
                    None,
                )
                if match is None:
                    continue
                log(AbstractLogger.INFO, f"{match.name}: {match.value}")
                self.list.append({
                    "name": match.name,
                    "value": match.value,
                })
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from abc import ABC

from tasks.infobox_football_biography.src.data_extraction.data_extractor import DataExtractor
from tasks.infobox_football_biography.src.logger.abstract_logger import AbstractLogger


class InfoboxFootballBiography(DataExtractor, ABC):
    """Extractor configured for the "Infobox football biography" template."""

    def __init__(self, text_page: str, logger: AbstractLogger):
        """Forward the page text and logger to the base extractor."""
        super().__init__(text_page=text_page, logger=logger)

    def template_name(self) -> str:
        """Return the template name this extractor looks for."""
        return "Infobox football biography"

    def parameters_list(self) -> list:
        """Return ``"position"`` followed by the numbered club-career fields.

        For each slot 1 through 19 the names ``years<N>``, ``clubs<N>``,
        ``caps<N>`` and ``goals<N>`` are emitted, in that order.
        """
        per_slot_fields = ("years", "clubs", "caps", "goals")
        names = ["position"]
        for slot in range(1, 20):
            names.extend(f"{field}{slot}" for field in per_slot_fields)
        return names
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
class ClassificationContext:
    """Holds an ordered list of classification strategies and queries them."""

    def __init__(self):
        """Start with no registered strategies."""
        self.strategies = []

    def add_strategy(self, strategy):
        """Register ``strategy`` after the ones already added."""
        self.strategies.append(strategy)

    def classify(self, value):
        """Return the first truthy label produced by the strategies.

        Strategies are asked in registration order and the search stops at
        the first truthy answer; ``"unknown"`` is returned when none gives one.
        """
        # Generator keeps evaluation lazy: later strategies are not called
        # once an earlier one has produced a label.
        labels = (candidate.classify(value) for candidate in self.strategies)
        return next((label for label in labels if label), "unknown")
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from tasks.infobox_football_biography.src.data_translator.value_classification_strategy import \
ValueClassificationStrategy


class NormalTextClassificationStrategy(ValueClassificationStrategy):
    """Strategy that labels every value as plain text."""

    def classify(self, value):
        """Return ``"normal_text"`` regardless of ``value``."""
        # The argument is not inspected, so this strategy always answers.
        # NOTE(review): presumably meant to be registered last as a
        # fallback - confirm where the strategies are wired together.
        label = "normal_text"
        return label
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from tasks.infobox_football_biography.src.data_translator.value_classification_strategy import \
ValueClassificationStrategy


class NumberClassificationStrategy(ValueClassificationStrategy):
    """Strategy that recognises values ``float()`` is able to convert."""

    def classify(self, value):
        """Return ``"number"`` when ``float(value)`` succeeds, else ``None``.

        :param value: candidate value; any type is accepted.
        :return: ``"number"`` or ``None``.
        """
        try:
            float(value)
        except (TypeError, ValueError):
            # ValueError: text that is not a numeric literal.
            # TypeError: objects float() cannot take at all (None, lists...);
            # these are simply "not a number" rather than a crash.
            return None
        return "number"
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from tasks.infobox_football_biography.src.data_translator.value_classification_strategy import \
ValueClassificationStrategy


class WikiLinkClassificationStrategy(ValueClassificationStrategy):
    """Strategy that recognises values written as a ``[[...]]`` wiki link."""

    def classify(self, value):
        """Return ``"wiki_link"`` for text wrapped in ``[[`` and ``]]``.

        Surrounding whitespace is ignored, and anything that is not a string
        is reported as "not a wiki link" instead of raising.

        :param value: candidate value; any type is accepted.
        :return: ``"wiki_link"`` or ``None``.
        """
        # TODO: use wikitextparser to detect links instead of a textual check.
        if not isinstance(value, str):
            return None
        # NOTE(review): template argument values appear to keep their
        # surrounding spaces/newline, hence the strip - confirm with the
        # wikitextparser Argument.value documentation.
        candidate = value.strip()
        if candidate.startswith("[[") and candidate.endswith("]]"):
            return "wiki_link"
        return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from tasks.infobox_football_biography.src.data_translator.data_translation_handler import DataTranslationHandler


class NormalTextTranslationHandler(DataTranslationHandler):
    """Handler for plain-text values; translation is not implemented yet."""

    def translate(self, value):
        """Return ``value`` unchanged."""
        # TODO: translate the normal text to Arabic.
        untranslated = value
        return untranslated
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from tasks.infobox_football_biography.src.data_translator.data_translation_handler import DataTranslationHandler


class NumberTranslationHandler(DataTranslationHandler):
    """Handler for numeric values."""

    def translate(self, value):
        """Return the string form of ``value``."""
        # The "translation" of a number is currently just its str() form.
        as_text = str(value)
        return as_text
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from tasks.infobox_football_biography.src.data_translator.data_translation_handler import DataTranslationHandler


class WikiLinkTranslationHandler(DataTranslationHandler):
    """Handler for wiki-link values; translation is not implemented yet."""

    def translate(self, value):
        """Return ``value`` unchanged."""
        # TODO: translate the wiki link to Arabic.
        untranslated = value
        return untranslated
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from abc import abstractmethod, ABC


class DataTranslationHandler(ABC):
    """Interface for objects that translate a single value."""

    @abstractmethod
    def translate(self, value):
        """Translate ``value``; concrete handlers must override this.

        The base implementation does nothing and returns ``None``.
        """
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
class TranslationChain:
    """Ordered collection of translation handlers tried one after another."""

    def __init__(self):
        """Start with no registered handlers."""
        self.handlers = []

    def add_handler(self, handler):
        """Register ``handler`` after the ones already added."""
        self.handlers.append(handler)

    def translate(self, value):
        """Return the first truthy translation, or ``value`` itself.

        Handlers are asked in registration order; a falsy answer means
        "try the next one".  When every handler answers falsy (or there are
        none) the input is handed back untouched.
        """
        for link in self.handlers:
            outcome = link.translate(value)
            if not outcome:
                continue
            return outcome
        return value
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from abc import abstractmethod, ABC


class ValueClassificationStrategy(ABC):
    """Interface for objects that assign a label to a single value."""

    @abstractmethod
    def classify(self, value):
        """Classify ``value``; concrete strategies must override this.

        The base implementation does nothing and returns ``None``.
        """
24 changes: 24 additions & 0 deletions tasks/infobox_football_biography/src/football_player_bot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from tasks.infobox_football_biography.src.logger.abstract_logger import AbstractLogger
from tasks.infobox_football_biography.src.logger.console_logger import ConsoleLogger
from tasks.infobox_football_biography.src.logger.error_logger import ErrorLogger
from tasks.infobox_football_biography.src.logger.file_logger import FileLogger


class FootballPlayerBot:
    """Bot entry object; currently it only sets up and exercises logging."""

    def __init__(self):
        """Build the logger chain and write three sample messages to it."""
        self.logger = self.getChainOfLoggers()

        startup_messages = (
            (AbstractLogger.INFO, "Start"),
            (AbstractLogger.DEBUG, "debug message"),
            (AbstractLogger.ERROR, "error message"),
        )
        for level, text in startup_messages:
            self.logger.logMessage(level, text)

    def getChainOfLoggers(self) -> AbstractLogger:
        """Create a fresh error -> file -> console logger chain.

        :return: the error logger, i.e. the head of the chain.
        """
        # Instantiated in the same order as the chain is linked.
        head = ErrorLogger(AbstractLogger.ERROR)
        middle = FileLogger(AbstractLogger.DEBUG)
        tail = ConsoleLogger(AbstractLogger.INFO)

        head.nextLogger = middle
        middle.nextLogger = tail

        return head
Empty file.
31 changes: 31 additions & 0 deletions tasks/infobox_football_biography/src/logger/abstract_logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from abc import ABC, abstractmethod


class AbstractLogger(ABC):
    """Base link of a chain-of-responsibility logger.

    Each link has a threshold (``_level``) and an optional successor
    (``nextLogger``).  ``logMessage`` writes the message on this link when
    the link's threshold is less than or equal to the message level, and
    then always hands the message to the successor, if any.
    """

    # Level constants, compared numerically by logMessage.
    INFO: int = 1
    DEBUG: int = 2
    WARNING: int = 3
    ERROR: int = 4
    FATAL: int = 5

    # Threshold of this link; None means "not configured".
    _level: int = None
    # Next element in the chain of responsibility.
    _nextLogger = None

    def __init__(self, level: int = None):
        """Optionally set this link's threshold.

        :param level: one of the level constants.  When omitted the
            class-level default is left in place, so subclasses that assign
            ``_level`` themselves keep working unchanged.
        """
        if level is not None:
            self._level = level

    @property
    def nextLogger(self):
        """The successor link, or ``None`` at the end of the chain."""
        return self._nextLogger

    @nextLogger.setter
    def nextLogger(self, value):
        self._nextLogger = value

    def logMessage(self, level, message):
        """Write ``message`` here if eligible, then pass it down the chain.

        :param level: level of the message (one of the level constants).
        :param message: text handed to ``write``.
        """
        # A link without a threshold writes nothing instead of failing on
        # the ``None <= level`` comparison; forwarding still happens.
        if self._level is not None and self._level <= level:
            self.write(message)
        if self._nextLogger is not None:
            self._nextLogger.logMessage(level, message)

    @abstractmethod
    def write(self, message):
        """Emit ``message`` to this link's destination."""
Loading