class IntegrationQualityChecker:
    """Decorator around a template integrator that adds a validation step.

    Exposes the same ``integrate`` call as the wrapped integrator so it can
    be used wherever the plain integrator is expected.
    """

    def __init__(self, integrator):
        # The wrapped object; only its ``integrate`` method is used here.
        self.integrator = integrator

    def integrate(self, arabic_article_url, integrated_template):
        """Delegate to the wrapped integrator and hand its result back.

        The result is returned (instead of being dropped) because the bot
        operation assigns the value of this call to ``integrated_template``,
        mirroring how ``TranslationQualityChecker.translate`` returns the
        translated data.
        """
        result = self.integrator.integrate(arabic_article_url, integrated_template)
        # Validate integrated_template
        return result
class DataExtractor(ABC):
    """Base class that pulls selected parameters of one template out of wikitext.

    Subclasses name the template (``template_name``) and the parameters of
    interest (``parameters_list``); ``parse`` then fills ``self.list`` with
    ``{"name": ..., "value": ...}`` dictionaries.
    """

    def __init__(self, text_page: str, logger: AbstractLogger):
        self.text_page = text_page
        self.logger = logger
        # Extracted parameters, appended by parse().
        self.list = []

    @abstractmethod
    def template_name(self) -> str:
        """Return the name of the template to extract from."""

    @abstractmethod
    def parameters_list(self) -> list:
        """Return the parameter names to extract, in the desired output order."""

    def check(self) -> bool:
        """Return True when at least one parameter has been extracted."""
        return bool(self.list)

    def parse(self):
        """Parse ``self.text_page`` and collect the requested parameters.

        Template and parameter names are compared after being passed through
        ``prepare_str``. Only the first template whose name matches is used,
        and for each requested parameter only the first matching argument is
        kept, so ``self.list`` holds at most one entry per requested
        parameter, in ``parameters_list()`` order. Problems (no templates,
        missing template name, missing parameter list) are logged and end the
        call early without raising.
        """
        log = self.logger.logMessage

        log(AbstractLogger.INFO, "start extract data")
        parsed = wtp.parse(self.text_page)
        log(AbstractLogger.INFO, "end extract data")
        log(AbstractLogger.INFO, "start extract template")

        templates = parsed.templates
        if not templates:
            log(AbstractLogger.WARNING, "no template found in page")
            return

        # Ask the subclass once instead of on every loop iteration.
        wanted_template = self.template_name()
        if wanted_template is None:
            log(AbstractLogger.ERROR, "no template name set in class you are using")
            return

        wanted_parameters = self.parameters_list()
        if not wanted_parameters:
            log(AbstractLogger.ERROR, "no parameters set in class you are using")
            return

        target_name = prepare_str(wanted_template)
        for template in templates:
            if prepare_str(template.name) != target_name:
                continue

            log(AbstractLogger.INFO, "start extract parameters")

            # Index the arguments by normalised name (first occurrence wins)
            # so each requested parameter is a single lookup rather than a
            # scan over all arguments.
            # NOTE(review): assumes prepare_str returns a hashable value
            # (presumably a str) - confirm against core.utils.helpers.
            first_by_name = {}
            for argument in template.arguments:
                first_by_name.setdefault(prepare_str(argument.name), argument)

            for parameter in wanted_parameters:
                argument = first_by_name.get(prepare_str(parameter))
                if argument is None:
                    continue
                log(AbstractLogger.INFO, f"{argument.name}: {argument.value}")
                self.list.append({
                    "name": argument.name,
                    "value": argument.value
                })

            # One matching template is enough; later ones would only add
            # duplicate entries for the same parameters.
            break
class ClassificationContext:
    """Ordered collection of classification strategies.

    Strategies are consulted in the order they were added; the first truthy
    label wins.
    """

    def __init__(self):
        # Strategies in registration order.
        self.strategies = list()

    def add_strategy(self, strategy):
        """Register *strategy* after all previously added strategies."""
        self.strategies.append(strategy)

    def classify(self, value):
        """Return the first truthy label produced for *value*, else "unknown"."""
        # Lazy generator: strategies after the first hit are never called.
        labels = (candidate.classify(value) for candidate in self.strategies)
        return next((label for label in labels if label), "unknown")
class WikiLinkClassificationStrategy(ValueClassificationStrategy):
    """Classify values that look like a single ``[[...]]`` wiki link."""

    def classify(self, value):
        """Return "wiki_link" when *value* is wrapped in ``[[`` ... ``]]``.

        Surrounding whitespace is ignored, because template parameter values
        usually arrive with leading spaces and a trailing newline. Returns
        None for anything else, including non-string input, so the next
        strategy can be tried.
        """
        # todo: use wikitextparser
        if not isinstance(value, str):
            return None
        text = value.strip()
        if text.startswith("[[") and text.endswith("]]"):
            return "wiki_link"
        return None
class TranslationChain:
    """Ordered list of translation handlers tried one after another."""

    def __init__(self):
        # Handlers in registration order.
        self.handlers = list()

    def add_handler(self, handler):
        """Append *handler* to the end of the chain."""
        self.handlers.append(handler)

    def translate(self, value):
        """Return the first truthy translation of *value*.

        Falls back to *value* itself when no handler yields a truthy result.
        """
        # Lazy: handlers after the first truthy result are not invoked.
        outcomes = (handler.translate(value) for handler in self.handlers)
        return next(filter(None, outcomes), value)
class AbstractLogger(ABC):
    """Link in a chain-of-responsibility logger.

    Each logger writes a message when the message level is at least its own
    threshold and then always forwards the message to the next logger, if
    one is set.
    """

    # Level constants; a higher number passes more thresholds.
    INFO: int = 1
    DEBUG: int = 2
    WARNING: int = 3
    ERROR: int = 4
    FATAL: int = 5

    # Threshold of this logger; subclasses assign it in their __init__.
    # None means "no threshold configured".
    _level: int = None
    # next element in chain of responsibility
    _nextLogger = None

    @property
    def nextLogger(self):
        """The logger that receives every message after this one, or None."""
        return self._nextLogger

    @nextLogger.setter
    def nextLogger(self, value):
        self._nextLogger = value

    def logMessage(self, level, message):
        """Write *message* if *level* passes the threshold, then forward it.

        A logger whose threshold was never set writes every message. Before
        this guard, comparing None with an int raised TypeError, which also
        stopped the message from reaching the rest of the chain.
        """
        if self._level is None or self._level <= level:
            self.write(message)
        if self._nextLogger is not None:
            self._nextLogger.logMessage(level, message)

    @abstractmethod
    def write(self, message):
        """Emit *message* to this logger's destination."""
class FileLogger(AbstractLogger):
    """Chain link named for file output; at present it prints to stdout."""

    def __init__(self, level):
        """Create the logger with *level* as its threshold."""
        super().__init__()
        self._level = level

    def write(self, message):
        """Print *message* behind the file-logger prefix."""
        prefix = "FileLogger::Logger: "
        print(prefix + message)
+translation_chain.add_handler(NumberTranslationHandler()) +# Add handlers for WikiLink and NormalText as needed + +# Sample data +data = [ + {"name": "param1", "value": "42"}, + {"name": "param2", "value": "[[link to article]]"}, + {"name": "param3", "value": "This is normal text."} +] + +for item in data: + value = item["value"] + classification = classification_context.classify(value) + translation = translation_chain.translate(value) + + print(f"Name: {item['name']}") + print(f"Value: {value}") + print(f"Classification: {classification}") + print(f"Translation: {translation}") + print("---") diff --git a/tasks/infobox_football_biography/todo.md b/tasks/infobox_football_biography/todo.md new file mode 100644 index 00000000..dfd7adf1 --- /dev/null +++ b/tasks/infobox_football_biography/todo.md @@ -0,0 +1,70 @@ +# Football Player Data Translation Bot - Arabic Wikipedia + +## Observer Pattern: Monitoring and Error Handling + +- [x] Implement a Chain of Responsibility Pattern to monitor the bot's progress and detect errors. +- [x] Set up error handling mechanisms for each stage of the bot's operation. +- [x] Create a logging system to record errors and progress. + +## Pipeline Pattern: Structured Data Processing + +- [X] Data Extraction Stage from English Wikipedia: + - [X] Identify English Wikipedia articles with Infobox football biography templates. + - [X] Configure the wikitext parsing library for English. + - [X] Extract data from the English Infobox template and organize it. + +- [ ] Translation Stage (Using Custom Array): + - [ ] Create a custom translation array mapping English keys to Arabic keys. + - [ ] Implement a translation function using the custom array. + +- [ ] Template Integration Stage on Arabic Wikipedia: + - [ ] Identify target Arabic Wikipedia articles and corresponding templates. + - [ ] Set up the wikitext parsing library for Arabic Wikipedia. + - [ ] Parse the Arabic article wikitext and extract the Arabic template. 
+ - [ ] Integrate the translated data into the Arabic template. + +- [ ] Error Handling and Quality Assurance Stage: + - [ ] Implement quality checks for translations and integrated data. + - [ ] Ensure accurate and contextually appropriate integration. + +## Decorator Pattern: Quality Assurance + +- [ ] Implement quality checks as decorators for translation and integration stages. +- [ ] Validate translations and integrated data for accuracy and context. + +## Template Method Pattern: Bot Operation Structure + +- [ ] Design a template method for the entire bot operation: + - [ ] Extract data from English Wikipedia articles. + - [ ] Translate data using the custom array. + - [ ] Integrate translated data into Arabic Wikipedia templates. + - [ ] Perform error handling and quality assurance. + - [ ] Log progress and errors through the observer pattern. + +## Testing and Iteration + +- [ ] Test the bot operation on a small subset of articles. +- [ ] Identify and address issues with data extraction, translation, or template integration. +- [ ] Iterate based on testing results and feedback. + +## Automation and Scaling (Optional) + +- [ ] Evaluate the feasibility of automating the bot for a larger number of articles. +- [ ] Develop automation scripts or tools, considering Wikipedia's guidelines and rate limits. +- [ ] Strategize to manage potential server load and ensure adherence to Wikipedia's rules. + +## Documentation and Reporting + +- [ ] Document the bot's operation, including setup, implementation, and usage instructions. +- [ ] Prepare a report summarizing the project, challenges encountered, and solutions applied. + +## Legal and Ethical Considerations + +- [ ] Review and adhere to Wikipedia's terms of use and guidelines for bot operation and content modification. +- [ ] Ensure compliance with relevant data protection and copyright regulations. + +## Project Completion + +- [ ] Conduct a final review of the bot's operation and components. 
+- [ ] Ensure that all tasks are completed and thoroughly tested. +- [ ] Conclude the project, including documentation and any required reporting.