Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
4 changes: 4 additions & 0 deletions tasks/tahdith_tashkila/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from tasks.tahdith_tashkila.src.bot import BotFactory

# Script entry: build the bot and run the whole pipeline for one page title.
# The title is passed to BotFactory.run(), which looks it up on the "ar" site.
bot = BotFactory()
bot.run("قالب:تشكيلة_شيفيلد_يونايتد")
153 changes: 153 additions & 0 deletions tasks/tahdith_tashkila/src/bot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import pywikibot

from tasks.tahdith_tashkila.src.data_extraction.templates.football_squad import FootballSquad
from tasks.tahdith_tashkila.src.data_translator.classification_context import ClassificationContext
from tasks.tahdith_tashkila.src.data_translator.data_classification.has_ar_post_classification_strategy import \
HasArPostClassificationStrategy
from tasks.tahdith_tashkila.src.data_translator.data_classification.not_has_ar_post_classification_strategy import \
NotHasArPostClassificationStrategy
from tasks.tahdith_tashkila.src.data_translator.data_translation.has_ar_post_classification_strategy import \
HasArPostTranslationHandler
from tasks.tahdith_tashkila.src.data_translator.data_translation.not_has_ar_post_classification_strategy import \
NotHasArPostTranslationHandler
from tasks.tahdith_tashkila.src.data_translator.translation_chain import TranslationChain
from tasks.tahdith_tashkila.src.logger.abstract_logger import AbstractLogger
from tasks.tahdith_tashkila.src.logger.console_logger import ConsoleLogger
from tasks.tahdith_tashkila.src.logger.error_logger import ErrorLogger
from tasks.tahdith_tashkila.src.logger.file_logger import FileLogger
from tasks.tahdith_tashkila.src.template_integration.templates.football_squad import \
FootballSquad as FootballSquadIntegration


class BotFactory:
    """Drive one squad-template update from page load to save.

    The pipeline run by :meth:`run` is: load the Arabic page and the English
    page linked from it, extract squad data from the English text, classify
    and translate every extracted item, merge the result into the Arabic
    text, and save that text to a separate page on the Arabic site.

    ``self.status`` records the last stage that completed successfully, so
    :meth:`run` can stop as soon as a stage fails instead of carrying on with
    missing data.
    """

    # Values stored in ``self.status``.
    BOT_STATUS_STARTED = 0
    BOT_STATUS_LOADING_PAGE = 1
    BOT_STATUS_DATA_EXTRACTED = 2
    BOT_STATUS_DATA_TRANSLATED = 3

    def __init__(self):
        self.logger = self.getChainOfLoggers()
        # ar page
        self.ar_site = pywikibot.Site("ar", "wikipedia")
        self.ar_page = None
        self.ar_text = None
        # en page
        self.en_site = pywikibot.Site("en", "wikipedia")
        self.en_page = None
        self.en_text = None
        # Page written by save(); declared here so the attribute always exists.
        self.temp_page = None
        self.status = self.BOT_STATUS_STARTED

        self.data_extractor_list = []

    def getChainOfLoggers(self) -> AbstractLogger:
        """Build the logger chain error -> file -> console and return its head."""
        error_logger = ErrorLogger(AbstractLogger.ERROR)
        file_logger = FileLogger(AbstractLogger.DEBUG)
        console_logger = ConsoleLogger(AbstractLogger.INFO)

        error_logger.nextLogger = file_logger
        file_logger.nextLogger = console_logger

        return error_logger

    def run(self, page_title: str):
        """Run the full pipeline for ``page_title``.

        Stops early (after logging an error) when the pages cannot be loaded
        or when no data could be extracted, so nothing is saved in that case.
        """
        self.logger.logMessage(AbstractLogger.INFO, "Bot started")
        # Reset so a status left over from a previous run cannot mask a failure.
        self.status = self.BOT_STATUS_STARTED

        self.load_page(page_title=page_title)
        if self.status != self.BOT_STATUS_LOADING_PAGE:
            self.logger.logMessage(AbstractLogger.ERROR, "Bot aborted: pages could not be loaded")
            return

        self.data_extractor()
        if self.status != self.BOT_STATUS_DATA_EXTRACTED:
            self.logger.logMessage(AbstractLogger.ERROR, "Bot aborted: no data extracted")
            return

        self.data_translator()
        self.template_integrator()
        self.save()
        self.logger.logMessage(AbstractLogger.INFO, "Bot finished")

    def load_page(self, page_title: str):
        """Load the Arabic page and the English page it links to.

        On success ``ar_text`` and ``en_text`` are filled and ``status`` is set
        to ``BOT_STATUS_LOADING_PAGE``. On any failure an error is logged and
        the method returns with ``status`` unchanged.
        """
        self.logger.logMessage(AbstractLogger.INFO, "start loading page")
        self.ar_page = pywikibot.Page(self.ar_site, page_title)
        if not self.ar_page.exists():
            self.logger.logMessage(AbstractLogger.ERROR, "page does not exist")
            return
        self.logger.logMessage(AbstractLogger.INFO, "page exists")

        # Take the title of the first language link that points to "en".
        en_title = None
        for item in self.ar_page.langlinks():
            if str(item).startswith("[[en:"):
                en_title = item.title
                break
        if en_title is None:
            self.logger.logMessage(AbstractLogger.ERROR, "en page not found")
            # Without a title the concatenation below would raise TypeError.
            return

        en_title_with_template = "template:" + en_title
        self.logger.logMessage(AbstractLogger.INFO, "start geting en page")
        self.en_page = pywikibot.Page(self.en_site, en_title_with_template)

        if not self.en_page.exists():
            self.logger.logMessage(AbstractLogger.ERROR, "page does not exist")
            return

        self.logger.logMessage(AbstractLogger.INFO, "en page exists")

        self.en_text = self.en_page.text
        self.ar_text = self.ar_page.text

        self.logger.logMessage(AbstractLogger.INFO, "end fill page")

        self.status = self.BOT_STATUS_LOADING_PAGE

    def data_extractor(self):
        """Extract squad data from the English text into ``data_extractor_list``.

        ``status`` advances to ``BOT_STATUS_DATA_EXTRACTED`` only when the
        extractor reports that it found something.
        """
        self.logger.logMessage(AbstractLogger.INFO, "start extract template")
        extractor = FootballSquad(
            text_page=self.en_text,
            logger=self.logger
        )
        extractor.parse()
        self.data_extractor_list = extractor.list
        if not extractor.check():
            self.logger.logMessage(AbstractLogger.ERROR, "cannot found data in this template")
            return

        self.status = self.BOT_STATUS_DATA_EXTRACTED

    def data_translator(self):
        """Classify every extracted item, then translate every item.

        Results are stored on the items themselves as ``classification`` and
        ``translated_value``.
        """
        # Create classification context and add classification strategies
        classification_context = ClassificationContext()
        classification_context.add_strategy(HasArPostClassificationStrategy())
        classification_context.add_strategy(NotHasArPostClassificationStrategy())

        # Create translation chain and add translation handlers
        translation_chain = TranslationChain()
        translation_chain.add_handler(HasArPostTranslationHandler())
        translation_chain.add_handler(NotHasArPostTranslationHandler())

        # Two separate passes, as before: all items are classified before any
        # item is translated.
        for item in self.data_extractor_list:
            item.classification = classification_context.classify(item)

        for item in self.data_extractor_list:
            item.translated_value = translation_chain.translate(item)

        self.status = self.BOT_STATUS_DATA_TRANSLATED

    def template_integrator(self):
        """Merge the translated data into the Arabic text and keep the result."""
        integrator = FootballSquadIntegration(
            text_page=self.ar_text,
            logger=self.logger
        )
        integrator.new_data = self.data_extractor_list
        self.ar_text = integrator.parse()

    def save(self):
        """Save ``ar_text`` to a separate page on the Arabic site.

        The target title is the Arabic page's title with the "قالب:" prefix
        replaced by "مستخدم:LokasBot/تحديث تشكيلة/", so the original page is
        not overwritten.
        """
        temp_title_page = str(self.ar_page.title()).replace("قالب:", "مستخدم:LokasBot/تحديث تشكيلة/")
        self.temp_page = pywikibot.Page(self.ar_site, temp_title_page)
        self.temp_page.text = self.ar_text
        self.temp_page.save(
            "بوت:تحديث تشكيلة v0.0.1-beta"
        )
Empty file.
53 changes: 53 additions & 0 deletions tasks/tahdith_tashkila/src/data_extraction/data_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from abc import abstractmethod, ABC

import wikitextparser as wtp

from core.utils.helpers import prepare_str
from tasks.tahdith_tashkila.src.logger.abstract_logger import AbstractLogger


class DataExtractor(ABC):
    """Base class for extracting named parameters from one wikitext template.

    Subclasses name the template (:meth:`template_name`) and the parameters
    to collect (:meth:`parameters_list`). :meth:`parse` fills ``self.list``.
    """

    def __init__(self, text_page: str, logger: AbstractLogger):
        self.text_page = text_page
        self.logger = logger
        # Extraction results; read directly by callers, so the name is kept.
        self.list = []

    @abstractmethod
    def template_name(self) -> str:
        """Return the name of the template to extract from."""

    @abstractmethod
    def parameters_list(self) -> list:
        """Return the names of the parameters to collect."""

    def check(self) -> bool:
        """Return True when :meth:`parse` collected at least one entry."""
        # The annotation promises a bool; the raw length was returned before.
        return bool(self.list)

    def parse(self):
        """Parse ``text_page`` and append matching parameters to ``self.list``.

        For every template whose name matches :meth:`template_name`, each
        wanted parameter contributes its first matching argument as a
        ``{"name": ..., "value": ...}`` dict. Names are compared after
        ``prepare_str`` normalisation. Returns early, after logging, when the
        page has no templates or the subclass supplies no template name or
        parameter list.
        """
        self.logger.logMessage(AbstractLogger.INFO, "start extract data")
        parsed = wtp.parse(self.text_page)
        self.logger.logMessage(AbstractLogger.INFO, "end extract data")
        self.logger.logMessage(AbstractLogger.INFO, "start extract template")
        templates = parsed.templates
        if not templates:
            self.logger.logMessage(AbstractLogger.WARNING, "no template found in page")
            return
        target_name = self.template_name()
        if target_name is None:
            self.logger.logMessage(AbstractLogger.ERROR, "no template name set in class you are using")
            return
        parameters = self.parameters_list()
        if not parameters:
            self.logger.logMessage(AbstractLogger.ERROR, "no parameters set in class you are using")
            return

        # Normalise the loop-invariant names once instead of per comparison.
        prepared_target = prepare_str(target_name)
        prepared_parameters = [prepare_str(parameter) for parameter in parameters]

        for template in templates:
            if prepare_str(template.name) != prepared_target:
                continue
            self.logger.logMessage(AbstractLogger.INFO, "start extract parameters")
            for prepared_parameter in prepared_parameters:
                for param in template.arguments:
                    if prepare_str(param.name) == prepared_parameter:
                        self.logger.logMessage(AbstractLogger.INFO, f"{param.name}: {param.value}")
                        self.list.append({
                            "name": param.name,
                            "value": param.value
                        })
                        # Only the first matching argument is kept.
                        break
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from abc import ABC

import wikitextparser as wtp

from core.utils.helpers import prepare_str
from tasks.tahdith_tashkila.src.data_extraction.data_extractor import DataExtractor
from tasks.tahdith_tashkila.src.logger.abstract_logger import AbstractLogger
from tasks.tahdith_tashkila.src.models.player import Player


class FootballSquad(DataExtractor, ABC):
    """Extract players and managers from a "Football squad" template.

    Each "football squad2 player" or "football squad manager" template nested
    inside a matching squad template becomes one ``Player`` in ``self.list``.
    """

    # Names of the nested templates that are turned into Player objects.
    PLAYER_TEMPLATE = "football squad2 player"
    MANAGER_TEMPLATE = "football squad manager"

    def __init__(self, text_page: str, logger: AbstractLogger):
        super().__init__(text_page=text_page, logger=logger)

    def template_name(self) -> str:
        """Return the name of the squad template to look for."""
        return "Football squad"

    def parameters_list(self) -> list:
        """Return the parameter names associated with the squad template."""
        return [
            "no##",
            "manager##",
            "manager_type##",
            "list"
        ]

    def parse(self):
        """Parse ``text_page`` and append one ``Player`` per squad entry.

        Only squad templates that carry a ``list`` argument are processed.
        Sub-templates are read from the matched squad template itself, so
        unrelated templates elsewhere on the page are ignored and entries are
        not repeated when more than one squad template matches.
        """
        self.logger.logMessage(AbstractLogger.INFO, "start extract data")
        parsed = wtp.parse(self.text_page)
        self.logger.logMessage(AbstractLogger.INFO, "end extract data")
        self.logger.logMessage(AbstractLogger.INFO, "start extract template")
        templates = parsed.templates
        if not templates:
            self.logger.logMessage(AbstractLogger.WARNING, "no template found in page")
            return
        if self.template_name() is None:
            self.logger.logMessage(AbstractLogger.ERROR, "no template name set in class you are using")
            return
        if not self.parameters_list():
            self.logger.logMessage(AbstractLogger.ERROR, "no parameters set in class you are using")
            return

        # Normalise the loop-invariant names once.
        squad_name = prepare_str(self.template_name())
        list_name = prepare_str('list')
        player_name = prepare_str(self.PLAYER_TEMPLATE)
        manager_name = prepare_str(self.MANAGER_TEMPLATE)

        for template in templates:
            if prepare_str(template.name) != squad_name:
                continue
            self.logger.logMessage(AbstractLogger.INFO, "start extract parameters")

            has_list_argument = any(
                prepare_str(argument.name) == list_name
                for argument in template.arguments
            )
            if not has_list_argument:
                # TODO: squad templates that use numbered parameters (the
                # "no##" / "manager##" entries in parameters_list) instead of
                # a "list" argument are not extracted yet.
                self.logger.logMessage(AbstractLogger.WARNING, "template has no list parameter, skipped")
                continue

            # template.templates holds only the templates nested in this one.
            for sub_template in template.templates:
                sub_name = prepare_str(sub_template.name)
                if sub_name == player_name:
                    self.list.append(self._build_player(sub_template, is_manager=False))
                elif sub_name == manager_name:
                    self.list.append(self._build_player(sub_template, is_manager=True))

    def _build_player(self, sub_template, is_manager: bool) -> Player:
        """Build a ``Player`` from one player or manager sub-template."""
        # Argument names can carry surrounding whitespace in wikitext, so keys
        # go through prepare_str, the same normaliser used for every other
        # name comparison in this class.
        values = {
            prepare_str(argument.name): argument.value
            for argument in sub_template.arguments
        }
        player = Player()
        # Managers carry a title and no number; players the other way round.
        player.title = values.get(prepare_str("title")) if is_manager else None
        player.name = values.get(prepare_str("name"))
        player.number = None if is_manager else values.get(prepare_str("no"))
        player.is_manager = is_manager
        return player
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
class ClassificationContext:
    """Ask a list of strategies, in order, to classify a value."""

    # Label returned when no strategy produces a truthy result.
    DEFAULT_CLASSIFICATION = "unknown"

    def __init__(self, strategies=None, logger=None):
        """Create a context.

        Args:
            strategies: optional iterable of initial strategies; it is copied,
                so later ``add_strategy`` calls do not touch the caller's list.
            logger: optional logger, stored as ``self.logger``.
        """
        self.strategies = list(strategies) if strategies is not None else []
        self.logger = logger

    def add_strategy(self, strategy):
        """Append ``strategy``; strategies are tried in insertion order."""
        self.strategies.append(strategy)

    def classify(self, value, default=DEFAULT_CLASSIFICATION):
        """Return the first truthy result of ``strategy.classify(value)``.

        Args:
            value: the object handed to every strategy.
            default: returned when no strategy yields a truthy result.
        """
        for strategy in self.strategies:
            result = strategy.classify(value)
            if result:
                return result
        return default
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from pywikibot.scripts.generate_user_files import pywikibot

from tasks.tahdith_tashkila.src.data_translator.value_classification_strategy import ValueClassificationStrategy
from tasks.tahdith_tashkila.src.models.player import Player


class HasArPostClassificationStrategy(ValueClassificationStrategy):
    """Label a model "has_ar_post" when its English page links to an "ar" page."""

    def classify(self, value):
        """Return "has_ar_post", or None when the check raises ValueError."""
        try:
            self.check_if_has(value)
        except ValueError:
            return None
        return "has_ar_post"

    def check_if_has(self, model: Player):
        """Raise ValueError unless the English page has an "ar" language link.

        The page is looked up on the English site by ``model.page_title``.
        A page that does not exist counts as having no such link.
        """
        site = pywikibot.Site("en", "wikipedia")
        page = pywikibot.Page(site, model.page_title)

        arabic_title = None
        if page.exists():
            # Title of the first language link rendered as "[[ar:...".
            arabic_links = (
                link.title
                for link in page.langlinks()
                if str(link).startswith("[[ar:")
            )
            arabic_title = next(arabic_links, None)

        if arabic_title is None:
            raise ValueError("ar page not found")
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pywikibot

from tasks.tahdith_tashkila.src.data_translator.value_classification_strategy import ValueClassificationStrategy
from tasks.tahdith_tashkila.src.models.player import Player


class NotHasArPostClassificationStrategy(ValueClassificationStrategy):
    """Label a model "not_has_ar_post" when its English page has no "ar" link."""

    def classify(self, value):
        """Return "not_has_ar_post", or None when the check raises ValueError."""
        try:
            self.check_if_has(value)
        except ValueError:
            return None
        return "not_has_ar_post"

    def check_if_has(self, model: Player):
        """Raise ValueError when the English page has an "ar" language link.

        The page is looked up on the English site by ``model.page_title``.
        A page that does not exist counts as having no such link, so the
        check passes without raising.
        """
        site = pywikibot.Site("en", "wikipedia")
        page = pywikibot.Page(site, model.page_title)

        arabic_title = None
        if page.exists():
            # Title of the first language link rendered as "[[ar:...".
            arabic_links = (
                link.title
                for link in page.langlinks()
                if str(link).startswith("[[ar:")
            )
            arabic_title = next(arabic_links, None)

        if arabic_title is not None:
            raise ValueError("ar page found")
Empty file.
Loading