diff --git a/Normalize-MediaWiki-link-tables.md b/Normalize-MediaWiki-link-tables.md new file mode 100644 index 00000000..e69de29b diff --git a/tasks/missingtopics/update.py b/tasks/missingtopics/update.py index 0748ac6a..89216f66 100644 --- a/tasks/missingtopics/update.py +++ b/tasks/missingtopics/update.py @@ -50,12 +50,13 @@ def has_arabic_chars(text): """ db = Database() -db.query = """select replace(pl_title,"مقالات_مطلوبة_حسب_الاختصاص/","") as page_title from pagelinks +db.query = """select replace(lt_title,"مقالات_مطلوبة_حسب_الاختصاص/","") as page_title from pagelinks +inner join linktarget ON lt_id = pl_target_id where pl_from in (676775) -and pl_namespace in (4) +and lt_namespace in (4) and pl_from_namespace in (4) -and pl_title not like "%وصلة_حمراء%" -order by pl_title +and lt_title not like "%وصلة_حمراء%" +order by lt_title """ db.get_content_from_database() diff --git a/tasks/statistics/WikiProject/list_of_contributors.py b/tasks/statistics/WikiProject/list_of_contributors.py index 516931a1..66c5584b 100644 --- a/tasks/statistics/WikiProject/list_of_contributors.py +++ b/tasks/statistics/WikiProject/list_of_contributors.py @@ -5,11 +5,12 @@ query = """select actor_name as "username", count(rev_actor) as "edit_count", IF(actor_name in ( - select replace(pl_title,"_"," ") from pagelinks + select replace(lt_title,"_"," ") from pagelinks join page on page.page_id = pagelinks.pl_from + inner join linktarget ON lt_id = pl_target_id where page.page_namespace in (4) and page_title like "مشروع_ويكي_العراق/المساهمون" - and pl_namespace in (2) + and lt_namespace in (2) and pl_from_namespace in (4) ), "YES", "NO") as "in_WikiProject" diff --git a/tasks/statistics/articles_in_which_there_is_a_link_to_user_pages.py b/tasks/statistics/articles_in_which_there_is_a_link_to_user_pages.py index a8c62d4d..97cd668d 100644 --- a/tasks/statistics/articles_in_which_there_is_a_link_to_user_pages.py +++ b/tasks/statistics/articles_in_which_there_is_a_link_to_user_pages.py @@ -1,18 +1,33 @@ from tasks.statistics.module import UpdatePage, ArticleTables, index # Set the parameters for the update -query = """select page.page_title as ll_page_title,pagelinks.pl_title as ll_page_to_title,pagelinks.pl_namespace as ll_pl_namespace -from page - inner join pagelinks - on pagelinks.pl_from = page.page_id -where pagelinks.pl_from_namespace = 0 - and (pagelinks.pl_namespace = 2 or pagelinks.pl_namespace = 3) - and page.page_namespace = 0 - and page.page_is_redirect = 0 - and page.page_id not in (select templatelinks.tl_from from templatelinks - join linktarget on linktarget.lt_id = templatelinks.tl_target_id - where linktarget.lt_title in (select pl_title from pagelinks where pl_from = 9043549) and templatelinks.tl_from_namespace = 0 ) - and page.page_title not in (select pl_title from pagelinks where pl_from = 9043549);""" +query = """SELECT page.page_title AS ll_page_title, + linktarget.lt_title AS ll_page_to_title, + linktarget.lt_namespace AS ll_pl_namespace +FROM page +INNER JOIN pagelinks ON pagelinks.pl_from = page.page_id +inner join linktarget ON lt_id = pl_target_id +WHERE pagelinks.pl_from_namespace = 0 + AND (linktarget.lt_namespace = 2 + OR linktarget.lt_namespace = 3) + AND page.page_namespace = 0 + AND page.page_is_redirect = 0 + AND page.page_id not in + (SELECT templatelinks.tl_from + FROM templatelinks + JOIN linktarget ON linktarget.lt_id = templatelinks.tl_target_id + WHERE linktarget.lt_title in + (SELECT lt_title + FROM pagelinks + inner join linktarget ON lt_id = pl_target_id + WHERE pl_from = 9043549) + AND templatelinks.tl_from_namespace = 0 ) + AND page.page_title not in + (SELECT lt_title + FROM pagelinks + inner join linktarget ON lt_id = pl_target_id + WHERE pl_from = 9043549 + );""" file_path = 'stub/articles_in_which_there_is_a_link_to_user_pages.txt' page_name = "ويكيبيديا:تقارير قاعدة البيانات/مقالات يوجد فيها وصلة إلى صفحات المستخدمين" diff --git a/tasks/statistics/list_of_portals_by_number_of_articles.py b/tasks/statistics/list_of_portals_by_number_of_articles.py index e59a018c..e98d6b0c 100644 --- a/tasks/statistics/list_of_portals_by_number_of_articles.py +++ b/tasks/statistics/list_of_portals_by_number_of_articles.py @@ -2,7 +2,11 @@ # Set the parameters for the update query = """SELECT main.page_title as portal_name, COUNT(*) - 1 as sub_page_count, - (SELECT COUNT(*) FROM pagelinks WHERE pl_title = main.page_title and pl_from_namespace = 0 and pl_namespace = 100) as links_count + ( + SELECT COUNT(*) FROM pagelinks + inner join linktarget ON lt_id = pl_target_id + WHERE lt_title = main.page_title and pl_from_namespace = 0 and lt_namespace = 100 + ) as links_count FROM page AS p INNER JOIN ( SELECT page_title diff --git a/tasks/users_this_week/README.md b/tasks/users_this_week/README.md index 707da33c..a93b21dc 100644 --- a/tasks/users_this_week/README.md +++ b/tasks/users_this_week/README.md @@ -19,10 +19,12 @@ FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = "bot") - and actor_name not in (SELECT replace(pl_title,"_"," ") -FROM pagelinks -where pagelinks.pl_from = 7352181 -and pl_namespace = 2) + and actor_name not in (SELECT replace(lt_title, "_", " ") + FROM pagelinks + inner join linktarget ON lt_id = pl_target_id + WHERE pagelinks.pl_from = 7352181 + AND lt_namespace = 2) + GROUP BY actor_name having COUNT(*) > 1 ORDER BY score DESC,name @@ -48,10 +50,11 @@ where WHERE ug_group = "bot") and log_action = "approve-i" and log_namespace = 0 - and actor_name not in (SELECT replace(pl_title,"_"," ") + and actor_name not in (SELECT replace(lt_title,"_"," ") FROM pagelinks +JOIN linktarget ON lt_id = pl_target_id where pagelinks.pl_from = 7352181 -and pl_namespace = 2) +and lt_namespace = 2) group by actor_name having COUNT(*) > 1 ORDER BY score DESC,name @@ -69,10 +72,11 @@ INNER JOIN actor on logging.log_actor = actor_id where log_timestamp BETWEEN START_WEEK_DATE AND END_WEEK_DATE and log_type in ("block", "protect", "delete", "rights") and actor_name IN (SELECT user_name FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = 'sysop') -and actor_name not in (SELECT replace(pl_title,"_"," ") +and actor_name not in (SELECT replace(lt_title,"_"," ") FROM pagelinks +JOIN linktarget ON lt_id = pl_target_id where pagelinks.pl_from = 7352181 -and pl_namespace = 2) +and lt_namespace = 2) group by logging.log_actor having COUNT(*)>1 ORDER BY score DESC,name @@ -127,10 +131,11 @@ AND parent.rev_timestamp BETWEEN START_WEEK_DATE AND END_WEEK_DATE INNER JOIN user ON user_id = ug_user WHERE ug_group = "bot") and actor_name IN (SELECT user_name FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = 'editor' or 'autoreview') -and actor_name not in (SELECT replace(pl_title,"_"," ") +and actor_name not in (SELECT replace(lt_title,"_"," ") FROM pagelinks +JOIN linktarget ON lt_id = pl_target_id where pagelinks.pl_from = 7352181 -and pl_namespace = 2) +and lt_namespace = 2) GROUP BY actor_name having score > 0 ORDER BY score DESC,name @@ -152,10 +157,11 @@ select actor_name as name, COUNT(*) as score and actor_name Not IN (SELECT user_name FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = 'bot') and ucase(actor_name) not like ucase("%BOT") COLLATE utf8mb4_general_ci and actor_name not like "%بوت%" collate utf8mb4_general_ci -and actor_name not in (SELECT replace(pl_title,"_"," ") +and actor_name not in (SELECT replace(lt_title,"_"," ") FROM pagelinks +JOIN linktarget ON lt_id = pl_target_id where pagelinks.pl_from = 7352181 -and pl_namespace = 2) +and lt_namespace = 2) group by logging.log_actor having COUNT(*)>1 ORDER BY score DESC,name @@ -180,10 +186,11 @@ AND rev.rev_timestamp BETWEEN START_WEEK_DATE AND END_WEEK_DATE AND ucase(actor_name) NOT LIKE ucase("%BOT") COLLATE utf8mb4_general_ci AND actor_name NOT LIKE "%بوت%" collate utf8mb4_general_ci AND actor_name NOT IN (SELECT user_name FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = "bot") -and actor_name not in (SELECT replace(pl_title,"_"," ") +and actor_name not in (SELECT replace(lt_title,"_"," ") FROM pagelinks +JOIN linktarget ON lt_id = pl_target_id where pagelinks.pl_from = 7352181 -and pl_namespace = 2) +and lt_namespace = 2) GROUP BY actor_name HAVING score > 0 ORDER BY score DESC,name @@ -215,10 +222,11 @@ AND ipb_user IS NULL AND ucase(actor_name) NOT LIKE ucase("%BOT") COLLATE utf8mb4_general_ci AND actor_name NOT LIKE "%بوت%" collate utf8mb4_general_ci and actor_name NOT IN (SELECT user_name FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = 'editor' or 'autoreview' or 'bot') -and actor_name not in (SELECT replace(pl_title,"_"," ") +and actor_name not in (SELECT replace(lt_title,"_"," ") FROM pagelinks +JOIN linktarget ON lt_id = pl_target_id where pagelinks.pl_from = 7352181 -and pl_namespace = 2) +and lt_namespace = 2) GROUP BY actor_name ORDER BY score DESC,name LIMIT 10; diff --git a/tasks/users_this_week/data.py b/tasks/users_this_week/data.py index acd2e36d..68653910 100644 --- a/tasks/users_this_week/data.py +++ b/tasks/users_this_week/data.py @@ -32,10 +32,11 @@ FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = "bot") - and actor_name not in (SELECT replace(pl_title,"_"," ") -FROM pagelinks -where pagelinks.pl_from = 7352181 -and pl_namespace = 2) + and actor_name not in (SELECT replace(lt_title, "_", " ") + FROM pagelinks + inner join linktarget ON lt_id = pl_target_id + WHERE pagelinks.pl_from = 7352181 + AND lt_namespace = 2) GROUP BY actor_name having COUNT(*) > 1 ORDER BY score DESC,name