From 43d94ac138efd734253ef5e078f6136a9b5f7d10 Mon Sep 17 00:00:00 2001 From: lokas Date: Thu, 30 May 2024 21:47:32 +0300 Subject: [PATCH 1/7] add Normalize-MediaWiki-link-tables.md --- Normalize-MediaWiki-link-tables.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 Normalize-MediaWiki-link-tables.md diff --git a/Normalize-MediaWiki-link-tables.md b/Normalize-MediaWiki-link-tables.md new file mode 100644 index 00000000..e69de29b From c96d1e6da9a0cc1e4e0e6f2aaf290672d85932f7 Mon Sep 17 00:00:00 2001 From: GergesShamon Date: Thu, 30 May 2024 19:39:56 +0000 Subject: [PATCH 2/7] Normalize MediaWiki link tables --- tasks/users_this_week/README.md | 35 ++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/tasks/users_this_week/README.md b/tasks/users_this_week/README.md index 707da33c..7227df2f 100644 --- a/tasks/users_this_week/README.md +++ b/tasks/users_this_week/README.md @@ -19,10 +19,11 @@ FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = "bot") - and actor_name not in (SELECT replace(pl_title,"_"," ") + and actor_name not in (SELECT replace(lt_title,"_"," ") FROM pagelinks +JOIN linktarget ON lt_id = pl_target_id where pagelinks.pl_from = 7352181 -and pl_namespace = 2) +and lt_namespace = 2) GROUP BY actor_name having COUNT(*) > 1 ORDER BY score DESC,name @@ -48,10 +49,11 @@ where WHERE ug_group = "bot") and log_action = "approve-i" and log_namespace = 0 - and actor_name not in (SELECT replace(pl_title,"_"," ") + and actor_name not in (SELECT replace(lt_title,"_"," ") FROM pagelinks +JOIN linktarget ON lt_id = pl_target_id where pagelinks.pl_from = 7352181 -and pl_namespace = 2) +and lt_namespace = 2) group by actor_name having COUNT(*) > 1 ORDER BY score DESC,name @@ -69,10 +71,11 @@ INNER JOIN actor on logging.log_actor = actor_id where log_timestamp BETWEEN START_WEEK_DATE AND END_WEEK_DATE and log_type in ("block", "protect", "delete", "rights") and actor_name IN (SELECT user_name FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = 'sysop') -and actor_name not in (SELECT replace(pl_title,"_"," ") +and actor_name not in (SELECT replace(lt_title,"_"," ") FROM pagelinks +JOIN linktarget ON lt_id = pl_target_id where pagelinks.pl_from = 7352181 -and pl_namespace = 2) +and lt_namespace = 2) group by logging.log_actor having COUNT(*)>1 ORDER BY score DESC,name @@ -127,10 +130,11 @@ AND parent.rev_timestamp BETWEEN START_WEEK_DATE AND END_WEEK_DATE INNER JOIN user ON user_id = ug_user WHERE ug_group = "bot") and actor_name IN (SELECT user_name FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = 'editor' or 'autoreview') -and actor_name not in (SELECT replace(pl_title,"_"," ") +and actor_name not in (SELECT replace(lt_title,"_"," ") FROM pagelinks +JOIN linktarget ON lt_id = pl_target_id where pagelinks.pl_from = 7352181 -and pl_namespace = 2) +and lt_namespace = 2) GROUP BY actor_name having score > 0 ORDER BY score DESC,name @@ -152,10 +156,11 @@ select actor_name as name, COUNT(*) as score and actor_name Not IN (SELECT user_name FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = 'bot') and ucase(actor_name) not like ucase("%BOT") COLLATE utf8mb4_general_ci and actor_name not like "%بوت%" collate utf8mb4_general_ci -and actor_name not in (SELECT replace(pl_title,"_"," ") +and actor_name not in (SELECT replace(lt_title,"_"," ") FROM pagelinks +JOIN linktarget ON lt_id = pl_target_id where pagelinks.pl_from = 7352181 -and pl_namespace = 2) +and lt_namespace = 2) group by logging.log_actor having COUNT(*)>1 ORDER BY score DESC,name @@ -180,10 +185,11 @@ AND rev.rev_timestamp BETWEEN START_WEEK_DATE AND END_WEEK_DATE AND ucase(actor_name) NOT LIKE ucase("%BOT") COLLATE utf8mb4_general_ci AND actor_name NOT LIKE "%بوت%" collate utf8mb4_general_ci AND actor_name NOT IN (SELECT user_name FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = "bot") -and actor_name not in (SELECT replace(pl_title,"_"," ") +and actor_name not in (SELECT replace(lt_title,"_"," ") FROM pagelinks +JOIN linktarget ON lt_id = pl_target_id where pagelinks.pl_from = 7352181 -and pl_namespace = 2) +and lt_namespace = 2) GROUP BY actor_name HAVING score > 0 ORDER BY score DESC,name @@ -215,10 +221,11 @@ AND ipb_user IS NULL AND ucase(actor_name) NOT LIKE ucase("%BOT") COLLATE utf8mb4_general_ci AND actor_name NOT LIKE "%بوت%" collate utf8mb4_general_ci and actor_name NOT IN (SELECT user_name FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = 'editor' or 'autoreview' or 'bot') -and actor_name not in (SELECT replace(pl_title,"_"," ") +and actor_name not in (SELECT replace(lt_title,"_"," ") FROM pagelinks +JOIN linktarget ON lt_id = pl_target_id where pagelinks.pl_from = 7352181 -and pl_namespace = 2) +and lt_namespace = 2) GROUP BY actor_name ORDER BY score DESC,name LIMIT 10; From 6956169397be2c7d895d65c4619c8251a115b72d Mon Sep 17 00:00:00 2001 From: lokas Date: Thu, 30 May 2024 22:53:11 +0300 Subject: [PATCH 3/7] fix query --- tasks/users_this_week/README.md | 9 +++++---- tasks/users_this_week/data.py | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tasks/users_this_week/README.md b/tasks/users_this_week/README.md index 707da33c..7b91591e 100644 --- a/tasks/users_this_week/README.md +++ b/tasks/users_this_week/README.md @@ -19,10 +19,11 @@ FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = "bot") - and actor_name not in (SELECT replace(pl_title,"_"," ") -FROM pagelinks -where pagelinks.pl_from = 7352181 -and pl_namespace = 2) + and actor_name not in (SELECT replace(lt_title, "_", " ") + FROM pagelinks + inner join linktarget ON lt_id = pl_target_id + WHERE pagelinks.pl_from = 7352181 + AND lt_namespace = 2) GROUP BY actor_name having COUNT(*) > 1 ORDER BY score DESC,name diff --git a/tasks/users_this_week/data.py b/tasks/users_this_week/data.py index acd2e36d..68653910 100644 --- a/tasks/users_this_week/data.py +++ b/tasks/users_this_week/data.py @@ -32,10 +32,11 @@ FROM user_groups INNER JOIN user ON user_id = ug_user WHERE ug_group = "bot") - and actor_name not in (SELECT replace(pl_title,"_"," ") -FROM pagelinks -where pagelinks.pl_from = 7352181 -and pl_namespace = 2) + and actor_name not in (SELECT replace(lt_title, "_", " ") + FROM pagelinks + inner join linktarget ON lt_id = pl_target_id + WHERE pagelinks.pl_from = 7352181 + AND lt_namespace = 2) GROUP BY actor_name having COUNT(*) > 1 ORDER BY score DESC,name From fd6e8f6e3d31d6d0745a124b7508ab583e4cafe8 Mon Sep 17 00:00:00 2001 From: lokas Date: Thu, 30 May 2024 23:18:09 +0300 Subject: [PATCH 4/7] update query missingtopics --- tasks/missingtopics/update.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tasks/missingtopics/update.py b/tasks/missingtopics/update.py index 0748ac6a..89216f66 100644 --- a/tasks/missingtopics/update.py +++ b/tasks/missingtopics/update.py @@ -50,12 +50,13 @@ def has_arabic_chars(text): """ db = Database() -db.query = """select replace(pl_title,"مقالات_مطلوبة_حسب_الاختصاص/","") as page_title from pagelinks +db.query = """select replace(lt_title,"مقالات_مطلوبة_حسب_الاختصاص/","") as page_title from pagelinks +inner join linktarget ON lt_id = pl_target_id where pl_from in (676775) -and pl_namespace in (4) +and lt_namespace in (4) and pl_from_namespace in (4) -and pl_title not like "%وصلة_حمراء%" -order by pl_title +and lt_title not like "%وصلة_حمراء%" +order by lt_title """ db.get_content_from_database() From 918495d257938210d72694f0b7d3f13d56bf9f28 Mon Sep 17 00:00:00 2001 From: lokas Date: Fri, 31 May 2024 08:38:05 +0300 Subject: [PATCH 5/7] update query of list_of_portals_by_number_of_articles --- tasks/statistics/list_of_portals_by_number_of_articles.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tasks/statistics/list_of_portals_by_number_of_articles.py b/tasks/statistics/list_of_portals_by_number_of_articles.py index e59a018c..e98d6b0c 100644 --- a/tasks/statistics/list_of_portals_by_number_of_articles.py +++ b/tasks/statistics/list_of_portals_by_number_of_articles.py @@ -2,7 +2,11 @@ # Set the parameters for the update query = """SELECT main.page_title as portal_name, COUNT(*) - 1 as sub_page_count, - (SELECT COUNT(*) FROM pagelinks WHERE pl_title = main.page_title and pl_from_namespace = 0 and pl_namespace = 100) as links_count + ( + SELECT COUNT(*) FROM pagelinks + inner join linktarget ON lt_id = pl_target_id + WHERE lt_title = main.page_title and pl_from_namespace = 0 and lt_namespace = 100 + ) as links_count FROM page AS p INNER JOIN ( SELECT page_title From 90818f39174191105b6cbf27eb282c249d893e18 Mon Sep 17 00:00:00 2001 From: lokas Date: Fri, 31 May 2024 08:41:11 +0300 Subject: [PATCH 6/7] update query of list_of_contributors.py --- tasks/statistics/WikiProject/list_of_contributors.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tasks/statistics/WikiProject/list_of_contributors.py b/tasks/statistics/WikiProject/list_of_contributors.py index 516931a1..66c5584b 100644 --- a/tasks/statistics/WikiProject/list_of_contributors.py +++ b/tasks/statistics/WikiProject/list_of_contributors.py @@ -5,11 +5,12 @@ query = """select actor_name as "username", count(rev_actor) as "edit_count", IF(actor_name in ( - select replace(pl_title,"_"," ") from pagelinks + select replace(lt_title,"_"," ") from pagelinks join page on page.page_id = pagelinks.pl_from + inner join linktarget ON lt_id = pl_target_id where page.page_namespace in (4) and page_title like "مشروع_ويكي_العراق/المساهمون" - and pl_namespace in (2) + and lt_namespace in (2) and pl_from_namespace in (4) ), "YES", "NO") as "in_WikiProject" From 2b7b19c0aa5df8a0471432ebc60b11a69ede8c0e Mon Sep 17 00:00:00 2001 From: lokas Date: Fri, 31 May 2024 08:45:57 +0300 Subject: [PATCH 7/7] update query of articles_in_which_there_is_a_link_to_user_pages --- ..._in_which_there_is_a_link_to_user_pages.py | 39 +++++++++++++------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/tasks/statistics/articles_in_which_there_is_a_link_to_user_pages.py b/tasks/statistics/articles_in_which_there_is_a_link_to_user_pages.py index a8c62d4d..97cd668d 100644 --- a/tasks/statistics/articles_in_which_there_is_a_link_to_user_pages.py +++ b/tasks/statistics/articles_in_which_there_is_a_link_to_user_pages.py @@ -1,18 +1,33 @@ from tasks.statistics.module import UpdatePage, ArticleTables, index # Set the parameters for the update -query = """select page.page_title as ll_page_title,pagelinks.pl_title as ll_page_to_title,pagelinks.pl_namespace as ll_pl_namespace -from page - inner join pagelinks - on pagelinks.pl_from = page.page_id -where pagelinks.pl_from_namespace = 0 - and (pagelinks.pl_namespace = 2 or pagelinks.pl_namespace = 3) - and page.page_namespace = 0 - and page.page_is_redirect = 0 - and page.page_id not in (select templatelinks.tl_from from templatelinks - join linktarget on linktarget.lt_id = templatelinks.tl_target_id - where linktarget.lt_title in (select pl_title from pagelinks where pl_from = 9043549) and templatelinks.tl_from_namespace = 0 ) - and page.page_title not in (select pl_title from pagelinks where pl_from = 9043549);""" +query = """SELECT page.page_title AS ll_page_title, + linktarget.lt_title AS ll_page_to_title, + linktarget.lt_namespace AS ll_pl_namespace +FROM page +INNER JOIN pagelinks ON pagelinks.pl_from = page.page_id +inner join linktarget ON lt_id = pl_target_id +WHERE pagelinks.pl_from_namespace = 0 + AND (linktarget.lt_namespace = 2 + OR linktarget.lt_namespace = 3) + AND page.page_namespace = 0 + AND page.page_is_redirect = 0 + AND page.page_id not in + (SELECT templatelinks.tl_from + FROM templatelinks + JOIN linktarget ON linktarget.lt_id = templatelinks.tl_target_id + WHERE linktarget.lt_title in + (SELECT lt_title + FROM pagelinks + inner join linktarget ON lt_id = pl_target_id + WHERE pl_from = 9043549) + AND templatelinks.tl_from_namespace = 0 ) + AND page.page_title not in + (SELECT lt_title + FROM pagelinks + inner join linktarget ON lt_id = pl_target_id + WHERE pl_from = 9043549 + );""" file_path = 'stub/articles_in_which_there_is_a_link_to_user_pages.txt' page_name = "ويكيبيديا:تقارير قاعدة البيانات/مقالات يوجد فيها وصلة إلى صفحات المستخدمين"