From 63537e693cc997e3a6a8fbad3d99dd3d0463d39a Mon Sep 17 00:00:00 2001 From: xqt Date: Sat, 18 Jun 2022 14:27:08 +0200 Subject: [PATCH] [IMPR] Improvements for interwiki.py - add some type hints - use dict comprehensions to create dicts - remove while loop in Subject.assemble() because input_choice only continues if the given input is valid. Therefore the 'r'eject answer need not be handled explicitly because it just continues the for loop. - simplify sanity check and use error() to print the message - no OSError is expected in Subject.reportBacklinks(); remove try statement - simplify InterwikiBot.firstSubject() - remove InterwikiBot.__len__ and use self.subjects directly in InterwikiBot.isDone() Change-Id: I583b002e26db9a6701ee00a53482a4f725e21ee8 --- scripts/interwiki.py | 189 +++++++++++++++++++------------------------ 1 file changed, 81 insertions(+), 108 deletions(-) diff --git a/scripts/interwiki.py b/scripts/interwiki.py index 76e75adbc8..c353b17005 100755 --- a/scripts/interwiki.py +++ b/scripts/interwiki.py @@ -339,6 +339,7 @@ from collections import Counter, defaultdict from contextlib import suppress from textwrap import fill +from typing import Optional import pywikibot from pywikibot import ( @@ -349,6 +350,7 @@ textlib, titletranslate, ) +from pywikibot.backports import Iterable from pywikibot.bot import ListOption, OptionHandler, StandardOption from pywikibot.cosmetic_changes import moved_links from pywikibot.exceptions import ( @@ -457,17 +459,16 @@ class InterwikiBotConfig: summary = '' repository = False - def note(self, text) -> None: + def note(self, text: str) -> None: """Output a notification message with. The text will be printed only if conf.quiet isn't set. 
:param text: text to be shown - :type text: str """ if not self.quiet: pywikibot.output('NOTE: ' + text) - def readOptions(self, option) -> bool: + def readOptions(self, option: str) -> bool: """Read all commandline parameters for the global container.""" arg, _, value = option.partition(':') if not arg.startswith('-'): @@ -1337,7 +1338,6 @@ def assemble(self): errorCount += 1 # See if new{} contains any problematic values - result = {} for site, pages in new.items(): if len(pages) > 1: errorCount += 1 @@ -1345,9 +1345,7 @@ def assemble(self): if not errorCount and not self.conf.select: # no errors, so all lists have only one item - for site, pages in new.items(): - result[site] = pages[0] - return result + return {site: pages[0] for site, pages in new.items()} # There are any errors. if config.interwiki_graph: @@ -1359,6 +1357,7 @@ def assemble(self): if self.conf.autonomous: return None + result = {} # First loop over the ones that have more solutions for site, pages in new.items(): if len(pages) <= 1: @@ -1394,28 +1393,24 @@ def assemble(self): page2 = pages[0] pywikibot.output('Found link to {} in:'.format(page2)) self.whereReport(page2, indent=4) - while True: - if acceptall: - answer = 'a' - else: - # TODO: allow answer to repeat previous or go back - # after a mistake - answer = pywikibot.input_choice( - 'What should be done?', - [('accept', 'a'), ('reject', 'r'), - ('give up', 'g'), ('accept all', 'l')], 'a', - automatic_quit=False) - if answer == 'l': # accept all - acceptall = True - answer = 'a' - if answer == 'a': # accept this one - result[site] = pages[0] - break - if answer == 'g': # give up - return None - if answer == 'r': # reject - # None acceptable - break + + # TODO: allow answer to repeat previous or go back + # after a mistake + answer = 'a' if acceptall else pywikibot.input_choice( + 'What should be done?', + [('accept', 'a'), ('reject', 'r'), ('give up', 'g'), + ('accept all', 'l')], 'a', automatic_quit=False) + + if answer == 'l': # accept 
all + acceptall = True + answer = 'a' + + if answer == 'a': # accept this one + result[site] = pages[0] + elif answer == 'g': # give up + return None + # else reject if None acceptable + return result def finish(self): @@ -1593,12 +1588,8 @@ def replaceLinks(self, page, newPages) -> bool: # site. pltmp = new[page.site] if pltmp != page: - s = 'None' - if pltmp is not None: - s = pltmp - pywikibot.output( - 'BUG>>> {} is not in the list of new links! Found {}.' - .format(page, s)) + pywikibot.error('{} is not in the list of new links! Found {}.' + .format(page, pltmp)) raise SaveError('BUG: sanity check failed') # Avoid adding an iw link back to itself @@ -1611,9 +1602,7 @@ def replaceLinks(self, page, newPages) -> bool: del new[stmp] # Put interwiki links into a map - old = {} - for page2 in interwikis: - old[page2.site] = page2 + old = {p.site: p for p in interwikis} # Check what needs to get done mods, mcomment, adding, removing, modifying = compareLanguages( @@ -1642,9 +1631,9 @@ def replaceLinks(self, page, newPages) -> bool: self.conf.note('No changes needed on page {}'.format(page)) return False - pywikibot.output('<>Updating links on page {}.<>' - .format(page)) - pywikibot.output('Changes to be made: {}'.format(mods)) + pywikibot.info('<>Updating links on page {}.' + .format(page)) + pywikibot.info('Changes to be made: {}'.format(mods)) oldtext = page.get() template = (page.namespace() == 10) newtext = textlib.replaceLanguageLinks(oldtext, new, @@ -1653,14 +1642,12 @@ def replaceLinks(self, page, newPages) -> bool: # This is for now. Later there should be different funktions for each # kind if not botMayEdit(page): + pywikibot.info('SKIPPING: {} '.format(page), newline=False) if template: - pywikibot.output( - 'SKIPPING: {} should have interwiki links on subpage.' - .format(page)) + msg = 'should have interwiki links on subpage.' else: - pywikibot.output( - 'SKIPPING: {} is under construction or to be deleted.' 
- .format(page)) + msg = 'is under construction or to be deleted.' + pywikibot.info(msg) return False if newtext == oldtext: @@ -1773,54 +1760,49 @@ def reportBacklinks(self, new, updatedSites) -> None: # use sets because searching an element is faster than in lists expectedPages = set(new.values()) expectedSites = set(new) - try: - for site in expectedSites - set(updatedSites): - page = new[site] - if page.section(): - continue + for site in expectedSites - set(updatedSites): + page = new[site] + if page.section(): + continue + + try: + linkedPages = {pywikibot.Page(link) + for link in page.iterlanglinks()} + except NoPageError: + pywikibot.warning('Page {} does no longer exist?!' + .format(page)) + break + + # To speed things up, create a dictionary which maps sites + # to pages. This assumes that there is only one interwiki + # link per language. + linkedPagesDict = {p.site: p for p in linkedPages} + for expectedPage in expectedPages - linkedPages: + if expectedPage == page: + continue try: - linkedPages = {pywikibot.Page(link) - for link in page.iterlanglinks()} - except NoPageError: + linkedPage = linkedPagesDict[expectedPage.site] pywikibot.warning( - 'Page {} does no longer exist?!'.format(page)) - break - - # To speed things up, create a dictionary which maps sites - # to pages. This assumes that there is only one interwiki - # link per language. 
- linkedPagesDict = {} - for linkedPage in linkedPages: - linkedPagesDict[linkedPage.site] = linkedPage - for expectedPage in expectedPages - linkedPages: - if expectedPage == page: - continue - try: - linkedPage = linkedPagesDict[expectedPage.site] - pywikibot.warning( - '{}: {} does not link to {} but to {}' - .format(page.site.family.name, - page, expectedPage, linkedPage)) - except KeyError: - if not expectedPage.site.is_data_repository(): - pywikibot.warning('{}: {} does not link to {}' - .format(page.site.family.name, - page, expectedPage)) - # Check for superfluous links - for linkedPage in linkedPages: - if linkedPage in expectedPages: - continue - # Check whether there is an alternative page on - # that language. - # In this case, it was already reported above. - if linkedPage.site not in expectedSites: - pywikibot.warning('{}: {} links to incorrect {}' + '{}: {} does not link to {} but to {}' + .format(page.site.family.name, + page, expectedPage, linkedPage)) + except KeyError: + if not expectedPage.site.is_data_repository(): + pywikibot.warning('{}: {} does not link to {}' .format(page.site.family.name, - page, linkedPage)) - - except OSError: - pywikibot.error('could not report backlinks') + page, expectedPage)) + # Check for superfluous links + for linkedPage in linkedPages: + if linkedPage in expectedPages: + continue + # Check whether there is an alternative page on + # that language. + # In this case, it was already reported above. 
+ if linkedPage.site not in expectedSites: + pywikibot.warning('{}: {} links to incorrect {}' + .format(page.site.family.name, + page, linkedPage)) class InterwikiBot: @@ -1935,11 +1917,9 @@ def generateMore(self, number) -> None: # for loop was exited by break statement self.pageGenerator = None - def firstSubject(self): + def firstSubject(self) -> Optional[Subject]: """Return the first subject that is still being worked on.""" - if self.subjects: - return self.subjects[0] - return None + return self.subjects[0] if self.subjects else None def maxOpenSite(self): """ @@ -1982,10 +1962,9 @@ def selectQuerySite(self): self.generateMore(self.conf.maxquerysize - mycount) except ServerError: # Could not extract allpages special page? - pywikibot.output( - 'ERROR: could not retrieve more pages. ' - 'Will try again in {} seconds' - .format(timeout)) + pywikibot.error('could not retrieve more pages. ' + 'Will try again in {} seconds' + .format(timeout)) pywikibot.sleep(timeout) timeout *= 2 else: @@ -2049,9 +2028,9 @@ def queryStep(self) -> None: subj.finish() del self.subjects[i] - def isDone(self): + def isDone(self) -> bool: """Check whether there is still more work to do.""" - return not self and self.pageGenerator is None + return not self.subjects and self.pageGenerator is None def plus(self, site, count: int = 1) -> None: """Helper routine that the Subject class expects in a counter.""" @@ -2067,10 +2046,6 @@ def run(self) -> None: while not self.isDone(): self.queryStep() - def __len__(self) -> int: - """Return length of subjects.""" - return len(self.subjects) - def compareLanguages(old, new, insite, summary): """Compare changes and setup i18n message.""" @@ -2090,7 +2065,6 @@ def fmt(d, site): return str(d[site]) else: # Use short format, just the language code - def fmt(d, site): return site.code @@ -2262,11 +2236,10 @@ def read_dump(self): namespace=self.next_namespace, filterredir=False) - def write_dump(self, iterable, append: bool = True) -> None: + def 
write_dump(self, iterable: Iterable, append: bool = True) -> None: """Write dump file. :param iterable: an iterable of page titles to be dumped. - :type iterable: iterable :param append: if a dump already exits, append the page titles to it if True else overwrite it. """