[PATCH 2 of 6] scripts: docs-headings: distribute over available CPU cores

Sat Dec 29 21:50:16 UTC 2018

# HG changeset patch
# User Thomas De Schampheleire <thomas.de_schampheleire at nokia.com>
# Date 1546110972 -3600
#      Sat Dec 29 20:16:12 2018 +0100
# Node ID a7df630cfe21e5e66c555b9fa88ef2c3930870b1
# Parent  6caed3c13cb8d631430371b8e1141a724c4c4cae
scripts: docs-headings: distribute over available CPU cores

This script is only relevant for contributors, and the fact that it is quite
slow is normally not a big problem.
However, when running it repeatedly on different commits, in preparation
for sending out a series, its slowness becomes annoying.

Luckily, using multiprocessing.Pool, it is very easy to parallelize.

diff --git a/scripts/docs-headings.py b/scripts/docs-headings.py
--- a/scripts/docs-headings.py
+++ b/scripts/docs-headings.py
@@ -4,6 +4,7 @@
 Consistent formatting of rst section titles
 """
 
+import multiprocessing
 import re
 import subprocess
 
@@ -28,52 +29,59 @@ pystyles = ['#', '*', '=', '-', '^', '"'
 # match on a header line underlined with one of the valid characters
 headermatch = re.compile(r'''\n*(.+)\n([][!"#$%&'()*+,./:;<=>?@\\^_`{|}~-])\2{2,}\n+''', flags=re.MULTILINE)
 
+def process_one(fn):
+    print 'processing %s' % fn
+    s = open(fn).read()
+
+    # find levels and their styles
+    lastpos = 0
+    styles = []
+    for markup in headermatch.findall(s):
+        style = markup[1]
+        if style in styles:
+            stylepos = styles.index(style)
+            if stylepos > lastpos + 1:
+                print 'bad style %r with level %s - was at %s' % (style, stylepos, lastpos)
+        else:
+            stylepos = len(styles)
+            if stylepos > lastpos + 1:
+                print 'bad new style %r - expected %r' % (style, styles[lastpos + 1])
+            else:
+                styles.append(style)
+        lastpos = stylepos
+
+    # remove superfluous spacing (may however be restored by header spacing)
+    s = re.sub(r'''(\n\n)\n*''', r'\1', s, flags=re.MULTILINE)
+
+    if styles:
+        newstyles = pystyles[pystyles.index(styles[0]):]
+
+        def subf(m):
+            title, style = m.groups()
+            level = styles.index(style)
+            before, after = spaces[level]
+            newstyle = newstyles[level]
+            return '\n' * (before + 1) + title + '\n' + newstyle * len(title) + '\n' * (after + 1)
+        s = headermatch.sub(subf, s)
+
+    # remove superfluous spacing when headers are adjacent
+    s = re.sub(r'''(\n.+\n([][!"#$%&'()*+,./:;<=>?@\\^_`{|}~-])\2{2,}\n\n\n)\n*''', r'\1', s, flags=re.MULTILINE)
+    # fix trailing space and spacing before link sections
+    s = s.strip() + '\n'
+    s = re.sub(r'''\n+((?:\.\. _[^\n]*\n)+)$''', r'\n\n\n\1', s)
+
+    open(fn, 'w').write(s)
 
 def main():
-    for fn in subprocess.check_output(['hg', 'loc', 'set:**.rst+kallithea/i18n/how_to']).splitlines():
-        print 'processing %s:' % fn
-        s = open(fn).read()
 
-        # find levels and their styles
-        lastpos = 0
-        styles = []
-        for markup in headermatch.findall(s):
-            style = markup[1]
-            if style in styles:
-                stylepos = styles.index(style)
-                if stylepos > lastpos + 1:
-                    print 'bad style %r with level %s - was at %s' % (style, stylepos, lastpos)
-            else:
-                stylepos = len(styles)
-                if stylepos > lastpos + 1:
-                    print 'bad new style %r - expected %r' % (style, styles[lastpos + 1])
-                else:
-                    styles.append(style)
-            lastpos = stylepos
+    filenames = subprocess.check_output(['hg', 'loc', 'set:**.rst+kallithea/i18n/how_to']).splitlines()
 
-        # remove superfluous spacing (may however be restored by header spacing)
-        s = re.sub(r'''(\n\n)\n*''', r'\1', s, flags=re.MULTILINE)
-
-        if styles:
-            newstyles = pystyles[pystyles.index(styles[0]):]
+    # distribute jobs over multiple cores
+    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
+    args = ((fn) for fn in filenames)
+    result_obj = pool.map_async(process_one, args).get()
 
-            def subf(m):
-                title, style = m.groups()
-                level = styles.index(style)
-                before, after = spaces[level]
-                newstyle = newstyles[level]
-                return '\n' * (before + 1) + title + '\n' + newstyle * len(title) + '\n' * (after + 1)
-            s = headermatch.sub(subf, s)
-
-        # remove superfluous spacing when headers are adjacent
-        s = re.sub(r'''(\n.+\n([][!"#$%&'()*+,./:;<=>?@\\^_`{|}~-])\2{2,}\n\n\n)\n*''', r'\1', s, flags=re.MULTILINE)
-        # fix trailing space and spacing before link sections
-        s = s.strip() + '\n'
-        s = re.sub(r'''\n+((?:\.\. _[^\n]*\n)+)$''', r'\n\n\n\1', s)
-
-        open(fn, 'w').write(s)
-        print subprocess.check_output(['hg', 'diff', fn])
-        print
+    print subprocess.check_output(['hg', 'diff'] + filenames)
 
 if __name__ == '__main__':
     main()