[PATCH 2 of 6] scripts: docs-headings: distribute over available CPU cores
Thomas De Schampheleire
patrickdepinguin at gmail.com
Sat Dec 29 21:50:16 UTC 2018
# HG changeset patch
# User Thomas De Schampheleire <thomas.de_schampheleire at nokia.com>
# Date 1546110972 -3600
# Sat Dec 29 20:16:12 2018 +0100
# Node ID a7df630cfe21e5e66c555b9fa88ef2c3930870b1
# Parent 6caed3c13cb8d631430371b8e1141a724c4c4cae
scripts: docs-headings: distribute over available CPU cores
This script is only relevant for contributors, and the fact that it is quite
slow is normally not a big problem.
However, when running it repeatedly on successive commits, in preparation
for sending out a patch series, its slowness becomes annoying.
Luckily, each file is processed independently, so the work is very easy to parallelize using multiprocessing.Pool.
diff --git a/scripts/docs-headings.py b/scripts/docs-headings.py
--- a/scripts/docs-headings.py
+++ b/scripts/docs-headings.py
@@ -4,6 +4,7 @@
Consistent formatting of rst section titles
"""
+import multiprocessing
import re
import subprocess
@@ -28,52 +29,59 @@ pystyles = ['#', '*', '=', '-', '^', '"'
# match on a header line underlined with one of the valid characters
headermatch = re.compile(r'''\n*(.+)\n([][!"#$%&'()*+,./:;<=>?@\\^_`{|}~-])\2{2,}\n+''', flags=re.MULTILINE)
+def process_one(fn):
+ print 'processing %s' % fn
+ s = open(fn).read()
+
+ # find levels and their styles
+ lastpos = 0
+ styles = []
+ for markup in headermatch.findall(s):
+ style = markup[1]
+ if style in styles:
+ stylepos = styles.index(style)
+ if stylepos > lastpos + 1:
+ print 'bad style %r with level %s - was at %s' % (style, stylepos, lastpos)
+ else:
+ stylepos = len(styles)
+ if stylepos > lastpos + 1:
+ print 'bad new style %r - expected %r' % (style, styles[lastpos + 1])
+ else:
+ styles.append(style)
+ lastpos = stylepos
+
+ # remove superfluous spacing (may however be restored by header spacing)
+ s = re.sub(r'''(\n\n)\n*''', r'\1', s, flags=re.MULTILINE)
+
+ if styles:
+ newstyles = pystyles[pystyles.index(styles[0]):]
+
+ def subf(m):
+ title, style = m.groups()
+ level = styles.index(style)
+ before, after = spaces[level]
+ newstyle = newstyles[level]
+ return '\n' * (before + 1) + title + '\n' + newstyle * len(title) + '\n' * (after + 1)
+ s = headermatch.sub(subf, s)
+
+ # remove superfluous spacing when headers are adjacent
+ s = re.sub(r'''(\n.+\n([][!"#$%&'()*+,./:;<=>?@\\^_`{|}~-])\2{2,}\n\n\n)\n*''', r'\1', s, flags=re.MULTILINE)
+ # fix trailing space and spacing before link sections
+ s = s.strip() + '\n'
+ s = re.sub(r'''\n+((?:\.\. _[^\n]*\n)+)$''', r'\n\n\n\1', s)
+
+ open(fn, 'w').write(s)
def main():
- for fn in subprocess.check_output(['hg', 'loc', 'set:**.rst+kallithea/i18n/how_to']).splitlines():
- print 'processing %s:' % fn
- s = open(fn).read()
- # find levels and their styles
- lastpos = 0
- styles = []
- for markup in headermatch.findall(s):
- style = markup[1]
- if style in styles:
- stylepos = styles.index(style)
- if stylepos > lastpos + 1:
- print 'bad style %r with level %s - was at %s' % (style, stylepos, lastpos)
- else:
- stylepos = len(styles)
- if stylepos > lastpos + 1:
- print 'bad new style %r - expected %r' % (style, styles[lastpos + 1])
- else:
- styles.append(style)
- lastpos = stylepos
+ filenames = subprocess.check_output(['hg', 'loc', 'set:**.rst+kallithea/i18n/how_to']).splitlines()
- # remove superfluous spacing (may however be restored by header spacing)
- s = re.sub(r'''(\n\n)\n*''', r'\1', s, flags=re.MULTILINE)
-
- if styles:
- newstyles = pystyles[pystyles.index(styles[0]):]
+ # distribute jobs over multiple cores
+ pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
+ args = ((fn) for fn in filenames)
+ result_obj = pool.map_async(process_one, args).get()
- def subf(m):
- title, style = m.groups()
- level = styles.index(style)
- before, after = spaces[level]
- newstyle = newstyles[level]
- return '\n' * (before + 1) + title + '\n' + newstyle * len(title) + '\n' * (after + 1)
- s = headermatch.sub(subf, s)
-
- # remove superfluous spacing when headers are adjacent
- s = re.sub(r'''(\n.+\n([][!"#$%&'()*+,./:;<=>?@\\^_`{|}~-])\2{2,}\n\n\n)\n*''', r'\1', s, flags=re.MULTILINE)
- # fix trailing space and spacing before link sections
- s = s.strip() + '\n'
- s = re.sub(r'''\n+((?:\.\. _[^\n]*\n)+)$''', r'\n\n\n\1', s)
-
- open(fn, 'w').write(s)
- print subprocess.check_output(['hg', 'diff', fn])
- print
+ print subprocess.check_output(['hg', 'diff'] + filenames)
if __name__ == '__main__':
main()
More information about the kallithea-general
mailing list