memory leak strikes back

Mon Aug 31 11:06:06 UTC 2020

On 29/08/2020 12.26, Thomas De Schampheleire wrote:
> Hi Łukasz,
> 
> Apart from some code changes we did to improve/eliminate some caching mechanisms, we also added a note to our documentation to clarify the expectations. See the bottom of the following page:
> https://kallithea.readthedocs.io/en/stable/overview.html
> 
> Basically, we can not avoid certain memory growth in the context of a single process, and the recommended approach is to ensure that the web server process recreates its workers regularly.
> 
> For gearbox/waitress, this can be done automatically with the setting threadpool_max_requests, which is set to 100 by default in the ini file produced by 'kallithea-cli config-create'.
> 
> Is this setting present in your ini file?
> Could you share the ini file?
> 

No, it is not. I generated a new config using kallithea-cli and manually moved the changes over from the old one.
I attached it below.

I just generated a new one and I do not see a threadpool_max_requests option in it.

Regards,
Łukasz

###################################################################################
###################################################################################
## Kallithea config file generated with kallithea-cli 0.6.1                      ##
##                                                                               ##
## The %(here)s variable will generally be replaced with the parent directory of ##
## this file. Other use of % must be escaped as %% .                             ##
###################################################################################
###################################################################################

[DEFAULT]
## Global settings. The [app:main] section reads several of these back through
## its 'get trace_errors.* = ...' lines.

################################################################################
## Email settings                                                             ##
##                                                                            ##
## Refer to the documentation ("Email settings") for more details.            ##
##                                                                            ##
## It is recommended to use a valid sender address that passes access         ##
## validation and spam filtering in mail servers.                             ##
################################################################################

## 'From' header for application emails. You can optionally add a name.
## Default:
#app_email_from = Kallithea
## Examples:
#app_email_from = Kallithea <kallithea-noreply@example.com>
#app_email_from = kallithea-noreply@example.com
app_email_from = CodeReview <codereview@domain.pl>

## Subject prefix for application emails.
## A space between this prefix and the real subject is automatically added.
## Default:
#email_prefix =
## Example:
#email_prefix = [Kallithea]

## Recipients for error emails and fallback recipients of application mails.
## Multiple addresses can be specified, comma-separated.
## Only addresses are allowed, do not add any name part.
## Default:
#email_to =
## Examples:
#email_to = admin@example.com
#email_to = admin@example.com,another_admin@example.com
email_to = root@domain.pl

## 'From' header for error emails. You can optionally add a name.
## Default: (none)
## Examples:
#error_email_from = Kallithea Errors <kallithea-noreply@example.com>
#error_email_from = kallithea_errors@example.com
error_email_from = CodeReview <codereview@domain.pl>

## SMTP server settings
## If specifying credentials, make sure to use secure connections.
## Default: Send unencrypted unauthenticated mails to the specified smtp_server.
## For "SSL", use smtp_use_ssl = true and smtp_port = 465.
## For "STARTTLS", use smtp_use_tls = true and smtp_port = 587.
## This installation sends unencrypted, unauthenticated mail via localhost:25.
smtp_server = localhost
smtp_username =
smtp_password =
smtp_port = 25
smtp_use_ssl = false
smtp_use_tls = false

## Server section used by 'gearbox serve'
[server:main]
## Serve with Waitress
use = egg:waitress#main

## Listen address and port
host = 127.0.0.1
port = 5000

## Worker thread count
threads = 1
## Largest accepted request body: 107374182400 bytes (100 * 1024^3)
max_request_body_size = 107374182400
## Uncomment to use poll() instead of select(); this fixes fd limits but may
## not work on old Windows systems.
#asyncore_use_poll = True

## Middleware that hosts the WSGI application below the URL prefix /codereview
[filter:proxy-prefix]
prefix = /codereview
use = egg:PasteDeploy#prefix

[app:main]
## The application served by [server:main]: Kallithea, loaded from its egg.
use = egg:kallithea
## enable proxy prefix middleware (defined in the [filter:proxy-prefix] section)
filter-with = proxy-prefix

full_stack = true
static_files = true

## Internationalization (see setup documentation for details)
## By default, the languages requested by the browser are used if available, with English as default.
## Set i18n.enabled=false to disable automatic language choice.
i18n.enabled = false
## To Force a language, set i18n.enabled=false and specify the language in i18n.lang.
## Valid values are the names of subdirectories in kallithea/i18n with a LC_MESSAGES/kallithea.mo
i18n.lang = en

## Data directories. %(here)s is replaced with the directory containing this file.
cache_dir = %(here)s/data
index_dir = %(here)s/data/index

## uncomment and set this path to use archive download cache
## (active in this installation)
archive_cache_dir = %(here)s/tarballcache

## change this to unique ID for security
app_instance_uuid = 339b99011ca3463a8b4c2b64680cded0

## cut off limit for large diffs (size in bytes)
cut_off_limit = 256000

## force https in Kallithea, fixes https redirects, assumes it's always https
force_https = true

## use Strict-Transport-Security headers
## NOTE(review): the key is spelled 'use_htsts', not 'use_hsts' - confirm which
## name the application actually reads before renaming it.
use_htsts = false

## number of commits stats will parse on each iteration
commit_parse_limit = 25

## Path to Python executable to be used for git hooks.
## This value will be written inside the git hook scripts as the text
## after '#!' (shebang). When empty or not defined, the value of
## 'sys.executable' at the time of installation of the git hooks is
## used, which is correct in many cases but for example not when using uwsgi.
## If you change this setting, you should reinstall the Git hooks via
## Admin > Settings > Remap and Rescan.
## (The commented example and the active value below are identical here.)
#git_hook_interpreter = /srv/kallithea/venv/bin/python3
git_hook_interpreter = /srv/kallithea/venv/bin/python3

## path to git executable
## (a bare command name; presumably resolved through PATH - confirm)
git_path = git

## git rev filter option, --all is the default filter, if you need to
## hide all refs in changelog switch this to --branches --tags
#git_rev_filter = --branches --tags

## RSS feed options
rss_cut_off_limit = 256000
rss_items_per_page = 10
rss_include_diff = false

## options for showing and identifying changesets
show_sha_length = 12
show_revision_number = false

## Canonical URL to use when creating full URLs in UI and texts.
## Useful when the site is available under different names or protocols.
## Defaults to what is provided in the WSGI environment.
#canonical_url = https://kallithea.example.com/repos

## gist URL alias, used to create nicer urls for gist. This should be an
## url that does rewrites to _admin/gists/<gistid>.
## example: http://gist.example.com/{gistid}. Empty means use the internal
## Kallithea url, ie. http[s]://kallithea.example.com/_admin/gists/<gistid>
## (left empty here, so the internal URL is used)
gist_alias_url =

## default encoding used to convert from and to unicode
## can be also a comma separated list of encoding in case of mixed encodings
default_encoding = utf-8

## Set Mercurial encoding, similar to setting HGENCODING before launching Kallithea
hgencoding = utf-8

## issue tracker for Kallithea (leave blank to disable, absent for default)
#bugtracker = https://bitbucket.org/conservancy/kallithea/issues

## issue tracking mapping for commit messages, comments, PR descriptions, ...
## Refer to the documentation ("Integration with issue trackers") for more details.

## regular expression to match issue references
## This pattern may/should contain parenthesized groups, that can
## be referred to in issue_server_link or issue_sub using Python backreferences
## (e.g. \1, \2, ...). You can also create named groups with '(?P<groupname>)'.
## To require mandatory whitespace before the issue pattern, use:
## (?:^|(?<=\s)) before the actual pattern, and for mandatory whitespace
## behind the issue pattern, use (?:$|(?=\s)) after the actual pattern.

#issue_pat = #(\d+)
## Local pattern: group 1 is the prefix ('Case' plus an optional whitespace
## character, or '#'); group 2 is the issue number.
issue_pat = (Case\s?|#)(\d+)

## server url to the issue
## This pattern may/should contain backreferences to parenthesized groups in issue_pat.
## A backreference can be \1, \2, ... or \g<groupname> if you specified a named group
## called 'groupname' in issue_pat.
## The special token {repo} is replaced with the full repository name
## including repository groups, while {repo_name} is replaced with just
## the name of the repository.

#issue_server_link = https://issues.example.com/{repo}/issue/\1
## \2 inserts the issue number captured by group 2 of issue_pat above.
issue_server_link = https://hq.domain.pl/support/default.asp?\2

## substitution pattern to use as the link text
## If issue_sub is empty, the text matched by issue_pat is retained verbatim
## for the link text. Otherwise, the link text is that of issue_sub, with any
## backreferences to groups in issue_pat replaced.

## (empty here: the matched text is kept as the link text)
issue_sub =

## issue_pat, issue_server_link and issue_sub can have suffixes to specify
## multiple patterns, to other issues server, wiki or others
## below an example how to create a wiki pattern
## wiki-some-id -> https://wiki.example.com/some-id

#issue_pat_wiki = wiki-(\S+)
#issue_server_link_wiki = https://wiki.example.com/\1
#issue_sub_wiki = WIKI-\1

## alternative return HTTP header for failed authentication. Default HTTP
## response is 401 HTTPUnauthorized. Currently Mercurial clients have trouble with
## handling that. Set this variable to 403 to return HTTPForbidden
## (left empty here: no alternative code is configured)
auth_ret_code =

## allows to change the repository location in settings page
allow_repo_location_change = True

## allows to setup custom hooks in settings page
allow_custom_hooks_settings = True

## extra extensions for indexing, space separated and without the leading '.'.
## (example only; no extra extensions are configured)
#index.extensions =
#    gemfile
#    lock

## extra filenames for indexing, space separated
## (example only; no extra filenames are configured)
#index.filenames =
#    .dockerignore
#    .editorconfig
#    INSTALL
#    CHANGELOG

####################################
##            SSH CONFIG          ##
####################################

## SSH is disabled by default, until an Administrator decides to enable it.
## (enabled for this installation)
ssh_enabled = true

## File where users' SSH keys will be stored *if* ssh_enabled is true.
## (commented line: template example; uncommented line: value in use)
#ssh_authorized_keys = /home/kallithea/.ssh/authorized_keys
ssh_authorized_keys = /srv/kallithea/.ssh/authorized_keys

## Path to be used in ssh_authorized_keys file to invoke kallithea-cli with ssh-serve.
#kallithea_cli_path = /srv/kallithea/venv/bin/kallithea-cli
kallithea_cli_path = /srv/kallithea/venv/bin/kallithea-cli

## Locale to be used in the ssh-serve command.
## This is needed because an SSH client may try to use its own locale
## settings, which may not be available on the server.
## See `locale -a` for valid values on this system.
#ssh_locale = C.UTF-8
ssh_locale = en_US.UTF-8

####################################
##         CELERY CONFIG          ##
####################################

## Note: Celery doesn't support Windows.
## Celery is switched off here; presumably the celery.* settings below are
## then unused - confirm before relying on them.
use_celery = false

## Celery config settings from https://docs.celeryproject.org/en/4.4.0/userguide/configuration.html prefixed with 'celery.'.

## Example: use the message queue on the local virtual host 'kallitheavhost' as the RabbitMQ user 'kallithea':
celery.broker_url = amqp://kallithea:thepassword@localhost:5672/kallitheavhost

## NOTE(review): unlike the neighbouring keys (broker_url, worker_concurrency,
## ...), 'result.backend' uses dots where the Celery 4 setting name is
## 'result_backend' - confirm which spelling the application expects.
celery.result.backend = db+sqlite:///celery-results.db

#celery.amqp.task.result.expires = 18000

celery.worker_concurrency = 2
celery.worker_max_tasks_per_child = 1

## If true, tasks will never be sent to the queue, but executed locally instead.
celery.task_always_eager = false

####################################
##          BEAKER CACHE          ##
####################################

## On-disk locations for cache data and lock files, below the directory
## containing this file.
beaker.cache.data_dir = %(here)s/data/cache/data
beaker.cache.lock_dir = %(here)s/data/cache/lock

## Names of the cache regions configured below.
beaker.cache.regions = long_term,long_term_file

## Region kept in process memory.
## NOTE(review): expire is presumably in seconds (36000 = 10 hours) - confirm
## against the Beaker documentation.
beaker.cache.long_term.type = memory
beaker.cache.long_term.expire = 36000
beaker.cache.long_term.key_length = 256

## Region stored in files.
## NOTE(review): presumably seconds as well (604800 = 7 days) - confirm.
beaker.cache.long_term_file.type = file
beaker.cache.long_term_file.expire = 604800
beaker.cache.long_term_file.key_length = 256

####################################
##        BEAKER SESSION          ##
####################################

## Name of session cookie. Should be unique for a given host and path, even when running
## on different ports. Otherwise, cookie sessions will be shared and messed up.
session.key = kallithea
## Sessions should always only be accessible by the browser, not directly by JavaScript.
session.httponly = true
## Session lifetime. 2592000 seconds is 30 days.
session.timeout = 2592000

## Server secret used with HMAC to ensure integrity of cookies.
## (The real value was redacted before this file was shared.)
session.secret = <secret removed>
## Further, encrypt the data with AES.
#session.encrypt_key = <key_for_encryption>
#session.validate_key = <validation_key>

## Type of storage used for the session, current types are
## dbm, file, memcached, database, and memory.
## No session.type is active in this file, so the default marked below applies.

## File system storage of session data. (default)
#session.type = file

## Cookie only, store all session data inside the cookie. Requires secure secrets.
#session.type = cookie

## Database storage of session data.
#session.type = ext:database
#session.sa.url = postgresql://postgres:qwe@localhost/kallithea
#session.table_name = db_session

####################################
##        ERROR HANDLING          ##
####################################

## Show a nice error page for application HTTP errors and exceptions (default true)
#errorpage.enabled = true

## Enable Backlash client-side interactive debugger (default false)
## WARNING: *THIS MUST BE false IN PRODUCTION ENVIRONMENTS!!!*
## This debug mode will allow all visitors to execute malicious code.
#debug = false

## Enable Backlash server-side error reporting (unless debug mode handles it client-side) (default true)
#trace_errors.enable = true
## Errors will be reported by mail if trace_errors.error_email is set.

## Propagate email settings to ErrorReporter of TurboGears2
## You do not normally need to change these lines
## Each 'get <name> = <global>' line gives <name> the value of the global
## setting <global> from the [DEFAULT] section.
## NOTE(review): smtp_use_ssl is defined in [DEFAULT] but is not propagated
## here - confirm whether that is intentional.
get trace_errors.smtp_server = smtp_server
get trace_errors.smtp_port = smtp_port
get trace_errors.from_address = error_email_from
get trace_errors.error_email = email_to
get trace_errors.smtp_username = smtp_username
get trace_errors.smtp_password = smtp_password
get trace_errors.smtp_use_tls = smtp_use_tls

##################################
##        LOGVIEW CONFIG        ##
##################################

## NOTE(review): each key names a logger and each value looks like a CSS hex
## colour - confirm how (and whether) these settings are still consumed.
logview.sqlalchemy = #faa
logview.pylons.templating = #bfb
logview.pylons.util = #eee

#########################
##      DB CONFIG      ##
#########################

## SQLITE [default]
## The database file kallithea.db lives in the directory containing this file.
## NOTE(review): 'timeout=60' is presumably the SQLite lock wait in seconds -
## confirm against the driver documentation.
sqlalchemy.url = sqlite:///%(here)s/kallithea.db?timeout=60

## see sqlalchemy docs for other backends

## NOTE(review): presumably seconds (3600 = one hour) - confirm in the
## SQLAlchemy pooling documentation.
sqlalchemy.pool_recycle = 3600

################################
##   ALEMBIC CONFIGURATION    ##
################################

[alembic]
## Where Alembic finds its migration scripts.
## NOTE(review): 'kallithea:alembic' presumably means the 'alembic' directory
## inside the 'kallithea' package - confirm.
script_location = kallithea:alembic

################################
##   LOGGING CONFIGURATION    ##
################################

## Each name listed in the three 'keys' lines below has a matching
## [logger_<name>], [handler_<name>] or [formatter_<name>] section further down.

[loggers]
keys = root, routes, kallithea, sqlalchemy, tg, gearbox, beaker, templates, whoosh_indexer, werkzeug, backlash

[handlers]
keys = console, console_color, console_color_sql, null

[formatters]
keys = generic, color_formatter, color_formatter_sql

#############
## LOGGERS ##
#############

## Only the root logger has a handler attached (console). Every other logger
## below sets level WARN and leaves 'handlers' empty; presumably their records
## reach the console handler by propagating to the root logger - confirm.

[logger_root]
level = NOTSET
handlers = console
## For coloring based on log level:
#handlers = console_color

[logger_routes]
level = WARN
handlers =
qualname = routes.middleware
## "level = DEBUG" logs the route matched and routing variables.

[logger_beaker]
level = WARN
handlers =
qualname = beaker.container

[logger_templates]
level = WARN
handlers =
qualname = pylons.templating

[logger_kallithea]
level = WARN
handlers =
qualname = kallithea

[logger_tg]
level = WARN
handlers =
qualname = tg

[logger_gearbox]
level = WARN
handlers =
qualname = gearbox

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
## For coloring based on log level and pretty printing of SQL:
#level = INFO
#handlers = console_color_sql
#propagate = 0

[logger_whoosh_indexer]
level = WARN
handlers =
qualname = whoosh_indexer

[logger_werkzeug]
level = WARN
handlers =
qualname = werkzeug

[logger_backlash]
level = WARN
handlers =
qualname = backlash

##############
## HANDLERS ##
##############

## The three console handlers all write to sys.stderr and differ only in the
## formatter they use. The null handler takes no arguments.

[handler_console]
class = StreamHandler
args = (sys.stderr,)
formatter = generic

[handler_console_color]
## ANSI color coding based on log level
class = StreamHandler
args = (sys.stderr,)
formatter = color_formatter

[handler_console_color_sql]
## ANSI color coding and pretty printing of SQL statements
class = StreamHandler
args = (sys.stderr,)
formatter = color_formatter_sql

[handler_null]
class = NullHandler
args = ()

################
## FORMATTERS ##
################

## All three formatters share the same 'format' and 'datefmt'; the two color
## variants additionally name a formatter class from kallithea.lib.colored_formatter.

[formatter_generic]
format = %(asctime)s.%(msecs)03d %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %Y-%m-%d %H:%M:%S

[formatter_color_formatter]
class = kallithea.lib.colored_formatter.ColorFormatter
format = %(asctime)s.%(msecs)03d %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %Y-%m-%d %H:%M:%S

[formatter_color_formatter_sql]
class = kallithea.lib.colored_formatter.ColorFormatterSql
format = %(asctime)s.%(msecs)03d %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %Y-%m-%d %H:%M:%S

#################
## SSH LOGGING ##
#################

## The default loggers use 'handler_console' that uses StreamHandler with
## destination 'sys.stderr'. In the context of the SSH server process, these log
## messages would be sent to the client, which is normally not what you want.
## By default, when running ssh-serve, just use NullHandler and disable logging
## completely. For other logging options, see:
## https://docs.python.org/3/library/logging.handlers.html

## Root logger settings for ssh-serve: level CRITICAL, sent to the 'null'
## handler defined in [handler_null].
[ssh_serve:logger_root]
level = CRITICAL
handlers = null

## Note: If logging is configured with other handlers, they might need similar
## muting for ssh-serve too.