[Erp5-report] r15283 - /erp5/trunk/utils/treenalyser.py

Tue Jul 24 09:30:22 CEST 2007

Author: vincent
Date: Tue Jul 24 09:30:21 2007
New Revision: 15283

URL: http://svn.erp5.org?rev=15283&view=rev
Log:
Initial import.

Added:
    erp5/trunk/utils/treenalyser.py   (with props)

Added: erp5/trunk/utils/treenalyser.py
URL: http://svn.erp5.org/erp5/trunk/utils/treenalyser.py?rev=15283&view=auto
==============================================================================

--- erp5/trunk/utils/treenalyser.py (added)
+++ erp5/trunk/utils/treenalyser.py Tue Jul 24 09:30:21 2007
@@ -1,0 +1,218 @@
+#!/usr/bin/python
+##############################################################################
+#
+# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
+# Copyright (c) 2007 Nexedi SARL and Contributors. All Rights Reserved.
+#                    Vincent Pelletier <vincent at nexedi.com>
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+
+# Licence and Copyright note: This program is a (heavy) modification of
+# Zope's "netspace.py" tool, as of Zope 2.8.8. That file carries no explicit
+# licence or copyright notice, so it is believed to fall under the package's
+# doc/LICENCE file, which defines the licence as ZPL 2.0 and copyright holders
+# as "Zope Corporation and Contributors".
+# Please contact me (vincent at nexedi.com) if it is wrong, so I can update it.
+
+"""Analyze a tree of ZODB objects, displaying their ZODB path, size and
+size including subobjects.
+
+usage: treenalyser.py [-v|-q] [-c] [-x] [-p path] [-d depth] Data.fs
+
+-d: Limit depth to given number of subobject. This only limit the number of
+    displayed objects, size calculation always recurses the entire subtree.
+    -1 (default) means no limit.
+-v: Print info for all objects, even if a traversal path isn't found.
+-q: Be more quiet.
+-p: Path to object to examine recursively. By default, start at ZODB's root.
+    This parameter must be a valid python expression on the "root" variable.
+    Example: -p "root['erp5']"
+-c: Enable display of class path for each object.
+-x: Enable display of hexadecimal dump of object's raw data.
+"""
+
+import sys
+import getopt
+import ZODB
+from ZODB.FileStorage import FileStorage
+from ZODB.utils import oid_repr, get_pickle_metadata
+from ZODB.serialize import referencesf
+
+# Constant factorization: string literals shared by the functions below.
+SPACE = ' '
+EMPTY_STRING = ''
+# Bounds (inclusive) of the range of displayable ASCII characters, used by
+# the hexadecimal dump to decide which bytes are shown as-is.
+SPACE_ORD = ord(' ')
+TILDE_ORD = ord('~')
+
+def find_path_dict(objekt, maximum_depth=-1, objekt_path=EMPTY_STRING,
+                   depth=0):
+  path_dict = {}
+  can_recurse = (maximum_depth < 0) or (depth < maximum_depth)
+  depth=depth + 1
+  objekt_items = getattr(objekt, 'items', None)
+  if objekt_items is not None:
+    items = objekt_items()
+  elif isinstance(objekt, tuple):
+    items = zip(range(len(objekt)), objekt)
+  else:
+    objekt_dict = getattr(objekt, '__dict__', None)
+    if objekt_dict is not None:
+      items = objekt_dict.items()
+    else:
+      items = []
+  for k, v in items:
+    if (not isinstance(k, basestring)) or k[0].isdigit() or SPACE in k:
+      format = "%s['%s']"
+    else:
+      format = '%s.%s'
+    path = format % (objekt_path, k)
+    oid = getattr(v, '_p_oid', None)
+    if oid is not None:
+      path_dict[oid] = path
+    if can_recurse:
+      path_dict.update(find_path_dict(v, maximum_depth=maximum_depth,
+                       depth=depth, objekt_path=path))
+  return path_dict
+
+def display_tree(zodb_path, root_object_path='root', maximum_depth=-1,
+                 verbose=0, display_klass=0, display_hexdump=0):
+    object_total_size_cache = {}
+    # FIXME: determine how much memory the cache should be allowed to use.
+    OBJECT_CACHE_SIZE_LIMIT = 1000000
+    # FIXME: avoid using globals
+    global object_total_size_cache_hit
+    global object_total_size_cache_miss
+    global object_total_size_cache_prune
+    object_total_size_cache_hit = 0
+    object_total_size_cache_miss = 0
+    object_total_size_cache_prune = 0
+  
+    file_storage = FileStorage(zodb_path, read_only=1)
+    database = ZODB.DB(file_storage)
+    # TODO: implement an object path parser to avoid using eval.
+    root = eval(root_object_path, {'root': database.open().root()})
+    root_id = 'root'
+    path_dict = find_path_dict(root, maximum_depth=maximum_depth,
+                               objekt_path=root_id)
+    root_oid = getattr(root, '_p_oid', None)
+    if root_oid is not None:
+      path_dict[root_oid] = root_object_path
+    if verbose > 0:
+      print "Will display %s objects" % (len(path_dict), )
+
+    def getTotalSize(oid):
+      def _getTotalSize(oid):
+        v = object_total_size_cache.get(oid)
+        if v is not None:
+          global object_total_size_cache_hit
+          object_total_size_cache_hit += 1
+          return v
+        global object_total_size_cache_miss
+        object_total_size_cache_miss += 1
+        data, serialno = file_storage.load(oid, EMPTY_STRING)
+        size = len(data)
+        for suboid in referencesf(data):
+          try:
+            size += _getTotalSize(suboid)
+          except RuntimeError:
+            # TODO: use python logging facility to avoid displaying the log
+            # message too many times.
+            print 'Warning: RuntimeError raised during size computation, '\
+                  'values will be underestimated.'
+        if len(object_total_size_cache) > OBJECT_CACHE_SIZE_LIMIT:
+          global object_total_size_cache_prune
+          object_total_size_cache_prune += 1
+          object_total_size_cache.popitem()
+        object_total_size_cache[oid] = size
+        return size
+      return _getTotalSize(oid)
+
+    keys = path_dict.keys()
+    keys.sort()
+    keys.reverse()
+
+    fmt = '%18s %5s %8s %s' # 18 = '0x' + 8 bytes hex
+    fmt_prefix = '%34s' # 34 = 18+' '+5+' '+8+' '
+    klass_fmt = fmt_prefix + '%s.%s'
+    hex_fmt = fmt_prefix + '%08x %24s %24s |%16s|'
+    LINE_LENGTH = 16
+    HALF_LINE_LENGTH = LINE_LENGTH / 2
+
+    if verbose > -1:
+      print "%s = %s" % (root_id, root_object_path)
+      print fmt % ('OID', 'len', 'rlen', 'path')
+    for oid in keys:
+      total_size = getTotalSize(oid)
+      data, serialno = file_storage.load(oid, EMPTY_STRING)
+      data_len = len(data)
+      mod, klass = get_pickle_metadata(data)
+      refs = referencesf(data)
+      path = path_dict.get(oid, '-')
+      print fmt % (oid_repr(oid), data_len, total_size, path)
+      if display_klass:
+        print klass_fmt % (EMPTY_STRING, mod, klass)
+      if display_hexdump:
+        for line_number in xrange(data_len/16 + 1):
+          offset = LINE_LENGTH * line_number
+          outstring_list = ['  '] * LINE_LENGTH
+          ascii_list = [SPACE] * LINE_LENGTH
+          for column_number in xrange(min(LINE_LENGTH, data_len - offset)):
+            byte = data[column_number + offset]
+            byte_value = ord(byte)
+            outstring_list[column_number] = '%02x' % (byte_value, )
+            if SPACE_ORD <= byte_value <= TILDE_ORD:
+              ascii_list[column_number] = '%s' % (byte, )
+            else:
+              ascii_list[column_number] = '.'
+          print hex_fmt % (EMPTY_STRING, line_number * LINE_LENGTH,
+                           SPACE.join(outstring_list[:HALF_LINE_LENGTH]),
+                           SPACE.join(outstring_list[HALF_LINE_LENGTH:]),
+                           EMPTY_STRING.join(ascii_list))
+    if verbose > 1:
+      print 'Object size cache stats: len = %s, hit = %s, miss = %s, '\
+            'prune = %s' % (len(object_total_size_cache),
+            object_total_size_cache_hit, object_total_size_cache_miss,
+            object_total_size_cache_prune)
+
+def main():
+    verbose = 0
+    kw = {}
+    try:
+      opts, args = getopt.getopt(sys.argv[1:], 'cd:p:qvx')
+      zodb_path, = args
+    except getopt.error, err:
+      print err
+      print __doc__
+      sys.exit(2)
+    except ValueError:
+      print "expected one argument, got", len(args)
+      print __doc__
+      sys.exit(2)
+    root_object_path = 'root'
+    for o, v in opts:
+      if o == '-v':
+        verbose += 1
+      elif o == '-q':
+        verbose -= 1
+      elif o == '-p':
+        kw['root_object_path'] = v
+      elif o == '-c':
+        kw['display_klass'] = 1
+      elif o == '-x':
+        kw['display_hexdump'] = 1
+      elif o == '-d':
+        kw['maximum_depth'] = int(v)
+    kw['verbose'] = verbose
+    display_tree(zodb_path, **kw)
+ 
+# Run the analysis only when executed as a script, not when imported.
+if __name__ == "__main__":
+  main()

Propchange: erp5/trunk/utils/treenalyser.py
------------------------------------------------------------------------------
    svn:executable = *