[Erp5-report] r28449 - /erp5/trunk/utils/timing_log_parser/plot.py

Tue Aug 18 18:21:18 CEST 2009

Author: seb
Date: Tue Aug 18 18:21:17 2009
New Revision: 28449

URL: http://svn.erp5.org?rev=28449&view=rev
Log:
* Add an option to display B-spline regression lines; this is really
  helpful for seeing the overall trend of chaotic values
* Use the R project through the rpy2 library to compute
  statistics and to build the graphs

Modified:
    erp5/trunk/utils/timing_log_parser/plot.py

Modified: erp5/trunk/utils/timing_log_parser/plot.py
URL: http://svn.erp5.org/erp5/trunk/utils/timing_log_parser/plot.py?rev=28449&r1=28448&r2=28449&view=diff
==============================================================================

--- erp5/trunk/utils/timing_log_parser/plot.py [utf8] (original)
+++ erp5/trunk/utils/timing_log_parser/plot.py [utf8] Tue Aug 18 18:21:17 2009
@@ -3,6 +3,7 @@
 #
 # Copyright (c) 2009 Nexedi SA and Contributors. All Rights Reserved.
 #                    Vincent Pelletier <vincent at nexedi.com>
+#                    Sebastien Robin <seb at nexedi.com>
 #
 # WARNING: This program as such is intended to be used by professional
 # programmers who take the whole responsability of assessing all potential
@@ -26,15 +27,16 @@
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 #
 ##############################################################################
-import pylab
-from matplotlib.dates import MonthLocator, DayLocator, DateFormatter, date2num
-from matplotlib.font_manager import FontProperties
 from datetime import date
 from os import path
-
-"""
+import rpy2.robjects as robjects
+import os
+from optparse import OptionParser
+r = robjects.r
+
+usage = """
   Usage:
-    plot.py file1.csv [file2.csv [...]]
+    plot.py file1.csv [OPTION] [file2.csv [...]]
   Result:
     Generates, in current directory, a graph per csv column in png format.
     Their name is composed of:
@@ -43,6 +45,9 @@
     - csv file basename (without extension)
     - csv column title
     - 'png' extension
+
+  Options:
+    --with-regression : enable B-spline regression 
 
   CSV files must have been generated by parse_timing_log.py tool.
 """
@@ -110,35 +115,94 @@
   d, m, y = expr.split('/')
   return date(int(y), int(m), int(d))
 
-def main(file_name_list):
-  major_locator = MonthLocator()
-  minor_locator = DayLocator()
-  major_formater = DateFormatter('%m/%y')
+def main():
+  parser = OptionParser(usage)
+  parser.add_option("--with-regression", action="store_true", dest="regression_enabled")
+  (options, file_name_list) = parser.parse_args()
+
+  current_dir = os.getcwd()
   for file_name in file_name_list:
     print 'Loading %s...' % (file_name, )
     file = CSVFile(file_name)
-    date_list = [date2num(strToDate(x)) for x in file.getColumn(0)]
-    xlim = [date_list[0], date_list[-1]]
+
+    date_string_list = file.getColumn(0)
+    date_list = []
+    x_label_value_list = []
+    # plotting functionnalities does not select smartly
+    # a good number of x values to display, so we will display 20 dates
+    # in order to have good enough dates on the x axis.
+    # x_label_value_list will be like [1, 5, 10...]
+    # date_list will be like ['2009/07/01', '2009/07/05', '2009/07/10', ...]
+    factor = 1
+    if len(date_string_list) > 20:
+      factor = int(len(date_string_list) / 20)
+    i = 0
+    for date_string in date_string_list:
+      if i % factor == 0:
+        x_label_value_list.append(i)
+        date_split = date_string.replace('"','').split('/')
+        date_split.reverse()
+        new_date = '/'.join(date_split)
+        date_list.append(new_date)
+      i += 1
+    max_x = len(date_string_list)
+    # knots are used for B-spline regression
+    # We need to three additional knots at the begin and end in
+    # order to have the right basis
+    knot_list  = [x_label_value_list[0]] * 3 + x_label_value_list \
+        + [max_x] * 4
+    r_x_label_value_list = robjects.FloatVector(x_label_value_list)
+    robjects.globalEnv["x_label_value_list"] = r_x_label_value_list
+    robjects.globalEnv["knot_list"] = knot_list
+    r("x_label <- c(%s)" % ','.join(['"%s"' % x for x in date_list]))
+    # import the splines library in R
+    if options.regression_enabled:
+      r("library(splines)")
+    # now parse all columns and store a png file
     for title, column, value_max, ratio in file.iterColumns(start=1):
-      ax = pylab.subplot(111, autoscale_on=False)
-      ax.xaxis.set_major_locator(major_locator)
-      ax.xaxis.set_minor_locator(minor_locator)
-      ax.xaxis.set_major_formatter(major_formater)
-      ax.axes.set_ylim([0, max(value_max, 3)])
-      ax.axes.set_xlim(xlim)
-      pylab.plot_date(date_list, column, 'k-', xdate=1)
-
       out_file, out_ext = path.splitext(path.basename(file_name))
       if out_ext != '.csv':
         out_file = '.'.join((out_file, out_ext))
       out_file_name = '%03i_%s_%s.png' % (ratio, out_file, title)
+      i = 0
+      x_data = []
+      y_data = []
+      for value in column:
+        if value is not None:
+          x_data.append(i)
+          y_data.append(value)
+        i += 1
+      r_y_data = robjects.FloatVector(y_data)
+      r_x_data = robjects.FloatVector(x_data)
+      robjects.globalEnv["y_data"] = r_y_data
+      robjects.globalEnv["x_data"] = r_x_data
+      display_column_regression = options.regression_enabled
+      # if there is no more than one unique point, regression is useless
+      if len(set([x for x in r_y_data])) <= 1:
+        display_column_regression = 0
+      regression_string = ''
+      # Calculate a B-spline regression in order to give clear overview
+      # about the direction of chaotics values.
+      if display_column_regression:
+        r("bx <- splineDesign(knot_list, x_data)")
+        r("fitted_model <- lm(y_data ~ bx)")
+        regression_string = ', fitted_model$fit'
+      # Define the place where to store the graphe and format of the image
+      r("""png(file='%s/%s', width=800, height=600)""" % (current_dir,
+        out_file_name))
+      # Increase the size for the place of the bottom axis labels (x)
+      r("""par(mar=c(9, 4, 4, 2) + 0.1)""")
+      # Plot the graph itself
+      r("""matplot(x_data, cbind(y_data %s), type='ll',
+                lty=1, main='%s (average display time per day)',
+                xlab='', ylab='time (s)', xaxt='n')""" % (
+                  regression_string, title))
+      r("""axis(1, at=x_label_value_list, lab=x_label, las=2)""")
+      # stop changing the png file
+      r("""dev.off()""")
+
       print 'Saving %s...' % (out_file_name, )
-      pylab.savefig(out_file_name)
-
-      # Needed to cleanup pylab state.
-      pylab.close()
 
 if __name__ == '__main__':
-  import sys
-  main(sys.argv[1:])
-
+  main()
+