[Erp5-report] r28449 - /erp5/trunk/utils/timing_log_parser/plot.py
nobody at svn.erp5.org
nobody at svn.erp5.org
Tue Aug 18 18:21:18 CEST 2009
Author: seb
Date: Tue Aug 18 18:21:17 2009
New Revision: 28449
URL: http://svn.erp5.org?rev=28449&view=rev
Log:
* Add an option to display B-spline regression lines; this is really
helpful for seeing the trend of chaotic values
* Use the R project through the rpy2 library in order to compute
statistics and to build the graphs
Modified:
erp5/trunk/utils/timing_log_parser/plot.py
Modified: erp5/trunk/utils/timing_log_parser/plot.py
URL: http://svn.erp5.org/erp5/trunk/utils/timing_log_parser/plot.py?rev=28449&r1=28448&r2=28449&view=diff
==============================================================================
--- erp5/trunk/utils/timing_log_parser/plot.py [utf8] (original)
+++ erp5/trunk/utils/timing_log_parser/plot.py [utf8] Tue Aug 18 18:21:17 2009
@@ -3,6 +3,7 @@
#
# Copyright (c) 2009 Nexedi SA and Contributors. All Rights Reserved.
# Vincent Pelletier <vincent at nexedi.com>
+# Sebastien Robin <seb at nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
@@ -26,15 +27,16 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
-import pylab
-from matplotlib.dates import MonthLocator, DayLocator, DateFormatter, date2num
-from matplotlib.font_manager import FontProperties
from datetime import date
from os import path
-
-"""
+import rpy2.robjects as robjects
+import os
+from optparse import OptionParser
+r = robjects.r
+
+usage = """
Usage:
- plot.py file1.csv [file2.csv [...]]
+ plot.py file1.csv [OPTION] [file2.csv [...]]
Result:
Generates, in current directory, a graph per csv column in png format.
Their name is composed of:
@@ -43,6 +45,9 @@
- csv file basename (without extension)
- csv column title
- 'png' extension
+
+ Options:
+ --with-regression : enable B-spline regression
CSV files must have been generated by parse_timing_log.py tool.
"""
@@ -110,35 +115,94 @@
d, m, y = expr.split('/')
return date(int(y), int(m), int(d))
-def main(file_name_list):
- major_locator = MonthLocator()
- minor_locator = DayLocator()
- major_formater = DateFormatter('%m/%y')
+def main():
+ parser = OptionParser(usage)
+ parser.add_option("--with-regression", action="store_true", dest="regression_enabled")
+ (options, file_name_list) = parser.parse_args()
+
+ current_dir = os.getcwd()
for file_name in file_name_list:
print 'Loading %s...' % (file_name, )
file = CSVFile(file_name)
- date_list = [date2num(strToDate(x)) for x in file.getColumn(0)]
- xlim = [date_list[0], date_list[-1]]
+
+ date_string_list = file.getColumn(0)
+ date_list = []
+ x_label_value_list = []
+ # plotting functionnalities does not select smartly
+ # a good number of x values to display, so we will display 20 dates
+ # in order to have good enough dates on the x axis.
+ # x_label_value_list will be like [1, 5, 10...]
+ # date_list will be like ['2009/07/01', '2009/07/05', '2009/07/10', ...]
+ factor = 1
+ if len(date_string_list) > 20:
+ factor = int(len(date_string_list) / 20)
+ i = 0
+ for date_string in date_string_list:
+ if i % factor == 0:
+ x_label_value_list.append(i)
+ date_split = date_string.replace('"','').split('/')
+ date_split.reverse()
+ new_date = '/'.join(date_split)
+ date_list.append(new_date)
+ i += 1
+ max_x = len(date_string_list)
+ # knots are used for B-spline regression
+ # We need to three additional knots at the begin and end in
+ # order to have the right basis
+ knot_list = [x_label_value_list[0]] * 3 + x_label_value_list \
+ + [max_x] * 4
+ r_x_label_value_list = robjects.FloatVector(x_label_value_list)
+ robjects.globalEnv["x_label_value_list"] = r_x_label_value_list
+ robjects.globalEnv["knot_list"] = knot_list
+ r("x_label <- c(%s)" % ','.join(['"%s"' % x for x in date_list]))
+ # import the splines library in R
+ if options.regression_enabled:
+ r("library(splines)")
+ # now parse all columns and store a png file
for title, column, value_max, ratio in file.iterColumns(start=1):
- ax = pylab.subplot(111, autoscale_on=False)
- ax.xaxis.set_major_locator(major_locator)
- ax.xaxis.set_minor_locator(minor_locator)
- ax.xaxis.set_major_formatter(major_formater)
- ax.axes.set_ylim([0, max(value_max, 3)])
- ax.axes.set_xlim(xlim)
- pylab.plot_date(date_list, column, 'k-', xdate=1)
-
out_file, out_ext = path.splitext(path.basename(file_name))
if out_ext != '.csv':
out_file = '.'.join((out_file, out_ext))
out_file_name = '%03i_%s_%s.png' % (ratio, out_file, title)
+ i = 0
+ x_data = []
+ y_data = []
+ for value in column:
+ if value is not None:
+ x_data.append(i)
+ y_data.append(value)
+ i += 1
+ r_y_data = robjects.FloatVector(y_data)
+ r_x_data = robjects.FloatVector(x_data)
+ robjects.globalEnv["y_data"] = r_y_data
+ robjects.globalEnv["x_data"] = r_x_data
+ display_column_regression = options.regression_enabled
+ # if there is no more than one unique point, regression is useless
+ if len(set([x for x in r_y_data])) <= 1:
+ display_column_regression = 0
+ regression_string = ''
+ # Calculate a B-spline regression in order to give clear overview
+ # about the direction of chaotics values.
+ if display_column_regression:
+ r("bx <- splineDesign(knot_list, x_data)")
+ r("fitted_model <- lm(y_data ~ bx)")
+ regression_string = ', fitted_model$fit'
+ # Define the place where to store the graphe and format of the image
+ r("""png(file='%s/%s', width=800, height=600)""" % (current_dir,
+ out_file_name))
+ # Increase the size for the place of the bottom axis labels (x)
+ r("""par(mar=c(9, 4, 4, 2) + 0.1)""")
+ # Plot the graph itself
+ r("""matplot(x_data, cbind(y_data %s), type='ll',
+ lty=1, main='%s (average display time per day)',
+ xlab='', ylab='time (s)', xaxt='n')""" % (
+ regression_string, title))
+ r("""axis(1, at=x_label_value_list, lab=x_label, las=2)""")
+ # stop changing the png file
+ r("""dev.off()""")
+
print 'Saving %s...' % (out_file_name, )
- pylab.savefig(out_file_name)
-
- # Needed to cleanup pylab state.
- pylab.close()
if __name__ == '__main__':
- import sys
- main(sys.argv[1:])
-
+ main()
+
More information about the Erp5-report
mailing list