/[chrome]/trunk/src/tools/valgrind/memcheck_analyze.py
Chromium logo

Contents of /trunk/src/tools/valgrind/memcheck_analyze.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 56662 - (show annotations)
Thu Aug 19 09:18:54 2010 UTC (13 years ago) by timurrrr@chromium.org
File MIME type: text/x-python
File size: 20199 byte(s)
Remove Mac-specific sanity test expectations since "-O1 -g ..." is back thanks to the new 10.6 Valgrind builder
This patch partly reverts r53308 and r53408
BUG=49253
TEST=tested locally on Linux and Mac
Review URL: http://codereview.chromium.org/3158024
1 #!/usr/bin/python
2 # Copyright (c) 2010 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 # memcheck_analyze.py
7
8 ''' Given a valgrind XML file, parses errors and uniques them.'''
9
10 import gdb_helper
11
12 import logging
13 import optparse
14 import os
15 import re
16 import subprocess
17 import sys
18 import time
19 from xml.dom.minidom import parse
20 from xml.parsers.expat import ExpatError
21
22 import common
23
# Global symbol table (yuck).  None unless gdb-based address resolution is
# enabled; MemcheckAnalyzer.Report() installs a gdb_helper.AddressTable here,
# and gatherFrames()/ValgrindError.__str__() consult it to resolve addresses
# that valgrind could not attribute to a source line.
TheAddressTable = None

# These are functions (using C++ mangled names) that we look for in stack
# traces. We don't show stack frames while pretty printing when they are below
# any of the following:
_TOP_OF_STACK_POINTS = [
  # Don't show our testing framework.
  "testing::Test::Run()",
  "_ZN7testing4Test3RunEv",
  # Also don't show the internals of libc/pthread.
  "start_thread"
]
37
def getTextOf(top_node, name):
  ''' Concatenates and returns the text content of every DOM element named
  |name| found beneath |top_node|.  Returns "" when no such element (or no
  text) exists.
  '''
  pieces = []
  for element in top_node.getElementsByTagName(name):
    for child in element.childNodes:
      if child.nodeType == child.TEXT_NODE:
        pieces.append(child.data)
  return "".join(pieces)
48
def getCDATAOf(top_node, name):
  ''' Concatenates the CDATA content of every DOM element named |name| found
  beneath |top_node|.  Unlike getTextOf(), returns None (not "") when no
  CDATA was found, so callers can distinguish "absent" from "empty".
  '''
  chunks = []
  for element in top_node.getElementsByTagName(name):
    for child in element.childNodes:
      if child.nodeType == child.CDATA_SECTION_NODE:
        chunks.append(child.data)
  result = "".join(chunks)
  if not result:
    return None
  return result
61
def removeCommonRoot(source_dir, directory):
  '''Returns |directory| with the leading |source_dir| prefix (plus one
  separator character) stripped.

  If |source_dir| is empty/None, |directory| is returned unchanged.
  NOTE(review): os.path.commonprefix compares character-by-character, not by
  path component, so a partial component match trims mid-name — preserved
  here as-is; confirm before tightening.
  '''
  if not source_dir:
    return directory
  # Guard against |directory| being an absolute path outside of |source_dir|:
  # only the actual common prefix is removed.
  prefix = os.path.commonprefix([source_dir, directory])
  return directory[len(prefix) + 1:]
72
# Constants that give real names to the abbreviations in valgrind XML output.
# These double as the keys of the per-frame dicts built by gatherFrames().
INSTRUCTION_POINTER = "ip"
OBJECT_FILE = "obj"
FUNCTION_NAME = "fn"
SRC_FILE_DIR = "dir"
SRC_FILE_NAME = "file"
SRC_LINE = "line"
80
def gatherFrames(node, source_dir):
  '''Extracts the <frame> children of |node| into a list of dicts keyed by
  the constants above (ip/obj/fn/dir/file/line).

  Collection stops at the first frame whose function name is one of
  _TOP_OF_STACK_POINTS (that frame is still included).  When gdb-based
  resolution is active, frames valgrind could not attribute to a source line
  are queued on TheAddressTable for later resolution.
  '''
  global TheAddressTable
  frames = []
  for frame_node in node.getElementsByTagName("frame"):
    frame = {
      INSTRUCTION_POINTER : getTextOf(frame_node, INSTRUCTION_POINTER),
      OBJECT_FILE : getTextOf(frame_node, OBJECT_FILE),
      FUNCTION_NAME : getTextOf(frame_node, FUNCTION_NAME),
      SRC_FILE_DIR : removeCommonRoot(
          source_dir, getTextOf(frame_node, SRC_FILE_DIR)),
      SRC_FILE_NAME : getTextOf(frame_node, SRC_FILE_NAME),
      SRC_LINE : getTextOf(frame_node, SRC_LINE)
    }
    frames.append(frame)
    if frame[FUNCTION_NAME] in _TOP_OF_STACK_POINTS:
      # NOTE(review): because we break before the gdb check below, a matching
      # top-of-stack frame is never queued for address resolution — preserved
      # as in the original; confirm whether that is intentional.
      break
    if TheAddressTable != None and frame[SRC_LINE] == "":
      # No debug info from valgrind for this frame; try resolving via gdb.
      TheAddressTable.Add(frame[OBJECT_FILE], frame[INSTRUCTION_POINTER])
  return frames
102
class ValgrindError:
  ''' Takes a <DOM Element: error> node and reads all the data from it. A
  ValgrindError is immutable and is hashed on its pretty printed output.

  Identity is defined by UniqueString() (kind + function names + source
  locations); __str__() produces the human-readable report including a
  ready-to-paste suppression.
  '''

  def __init__(self, source_dir, error_node, commandline):
    ''' Copies all the relevant information out of the DOM and into object
    properties.

    Args:
      error_node: The <error></error> DOM node we're extracting from.
      source_dir: Prefix that should be stripped from the <dir> node.
      commandline: The command that was run under valgrind
    '''

    # Valgrind errors contain one <what><stack> pair, plus an optional
    # <auxwhat><stack> pair, plus an optional <origin><what><stack></origin>,
    # plus (since 3.5.0) a <suppression></suppression> pair.
    # (Origin is nicely enclosed; too bad the other two aren't.)
    # The most common way to see all three in one report is
    # a syscall with a parameter that points to uninitialized memory, e.g.
    # Format:
    # <error>
    #   <unique>0x6d</unique>
    #   <tid>1</tid>
    #   <kind>SyscallParam</kind>
    #   <what>Syscall param write(buf) points to uninitialised byte(s)</what>
    #   <stack>
    #     <frame>
    #     ...
    #     </frame>
    #   </stack>
    #   <auxwhat>Address 0x5c9af4f is 7 bytes inside a block of ...</auxwhat>
    #   <stack>
    #     <frame>
    #     ...
    #     </frame>
    #   </stack>
    #   <origin>
    #     <what>Uninitialised value was created by a heap allocation</what>
    #     <stack>
    #       <frame>
    #       ...
    #       </frame>
    #     </stack>
    #   </origin>
    #   <suppression>
    #     <sname>insert_a_suppression_name_here</sname>
    #     <skind>Memcheck:Param</skind>
    #     <skaux>write(buf)</skaux>
    #     <sframe> <fun>__write_nocancel</fun> </sframe>
    #     ...
    #     <sframe> <fun>main</fun> </sframe>
    #     <rawtext>
    #       <![CDATA[
    #       {
    #          <insert_a_suppression_name_here>
    #          Memcheck:Param
    #          write(buf)
    #          fun:__write_nocancel
    #          ...
    #          fun:main
    #       }
    #       ]]>
    #     </rawtext>
    #   </suppression>
    # </error>
    #
    # Each frame looks like this:
    #  <frame>
    #    <ip>0x83751BC</ip>
    #    <obj>/data/dkegel/chrome-build/src/out/Release/base_unittests</obj>
    #    <fn>_ZN7testing8internal12TestInfoImpl7RunTestEPNS_8TestInfoE</fn>
    #    <dir>/data/dkegel/chrome-build/src/testing/gtest/src</dir>
    #    <file>gtest-internal-inl.h</file>
    #    <line>655</line>
    #  </frame>
    # although the dir, file, and line elements are missing if there is
    # no debug info.

    self._kind = getTextOf(error_node, "kind")
    self._backtraces = []      # list of [description, frames] pairs
    self._suppression = None   # raw CDATA suppression text, if valgrind >= 3.5
    self._commandline = commandline

    # Iterate through the nodes, parsing <what|auxwhat><stack> pairs.
    # |description| holds the most recent <what>/<auxwhat>/<xwhat> text until
    # the following <stack> consumes it.
    description = None
    for node in error_node.childNodes:
      if node.localName == "what" or node.localName == "auxwhat":
        description = "".join([n.data for n in node.childNodes
                              if n.nodeType == n.TEXT_NODE])
      elif node.localName == "xwhat":
        description = getTextOf(node, "text")
      elif node.localName == "stack":
        self._backtraces.append([description, gatherFrames(node, source_dir)])
        description = None
      elif node.localName == "origin":
        # <origin> encloses its own <what> and <stack>.
        description = getTextOf(node, "what")
        stack = node.getElementsByTagName("stack")[0]
        frames = gatherFrames(stack, source_dir)
        self._backtraces.append([description, frames])
        description = None
        stack = None
        frames = None
      elif node.localName == "suppression":
        self._suppression = getCDATAOf(node, "rawtext");

  def __str__(self):
    ''' Pretty print the type and backtrace(s) of this specific error,
    including suppression (which is just a mangled backtrace).

    Spawns "c++filt -n" once per backtrace to demangle C++ symbol names.
    Asserts if valgrind produced no <suppression> (pre-3.5 valgrind).
    '''
    output = ""
    if (self._commandline):
      output += self._commandline + "\n"

    output += self._kind + "\n"
    for backtrace in self._backtraces:
      output += backtrace[0] + "\n"
      # NOTE: |filter| shadows the builtin of the same name (pre-existing).
      filter = subprocess.Popen("c++filt -n", stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                shell=True,
                                close_fds=True)
      # Feed one mangled name (or the raw address if no symbol) per line.
      buf = ""
      for frame in backtrace[1]:
        buf += (frame[FUNCTION_NAME] or frame[INSTRUCTION_POINTER]) + "\n"
      # encode('latin-1') — presumably to guard against non-ASCII symbol
      # bytes when piping to c++filt; TODO(review) confirm.
      (stdoutbuf, stderrbuf) = filter.communicate(buf.encode('latin-1'))
      demangled_names = stdoutbuf.split("\n")

      i = 0
      for frame in backtrace[1]:
        output += ("  " + demangled_names[i])
        i = i + 1

        global TheAddressTable
        if TheAddressTable != None and frame[SRC_FILE_DIR] == "":
          # Try using gdb
          foo = TheAddressTable.GetFileLine(frame[OBJECT_FILE],
                                            frame[INSTRUCTION_POINTER])
          if foo[0] != None:
            output += (" (" + foo[0] + ":" + foo[1] + ")")
        elif frame[SRC_FILE_DIR] != "":
          output += (" (" + frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME] +
                     ":" + frame[SRC_LINE] + ")")
        else:
          # No source info at all; fall back to the binary's path.
          output += " (" + frame[OBJECT_FILE] + ")"
        output += "\n"

    assert self._suppression != None, "Your Valgrind doesn't generate " \
                                      "suppressions - is it too old?"

    # The hash marker lets other reports reference this error by identity.
    output += "Suppression (error hash=#%016X#):" % \
              (self.__hash__() & 0xffffffffffffffff)
    # Widen suppression slightly to make portable between mac and linux
    # (_Znwj / _Znwm are operator new for 32- and 64-bit size_t).
    supp = self._suppression;
    supp = supp.replace("fun:_Znwj", "fun:_Znw*")
    supp = supp.replace("fun:_Znwm", "fun:_Znw*")
    # Split into lines so we can enforce length limits
    supplines = supp.split("\n")

    # Truncate at line 26 (VG_MAX_SUPP_CALLERS plus 2 for name and type)
    # or at the first 'boring' caller.
    # (https://bugs.kde.org/show_bug.cgi?id=199468 proposes raising
    # VG_MAX_SUPP_CALLERS, but we're probably fine with it as is.)
    # TODO(dkegel): add more boring callers
    newlen = 26;
    for boring_caller in ["   fun:_ZN11MessageLoop3RunEv",
                          "   fun:_ZN7testing4Test3RunEv"]:
      try:
        newlen = min(newlen, supplines.index(boring_caller))
      except ValueError:
        pass
    if (len(supplines) > newlen):
      # Re-close the suppression block after truncation.
      supplines = supplines[0:newlen]
      supplines.append("}")

    output += "\n".join(supplines) + "\n"

    return output

  def UniqueString(self):
    ''' String to use for object identity. Don't print this, use str(obj)
    instead.'''
    rep = self._kind + " "
    for backtrace in self._backtraces:
      for frame in backtrace[1]:
        rep += frame[FUNCTION_NAME]

        if frame[SRC_FILE_DIR] != "":
          rep += frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME]
        else:
          rep += frame[OBJECT_FILE]

    return rep

  def __hash__(self):
    # Hash on identity string so equal errors collide in sets/dicts.
    return hash(self.UniqueString())
  def __eq__(self, rhs):
    # NOTE(review): this compares the identity string against |rhs| itself,
    # not rhs.UniqueString().  Comparing two ValgrindError objects still
    # works, but only via Python's reflected-comparison fallback (str == obj
    # returns NotImplemented, then obj.__eq__(str) runs) — fragile; confirm
    # before relying on it.
    return self.UniqueString() == rhs
301
def find_and_truncate(f):
  '''Scans file object |f| from the beginning for '</valgrindoutput>'.

  Returns True after truncating |f| right after the line containing the tag
  (valgrind often leaves garbage there after a crash); returns False if EOF
  is reached without finding the tag, i.e. the log is incomplete.
  '''
  f.seek(0)
  line = f.readline()
  while line != "":
    if '</valgrindoutput>' in line:
      # valgrind often has garbage after </valgrindoutput> upon crash
      f.truncate()
      return True
    line = f.readline()
  return False
312
class MemcheckAnalyzer:
  ''' Given a set of Valgrind XML files, parse all the errors out of them,
  unique them and output the results.'''

  # Suppressions the memcheck sanity tests are expected to hit, each exactly
  # once; checked by Report(check_sanity=True).
  SANITY_TEST_SUPPRESSIONS = {
    "Memcheck sanity test 01 (memory leak).": 1,
    "Memcheck sanity test 02 (malloc/read left).": 1,
    "Memcheck sanity test 03 (malloc/read right).": 1,
    "Memcheck sanity test 04 (malloc/write left).": 1,
    "Memcheck sanity test 05 (malloc/write right).": 1,
    "Memcheck sanity test 06 (new/read left).": 1,
    "Memcheck sanity test 07 (new/read right).": 1,
    "Memcheck sanity test 08 (new/write left).": 1,
    "Memcheck sanity test 09 (new/write right).": 1,
    "Memcheck sanity test 10 (write after free).": 1,
    "Memcheck sanity test 11 (write after delete).": 1,
    "Memcheck sanity test 12 (array deleted without []).": 1,
    "Memcheck sanity test 13 (single element deleted with []).": 1,
  }

  # Max time to wait for memcheck logs to complete.
  LOG_COMPLETION_TIMEOUT = 180.0

  def __init__(self, source_dir, show_all_leaks=False, use_gdb=False):
    '''Create a parser for Memcheck logs.

    Args:
      source_dir: Path to top of source tree for this build
      show_all_leaks: Whether to show even less important leaks
      use_gdb: Whether to use gdb to resolve source filenames and line numbers
               in the report stacktraces
    '''
    self._source_dir = source_dir
    self._show_all_leaks = show_all_leaks
    self._use_gdb = use_gdb

    # Contains the set of unique errors
    self._errors = set()

    # Contains the time when we started analyzing the first log file.
    # This variable is used to skip incomplete logs after some timeout.
    self._analyze_start_time = None


  def Report(self, files, check_sanity=False):
    '''Reads in a set of files and prints Memcheck report.

    Args:
      files: A list of filenames.
      check_sanity: if true, search for SANITY_TEST_SUPPRESSIONS

    Returns:
      0 on success; -1 if errors were found; -2 if an XML file could not be
      parsed; -3 if the sanity check failed.
    '''
    # Beyond the detailed errors parsed by ValgrindError above,
    # the xml file contain records describing suppressions that were used:
    # <suppcounts>
    #  <pair>
    #    <count>28</count>
    #    <name>pango_font_leak_todo</name>
    #  </pair>
    #  <pair>
    #    <count>378</count>
    #    <name>bug_13243</name>
    #  </pair>
    # </suppcounts>
    # Collect these and print them at the end.
    #
    # With our patch for https://bugs.kde.org/show_bug.cgi?id=205000 in,
    # the file also includes records of the form
    # <load_obj><obj>/usr/lib/libgcc_s.1.dylib</obj><ip>0x27000</ip></load_obj>
    # giving the filename and load address of each binary that was mapped
    # into the process.

    global TheAddressTable
    if self._use_gdb:
      TheAddressTable = gdb_helper.AddressTable()
    else:
      TheAddressTable = None
    cur_report_errors = set()
    suppcounts = {}
    badfiles = set()

    # The timeout clock starts on the first Report() call and is shared by
    # all subsequent calls on this instance.
    if self._analyze_start_time == None:
      self._analyze_start_time = time.time()
    start_time = self._analyze_start_time

    parse_failed = False
    for file in files:
      # Wait up to three minutes for valgrind to finish writing all files,
      # but after that, just skip incomplete files and warn.
      f = open(file, "r+")
      # Valgrind log filenames end in ".<pid>"; extract it so we can check
      # whether the producing process is still alive.
      pid = re.match(".*\.([0-9]+)$", file)
      if pid:
        pid = pid.groups()[0]
      found = False
      running = True
      firstrun = True
      origsize = os.path.getsize(file)
      while (running and not found and
             (firstrun or
              ((time.time() - start_time) < self.LOG_COMPLETION_TIMEOUT))):
        firstrun = False
        f.seek(0)
        if pid:
          # Make sure the process is still running so we don't wait for
          # 3 minutes if it was killed. See http://crbug.com/17453
          ps_out = subprocess.Popen("ps p %s" % pid, shell=True,
                                    stdout=subprocess.PIPE).stdout
          # NOTE(review): this compares a *list* to the int 2, which in
          # Python 2 is always False, so |running| is never cleared here —
          # presumably meant to be len(ps_out.readlines()) < 2; confirm.
          if ps_out.readlines() < 2:
            running = False
        found = find_and_truncate(f)
        if not running and not found:
          logging.warn("Valgrind process PID = %s is not running but "
                       "its XML log has not been finished correctly." % pid)
        if running and not found:
          time.sleep(1)
      f.close()
      if not found:
        badfiles.add(file)
      else:
        # find_and_truncate() chopped junk after </valgrindoutput>; report
        # how much was removed.
        newsize = os.path.getsize(file)
        if origsize > newsize+1:
          logging.warn(str(origsize - newsize) +
                       " bytes of junk were after </valgrindoutput> in %s!" %
                       file)
        try:
          parsed_file = parse(file);
        except ExpatError, e:
          # Log a few lines of context around the parse error, with the
          # offending line marked by '>'.
          parse_failed = True
          logging.warn("could not parse %s: %s" % (file, e))
          lineno = e.lineno - 1
          context_lines = 5
          context_start = max(0, lineno - context_lines)
          context_end = lineno + context_lines + 1
          context_file = open(file, "r")
          for i in range(0, context_start):
            context_file.readline()
          for i in range(context_start, context_end):
            context_data = context_file.readline().rstrip()
            if i != lineno:
              logging.warn("  %s" % context_data)
            else:
              logging.warn("> %s" % context_data)
          context_file.close()
          continue
        if TheAddressTable != None:
          # Register each mapped binary and its load address for gdb-based
          # address resolution.
          load_objs = parsed_file.getElementsByTagName("load_obj")
          for load_obj in load_objs:
            obj = getTextOf(load_obj, "obj")
            ip = getTextOf(load_obj, "ip")
            TheAddressTable.AddBinaryAt(obj, ip)

        # Pull the command line out of the <preamble> so the report can show
        # what was run under valgrind.
        commandline = None
        preamble = parsed_file.getElementsByTagName("preamble")[0];
        for node in preamble.getElementsByTagName("line"):
          if node.localName == "line":
            for x in node.childNodes:
              if x.nodeType == node.TEXT_NODE and "Command" in x.data:
                commandline = x.data
                break

        raw_errors = parsed_file.getElementsByTagName("error")
        for raw_error in raw_errors:
          # Ignore "possible" leaks for now by default.
          if (self._show_all_leaks or
              getTextOf(raw_error, "kind") != "Leak_PossiblyLost"):
            error = ValgrindError(self._source_dir, raw_error, commandline)
            if error not in cur_report_errors:
              # We haven't seen such errors doing this report yet...
              if error in self._errors:
                # ... but we saw it in earlier reports, e.g. previous UI test
                cur_report_errors.add("This error was already printed in "
                                      "some other test, see 'hash=#%016X#'" % \
                                      (error.__hash__() & 0xffffffffffffffff))
              else:
                # ... and we haven't seen it in other tests as well
                self._errors.add(error)
                cur_report_errors.add(error)

        # Accumulate per-suppression hit counts across all files.
        suppcountlist = parsed_file.getElementsByTagName("suppcounts")
        if len(suppcountlist) > 0:
          suppcountlist = suppcountlist[0]
          for node in suppcountlist.getElementsByTagName("pair"):
            count = getTextOf(node, "count");
            name = getTextOf(node, "name");
            if name in suppcounts:
              suppcounts[name] += int(count)
            else:
              suppcounts[name] = int(count)

    if len(badfiles) > 0:
      logging.warn("valgrind didn't finish writing %d files?!" % len(badfiles))
      for file in badfiles:
        logging.warn("Last 20 lines of %s :" % file)
        os.system("tail -n 20 '%s' 1>&2" % file)

    if parse_failed:
      logging.error("FAIL! Couldn't parse Valgrind output file")
      return -2

    is_sane = False
    print "-----------------------------------------------------"
    print "Suppressions used:"
    print "  count name"
    # NOTE(review): this is an *alias* of the class-level dict, not a copy,
    # so the del below mutates SANITY_TEST_SUPPRESSIONS for the lifetime of
    # the process — confirm whether Report() is ever called twice with
    # check_sanity=True.
    remaining_sanity_supp = MemcheckAnalyzer.SANITY_TEST_SUPPRESSIONS
    # Sort by (count, name) so the most-used suppressions appear last.
    for (name, count) in sorted(suppcounts.items(),
                                key=lambda (k,v): (v,k)):
      print "%7d %s" % (count, name)
      if name in remaining_sanity_supp and remaining_sanity_supp[name] == count:
        del remaining_sanity_supp[name]
    if len(remaining_sanity_supp) == 0:
      is_sane = True
    print "-----------------------------------------------------"
    sys.stdout.flush()

    retcode = 0
    if cur_report_errors:
      logging.error("FAIL! There were %s errors: " % len(cur_report_errors))

      if TheAddressTable != None:
        TheAddressTable.ResolveAll()

      for error in cur_report_errors:
        logging.error(error)

      retcode = -1

    # Report tool's insanity even if there were errors.
    if check_sanity and not is_sane:
      logging.error("FAIL! Sanity check failed!")
      logging.info("The following test errors were not handled: ")
      for (name, count) in sorted(remaining_sanity_supp.items(),
                                  key=lambda (k,v): (v,k)):
        logging.info("%7d %s" % (count, name))
      retcode = -3

    if retcode != 0:
      return retcode

    logging.info("PASS! No errors found!")
    return 0
553
def _main():
  '''For testing only. The MemcheckAnalyzer class should be imported instead.

  Parses command-line arguments, runs one Report() over the given files and
  exits with its return code.
  '''
  parser = optparse.OptionParser("usage: %prog [options] <files to analyze>")
  parser.add_option("", "--source_dir",
                    help="path to top of source tree for this build"
                         "(used to normalize source paths in baseline)")

  options, args = parser.parse_args()
  if not args:
    parser.error("no filename specified")

  analyzer = MemcheckAnalyzer(options.source_dir, use_gdb=True)
  sys.exit(analyzer.Report(args))

if __name__ == "__main__":
  _main()

Properties

Name Value
svn:eol-style LF
svn:executable *

Powered by ViewVC 1.1.26 ViewVC Help