Source Code for Module logsparser.normalizer

# -*- python -*-

# pylogsparser - Logs parsers python library
#
# Copyright (C) 2011 Wallix Inc.
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation; either version 2.1 of the License, or (at your
# option) any later version.
#
# This library is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#

"""
Here we have everything needed to parse and use XML definition files.

The only class one should ever use here is L{Normalizer}. The rest is
used during the parsing of the definition files that is taken care of
by the Normalizer class.
"""

import re
import csv
import warnings
import math

from lxml.etree import parse, tostring
from datetime import datetime # pyflakes:ignore
import urlparse # pyflakes:ignore
import logsparser.extras as extras # pyflakes:ignore

try:
    import GeoIP #pyflakes:ignore
    country_code_by_address = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE).country_code_by_addr
except ImportError, e:
    country_code_by_address = lambda x: None

# the following symbols and modules are allowed for use in callbacks.
SAFE_SYMBOLS = ["list", "dict", "tuple", "set", "long", "float", "object",
                "bool", "callable", "True", "False", "dir",
                "frozenset", "getattr", "hasattr", "abs", "cmp", "complex",
                "divmod", "id", "pow", "round", "slice", "vars",
                "hash", "hex", "int", "isinstance", "issubclass", "len",
                "map", "filter", "max", "min", "oct", "chr", "ord", "range",
                "reduce", "repr", "str", "unicode", "basestring", "type", "zip",
                "xrange", "None", "Exception", "re", "datetime", "math",
                "urlparse", "country_code_by_address", "extras"]

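Callback code loaded from the definition files runs against this whitelist: CallbackFunction (below) compiles each body in a namespace where only the SAFE_SYMBOLS above and the parameters value and log are visible. A minimal, hypothetical callback body as it could appear in a definition file:

    # "value" is the captured text, "log" is the dictionary of tags being built
    log['dstport'] = int(value)
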
class Tag(object):
    """A tag as defined in a pattern."""
    def __init__(self,
                 name,
                 tagtype,
                 substitute,
                 description = {},
                 callbacks = []):
        """@param name: the tag's name
        @param tagtype: the tag's type name
        @param substitute: the string representing the tag in a log pattern
        @param description: a dictionary holding multilingual descriptions of
        the tag
        @param callbacks: a list of optional callbacks to fire once the tag value
        has been extracted"""
        self.name = name
        self.tagtype = tagtype
        self.substitute = substitute
        self.description = description
        self.callbacks = callbacks

    def get_description(self, language = 'en'):
        """@return: the tag description"""
        return self.description.get(language, 'N/A')

class TagType(object):
    """A tag type. This defines how to match a given tag."""
    def __init__(self,
                 name,
                 ttype,
                 regexp,
                 description = {},
                 flags = re.UNICODE | re.IGNORECASE):
        """@param name: the tag type's name
        @param ttype: the expected type of the value fetched by the associated regular expression
        @param regexp: the regular expression (as text, not compiled) associated to this type
        @param description: a dictionary holding multilingual descriptions of
        the tag type
        @param flags: flags by which to compile the regular expression"""
        self.name = name
        self.ttype = ttype
        self.regexp = regexp
        self.description = description
        try:
            self.compiled_regexp = re.compile(regexp, flags)
        except:
            raise ValueError, "Invalid regular expression %s" % regexp


# import the common tag types
def get_generic_tagTypes(path = 'normalizers/common_tagTypes.xml'):
    """Imports the common tag types.

    @return: a dictionary of tag types."""
    generic = {}
    try:
        tagTypes = parse(open(path, 'r')).getroot()
        for tagType in tagTypes:
            tt_name = tagType.get('name')
            tt_type = tagType.get('ttype') or 'basestring'
            tt_desc = {}
            for child in tagType:
                if child.tag == 'description':
                    for desc in child:
                        lang = desc.get('language') or 'en'
                        tt_desc[lang] = desc.text
                elif child.tag == 'regexp':
                    tt_regexp = child.text
            generic[tt_name] = TagType(tt_name, tt_type, tt_regexp, tt_desc)
        return generic
    except StandardError, err:
        warnings.warn("Could not load generic tags definition file : %s - generic tags will not be available." % err)
        return {}

# import the common callbacks
def get_generic_callBacks(path = 'normalizers/common_callBacks.xml'):
    """Imports the common callbacks.

    @return: a dictionary of callbacks."""
    generic = {}
    try:
        callBacks = parse(open(path, 'r')).getroot()
        for callBack in callBacks:
            cb_name = callBack.get('name')
            # cb_desc = {}
            for child in callBack:
                if child.tag == 'code':
                    cb_code = child.text
                # descriptions are not used yet but implemented in xml and dtd files for later use
                # elif child.tag == 'description':
                #     for desc in child:
                #         lang = desc.get('language')
                #         cb_desc[lang] = desc.text
            generic[cb_name] = CallbackFunction(cb_code, cb_name)
        return generic
    except StandardError, err:
        warnings.warn("Could not load generic callbacks definition file : %s - generic callbacks will not be available." % err)
        return {}

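Both loaders are plain functions; a minimal sketch, assuming the XML definition files are available under a local normalizers/ directory:

    generic_tagTypes = get_generic_tagTypes('normalizers/common_tagTypes.xml')
    generic_callBacks = get_generic_callBacks('normalizers/common_callBacks.xml')
    print sorted(generic_tagTypes.keys())   # includes the 'Anything' fallback type
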
class PatternExample(object):
    """Represents a log sample matching a given pattern. expected_tags is a
    dictionary of tag names -> values that should be obtained after the
    normalization of this sample."""
    def __init__(self,
                 raw_line,
                 expected_tags = {},
                 description = {}):
        self.raw_line = raw_line
        self.expected_tags = expected_tags
        self.description = description

    def get_description(self, language = 'en'):
        """@return: an example description"""
        return { 'sample' : self.raw_line,
                 'normalization' : self.expected_tags }

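A PatternExample simply pairs a raw log line with the tags its pattern should yield; the Normalizer's validate() method (below) replays these pairs. A sketch with hypothetical values:

    ex = PatternExample("sshd[14643]: Accepted password for root",
                        expected_tags = {'program': 'sshd', 'user': 'root'})
    print ex.get_description()   # a dict holding the 'sample' line and its 'normalization' tags
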
class Pattern(object):
    """A pattern, as defined in a normalizer configuration file."""
    def __init__(self,
                 name,
                 pattern,
                 tags = {},
                 description = '',
                 commonTags = {},
                 examples = [] ):
        self.name = name
        self.pattern = pattern
        self.tags = tags
        self.description = description
        self.examples = examples
        self.commonTags = commonTags

    def normalize(self, logline):
        raise NotImplementedError

    def test_examples(self):
        raise NotImplementedError

    def get_description(self, language = 'en'):
        tags_desc = dict([ (tag.name, tag.get_description(language)) for tag in self.tags.values() ])
        substitutes = dict([ (tag.substitute, tag.name) for tag in self.tags.values() ])
        examples_desc = [ example.get_description(language) for example in self.examples ]
        return { 'pattern' : self.pattern,
                 'description' : self.description.get(language, "N/A"),
                 'tags' : tags_desc,
                 'substitutes' : substitutes,
                 'commonTags' : self.commonTags,
                 'examples' : examples_desc }

class CSVPattern(object):
    """A pattern that handles the CSV case."""
    def __init__(self,
                 name,
                 pattern,
                 separator = ',',
                 quotechar = '"',
                 tags = {},
                 callBacks = [],
                 tagTypes = {},
                 genericTagTypes = {},
                 genericCallBacks = {},
                 description = '',
                 commonTags = {},
                 examples = []):
        """
        @param name: the pattern name
        @param pattern: the CSV pattern
        @param separator: the CSV delimiter
        @param quotechar: the CSV quote character
        @param tags: a dict of L{Tag} instances with the tag name as key
        @param callBacks: a dict of L{CallbackFunction} instances with the callback name as key
        @param tagTypes: a dict of L{TagType} instances with the TagType name as key
        @param genericTagTypes: a dict of L{TagType} instances from the common_tags xml definition, with the TagType name as key
        @param genericCallBacks: a dict of L{CallbackFunction} instances from the common_callbacks xml definition, with the callback name as key
        @param description: a pattern description
        @param commonTags: a dict of tags to add to the final normalization
        @param examples: a list of L{PatternExample}
        """
        self.name = name
        self.pattern = pattern
        self.separator = separator
        self.quotechar = quotechar
        self.tags = tags
        self.callBacks = callBacks
        self.tagTypes = tagTypes
        self.genericTagTypes = genericTagTypes
        self.genericCallBacks = genericCallBacks
        self.description = description
        self.examples = examples
        self.commonTags = commonTags
        _fields = self.pattern.split(self.separator)
        if self.separator != ' ':
            self.fields = [f.strip() for f in _fields]
        else:
            self.fields = _fields
        self.check_count = len(self.fields)

    def postprocess(self, data):
        for tag in self.tags:
            # tagTypes defined in the conf file take precedence over the
            # generic ones. If nothing is found either way, fall back to
            # Anything.
            tag_regexp = self.tagTypes.get(self.tags[tag].tagtype,
                                           self.genericTagTypes.get(self.tags[tag].tagtype, self.genericTagTypes['Anything'])).regexp
            r = re.compile(tag_regexp)
            field = self.tags[tag].substitute
            if field not in data.keys():
                continue
            if not r.match(data[field]):
                # We found a tag that does not match the expected regexp
                return None
            else:
                value = data[field]
                del data[field]
                data[tag] = value
            # try to apply callbacks,
            # but do not try to apply callbacks if we do not have any value
            if not data[tag]:
                continue
            callbacks_names = self.tags[tag].callbacks
            for cbname in callbacks_names:
                try:
                    # get the callback in the definition file, or look it up in the common library if not found
                    callback = [cb for cb in self.callBacks.values() if cb.name == cbname] or\
                               [cb for cb in self.genericCallBacks.values() if cb.name == cbname]
                    callback = callback[0]
                except:
                    warnings.warn("Unable to find callback %s for pattern %s" %
                                  (cbname, self.name))
                    continue
                try:
                    callback(data[tag], data)
                except Exception, e:
                    raise Exception("Error on callback %s in pattern %s : %s - skipping" %
                                    (cbname,
                                     self.name, e))
        # remove temporary tags
        temp_tags = [t for t in data.keys() if t.startswith('__')]
        for t in temp_tags:
            del data[t]
        empty_tags = [t for t in data.keys() if not data[t]]
        # remove empty tags
        for t in empty_tags:
            del data[t]
        return data

    def normalize(self, logline):
        # Verify logline is a basestring
        if not isinstance(logline, basestring):
            return None
        # Try to retrieve some fields with the csv reader
        try:
            data = [data for data in csv.reader([logline], delimiter = self.separator, quotechar = self.quotechar)][0]
        except:
            return None
        # Check we have something in data
        if not data:
            return None
        else:
            # Verify the csv reader has matched the expected number of fields
            if len(data) != self.check_count:
                return None
            # Check the expected format of the fields and apply callbacks
            data = self.postprocess(dict(zip(self.fields, data)))
            # Add common tags
            if data:
                data.update(self.commonTags)
        return data

    def test_examples(self):
        raise NotImplementedError

    def get_description(self, language = 'en'):
        tags_desc = dict([ (tag.name, tag.get_description(language)) for tag in self.tags.values() ])
        substitutes = dict([ (tag.substitute, tag.name) for tag in self.tags.values() ])
        examples_desc = [ example.get_description(language) for example in self.examples ]
        return { 'pattern' : self.pattern,
                 'description' : self.description.get(language, "N/A"),
                 'tags' : tags_desc,
                 'substitutes' : substitutes,
                 'commonTags' : self.commonTags,
                 'examples' : examples_desc }

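For illustration, the pattern text of a CSVPattern is the ordered list of field names; with no tags declared, postprocess() leaves the parsed values untouched. A minimal sketch with hypothetical field names:

    p = CSVPattern("sample_csv", "date,host,message")
    print p.fields                 # ['date', 'host', 'message']
    print p.normalize('2011-09-27,myhost,"hello, world"')
    # maps each field to its parsed value, e.g. 'message' -> 'hello, world'
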
class CallbackFunction(object):
    """This class is used to define a callback function from source code present
    in the XML configuration file. The function is defined in a sanitized
    environment (imports are disabled, for instance).
    This class is inspired by this recipe :
    http://code.activestate.com/recipes/550804-create-a-restricted-python-function-from-a-string/
    """
    def __init__(self, function_body = "log['test'] = value",
                 name = 'unknown'):

        source = "def __cbfunc__(value,log):\n"
        source += '\t' + '\n\t'.join(function_body.split('\n')) + '\n'

        self.__doc__ = "Callback function generated from the following code:\n\n" + source
        byteCode = compile(source, '<string>', 'exec')
        self.name = name

        # Setup a standard-compatible python environment
        builtins = dict()
        globs = dict()
        locs = dict()
        builtins["locals"] = lambda: locs
        builtins["globals"] = lambda: globs
        globs["__builtins__"] = builtins
        globs["__name__"] = "SAFE_ENV"
        globs["__doc__"] = source

        if type(__builtins__) is dict:
            bi_dict = __builtins__
        else:
            bi_dict = __builtins__.__dict__

        for k in SAFE_SYMBOLS:
            try:
                locs[k] = locals()[k]
                continue
            except KeyError:
                pass
            try:
                globs[k] = globals()[k]
                continue
            except KeyError:
                pass
            try:
                builtins[k] = bi_dict[k]
            except KeyError:
                pass

        # set the function in the safe environment
        eval(byteCode, globs, locs)
        self.cbfunction = locs["__cbfunc__"]

    def __call__(self, value, log):
        """call the instance as a function to run the callback."""
        # Exceptions are caught higher up in the normalization process.
        self.cbfunction(value, log)
        return log

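A minimal sketch of the class in action (hypothetical callback name and tag): the body is wrapped into __cbfunc__(value, log) and run in the restricted namespace, mutating the log dictionary in place:

    to_int = CallbackFunction("log['dstport'] = int(value)", name = 'decode_port')
    log = {'raw': 'some log line'}
    to_int('8080', log)
    print log['dstport']   # 8080
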
class Normalizer(object):
    """Log Normalizer, based on an XML definition file."""

    def __init__(self, xmlconf, genericTagTypes, genericCallBacks):
        """initializes the normalizer with an lxml ElementTree.

        @param xmlconf: lxml ElementTree normalizer definition
        @param genericTagTypes: path to generic tags definition xml file
        @param genericCallBacks: path to generic callbacks definition xml file
        """
        self.text_source = tostring(xmlconf, pretty_print = True)
        self.sys_path = xmlconf.docinfo.URL
        normalizer = xmlconf.getroot()
        self.genericTagTypes = get_generic_tagTypes(genericTagTypes)
        self.genericCallBacks = get_generic_callBacks(genericCallBacks)
        self.description = {}
        self.authors = []
        self.tagTypes = {}
        self.callbacks = {}
        self.prerequisites = {}
        self.patterns = {}
        self.commonTags = {}
        self.finalCallbacks = []
        self.name = normalizer.get('name')
        if not self.name:
            raise ValueError, "The normalizer configuration lacks a name."
        self.version = float(normalizer.get('version')) or 1.0
        self.appliedTo = normalizer.get('appliedTo') or 'raw'
        self.re_flags = ( (normalizer.get('unicode') == "yes" and re.UNICODE ) or 0 ) |\
                        ( (normalizer.get('ignorecase') == "yes" and re.IGNORECASE ) or 0 ) |\
                        ( (normalizer.get('multiline') == "yes" and re.MULTILINE ) or 0 )
        self.matchtype = ( normalizer.get('matchtype') == "search" and "search" ) or 'match'
        try:
            self.taxonomy = normalizer.get('taxonomy')
        except:
            self.taxonomy = None

        for node in normalizer:
            if node.tag == "description":
                for desc in node:
                    self.description[desc.get('language')] = desc.text
            elif node.tag == "authors":
                for author in node:
                    self.authors.append(author.text)
            elif node.tag == "tagTypes":
                for tagType in node:
                    tT_description = {}
                    tT_regexp = ''
                    for child in tagType:
                        if child.tag == 'description':
                            for desc in child:
                                tT_description[desc.get("language")] = desc.text
                        elif child.tag == 'regexp':
                            tT_regexp = child.text
                    self.tagTypes[tagType.get('name')] = TagType(tagType.get('name'),
                                                                 tagType.get('ttype') or "basestring",
                                                                 tT_regexp,
                                                                 tT_description,
                                                                 self.re_flags)
            elif node.tag == 'callbacks':
                for callback in node:
                    self.callbacks[callback.get('name')] = CallbackFunction(callback.text, callback.get('name'))
            elif node.tag == 'prerequisites':
                for prereqTag in node:
                    self.prerequisites[prereqTag.get('name')] = prereqTag.text
            elif node.tag == 'patterns':
                self.__parse_patterns(node)
            elif node.tag == "commonTags":
                for commonTag in node:
                    self.commonTags[commonTag.get('name')] = commonTag.text
            elif node.tag == "finalCallbacks":
                for callback in node:
                    self.finalCallbacks.append(callback.text)
        # precompile regexp
        self.full_regexp, self.tags_translation, self.tags_to_pattern, whatever = self.get_uncompiled_regexp()
        self.full_regexp = re.compile(self.full_regexp, self.re_flags)

    def __parse_patterns(self, node):
        for pattern in node:
            p_name = pattern.get('name')
            p_description = {}
            p_tags = {}
            p_commonTags = {}
            p_examples = []
            p_csv = {}
            for p_node in pattern:
                if p_node.tag == 'description':
                    for desc in p_node:
                        p_description[desc.get('language')] = desc.text
                elif p_node.tag == 'text':
                    p_pattern = p_node.text
                    if 'type' in p_node.attrib:
                        p_type = p_node.get('type')
                        if p_type == 'csv':
                            p_csv = {'type': 'csv'}
                            if 'separator' in p_node.attrib:
                                p_csv['separator'] = p_node.get('separator')
                            if 'quotechar' in p_node.attrib:
                                p_csv['quotechar'] = p_node.get('quotechar')
                elif p_node.tag == 'tags':
                    for tag in p_node:
                        t_cb = []
                        t_description = {}
                        t_name = tag.get('name')
                        t_tagtype = tag.get('tagType')
                        for child in tag:
                            if child.tag == 'description':
                                for desc in child:
                                    t_description[desc.get('language')] = desc.text
                            if child.tag == 'substitute':
                                t_substitute = child.text
                            elif child.tag == 'callbacks':
                                for cb in child:
                                    t_cb.append(cb.text)
                        p_tags[t_name] = Tag(t_name, t_tagtype, t_substitute, t_description, t_cb)
                elif p_node.tag == "commonTags":
                    for commontag in p_node:
                        p_commonTags[commontag.get('name')] = commontag.text
                elif p_node.tag == 'examples':
                    for example in p_node:
                        e_description = {}
                        e_expectedTags = {}
                        for child in example:
                            if child.tag == 'description':
                                for desc in child:
                                    e_description[desc.get('language')] = desc.text
                            elif child.tag == 'text':
                                e_rawline = child.text
                            elif child.tag == "expectedTags":
                                for etag in child:
                                    e_expectedTags[etag.get('name')] = etag.text
                        p_examples.append(PatternExample(e_rawline, e_expectedTags, e_description))
            if not p_csv:
                self.patterns[p_name] = Pattern(p_name, p_pattern, p_tags, p_description, p_commonTags, p_examples)
            else:
                self.patterns[p_name] = CSVPattern(p_name, p_pattern, p_csv['separator'], p_csv['quotechar'], p_tags,
                                                   self.callbacks, self.tagTypes, self.genericTagTypes, self.genericCallBacks, p_description,
                                                   p_commonTags, p_examples)

    def get_description(self, language = "en"):
        return "%s v. %s" % (self.name, self.version)

    def get_long_description(self, language = 'en'):
        patterns_desc = [ pattern.get_description(language) for pattern in self.patterns.values() ]
        return { 'name' : self.name,
                 'version' : self.version,
                 'authors' : self.authors,
                 'description' : self.description.get(language, "N/A"),
                 'patterns' : patterns_desc,
                 'commonTags' : self.commonTags,
                 'taxonomy' : self.taxonomy }

    def get_uncompiled_regexp(self, p = None, increment = 0):
        """returns the uncompiled regular expression associated to the pattern named p.
        If p is None, all patterns are stitched together, ready for compilation.
        increment is the starting value to use for the generic tag names in the
        returned regular expression.
        @return: regexp, dictionary of tag names <-> tag codes,
                 dictionary of tag codes <-> pattern the tag came from,
                 new increment value
        """
        patterns = p
        regexps = []
        tags_translations = {}
        tags_to_pattern = {}
        if not patterns:
            # WARNING ! dictionary keys are not necessarily returned in creation order.
            # This is silly, as the pattern order is crucial. So we must enforce that
            # patterns are named in alphabetical order of precedence ...
            patterns = sorted(self.patterns.keys())
        if isinstance(patterns, basestring):
            patterns = [patterns]
        for pattern in patterns:
            if isinstance(self.patterns[pattern], CSVPattern):
                continue
            regexp = self.patterns[pattern].pattern
            for tagname, tag in self.patterns[pattern].tags.items():
                # tagTypes defined in the conf file take precedence over the
                # generic ones. If nothing is found either way, fall back to
                # Anything.
                tag_regexp = self.tagTypes.get(tag.tagtype,
                                               self.genericTagTypes.get(tag.tagtype,
                                                                        self.genericTagTypes['Anything'])).regexp
                named_group = '(?P<tag%i>%s)' % (increment, tag_regexp)
                regexp = regexp.replace(tag.substitute, named_group)
                tags_translations['tag%i' % increment] = tagname
                tags_to_pattern['tag%i' % increment] = pattern
                increment += 1
            regexps.append("(?:%s)" % regexp)
        return "|".join(regexps), tags_translations, tags_to_pattern, increment

    def normalize(self, log, do_not_check_prereq = False):
        """normalization in standalone mode.
        @param log: a dictionary or an object providing at least a get() method
        @param do_not_check_prereq: if set to True, the prerequisite tags check
        is skipped (debug purpose only)
        @return: a dictionary with updated tags if normalization was successful."""
        if isinstance(log, basestring) or not hasattr(log, "get"):
            raise ValueError, "the normalizer expects an argument of type Dict"
        # Test prerequisites
        if all( [ re.match(value, log.get(prereq, ''))
                  for prereq, value in self.prerequisites.items() ]) or\
           do_not_check_prereq:
            csv_patterns = [csv_pattern for csv_pattern in self.patterns.values() if isinstance(csv_pattern, CSVPattern)]
            if self.appliedTo in log.keys():
                m = getattr(self.full_regexp, self.matchtype)(log[self.appliedTo])
                if m is not None:
                    m = m.groupdict()
                if m:
                    # this little trick makes the following line not type dependent
                    temp_wl = dict([ (u, log[u]) for u in log.keys() ])
                    for tag in m:
                        if m[tag] is not None:
                            matched_pattern = self.patterns[self.tags_to_pattern[tag]]
                            temp_wl[self.tags_translation[tag]] = m[tag]
                            # apply callbacks, if any
                            for cb in matched_pattern.tags[self.tags_translation[tag]].callbacks:
                                # TODO it could be desirable to make sure the callback
                                # does not try to change important preset values such as
                                # 'raw' and 'uuid'.
                                try:
                                    # if the callback doesn't exist in the normalizer file, it will
                                    # be looked up in the common callbacks file.
                                    temp_wl = self.callbacks.get(cb, self.genericCallBacks.get(cb))(m[tag], temp_wl)
                                except Exception, e:
                                    pattern_name = self.patterns[self.tags_to_pattern[tag]].name
                                    raise Exception("Error on callback %s in pattern %s : %s - skipping" %
                                                    (self.callbacks[cb].name,
                                                     pattern_name, e))
                            # remove temporary tags
                            if self.tags_translation[tag].startswith('__'):
                                del temp_wl[self.tags_translation[tag]]
                    log.update(temp_wl)
                    # add the pattern's common tags
                    log.update(matched_pattern.commonTags)
                    # then add the normalizer's common tags
                    log.update(self.commonTags)
                    # then add the taxonomy if relevant
                    if self.taxonomy:
                        log['taxonomy'] = self.taxonomy
                    # and finally, apply the final callbacks
                    for cb in self.finalCallbacks:
                        try:
                            log.update(self.callbacks.get(cb, self.genericCallBacks.get(cb))(None, log))
                        except Exception, e:
                            raise Exception("Cannot apply final callback %s : %r - skipping" % (cb, e))
                elif csv_patterns:
                    # this little trick makes the following line not type dependent
                    temp_wl = dict([ (u, log[u]) for u in log.keys() ])
                    ret = None
                    for csv_pattern in csv_patterns:
                        ret = csv_pattern.normalize(temp_wl[self.appliedTo])
                        if ret:
                            log.update(ret)
                            # then add the normalizer's common tags
                            log.update(self.commonTags)
                            # then add the taxonomy if relevant
                            if self.taxonomy:
                                log['taxonomy'] = self.taxonomy
                            # and finally, apply the final callbacks
                            for cb in self.finalCallbacks:
                                try:
                                    log.update(self.callbacks.get(cb, self.genericCallBacks.get(cb))(None, log))
                                except Exception, e:
                                    raise Exception("Cannot apply final callback %s : %r - skipping" % (cb, e))
                            break
        return log

    def validate(self):
        """if the definition file comes with pattern examples, this method can
        be invoked to test these patterns against the examples.
        Note that tags not included in the "expectedTags" directives will not
        be checked for validation.
        @return: True if the normalizer is validated; raises a ValueError
        describing the problem otherwise.
        """
        for p in self.patterns:
            for example in self.patterns[p].examples:
                w = { self.appliedTo : example.raw_line }
                if isinstance(self.patterns[p], Pattern):
                    w = self.normalize(w, do_not_check_prereq = True)
                elif isinstance(self.patterns[p], CSVPattern):
                    w = self.patterns[p].normalize(example.raw_line)
                    if w:
                        w.update(self.commonTags)
                        if self.taxonomy:
                            w['taxonomy'] = self.taxonomy
                        for cb in self.finalCallbacks:
                            try:
                                w.update(self.callbacks.get(cb, self.genericCallBacks.get(cb))(None, w))
                            except Exception, e:
                                raise Exception("Cannot apply final callback %s : %r - skipping" % (cb, e))
                for expectedTag in example.expected_tags.keys():
                    if isinstance(w.get(expectedTag), datetime):
                        svalue = str(w.get(expectedTag))
                    elif isinstance(w.get(expectedTag), int):
                        svalue = str(w.get(expectedTag))
                    else:
                        svalue = w.get(expectedTag)
                    if svalue != example.expected_tags[expectedTag]:
                        raise ValueError, 'Sample log "%s" does not match : expected %s -> %s, got %s' % \
                                          (example,
                                           expectedTag,
                                           example.expected_tags[expectedTag],
                                           w.get(expectedTag))
        # No problem so far ? Awesome !
        return True

    def get_source(self):
        """gets the raw XML source for this normalizer."""
        return self.text_source

    def get_languages(self):
        """guesstimates the available languages from the description field and
        returns them as a list."""
        return self.description.keys()

# Documentation generator
def doc2RST(description, gettext = None):
    """ Returns a reStructuredText documentation from
    a parser description.
    @param description: the long description of the parser.
    @param gettext: the gettext method to use.
    You must configure gettext to use the domain 'normalizer' and
    select a language.
    eg. gettext.translation('normalizer', 'i18n', ['fr_FR']).ugettext
    """

    def escape(text):
        if isinstance(text, basestring):
            for c in "*\\":
                text = text.replace(c, "\\" + c)
        return text

    if not gettext:
        _ = lambda x: x
    else:
        _ = gettext

    template = _("""%(title)s

**Written by**

%(authors)s

Description
:::::::::::

%(description)s %(taxonomy)s

This normalizer can parse logs of the following structure(s):

%(patterns)s

Examples
::::::::

%(examples)s""")

    d = {}
    d['title'] = description['name'] + ' v.' + str(description['version'])
    d['title'] += '\n' + '-'*len(d['title'])
    d['authors'] = '\n'.join( ['* *%s*' % a for a in description['authors'] ] )
    d['description'] = escape(description['description']) or _('undocumented')
    d['taxonomy'] = ''
    if description["taxonomy"]:
        d['taxonomy'] = ("\n\n" +\
                         (_("This normalizer belongs to the category : *%s*") % description['taxonomy']) )
    d['patterns'] = ''
    d['examples'] = ''
    for p in description['patterns']:
        d['patterns'] += """* **%s**""" % escape(p['pattern'])
        d['patterns'] += _(", where\n\n")
        for sub in p['substitutes']:
            d['patterns'] += _(" * **%s** is %s ") % (escape(sub), (p['tags'][p['substitutes'][sub]] or _('undocumented') ))
            if not p['substitutes'][sub].startswith('__'):
                d['patterns'] += _("(normalized as *%s*)") % p['substitutes'][sub]
            d['patterns'] += "\n"
        if description['commonTags'] or p['commonTags']:
            d['patterns'] += _("\n Additionally, the following tags are automatically set:\n\n")
            for name, value in sum([description['commonTags'].items(),
                                    p['commonTags'].items()],
                                   []):
                d['patterns'] += " * *%s* : %s\n" % (escape(name), value)
            d['patterns'] += "\n"
        if p.get('description'):
            d['patterns'] += "\n %s\n" % p['description']
        d['patterns'] += "\n"
        for example in p['examples']:
            d['examples'] += _("* *%s*, normalized as\n\n") % escape(example['sample'])
            for tag, value in example['normalization'].items():
                d['examples'] += " * **%s** -> %s\n" % (escape(tag), value)
            d['examples'] += '\n'
    return template % d
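Finally, a short sketch of driving the documentation generator without translation (gettext falls back to the identity function):

    rst = doc2RST(n.get_long_description())   # "n" being a Normalizer instance as above
    print rst
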