@@ -9,8 +9,9 @@ Uses proper protobuf parsing for more accurate failure detection.
99import argparse
1010import sys
1111import os
12+ import re
1213import struct
13- from typing import List , Dict , Any , Optional
14+ from typing import List , Dict , Any , Optional , Set
1415from pathlib import Path
1516import subprocess
1617import logging
@@ -28,6 +29,22 @@ except ImportError:
2829
2930logger = logging .getLogger (__name__ )
3031
# Matches ANSI SGR (colour/style) escape sequences such as "\x1b[31m" so they
# can be stripped from console output embedded in build events.
ANSI_ESCAPE = re.compile(r'\x1b\[[0-9;]*m')

# Matches a "FAIL:" / "FAILED:" marker followed, optionally through colour
# codes, by a label starting with "//". group(1) is the label; it stops at the
# first whitespace or "(" and may still contain embedded escape sequences.
FAILURE_LINE = re.compile(r'(?:FAIL:|FAILED:)\s*(?:\x1b\[[0-9;]*m)*(//[^\s\(]+)')

# Matches a fully qualified label of the form //pkg/sub:target.
TARGET_PATTERN = re.compile(r'//[a-zA-Z0-9_\-][a-zA-Z0-9_/\-]*:[a-zA-Z0-9_/\-]+')

# Every pattern exposes the same two groups so callers can treat them
# uniformly: group(1) is the file path and group(2) is the line number.
# The extension alternation is deliberately non-capturing; a capturing group
# there would shift the line number to group(3) for source files only.
FILE_LOCATION_PATTERNS = [
    re.compile(r'([a-zA-Z0-9_/\-\.]+\.(?:cc|cpp|h|hpp|py|java|go|js|ts|kt|scala|rs|c|C|cxx)):(\d+)'),
    re.compile(r'(BUILD(?:\.bazel)?):(\d+)'),
    re.compile(r'(WORKSPACE(?:\.bazel)?):(\d+)'),
]

# Length bounds; NOTE(review): presumably applied to strings harvested from
# raw messages, but the consumers are not visible here - confirm.
MIN_STRING_LENGTH = 3
MAX_STRING_LENGTH = 500
MAX_PROTOBUF_STRING_LENGTH = 200
47+
3148
3249class BEPAnalyzer :
3350 """Analyzes Bazel Build Event Protocol protobuf files for failures."""
@@ -36,6 +53,7 @@ class BEPAnalyzer:
3653 self .verbose = verbose
3754 self .failures = []
3855 self .failures_skipped = 0 # Track skipped failures due to limits
56+ self .seen_targets = set () # Track targets to avoid duplicates
3957 # Limits (bytes, counts)
4058 self .MAX_FILE_SIZE = cfg .MAX_FILE_SIZE_MB_DEFAULT * 1024 * 1024
4159 self .MAX_FAILURES = cfg .MAX_FAILURES_DEFAULT
@@ -47,6 +65,7 @@ class BEPAnalyzer:
4765 def extract_failures (self , bep_file_path : str ) -> List [Dict [str , Any ]]:
4866 """Extract failure information from a BEP protobuf file."""
4967 failures = []
68+ self .seen_targets .clear () # Reset for each extract
5069
5170 # Check file size before processing
5271 try :
@@ -78,12 +97,17 @@ class BEPAnalyzer:
7897 )
7998 break
8099
81- # Parse the message for failure events
82- failure = self ._parse_build_event_for_failure (message_data )
83- if failure :
84- failures .append (failure )
85- if self .verbose :
86- logger .info ("Found failure: %s" , failure .get ('target' , 'Unknown' ))
100+ # Parse the message for failure events (returns a list or None)
101+ message_failures = self ._parse_build_event_for_failure (message_data )
102+ if message_failures :
103+ # Remove duplicates by target
104+ for failure in message_failures :
105+ target = failure .get ('target' , 'unknown' )
106+ if target not in self .seen_targets :
107+ self .seen_targets .add (target )
108+ failures .append (failure )
109+ if self .verbose :
110+ logger .info ("Found failure: %s" , target )
87111
88112 # Check if we hit the failure limit
89113 if len (failures ) >= self .MAX_FAILURES :
@@ -92,9 +116,14 @@ class BEPAnalyzer:
92116 message_data = self ._read_varint_delimited_message (f )
93117 if not message_data :
94118 break
95- failure = self ._parse_build_event_for_failure (message_data )
96- if failure :
97- self .failures_skipped += 1
119+ message_failures = self ._parse_build_event_for_failure (message_data )
120+ if message_failures :
121+ # Count only new unique failures
122+ for failure in message_failures :
123+ target = failure .get ('target' , 'unknown' )
124+ if target not in self .seen_targets :
125+ self .seen_targets .add (target )
126+ self .failures_skipped += 1
98127
99128 except Exception as e :
100129 if self .verbose :
@@ -158,17 +187,32 @@ class BEPAnalyzer:
158187
159188 return value
160189
def _parse_build_event_for_failure(self, message_data: bytes) -> Optional[List[Dict[str, Any]]]:
    """Dispatch one raw build-event message to the appropriate failure parser.

    Args:
        message_data: Raw bytes of a single build-event message.

    Returns:
        Whatever the selected parser returns: a list of failure dicts, or
        None when the message holds no failure.
    """
    # Structured parsing is used when the protobuf libraries imported;
    # otherwise the string-matching fallback handles the message.
    parser = self._parse_protobuf_event if PROTOBUF_AVAILABLE else self._parse_string_event
    return parser(message_data)
168197
169- def _parse_protobuf_event (self , message_data : bytes ) -> Optional [Dict [str , Any ]]:
170- """Parse BEP event using protobuf parsing."""
198+ def _parse_protobuf_event (self , message_data : bytes ) -> Optional [List [Dict [str , Any ]]]:
199+ """Parse BEP event using protobuf parsing.
200+
201+ Returns:
202+ A list of failure dicts, or None if no failures found.
203+ """
171204 try :
205+ # Check for Progress events with failure messages
206+ # May contain ANSI color codes
207+ message_str = message_data .decode ('utf-8' , errors = 'ignore' )
208+ if ('FAIL:' in message_str and '//' in message_str ) or \
209+ ('FAILED:' in message_str and '//' in message_str ):
210+ # Avoid false positives from command-line arguments
211+ if not ('--client_env' in message_str or '--default_override' in message_str ):
212+ if self .verbose :
213+ logger .info ("Found Progress event with failure, extracting all failures" )
214+ return self ._extract_all_progress_failures (message_str )
215+
172216 # Since we don't have the compiled protobuf classes, we'll use a hybrid approach
173217 # Parse key protobuf fields manually for common failure patterns
174218 event_dict = self ._decode_protobuf_to_dict (message_data )
@@ -180,24 +224,24 @@ class BEPAnalyzer:
180224 if 'action' in event_dict :
181225 action = event_dict ['action' ]
182226 if not action .get ('success' , True ) or action .get ('exit_code' , 0 ) != 0 :
183- return self ._extract_action_failure (event_dict , action )
227+ return [ self ._extract_action_failure (event_dict , action )]
184228
185229 # Check for TargetComplete events with failures
186230 if 'completed' in event_dict :
187231 completed = event_dict ['completed' ]
188232 if not completed .get ('success' , True ):
189- return self ._extract_target_failure (event_dict , completed )
233+ return [ self ._extract_target_failure (event_dict , completed )]
190234
191235 # Check for TestResult events with failures
192236 if 'test_result' in event_dict :
193237 test_result = event_dict ['test_result' ]
194238 status = test_result .get ('status' , 'NO_STATUS' )
195239 if status in ['FAILED' , 'TIMEOUT' , 'FAILED_TO_BUILD' , 'REMOTE_FAILURE' ]:
196- return self ._extract_test_failure (event_dict , test_result )
240+ return [ self ._extract_test_failure (event_dict , test_result )]
197241
198242 # Check for Aborted events
199243 if 'aborted' in event_dict :
200- return self ._extract_aborted_failure (event_dict )
244+ return [ self ._extract_aborted_failure (event_dict )]
201245
202246 except Exception as e :
203247 if self .verbose :
@@ -412,8 +456,6 @@ class BEPAnalyzer:
412456
413457 def _extract_file_location_from_strings (self , strings : List [str ]) -> tuple :
414458 """Extract file location and line number from strings."""
415- import re
416-
417459 file_patterns = [
418460 r'([a-zA-Z0-9_/\-\.]+\.(?:cc|cpp|h|hpp|py|java|go|js|ts|kt|scala|rs|c|C|cxx)):(\d+)' ,
419461 r'(BUILD(?:\.bazel)?):(\d+)' ,
@@ -428,42 +470,90 @@ class BEPAnalyzer:
428470
429471 return None , None
430472
431- def _parse_string_event (self , message_data : bytes ) -> Optional [Dict [str , Any ]]:
432- """Fallback string-based parsing when protobuf libraries aren't available."""
def _extract_all_progress_failures(self, message_str: str) -> Optional[List[Dict[str, Any]]]:
    """Extract every unique failing target from Progress-event console text.

    Each line carrying a "FAIL:" or "FAILED:" marker followed by a label of
    the form //pkg:target produces one failure entry. Labels are
    de-duplicated within the message, keeping the first occurrence.

    Args:
        message_str: Decoded text of the event; may contain ANSI colour codes.

    Returns:
        A list of failure dicts with the keys 'type', 'status', 'target',
        'message', 'file_location' (the path from a "see <path>.log" hint on
        the same line, else None) and 'line_number' (always None), or None
        when no failing target was found.
    """
    # Compiled once per call rather than looked up for every line.
    log_pattern = re.compile(r'see\s+([^\)]+\.log)')
    failures = []
    seen = set()

    for raw_line in message_str.split('\n'):
        # Strip colour codes up front so neither the marker, the label nor
        # the log path can be polluted or split by an escape sequence.
        line = ANSI_ESCAPE.sub('', raw_line)

        # Cheap substring pre-filter before running the regex.
        if 'FAIL:' not in line and 'FAILED:' not in line:
            continue

        match = FAILURE_LINE.search(line)
        if not match:
            continue

        # Drop punctuation that can cling to the label in console output.
        target = match.group(1).strip('",;()[]{}')

        # Keep only well-formed labels that have not been reported yet.
        if target in seen or not (target.startswith('//') and ':' in target):
            continue
        seen.add(target)

        log_match = log_pattern.search(line)
        log_file = log_match.group(1).strip() if log_match else None

        failures.append({
            'type': 'test_failure',
            'status': 'FAILED',
            'target': target,
            'message': 'Test failed (see build output)',
            'file_location': log_file,
            'line_number': None,
        })

    return failures if failures else None
506+
507+ def _parse_string_event (self , message_data : bytes ) -> Optional [List [Dict [str , Any ]]]:
508+ """Fallback string-based parsing when protobuf libraries aren't available.
509+
510+ Returns:
511+ A list of failure dicts, or None if no failures found.
512+ """
433513 try :
434514 # Look for failure indicators in the raw message
435515 message_str = message_data .decode ('utf-8' , errors = 'ignore' )
436-
516+
437517 # Pattern matching for actual build failures (be more specific)
438518 failure_patterns = [
439519 'FAILED_TO_BUILD' ,
440- 'COMPILATION_FAILED' ,
520+ 'COMPILATION_FAILED' ,
441521 'TEST_FAILED' ,
442522 'BUILD_FAILED' ,
443523 'FAILED TARGETS' ,
444524 'COMPILATION ERROR' ,
445525 '"status": "FAILED"' , # Test failures
446526 'testFailureMessage' # Test failure messages
447527 ]
448-
528+
529+ # Special check for progress output with ANSI codes: "FAIL:" or "FAILED:" near "//"
530+ has_progress_failure = (('FAIL:' in message_str or 'FAILED:' in message_str ) and '//' in message_str )
531+
449532 # Only look for failures in specific contexts, not in command-line args
450- if any (pattern in message_str for pattern in failure_patterns ):
533+ if any (pattern in message_str for pattern in failure_patterns ) or has_progress_failure :
451534 # Additional validation: make sure this looks like an actual failure event
452535 # Skip if it looks like command-line arguments or configuration
453- if ('--client_env' in message_str or
536+ if ('--client_env' in message_str or
454537 '--default_override' in message_str or
455538 'connect_timeout_secs' in message_str or
456539 'fatal_event_bus_exceptions' in message_str ):
457540 return None
458-
541+
459542 for pattern in failure_patterns :
460543 if pattern in message_str :
461- return self ._extract_failure_details (message_str , pattern )
462-
544+ return [self ._extract_failure_details (message_str , pattern )]
545+
546+ # Handle progress failures
547+ if has_progress_failure :
548+ if 'FAIL:' in message_str :
549+ return [self ._extract_failure_details (message_str , 'FAIL:' )]
550+ elif 'FAILED:' in message_str :
551+ return [self ._extract_failure_details (message_str , 'FAILED:' )]
552+
463553 except Exception as e :
464554 if self .verbose :
465555 logger .warning ("Error parsing message: %s" , e )
466-
556+
467557 return None
468558
469559 def _extract_failure_details (self , message_str : str , failure_type : str ) -> Dict [str , Any ]:
@@ -490,13 +580,11 @@ class BEPAnalyzer:
490580
491581 # Look for Bazel target patterns (avoid URLs)
492582 target_patterns = [
493- # Full target like //pkg/sub:target - must have colon and valid package chars
494- r'//[a-zA-Z0-9_\-][a-zA-Z0-9_/\-]*:[a-zA-Z0-9_\-]+' ,
495- # Root target like //:target - colon required
496- r'//:[a-zA-Z0-9_\-]+' ,
583+ r'//[a-zA-Z0-9_\-][a-zA-Z0-9_/\-]*:[a-zA-Z0-9_/\-]+' , # //pkg:target
584+ # //:target
585+ r'//:[a-zA-Z0-9_/\-]+' ,
497586 ]
498587
499- import re
500588 for line in lines :
501589 for pattern in target_patterns :
502590 matches = re .findall (pattern , line )
@@ -539,7 +627,6 @@ class BEPAnalyzer:
539627 r'(WORKSPACE(?:\.bazel)?):(\d+):(?:\d+:)?\s*(.+)' ,
540628 ]
541629
542- import re
543630 for line in lines :
544631 for pattern in file_location_patterns :
545632 match = re .search (pattern , line )
@@ -593,6 +680,12 @@ class BEPAnalyzer:
593680
594681
595682class BuildkiteAnnotator :
def __init__(self):
    """Capture Buildkite environment details on the new instance.

    Sets `is_buildkite`, `repo_url` and `commit_sha` from the corresponding
    detection helpers.
    """
    # The helpers are defined outside this view and may read attributes set
    # earlier, so the assignment order is kept exactly as written.
    self.is_buildkite = self._detect_buildkite_environment()
    self.repo_url = self._get_repo_url()
    self.commit_sha = self._get_commit_sha()
688+
596689 def _get_buildkite_env (self , var_name : str , default : str = None ) -> Optional [str ]:
597690 """Helper to get Buildkite environment variable with consistent naming."""
598691 return os .getenv (f'BUILDKITE_{ var_name } ' if not var_name .startswith ('BUILDKITE' ) else var_name , default )
@@ -878,7 +971,7 @@ def main():
878971
879972 if args .verbose :
880973 logger .info ("Analyzing BEP file: %s" , args .bep_file )
881-
974+
882975 try :
883976 # Initialize analyzer with custom limits
884977 analyzer = BEPAnalyzer (verbose = args .verbose )
0 commit comments