@@ -28,6 +28,22 @@ except ImportError:
2828
2929logger = logging .getLogger (__name__ )
3030
# Matches ANSI SGR (colour/style) escape sequences such as "\x1b[31m" or "\x1b[0m".
ANSI_ESCAPE = re.compile(r'\x1b\[[0-9;]*m')

# Matches a "FAIL:" / "FAILED:" marker followed by a Bazel label. Group 1 is the
# label text up to the next whitespace or "(". ANSI escapes sitting directly in
# front of the label are skipped; escapes inside the captured text are NOT
# removed, so callers should pass group 1 through ANSI_ESCAPE.sub.
FAILURE_LINE = re.compile(r'(?:FAIL:|FAILED:)\s*(?:\x1b\[[0-9;]*m)*(//[^\s\(]+)')

# Matches a label of the form //pkg/sub:target. At least one package character
# is required, so root labels such as //:target are not matched by this pattern.
TARGET_PATTERN = re.compile(r'//[a-zA-Z0-9_\-][a-zA-Z0-9_/\-]*:[a-zA-Z0-9_/\-]+')

# Every pattern exposes exactly two groups: (1) the file path, (2) the line
# number. The extension alternation is deliberately non-capturing so that the
# group layout is identical for all entries of the list.
FILE_LOCATION_PATTERNS = [
    re.compile(r'([a-zA-Z0-9_/\-\.]+\.(?:cc|cpp|h|hpp|py|java|go|js|ts|kt|scala|rs|c|C|cxx)):(\d+)'),
    re.compile(r'(BUILD(?:\.bazel)?):(\d+)'),
    re.compile(r'(WORKSPACE(?:\.bazel)?):(\d+)'),
]

# String-length limits. NOTE(review): their consumers are not visible in this
# part of the file; presumably they bound strings harvested from decoded
# events - confirm against the call sites.
MIN_STRING_LENGTH = 3
MAX_STRING_LENGTH = 500
MAX_PROTOBUF_STRING_LENGTH = 200
46+
3147
3248class BEPAnalyzer :
3349 """Analyzes Bazel Build Event Protocol protobuf files for failures."""
@@ -78,12 +94,13 @@ class BEPAnalyzer:
7894 )
7995 break
8096
81- # Parse the message for failure events
82- failure = self ._parse_build_event_for_failure (message_data )
83- if failure :
84- failures .append ( failure )
97+ # Parse the message for failure events (returns a list or None)
98+ message_failures = self ._parse_build_event_for_failure (message_data )
99+ if message_failures :
100+ failures .extend ( message_failures )
85101 if self .verbose :
86- logger .info ("Found failure: %s" , failure .get ('target' , 'Unknown' ))
102+ for failure in message_failures :
103+ logger .info ("Found failure: %s" , failure .get ('target' , 'Unknown' ))
87104
88105 # Check if we hit the failure limit
89106 if len (failures ) >= self .MAX_FAILURES :
@@ -92,9 +109,9 @@ class BEPAnalyzer:
92109 message_data = self ._read_varint_delimited_message (f )
93110 if not message_data :
94111 break
95- failure = self ._parse_build_event_for_failure (message_data )
96- if failure :
97- self .failures_skipped += 1
112+ message_failures = self ._parse_build_event_for_failure (message_data )
113+ if message_failures :
114+ self .failures_skipped += len ( message_failures )
98115
99116 except Exception as e :
100117 if self .verbose :
@@ -158,17 +175,32 @@ class BEPAnalyzer:
158175
159176 return value
160177
161- def _parse_build_event_for_failure (self , message_data : bytes ) -> Optional [Dict [str , Any ]]:
def _parse_build_event_for_failure(self, message_data: bytes) -> Optional[List[Dict[str, Any]]]:
    """Dispatch one raw build-event message to the available failure parser.

    The protobuf-based parser is used when PROTOBUF_AVAILABLE is truthy;
    otherwise the string-matching fallback is used. The chosen parser's
    result is returned unchanged.
    """
    if not PROTOBUF_AVAILABLE:
        # No protobuf support: fall back to string parsing.
        return self._parse_string_event(message_data)
    return self._parse_protobuf_event(message_data)
168185
169- def _parse_protobuf_event (self , message_data : bytes ) -> Optional [Dict [str , Any ]]:
170- """Parse BEP event using protobuf parsing."""
186+ def _parse_protobuf_event (self , message_data : bytes ) -> Optional [List [Dict [str , Any ]]]:
187+ """Parse BEP event using protobuf parsing.
188+
189+ Returns:
190+ A list of failure dicts, or None if no failures found.
191+ """
171192 try :
193+ # Check for Progress events with failure messages
194+ # May contain ANSI color codes
195+ message_str = message_data .decode ('utf-8' , errors = 'ignore' )
196+ if ('FAIL:' in message_str and '//' in message_str ) or \
197+ ('FAILED:' in message_str and '//' in message_str ):
198+ # Avoid false positives from command-line arguments
199+ if not ('--client_env' in message_str or '--default_override' in message_str ):
200+ if self .verbose :
201+ logger .info ("Found Progress event with failure, extracting all failures" )
202+ return self ._extract_all_progress_failures (message_str )
203+
172204 # Since we don't have the compiled protobuf classes, we'll use a hybrid approach
173205 # Parse key protobuf fields manually for common failure patterns
174206 event_dict = self ._decode_protobuf_to_dict (message_data )
@@ -180,24 +212,24 @@ class BEPAnalyzer:
180212 if 'action' in event_dict :
181213 action = event_dict ['action' ]
182214 if not action .get ('success' , True ) or action .get ('exit_code' , 0 ) != 0 :
183- return self ._extract_action_failure (event_dict , action )
215+ return [ self ._extract_action_failure (event_dict , action )]
184216
185217 # Check for TargetComplete events with failures
186218 if 'completed' in event_dict :
187219 completed = event_dict ['completed' ]
188220 if not completed .get ('success' , True ):
189- return self ._extract_target_failure (event_dict , completed )
221+ return [ self ._extract_target_failure (event_dict , completed )]
190222
191223 # Check for TestResult events with failures
192224 if 'test_result' in event_dict :
193225 test_result = event_dict ['test_result' ]
194226 status = test_result .get ('status' , 'NO_STATUS' )
195227 if status in ['FAILED' , 'TIMEOUT' , 'FAILED_TO_BUILD' , 'REMOTE_FAILURE' ]:
196- return self ._extract_test_failure (event_dict , test_result )
228+ return [ self ._extract_test_failure (event_dict , test_result )]
197229
198230 # Check for Aborted events
199231 if 'aborted' in event_dict :
200- return self ._extract_aborted_failure (event_dict )
232+ return [ self ._extract_aborted_failure (event_dict )]
201233
202234 except Exception as e :
203235 if self .verbose :
@@ -412,8 +444,6 @@ class BEPAnalyzer:
412444
413445 def _extract_file_location_from_strings (self , strings : List [str ]) -> tuple :
414446 """Extract file location and line number from strings."""
415- import re
416-
417447 file_patterns = [
418448 r'([a-zA-Z0-9_/\-\.]+\.(?:cc|cpp|h|hpp|py|java|go|js|ts|kt|scala|rs|c|C|cxx)):(\d+)' ,
419449 r'(BUILD(?:\.bazel)?):(\d+)' ,
@@ -428,42 +458,90 @@ class BEPAnalyzer:
428458
429459 return None , None
430460
431- def _parse_string_event (self , message_data : bytes ) -> Optional [Dict [str , Any ]]:
432- """Fallback string-based parsing when protobuf libraries aren't available."""
def _extract_all_progress_failures(self, message_str: str) -> Optional[List[Dict[str, Any]]]:
    """Extract unique test failures from the text of a Progress event.

    Every line containing a ``FAIL:`` / ``FAILED:`` marker is stripped of ANSI
    colour codes and then matched against FAILURE_LINE. Each distinct
    ``//pkg:target`` label yields one failure dict, in order of first
    appearance.

    Args:
        message_str: Decoded event text; may contain ANSI escape sequences.

    Returns:
        A list of failure dicts (keys: type, status, target, message,
        file_location, line_number), or None when no failing target is found.
    """
    # Captures the log path from text such as "(see /path/to/test.log)".
    log_pattern = re.compile(r'see\s+([^\)]+\.log)')

    failures = []
    seen = set()

    for raw_line in message_str.split('\n'):
        # Cheap pre-filter before doing any regex work.
        if 'FAIL:' not in raw_line and 'FAILED:' not in raw_line:
            continue

        # Strip colour codes up front. Matching the raw line misses output
        # where an escape sits between the marker and the label (for example
        # "FAIL:\x1b[0m //pkg:t") and leaks escapes into the log path.
        line = ANSI_ESCAPE.sub('', raw_line)

        match = FAILURE_LINE.search(line)
        if not match:
            continue

        # Drop punctuation that may cling to the label in quoted/listed output.
        target = match.group(1).strip('",;()[]{}')

        if target in seen or not (target.startswith('//') and ':' in target):
            continue

        seen.add(target)

        log_match = log_pattern.search(line)
        log_file = log_match.group(1).strip() if log_match else None

        failures.append({
            'type': 'test_failure',
            'status': 'FAILED',
            'target': target,
            'message': 'Test failed (see build output)',
            'file_location': log_file,
            'line_number': None,
        })

    return failures or None
494+
def _parse_string_event(self, message_data: bytes) -> Optional[List[Dict[str, Any]]]:
    """Fallback string-based parsing when protobuf libraries aren't available.

    The raw message is decoded as UTF-8 (undecodable bytes are ignored) and
    scanned for known failure markers. Messages that look like command-line
    or configuration dumps are skipped. Progress-style output
    (``FAIL:`` / ``FAILED:`` together with ``//``) is handed to
    ``_extract_all_progress_failures`` so that every failing target in the
    message is reported, matching the protobuf code path; if that helper
    finds nothing, a single failure is built with
    ``_extract_failure_details`` instead.

    Args:
        message_data: Raw bytes of one build-event message.

    Returns:
        A list of failure dicts, or None if no failures found (or if
        parsing raised an exception, which is logged when verbose).
    """
    # Markers of actual build/test failures, checked in priority order.
    failure_patterns = (
        'FAILED_TO_BUILD',
        'COMPILATION_FAILED',
        'TEST_FAILED',
        'BUILD_FAILED',
        'FAILED TARGETS',
        'COMPILATION ERROR',
        '"status": "FAILED"',  # Test failures
        'testFailureMessage',  # Test failure messages
    )
    # Substrings indicating command-line arguments or configuration rather
    # than a real failure event.
    config_markers = (
        '--client_env',
        '--default_override',
        'connect_timeout_secs',
        'fatal_event_bus_exceptions',
    )

    try:
        message_str = message_data.decode('utf-8', errors='ignore')

        # First explicit failure marker present in the message, if any.
        matched_pattern = next((p for p in failure_patterns if p in message_str), None)

        # Progress output (possibly with ANSI codes): "FAIL:" or "FAILED:"
        # in a message that also mentions a "//" label.
        progress_marker = None
        if '//' in message_str:
            progress_marker = next(
                (m for m in ('FAIL:', 'FAILED:') if m in message_str), None
            )

        if matched_pattern is None and progress_marker is None:
            return None

        if any(marker in message_str for marker in config_markers):
            return None

        if matched_pattern is not None:
            return [self._extract_failure_details(message_str, matched_pattern)]

        # A single progress message can list several failing targets; report
        # all of them rather than only the first one.
        progress_failures = self._extract_all_progress_failures(message_str)
        if progress_failures:
            return progress_failures

        # No per-line label could be extracted: keep the legacy behaviour.
        return [self._extract_failure_details(message_str, progress_marker)]

    except Exception as e:
        # Best-effort parsing: a malformed message must not abort the scan.
        if self.verbose:
            logger.warning("Error parsing message: %s", e)

    return None
468546
469547 def _extract_failure_details (self , message_str : str , failure_type : str ) -> Dict [str , Any ]:
@@ -490,13 +568,11 @@ class BEPAnalyzer:
490568
491569 # Look for Bazel target patterns (avoid URLs)
492570 target_patterns = [
493- # Full target like //pkg/sub:target - must have colon and valid package chars
494- r'//[a-zA-Z0-9_\-][a-zA-Z0-9_/\-]*:[a-zA-Z0-9_\-]+' ,
495- # Root target like //:target - colon required
496- r'//:[a-zA-Z0-9_\-]+' ,
571+ r'//[a-zA-Z0-9_\-][a-zA-Z0-9_/\-]*:[a-zA-Z0-9_/\-]+' , # //pkg:target
572+ # //:target
573+ r'//:[a-zA-Z0-9_/\-]+' ,
497574 ]
498575
499- import re
500576 for line in lines :
501577 for pattern in target_patterns :
502578 matches = re .findall (pattern , line )
@@ -539,7 +615,6 @@ class BEPAnalyzer:
539615 r'(WORKSPACE(?:\.bazel)?):(\d+):(?:\d+:)?\s*(.+)' ,
540616 ]
541617
542- import re
543618 for line in lines :
544619 for pattern in file_location_patterns :
545620 match = re .search (pattern , line )
@@ -593,6 +668,12 @@ class BEPAnalyzer:
593668
594669
595670class BuildkiteAnnotator :
def __init__(self):
    """Initialize the BuildkiteAnnotator with environment detection.

    Sets three instance attributes from the class's own helper methods:
    ``is_buildkite``, ``repo_url`` and ``commit_sha``.
    """
    # NOTE(review): the helpers are defined outside this view; confirm that
    # none of them relies on an attribute assigned by an earlier line before
    # reordering these calls.
    self.is_buildkite = self._detect_buildkite_environment()
    self.repo_url = self._get_repo_url()
    self.commit_sha = self._get_commit_sha()
676+
596677 def _get_buildkite_env (self , var_name : str , default : str = None ) -> Optional [str ]:
597678 """Helper to get Buildkite environment variable with consistent naming."""
598679 return os .getenv (f'BUILDKITE_{ var_name } ' if not var_name .startswith ('BUILDKITE' ) else var_name , default )
0 commit comments