Skip to content

Commit ece29b8

Browse files
committed
Improve Bazel failure parsing and reporting
- Refactored failure extraction to support multiple failures per event.
- Added improved regex handling for progress events with ANSI codes.
- Improved string parsing for test failures.
- Updated methods to return lists of failure dicts.
- Added new helper functions for extracting all failures from a progress event, which was previously unsupported.
- Fixed an init bug in BuildkiteAnnotator.
1 parent 71b1f06 commit ece29b8

1 file changed

Lines changed: 115 additions & 34 deletions

File tree

bin/bazel_failure_analyzer

Lines changed: 115 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,22 @@ except ImportError:
2828

2929
logger = logging.getLogger(__name__)
3030

31+
# Matches ANSI SGR (colour/style) escape sequences such as "\x1b[31m" or "\x1b[0m".
ANSI_ESCAPE = re.compile(r'\x1b\[[0-9;]*m')

# Matches a "FAIL:" / "FAILED:" marker followed by a Bazel label.
# Group 1 is the label text up to the next whitespace or "(".
# Whitespace and colour codes between the marker and the label are accepted
# in any order, so both "FAIL: \x1b[0m//pkg:t" and "FAIL:\x1b[0m //pkg:t" match.
FAILURE_LINE = re.compile(r'(?:FAIL:|FAILED:)(?:\s|\x1b\[[0-9;]*m)*(//[^\s\(]+)')

# Matches a fully qualified label of the form //pkg/sub:name.
# NOTE: at least one package character is required, so root-package labels
# ("//:name") are not matched by this pattern.
TARGET_PATTERN = re.compile(r'//[a-zA-Z0-9_\-][a-zA-Z0-9_/\-]*:[a-zA-Z0-9_/\-]+')

# Each pattern exposes exactly two groups: (1) the file path, (2) the line number.
# The extension alternation is non-capturing so that all three patterns share
# this group layout.
FILE_LOCATION_PATTERNS = [
    re.compile(r'([a-zA-Z0-9_/\-\.]+\.(?:cc|cpp|h|hpp|py|java|go|js|ts|kt|scala|rs|c|C|cxx)):(\d+)'),
    re.compile(r'(BUILD(?:\.bazel)?):(\d+)'),
    re.compile(r'(WORKSPACE(?:\.bazel)?):(\d+)'),
]

# Length limits; presumably bounds applied to strings extracted from events.
# Their consumers are not visible in this part of the file — TODO confirm.
MIN_STRING_LENGTH = 3
MAX_STRING_LENGTH = 500
MAX_PROTOBUF_STRING_LENGTH = 200
46+
3147

3248
class BEPAnalyzer:
3349
"""Analyzes Bazel Build Event Protocol protobuf files for failures."""
@@ -78,12 +94,13 @@ class BEPAnalyzer:
7894
)
7995
break
8096

81-
# Parse the message for failure events
82-
failure = self._parse_build_event_for_failure(message_data)
83-
if failure:
84-
failures.append(failure)
97+
# Parse the message for failure events (returns a list or None)
98+
message_failures = self._parse_build_event_for_failure(message_data)
99+
if message_failures:
100+
failures.extend(message_failures)
85101
if self.verbose:
86-
logger.info("Found failure: %s", failure.get('target', 'Unknown'))
102+
for failure in message_failures:
103+
logger.info("Found failure: %s", failure.get('target', 'Unknown'))
87104

88105
# Check if we hit the failure limit
89106
if len(failures) >= self.MAX_FAILURES:
@@ -92,9 +109,9 @@ class BEPAnalyzer:
92109
message_data = self._read_varint_delimited_message(f)
93110
if not message_data:
94111
break
95-
failure = self._parse_build_event_for_failure(message_data)
96-
if failure:
97-
self.failures_skipped += 1
112+
message_failures = self._parse_build_event_for_failure(message_data)
113+
if message_failures:
114+
self.failures_skipped += len(message_failures)
98115

99116
except Exception as e:
100117
if self.verbose:
@@ -158,17 +175,32 @@ class BEPAnalyzer:
158175

159176
return value
160177

161-
def _parse_build_event_for_failure(self, message_data: bytes) -> Optional[Dict[str, Any]]:
178+
def _parse_build_event_for_failure(self, message_data: bytes) -> Optional[List[Dict[str, Any]]]:
162179
"""Parse a build event message for failure information using protobuf."""
163180
if PROTOBUF_AVAILABLE:
164181
return self._parse_protobuf_event(message_data)
165182
else:
166183
# Fallback to string parsing
167184
return self._parse_string_event(message_data)
168185

169-
def _parse_protobuf_event(self, message_data: bytes) -> Optional[Dict[str, Any]]:
170-
"""Parse BEP event using protobuf parsing."""
186+
def _parse_protobuf_event(self, message_data: bytes) -> Optional[List[Dict[str, Any]]]:
187+
"""Parse BEP event using protobuf parsing.
188+
189+
Returns:
190+
A list of failure dicts, or None if no failures found.
191+
"""
171192
try:
193+
# Check for Progress events with failure messages
194+
# May contain ANSI color codes
195+
message_str = message_data.decode('utf-8', errors='ignore')
196+
if ('FAIL:' in message_str and '//' in message_str) or \
197+
('FAILED:' in message_str and '//' in message_str):
198+
# Avoid false positives from command-line arguments
199+
if not ('--client_env' in message_str or '--default_override' in message_str):
200+
if self.verbose:
201+
logger.info("Found Progress event with failure, extracting all failures")
202+
return self._extract_all_progress_failures(message_str)
203+
172204
# Since we don't have the compiled protobuf classes, we'll use a hybrid approach
173205
# Parse key protobuf fields manually for common failure patterns
174206
event_dict = self._decode_protobuf_to_dict(message_data)
@@ -180,24 +212,24 @@ class BEPAnalyzer:
180212
if 'action' in event_dict:
181213
action = event_dict['action']
182214
if not action.get('success', True) or action.get('exit_code', 0) != 0:
183-
return self._extract_action_failure(event_dict, action)
215+
return [self._extract_action_failure(event_dict, action)]
184216

185217
# Check for TargetComplete events with failures
186218
if 'completed' in event_dict:
187219
completed = event_dict['completed']
188220
if not completed.get('success', True):
189-
return self._extract_target_failure(event_dict, completed)
221+
return [self._extract_target_failure(event_dict, completed)]
190222

191223
# Check for TestResult events with failures
192224
if 'test_result' in event_dict:
193225
test_result = event_dict['test_result']
194226
status = test_result.get('status', 'NO_STATUS')
195227
if status in ['FAILED', 'TIMEOUT', 'FAILED_TO_BUILD', 'REMOTE_FAILURE']:
196-
return self._extract_test_failure(event_dict, test_result)
228+
return [self._extract_test_failure(event_dict, test_result)]
197229

198230
# Check for Aborted events
199231
if 'aborted' in event_dict:
200-
return self._extract_aborted_failure(event_dict)
232+
return [self._extract_aborted_failure(event_dict)]
201233

202234
except Exception as e:
203235
if self.verbose:
@@ -412,8 +444,6 @@ class BEPAnalyzer:
412444

413445
def _extract_file_location_from_strings(self, strings: List[str]) -> tuple:
414446
"""Extract file location and line number from strings."""
415-
import re
416-
417447
file_patterns = [
418448
r'([a-zA-Z0-9_/\-\.]+\.(?:cc|cpp|h|hpp|py|java|go|js|ts|kt|scala|rs|c|C|cxx)):(\d+)',
419449
r'(BUILD(?:\.bazel)?):(\d+)',
@@ -428,42 +458,90 @@ class BEPAnalyzer:
428458

429459
return None, None
430460

431-
def _parse_string_event(self, message_data: bytes) -> Optional[Dict[str, Any]]:
432-
"""Fallback string-based parsing when protobuf libraries aren't available."""
461+
def _extract_all_progress_failures(self, message_str: str) -> Optional[List[Dict[str, Any]]]:
    """Extract unique test failures from a Progress event.

    Scans ``message_str`` line by line for a ``FAIL:`` / ``FAILED:`` marker
    followed by a Bazel label and emits one failure dict per distinct label.

    Args:
        message_str: Decoded text of the event; may contain ANSI colour codes.

    Returns:
        A list of failure dicts with the keys ``type``, ``status``,
        ``target``, ``message``, ``file_location`` and ``line_number``,
        or None when no failing label is found.
    """
    failures = []
    seen = set()

    for raw_line in message_str.split('\n'):
        # Cheap substring test first; most lines carry no failure marker.
        if 'FAIL:' not in raw_line and 'FAILED:' not in raw_line:
            continue

        # Strip colour codes before matching rather than only from the
        # captured label. Otherwise an escape sequence placed before the
        # whitespace ("FAIL:\x1b[0m //pkg:t") defeats FAILURE_LINE, and
        # escapes inside the "(see ...log)" hint leak into the log path.
        line = ANSI_ESCAPE.sub('', raw_line)

        match = FAILURE_LINE.search(line)
        if not match:
            continue

        target = match.group(1).strip('",;()[]{}')

        # Keep only first occurrences that look like //package:name labels.
        if target in seen or not (target.startswith('//') and ':' in target):
            continue

        seen.add(target)

        # The path after "see" is recorded as the failure's location.
        log_match = re.search(r'see\s+([^\)]+\.log)', line)
        log_file = log_match.group(1).strip() if log_match else None

        failures.append({
            'type': 'test_failure',
            'status': 'FAILED',
            'target': target,
            'message': 'Test failed (see build output)',
            'file_location': log_file,
            'line_number': None,
        })

    return failures if failures else None
494+
495+
def _parse_string_event(self, message_data: bytes) -> Optional[List[Dict[str, Any]]]:
496+
"""Fallback string-based parsing when protobuf libraries aren't available.
497+
498+
Returns:
499+
A list of failure dicts, or None if no failures found.
500+
"""
433501
try:
434502
# Look for failure indicators in the raw message
435503
message_str = message_data.decode('utf-8', errors='ignore')
436-
504+
437505
# Pattern matching for actual build failures (be more specific)
438506
failure_patterns = [
439507
'FAILED_TO_BUILD',
440-
'COMPILATION_FAILED',
508+
'COMPILATION_FAILED',
441509
'TEST_FAILED',
442510
'BUILD_FAILED',
443511
'FAILED TARGETS',
444512
'COMPILATION ERROR',
445513
'"status": "FAILED"', # Test failures
446514
'testFailureMessage' # Test failure messages
447515
]
448-
516+
517+
# Special check for progress output with ANSI codes: "FAIL:" or "FAILED:" near "//"
518+
has_progress_failure = (('FAIL:' in message_str or 'FAILED:' in message_str) and '//' in message_str)
519+
449520
# Only look for failures in specific contexts, not in command-line args
450-
if any(pattern in message_str for pattern in failure_patterns):
521+
if any(pattern in message_str for pattern in failure_patterns) or has_progress_failure:
451522
# Additional validation: make sure this looks like an actual failure event
452523
# Skip if it looks like command-line arguments or configuration
453-
if ('--client_env' in message_str or
524+
if ('--client_env' in message_str or
454525
'--default_override' in message_str or
455526
'connect_timeout_secs' in message_str or
456527
'fatal_event_bus_exceptions' in message_str):
457528
return None
458-
529+
459530
for pattern in failure_patterns:
460531
if pattern in message_str:
461-
return self._extract_failure_details(message_str, pattern)
462-
532+
return [self._extract_failure_details(message_str, pattern)]
533+
534+
# Handle progress failures
535+
if has_progress_failure:
536+
if 'FAIL:' in message_str:
537+
return [self._extract_failure_details(message_str, 'FAIL:')]
538+
elif 'FAILED:' in message_str:
539+
return [self._extract_failure_details(message_str, 'FAILED:')]
540+
463541
except Exception as e:
464542
if self.verbose:
465543
logger.warning("Error parsing message: %s", e)
466-
544+
467545
return None
468546

469547
def _extract_failure_details(self, message_str: str, failure_type: str) -> Dict[str, Any]:
@@ -490,13 +568,11 @@ class BEPAnalyzer:
490568

491569
# Look for Bazel target patterns (avoid URLs)
492570
target_patterns = [
493-
# Full target like //pkg/sub:target - must have colon and valid package chars
494-
r'//[a-zA-Z0-9_\-][a-zA-Z0-9_/\-]*:[a-zA-Z0-9_\-]+',
495-
# Root target like //:target - colon required
496-
r'//:[a-zA-Z0-9_\-]+',
571+
r'//[a-zA-Z0-9_\-][a-zA-Z0-9_/\-]*:[a-zA-Z0-9_/\-]+', # //pkg:target
572+
# //:target
573+
r'//:[a-zA-Z0-9_/\-]+',
497574
]
498575

499-
import re
500576
for line in lines:
501577
for pattern in target_patterns:
502578
matches = re.findall(pattern, line)
@@ -539,7 +615,6 @@ class BEPAnalyzer:
539615
r'(WORKSPACE(?:\.bazel)?):(\d+):(?:\d+:)?\s*(.+)',
540616
]
541617

542-
import re
543618
for line in lines:
544619
for pattern in file_location_patterns:
545620
match = re.search(pattern, line)
@@ -593,6 +668,12 @@ class BEPAnalyzer:
593668

594669

595670
class BuildkiteAnnotator:
671+
def __init__(self):
672+
"""Initialize the BuildkiteAnnotator with environment detection.

Stores three values on the instance, each obtained by calling a helper
method on ``self``: ``is_buildkite``, ``repo_url`` and ``commit_sha``.
"""
673+
self.is_buildkite = self._detect_buildkite_environment()
674+
self.repo_url = self._get_repo_url()
675+
self.commit_sha = self._get_commit_sha()
676+
596677
def _get_buildkite_env(self, var_name: str, default: str = None) -> Optional[str]:
597678
"""Helper to get Buildkite environment variable with consistent naming."""
598679
return os.getenv(f'BUILDKITE_{var_name}' if not var_name.startswith('BUILDKITE') else var_name, default)

0 commit comments

Comments
 (0)