@@ -114,10 +114,13 @@ typedef struct {
114114
115115 int li_out ;
116116
117+ int pending_empty_lines ;
118+
117119 char ongoing ;
118120 char skip_this_one ;
119121 char lead_in_active ;
120122 char cx_active ;
123+ char have_seen_delta ;
121124
122125 char osh [128 ];
123126 char temp [64 ];
@@ -286,15 +289,17 @@ _mkstemp(const char *base, char *tmp, size_t len)
286289static int
287290fixdiff_stanza_start (dp_t * pdp , char * sh , size_t len )
288291{
289- pdp -> pre = 0 ;
290- pdp -> post = 0 ;
291- pdp -> lead_in = 0 ;
292- pdp -> lead_in_active = 1 ;
293- pdp -> lead_out = 0 ;
294- pdp -> cx_active = 1 ;
295- pdp -> lead_in_corrected = 0 ;
296- pdp -> d = DSS_PMSAD ;
297- pdp -> ongoing = 1 ;
292+ pdp -> pre = 0 ;
293+ pdp -> post = 0 ;
294+ pdp -> lead_in = 0 ;
295+ pdp -> lead_in_active = 1 ;
296+ pdp -> lead_out = 0 ;
297+ pdp -> cx_active = 1 ;
298+ pdp -> lead_in_corrected = 0 ;
299+ pdp -> d = DSS_PMSAD ;
300+ pdp -> ongoing = 1 ;
301+ pdp -> have_seen_delta = 0 ;
302+ pdp -> pending_empty_lines = 0 ;
298303
299304 pdp -> stanzas ++ ;
300305
@@ -432,7 +437,7 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
432437 break ;
433438
434439 if (!ls ) {
435- elog ("failed to match, best chunk %d lines at %s:%d (tabs shown below as >)\n" ,
440+ elog ("Failed to match, best chunk %d lines started at %s:%d (tabs shown below as >)\n" ,
436441 lmc , pdp -> pf , lg_lis );
437442 elog ("last match: patch = '%s"
438443 "', source = '%s'\n" , b1 , b2 );
@@ -628,6 +633,17 @@ fixdiff_stanza_end(dp_t *pdp)
628633 if (!pdp -> ongoing )
629634 return 0 ;
630635
636+ if (!pdp -> have_seen_delta ) {
637+ pdp -> ongoing = 0 ;
638+ close (pdp -> fd_temp );
639+ elog (" - stanza %d: (filtered out due to no delta inside)\n" , pdp -> stanzas );
640+
641+ return 0 ;
642+ }
643+
644+ if (dp .pending_empty_lines )
645+ elog (" Dropped %d unexpected empty lines\n" , dp .pending_empty_lines );
646+
631647 if (fixdiff_find_original (pdp , & orig )) {
632648 elog ("Unable to find original stanza in source\n" );
633649 goto probs ;
@@ -653,7 +669,7 @@ fixdiff_stanza_end(dp_t *pdp)
653669 /* is that what we already had? */
654670
655671 if (strcmp (buf , pdp -> osh )) {
656- elog (" - (lead_in %d, lead_out %d) % s" , pdp -> lead_in , pdp -> lead_out , buf );
672+ elog (" - stanza %d: % s" , pdp -> stanzas , buf );
657673 pdp -> bad ++ ;
658674 }
659675
@@ -865,6 +881,32 @@ main(int argc, char *argv[])
865881 goto bail ;
866882 }
867883
884+ if (dp .pending_empty_lines &&
885+ (in [0 ] == ' ' || in [0 ] == '-' || in [0 ] == '+' )) {
886+ char ctx [3 ];
887+
888+ elog (" Treating %d unexpected newline(s) as context\n" , dp .pending_empty_lines );
889+
890+ ctx [0 ] = ' ' ;
891+ ctx [1 ] = '\n' ;
892+ ctx [2 ] = '\0' ;
893+
894+ while (dp .pending_empty_lines > 0 ) {
895+ dp .pending_empty_lines -- ;
896+ dp .pre ++ ;
897+ dp .post ++ ;
898+ if (dp .lead_in_active )
899+ dp .lead_in ++ ;
900+ dp .cx_active ++ ;
901+
902+ w = write (dp .fd_temp != -1 ? dp .fd_temp : 1 , ctx , TO_POSLEN (2 ));
903+ if (w < 0 ) {
904+ elog ("write to stdout failed: %d\n" , errno );
905+ goto bail ;
906+ }
907+ }
908+ }
909+
868910 if (in [0 ] == ' ' ) { /* Space */
869911 dp .pre ++ ;
870912 dp .post ++ ;
@@ -888,6 +930,7 @@ main(int argc, char *argv[])
888930 dp .pre ++ ;
889931 dp .lead_in_active = 0 ;
890932 dp .cx_active = 0 ;
933+ dp .have_seen_delta = 1 ;
891934 break ;
892935 } else
893936 if (in [0 ] == '+' ) { /* Plus */
@@ -930,6 +973,7 @@ main(int argc, char *argv[])
930973 dp .post ++ ;
931974 dp .lead_in_active = 0 ;
932975 dp .cx_active = 0 ;
976+ dp .have_seen_delta = 1 ;
933977 break ;
934978 }
935979
@@ -957,15 +1001,29 @@ main(int argc, char *argv[])
9571001 }
9581002
9591003 if (in [0 ] == 0xa ) {
960- /* we can often find this from extra lines at EOT */
961- elog (" Skipping unexpected newline\n" );
1004+ /*
1005+ * We can find this blank diff line illegally generated by the LLM:
1006+ *
1007+ * 1) from extra lines at diff EOT, maybe the user
1008+ * picked them up from screenscraping too (tests/4)
1009+ * 2) because there was an empty line there,
1010+ * but the LLM did not prepend it with a
1011+ * character indicating what action to
1012+ * take with it (tests/7)
1013+ *
1014+ * We can distinguish what to do (for these cases anyway) by waiting
1015+ * to see if there are any more lines in the stanza that have the
1016+ * +/-/space; if not, just drop the LF-only (0xa) line
1017+ */
1018+
1019+ dp .pending_empty_lines ++ ;
9621020 continue ;
9631021 }
9641022
9651023 elog ("'%c' (0x%x)\n" , in [0 ], in [0 ]);
9661024 dp .reason = "unexpected character in stanza" ;
9671025 goto bail ;
968- }
1026+ } /* switch */
9691027
9701028 if (dp .skip_this_one ) {
9711029 dp .skip_this_one = 0 ;
0 commit comments