@@ -84,11 +84,21 @@ typedef struct {
8484 int li ;
8585} lbuf_t ;
8686
87+ typedef struct rewriter { /* one queued replacement for a stanza line that matched the source only after collapsing whitespace */
88+ struct rewriter * next ; /* next entry in the singly-linked list headed by pdp->rewriter_head; new entries are pushed at the head */
89+ size_t len ; /* number of bytes of text that are written to stdout in place of the stanza line */
90+ int line ; /* lb_temp.li value of the stanza line this entry replaces */
91+ char * text ; /* replacement line bytes; set to point at the storage that directly follows this struct */
92+ } rewriter_t ;
93+ /* text is overcommitted: each rewriter_t is malloc'd with extra room after the struct, and text points at that room */
94+
8795typedef struct {
8896 off_t flo ;
8997
9098 const char * reason ;
9199
100+ rewriter_t * rewriter_head ;
101+
92102 dss_t d ;
93103 int pre ;
94104 int post ;
@@ -102,6 +112,8 @@ typedef struct {
102112
103113 int fd_temp ;
104114
115+ int li_out ;
116+
105117 char ongoing ;
106118 char skip_this_one ;
107119 char lead_in_active ;
@@ -313,10 +325,26 @@ fixdiff_stanza_start(dp_t *pdp, char *sh, size_t len)
313325 return 0 ;
314326}
315327
/*
 * Copy the NUL-terminated string `in` into `dest`, replacing every tab
 * with '>' so that tabs are visible when the copy is logged.
 *
 * dest: destination buffer
 * in:   NUL-terminated source string
 * len:  capacity of dest in bytes, including room for the terminator
 *
 * At most len - 1 characters are copied and dest is always
 * NUL-terminated afterwards.  Bytes of dest beyond the terminator are
 * left as they were.
 */
static void
stain_copy(char *dest, const char *in, size_t len)
{
	size_t n;

	/*
	 * No room even for the terminator.  Bail out before computing
	 * len - 1, which would wrap around on an unsigned size_t.
	 */
	if (!len)
		return;

	/* copy and translate in a single pass, bounded by the capacity */
	for (n = 0; n < len - 1 && in[n] != '\0'; n++)
		dest[n] = (in[n] == '\t') ? '>' : in[n];

	dest[n] = '\0';
}
343+
316344static int
317345fixdiff_find_original (dp_t * pdp , int * line_start )
318346{
319- char in_src [4096 ], in_temp [4096 ], b1 [256 ], b2 [256 ], hit = 0 ;
347+ char in_src [4096 ], in_temp [4096 ], b1 [256 ], b2 [256 ], f1 [ 256 ], f2 [ 256 ], hit = 0 ;
320348 int ret = 1 , mc = 0 , lmc = 0 , lis = 0 , lg_lis = 0 ;
321349 lbuf_t lb_temp , lb_src , lb ;
322350 size_t lt , ls ;
@@ -329,6 +357,8 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
329357 lb_src .fd = lb .fd = -1 ;
330358 b1 [0 ] = '\0' ;
331359 b2 [0 ] = '\0' ;
360+ f1 [0 ] = '\0' ;
361+ f2 [0 ] = '\0' ;
332362
333363 init_lbuf (& lb_temp , "temp" );
334364 lb_temp .fd = open (pdp -> temp , OFLAGS (O_RDWR ));
@@ -402,26 +432,103 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
402432 break ;
403433
404434 if (!ls ) {
405- elog ("failed to match, best chunk %d lines at %s:%d\n" ,
435+ elog ("failed to match, best chunk %d lines at %s:%d (tabs shown below as >) \n" ,
406436 lmc , pdp -> pf , lg_lis );
407- elog ("patch: '%s', source '%s'\n" , b1 , b2 );
437+ elog ("last match: patch = '%s"
438+ "', source = '%s'\n" , b1 , b2 );
439+ elog ("divergence: patch = '%s"
440+ "', source = '%s'\n" , f1 , f2 );
408441 mc = 0 ;
409442 break ;
410443 }
411444
412445 if (fixdiff_strcmp (in_temp + 1 , lt - 1 , & let , in_src , ls , & les )) {
413- if (mc > pdp -> pre + pdp -> post )
414- elog ("match failed after %d: '%s' / '%s'" , mc , in_temp + 1 , in_src );
446+ /*
447+ * It's not a match.
448+ *
449+ * It's still possible we only differ by whitespace.
450+ * Does it match if we treat any whitespace as a single
451+ * whitespace match token?
452+ */
453+
454+ char * p1 = in_temp + 1 , * p1_end = p1 + lt - 1 - (int )let ,
455+ * p2 = in_src , * p2_end = p2 + ls - (int )les ;
456+
457+ while (p1 < p1_end && p2 < p2_end ) {
458+ char wst1 = 0 , wst2 = 0 ;
459+
460+ while (* p1 == ' ' || * p1 == '\t' && p1 < p1_end ) {
461+ p1 ++ ;
462+ wst1 = 1 ;
463+ }
464+ while (* p2 == ' ' || * p2 == '\t' && p2 < p2_end ) {
465+ p2 ++ ;
466+ wst2 = 1 ;
467+ }
468+
469+ if (wst1 != wst2 )
470+ goto record_breakage ;
471+
472+ if (* p1 != * p2 )
473+ goto record_breakage ;
474+
475+ p1 ++ ;
476+ p2 ++ ;
477+ }
478+
479+ if ((p1 < p1_end ) != (p2 < p2_end ))
480+ goto record_breakage ;
481+
482+ elog ("(fixable whitespace-only difference at stanza line %d)\n" , lb_temp .li );
483+
484+ /*
485+ * We have to take care about picking up windows _TEXT
486+ * CRLF, eliminating that if present and only putting
487+ * the LF, so rewritten lines are indistinguishable
488+ */
489+
490+ {
491+ rewriter_t * rwt = malloc (sizeof (* rwt ) + ls + 1 - les + 1 );
492+ if (!rwt ) {
493+ elog ("OOM\n" );
494+ return -1 ;
495+ }
496+ rwt -> next = pdp -> rewriter_head ;
497+ pdp -> rewriter_head = rwt ;
498+ rwt -> line = lb_temp .li ;
499+ rwt -> text = (char * )& rwt [1 ];
500+ rwt -> text [0 ] = * in_temp ;
501+ rwt -> len = ls + 1 - les + 1 ;
502+ rwt -> text [rwt -> len - 1 ] = '\n' ;
503+ memcpy (rwt -> text + 1 , in_src , ls );
504+ }
505+ goto allow_match_ws ;
506+
507+ record_breakage :
508+ if (mc + 1 > lmc ) {
509+ stain_copy (f1 , in_temp + 1 , sizeof (f1 ));
510+ stain_copy (f2 , in_src , sizeof (f2 ));
511+ }
415512 mc = 0 ;
513+ {
514+ rewriter_t * rwt = pdp -> rewriter_head , * rwt1 ;
515+
516+ while (rwt ) {
517+ rwt1 = rwt -> next ;
518+ free (rwt );
519+ rwt = rwt1 ;
520+ }
521+
522+ pdp -> rewriter_head = NULL ;
523+ }
416524 break ;
417525 }
418526
527+ allow_match_ws :
419528 mc ++ ;
420529 if (mc > lmc ) {
421- strncpy (b1 , in_temp + 1 , sizeof (b1 ) - 1 );
422- b1 [sizeof (b1 ) - 1 ] = '\0' ;
423- strncpy (b2 , in_src + 1 , sizeof (b2 ) - 1 );
424- b2 [sizeof (b2 ) - 1 ] = '\0' ;
530+ stain_copy (b1 , in_temp + 1 , sizeof (b1 ));
531+ stain_copy (b2 , in_src , sizeof (b2 ));
425532 lmc ++ ;
426533 lg_lis = lis ;
427534 }
@@ -512,8 +619,9 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
512619static int
513620fixdiff_stanza_end (dp_t * pdp )
514621{
622+ int orig , nope = 0 ;
623+ lbuf_t lb_temp ;
515624 char buf [256 ];
516- int orig ;
517625
518626 if (!pdp -> ongoing )
519627 return 0 ;
@@ -554,21 +662,64 @@ fixdiff_stanza_end(dp_t *pdp)
554662
555663 /* dump the temp side-buffer into stdout */
556664
557- lseek (pdp -> fd_temp , pdp -> flo , SEEK_SET );
665+ init_lbuf (& lb_temp , "lb_temp" );
666+ lb_temp .fd = open (pdp -> temp , OFLAGS (O_RDONLY ));
667+ lseek (lb_temp .fd , pdp -> flo , SEEK_SET );
668+
558669 while (1 ) {
559- ssize_t l = read (pdp -> fd_temp , buf , sizeof (buf ));
670+ char buf [4096 ];
671+ ssize_t l = fixdiff_get_line (& lb_temp , buf , sizeof (buf ));
672+ rewriter_t * rwt = pdp -> rewriter_head ;
673+
560674 if (!l )
561675 break ;
562676
563- if (write (1 , buf , TO_POSLEN (l )) != (ssize_t )l ) {
564- pdp -> reason = "failed to write to stdout" ;
565- return 1 ;
677+ // elog("dumping %d (len %d)\n", (int)pdp->li_out, (int)l);
678+
679+ while (rwt ) {
680+ // elog("%d %d\n", rwt->line, pdp->li_out);
681+ if (rwt -> line == lb_temp .li /*pdp->li_out*/ ) /* we need to rewrite this line */
682+ break ;
683+
684+ rwt = rwt -> next ;
685+ }
686+
687+ if (rwt ) {
688+ // elog("rewriting '%.*s' to '%.*s'\n", (int)l, buf, (int)rwt->len, rwt->text);
689+ if (write (1 , rwt -> text , TO_POSLEN (rwt -> len )) != (ssize_t )rwt -> len ) {
690+ pdp -> reason = "failed to write to stdout" ;
691+ nope = 1 ;
692+ break ;
693+ }
694+ } else {
695+ if (write (1 , buf , TO_POSLEN (l )) != (ssize_t )l ) {
696+ pdp -> reason = "failed to write to stdout" ;
697+ nope = 1 ;
698+ break ;
699+ }
700+ }
701+
702+ pdp -> li_out ++ ;
703+ }
704+
705+ {
706+ rewriter_t * rwt = pdp -> rewriter_head , * rwt1 ;
707+
708+ while (rwt ) {
709+ rwt1 = rwt -> next ;
710+ free (rwt );
711+ rwt = rwt1 ;
566712 }
713+
714+ pdp -> rewriter_head = NULL ;
567715 }
568716
569- close (pdp -> fd_temp );
717+ close (lb_temp . fd );
570718 pdp -> fd_temp = -1 ;
571719
720+ if (nope )
721+ return 1 ;
722+
572723 /* track the effect stanza changes are having on line offsets */
573724 pdp -> delta += pdp -> post - pdp -> pre ;
574725
@@ -611,6 +762,7 @@ main(int argc, char *argv[])
611762 dp .d = DSS_WAIT_MMM ;
612763 dp .lb .fd = 0 ; /* stdin */
613764 dp .fd_temp = -1 ;
765+ dp .li_out = 1 ;
614766
615767 while (1 ) {
616768 size_t l = fixdiff_get_line (& dp .lb , in , sizeof (in ));
0 commit comments