2323import java .io .OutputStreamWriter ;
2424import java .io .PrintWriter ;
2525import java .text .BreakIterator ;
26- import java .text .CharacterIterator ;
27- import java .text .StringCharacterIterator ;
2826import java .util .ArrayList ;
2927import java .util .Collections ;
3028import java .util .Enumeration ;
4038
4139import com .ibm .g11n .pipeline .resfilter .ResourceString .ResourceStringComparator ;
4240
43- import org .apache .commons .lang3 .StringEscapeUtils ;
44-
4541/**
4642 * Java properties resource filter implementation.
4743 *
@@ -88,14 +84,14 @@ public Bundle parse(InputStream inStream) throws IOException {
8884 boolean globalNotesAvailable = true ;
8985 List <String > globalNotes = null ;
9086 while ((line = inStreamReader .readLine ()) != null ) {
91- line = line . trim ( );
87+ line = stripLeadingSpaces ( line );
9288 // Comment line - Add to list of comments (notes) until we find
9389 // either
9490 // a blank line (global comment) or a key/value pair
9591 if (line .startsWith ("#" ) || line .startsWith ("!" )) {
9692 // Strip off the leading comment marker, and perform any
9793 // necessary unescaping here.
98- currentNotes .add (StringEscapeUtils . unescapeJava (line .substring (1 )));
94+ currentNotes .add (unescape (line .substring (1 )));
9995 } else if (line .isEmpty ()) {
10096 // We are following the convention that the first blank line in
10197 // a properties
@@ -118,7 +114,7 @@ public Bundle parse(InputStream inStream) throws IOException {
118114 sb .setLength (sb .length () - 1 ); // Remove the continuation
119115 // "\"
120116 if (continuationLine != null ) {
121- sb .append (continuationLine . trim ( ));
117+ sb .append (stripLeadingSpaces ( continuationLine ));
122118 }
123119 }
124120 String logicalLine = sb .toString ();
@@ -241,7 +237,7 @@ public static PropDef parseLine(String line) {
241237 }
242238
243239 String key = unescapePropKey (line .substring (0 , sepIdx ).trim ());
244- String value = unescapePropValue (line .substring (sepIdx + 1 ). trim ( ));
240+ String value = unescapePropValue (stripLeadingSpaces ( line .substring (sepIdx + 1 )));
245241
246242 PropDef pl = new PropDef (key , value , sep );
247243 return pl ;
@@ -300,14 +296,21 @@ public void print(PrintWriter pw, String language) throws IOException {
300296 int start = 0 ;
301297 int end = brk .next ();
302298 boolean emitNext = false ;
299+ boolean firstSegment = true ;
303300 while (end != BreakIterator .DONE ) {
304301 String segment = value .substring (start , end );
305- String escSegment = escapePropValue (segment );
302+ String escSegment = null ;
303+ if (firstSegment ) {
304+ escSegment = escape (segment , EscapeSpace .LEADING_ONLY );
305+ firstSegment = false ;
306+ } else {
307+ escSegment = escape (segment , EscapeSpace .NONE );
308+ }
306309 if (emitNext || (buf .length () + escSegment .length () + 2 >= COLMAX )) {
307310 // First character in a continuation line must be
308311 // a non-space character. Otherwise, keep appending
309312 // segments to the current line.
310- if (!Character . isSpaceChar (escSegment .codePointAt (0 ))) {
313+ if (!isPropsWhiteSpaceChar (escSegment .charAt (0 ))) {
311314 // This segment is safe as the first word
312315 // of a continuation line.
313316 buf .append ('\\' );
@@ -356,56 +359,189 @@ public String toString() {
356359 }
357360 }
358361
359- private static String escapePropValue (String s ) {
360- StringBuilder escaped = new StringBuilder ();
361- StringCharacterIterator itr = new StringCharacterIterator (s );
362- for (char c = itr .first (); c != CharacterIterator .DONE ; c = itr .next ()) {
363- if (c == '\\' ) {
364- escaped .append ("\\ \\ " );
365- } else if (c > 0x7F ) {
366- escaped .append ("\\ u" ).append (String .format ("%04X" , (int ) c ));
367- } else if (c == ':' ) {
368- escaped .append ("\\ :" );
369- } else if (c == '=' ) {
370- escaped .append ("\\ :" );
362+ private static final char BACKSLASH = '\\' ;
363+
364+ private enum EscapeSpace {
365+ ALL ,
366+ LEADING_ONLY ,
367+ NONE ;
368+ }
369+
370+ private static String escape (String str , EscapeSpace escSpace ) {
371+ StringBuilder buf = new StringBuilder ();
372+ int idx = 0 ;
373+
374+ // Handle leading space characters
375+ if (escSpace == EscapeSpace .ALL || escSpace == EscapeSpace .LEADING_ONLY ) {
376+ // Java properties specification considers the characters space (' ', '\u0020'),
377+ // tab ('\t', '\u0009'), and form feed ('\f', '\u000C') to be white space.
378+ //
379+ // java.util.Properties#store() implementation escapes space characters
380+ // to "\ " in key string, as well as leading spaces in value string.
381+ // Other white space characters are encoded by Unicode escape sequence.
382+ for (; idx < str .length (); idx ++) {
383+ char c = str .charAt (idx );
384+ if (c == ' ' ) {
385+ buf .append (BACKSLASH ).append (' ' );
386+ } else if (c == '\t' ) {
387+ buf .append (BACKSLASH ).append ('t' );
388+ } else if (c == '\f' ) {
389+ buf .append (BACKSLASH ).append ('f' );
390+ } else {
391+ break ;
392+ }
393+ }
394+ }
395+
396+ for (int i = idx ; i < str .length (); i ++) {
397+ char c = str .charAt (i );
398+
399+ if (c < 0x20 || c >= 0x7E ) {
400+ // JDK API comment for Properties#store() specifies below:
401+ //
402+ // Characters less than \\u0020 and characters greater than \u007E in property keys
403+ // or values are written as \\uxxxx for the appropriate hexadecimal value xxxx.
404+ //
405+ // However, actual implementation uses "\t" for horizontal tab, "\n" for newline
406+ // and so on. This implementation support the equivalent behavior.
407+ switch (c ) {
408+ case '\t' :
409+ buf .append (BACKSLASH ).append ('t' );
410+ break ;
411+ case '\n' :
412+ buf .append (BACKSLASH ).append ('n' );
413+ break ;
414+ case '\f' :
415+ buf .append (BACKSLASH ).append ('f' );
416+ break ;
417+ case '\r' :
418+ buf .append (BACKSLASH ).append ('r' );
419+ break ;
420+ default :
421+ appendUnicodeEscape (buf , c );
422+ break ;
423+ }
371424 } else {
372- escaped .append (c );
425+ switch (c ) {
426+ case ' ' : // space
427+ if (escSpace == EscapeSpace .ALL ) {
428+ buf .append (BACKSLASH ).append (c );
429+ } else {
430+ buf .append (c );
431+ }
432+ break ;
433+
434+ // The key and element characters #, !, =, and : are written with
435+ // a preceding backslash
436+ case '#' :
437+ case '!' :
438+ case '=' :
439+ case ':' :
440+ case '\\' :
441+ buf .append (BACKSLASH ).append (c );
442+ break ;
443+
444+ default :
445+ buf .append (c );
446+ break ;
447+ }
373448 }
374449 }
375- return escaped .toString ();
450+
451+ return buf .toString ();
452+ }
453+
454+ static String escapePropKey (String str ) {
455+ return escape (str , EscapeSpace .ALL );
456+ }
457+
458+ static String escapePropValue (String str ) {
459+ return escape (str , EscapeSpace .LEADING_ONLY );
460+ }
461+
462+ static void appendUnicodeEscape (StringBuilder buf , char codeUnit ) {
463+ buf .append (BACKSLASH ).append ('u' )
464+ .append (String .format ("%04X" , (int )codeUnit ));
376465 }
377466
378- private static String unescapePropValue (String s ) {
379- StringBuilder unescaped = new StringBuilder ();
380- StringCharacterIterator itr = new StringCharacterIterator (s );
381- for (char c = itr .first (); c != CharacterIterator .DONE ; c = itr .next ()) {
382- if (c == '\\' && itr .getIndex () < itr .getEndIndex ()) {
383- char n = itr .next ();
384- if (n == '\\' || n == ':' || n == '=' ) {
385- unescaped .append (n );
386- } else if (n == 'u' && itr .getIndex () + 4 <= itr .getEndIndex ()) {
387- StringBuilder unicodeEscape = new StringBuilder ("\\ u" );
388- for (int i = 0 ; i < 4 ; i ++) {
389- unicodeEscape .append (itr .next ());
467+ static String unescapePropKey (String str ) {
468+ return unescape (str );
469+ }
470+
471+ static String unescapePropValue (String str ) {
472+ return unescape (str );
473+ }
474+
475+ private static String unescape (String str ) {
476+ StringBuilder buf = new StringBuilder ();
477+ boolean isEscSeq = false ;
478+ for (int i = 0 ; i < str .length (); i ++) {
479+ char c = str .charAt (i );
480+ if (isEscSeq ) {
481+ switch (c ) {
482+ case 't' :
483+ buf .append ('\t' );
484+ break ;
485+
486+ case 'n' :
487+ buf .append ('\n' );
488+ break ;
489+
490+ case 'f' :
491+ buf .append ('\f' );
492+ break ;
493+
494+ case 'r' :
495+ buf .append ('\r' );
496+ break ;
497+
498+ case 'u' :
499+ {
500+ // This implementation throws an IllegalArgumentException
501+ // when the input string contains a malformed Unicode escape
502+ // character sequence. This behavior matches java.util.Properties#load(Reader).
503+ final String errMsg = "Malformed \\ uxxxx encoding." ;
504+ if (i + 4 > str .length ()) {
505+ throw new IllegalArgumentException (errMsg );
390506 }
391- unescaped .append (StringEscapeUtils .unescapeJava (unicodeEscape .toString ()));
392- } else {
393- unescaped .append (c );
394- unescaped .append (n );
507+ // Parse hex digits
508+ String hexDigits = str .substring (i + 1 , i + 5 );
509+ try {
510+ char codeUnit = (char )Integer .parseInt (hexDigits , 16 );
511+ buf .append (Character .valueOf (codeUnit ));
512+ i += 4 ;
513+ } catch (NumberFormatException e ) {
514+ throw new IllegalArgumentException (errMsg , e );
515+ }
516+ break ;
395517 }
518+
519+ default :
520+ // Special rules applied to Java properties format
521+ // beyond standard Java escape character sequence.
522+ //
523+ // 1. Octal escapes are not recognized
524+ // 2. \b does not represent a backspace character
525+ // 3. Backslash is dropped from unrecognized escape sequence.
526+ // For example, "\z" is interpreted as a single character 'z'.
527+
528+ buf .append (c );
529+ break ;
530+ }
531+ isEscSeq = false ;
396532 } else {
397- unescaped .append (c );
533+ if (c == BACKSLASH ) {
534+ isEscSeq = true ;
535+ } else {
536+ buf .append (c );
537+ }
398538 }
399539 }
400- return unescaped .toString ();
401- }
402540
403- private static String escapePropKey (String s ) {
404- return s .replace (" " , "\\ " );
405- }
541+ // Note: Incomplete escape sequence should not be there.
542+ // This implementation silently drop the character for the case.
406543
407- private static String unescapePropKey (String s ) {
408- return s .replaceAll ("\\ \\ " , " " );
544+ return buf .toString ();
409545 }
410546
411547 @ Override
@@ -435,7 +571,7 @@ public void merge(InputStream base, OutputStream outStream, String language, Bun
435571 logicalLine = logicalLineBuf .toString ();
436572 }
437573 } else {
438- String normLine = line . trim ( );
574+ String normLine = stripLeadingSpaces ( line );
439575
440576 if (orgLines .isEmpty ()) {
441577 // No continuation marker in the previous line
@@ -467,6 +603,13 @@ public void merge(InputStream base, OutputStream outStream, String language, Bun
467603 if (logicalLine != null ) {
468604 PropDef pd = PropDef .parseLine (logicalLine );
469605 if (pd != null && resMap .containsKey (pd .getKey ())) {
606+ // Preserve original leading spaces
607+ String firstLine = orgLines .isEmpty () ? line : orgLines .get (0 );
608+ int len = getLeadingSpacesLength (firstLine );
609+ if (len > 0 ) {
610+ outWriter .print (firstLine .substring (0 , len ));
611+ }
612+ // Write the property key and value
470613 String key = pd .getKey ();
471614 PropDef modPd = new PropDef (key , resMap .get (key ), pd .getSeparator ());
472615 modPd .print (outWriter , language );
@@ -490,4 +633,25 @@ public void merge(InputStream base, OutputStream outStream, String language, Bun
490633
491634 outWriter .flush ();
492635 }
636+
637+ private static int getLeadingSpacesLength (String s ) {
638+ int idx = 0 ;
639+ for (; idx < s .length (); idx ++) {
640+ if (!isPropsWhiteSpaceChar (s .charAt (idx ))) {
641+ break ;
642+ }
643+ }
644+ return idx ;
645+ }
646+
647+ private static String stripLeadingSpaces (String s ) {
648+ return s .substring (getLeadingSpacesLength (s ));
649+ }
650+
651+ private static boolean isPropsWhiteSpaceChar (char c ) {
652+ // Java properties specification considers the characters space (' ', '\u0020'),
653+ // tab ('\t', '\u0009'), and form feed ('\f', '\u000C') to be white space.
654+
655+ return c == ' ' || c == '\t' || c == '\f' ;
656+ }
493657}
0 commit comments