2323import java .io .OutputStreamWriter ;
2424import java .io .PrintWriter ;
2525import java .text .BreakIterator ;
26- import java .text .CharacterIterator ;
27- import java .text .StringCharacterIterator ;
2826import java .util .ArrayList ;
2927import java .util .Collections ;
3028import java .util .Enumeration ;
4038
4139import com .ibm .g11n .pipeline .resfilter .ResourceString .ResourceStringComparator ;
4240
43- import org .apache .commons .lang3 .StringEscapeUtils ;
44-
4541/**
4642 * Java properties resource filter implementation.
4743 *
@@ -88,14 +84,14 @@ public Bundle parse(InputStream inStream) throws IOException {
8884 boolean globalNotesAvailable = true ;
8985 List <String > globalNotes = null ;
9086 while ((line = inStreamReader .readLine ()) != null ) {
91- line = line . trim ( );
87+ line = stripLeadingSpaces ( line );
9288 // Comment line - Add to list of comments (notes) until we find
9389 // either
9490 // a blank line (global comment) or a key/value pair
9591 if (line .startsWith ("#" ) || line .startsWith ("!" )) {
9692 // Strip off the leading comment marker, and perform any
9793 // necessary unescaping here.
98- currentNotes .add (StringEscapeUtils . unescapeJava (line .substring (1 )));
94+ currentNotes .add (unescape (line .substring (1 )));
9995 } else if (line .isEmpty ()) {
10096 // We are following the convention that the first blank line in
10197 // a properties
@@ -118,7 +114,7 @@ public Bundle parse(InputStream inStream) throws IOException {
118114 sb .setLength (sb .length () - 1 ); // Remove the continuation
119115 // "\"
120116 if (continuationLine != null ) {
121- sb .append (continuationLine . trim ( ));
117+ sb .append (stripLeadingSpaces ( continuationLine ));
122118 }
123119 }
124120 String logicalLine = sb .toString ();
@@ -241,7 +237,7 @@ public static PropDef parseLine(String line) {
241237 }
242238
243239 String key = unescapePropKey (line .substring (0 , sepIdx ).trim ());
244- String value = unescapePropValue (line .substring (sepIdx + 1 ). trim ( ));
240+ String value = unescapePropValue (stripLeadingSpaces ( line .substring (sepIdx + 1 )));
245241
246242 PropDef pl = new PropDef (key , value , sep );
247243 return pl ;
@@ -300,14 +296,21 @@ public void print(PrintWriter pw, String language) throws IOException {
300296 int start = 0 ;
301297 int end = brk .next ();
302298 boolean emitNext = false ;
299+ boolean firstSegment = true ;
303300 while (end != BreakIterator .DONE ) {
304301 String segment = value .substring (start , end );
305- String escSegment = escapePropValue (segment );
302+ String escSegment = null ;
303+ if (firstSegment ) {
304+ escSegment = escape (segment , EscapeSpace .LEADING_ONLY );
305+ firstSegment = false ;
306+ } else {
307+ escSegment = escape (segment , EscapeSpace .NONE );
308+ }
306309 if (emitNext || (buf .length () + escSegment .length () + 2 >= COLMAX )) {
307310 // First character in a continuation line must be
308311 // a non-space character. Otherwise, keep appending
309312 // segments to the current line.
310- if (!Character . isSpaceChar (escSegment .codePointAt (0 ))) {
313+ if (!isPropsWhiteSpaceChar (escSegment .charAt (0 ))) {
311314 // This segment is safe as the first word
312315 // of a continuation line.
313316 buf .append ('\\' );
@@ -356,56 +359,187 @@ public String toString() {
356359 }
357360 }
358361
359- private static String escapePropValue (String s ) {
360- StringBuilder escaped = new StringBuilder ();
361- StringCharacterIterator itr = new StringCharacterIterator (s );
362- for (char c = itr .first (); c != CharacterIterator .DONE ; c = itr .next ()) {
363- if (c == '\\' ) {
364- escaped .append ("\\ \\ " );
365- } else if (c > 0x7F ) {
366- escaped .append ("\\ u" ).append (String .format ("%04X" , (int ) c ));
367- } else if (c == ':' ) {
368- escaped .append ("\\ :" );
369- } else if (c == '=' ) {
370- escaped .append ("\\ :" );
362+ private static final char BACKSLASH = '\\' ;
363+
364+ private enum EscapeSpace {
365+ ALL ,
366+ LEADING_ONLY ,
367+ NONE ;
368+ }
369+
370+ private static String escape (String str , EscapeSpace escSpace ) {
371+ StringBuilder buf = new StringBuilder ();
372+ int idx = 0 ;
373+
374+ // Handle leading space characters
375+ if (escSpace == EscapeSpace .ALL || escSpace == EscapeSpace .LEADING_ONLY ) {
376+ // Java properties specification considers the characters space (' ', '\u0020'),
377+ // tab ('\t', '\u0009'), and form feed ('\f', '\u000C') to be white space.
378+ //
379+ // java.util.Properties#store() implementation escapes space characters
380+ // to "\ " in key string, as well as leading spaces in value string.
381+ // Other white space characters are encoded by Unicode escape sequence.
382+ for (; idx < str .length (); idx ++) {
383+ char c = str .charAt (idx );
384+ if (c == ' ' ) {
385+ buf .append (BACKSLASH ).append (' ' );
386+ } else if (c == '\t' || c == '\f' ) {
387+ appendUnicodeEscape (buf , c );
388+ } else {
389+ break ;
390+ }
391+ }
392+ }
393+
394+ for (int i = idx ; i < str .length (); i ++) {
395+ char c = str .charAt (i );
396+
397+ if (c < 0x20 || c >= 0x7E ) {
398+ // JDK API comment for Properties#store() specifies below:
399+ //
400+ // Characters less than \\u0020 and characters greater than \u007E in property keys
401+ // or values are written as \\uxxxx for the appropriate hexadecimal value xxxx.
402+ //
403+ // However, actual implementation uses "\t" for horizontal tab, "\n" for newline
404+ // and so on. This implementation support the equivalent behavior.
405+ switch (c ) {
406+ case '\t' :
407+ buf .append (BACKSLASH ).append ('t' );
408+ break ;
409+ case '\n' :
410+ buf .append (BACKSLASH ).append ('n' );
411+ break ;
412+ case '\f' :
413+ buf .append (BACKSLASH ).append ('f' );
414+ break ;
415+ case '\r' :
416+ buf .append (BACKSLASH ).append ('r' );
417+ break ;
418+ default :
419+ appendUnicodeEscape (buf , c );
420+ break ;
421+ }
371422 } else {
372- escaped .append (c );
423+ switch (c ) {
424+ case ' ' : // space
425+ if (escSpace == EscapeSpace .ALL ) {
426+ buf .append (BACKSLASH ).append (c );
427+ } else {
428+ buf .append (c );
429+ }
430+ break ;
431+
432+ // The key and element characters #, !, =, and : are written with
433+ // a preceding backslash
434+ case '#' :
435+ case '!' :
436+ case '=' :
437+ case ':' :
438+ case '\\' :
439+ buf .append (BACKSLASH ).append (c );
440+ break ;
441+
442+ default :
443+ buf .append (c );
444+ break ;
445+ }
373446 }
374447 }
375- return escaped .toString ();
448+
449+ return buf .toString ();
450+ }
451+
452+ static String escapePropKey (String str ) {
453+ return escape (str , EscapeSpace .ALL );
454+ }
455+
456+ static String escapePropValue (String str ) {
457+ return escape (str , EscapeSpace .LEADING_ONLY );
458+ }
459+
460+ static void appendUnicodeEscape (StringBuilder buf , char codeUnit ) {
461+ buf .append (BACKSLASH ).append ('u' )
462+ .append (String .format ("%04X" , (int )codeUnit ));
376463 }
377464
378- private static String unescapePropValue (String s ) {
379- StringBuilder unescaped = new StringBuilder ();
380- StringCharacterIterator itr = new StringCharacterIterator (s );
381- for (char c = itr .first (); c != CharacterIterator .DONE ; c = itr .next ()) {
382- if (c == '\\' && itr .getIndex () < itr .getEndIndex ()) {
383- char n = itr .next ();
384- if (n == '\\' || n == ':' || n == '=' ) {
385- unescaped .append (n );
386- } else if (n == 'u' && itr .getIndex () + 4 <= itr .getEndIndex ()) {
387- StringBuilder unicodeEscape = new StringBuilder ("\\ u" );
388- for (int i = 0 ; i < 4 ; i ++) {
389- unicodeEscape .append (itr .next ());
465+ static String unescapePropKey (String str ) {
466+ return unescape (str );
467+ }
468+
469+ static String unescapePropValue (String str ) {
470+ return unescape (str );
471+ }
472+
473+ private static String unescape (String str ) {
474+ StringBuilder buf = new StringBuilder ();
475+ boolean isEscSeq = false ;
476+ for (int i = 0 ; i < str .length (); i ++) {
477+ char c = str .charAt (i );
478+ if (isEscSeq ) {
479+ switch (c ) {
480+ case 't' :
481+ buf .append ('\t' );
482+ break ;
483+
484+ case 'n' :
485+ buf .append ('\n' );
486+ break ;
487+
488+ case 'f' :
489+ buf .append ('\f' );
490+ break ;
491+
492+ case 'r' :
493+ buf .append ('\r' );
494+ break ;
495+
496+ case 'u' :
497+ {
498+ // This implementation throws an IllegalArgumentException
499+ // when the input string contains a malformed Unicode escape
500+ // character sequence. This behavior matches java.util.Properties#load(Reader).
501+ final String errMsg = "Malformed \\ uxxxx encoding." ;
502+ if (i + 4 > str .length ()) {
503+ throw new IllegalArgumentException (errMsg );
390504 }
391- unescaped .append (StringEscapeUtils .unescapeJava (unicodeEscape .toString ()));
392- } else {
393- unescaped .append (c );
394- unescaped .append (n );
505+ // Parse hex digits
506+ String hexDigits = str .substring (i + 1 , i + 5 );
507+ try {
508+ char codeUnit = (char )Integer .parseInt (hexDigits , 16 );
509+ buf .append (Character .valueOf (codeUnit ));
510+ i += 4 ;
511+ } catch (NumberFormatException e ) {
512+ throw new IllegalArgumentException (errMsg , e );
513+ }
514+ break ;
395515 }
516+
517+ default :
518+ // Special rules applied to Java properties format
519+ // beyond standard Java escape character sequence.
520+ //
521+ // 1. Octal escapes are not recognized
522+ // 2. \b does not represent a backspace character
523+ // 3. Backslash is dropped from unrecognized escape sequence.
524+ // For example, "\z" is interpreted as a single character 'z'.
525+
526+ buf .append (c );
527+ break ;
528+ }
529+ isEscSeq = false ;
396530 } else {
397- unescaped .append (c );
531+ if (c == BACKSLASH ) {
532+ isEscSeq = true ;
533+ } else {
534+ buf .append (c );
535+ }
398536 }
399537 }
400- return unescaped .toString ();
401- }
402538
403- private static String escapePropKey (String s ) {
404- return s .replace (" " , "\\ " );
405- }
539+ // Note: Incomplete escape sequence should not be there.
540+ // This implementation silently drop the character for the case.
406541
407- private static String unescapePropKey (String s ) {
408- return s .replaceAll ("\\ \\ " , " " );
542+ return buf .toString ();
409543 }
410544
411545 @ Override
@@ -435,7 +569,7 @@ public void merge(InputStream base, OutputStream outStream, String language, Bun
435569 logicalLine = logicalLineBuf .toString ();
436570 }
437571 } else {
438- String normLine = line . trim ( );
572+ String normLine = stripLeadingSpaces ( line );
439573
440574 if (orgLines .isEmpty ()) {
441575 // No continuation marker in the previous line
@@ -467,6 +601,13 @@ public void merge(InputStream base, OutputStream outStream, String language, Bun
467601 if (logicalLine != null ) {
468602 PropDef pd = PropDef .parseLine (logicalLine );
469603 if (pd != null && resMap .containsKey (pd .getKey ())) {
604+ // Preserve original leading spaces
605+ String firstLine = orgLines .isEmpty () ? line : orgLines .get (0 );
606+ int len = getLeadingSpacesLength (firstLine );
607+ if (len > 0 ) {
608+ outWriter .print (firstLine .substring (0 , len ));
609+ }
610+ // Write the property key and value
470611 String key = pd .getKey ();
471612 PropDef modPd = new PropDef (key , resMap .get (key ), pd .getSeparator ());
472613 modPd .print (outWriter , language );
@@ -490,4 +631,25 @@ public void merge(InputStream base, OutputStream outStream, String language, Bun
490631
491632 outWriter .flush ();
492633 }
634+
635+ private static int getLeadingSpacesLength (String s ) {
636+ int idx = 0 ;
637+ for (; idx < s .length (); idx ++) {
638+ if (!isPropsWhiteSpaceChar (s .charAt (idx ))) {
639+ break ;
640+ }
641+ }
642+ return idx ;
643+ }
644+
645+ private static String stripLeadingSpaces (String s ) {
646+ return s .substring (getLeadingSpacesLength (s ));
647+ }
648+
649+ private static boolean isPropsWhiteSpaceChar (char c ) {
650+ // Java properties specification considers the characters space (' ', '\u0020'),
651+ // tab ('\t', '\u0009'), and form feed ('\f', '\u000C') to be white space.
652+
653+ return c == ' ' || c == '\t' || c == '\f' ;
654+ }
493655}
0 commit comments