Skip to content
This repository was archived by the owner on Dec 4, 2023. It is now read-only.

Commit 2daa350

Browse files
tracyboehrer, fran893, Batta32
authored
[Cherry Pick 4.13] Updates to Recognizers and unit tests. (#1171)
* [SDK][Recognizers-Text] Add Recognizer-Text unit tests into the temporal Recognizer-Text folder in bot-dialogs library (#1168)
  * Add timex-expression unit tests
  * Add Specs folder
  * Add datetime unit tests
  * Add sequence unit tests
  * Add number-with-unit unit tests
  * Add number unit tests
  * Add choice unit tests
  * Add text unit tests
  * Update bot-dialogs pom to use UTF-8 to execute unit tests correctly
  Co-authored-by: Martin Battaglino <martinbatta32@gmail.com>
* [SDK][Recognizers-Text] Update temporal folder with latest changes (#1164)
  * Update Datetime extractors
  * Update Datetime resources
  * Update Number resources
  * Update NumberWithUnit resources
  * Update NumberWithUnit extractors
  * Update NumberWithUnit parsers
Co-authored-by: Franco Alvarez <51216149+fran893@users.noreply.github.com>
Co-authored-by: Martin Battaglino <martinbatta32@gmail.com>
1 parent be388e6 commit 2daa350

File tree

509 files changed

+646966
-164
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

509 files changed

+646966
-164
lines changed

libraries/bot-dialogs/pom.xml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,4 +158,15 @@
158158
</plugins>
159159
</reporting>
160160

161+
<build>
162+
<plugins>
163+
<plugin>
164+
<groupId>org.apache.maven.plugins</groupId>
165+
<artifactId>maven-surefire-plugin</artifactId>
166+
<configuration>
167+
<argLine>-Dfile.encoding=UTF-8</argLine>
168+
</configuration>
169+
</plugin>
170+
</plugins>
171+
</build>
161172
</project>

libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateExtractor.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,25 @@ private boolean validateMatch(Match match, String text) {
108108
isValidMatch = startsWithBasicDate(subText);
109109
}
110110
}
111+
112+
// Expressions with mixed separators are not considered valid dates e.g. "30/4.85" (unless one is a comma "30/4, 2016")
113+
MatchGroup dayGroup = match.getGroup("day");
114+
MatchGroup monthGroup = match.getGroup("month");
115+
if (!StringUtility.isNullOrEmpty(dayGroup.value) && !StringUtility.isNullOrEmpty(monthGroup.value)) {
116+
String noDateText = match.value.replace(yearGroup.value, "")
117+
.replace(monthGroup.value, "").replace(dayGroup.value, "");
118+
String[] separators = {"/", "\\", "-", "."};
119+
int separatorCount = 0;
120+
for (String separator : separators) {
121+
if (noDateText.contains(separator)) {
122+
separatorCount++;
123+
}
124+
if (separatorCount > 1) {
125+
isValidMatch = false;
126+
break;
127+
}
128+
}
129+
}
111130
}
112131

113132
return isValidMatch;

libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseDateTimeExtractor.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,11 +128,27 @@ public List<Token> timeOfTodayAfter(String input, LocalDateTime reference) {
128128

129129
Match[] matches = RegExpUtility.getMatches(this.config.getSimpleTimeOfTodayAfterRegex(), input);
130130
for (Match match : matches) {
131+
// @TODO Remove when lookbehinds are handled correctly
132+
if (isDecimal(match, input)) {
133+
continue;
134+
}
135+
131136
ret.add(new Token(match.index, match.index + match.length));
132137
}
133138

134139
return ret;
135140
}
141+
142+
// Checks whether the match is the fractional part of a decimal number
// (e.g. the "24" in "123.24"), so the caller can skip it.
// Returns true only when all of the following hold:
//   - the match starts at index 2 or later, so both preceding characters exist
//     (this guard keeps charAt(index - 1) and charAt(index - 2) in bounds);
//   - the character immediately before the match is ',' or '.';
//   - the character before that separator is a digit;
//   - the first character of the matched value is a digit.
// Otherwise returns false.
// NOTE(review): match.value.charAt(0) assumes the matched value is non-empty -
// confirm the regex cannot produce an empty match.
// NOTE(review): an identical helper is declared in BaseTimeExtractor; the caller
// marks this check as a workaround until lookbehinds are handled correctly.
143+
private boolean isDecimal(Match match, String text) {
144+
boolean isDecimal = false;
145+
if (match.index > 1 && (text.charAt(match.index - 1) == ',' ||
146+
text.charAt(match.index - 1) == '.') && Character.isDigit(text.charAt(match.index - 2)) && Character.isDigit(match.value.charAt(0))) {
147+
isDecimal = true;
148+
}
149+
150+
return isDecimal;
151+
}
136152

137153
public List<Token> timeOfTodayBefore(String input, LocalDateTime reference) {
138154
List<Token> ret = new ArrayList<>();

libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/extractors/BaseTimeExtractor.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ public final List<Token> basicRegexMatch(String text) {
8888

8989
Match[] matches = RegExpUtility.getMatches(regex, text);
9090
for (Match match : matches) {
91+
92+
// @TODO Remove when lookbehinds are handled correctly
93+
if (isDecimal(match, text)) {
94+
continue;
95+
}
9196

9297
// @TODO Workaround to avoid incorrect partial-only matches. Remove after time regex reviews across languages.
9398
String lth = match.getGroup("lth").value;
@@ -102,6 +107,17 @@ public final List<Token> basicRegexMatch(String text) {
102107

103108
return ret;
104109
}
110+
111+
// Checks whether the match is the fractional part of a decimal number
// (e.g. the "24" in "123.24"), so basicRegexMatch can skip it.
// Returns true only when all of the following hold:
//   - the match starts at index 2 or later, so both preceding characters exist
//     (this guard keeps charAt(index - 1) and charAt(index - 2) in bounds);
//   - the character immediately before the match is ',' or '.';
//   - the character before that separator is a digit;
//   - the first character of the matched value is a digit.
// Otherwise returns false.
// NOTE(review): match.value.charAt(0) assumes the matched value is non-empty -
// confirm the regex cannot produce an empty match.
// NOTE(review): an identical helper is declared in BaseDateTimeExtractor; the caller
// marks this check as a workaround until lookbehinds are handled correctly.
112+
private boolean isDecimal(Match match, String text) {
113+
boolean isDecimal = false;
114+
if (match.index > 1 && (text.charAt(match.index - 1) == ',' ||
115+
text.charAt(match.index - 1) == '.') && Character.isDigit(text.charAt(match.index - 2)) && Character.isDigit(match.value.charAt(0))) {
116+
isDecimal = true;
117+
}
118+
119+
return isDecimal;
120+
}
105121

106122
private List<Token> atRegexMatch(String text) {
107123
List<Token> ret = new ArrayList<>();

libraries/bot-dialogs/src/main/java/com/microsoft/recognizers/text/datetime/resources/BaseDateTime.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
public class BaseDateTime {
1919

20-
public static final String HourRegex = "(?<hour>2[0-4]|[0-1]?\\d)(h)?";
20+
public static final String HourRegex = "(?<!\\d[,.])(?<hour>2[0-4]|[0-1]?\\d)(h)?";
2121

2222
public static final String TwoDigitHourRegex = "(?<hour>[0-1]\\d|2[0-4])(h)?";
2323

@@ -36,6 +36,8 @@ public class BaseDateTime {
3636
public static final String IllegalYearRegex = "([-])({FourDigitYearRegex})([-])"
3737
.replace("{FourDigitYearRegex}", FourDigitYearRegex);
3838

39+
public static final String CheckDecimalRegex = "(?![,.]\\d)";
40+
3941
public static final String RangeConnectorSymbolRegex = "(--|-|—|——|~|–)";
4042

4143
public static final String BaseAmDescRegex = "(am\\b|a\\s*\\.\\s*m\\s*\\.|a[\\.]?\\s*m\\b)";

0 commit comments

Comments
 (0)