Skip to content

Commit a90bfc1

Browse files
authored
Change image and clarify parts of tutorial 1 (#15)
1 parent d8bf90e commit a90bfc1

6 files changed

Lines changed: 389 additions & 340 deletions

File tree

Conceptual_Guide/Part_1-model_deployment/README.md

Lines changed: 177 additions & 126 deletions
Large diffs are not rendered by default.
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions
5+
# are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of NVIDIA CORPORATION nor the names of its
12+
# contributors may be used to endorse or promote products derived
13+
# from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
import math
28+
import numpy as np
29+
import cv2
30+
import tritonclient.http as httpclient
31+
32+
SAVE_INTERMEDIATE_IMAGES = False
33+
34+
35+
def detection_preprocessing(image: np.ndarray) -> np.ndarray:
    """Turn a decoded image into the input tensor for the detection model.

    The image is resized to 640x480 (width x height), the per-channel mean
    ``(123.68, 116.78, 103.94)`` is subtracted, and the first and last
    channels are swapped. No cropping and no rescaling of pixel values is
    applied (scale factor 1.0).

    Args:
        image: Image array as returned by ``cv2.imread``.

    Returns:
        The 4-D blob produced by ``cv2.dnn.blobFromImage`` with its axes
        reordered by ``(0, 2, 3, 1)``, i.e. channels moved to the last axis.
    """
    # NOTE: the parameter is annotated as np.ndarray rather than cv2.Mat.
    # Annotations are evaluated when the function is defined, and cv2.Mat
    # is not exposed by every OpenCV Python build, while cv2.imread always
    # hands back a NumPy array.
    input_width = 640
    input_height = 480

    # pre-process image
    blob = cv2.dnn.blobFromImage(
        image,
        scalefactor=1.0,
        size=(input_width, input_height),
        mean=(123.68, 116.78, 103.94),
        swapRB=True,
        crop=False,
    )
    # blobFromImage emits channels-first data; move channels to the last axis.
    return np.transpose(blob, (0, 2, 3, 1))
46+
47+
48+
def detection_postprocessing(scores, geometry, preprocessed_image):
    """Decode the detection model outputs and crop the detected text region.

    Args:
        scores: Score output of the detection model. It is transposed with
            ``(0, 3, 1, 2)`` before decoding and must then have shape
            ``(1, 1, H, W)``.
        geometry: Geometry output of the detection model. It is transposed
            the same way and must then have shape ``(1, 5, H, W)``.
        preprocessed_image: Batched image tensor whose leading axis has
            size 1; that axis is squeezed away to get the frame the crops
            are taken from.

    Returns:
        An array of shape ``(1, 1, 32, 100)``: the first detected text
        region, warped to 100x32, converted to a single channel and mapped
        through ``(x / 255 - 0.5) * 2``.

    Raises:
        AssertionError: If ``scores`` / ``geometry`` do not have the
            expected shapes.
        ValueError: If no text region survives thresholding and
            non-maximum suppression.

    Side effects:
        When the module-level ``SAVE_INTERMEDIATE_IMAGES`` flag is true,
        writes ``frame.png`` and one ``<n>.png`` per detected region to
        the current working directory.
    """

    def fourPointsTransform(frame, vertices):
        """Warp the quadrilateral ``vertices`` of ``frame`` to a 100x32 patch."""
        vertices = np.asarray(vertices)
        outputSize = (100, 32)  # (width, height) of the rectified crop
        targetVertices = np.array(
            [
                [0, outputSize[1] - 1],
                [0, 0],
                [outputSize[0] - 1, 0],
                [outputSize[0] - 1, outputSize[1] - 1],
            ],
            dtype="float32",
        )

        rotationMatrix = cv2.getPerspectiveTransform(vertices, targetVertices)
        return cv2.warpPerspective(frame, rotationMatrix, outputSize)

    def decodeBoundingBoxes(scores, geometry, scoreThresh=0.5):
        """Convert score/geometry maps into rotated boxes and confidences.

        Returns ``[detections, confidences]`` where each detection is a
        ``(center, (w, h), angle_in_degrees)`` tuple, the rotated-rectangle
        form later passed to ``cv2.dnn.NMSBoxesRotated`` and
        ``cv2.boxPoints``.
        """
        detections = []
        confidences = []

        ############ CHECK DIMENSIONS AND SHAPES OF geometry AND scores ########
        assert len(scores.shape) == 4, "Incorrect dimensions of scores"
        assert len(geometry.shape) == 4, "Incorrect dimensions of geometry"
        assert scores.shape[0] == 1, "Invalid dimensions of scores"
        assert geometry.shape[0] == 1, "Invalid dimensions of geometry"
        assert scores.shape[1] == 1, "Invalid dimensions of scores"
        assert geometry.shape[1] == 5, "Invalid dimensions of geometry"
        assert (
            scores.shape[2] == geometry.shape[2]
        ), "Invalid dimensions of scores and geometry"
        assert (
            scores.shape[3] == geometry.shape[3]
        ), "Invalid dimensions of scores and geometry"
        height = scores.shape[2]
        width = scores.shape[3]
        for y in range(0, height):
            # Extract the row of scores and the five geometry channels
            scoresData = scores[0][0][y]
            x0_data = geometry[0][0][y]
            x1_data = geometry[0][1][y]
            x2_data = geometry[0][2][y]
            x3_data = geometry[0][3][y]
            anglesData = geometry[0][4][y]
            for x in range(0, width):
                score = scoresData[x]

                # If score is lower than threshold score, move to next x
                if score < scoreThresh:
                    continue

                # Position of this cell in image coordinates; the factor 4
                # is presumably the output stride of the detection model.
                offsetX = x * 4.0
                offsetY = y * 4.0
                angle = anglesData[x]

                # Calculate cos and sin of angle
                cosA = math.cos(angle)
                sinA = math.sin(angle)
                h = x0_data[x] + x2_data[x]
                w = x1_data[x] + x3_data[x]

                # Calculate the reference corner of the rotated rectangle
                offset = [
                    offsetX + cosA * x1_data[x] + sinA * x2_data[x],
                    offsetY - sinA * x1_data[x] + cosA * x2_data[x],
                ]

                # Find points for rectangle
                p1 = (-sinA * h + offset[0], -cosA * h + offset[1])
                p3 = (-cosA * w + offset[0], sinA * w + offset[1])
                center = (0.5 * (p1[0] + p3[0]), 0.5 * (p1[1] + p3[1]))
                detections.append((center, (w, h), -1 * angle * 180.0 / math.pi))
                confidences.append(float(score))

        # Return detections and confidences
        return [detections, confidences]

    # Move the last axis to position 1 so the maps have the
    # (1, channels, H, W) layout that decodeBoundingBoxes asserts.
    scores = scores.transpose(0, 3, 1, 2)
    geometry = geometry.transpose(0, 3, 1, 2)
    frame = np.squeeze(preprocessed_image, axis=0)
    # Swap the first and last colour channels of the frame.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    [boxes, confidences] = decodeBoundingBoxes(scores, geometry)
    # Arguments: score threshold 0.5, NMS overlap threshold 0.4.
    indices = cv2.dnn.NMSBoxesRotated(boxes, confidences, 0.5, 0.4)
    # Depending on the OpenCV build the kept indices come back as a flat
    # sequence, an (N, 1) array, or an empty tuple; flatten them so that
    # every element can be used directly as a list index.
    kept = np.asarray(indices).reshape(-1)

    # Debug dumps are only written when explicitly requested.
    if SAVE_INTERMEDIATE_IMAGES:
        cv2.imwrite("frame.png", frame)

    if kept.size == 0:
        # np.stack([]) would raise a ValueError with an unrelated message;
        # raise the same exception type with an actionable description.
        raise ValueError("No text regions were detected in the image")

    cropped_list = []
    for count, i in enumerate(kept, start=1):
        # get 4 corners of the rotated rect
        vertices = cv2.boxPoints(boxes[int(i)])
        cropped = fourPointsTransform(frame, vertices)
        if SAVE_INTERMEDIATE_IMAGES:
            cv2.imwrite(str(count) + ".png", cropped)
        cropped = np.expand_dims(cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY), axis=0)

        cropped_list.append(((cropped / 255.0) - 0.5) * 2)
    cropped_arr = np.stack(cropped_list, axis=0)

    # Only keep the first image, since the models don't currently allow batching.
    # See part 2 for enabling batch sizes > 1
    return cropped_arr[None, 0]
151+
152+
153+
def recognition_postprocessing(scores: np.ndarray) -> str:
    """Greedily decode the recognition model output into a string.

    Args:
        scores: 3-D array. Its first two axes are swapped, then for every
            index along the new leading axis the highest-scoring class of
            entry 0 on the second axis is taken. Class 0 is treated as
            the blank symbol; class ``c > 0`` maps to character ``c - 1``
            of ``"0123456789abcdefghijklmnopqrstuvwxyz"``.

    Returns:
        The decoded text with blanks dropped and runs of identical
        adjacent symbols collapsed to one character.
    """
    blank = "-"
    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"

    per_step = np.transpose(scores, (1, 0, 2))

    # Best class per step, translated to a character (or the blank marker).
    best_classes = [int(np.argmax(step[0])) for step in per_step]
    symbols = [
        blank if cls == 0 else alphabet[cls - 1] for cls in best_classes
    ]

    # adjacent same letters as well as background text must be removed
    # to get the final output
    decoded = []
    previous = None
    for symbol in symbols:
        if symbol != blank and symbol != previous:
            decoded.append(symbol)
        previous = symbol
    return "".join(decoded)
172+
173+
174+
if __name__ == "__main__":
    # End-to-end example: detect a text region in an image with the
    # "text_detection" model, crop it, and read it with "text_recognition".

    # Setting up client
    client = httpclient.InferenceServerClient(url="localhost:8000")

    # Read image and create input object
    image_path = "./img1.jpg"
    raw_image = cv2.imread(image_path)
    if raw_image is None:
        # cv2.imread reports a missing or unreadable file by returning
        # None instead of raising, which would otherwise surface later as
        # an unrelated OpenCV error inside the preprocessing step.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    preprocessed_image = detection_preprocessing(raw_image)

    detection_input = httpclient.InferInput(
        "input_images:0", preprocessed_image.shape, datatype="FP32"
    )
    detection_input.set_data_from_numpy(preprocessed_image, binary_data=True)

    # Query the server
    detection_response = client.infer(
        model_name="text_detection", inputs=[detection_input]
    )

    # Process responses from detection model
    scores = detection_response.as_numpy("feature_fusion/Conv_7/Sigmoid:0")
    geometry = detection_response.as_numpy("feature_fusion/concat_3:0")
    cropped_images = detection_postprocessing(scores, geometry, preprocessed_image)

    # Create input object for recognition model
    recognition_input = httpclient.InferInput(
        "input.1", cropped_images.shape, datatype="FP32"
    )
    recognition_input.set_data_from_numpy(cropped_images, binary_data=True)

    # Query the server
    recognition_response = client.infer(
        model_name="text_recognition", inputs=[recognition_input]
    )

    # Process response from recognition model
    final_text = recognition_postprocessing(recognition_response.as_numpy("308"))

    print(final_text)

Conceptual_Guide/Part_1-model_deployment/clients/client.py

Lines changed: 0 additions & 143 deletions
This file was deleted.

0 commit comments

Comments
 (0)