Skip to content

Commit c848bd7

Browse files
committed
hotfix: string extraction for text files using with statement; requirements updates; versioning increase
1 parent 2168f2d commit c848bd7

5 files changed

Lines changed: 49 additions & 49 deletions

File tree

TextSpitter/core.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ def __init__(
1515
filename: str or None = None,
1616
):
1717
"""
18-
The extractor wrapper will initialize by assigning the filename to the object's file property; if a file-like object is provided instead of a name, then a file_ext arg will be required.
18+
The extractor wrapper will initialize by assigning the filename to the
19+
object's file property; if a file-like object is provided instead of a
20+
name, then a file_ext arg will be required.
1921
"""
2022
if filename:
2123
self.file = FileIO(filename)
@@ -26,7 +28,9 @@ def __init__(
2628
self.file_ext = file_obj.name.split(".")[-1]
2729
else:
2830
raise Exception(
29-
"Your file object does not contain a name attribute. Please add a name attribute with a file extension, and try again. Need the file ext. data for mime-typing."
31+
"Your file object does not contain a name attribute. Please"
32+
" add a name attribute with a file extension, and try "
33+
"again. Need the file ext. data for mime-typing."
3034
)
3135

3236
@staticmethod
@@ -40,11 +44,13 @@ def get_contents(self):
4044
return f.read()
4145

4246
def PdfFileRead(self):
43-
"""This current code provides a workaround in case MuPDF (a dependency for
44-
PyMuPDF) is not usable in the development environment. For such instances,
45-
the module relies on PyPDF2 to extract text data. However, because of the
46-
likelihood of white spaces being rampant in the extracted string data,
47-
those characters get filtered out."""
47+
"""
48+
This current code provides a workaround in case MuPDF (a dependency
49+
for PyMuPDF) is not usable in the development environment. For such
50+
instances, the module relies on PyPDF2 to extract text data. However,
51+
because of the likelihood of white spaces being rampant in the
52+
extracted string data, those characters get filtered out.
53+
"""
4854

4955
contents = self.get_contents()
5056

@@ -72,5 +78,5 @@ def DocxFileRead(self):
7278
return text
7379

7480
def TextFileRead(self):
75-
text = open(self.file).read()
76-
return text
81+
with open(self.file) as f:
82+
return f.read()

core_requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
#
55
# pip-compile core_requirements.in
66
#
7-
lxml==4.6.3
7+
lxml==4.6.4
88
# via python-docx
9-
pymupdf==1.18.19
9+
pymupdf==1.19.1
1010
# via -r core_requirements.in
1111
pypdf2==1.26.0
1212
# via -r core_requirements.in

dev_requirements.txt

Lines changed: 29 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#
55
# pip-compile dev_requirements.in
66
#
7-
anyio==3.3.3
7+
anyio==3.3.4
88
# via jupyter-server
99
argon2-cffi==21.1.0
1010
# via
@@ -16,17 +16,17 @@ babel==2.9.1
1616
# via jupyterlab-server
1717
backcall==0.2.0
1818
# via ipython
19-
black==21.9b0
19+
black==21.10b0
2020
# via nb-black
2121
bleach==4.1.0
2222
# via
2323
# nbconvert
2424
# readme-renderer
2525
certifi==2021.10.8
2626
# via requests
27-
cffi==1.14.6
27+
cffi==1.15.0
2828
# via argon2-cffi
29-
charset-normalizer==2.0.6
29+
charset-normalizer==2.0.7
3030
# via requests
3131
click==8.0.3
3232
# via black
@@ -36,43 +36,42 @@ colorama==0.4.4
3636
# ipython
3737
# tqdm
3838
# twine
39-
debugpy==1.5.0
39+
debugpy==1.5.1
4040
# via ipykernel
4141
decorator==5.1.0
4242
# via ipython
4343
defusedxml==0.7.1
4444
# via nbconvert
45-
docutils==0.17.1
45+
docutils==0.18
4646
# via readme-renderer
4747
entrypoints==0.3
4848
# via
4949
# jupyter-client
5050
# jupyterlab-server
5151
# nbconvert
52-
idna==3.2
52+
idna==3.3
5353
# via
5454
# anyio
5555
# requests
56-
importlib-metadata==4.8.1
56+
importlib-metadata==4.8.2
5757
# via
5858
# keyring
5959
# twine
60-
ipykernel==6.4.1
60+
ipykernel==6.5.0
6161
# via notebook
62-
ipython==7.28.0
62+
ipython==7.29.0
6363
# via
6464
# ipykernel
6565
# jupyterlab
6666
# nb-black
6767
ipython-genutils==0.2.0
6868
# via
69-
# ipykernel
7069
# jupyter-server
7170
# nbformat
7271
# notebook
7372
jedi==0.18.0
7473
# via ipython
75-
jinja2==3.0.2
74+
jinja2==3.0.3
7675
# via
7776
# jupyter-server
7877
# jupyterlab
@@ -81,7 +80,7 @@ jinja2==3.0.2
8180
# notebook
8281
json5==0.9.6
8382
# via jupyterlab-server
84-
jsonschema==4.1.0
83+
jsonschema==4.2.1
8584
# via
8685
# jupyterlab-server
8786
# nbformat
@@ -91,20 +90,20 @@ jupyter-client==7.0.6
9190
# jupyter-server
9291
# nbclient
9392
# notebook
94-
jupyter-core==4.8.1
93+
jupyter-core==4.9.1
9594
# via
9695
# jupyter-client
9796
# jupyter-server
9897
# jupyterlab
9998
# nbconvert
10099
# nbformat
101100
# notebook
102-
jupyter-server==1.11.1
101+
jupyter-server==1.11.2
103102
# via
104103
# jupyterlab
105104
# jupyterlab-server
106105
# nbclassic
107-
jupyterlab==3.1.18
106+
jupyterlab==3.2.2
108107
# via -r dev_requirements.in
109108
jupyterlab-pygments==0.1.2
110109
# via nbconvert
@@ -124,9 +123,9 @@ mypy-extensions==0.4.3
124123
# via black
125124
nb-black==1.0.7
126125
# via -r dev_requirements.in
127-
nbclassic==0.3.2
126+
nbclassic==0.3.4
128127
# via jupyterlab
129-
nbclient==0.5.4
128+
nbclient==0.5.5
130129
# via nbconvert
131130
nbconvert==6.2.0
132131
# via
@@ -142,9 +141,9 @@ nest-asyncio==1.5.1
142141
# via
143142
# jupyter-client
144143
# nbclient
145-
notebook==6.4.4
144+
notebook==6.4.5
146145
# via nbclassic
147-
packaging==21.0
146+
packaging==21.2
148147
# via
149148
# bleach
150149
# jupyterlab
@@ -161,13 +160,13 @@ pkginfo==1.7.1
161160
# via twine
162161
platformdirs==2.4.0
163162
# via black
164-
prometheus-client==0.11.0
163+
prometheus-client==0.12.0
165164
# via
166165
# jupyter-server
167166
# notebook
168-
prompt-toolkit==3.0.20
167+
prompt-toolkit==3.0.22
169168
# via ipython
170-
pycparser==2.20
169+
pycparser==2.21
171170
# via cffi
172171
pygments==2.10.0
173172
# via
@@ -183,11 +182,11 @@ python-dateutil==2.8.2
183182
# via jupyter-client
184183
pytz==2021.3
185184
# via babel
186-
pywin32==301
185+
pywin32==302
187186
# via jupyter-core
188187
pywin32-ctypes==0.2.0
189188
# via keyring
190-
pywinpty==1.1.4
189+
pywinpty==1.1.5
191190
# via terminado
192191
pyzmq==22.3.0
193192
# via
@@ -196,18 +195,15 @@ pyzmq==22.3.0
196195
# notebook
197196
readme-renderer==30.0
198197
# via twine
199-
regex==2021.10.8
198+
regex==2021.11.10
200199
# via black
201200
requests==2.26.0
202201
# via
203202
# jupyterlab-server
204203
# requests-toolbelt
205-
# requests-unixsocket
206204
# twine
207205
requests-toolbelt==0.9.1
208206
# via twine
209-
requests-unixsocket==0.2.0
210-
# via jupyter-server
211207
rfc3986==1.5.0
212208
# via twine
213209
send2trash==1.8.0
@@ -226,7 +222,7 @@ terminado==0.12.1
226222
# notebook
227223
testpath==0.5.0
228224
# via nbconvert
229-
tomli==1.2.1
225+
tomli==1.2.2
230226
# via black
231227
tornado==6.1
232228
# via
@@ -238,7 +234,7 @@ tornado==6.1
238234
# terminado
239235
tqdm==4.62.3
240236
# via twine
241-
traitlets==5.1.0
237+
traitlets==5.1.1
242238
# via
243239
# ipykernel
244240
# ipython
@@ -250,14 +246,12 @@ traitlets==5.1.0
250246
# nbconvert
251247
# nbformat
252248
# notebook
253-
twine==3.4.2
249+
twine==3.5.0
254250
# via -r dev_requirements.in
255251
typing-extensions==3.10.0.2
256252
# via black
257253
urllib3==1.26.7
258-
# via
259-
# requests
260-
# requests-unixsocket
254+
# via requests
261255
wcwidth==0.2.5
262256
# via prompt-toolkit
263257
webencodings==0.5.1

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
#
55
# pip-compile requirements.in
66
#
7-
lxml==4.6.3
7+
lxml==4.6.4
88
# via python-docx
9-
pymupdf==1.18.19
9+
pymupdf==1.19.1
1010
# via -r requirements.in
1111
pypdf2==1.26.0
1212
# via -r requirements.in

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name="TextSpitter",
8-
version="0.3.5",
8+
version="0.3.6",
99
author="Francis Secada",
1010
author_email="francis.secada@gmail.com",
1111
description="Python package that spits out text from your document files!",

0 commit comments

Comments
 (0)