hotfix: string extraction for text files now uses a `with` statement; requirements updates; version bump

fsecada01 · fsecada01 · commit c848bd762fde · 2021-11-09T19:50:53.000-05:00
diff --git a/TextSpitter/core.py b/TextSpitter/core.py
@@ -15,7 +15,9 @@ def __init__(
         filename: str or None = None,
     ):
         """
-        The extractor wrapper will initialize by assinging the filename to the object's file property; if a file-like object is provided instead of a name, then a file_ext arg will be required.
+        The extractor wrapper will initialize by assigning the filename to the
+        object's file property; if a file-like object is provided instead of a
+        name, then a file_ext arg will be required.
         """
         if filename:
             self.file = FileIO(filename)
@@ -26,7 +28,9 @@ def __init__(
                 self.file_ext = file_obj.name.split(".")[-1]
             else:
                 raise Exception(
-                    "Your file object does not contain a name attribute. Please add a name attribute with a file extension, and try again. Need the file ext. data for mime-typing."
+                    "Your file object does not contain a name attribute. Please"
+                    " add a name attribute with a file extension, and try "
+                    "again. Need the file ext. data for mime-typing."
                 )
 
     @staticmethod
@@ -40,11 +44,13 @@ def get_contents(self):
             return f.read()
 
     def PdfFileRead(self):
-        """This current code provides a workaround in case MuPDF (a dependency for
-        PyMuPDF) is not usable in the development environment. For such instances,
-        the module relies on PyPDF2 to extract text data. However, because of the
-        likelihood of white spaces being rampant in the extracted string data,
-        those characters get filtered out."""
+        """
+        This current code provides a workaround in case MuPDF (a dependency
+        for PyMuPDF) is not usable in the development environment. For such
+        instances, the module relies on PyPDF2 to extract text data. However,
+        because of the likelihood of white spaces being rampant in the
+        extracted string data, those characters get filtered out.
+        """
 
         contents = self.get_contents()
 
@@ -72,5 +78,5 @@ def DocxFileRead(self):
         return text
 
     def TextFileRead(self):
-        text = open(self.file).read()
-        return text
+        with open(self.file) as f:
+            return f.read()
diff --git a/core_requirements.txt b/core_requirements.txt
@@ -4,9 +4,9 @@
 #
 #    pip-compile core_requirements.in
 #
-lxml==4.6.3
+lxml==4.6.4
     # via python-docx
-pymupdf==1.18.19
+pymupdf==1.19.1
     # via -r core_requirements.in
 pypdf2==1.26.0
     # via -r core_requirements.in
diff --git a/dev_requirements.txt b/dev_requirements.txt
@@ -4,7 +4,7 @@
 #
 #    pip-compile dev_requirements.in
 #
-anyio==3.3.3
+anyio==3.3.4
     # via jupyter-server
 argon2-cffi==21.1.0
     # via
@@ -16,17 +16,17 @@ babel==2.9.1
     # via jupyterlab-server
 backcall==0.2.0
     # via ipython
-black==21.9b0
+black==21.10b0
     # via nb-black
 bleach==4.1.0
     # via
     #   nbconvert
     #   readme-renderer
 certifi==2021.10.8
     # via requests
-cffi==1.14.6
+cffi==1.15.0
     # via argon2-cffi
-charset-normalizer==2.0.6
+charset-normalizer==2.0.7
     # via requests
 click==8.0.3
     # via black
@@ -36,43 +36,42 @@ colorama==0.4.4
     #   ipython
     #   tqdm
     #   twine
-debugpy==1.5.0
+debugpy==1.5.1
     # via ipykernel
 decorator==5.1.0
     # via ipython
 defusedxml==0.7.1
     # via nbconvert
-docutils==0.17.1
+docutils==0.18
     # via readme-renderer
 entrypoints==0.3
     # via
     #   jupyter-client
     #   jupyterlab-server
     #   nbconvert
-idna==3.2
+idna==3.3
     # via
     #   anyio
     #   requests
-importlib-metadata==4.8.1
+importlib-metadata==4.8.2
     # via
     #   keyring
     #   twine
-ipykernel==6.4.1
+ipykernel==6.5.0
     # via notebook
-ipython==7.28.0
+ipython==7.29.0
     # via
     #   ipykernel
     #   jupyterlab
     #   nb-black
 ipython-genutils==0.2.0
     # via
-    #   ipykernel
     #   jupyter-server
     #   nbformat
     #   notebook
 jedi==0.18.0
     # via ipython
-jinja2==3.0.2
+jinja2==3.0.3
     # via
     #   jupyter-server
     #   jupyterlab
@@ -81,7 +80,7 @@ jinja2==3.0.2
     #   notebook
 json5==0.9.6
     # via jupyterlab-server
-jsonschema==4.1.0
+jsonschema==4.2.1
     # via
     #   jupyterlab-server
     #   nbformat
@@ -91,20 +90,20 @@ jupyter-client==7.0.6
     #   jupyter-server
     #   nbclient
     #   notebook
-jupyter-core==4.8.1
+jupyter-core==4.9.1
     # via
     #   jupyter-client
     #   jupyter-server
     #   jupyterlab
     #   nbconvert
     #   nbformat
     #   notebook
-jupyter-server==1.11.1
+jupyter-server==1.11.2
     # via
     #   jupyterlab
     #   jupyterlab-server
     #   nbclassic
-jupyterlab==3.1.18
+jupyterlab==3.2.2
     # via -r dev_requirements.in
 jupyterlab-pygments==0.1.2
     # via nbconvert
@@ -124,9 +123,9 @@ mypy-extensions==0.4.3
     # via black
 nb-black==1.0.7
     # via -r dev_requirements.in
-nbclassic==0.3.2
+nbclassic==0.3.4
     # via jupyterlab
-nbclient==0.5.4
+nbclient==0.5.5
     # via nbconvert
 nbconvert==6.2.0
     # via
@@ -142,9 +141,9 @@ nest-asyncio==1.5.1
     # via
     #   jupyter-client
     #   nbclient
-notebook==6.4.4
+notebook==6.4.5
     # via nbclassic
-packaging==21.0
+packaging==21.2
     # via
     #   bleach
     #   jupyterlab
@@ -161,13 +160,13 @@ pkginfo==1.7.1
     # via twine
 platformdirs==2.4.0
     # via black
-prometheus-client==0.11.0
+prometheus-client==0.12.0
     # via
     #   jupyter-server
     #   notebook
-prompt-toolkit==3.0.20
+prompt-toolkit==3.0.22
     # via ipython
-pycparser==2.20
+pycparser==2.21
     # via cffi
 pygments==2.10.0
     # via
@@ -183,11 +182,11 @@ python-dateutil==2.8.2
     # via jupyter-client
 pytz==2021.3
     # via babel
-pywin32==301
+pywin32==302
     # via jupyter-core
 pywin32-ctypes==0.2.0
     # via keyring
-pywinpty==1.1.4
+pywinpty==1.1.5
     # via terminado
 pyzmq==22.3.0
     # via
@@ -196,18 +195,15 @@ pyzmq==22.3.0
     #   notebook
 readme-renderer==30.0
     # via twine
-regex==2021.10.8
+regex==2021.11.10
     # via black
 requests==2.26.0
     # via
     #   jupyterlab-server
     #   requests-toolbelt
-    #   requests-unixsocket
     #   twine
 requests-toolbelt==0.9.1
     # via twine
-requests-unixsocket==0.2.0
-    # via jupyter-server
 rfc3986==1.5.0
     # via twine
 send2trash==1.8.0
@@ -226,7 +222,7 @@ terminado==0.12.1
     #   notebook
 testpath==0.5.0
     # via nbconvert
-tomli==1.2.1
+tomli==1.2.2
     # via black
 tornado==6.1
     # via
@@ -238,7 +234,7 @@ tornado==6.1
     #   terminado
 tqdm==4.62.3
     # via twine
-traitlets==5.1.0
+traitlets==5.1.1
     # via
     #   ipykernel
     #   ipython
@@ -250,14 +246,12 @@ traitlets==5.1.0
     #   nbconvert
     #   nbformat
     #   notebook
-twine==3.4.2
+twine==3.5.0
     # via -r dev_requirements.in
 typing-extensions==3.10.0.2
     # via black
 urllib3==1.26.7
-    # via
-    #   requests
-    #   requests-unixsocket
+    # via requests
 wcwidth==0.2.5
     # via prompt-toolkit
 webencodings==0.5.1
diff --git a/requirements.txt b/requirements.txt
@@ -4,9 +4,9 @@
 #
 #    pip-compile requirements.in
 #
-lxml==4.6.3
+lxml==4.6.4
     # via python-docx
-pymupdf==1.18.19
+pymupdf==1.19.1
     # via -r requirements.in
 pypdf2==1.26.0
     # via -r requirements.in
diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="TextSpitter",
-    version="0.3.5",
+    version="0.3.6",
     author="Francis Secada",
     author_email="francis.secada@gmail.com",
     description="Python package that spits out text from your document files!",

Original file line number	Diff line number	Diff line change
`@@ -4,9 +4,9 @@`
`4`	`4`	`#`
`5`	`5`	`# pip-compile core_requirements.in`
`6`	`6`	`#`
`7`		`-lxml==4.6.3`
	`7`	`+lxml==4.6.4`
`8`	`8`	`# via python-docx`
`9`		`-pymupdf==1.18.19`
	`9`	`+pymupdf==1.19.1`
`10`	`10`	`# via -r core_requirements.in`
`11`	`11`	`pypdf2==1.26.0`
`12`	`12`	`# via -r core_requirements.in`
Original file line number	Diff line number	Diff line change
`@@ -4,9 +4,9 @@`
`4`	`4`	`#`
`5`	`5`	`# pip-compile requirements.in`
`6`	`6`	`#`
`7`		`-lxml==4.6.3`
	`7`	`+lxml==4.6.4`
`8`	`8`	`# via python-docx`
`9`		`-pymupdf==1.18.19`
	`9`	`+pymupdf==1.19.1`
`10`	`10`	`# via -r requirements.in`
`11`	`11`	`pypdf2==1.26.0`
`12`	`12`	`# via -r requirements.in`