Merge pull request #5 from shahid017/master

fsecada01 · web-flow · commit 2168f2daa109 · 2021-11-09T19:37:35.000-05:00
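Patch TextFileRead to read text files directly from the file path; rename the PDF page call from getText to get_text; fix the import typo (TexSpitter -> TextSpitter) in the README.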
diff --git a/README.md b/README.md
@@ -14,7 +14,7 @@ This is my first python module, so I hope I did this well!
 This module is designed to run as simply as possible.  Just provide the file location string data into the argument, and get your text returned to you.
 
 ```
-from TextSpitter import TexSpitter as TS
+from TextSpitter import TextSpitter as TS
 folder_loc = 'foo/bar/'
 
 docx_file = folder_loc + 'file_thing.docx'
@@ -39,4 +39,4 @@ _*OH MY GOD, PLEASE DO.*_
 
 Just make a pull request and add whatever you want (or fix whatever you want).  I'll review and approve if everything seems good.  
 
-Thanks, everyone!
+Thanks, everyone!
diff --git a/TextSpitter/core.py b/TextSpitter/core.py
@@ -52,7 +52,7 @@ def PdfFileRead(self):
             import fitz
 
             pdf_file = fitz.Document(stream=contents, filetype="pdf")
-            raw_text = [ele.getText("text") for ele in pdf_file]
+            raw_text = [ele.get_text("text") for ele in pdf_file]
             text = "".join(raw_text)
         # else:
         except Exception:
@@ -72,4 +72,5 @@ def DocxFileRead(self):
         return text
 
     def TextFileRead(self):
-        return self.get_contents()
+        text = open(self.file).read()
+        return text