|
1 | 1 | from .core import FileExtractor |
| 2 | +from typing import IO |
2 | 3 | import mimetypes |
3 | 4 |
|
4 | 5 |
|
5 | 6 | class WordLoader: |
6 | | - def __init__(self, file): |
7 | | - self.name = file |
8 | | - self.extractor = FileExtractor(file) |
| 7 | + def __init__(self, file_obj=None, filename: str or None = None): |
| 8 | + self.file = FileExtractor(file_obj, filename) |
9 | 9 |
|
10 | 10 | def file_load(self): |
11 | | - file_loc = self.name |
12 | | - file_type = file_loc.split(".")[-1] |
| 11 | + file_type = self.file.file_ext |
13 | 12 | # file_type = file_loc.split('.')[-1] |
14 | 13 |
|
15 | 14 | # file_types_tup = ('pdf', 'docx', 'doc', 'txt', 'text') |
16 | 15 | file_types_tup = ("pdf", "docx", "txt", "text") |
17 | 16 | if file_type in file_types_tup: |
18 | 17 | if file_type == file_types_tup[0]: |
19 | | - text = self.extractor.PdfFileRead() |
| 18 | + text = self.file.PdfFileRead() |
20 | 19 | elif file_type == file_types_tup[1]: |
21 | | - text = self.extractor.DocxFileRead() |
| 20 | + text = self.file.DocxFileRead() |
22 | 21 | # elif file_type == file_types_tup[2]: |
23 | 22 | # text = DocFileRead(self.text) |
24 | 23 | else: |
25 | | - text = self.extractor.TextFileRead() |
| 24 | + text = self.file.TextFileRead() |
26 | 25 | return text |
27 | 26 | else: |
28 | | - mime_type = mimetypes.guess_type(file_loc) |
| 27 | + mime_type = self.file.get_file_type(self.file.name) |
29 | 28 | print( |
30 | 29 | f"You are using an incorrect file format for file submissions.\n\ |
31 | 30 | Please upload a .docx/.doc/.txt/.pdf file OR!\n\ |
|
0 commit comments