39 lines
1.1 KiB
Python
39 lines
1.1 KiB
Python
from PyPDF2 import PdfFileReader
|
|
from question_generation_main import QuestionGeneration
|
|
|
|
|
|
def pdf2text(file_path: str, file_exten: str) -> str:
|
|
""" Converts a given file to text content """
|
|
|
|
_content = ''
|
|
|
|
# Identify file type and get its contents
|
|
if file_exten == 'pdf':
|
|
with open(file_path, 'rb') as pdf_file:
|
|
_pdf_reader = PdfFileReader(pdf_file)
|
|
for p in range(_pdf_reader.numPages):
|
|
_content += _pdf_reader.getPage(p).extractText()
|
|
# _content = _pdf_reader.getPage(0).extractText()
|
|
print('PDF operation done!')
|
|
|
|
elif file_exten == 'txt':
|
|
with open(file_path, 'r') as txt_file:
|
|
_content = txt_file.read()
|
|
print('TXT operation done!')
|
|
|
|
return _content
|
|
|
|
|
|
def txt2questions(doc: str, n=5, o=4) -> dict:
|
|
""" Get all questions and options """
|
|
|
|
qGen = QuestionGeneration(n, o)
|
|
q = qGen.generate_questions_dict(doc)
|
|
for i in range(len(q)):
|
|
temp = []
|
|
for j in range(len(q[i+1]['options'])):
|
|
temp.append(q[i+1]['options'][j+1])
|
|
# print(temp)
|
|
q[i+1]['options'] = temp
|
|
return q
|