57 lines
1.9 KiB
Python
57 lines
1.9 KiB
Python
'''This module ties together the question generation and
incorrect answer generation modules.
'''
|
|
from question_extraction import QuestionExtractor
|
|
from incorrect_answer_generation import IncorrectAnswerGenerator
|
|
import re
|
|
from nltk import sent_tokenize
|
|
|
|
|
|
class QuestionGeneration:
    '''Generate questions, with answer options, from a text document.

    Ties together question extraction (``QuestionExtractor``) and
    incorrect answer generation (``IncorrectAnswerGenerator``).
    '''

    # Runs of characters that are neither whitespace nor word characters,
    # plus underscores (which ``\w`` would otherwise let through).
    _NON_ALNUM_RE = re.compile(r'([^\s\w]|_)+')
    # Runs of one or more literal space characters.
    _SPACES_RE = re.compile(' +')

    def __init__(self, num_questions, num_options):
        '''Store the configuration and build the question extractor.

        Args:
            num_questions: number of questions requested; passed on to
                ``QuestionExtractor``.
            num_options: number of options requested per question; passed
                to ``IncorrectAnswerGenerator.get_all_options_dict``.
        '''
        self.num_questions = num_questions
        self.num_options = num_options
        self.question_extractor = QuestionExtractor(num_questions)

    def clean_text(self, text):
        '''Return ``text`` with punctuation removed, one ". " per sentence.

        Newlines are replaced by spaces, the text is split into sentences
        with ``sent_tokenize``, every character that is not whitespace or
        alphanumeric is dropped (underscores included), runs of spaces are
        collapsed, and each remaining sentence is terminated with ". ".

        Args:
            text: raw document text.

        Returns:
            The cleaned text; an empty string if no sentence survives
            cleaning.
        '''
        text = text.replace('\n', ' ')  # remove newline chars

        cleaned_sentences = []
        for sentence in sent_tokenize(text):
            cleaned = self._NON_ALNUM_RE.sub('', sentence)
            cleaned = self._SPACES_RE.sub(' ', cleaned)

            # Stripping punctuation can leave a dangling space
            # ("Hello world !" -> "Hello world "); the period takes its
            # place.  Strings are immutable, so slice rather than assign
            # to the last index.
            if cleaned.endswith(' '):
                cleaned = cleaned[:-1]

            # A sentence made only of punctuation cleans down to nothing;
            # skip it instead of emitting a stray ".".
            if not cleaned:
                continue

            cleaned_sentences.append(cleaned)

        # Every sentence ends with a period and is padded with a space.
        return ''.join(sentence + '. ' for sentence in cleaned_sentences)

    def generate_questions_dict(self, document):
        '''Build the questions dictionary for ``document``.

        The document is cleaned with ``clean_text``, questions are obtained
        from the question extractor, and every question numbered
        ``1..num_questions`` that is present receives an ``"options"``
        entry produced by ``IncorrectAnswerGenerator.get_all_options_dict``
        from its ``"answer"``.

        Args:
            document: raw document text.

        Returns:
            The questions dictionary (also stored on
            ``self.questions_dict``).
        '''
        document = self.clean_text(document)
        self.questions_dict = self.question_extractor.get_questions_dict(
            document)
        self.incorrect_answer_generator = IncorrectAnswerGenerator(document)

        for i in range(1, self.num_questions + 1):
            # The extractor may return fewer questions than requested.
            if i not in self.questions_dict:
                continue

            question = self.questions_dict[i]
            question["options"] = (
                self.incorrect_answer_generator.get_all_options_dict(
                    question["answer"],
                    self.num_options
                )
            )

        return self.questions_dict
|