Merge branch 'nlp' of https://github.com/PragatiVerma18/Fantastic-Falcons-1.0 into webapp
This commit is contained in:
commit
7d91ff2c49
@ -4,6 +4,9 @@ generation modules
|
|||||||
'''
|
'''
|
||||||
from question_extraction import QuestionExtractor
|
from question_extraction import QuestionExtractor
|
||||||
from incorrect_answer_generation import IncorrectAnswerGenerator
|
from incorrect_answer_generation import IncorrectAnswerGenerator
|
||||||
|
import re
|
||||||
|
from nltk import sent_tokenize
|
||||||
|
|
||||||
|
|
||||||
class QuestionGeneration:
|
class QuestionGeneration:
|
||||||
'''This class contains the method
|
'''This class contains the method
|
||||||
@ -15,8 +18,30 @@ class QuestionGeneration:
|
|||||||
self.num_options = num_options
|
self.num_options = num_options
|
||||||
self.question_extractor = QuestionExtractor(num_questions)
|
self.question_extractor = QuestionExtractor(num_questions)
|
||||||
|
|
||||||
|
def clean_text(self, text: str) -> str:
    '''Normalise a document before question extraction.

    Newlines are replaced with spaces, the text is split into
    sentences with ``sent_tokenize``, and every sentence has its
    non-alphanumeric characters (and underscores) removed and runs of
    spaces collapsed. Each cleaned sentence is then terminated with a
    single period and followed by one padding space.

    Args:
        text: raw document text.

    Returns:
        The cleaned text, e.g. ``"First sentence. Second one. "``.
        An empty string is returned when nothing survives cleaning.
    '''
    text = text.replace('\n', ' ')  # remove newline chars

    cleaned_sentences = []
    for sentence in sent_tokenize(text):
        # remove non alphanumeric chars
        cleaned_sentence = re.sub(r'([^\s\w]|_)+', '', sentence)
        # substitute multiple spaces with single space
        cleaned_sentence = re.sub(' +', ' ', cleaned_sentence)

        # A trailing space is replaced by the period rather than kept
        # in front of it. Strings are immutable, so the space is sliced
        # off instead of being assigned over in place.
        if cleaned_sentence.endswith(' '):
            cleaned_sentence = cleaned_sentence[:-1]

        # Sentences consisting only of punctuation clean down to
        # nothing; skip them so no stray "." is emitted.
        if not cleaned_sentence:
            continue

        # terminate with a period and pad with a space at the end
        cleaned_sentences.append(cleaned_sentence + '. ')

    return ''.join(cleaned_sentences)
|
||||||
|
|
||||||
def generate_questions_dict(self, document):
|
def generate_questions_dict(self, document):
|
||||||
self.questions_dict = self.question_extractor.get_questions_dict(document)
|
document = self.clean_text(document)
|
||||||
|
self.questions_dict = self.question_extractor.get_questions_dict(
|
||||||
|
document)
|
||||||
self.incorrect_answer_generator = IncorrectAnswerGenerator(document)
|
self.incorrect_answer_generator = IncorrectAnswerGenerator(document)
|
||||||
|
|
||||||
for i in range(1, self.num_questions + 1):
|
for i in range(1, self.num_questions + 1):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user