Fix: handle word not in vocabulary error
This commit is contained in:
parent
9574fa0932
commit
91a4426d4b
@ -2,10 +2,12 @@
|
|||||||
for generating incorrect alternative
|
for generating incorrect alternative
|
||||||
answers for a given answer
|
answers for a given answer
|
||||||
'''
|
'''
|
||||||
|
import gensim
|
||||||
import gensim.downloader as api
|
import gensim.downloader as api
|
||||||
|
from gensim.models import Word2Vec
|
||||||
from nltk.tokenize import sent_tokenize, word_tokenize
|
from nltk.tokenize import sent_tokenize, word_tokenize
|
||||||
import random
|
import random
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
class IncorrectAnswerGenerator:
|
class IncorrectAnswerGenerator:
|
||||||
''' This class contains the methods
|
''' This class contains the methods
|
||||||
@ -13,20 +15,43 @@ class IncorrectAnswerGenerator:
|
|||||||
given an answer
|
given an answer
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, document):
|
||||||
# model required to fetch similar words
|
# model required to fetch similar words
|
||||||
self.model = api.load("glove-wiki-gigaword-100")
|
self.model = api.load("glove-wiki-gigaword-100")
|
||||||
|
self.all_words = []
|
||||||
|
for sent in sent_tokenize(document):
|
||||||
|
self.all_words.extend(word_tokenize(sent))
|
||||||
|
self.all_words = list(set(self.all_words))
|
||||||
|
|
||||||
def get_all_options_dict(self, answer, num_options):
|
def get_all_options_dict(self, answer, num_options):
|
||||||
''' This method returns a dict
|
''' This method returns a dict
|
||||||
of 'num_options' options out of
|
of 'num_options' options out of
|
||||||
which one is correct and is the answer
|
which one is correct and is the answer
|
||||||
'''
|
'''
|
||||||
similar_words = self.model.similar_by_word(answer, topn=15)[::-1]
|
|
||||||
options_dict = dict()
|
options_dict = dict()
|
||||||
|
try:
|
||||||
|
similar_words = self.model.similar_by_word(answer, topn=15)[::-1]
|
||||||
|
|
||||||
for i in range(1, num_options + 1):
|
for i in range(1, num_options + 1):
|
||||||
options_dict[i] = similar_words[i - 1][0]
|
options_dict[i] = similar_words[i - 1][0]
|
||||||
|
|
||||||
|
except:
|
||||||
|
self.all_sim = []
|
||||||
|
for word in self.all_words:
|
||||||
|
if word not in answer:
|
||||||
|
try:
|
||||||
|
self.all_sim.append(
|
||||||
|
(self.model.similarity(answer, word), word))
|
||||||
|
except:
|
||||||
|
self.all_sim.append(
|
||||||
|
(0.0, word))
|
||||||
|
else:
|
||||||
|
self.all_sim.append((-1.0, word))
|
||||||
|
|
||||||
|
self.all_sim.sort(reverse=True)
|
||||||
|
|
||||||
|
for i in range(1, num_options+1):
|
||||||
|
options_dict[i] = self.all_sim[i-1][1]
|
||||||
|
|
||||||
replacement_idx = random.randint(1, num_options)
|
replacement_idx = random.randint(1, num_options)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user