Implementing a beam search-based caption generator

We will now be implementing a basic beam search-based algorithm to generate caption sequences:

import numpy as np
from keras.preprocessing import image, sequence
 
def get_raw_caption_sequences(model, word_to_index, image_features,
                              max_caption_size, beam_size=1):
    """Generate candidate caption token sequences using beam search.

    Every candidate starts with the index of the '<START>' token. On each
    step every surviving candidate is padded to ``max_caption_size``, passed
    to ``model.predict`` together with the image features, and extended with
    each of the ``beam_size`` highest-scoring next tokens. Only the
    ``beam_size`` best extended candidates are kept for the next step.

    Args:
        model: Object exposing ``predict``; it is called with
            ``[image_batch, padded_caption_batch]``.
            (assumes the first row of the result holds one score per
            vocabulary index -- TODO confirm against the trained model)
        word_to_index: Mapping from token string to vocabulary index; must
            contain '<START>'.
        image_features: Features of a single image; wrapped into a batch of
            one before prediction.
        max_caption_size: Length (in tokens, including '<START>') at which
            the search stops.
        beam_size: Number of candidates kept after every step.

    Returns:
        A list of ``[token_indices, score]`` pairs ordered by ascending
        score (best candidate last). If ``max_caption_size`` is not larger
        than one, the single start candidate ``[[start_index], 0.0]`` is
        returned unchanged.

    Raises:
        ValueError: If ``beam_size`` is smaller than 1.
        KeyError: If '<START>' is missing from ``word_to_index``.
    """
    # A beam of 0 would make the ``[-beam_size:]`` slices below select the
    # whole array instead of nothing, so reject it up front.
    if beam_size < 1:
        raise ValueError('beam_size must be at least 1')

    start = [word_to_index['<START>']]
    caption_seqs = [[start, 0.0]]

    # All candidates grow by one token per step, so checking the first one
    # is enough to know when every candidate has reached the size limit.
    while len(caption_seqs[0][0]) < max_caption_size:
        temp_caption_seqs = []
        for caption_seq in caption_seqs:
            partial_caption_seq = sequence.pad_sequences(
                [caption_seq[0]], maxlen=max_caption_size, padding='post')
            next_words_pred = model.predict(
                [np.asarray([image_features]),
                 np.asarray(partial_caption_seq)])[0]
            # argsort is ascending, so the tail holds the best next tokens.
            next_words = np.argsort(next_words_pred)[-beam_size:]
            for word in next_words:
                # Copy the token list so sibling candidates do not share it.
                new_partial_caption = caption_seq[0][:]
                new_partial_caption.append(word)
                # NOTE(review): scores are accumulated by plain addition; if
                # the model emits probabilities, summing log-probabilities
                # is the usual beam score -- confirm this is intended.
                new_partial_caption_prob = (
                    caption_seq[1] + next_words_pred[word])
                temp_caption_seqs.append(
                    [new_partial_caption, new_partial_caption_prob])
        # NOTE(review): candidates are not checked for an end token here, so
        # they keep growing until max_caption_size is reached.
        caption_seqs = temp_caption_seqs
        caption_seqs.sort(key=lambda item: item[1])
        caption_seqs = caption_seqs[-beam_size:]
    return caption_seqs

This function generates candidate captions from the input image features using beam search. However, each result is only a raw sequence of token indices, built one token at a time from the tokens predicted so far. Hence, we will build a wrapper function on top of it that produces a clean text sentence as the caption for an input image by leveraging the preceding function:

def generate_image_caption(model, word_to_index_map, index_to_word_map,
                           image_features, max_caption_size,
                           beam_size=1):
    """Return clean caption sentences for one image, best candidate first.

    Runs ``get_raw_caption_sequences`` and converts every returned token
    index sequence into a plain sentence with the '<START>' and '<END>'
    markers removed.

    Args:
        model: Caption model, forwarded to ``get_raw_caption_sequences``.
        word_to_index_map: Mapping from token string to vocabulary index.
        index_to_word_map: Mapping from vocabulary index to token string.
        image_features: Features of the image to caption.
        max_caption_size: Maximum caption length in tokens.
        beam_size: Number of beam search candidates to produce.

    Returns:
        A list of caption strings ordered by descending beam score.
    """
    raw_caption_seqs = get_raw_caption_sequences(
        model=model,
        word_to_index=word_to_index_map,
        image_features=image_features,
        max_caption_size=max_caption_size,
        beam_size=beam_size)
    # Negating the score orders the candidates from best to worst.
    raw_caption_seqs.sort(key=lambda item: -item[1])

    captions = [[index_to_word_map[idx] for idx in item[0]]
                for item in raw_caption_seqs]

    final_captions = []
    for caption in captions:
        # Skip the '<START>' marker itself.
        start_index = caption.index('<START>') + 1
        max_len = min(len(caption), max_caption_size)
        # Cut at '<END>' when present. Otherwise the slice stops at
        # max_len - 1, which leaves out the final token of the sequence.
        end_index = (caption.index('<END>') if '<END>' in caption
                     else max_len - 1)
        final_captions.append(' '.join(caption[start_index:end_index]))
    return final_captions

We also need our caption preprocessing function from earlier, which we used to preprocess the initial captions when training the model:

# Single-pass character rewrite used by preprocess_captions: punctuation is
# dropped, '&' is spelled out and '-' becomes a space.
_CAPTION_CHAR_TABLE = str.maketrans({
    '.': None,
    ',': None,
    "'": None,
    '"': None,
    '&': 'and',
    '(': None,
    ')': None,
    '-': ' ',
})


def preprocess_captions(caption_list):
    """Normalize raw caption strings.

    Each caption is stripped, lower-cased, cleaned of the characters
    ``. , ' " ( )``, has ``&`` replaced by ``and`` and ``-`` replaced by a
    space, and finally has every run of whitespace collapsed to one space.

    Args:
        caption_list: Iterable of caption strings.

    Returns:
        A new list with the normalized captions, in the input order.
    """
    pc = []
    for caption in caption_list:
        caption = caption.strip().lower().translate(_CAPTION_CHAR_TABLE)
        # split() without arguments also removes the extra spaces that the
        # '-' replacement may have introduced.
        pc.append(' '.join(caption.split()))
    return pc
..................Content has been hidden....................

You can't read the whole page of this ebook; please click here to log in and view the full page.
Reset