Trains a Bidirectional LSTM on the IMDB sentiment classification task.

Output after 4 epochs on CPU: ~0.8146
Time per epoch on CPU (Core i7): ~150s.

  from __future__ import print_function

  import numpy as np
  from keras.preprocessing import sequence
  from keras.models import Sequential
  from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
  from keras.datasets import imdb

  # Vocabulary size: only the max_features most common words are kept.
  max_features = 20000
  # Every review is padded or cut to this number of words
  # (among the top max_features most common words).
  maxlen = 100
  batch_size = 32

  # --- Data -------------------------------------------------------------
  print('Loading data...')
  (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
  print(len(x_train), 'train sequences')
  print(len(x_test), 'test sequences')

  # Bring every sequence to exactly maxlen entries so the samples can be
  # stacked into one (samples x time) array.
  print('Pad sequences (samples x time)')
  x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
  x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
  print('x_train shape:', x_train.shape)
  print('x_test shape:', x_test.shape)
  y_train = np.array(y_train)
  y_test = np.array(y_test)

  # --- Model ------------------------------------------------------------
  # Embedding -> bidirectional 64-unit LSTM -> dropout -> single sigmoid
  # unit that outputs the sentiment probability.
  model = Sequential()
  model.add(Embedding(max_features, 128, input_length=maxlen))
  model.add(Bidirectional(LSTM(64)))
  model.add(Dropout(0.5))
  model.add(Dense(1, activation='sigmoid'))

  # try using different optimizers and different optimizer configs
  model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

  # --- Training ---------------------------------------------------------
  print('Train...')
  # validation_data is passed as a tuple (x_val, y_val), the form the Keras
  # documentation specifies; a list is not reliably unpacked into
  # (inputs, targets) by newer Keras releases.
  model.fit(x_train, y_train,
            batch_size=batch_size,
            epochs=4,
            validation_data=(x_test, y_test))