transformers에서 BERT 사용하기: 마스크 언어 모델(fill-mask) 예제

from transformers import TFBertForMaskedLM, AutoTokenizer

# Load the pretrained 'bert-base-uncased' checkpoint into the TensorFlow
# masked-language-model class.
model = TFBertForMaskedLM.from_pretrained('bert-base-uncased')

# Load the tokenizer published under the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

# Echo the placeholder string this tokenizer uses for a masked position
# (the recorded output below is '[MASK]').
tokenizer.mask_token

'[MASK]'

# Encode one sentence containing a single [MASK] placeholder;
# return_tensors='tf' requests TensorFlow tensors instead of Python lists.
inputs = tokenizer('Pizza is my [MASK] food.', return_tensors='tf')

# Run the masked-LM model on the encoded batch.
result = model(inputs)

# Echo the encoding. The recorded output below shows three int32 tensors of
# shape (1, 8): input_ids, token_type_ids and attention_mask. Id 103 sits at
# index 4 of input_ids.
inputs

{'input_ids': <tf.Tensor: shape=(1, 8), dtype=int32, numpy=
array([[  101, 10733,  2003,  2026,   103,  2833,  1012,   102]],
      dtype=int32)>, 'token_type_ids': <tf.Tensor: shape=(1, 8), dtype=int32, numpy=array([[0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(1, 8), dtype=int32, numpy=array([[1, 1, 1, 1, 1, 1, 1, 1]], dtype=int32)>}

# First element of the model output: the prediction scores. The indexing
# below treats it as (batch, position, vocabulary) -- TODO confirm against
# the installed transformers version.
logits = result[0]

import tensorflow as tf

# Locate the [MASK] token in the encoded sentence instead of hard-coding its
# position, so the cell keeps working when the input sentence changes.
# tf.where on a 1-D boolean tensor returns the matching indices as (n, 1).
mask_positions = tf.where(
    tf.equal(inputs['input_ids'][0], tokenizer.mask_token_id)
)
# Use the first mask found (index 4 for the sentence encoded above).
mask_index = int(mask_positions[0, 0])

# Five highest-scoring vocabulary ids at the masked position.
top = tf.math.top_k(logits[0, mask_index], k=5)

# Convert the winning ids back to text, best candidate first.
tokenizer.decode(top.indices.numpy())

'favorite favourite comfort preferred staple'

from transformers import pipeline

# Build a 'fill-mask' pipeline for the 'bert-base-uncased' checkpoint through
# the pipeline() factory.
pip = pipeline('fill-mask', model='bert-base-uncased')

# Query the pipeline with the masked sentence. The recorded output below is a
# list of five dicts with the keys 'score', 'sequence', 'token' and
# 'token_str', ordered by descending score.
pip('Pizza is my [MASK] food.')

[{'score': 0.9178981781005859,
  'sequence': '[CLS] pizza is my favorite food. [SEP]',
  'token': 5440,
  'token_str': 'favorite'},
 {'score': 0.05756445229053497,
  'sequence': '[CLS] pizza is my favourite food. [SEP]',
  'token': 8837,
  'token_str': 'favourite'},
 {'score': 0.009590543806552887,
  'sequence': '[CLS] pizza is my comfort food. [SEP]',
  'token': 7216,
  'token_str': 'comfort'},
 {'score': 0.0026588481850922108,
  'sequence': '[CLS] pizza is my preferred food. [SEP]',
  'token': 6871,
  'token_str': 'preferred'},
 {'score': 0.0014816045295447111,
  'sequence': '[CLS] pizza is my staple food. [SEP]',
  'token': 18785,
  'token_str': 'staple'}]

from transformers import FillMaskPipeline

# Construct the fill-mask pipeline directly from the already-loaded model and
# tokenizer objects rather than from a checkpoint name.
pip2 = FillMaskPipeline(model=model, tokenizer=tokenizer)

# Same query as for `pip`. The recorded output below lists the same tokens in
# the same order, with scores that differ only in the later decimal places.
pip2('Pizza is my [MASK] food.')

[{'score': 0.9178853034973145,
  'sequence': '[CLS] pizza is my favorite food. [SEP]',
  'token': 5440,
  'token_str': 'favorite'},
 {'score': 0.057563863694667816,
  'sequence': '[CLS] pizza is my favourite food. [SEP]',
  'token': 8837,
  'token_str': 'favourite'},
 {'score': 0.009590419009327888,
  'sequence': '[CLS] pizza is my comfort food. [SEP]',
  'token': 7216,
  'token_str': 'comfort'},
 {'score': 0.0026588006876409054,
  'sequence': '[CLS] pizza is my preferred food. [SEP]',
  'token': 6871,
  'token_str': 'preferred'},
 {'score': 0.0014815852046012878,
  'sequence': '[CLS] pizza is my staple food. [SEP]',
  'token': 18785,
  'token_str': 'staple'}]