使用pytorch_pretrained_bert.BertTokenizer.from_pretrained()函数生成的随机中文标题
from pytorch_pretrained_bert import BertTokenizer
# Demo: tokenize a handful of Chinese sentences with the pretrained
# 'bert-base-chinese' vocabulary and print the resulting token lists.

# Build the tokenizer from the pretrained model's shortcut name.
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')

# Sentences to tokenize, in the order their token lists are printed.
sample_texts = (
    "今天是个好日子",
    "这家餐馆的菜很好吃",
    "明天要去上海旅游",
    "这本书是我最喜欢的",
    "天气预报说明天要下雨",
    "请问这是哪个地方的美食",
)

# NOTE: BERT's basic tokenizer puts whitespace around every CJK character
# before WordPiece runs, so Chinese text comes out as one token per
# character -- there is no word segmentation. For example, "今天是个好日子"
# yields ['今', '天', '是', '个', '好', '日', '子'], not ['今天', '是', ...].
for text in sample_texts:
    tokens = tokenizer.tokenize(text)
    print(tokens)
