ALT Code:
from minicons import scorer
# Incremental LM scorer built from the "gpt2-xl" checkpoint; "cuda:0" is
# passed as the second argument (presumably the device -- confirm against
# the minicons docs).
lm = scorer.IncrementalLMScorer("gpt2-xl", "cuda:0")

# Two sentences that differ only in "matron" vs. "mat".
stimuli = [
    "I was a matron in France",
    "I was a mat in France",
]

# Old way: score every token with the correction switched off
# (bow_correction=False).
# P.S. gpt2 does not automatically add a bos token at the beginning,
# which is why bos_token=True is passed explicitly.
lm.token_score(
    stimuli,
    bos_token=True,
    surprisal=True,
    base_two=True,
    bow_correction=False,
)
'''Rounded Output
[[('<|endoftext|>', 0.0),
('I', 5.85),
('was', 4.28),
('a', 4.67),
('mat', 16.34),
('ron', 1.74),
('in', 2.12),
('France', 11.43)],
[('<|endoftext|>', 0.0),
('I', 5.85),
('was', 4.28),
('a', 4.67),
('mat', 16.34),
('in', 10.78),
('France', 10.71)]]
'''
# New way: the identical call with bow_correction=True. Compare the
# surprisal reported for "mat" in the two sentences.
lm.token_score(
    stimuli,
    bos_token=True,
    surprisal=True,
    base_two=True,
    bow_correction=True,
)
'''Rounded Output
[[('<|endoftext|>', 0.0),
('I', 6.30),
('was', 3.84),
('a', 4.68),
('mat', 16.34),
('ron', 2.11),
('in', 1.75),
('France', 11.42)],
[('<|endoftext|>', 0.0),
('I', 6.30),