# Script that tokenizes a list of input text files
from nltk_lite import tokenize
import re
import sys

#sentence = "This is the time -- and this is the record of the time."
#words = sentence.split()
# Tokenize every file named on the command line and print, for each one,
# the file name followed by the list of its tokens.
#
# The pattern never changes between files, so it is defined once, outside
# the loop. It matches maximal runs of word characters.
pattern = r'\w+'
for arg in sys.argv[1:]:
  # 'rU' is Python 2's universal-newline read mode.
  f = open(arg, 'rU')
  try:
    text = f.read()
  finally:
    # Release the handle even if reading fails; try/finally is used rather
    # than `with` so the script keeps working on old Python 2 interpreters.
    f.close()
  # Single-argument parenthesized print behaves identically under Python 2.
  print(arg + ':')
  # NOTE(review): tokenize.regexp presumably yields the substrings of `text`
  # matching `pattern`; list() materializes them so they print as a list.
  print(list(tokenize.regexp(text, pattern)))