"""Estimate Zipf's-law parameters for the word frequencies of a text file.

The script tokenizes the text, ranks the distinct tokens by descending
frequency and fits, by ordinary least squares,

    log(frequency) = log(C) - a * log(rank)

It then prints the estimated constant ``C`` and exponent ``a``.
"""
import re
from collections import Counter

import numpy as np
from sklearn.linear_model import LinearRegression

with open('testi/collodi_pinocchio.txt', mode='r', encoding='iso-8859-1') as f:
    testo = f.read()

# Drop the first 1704 characters.
# NOTE(review): presumably the edition's preamble/licence header -- confirm
# the offset against the actual file.
testo = testo[1704:]

# Put a space after every apostrophe so an elided form and the word that
# follows it ("l'amico" -> "l' amico") are split into separate tokens.
testo = re.sub("'", "' ", testo)
# Strip the listed punctuation characters (raw string: no invalid escapes).
testo = re.sub(r'[\.,;:\-?!]{1}', '', testo)
testo = testo.lower()
testo = testo.split()

# Count every distinct token in one pass (instead of one list.count() scan
# per distinct token), then order the frequencies from most to least common.
frequenze = Counter(testo)
fd = sorted(frequenze.values(), reverse=True)
V = len(frequenze)  # vocabulary size = number of distinct tokens

# Log-log regression: the rank r (1..V) is the predictor, frequency the target.
Y = np.log(fd)
X = np.log(np.arange(1, V + 1)).reshape((-1, 1))
model = LinearRegression().fit(X, Y)

C = np.exp(model.intercept_)  # intercept is log(C)
a = -model.coef_[0]           # slope is -a

print('C:', C)
print('a:', a)