|
|
@@ -25,6 +25,7 @@ def main(options: list):
|
|
|
data, labels = map(np.array,
|
|
|
zip(*[(FeatureVector(p, features).get(), p.user)
|
|
|
for p in samples]))
|
|
|
+ num_users = len(np.unique([s.user for s in samples]))
|
|
|
avg, p = classify(data, labels, num_users, args)
|
|
|
print("Overall Accuracy: %f" % avg)
|
|
|
if args.p_value:
|
|
|
@@ -54,12 +55,12 @@ def parse_args(args: list):
|
|
|
def classify(data, labels, num_users: int, args):
|
|
|
s = np.arange(data.shape[0])
|
|
|
np.random.shuffle(s)
|
|
|
- res = randomForest(data[s], labels[s],
|
|
|
- n=args.folds, verbose=args.verbose, fn=args.criterion
|
|
|
+ res = random_forest(data[s], labels[s],
|
|
|
+ n=args.folds, verbose=args.verbose, fn=args.criterion,
|
|
|
estimators=args.estimators)
|
|
|
return (np.average(res), t_test(res, num_users)[1] / 2)
|
|
|
|
|
|
-def randomForest(data: list, labels: list, n=5, verbose=0, estimators=100,
|
|
|
+def random_forest(data: list, labels: list, n=5, verbose=0, estimators=100,
|
|
|
fn="gini"):
|
|
|
from sklearn.ensemble import RandomForestClassifier
|
|
|
folds = KFold(n_splits=n)
|