def_binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None): """Calculate true and false positives per binary classification threshold. """ # Check to make sure y_true is valid y_type = type_of_target(y_true) ifnot (y_type == "binary"or (y_type == "multiclass"and pos_label isnotNone)): raise ValueError("{0} format is not supported".format(y_type))
if sample_weight isnotNone: sample_weight = column_or_1d(sample_weight)
# ensure binary classification if pos_label is not specified classes = np.unique(y_true) if (pos_label isNoneand not (np.array_equal(classes, [0, 1]) or np.array_equal(classes, [-1, 1]) or np.array_equal(classes, [0]) or np.array_equal(classes, [-1]) or np.array_equal(classes, [1]))): raise ValueError("Data is not binary and pos_label is not specified") elif pos_label isNone: pos_label = 1.
# make y_true a boolean vector y_true = (y_true == pos_label)
# sort scores and corresponding truth values desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1] #argsort升序排序得到索引, [::-1]是反转功能,这里就是降序 y_score = y_score[desc_score_indices] y_true = y_true[desc_score_indices] if sample_weight isnotNone: weight = sample_weight[desc_score_indices] else: weight = 1.
# y_score typically has many tied values. Here we extract # the indices associated with the distinct values. We also # concatenate a value for the end of the curve. distinct_value_indices = np.where(np.diff(y_score))[0] threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1] # np.r_按列concat