# (Code-viewer residue removed: "Newer"/"Older" navigation links and a 1-75 line-number gutter.)
import analysis_functions
import analysis_plots
import data_model
import math
"""
A program to demonstrate forming a mean
precision vs recall distribution.
"""
# Script flow: load the per-query results, interpolate each query's
# precision-vs-recall curve, average the interpolated precisions at every
# distinct recall point, and plot the averaged curve.

# Load the CSV data.
query_results = data_model.load_query_results()

# Find all interpolated values and unique recall points.
all_values = []
recall_points = []
for query_id in query_results.keys():
    results = query_results[query_id]

    # Find the recall and precision.
    recall_vs_rank = analysis_functions.recall(results)
    precision_vs_rank = analysis_functions.precision(results)

    # Interpolate the precision, to the right and back to zero.
    # Element 0 of the result is read below as the recall values and
    # element 1 as the matching precision values.
    interpolated_values = analysis_functions.interpolate_precision(
        recall_vs_rank, precision_vs_rank)

    # Keep the interpolated values.
    all_values.append(interpolated_values)

    # Append the recall points, if they are not already in the list.
    # Floats are compared with a tolerance so that values differing only
    # by rounding error collapse into a single recall point.
    for recall_value in interpolated_values[0]:
        already_known = any(
            math.isclose(recall_value, known_point)
            for known_point in recall_points
        )
        if not already_known:
            recall_points.append(recall_value)
recall_points.sort()

# Find the precision average for each available recall point,
# interpolating to the right if needed.
n = len(recall_points)
precision_average = [0.] * n
n_counts = [0] * n
for i, recall_point in enumerate(recall_points):
    # Consider each set of interpolated values.
    for interpolated_values in all_values:
        interpolated_recalls = interpolated_values[0]
        interpolated_precisions = interpolated_values[1]

        # Find the matching precision value for this recall point: the
        # first entry at, or to the right of, the recall point.
        # NOTE(review): this assumes each curve's recalls are in
        # ascending order -- confirm against interpolate_precision().
        for j, interpolated_recall in enumerate(interpolated_recalls):
            # The recall points were de-duplicated with math.isclose(), so
            # the lookup must use the same tolerance.  A bare ">=" would
            # skip a curve's own point when it is a rounding error below
            # the retained recall point and wrongly take the next point
            # to the right.
            if (interpolated_recall >= recall_point
                    or math.isclose(interpolated_recall, recall_point)):
                precision_average[i] += interpolated_precisions[j]
                n_counts[i] += 1
                break

# Divide by the number of counts for the recall point.  Curves that never
# reach a recall point contribute nothing to it, so the divisor is the
# number of contributing curves.  Every recall point was taken from at
# least one curve, so each count is at least one (barring NaN recalls).
precision_average = [
    total / count for total, count in zip(precision_average, n_counts)
]

# Plot the average values.
analysis_plots.line_plot(
    "precision_vs_recall_average.png",
    recall_points,
    precision_average,
    labels={
        "title": f"Average from {len(query_results.keys())} queries",
        "xlabel": "Recall",
        "ylabel": "Precision",
    },
)