"""
A program to demonstrate forming a mean precision vs recall distribution.

For every query the precision is interpolated against recall, the distinct
recall points seen across all queries are collected, and the precision is
averaged at each of those points. The resulting curve is written to
"precision_vs_recall_average.png".
"""

import math

import analysis_functions
import analysis_plots
import data_model

# Load the CSV data.
query_results = data_model.load_query_results()

# Find all interpolated values and the unique recall points.
all_values = []
recall_points = []
for ranked_results in query_results.values():
    # Find the recall and precision.
    recall_vs_rank = analysis_functions.recall(ranked_results)
    precision_vs_rank = analysis_functions.precision(ranked_results)

    # Interpolate the precision, to the right and back to zero.
    # Element 0 holds the recall values, element 1 the matching precisions.
    interpolated_values = analysis_functions.interpolate_precision(
        recall_vs_rank, precision_vs_rank
    )

    # Keep the interpolated values.
    all_values.append(interpolated_values)

    # Record each recall value that is not already known. Values are
    # compared with a tolerance so that floating point noise does not
    # create near-duplicate recall points.
    for recall_value in interpolated_values[0]:
        already_known = any(
            math.isclose(recall_value, known_point)
            for known_point in recall_points
        )
        if not already_known:
            recall_points.append(recall_value)

recall_points.sort()

# Find the precision average for each available recall point,
# interpolating to the right if needed.
n = len(recall_points)
precision_average = []
n_counts = []
for recall_point in recall_points:
    precision_sum = 0.0
    n_matched = 0

    # Consider each set of interpolated values.
    for interpolated_values in all_values:
        interpolated_recalls = interpolated_values[0]
        interpolated_precisions = interpolated_values[1]

        # Take the first value at, or to the right of, this recall point.
        # NOTE(review): this assumes each query's recall values are in
        # ascending order - confirm against interpolate_precision.
        for j, interpolated_recall in enumerate(interpolated_recalls):
            # Use the same tolerance as the de-duplication above. A value
            # that was merged into this recall point may be marginally
            # smaller than it; a strict ">=" alone would skip that value
            # and pick up the next point to the right (or none at all).
            if (interpolated_recall >= recall_point
                    or math.isclose(interpolated_recall, recall_point)):
                precision_sum += interpolated_precisions[j]
                n_matched += 1
                break

    # Divide by the number of queries that contributed to this recall
    # point. Each recall point originates from at least one query, and
    # that query always matches its own value.
    precision_average.append(precision_sum / n_matched)
    n_counts.append(n_matched)

# Plot the average values.
analysis_plots.line_plot(
    "precision_vs_recall_average.png",
    recall_points,
    precision_average,
    labels={
        "title": f"Average from {len(query_results)} queries",
        "xlabel": "Recall",
        "ylabel": "Precision",
    },
)