Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • gxb20157/precision-and-recall
1 result
Show changes
Commits on Source (2)
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# For LaTeX
*.aux
*.glo
*.idx
*.log
*.toc
*.ist
*.acn
*.acr
*.alg
*.bbl
*.blg
*.dvi
*.glg
*.gls
*.ilg
*.ind
*.lof
*.lot
*.maf
*.mtc
*.mtc1
*.out
*.synctex.gz
*-eps-converted-to.pdf
# Ignore .png files that are produced.
*.png
\ No newline at end of file
def precision(relevant_ranks: list) -> list:
    """
    Return the precision as a function of rank.

    :param relevant_ranks: the rank positions (1-based) of the
        relevant documents.
    :return: a list whose element i holds the precision after the
        first i + 1 documents have been retrieved, up to the highest
        relevant rank; an empty list if no ranks are given.
    """
    # Guard the empty case: max() on an empty list raises ValueError.
    if not relevant_ranks:
        return []
    # A set gives O(1) membership tests inside the loop.
    relevant = set(relevant_ranks)
    n = max(relevant)
    results = [0.] * n
    n_relevant = 0
    for i in range(n):
        # Rank is 1-based; i + 1 documents have been retrieved so far.
        if i + 1 in relevant:
            n_relevant += 1
        results[i] = n_relevant / (i + 1)
    return results
def recall(relevant_ranks: list) -> list:
    """
    Return the recall as a function of rank.

    :param relevant_ranks: the rank positions (1-based) of the
        relevant documents.
    :return: a list whose element i holds the recall after the
        first i + 1 documents have been retrieved, up to the highest
        relevant rank; an empty list if no ranks are given.
    """
    # Guard the empty case: max() on an empty list raises ValueError.
    if not relevant_ranks:
        return []
    # A set gives O(1) membership tests inside the loop.
    relevant = set(relevant_ranks)
    n = max(relevant)
    results = [0.] * n
    # Recall is measured against the number of ranks supplied.
    total_relevant = len(relevant_ranks)
    n_relevant = 0
    for i in range(n):
        # Rank is 1-based.
        if i + 1 in relevant:
            n_relevant += 1
        results[i] = n_relevant / total_relevant
    return results
def interpolate_precision(recall: list, precision: list) -> tuple:
    """
    Interpolate precision values for a precision-recall curve.

    :param recall: recall values at each rank.
    :param precision: precision values at each rank, same length
        as recall.
    :return: a tuple of (interpolated recall, interpolated
        precision); a pair of empty lists if either input is empty.
    """
    if not recall or not precision:
        return ([], [])
    # Interpolate to the right: each point takes the maximum
    # precision at or beyond it.  A single reverse suffix-max pass
    # is O(n), replacing the O(n^2) max-of-slice per element.
    int_precision = list(precision)
    for i in range(len(int_precision) - 2, -1, -1):
        if int_precision[i + 1] > int_precision[i]:
            int_precision[i] = int_precision[i + 1]
    # Extend the curve back to recall = 0, holding the first
    # interpolated precision constant.
    int_recall = recall.copy()
    if int_recall[0] != 0:
        int_recall = [0] + int_recall
        int_precision = [int_precision[0]] + int_precision
    return (int_recall, int_precision)
def average_precision(relevant_ranks: list, precision: list) -> list:
    """
    Calculate the average precision at each rank.

    :param relevant_ranks: the rank positions (1-based) of the
        relevant documents.
    :param precision: the precision at each rank.
    :return: a list whose element i holds the mean of the precision
        values at the relevant ranks seen so far.
    """
    n = len(precision)
    # A set gives O(1) membership tests inside the loop.
    relevant = set(relevant_ranks)
    results = [0.] * n
    precision_sum = 0.
    n_relevant = 0
    for i in range(n):
        if i + 1 in relevant:
            precision_sum += precision[i]
            n_relevant += 1
        # Before the first relevant document the average is
        # undefined; report 0 rather than dividing by zero (the
        # original crashed when rank 1 was not relevant).
        results[i] = precision_sum / n_relevant if n_relevant else 0.
    return results
import matplotlib.pyplot as plt
def recall_precision(file_name: str,
                     rank: list,
                     recall: list,
                     precision: list) -> None:
    """
    Plot recall and precision against rank on a shared x-axis
    and save the figure to the given file.
    """
    fig, recall_axis = plt.subplots()
    # Recall curve on the left-hand y-axis.
    recall_axis.set_xlabel('Rank')
    recall_axis.set_ylabel('Recall')
    recall_lines = recall_axis.plot(rank, recall, label="Recall")
    recall_axis.tick_params(axis='y')
    # Precision curve on a second y-axis sharing the same x-axis.
    precision_axis = recall_axis.twinx()
    precision_axis.set_ylabel('Precision')
    precision_lines = precision_axis.plot(rank, precision, '--',
                                          label="Precision")
    precision_axis.tick_params(axis='y')
    # One combined legend covering the lines from both axes.
    all_lines = recall_lines + precision_lines
    all_labels = [line.get_label() for line in all_lines]
    recall_axis.legend(all_lines, all_labels,
                       bbox_to_anchor=(1.12, 1), borderaxespad=0)
    fig.tight_layout()
    plt.savefig(file_name)
    plt.close()
def precision_vs_recall(file_name: str,
                        recall: list,
                        precision: list,
                        interpolated_recall: list = None,
                        interpolated_precision: list = None,
                        use_legend: bool = True) -> None:
    """
    Plot precision vs recall, optionally overlaying the
    interpolated curve, and save the figure to the given file.

    :param file_name: path the figure is saved to.
    :param recall: recall values (x-axis).
    :param precision: precision values (y-axis).
    :param interpolated_recall: optional interpolated recall values.
    :param interpolated_precision: optional interpolated precision
        values; the overlay is drawn only when this is non-empty.
    :param use_legend: whether to draw a legend.
    """
    # Avoid mutable default arguments: default to None and
    # substitute fresh empty lists per call.
    if interpolated_recall is None:
        interpolated_recall = []
    if interpolated_precision is None:
        interpolated_precision = []
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.plot(recall, precision, label="Precision")
    if len(interpolated_precision) > 0:
        plt.plot(interpolated_recall, interpolated_precision,
                 linestyle='dotted', label="Interpolated", color='red')
    plt.tick_params(axis='y')
    if use_legend:
        plt.legend(loc=1)
    plt.tight_layout()
    plt.savefig(file_name)
    plt.close()
def line_plot(file_name: str,
              x: list,
              y: list,
              labels: dict) -> None:
    """
    A function to produce a line plot.

    :param file_name: path the figure is saved to (the image format
        is inferred from the extension by matplotlib).
    :param x: the x-axis values.
    :param y: the y-axis values, same length as x.
    :param labels: a dict with "title", "xlabel" and "ylabel"
        entries used to annotate the plot.
    """
    plt.title(labels["title"])
    plt.xlabel(labels["xlabel"])
    plt.ylabel(labels["ylabel"])
    plt.plot(x, y)
    plt.tick_params(axis='y')
    plt.tight_layout()
    # Write the figure to disk and release the figure resources.
    plt.savefig(file_name)
    plt.close()
import analysis_functions
import analysis_plots
import data_model
"""
A program to demonstrate average precision at a specific rank
and for all ranks.
"""
# Load the CSV data.
query_results = data_model.load_query_results()
# Find the precision.
precision_vs_rank = analysis_functions.precision(query_results[1])
rank = list(range(1, len(precision_vs_rank)+1))
# Find the average precision.
average_precision_vs_rank = analysis_functions.average_precision(query_results[1],
precision_vs_rank)
# Plot the average precision.
analysis_plots.line_plot("average_precision.png",
rank,
average_precision_vs_rank,
labels={
"title": "Average precision vs rank for query 1",
"xlabel": "Rank",
"ylabel": "Average precision"
})
# The average precision at the highest rank.
print(f"Average precision, after all ranks = {average_precision_vs_rank[-1]}")
# Calculating the average precision using recall.
recall_vs_rank = analysis_functions.recall(query_results[1])
avp = 0
n = len(recall_vs_rank)
previous_recall = 0
for i in range(n):
avp += (recall_vs_rank[i] - previous_recall) * precision_vs_rank[i]
previous_recall = recall_vs_rank[i]
print(f"Average precision, for all ranks = {avp}")
import csv
def load_query_results(file_name: str = "query_results.csv") -> dict:
    """
    Load the query and relevant document rank position data.

    :param file_name: path of the CSV file to read; must contain
        "QueryId" and "Rank" columns.  Defaults to
        "query_results.csv" for backward compatibility.
    :return: a dict mapping each query id (int) to the list of
        rank positions (int) of its relevant documents, in file
        order.
    """
    query_results = {}
    # The with-statement guarantees the file is closed even if a
    # row fails to parse; newline="" is the csv-module convention.
    with open(file_name, "r", newline="") as input_file:
        for row in csv.DictReader(input_file):
            query_id = int(row["QueryId"])
            # setdefault creates the list on first sight of a query.
            query_results.setdefault(query_id, []).append(int(row["Rank"]))
    return query_results
import analysis_functions
import analysis_plots
import data_model
"""
A program to demonstrate plotting
precision vs recall.
"""
# Load the CSV data.
query_results = data_model.load_query_results()
# Find the recall and precision.
recall_vs_rank = analysis_functions.recall(query_results[1])
precision_vs_rank = analysis_functions.precision(query_results[1])
# Interpolate the precision, to the right
# and back to zero.
interpolated_values = analysis_functions.interpolate_precision(recall_vs_rank,
precision_vs_rank)
# Plot the values and the interpolated values.
analysis_plots.precision_vs_recall("precision_vs_recall.png",
recall_vs_rank,
precision_vs_rank,
interpolated_values[0],
interpolated_values[1])
import analysis_functions
import analysis_plots
import data_model
import math
"""
A program to demonstrate forming a mean
precision vs recall distribution.
"""
# Load the CSV data.
query_results = data_model.load_query_results()
# Find all interpolated values and unique recall points.
all_values = []
recall_points = []
for query_id in query_results.keys():
# Find the recall and precision.
recall_vs_rank = analysis_functions.recall(query_results[query_id])
precision_vs_rank = analysis_functions.precision(query_results[query_id])
# Interpolate the precision, to the right
# and back to zero.
interpolated_values = analysis_functions.interpolate_precision(recall_vs_rank,
precision_vs_rank)
# Keep the interpolated values.
all_values.append(interpolated_values)
# Append the recall points, if they are not already
# in the list.
interpolated_recalls = interpolated_values[0]
for recall_value in interpolated_recalls:
matched = False
for recall_point in recall_points:
if math.isclose(recall_value, recall_point):
matched = True
break
if not matched:
recall_points.append(recall_value)
recall_points.sort()
# Find the precision average for each available recall point,
# interpolating to the right if needed.
n = len(recall_points)
precision_average = [0.] * n
n_counts = [0] * n
for i in range(n):
# Consider each known recall point.
recall_point = recall_points[i]
# Consider each set of interpolated values.
for interpolated_values in all_values:
interpolated_recalls = interpolated_values[0]
interpolated_precisions = interpolated_values[1]
n_values = len(interpolated_recalls)
# Find the matching precision value for this recall
# point.
for j in range(n_values):
interpolated_recall = interpolated_recalls[j]
# Get point to the right if needed.
if interpolated_recall >= recall_point:
precision_average[i] += interpolated_precisions[j]
n_counts[i] += 1
break
# Divide by the number of counts for the recall point.
for i in range(n):
precision_average[i] /= n_counts[i]
# Plot the average values.
analysis_plots.line_plot("precision_vs_recall_average.png",
recall_points,
precision_average,
labels={
"title": f"Average from {len(query_results.keys())} queries",
"xlabel": "Recall",
"ylabel": "Precision"
})
QueryId,Rank
1,1
1,2
1,5
1,7
2,1
2,4
2,5
3,1
3,5
3,8
4,2
4,3
4,5
4,7
\ No newline at end of file
import analysis_functions
import analysis_plots
import data_model
"""
A program to demonstrate plotting
precision and recall vs rank.
"""
# Load the CSV data.
query_results = data_model.load_query_results()
# Find the recall and precision.
recall_vs_rank = analysis_functions.recall(query_results[1])
precision_vs_rank = analysis_functions.precision(query_results[1])
rank = list(range(1, len(precision_vs_rank)+1))
# Plot the recall and precision.
analysis_plots.recall_precision("recall_precision.png",
rank,
recall_vs_rank,
precision_vs_rank)