#!/usr/bin/env python
# ENSIGN rights
""" Visualize a tensor decomposition by plotting scores for selected components
in each mode.
"""
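# Usage: visualize.py <comp> <input_directory> <num weight vectors>
#        [<decomposition_type> <num_components> <use_absolute_value>]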
#
# Requires results to be inside VISUALS_INPUT_DIR and in
# the format: decomp_mode_i.txt where "i" is the mode.
#
# If <num_components> isn't specified then all components will be visualized.
import multiprocessing as mltprc
import sys
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.collections import LineCollection
import numpy as np
import ensign.cp_decomp as cpd
import ensign.comp_top_k as ctk
import ensign.ensign_io.ensign_logging as ensign_logging
log = ensign_logging.get_logger()
# log.setLevel('DEBUG') # Developers uncomment this for stacktraces
# Modes with fewer labels than this are drawn as one vertical bar per label;
# modes at or above it are drawn as a continuous line plot.
DISCRETE_COUNT = 1000
# Multiplier applied to the extreme scores to leave headroom on the y-axis.
AXIS_SCALE_OVERHEAD = 1.1
# Fraction of the mode size used as padding at both ends of the x-axis.
X_AXIS_OVERHEAD = 0.01
# Upper bound on the number of modes (subplots) drawn in one figure.
MAX_MODE_PER_FRAME = 8
# Intended minimum spread between the largest and smallest score of a mode.
TOLERANCE_VALUE = 0.001
# Resolution, in dots per inch, of the PNG files written by savefig.
DPI = 150
def _parse_args():
    """
    Parse ``sys.argv`` into the options consumed by ``main()``.

    Accepted command lines have either 3 or 6 positional arguments:
    ``<comp> <input_directory> <num weight vectors>`` optionally followed by
    ``<decomposition_type> <num_components> <use_absolute_value>``.
    Only ``input_directory``, ``decomposition_type`` and ``num_components``
    are read; the remaining positions are accepted but ignored here.

    Returns
    -------
    args : dict
        ``'decomp_dir'`` (str), ``'num_components'`` (int, -1 when not given)
        and, for the long form only, ``'decomp_type'`` (always ``"CP"``).

    Raises
    ------
    SystemExit
        With exit status 1, after printing an error message, when the
        argument count is wrong, the decomposition type is not ``"CP"`` or
        the number of components is not an integer.
    """
    argv = sys.argv
    if len(argv) not in (4, 7):
        print("ERROR: visualize.py <comp> <input_directory> <num weight vectors> "
              " [<decomposition_type> <num_components> <use_absolute_value>]")
        # Exit explicitly: there is no usable result to hand back to main().
        sys.exit(1)

    args = {'decomp_dir': argv[2], 'num_components': -1}
    if len(argv) == 7:
        if argv[4] != "CP":
            print("ERROR: Invalid decomposition type: " + argv[4])
            sys.exit(1)
        args['decomp_type'] = argv[4]
        try:
            args['num_components'] = int(argv[5])
        except ValueError:
            print("ERROR: Invalid number of components: " + argv[5])
            sys.exit(1)
    return args
def main():
    """
    Command-line entry point.

    Reads the decomposition from the directory given on the command line,
    settles on how many components to draw (all of them when none was
    requested, never more than the decomposition's rank) and hands off to
    ``visualize()``.
    """
    options = _parse_args()
    decomp = cpd.read_cp_decomp_dir(options['decomp_dir'])

    requested = options['num_components']
    # -1 is the "not specified" marker produced by _parse_args().
    num_components = decomp.rank if requested == -1 else requested

    if num_components > decomp.rank:
        print('WARNING: More components requested than rank of the decomposition.')
        print('Using {} instead.'.format(decomp.rank))
        num_components = decomp.rank

    visualize(decomp, num_components=num_components)
def _partition_components(num_components, num_workers):
    """Split range(num_components) into at most num_workers contiguous, balanced chunks."""
    base, extra = divmod(num_components, num_workers)
    chunks = []
    start = 0
    # No more chunks than components, so no worker receives an empty list.
    for worker in range(min(num_workers, num_components)):
        # The first `extra` workers absorb one leftover component each.
        stop = start + base + (1 if worker < extra else 0)
        chunks.append(list(range(start, stop)))
        start = stop
    return chunks


def visualize(decomp, top_k=True, num_components=None):
    """
    Visualize the components of a decomposition in parallel worker processes.

    The component ids are split into contiguous chunks and each chunk is
    rendered by ``draw_pdf`` in its own process.

    Parameters
    ----------
    decomp : ensign.cp_decomp.CPDecomp
        Decomposition to visualize.
    top_k : bool
        Whether or not to include the top 10 labels of each mode in the
        visuals.
    num_components : int
        Number of components to visualize. If left as None, then the rank of
        the decomposition is used. Values outside ``[0, decomp.rank]`` are
        clamped to that interval.

    Returns
    -------
    exitcode_sum : int
        Sum of the exit codes of the worker processes (0 when every worker
        finished cleanly or when there was nothing to draw).
    """
    if top_k:
        # Computed once for every component so the workers do not repeat it.
        top_k = ctk.get_top_k(decomp.factors, decomp.labels, list(range(decomp.rank)), 10)
    if num_components is None:
        num_components = decomp.rank
    # Component ids beyond the rank do not exist; drawing them would only
    # crash the workers.
    num_components = max(0, min(num_components, decomp.rank))

    # use the number of physical processors, rather than logical
    cpu_total = mltprc.cpu_count()
    num_cores = cpu_total // 2 if cpu_total > 1 else 1

    # creates partitions of the components among the available CPUs
    # if even partitions are not possible, the first CPUs each visualize one
    # "extra" component
    # some cores may go unused if the number of cores exceeds the number of
    # components
    # example: for 4 cores and 30 components, will give:
    # [[0, ..., 7], [8, ..., 15], [16, ..., 22], [23, ..., 29]]
    process_comp_map = _partition_components(num_components, num_cores)

    procs = []
    for comp_range in process_comp_map:
        p = mltprc.Process(target=draw_pdf, args=(decomp, comp_range, top_k))
        procs.append(p)
        p.start()

    exitcode_sum = 0
    for p in procs:
        p.join()
        exitcode_sum += p.exitcode
    return exitcode_sum
def draw_pdf(decomp, comp_range, top_k):
    """
    Worker body used to parallelize rendering.

    Renders every component id in ``comp_range`` through
    ``_plot_component`` with ``inline=False`` and returns 0 once all of
    them have been processed.
    """
    for component in comp_range:
        _plot_component(decomp, component, top_k=top_k, inline=False)
    return 0
def plot_component(decomp, comp_id, inline=True):
    """
    Visualizes a specific component of a decomposition using blue-line charts.

    Computes the top 10 labels for this one component and delegates the
    drawing to ``_plot_component``.

    Parameters
    ----------
    decomp : CPDecomp
        Decomposition object from which to visualize a component.
    comp_id : int
        The ID of the component to visualize.
    inline : bool
        Set to True if using within a Jupyter notebook and False to get PNG
        file as output. The output file is named
        '<comp_id>_comp_<comp_id>.png'.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Figure object representing visualization.
    """
    top_k = ctk.get_top_k(decomp.factors, decomp.labels, [comp_id], 10)
    return _plot_component(decomp, comp_id, top_k, inline=inline)
def _y_axis_limits(min_val, max_val):
    """Return the (y_min, y_max) view limits for scores spanning [min_val, max_val]."""
    # Enforce a minimum spread first, so a constant column (e.g. all zeros)
    # still produces a non-degenerate axis.
    if max_val - min_val < TOLERANCE_VALUE:
        max_val = min_val + TOLERANCE_VALUE
    if max_val > 0 and min_val > 0:
        # All-positive scores: anchor the axis at zero.
        return 0, AXIS_SCALE_OVERHEAD * max_val
    if max_val < 0 and min_val < 0:
        # All-negative scores: anchor the axis at zero from above.
        return AXIS_SCALE_OVERHEAD * min_val, 0
    return AXIS_SCALE_OVERHEAD * min_val, AXIS_SCALE_OVERHEAD * max_val


def _plot_component(decomp, comp_id, top_k, inline=True):
    """
    Driver function for plot_component(). Separated so that the Top 10 labels
    can be calculated for every component ahead of time instead of every
    process computing the Top 10 for the components it's responsible for.

    One subplot is drawn per mode, up to MAX_MODE_PER_FRAME modes. Modes with
    fewer than DISCRETE_COUNT labels are drawn as one vertical bar per label;
    larger modes are drawn as a line plot. ``decomp`` is not modified.

    Parameters
    ----------
    decomp : CPDecomp
        Decomposition object from which to visualize a component.
    comp_id : int
        The ID of the component to visualize.
    top_k : {int: {int: (str, int, float)}}
        Top-K results. See ensign.comp_top_k. A falsy value skips the
        Top 10 annotations.
    inline : bool
        Set to True if using within a Jupyter notebook and False to get PNG
        file as output. The output file is named
        '<comp_id>_comp_<comp_id>.png' and the figure is closed after saving.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Figure object representing visualization.
    """
    if inline:
        fig_width, fig_height, title_size, text_size = 14, 10, 14, 10
    else:
        fig_width, fig_height, title_size, text_size = 14, 10, 14, 9

    # Cap the number of plotted modes locally; writing the cap back into
    # decomp.order would silently alter the caller's decomposition.
    num_modes = min(decomp.order, MAX_MODE_PER_FRAME)

    # create figure
    fig, axes = plt.subplots(num_modes, figsize=(fig_width, fig_height))
    # Draw once up front so a renderer exists for the draw_artist calls below.
    fig.canvas.draw()
    if num_modes == 1:
        # plt.subplots returns a bare Axes for a single subplot.
        axes = np.array([axes])

    line_plots = [ax.plot([], [], color='blue')[0] for ax in axes]
    fig.canvas.blit(axes[0].bbox)

    fig.suptitle('Component {}, Weight: {}'.format(comp_id, decomp.weights[comp_id]),
                 fontsize=title_size, y=1.02)

    for mode_id in range(num_modes):
        ax = axes[mode_id]

        # retrieve important values for plotting
        mode_size = decomp.mode_sizes[mode_id]
        mode_name = decomp.mode_names[mode_id]
        scores = decomp.factors[mode_id][:, comp_id]
        y_min, y_max = _y_axis_limits(np.min(scores), np.max(scores))

        x_lo = -X_AXIS_OVERHEAD * mode_size
        x_hi = (mode_size - 1) + X_AXIS_OVERHEAD * mode_size

        # set up axes
        ax.axis(xmin=x_lo, xmax=x_hi)
        ax.tick_params(labelsize=text_size)
        ax.get_xaxis().set_major_locator(ticker.MaxNLocator(integer=True))

        # draw the x-axis, as long as it is not too close to y_max or y_min
        smallest_view_increment = (y_max - y_min) / 50.0
        if y_min < -1 * smallest_view_increment and y_max > smallest_view_increment:
            ax.axhline(y=0, color='k', linewidth=0.25)

        ax.set_title(mode_name, size=text_size)
        if (np.asarray(scores) < 0).any():
            ax.set_ylabel("Score ~ [-1, 1]", fontsize=text_size)
        else:
            ax.set_ylabel("Score ~ [0, 1]", fontsize=text_size)
        ax.set_xlabel("'{}' Label Index ~ [0, {})".format(mode_name, mode_size),
                      fontsize=text_size)

        # draw the bars
        if mode_size < DISCRETE_COUNT:
            # for discrete plotting, draw vertical lines to create bars
            # representing the scores: one segment from (i, score) to (i, 0)
            bars = LineCollection(
                [[(idx, scores[idx]), (idx, 0)] for idx in range(mode_size)],
                colors='blue', linewidth=1.5)
            ax.add_collection(bars)
            ax.axis(ymin=y_min, ymax=y_max, xmin=x_lo, xmax=x_hi)
            ax.draw_artist(bars)
            line_plots[mode_id].set_data([], [])
            ax.set_yticks(np.linspace(y_min, y_max, num=5))
        else:
            # x positions are the integer label indices 0 .. mode_size - 1
            line_plots[mode_id].set_data(np.arange(mode_size), scores)
            ax.relim()
            ax.autoscale()
            ax.axis(xmin=x_lo, xmax=x_hi)
            ax.set_yticks(np.linspace(y_min, y_max, num=5))
            ax.draw_artist(line_plots[mode_id])

        # draw top k
        if top_k:
            ymin, ymax = ax.get_ylim()
            _, xmax = ax.get_xlim()
            width, height = xmax, ymax + abs(ymin)
            text_height = height / 10
            # Shrink the annotation font as more modes share the figure.
            fontsize = text_size - (2 + (num_modes - 5))
            for i, tup in enumerate(top_k[comp_id][mode_id]):
                label, idx, score = tup
                label = str(label)
                text_y = ymax - (text_height * (i + 1))
                # Abbreviate long labels to their first and last 15 characters.
                label = label[:15] + '...' + label[-15:] if len(label) > 30 else label
                ax.text(x=width, y=text_y,
                        s=" {:04d} | {} | {:04f}".format(idx, label, score),
                        fontsize=fontsize)
            ax.text(x=width, y=ymax, s=" [Top 10] Index | Label | Score",
                    fontsize=fontsize, color='red')

    # Save the figure
    plt.subplots_adjust(hspace=.99)
    fig.tight_layout(rect=[0, 0, 1, 0.99])
    if not inline:
        fig.savefig('{}_comp_{}.png'.format(comp_id, comp_id), bbox_inches='tight', dpi=DPI)
        # Close only this figure; other open figures are left untouched.
        plt.close(fig)
    return fig
# Run the command-line workflow only when executed as a script, not on import.
if __name__ == '__main__':
    main()