
TFMA

tensorflow_model_analysis

Init module for TensorFlow Model Analysis.

Attributes

ANALYSIS_KEY module-attribute

ANALYSIS_KEY = 'analysis'

ARROW_INPUT_COLUMN module-attribute

ARROW_INPUT_COLUMN = '__raw_record__'

ARROW_RECORD_BATCH_KEY module-attribute

ARROW_RECORD_BATCH_KEY = 'arrow_record_batch'

ATTRIBUTIONS_KEY module-attribute

ATTRIBUTIONS_KEY = 'attributions'

AddMetricsCallbackType module-attribute

AttributionsForSlice module-attribute

AttributionsForSlice = AttributionsForSlice

BASELINE_KEY module-attribute

BASELINE_KEY = 'baseline'

BASELINE_SCORE_KEY module-attribute

BASELINE_SCORE_KEY = 'baseline_score'

CANDIDATE_KEY module-attribute

CANDIDATE_KEY = 'candidate'

DATA_CENTRIC_MODE module-attribute

DATA_CENTRIC_MODE = 'data_centric_mode'

EXAMPLE_SCORE_KEY module-attribute

EXAMPLE_SCORE_KEY = 'example_score'

EXAMPLE_WEIGHTS_KEY module-attribute

EXAMPLE_WEIGHTS_KEY = 'example_weights'

Extracts module-attribute

Extracts = MutableMapping[str, Any]

FEATURES_KEY module-attribute

FEATURES_KEY = 'features'

FEATURES_PREDICTIONS_LABELS_KEY module-attribute

FEATURES_PREDICTIONS_LABELS_KEY = '_fpl'

INPUT_KEY module-attribute

INPUT_KEY = 'input'

LABELS_KEY module-attribute

LABELS_KEY = 'labels'

METRICS_KEY module-attribute

METRICS_KEY = 'metrics'

MODEL_CENTRIC_MODE module-attribute

MODEL_CENTRIC_MODE = 'model_centric_mode'

MaybeMultipleEvalSharedModels module-attribute

MaybeMultipleEvalSharedModels = Union[
    EvalSharedModel,
    List[EvalSharedModel],
    Dict[str, EvalSharedModel],
]

MetricsForSlice module-attribute

MetricsForSlice = MetricsForSlice

PLOTS_KEY module-attribute

PLOTS_KEY = 'plots'

PREDICTIONS_KEY module-attribute

PREDICTIONS_KEY = 'predictions'

PlotsForSlice module-attribute

PlotsForSlice = PlotsForSlice

SLICE_KEY_TYPES_KEY module-attribute

SLICE_KEY_TYPES_KEY = '_slice_key_types'

TFMA_EVAL module-attribute

TFMA_EVAL = 'tfma_eval'

TF_ESTIMATOR module-attribute

TF_ESTIMATOR = 'tf_estimator'

TF_GENERIC module-attribute

TF_GENERIC = 'tf_generic'

TF_JS module-attribute

TF_JS = 'tf_js'

TF_KERAS module-attribute

TF_KERAS = 'tf_keras'

TF_LITE module-attribute

TF_LITE = 'tf_lite'

TensorType module-attribute

TensorType = Union[Tensor, SparseTensor, RaggedTensor]

TensorTypeMaybeDict module-attribute

TensorTypeMaybeDict = Union[TensorType, DictOfTensorType]

TensorValue module-attribute

TensorValue = Union[
    ndarray,
    SparseTensorValue,
    RaggedTensorValue,
    SparseTensorValue,
]

VALIDATIONS_KEY module-attribute

VALIDATIONS_KEY = 'validations'

VERSION_STRING module-attribute

VERSION_STRING = '0.49.0.dev'

ValidationResult module-attribute

ValidationResult = ValidationResult

Classes

EvalResult

Bases: NamedTuple('EvalResult', [('slicing_metrics', List[SlicedMetrics]), ('plots', List[SlicedPlots]), ('attributions', List[SlicedAttributions]), ('config', EvalConfig), ('data_location', str), ('file_format', str), ('model_location', str)])

The result of a single model analysis run.

Attributes

slicing_metrics: A list of tfma.SlicedMetrics, containing metric values for each slice.
plots: List of slice-plot pairs.
attributions: List of SlicedAttributions containing attribution values for each slice.
config: The config containing slicing and metrics specification.
data_location: Optional location for data used with config.
file_format: Optional format for data used with config.
model_location: Optional location(s) for model(s) used with config.
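
A minimal sketch of inspecting an EvalResult after a run; the output path below is a placeholder for wherever a previous evaluation wrote its results:

import tensorflow_model_analysis as tfma

# Placeholder path; point this at the output_path of a completed run.
result = tfma.load_eval_result(output_path='/tmp/tfma_output')
print(result.get_slice_names())   # e.g. [(), (('language', 'en'),), ...]
print(result.get_metric_names())  # e.g. ['example_count', 'accuracy']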

Functions
get_attributions_for_all_slices
get_attributions_for_all_slices(
    metric_name: str = "",
    output_name: str = "",
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None,
) -> Dict[str, AttributionsByFeatureKey]

Get attribution feature keys and values for every slice.


Parameters

metric_name: Name of metric to get attributions for. Optional if only one metric used.
output_name: The name of the output (optional, only used for multi-output models).
class_id: Used with multi-class metrics to identify a specific class ID.
k: Used with multi-class metrics to identify the kth predicted value.
top_k: Used with multi-class and ranking metrics to identify top-k predicted values.

Returns

Dictionary mapping slices to attribution feature keys and values.
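
For example, assuming result is a tfma.EvalResult from a run that computed attribution metrics (the metric name below is illustrative):

# Hypothetical metric name; use one of the attribution metrics
# actually configured for the run.
attributions_by_slice = result.get_attributions_for_all_slices(
    metric_name='total_attributions')
for slice_key, by_feature in attributions_by_slice.items():
    print(slice_key, by_feature)  # feature key -> attribution value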

Source code in tensorflow_model_analysis/view/view_types.py
def get_attributions_for_all_slices(
    self,
    metric_name: str = "",
    output_name: str = "",
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None,
) -> Dict[str, AttributionsByFeatureKey]:
    """Get attribution feature keys and values for every slice.

    Args:
    ----
      metric_name: Name of metric to get attributions for. Optional if only one
        metric used.
      output_name: The name of the output (optional, only used for multi-output
        models).
      class_id: Used with multi-class metrics to identify a specific class ID.
      k: Used with multi-class metrics to identify the kth predicted value.
      top_k: Used with multi-class and ranking metrics to identify top-k
        predicted values.

    Returns:
    -------
      Dictionary mapping slices to attribution feature keys and values.
    """
    if class_id or k or top_k:
        sub_key = str(metric_types.SubKey(class_id, k, top_k))
    else:
        sub_key = ""

    all_sliced_attributions = {}
    for sliced_attributions in self.attributions:
        slice_name = sliced_attributions[0]
        attributions = sliced_attributions[1][output_name][sub_key]
        if metric_name:
            attributions = attributions[metric_name]
        elif len(attributions) == 1:
            attributions = list(attributions.values())[0]
        else:
            raise ValueError(
                f"metric_name must be one of the following: {attributions.keys()}"
            )
        all_sliced_attributions[slice_name] = copy.copy(attributions)
    return all_sliced_attributions  # pytype: disable=bad-return-type
get_attributions_for_slice
get_attributions_for_slice(
    slice_name: SliceKeyType = (),
    metric_name: str = "",
    output_name: str = "",
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None,
) -> Union[AttributionsByFeatureKey, None]

Get attribution features names and values for a slice.


Parameters

slice_name: A tuple of the form (column, value), indicating which slice to get attributions from. Optional; if excluded, use overall slice.
metric_name: Name of metric to get attributions for. Optional if only one metric used.
output_name: The name of the output. Optional, only used for multi-output models.
class_id: Used with multi-class models to identify a specific class ID.
k: Used with multi-class models to identify the kth predicted value.
top_k: Used with multi-class models to identify top-k attribution values.

Returns

Dictionary containing feature keys and values for the specified slice.

Raises

ValueError: If a metric_name is required (more than one metric is present) but none was provided.

Source code in tensorflow_model_analysis/view/view_types.py
def get_attributions_for_slice(
    self,
    slice_name: slicer.SliceKeyType = (),
    metric_name: str = "",
    output_name: str = "",
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None,
) -> Union[AttributionsByFeatureKey, None]:
    """Get attribution features names and values for a slice.

    Args:
    ----
      slice_name: A tuple of the form (column, value), indicating which slice to
        get attributions from. Optional; if excluded, use overall slice.
      metric_name: Name of metric to get attributions for. Optional if only one
        metric used.
      output_name: The name of the output. Optional, only used for multi-output
        models.
      class_id: Used with multi-class models to identify a specific class ID.
      k: Used with multi-class models to identify the kth predicted value.
      top_k: Used with multi-class models to identify top-k attribution values.

    Returns:
    -------
      Dictionary containing feature keys and values for the specified slice.

    Raises:
    ------
      ValueError: If metric_name is required.
    """
    if class_id or k or top_k:
        sub_key = str(metric_types.SubKey(class_id, k, top_k))
    else:
        sub_key = ""

    def equals_slice_name(slice_key):
        if not slice_key:
            return not slice_name
        else:
            return slice_key == slice_name

    for sliced_attributions in self.attributions:
        slice_key = sliced_attributions[0]
        slice_val = sliced_attributions[1]
        if equals_slice_name(slice_key):
            if metric_name:
                return slice_val[output_name][sub_key][metric_name]
            elif len(slice_val[output_name][sub_key]) == 1:
                return list(slice_val[output_name][sub_key].values())[0]
            else:
                raise ValueError(
                    f"metric_name must be one of the following: {slice_val[output_name][sub_key].keys()}"
                )

    # if slice could not be found, return None
    return None
get_metric_names
get_metric_names() -> Sequence[str]

Get names of metrics.

Returns

List of metric names.

Source code in tensorflow_model_analysis/view/view_types.py
def get_metric_names(self) -> Sequence[str]:
    """Get names of metrics.

    Returns
    -------
      List of metric names.
    """
    metric_names = set()
    for slicing_metric in self.slicing_metrics:
        for output_name in slicing_metric[1]:
            for metrics in slicing_metric[1][output_name].values():
                metric_names.update(metrics)
    return list(metric_names)
get_metrics_for_all_slices
get_metrics_for_all_slices(
    output_name: str = "",
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None,
) -> Dict[str, MetricsByTextKey]

Get metric names and values for every slice.


Parameters

output_name: The name of the output (optional, only used for multi-output models).
class_id: Used with multi-class metrics to identify a specific class ID.
k: Used with multi-class metrics to identify the kth predicted value.
top_k: Used with multi-class and ranking metrics to identify top-k predicted values.

Returns

Dictionary mapping slices to metric names and values.
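
A short sketch, assuming result is a tfma.EvalResult from a multi-class evaluation whose metrics were configured with top_k=3 (the sub-key value is illustrative):

metrics_by_slice = result.get_metrics_for_all_slices(top_k=3)
for slice_key, metrics in metrics_by_slice.items():
    # metrics maps metric names to their (possibly nested) values.
    print(slice_key, metrics)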

Source code in tensorflow_model_analysis/view/view_types.py
def get_metrics_for_all_slices(
    self,
    output_name: str = "",
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None,
) -> Dict[str, MetricsByTextKey]:
    """Get metric names and values for every slice.

    Args:
    ----
      output_name: The name of the output (optional, only used for multi-output
        models).
      class_id: Used with multi-class metrics to identify a specific class ID.
      k: Used with multi-class metrics to identify the kth predicted value.
      top_k: Used with multi-class and ranking metrics to identify top-k
        predicted values.

    Returns:
    -------
      Dictionary mapping slices to metric names and values.
    """
    if all(v is None for v in [class_id, k, top_k]):
        sub_key = ""
    else:
        sub_key = str(metric_types.SubKey(class_id, k, top_k))

    sliced_metrics = {}
    for slicing_metric in self.slicing_metrics:
        slice_name = slicing_metric[0]
        metrics = slicing_metric[1][output_name][sub_key]
        sliced_metrics[slice_name] = {
            metric_name: metric_value
            for metric_name, metric_value in metrics.items()
        }
    return sliced_metrics  # pytype: disable=bad-return-type
get_metrics_for_slice
get_metrics_for_slice(
    slice_name: SliceKeyType = (),
    output_name: str = "",
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None,
) -> Union[MetricsByTextKey, None]

Get metric names and values for a slice.


Parameters

slice_name: A tuple of the form (column, value), indicating which slice to get metrics from. Optional; if excluded, return overall metrics.
output_name: The name of the output. Optional, only used for multi-output models.
class_id: Used with multi-class metrics to identify a specific class ID.
k: Used with multi-class metrics to identify the kth predicted value.
top_k: Used with multi-class and ranking metrics to identify top-k predicted values.

Returns

Dictionary containing metric names and values for the specified slice.
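
For example, assuming result is a tfma.EvalResult and the evaluation was sliced on a 'language' feature (the feature name and value are placeholders); the empty tuple selects the overall slice, and a specific slice is addressed with a tuple of (column, value) pairs:

overall_metrics = result.get_metrics_for_slice()  # slice_name=() -> overall
english_metrics = result.get_metrics_for_slice(
    slice_name=(('language', 'en'),))
print(overall_metrics)
print(english_metrics)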

Source code in tensorflow_model_analysis/view/view_types.py
def get_metrics_for_slice(
    self,
    slice_name: slicer.SliceKeyType = (),
    output_name: str = "",
    class_id: Optional[int] = None,
    k: Optional[int] = None,
    top_k: Optional[int] = None,
) -> Union[MetricsByTextKey, None]:
    """Get metric names and values for a slice.

    Args:
    ----
      slice_name: A tuple of the form (column, value), indicating which slice to
        get metrics from. Optional; if excluded, return overall metrics.
      output_name: The name of the output. Optional, only used for multi-output
        models.
      class_id: Used with multi-class metrics to identify a specific class ID.
      k: Used with multi-class metrics to identify the kth predicted value.
      top_k: Used with multi-class and ranking metrics to identify top-k
        predicted values.

    Returns:
    -------
      Dictionary containing metric names and values for the specified slice.
    """
    if all(v is None for v in [class_id, k, top_k]):
        sub_key = ""
    else:
        sub_key = str(metric_types.SubKey(class_id, k, top_k))

    def equals_slice_name(slice_key):
        if not slice_key:
            return not slice_name
        else:
            return slice_key == slice_name

    for slicing_metric in self.slicing_metrics:
        slice_key = slicing_metric[0]
        slice_val = slicing_metric[1]
        if equals_slice_name(slice_key):
            return slice_val[output_name][sub_key]

    # if slice could not be found, return None
    return None
get_slice_names
get_slice_names() -> Sequence[str]

Get names of slices.

Returns

List of slice names.

Source code in tensorflow_model_analysis/view/view_types.py
def get_slice_names(self) -> Sequence[str]:
    """Get names of slices.

    Returns
    -------
      List of slice names.
    """
    return [
        slicing_metric[0] for slicing_metric in self.slicing_metrics
    ]  # pytype: disable=bad-return-type

EvalSharedModel

Bases: NamedTuple('EvalSharedModel', [('model_path', str), ('add_metrics_callbacks', List[Callable]), ('include_default_metrics', bool), ('example_weight_key', Union[str, Dict[str, str]]), ('additional_fetches', List[str]), ('model_loader', ModelLoader), ('model_name', str), ('model_type', str), ('rubber_stamp', bool), ('is_baseline', bool), ('resource_hints', Optional[Dict[str, Any]]), ('backend_config', Optional[Any])])

Shared model used during extraction and evaluation.

Attributes

model_path: Path to EvalSavedModel (containing the saved_model.pb file).
add_metrics_callbacks: Optional list of callbacks for adding additional metrics to the graph. The names of the metrics added by the callbacks should not conflict with existing metrics. See below for more details about what each callback should do. The callbacks are only used during evaluation.
include_default_metrics: True to include the default metrics that are part of the saved model graph during evaluation.
example_weight_key: Example weight key (single-output model) or dict of example weight keys (multi-output model) keyed by output_name.
additional_fetches: Prefixes of additional tensors stored in signature_def.inputs that should be fetched at prediction time. The "features" and "labels" tensors are handled automatically and should not be included in this list.
model_loader: Model loader.
model_name: Model name (should align with ModelSpecs.name).
model_type: Model type (tfma.TF_KERAS, tfma.TF_LITE, tfma.TF_ESTIMATOR, ..).
rubber_stamp: True if this model is being rubber stamped. When a model is rubber stamped, diff thresholds will be ignored if an associated baseline model is not passed.
is_baseline: Whether the model is the baseline for comparison.
resource_hints: The beam resource hints to apply to the PTransform which runs inference for this model.
backend_config: The backend config for running model inference.

More details on add_metrics_callbacks:

Each add_metrics_callback should have the following prototype:

def add_metrics_callback(features_dict, predictions_dict, labels_dict):

Note that features_dict, predictions_dict and labels_dict are not necessarily dictionaries - they might also be Tensors, depending on what the model's eval_input_receiver_fn returns.

It should create and return a metric_ops dictionary, such that metric_ops['metric_name'] = (value_op, update_op), just as in the Trainer.

Short example:

def add_metrics_callback(features_dict, predictions_dict, labels_dict):
    metric_ops = {}
    metric_ops['mean_label'] = tf.metrics.mean(labels_dict)
    metric_ops['mean_probability'] = tf.metrics.mean(tf.slice(
        predictions_dict['probabilities'], [0, 1], [2, 1]))
    return metric_ops

FeaturesPredictionsLabels

Bases: NamedTuple

Attributes
features instance-attribute
input_ref instance-attribute
input_ref: int
labels instance-attribute
predictions instance-attribute

MaterializedColumn

Bases: NamedTuple

Attributes
name instance-attribute
name: str
value instance-attribute

ModelLoader

ModelLoader(
    construct_fn: Callable[[], Any],
    tags: Optional[List[str]] = None,
)

Model loader is responsible for loading shared model types.

Attributes

construct_fn: A callable which creates the model instance. The callable should take no args as input (typically a closure is used to capture necessary parameters).
tags: Optional model tags (e.g. 'serve' for serving or 'eval' for EvalSavedModel).
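
A minimal sketch of constructing a ModelLoader directly, assuming a plain TF SavedModel at a placeholder path; the closure captures the path so construct_fn itself takes no arguments. In practice default_eval_shared_model builds the loader for you:

import tensorflow as tf
import tensorflow_model_analysis as tfma

model_path = '/tmp/my_saved_model'  # placeholder
loader = tfma.ModelLoader(
    construct_fn=lambda: tf.saved_model.load(model_path, tags=['serve']),
    tags=['serve'])
model = loader.load()  # loads (and shares) the model instance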

Source code in tensorflow_model_analysis/api/types.py
def __init__(
    self, construct_fn: Callable[[], Any], tags: Optional[List[str]] = None
):
    self.construct_fn = construct_fn
    self.tags = tags
    self._shared_handle = shared.Shared()
Attributes
construct_fn instance-attribute
construct_fn = construct_fn
tags instance-attribute
tags = tags
Functions
load
load(
    model_load_time_callback: Optional[
        Callable[[int], None]
    ] = None,
) -> Any

Returns loaded model.


Parameters

model_load_time_callback: Optional callback to track load time.

Source code in tensorflow_model_analysis/api/types.py
def load(
    self, model_load_time_callback: Optional[Callable[[int], None]] = None
) -> Any:
    """Returns loaded model.

    Args:
    ----
      model_load_time_callback: Optional callback to track load time.
    """
    if model_load_time_callback:
        construct_fn = self._construct_fn_with_load_time(model_load_time_callback)
    else:
        construct_fn = self.construct_fn
    return self._shared_handle.acquire(construct_fn)

RaggedTensorValue

Bases: NamedTuple('RaggedTensorValue', [('values', ndarray), ('nested_row_splits', List[ndarray])])

RaggedTensorValue encapsulates a batch of ragged tensor values.

Attributes

values: A np.ndarray of values.
nested_row_splits: A list of np.ndarray values representing the row splits (one per dimension including the batch dimension).
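
For instance, a ragged batch of two rows, [1, 2, 3] and [4], is represented with flattened values plus a single row-splits array (a sketch; the class is assumed to be importable from the top-level package as listed on this page):

import numpy as np
import tensorflow_model_analysis as tfma

ragged = tfma.RaggedTensorValue(
    values=np.array([1, 2, 3, 4]),
    # Rows are values[0:3] and values[3:4].
    nested_row_splits=[np.array([0, 3, 4])])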

SparseTensorValue

Bases: NamedTuple('SparseTensorValue', [('values', ndarray), ('indices', ndarray), ('dense_shape', ndarray)])

SparseTensorValue encapsulates a batch of sparse tensor values.

Attributes

values: A np.ndarray of values.
indices: A np.ndarray of indices.
dense_shape: A np.ndarray representing the dense shape.
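
For instance, a 2x3 batch with non-zero entries at positions [0, 1] and [1, 2] (values and shape are illustrative):

import numpy as np
import tensorflow_model_analysis as tfma

sparse = tfma.SparseTensorValue(
    values=np.array([7.0, 5.0]),
    indices=np.array([[0, 1], [1, 2]]),
    dense_shape=np.array([2, 3]))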

VarLenTensorValue

Bases: NamedTuple('VarLenTensorValue', [('values', ndarray), ('indices', ndarray), ('dense_shape', ndarray)])

VarLenTensorValue encapsulates a batch of varlen dense tensor values.

Attributes

values: A np.ndarray of values.
indices: A np.ndarray of indices.
dense_shape: A np.ndarray representing the dense shape of the entire tensor. Note that each row (i.e. set of values sharing the same value for the first / batch dimension) is considered to have its own shape based on the presence of values.

Classes
DenseRowIterator
DenseRowIterator(tensor)

An Iterator over rows of a VarLenTensorValue as dense np.arrays.

Because the VarLenTensorValue was created from a set of variable length (dense) arrays, we can invert this process to turn a VarLenTensorValue back into the original dense arrays.

Source code in tensorflow_model_analysis/api/types.py
def __init__(self, tensor):
    self._tensor = tensor
    self._offset = 0
Functions
Functions
dense_rows
dense_rows()
Source code in tensorflow_model_analysis/api/types.py
def dense_rows(self):
    return self.DenseRowIterator(self)
from_dense_rows classmethod
from_dense_rows(
    dense_rows: Iterable[ndarray],
) -> VarLenTensorValue

Converts a collection of variable length dense arrays into a tensor.


Parameters

dense_rows: A sequence of possibly variable length 1D arrays.

Returns

A new VarLenTensorValue containing the sparse representation of the vertically stacked dense rows. The dense_shape attribute on the result will be (num_rows, max_row_len).
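
A small round-trip sketch using from_dense_rows together with dense_rows():

import numpy as np
import tensorflow_model_analysis as tfma

varlen = tfma.VarLenTensorValue.from_dense_rows(
    [np.array([1, 2, 3]), np.array([4])])
print(varlen.dense_shape)        # [2 3] -> (num_rows, max_row_len)
for row in varlen.dense_rows():  # recovers each original dense row
    print(row)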

Source code in tensorflow_model_analysis/api/types.py
@classmethod
def from_dense_rows(cls, dense_rows: Iterable[np.ndarray]) -> "VarLenTensorValue":
    """Converts a collection of variable length dense arrays into a tensor.

    Args:
    ----
      dense_rows: A sequence of possibly variable length 1D arrays.

    Returns:
    -------
      A new VarLenTensorValue containing the sparse representation of the
      vertically stacked dense rows. The dense_shape attribute on the result
      will be (num_rows, max_row_len).
    """
    rows = []
    index_arrays = []
    max_row_len = 0
    num_rows = 0
    for i, row in enumerate(dense_rows):
        num_rows += 1
        if row.size:
            if row.ndim <= 1:
                # Add a dimension for unsized numpy array. This will solve the problem
                # where scalar numpy arrays like np.array(None), np.array(0) can not
                # be merged with other numpy arrays.
                row = row.reshape(-1)
                rows.append(row)
            else:
                raise ValueError(
                    "Each non-empty dense row should be 1D or scalar but"
                    f" found row with shape {row.shape}."
                )
            index_arrays.append(np.array([[i, j] for j in range(len(row))]))
        max_row_len = max(max_row_len, row.size)
    if index_arrays:
        values = np.concatenate(rows, axis=0)
        indices = np.concatenate(index_arrays, axis=0)
    else:
        # empty case
        values = np.array([])
        indices = np.empty((0, 2))
    dense_shape = np.array([num_rows, max_row_len])
    return cls.__new__(cls, values=values, indices=indices, dense_shape=dense_shape)

Functions

BatchedInputsToExtracts

BatchedInputsToExtracts(
    batched_inputs: PCollection,
) -> PCollection

Converts Arrow RecordBatch inputs to Extracts.
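
A toy sketch, using an in-memory Arrow RecordBatch in place of the batches a TFXIO source would produce:

import apache_beam as beam
import pyarrow as pa
import tensorflow_model_analysis as tfma

batch = pa.RecordBatch.from_pydict({'f0': [1.0, 2.0]})
with beam.Pipeline() as p:
    extracts = (
        p
        | 'CreateBatches' >> beam.Create([batch])
        # Each batch is stored under tfma.ARROW_RECORD_BATCH_KEY.
        | 'ToExtracts' >> tfma.BatchedInputsToExtracts())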

Source code in tensorflow_model_analysis/api/model_eval_lib.py
@beam.ptransform_fn
@beam.typehints.with_input_types(Union[bytes, pa.RecordBatch, types.Extracts])
@beam.typehints.with_output_types(types.Extracts)
def BatchedInputsToExtracts(  # pylint: disable=invalid-name
    batched_inputs: beam.pvalue.PCollection,
) -> beam.pvalue.PCollection:
    """Converts Arrow RecordBatch inputs to Extracts."""

    def to_extracts(
        x: Union[bytes, types.Extracts, pa.RecordBatch],
    ) -> types.Extracts:
        result = {}
        if isinstance(x, dict):
            result.update(x)
        else:
            result[constants.ARROW_RECORD_BATCH_KEY] = x
        return result

    return batched_inputs | "AddArrowRecordBatchKey" >> beam.Map(to_extracts)

ExtractAndEvaluate

ExtractAndEvaluate(
    extracts: PCollection,
    extractors: List[Extractor],
    evaluators: List[Evaluator],
) -> Evaluation

Performs Extractions and Evaluations in provided order.
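
A sketch of using this transform directly with the default extractors and evaluators; eval_config, eval_shared_model, and the extracts PCollection are assumed to be set up as in the ExtractEvaluateAndWriteResults example below:

evaluation = (
    extracts
    | 'ExtractAndEvaluate' >> tfma.ExtractAndEvaluate(
        extractors=tfma.default_extractors(
            eval_config=eval_config, eval_shared_model=eval_shared_model),
        evaluators=tfma.default_evaluators(
            eval_config=eval_config, eval_shared_model=eval_shared_model)))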

Source code in tensorflow_model_analysis/api/model_eval_lib.py
@beam.ptransform_fn
@beam.typehints.with_input_types(types.Extracts)
@beam.typehints.with_output_types(Any)
def ExtractAndEvaluate(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    extractors: List[extractor.Extractor],
    evaluators: List[evaluator.Evaluator],
) -> evaluator.Evaluation:
    """Performs Extractions and Evaluations in provided order."""
    # evaluation[k] = list of values for k
    evaluation = {}

    def update(evaluation: Dict[str, Any], new_evaluation: Dict[str, Any]):
        for k, v in new_evaluation.items():
            if k not in evaluation:
                evaluation[k] = []
            evaluation[k].append(v)
        return evaluation

    _ = extracts | "TrackInputBytes" >> _TrackBytesProcessed()  # pylint: disable=no-value-for-parameter
    # Run evaluators that run before extraction (i.e. that only require
    # the incoming input extract added by ReadInputs)
    for v in evaluators:
        if not v.run_after:
            update(evaluation, extracts | v.stage_name >> v.ptransform)
    for x in extractors:
        extracts = extracts | x.stage_name >> x.ptransform
        for v in evaluators:
            if v.run_after == x.stage_name:
                update(evaluation, extracts | v.stage_name >> v.ptransform)
    for v in evaluators:
        if v.run_after == extractor.LAST_EXTRACTOR_STAGE_NAME:
            update(evaluation, extracts | v.stage_name >> v.ptransform)

    # Merge multi-valued keys if necessary.
    result = {}
    for k, v in evaluation.items():
        if len(v) == 1:
            result[k] = v[0]
            continue

        # Note that we assume that if a key is multivalued, its values are
        # dictionaries with disjoint keys. The combined value will simply be the
        # disjoint union of all the dictionaries.
        result[k] = (
            v
            | "FlattenEvaluationOutput(%s)" % k >> beam.Flatten()
            | "CombineEvaluationOutput(%s)" % k
            >> beam.CombinePerKey(_CombineEvaluationDictionariesFn())
        )

    return result

ExtractEvaluateAndWriteResults

ExtractEvaluateAndWriteResults(
    examples: PCollection,
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
    eval_config: Optional[EvalConfig] = None,
    extractors: Optional[List[Extractor]] = None,
    evaluators: Optional[List[Evaluator]] = None,
    writers: Optional[List[Writer]] = None,
    output_path: Optional[str] = None,
    display_only_data_location: Optional[str] = None,
    display_only_file_format: Optional[str] = None,
    slice_spec: Optional[List[SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    min_slice_size: int = 1,
    random_seed_for_testing: Optional[int] = None,
    tensor_adapter_config: Optional[
        TensorAdapterConfig
    ] = None,
    schema: Optional[Schema] = None,
    config_version: Optional[int] = None,
) -> Dict[str, PCollection]

PTransform for performing extraction, evaluation, and writing results.

Users who want to construct their own Beam pipelines instead of using the lightweight run_model_analysis functions should use this PTransform.

Example usage:

eval_config = tfma.EvalConfig(model_specs=[...], metrics_specs=[...],
                              slicing_specs=[...])
eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path=model_location, eval_config=eval_config)
tfx_io = tf_example_record.TFExampleRecord(
    file_pattern=data_location,
    raw_record_column_name=tfma.ARROW_INPUT_COLUMN)
with beam.Pipeline(runner=...) as p:
  _ = (p
       | 'ReadData' >> tfx_io.BeamSource()
       | 'ExtractEvaluateAndWriteResults' >>
       tfma.ExtractEvaluateAndWriteResults(
           eval_shared_model=eval_shared_model,
           eval_config=eval_config,
           ...))
result = tfma.load_eval_result(output_path=output_path)
tfma.view.render_slicing_metrics(result)

NOTE: If running with an EvalSavedModel (i.e. the ModelSpec has signature_name "eval"), use beam.io.ReadFromTFRecord(data_location) in place of the tfxio.BeamSource() step above.

Note that the exact serialization format is an internal implementation detail and subject to change. Users should only use the TFMA functions to write and read the results.


Parameters

examples: PCollection of input examples or Arrow RecordBatches. Examples can be any format the model accepts (e.g. string containing CSV row, TensorFlow.Example, etc). If the examples are in the form of a dict it will be assumed that input is already in the form of tfma.Extracts with examples stored under tfma.INPUT_KEY (any other keys will be passed along unchanged to downstream extractors and evaluators).
eval_shared_model: Optional shared model (single-model evaluation) or list of shared models (multi-model evaluation). Only required if needed by default extractors, evaluators, or writers and for display purposes of the model path.
eval_config: Eval config.
extractors: Optional list of Extractors to apply to Extracts. Typically these will be added by calling the default_extractors function. If no extractors are provided, default_extractors (non-materialized) will be used.
evaluators: Optional list of Evaluators for evaluating Extracts. Typically these will be added by calling the default_evaluators function. If no evaluators are provided, default_evaluators will be used.
writers: Optional list of Writers for writing Evaluation output. Typically these will be added by calling the default_writers function. If no writers are provided, default_writers will be used.
output_path: Path to output results to (config file, metrics, plots, etc).
display_only_data_location: Optional path indicating where the examples were read from. This is used only for display purposes - data will not actually be read from this path.
display_only_file_format: Optional format of the examples. This is used only for display purposes.
slice_spec: Deprecated (use EvalConfig).
write_config: Deprecated (use EvalConfig).
compute_confidence_intervals: Deprecated (use EvalConfig).
min_slice_size: Deprecated (use EvalConfig).
random_seed_for_testing: Provide for deterministic tests only.
tensor_adapter_config: Tensor adapter config which specifies how to obtain tensors from the Arrow RecordBatch. If None, an attempt will be made to create the tensors using default TensorRepresentations.
schema: A schema to use for customizing evaluators.
config_version: Optional config version for this evaluation. This should not be explicitly set by users. It is only intended to be used in cases where the provided eval_config was generated internally, and thus not a reliable indicator of user intent.

Raises

ValueError: If the EvalConfig is invalid or a matching Extractor is not found for an Evaluator.

Returns

A dict of writer results keyed by the writer stage name.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
@beam.ptransform_fn
def ExtractEvaluateAndWriteResults(  # pylint: disable=invalid-name
    examples: beam.PCollection,
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    output_path: Optional[str] = None,
    display_only_data_location: Optional[str] = None,
    display_only_file_format: Optional[str] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    min_slice_size: int = 1,
    random_seed_for_testing: Optional[int] = None,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
    schema: Optional[schema_pb2.Schema] = None,
    config_version: Optional[int] = None,
) -> Dict[str, beam.PCollection]:
    """PTransform for performing extraction, evaluation, and writing results.

    Users who want to construct their own Beam pipelines instead of using the
    lightweight run_model_analysis functions should use this PTransform.

    Example usage:

    ```python
    eval_config = tfma.EvalConfig(model_specs=[...], metrics_specs=[...],
                                  slicing_specs=[...])
    eval_shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=model_location, eval_config=eval_config)
    tfx_io = tf_example_record.TFExampleRecord(
        file_pattern=data_location,
        raw_record_column_name=tfma.ARROW_INPUT_COLUMN)
    with beam.Pipeline(runner=...) as p:
      _ = (p
           | 'ReadData' >> tfx_io.BeamSource()
           | 'ExtractEvaluateAndWriteResults' >>
           tfma.ExtractEvaluateAndWriteResults(
               eval_shared_model=eval_shared_model,
               eval_config=eval_config,
               ...))
    result = tfma.load_eval_result(output_path=output_path)
    tfma.view.render_slicing_metrics(result)

    NOTE: If running with an EvalSavedModel (i.e. the ModelSpec has signature_name
    "eval"), then instead of using the tfxio.BeamSource() code use the following
    beam.io.ReadFromTFRecord(data_location)
    ```

    Note that the exact serialization format is an internal implementation detail
    and subject to change. Users should only use the TFMA functions to write and
    read the results.

    Args:
    ----
      examples: PCollection of input examples or Arrow Record batches. Examples
        can be any format the model accepts (e.g. string containing CSV row,
        TensorFlow.Example, etc). If the examples are in the form of a dict it
        will be assumed that input is already in the form of tfma.Extracts with
        examples stored under tfma.INPUT_KEY (any other keys will be passed along
        unchanged to downstream extractors and evaluators).
      eval_shared_model: Optional shared model (single-model evaluation) or list
        of shared models (multi-model evaluation). Only required if needed by
        default extractors, evaluators, or writers and for display purposes of the
        model path.
      eval_config: Eval config.
      extractors: Optional list of Extractors to apply to Extracts. Typically
        these will be added by calling the default_extractors function. If no
        extractors are provided, default_extractors (non-materialized) will be
        used.
      evaluators: Optional list of Evaluators for evaluating Extracts. Typically
        these will be added by calling the default_evaluators function. If no
        evaluators are provided, default_evaluators will be used.
      writers: Optional list of Writers for writing Evaluation output. Typically
        these will be added by calling the default_writers function. If no writers
        are provided, default_writers will be used.
      output_path: Path to output results to (config file, metrics, plots, etc).
      display_only_data_location: Optional path indicating where the examples were
        read from. This is used only for display purposes - data will not actually
        be read from this path.
      display_only_file_format: Optional format of the examples. This is used only
        for display purposes.
      slice_spec: Deprecated (use EvalConfig).
      write_config: Deprecated (use EvalConfig).
      compute_confidence_intervals: Deprecated (use EvalConfig).
      min_slice_size: Deprecated (use EvalConfig).
      random_seed_for_testing: Provide for deterministic tests only.
      tensor_adapter_config: Tensor adapter config which specifies how to obtain
        tensors from the Arrow RecordBatch. If None, an attempt will be made to
        create the tensors using default TensorRepresentations.
      schema: A schema to use for customizing evaluators.
      config_version: Optional config version for this evaluation. This should not
        be explicitly set by users. It is only intended to be used in cases where
        the provided eval_config was generated internally, and thus not a reliable
        indicator of user intent.

    Raises:
    ------
      ValueError: If EvalConfig invalid or matching Extractor not found for an
        Evaluator.

    Returns:
    -------
      A dict of writer results keyed by the writer stage name.
    """
    eval_shared_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model
    )

    if eval_config is None:
        config_version = 1 if config_version is None else config_version
        eval_config = _default_eval_config(
            eval_shared_models,
            slice_spec,
            write_config,
            compute_confidence_intervals,
            min_slice_size,
        )
    else:
        config_version = 2 if config_version is None else config_version
        eval_config = _update_eval_config_with_defaults(eval_config, eval_shared_model)
    config_util.verify_eval_config(eval_config)

    if not extractors:
        extractors = default_extractors(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            tensor_adapter_config=tensor_adapter_config,
            config_version=config_version,
        )

    if not evaluators:
        evaluators = default_evaluators(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            random_seed_for_testing=random_seed_for_testing,
            schema=schema,
            config_version=config_version,
        )

    for v in evaluators:
        evaluator.verify_evaluator(v, extractors)

    if not writers:
        writers = default_writers(
            output_path=output_path,
            eval_shared_model=eval_shared_model,
            eval_config=eval_config,
            display_only_data_location=display_only_data_location,
            display_only_data_file_format=display_only_file_format,
        )

    # pylint: disable=no-value-for-parameter
    if is_batched_input(eval_shared_model, eval_config, config_version):
        extracts = examples | "BatchedInputsToExtracts" >> BatchedInputsToExtracts()
    else:
        extracts = examples | "InputsToExtracts" >> InputsToExtracts()

    return (
        extracts
        | "ExtractAndEvaluate"
        >> ExtractAndEvaluate(extractors=extractors, evaluators=evaluators)
        | "WriteResults" >> WriteResults(writers=writers)
    )

InputsToExtracts

InputsToExtracts(inputs: PCollection) -> PCollection

Converts serialized inputs (e.g. examples) to Extracts if not already.
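
A minimal sketch reading serialized tf.Examples from TFRecord files (the data path is a placeholder):

import apache_beam as beam
import tensorflow_model_analysis as tfma

with beam.Pipeline() as p:
    extracts = (
        p
        | 'ReadData' >> beam.io.ReadFromTFRecord('/tmp/eval_data*')
        # Each serialized record is stored under tfma.INPUT_KEY.
        | 'ToExtracts' >> tfma.InputsToExtracts())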

Source code in tensorflow_model_analysis/api/model_eval_lib.py
@beam.ptransform_fn
# TODO(b/156538355): Find out why str is also required instead of just bytes
#   after adding types.Extracts.
@beam.typehints.with_input_types(Union[bytes, str, types.Extracts])
@beam.typehints.with_output_types(types.Extracts)
def InputsToExtracts(  # pylint: disable=invalid-name
    inputs: beam.pvalue.PCollection,
) -> beam.pvalue.PCollection:
    """Converts serialized inputs (e.g. examples) to Extracts if not already."""

    def to_extracts(x: Union[bytes, str, types.Extracts]) -> types.Extracts:
        result = {}
        if isinstance(x, dict):
            result.update(x)
        else:
            result[constants.INPUT_KEY] = x
        return result

    return inputs | "AddInputKey" >> beam.Map(to_extracts)

Validate

Validate(
    extracts: PCollection,
    alternatives: Dict[str, PTransform],
    validators: List[Validator],
) -> Validation

Performs validation of alternative evaluations.


Parameters

extracts: PCollection of extracts.
alternatives: Dict of PTransforms (Extracts -> Evaluation) whose output will be compared for validation purposes (e.g. 'baseline' vs 'candidate').
validators: List of validators for validating the output from running the alternatives. The Validation outputs produced by the validators will be merged into a single output. If there are overlapping output keys, later outputs will replace earlier outputs sharing the same key.

Returns

Validation dict.

Source code in tensorflow_model_analysis/api/verifier_lib.py
@beam.ptransform_fn
@beam.typehints.with_input_types(types.Extracts)
@beam.typehints.with_output_types(Any)
def Validate(  # pylint: disable=invalid-name
    extracts: beam.pvalue.PCollection,
    alternatives: Dict[str, beam.PTransform],
    validators: List[validator.Validator],
) -> validator.Validation:
    """Performs validation of alternative evaluations.

    Args:
    ----
      extracts: PCollection of extracts.
      alternatives: Dict of PTransforms (Extracts -> Evaluation) whose output will
        be compared for validation purposes (e.g. 'baseline' vs 'candidate').
      validators: List of validators for validating the output from running the
        alternatives. The Validation outputs produced by the validators will be
        merged into a single output. If there are overlapping output keys, later
        outputs will replace earlier outputs sharing the same key.

    Returns:
    -------
      Validation dict.
    """
    evaluations = {}
    for key in alternatives:
        evaluations[key] = extracts | "Evaluate(%s)" % key >> alternatives[key]

    validation = {}
    for v in validators:
        validation.update(evaluations | v.stage_name >> v.ptransform)
    return validation

WriteResults

WriteResults(
    evaluation_or_validation: Union[Evaluation, Validation],
    writers: List[Writer],
) -> Dict[str, PCollection]

Writes Evaluation or Validation results using given writers.


Parameters

evaluation_or_validation: Evaluation or Validation output.
writers: Writers to use for writing out the output.

Raises

ValueError: If the Evaluation or Validation is empty.

Returns

A dict of writer results keyed by the writer stage name.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
@beam.ptransform_fn
# TODO(b/157600974): Add input typehint.
def WriteResults(  # pylint: disable=invalid-name
    evaluation_or_validation: Union[evaluator.Evaluation, validator.Validation],
    writers: List[writer.Writer],
) -> Dict[str, beam.PCollection]:
    """Writes Evaluation or Validation results using given writers.

    Args:
    ----
      evaluation_or_validation: Evaluation or Validation output.
      writers: Writes to use for writing out output.

    Raises:
    ------
      ValueError: If Evaluation or Validation is empty.

    Returns:
    -------
      A dict of writer results keyed by the writer stage name.
    """
    if not evaluation_or_validation:
        raise ValueError("Evaluations and Validations cannot be empty")
    result = {}
    for w in writers:
        result[w.stage_name] = evaluation_or_validation | w.stage_name >> w.ptransform
    return result

analyze_raw_data

analyze_raw_data(
    data: DataFrame,
    eval_config: Optional[EvalConfig] = None,
    output_path: Optional[str] = None,
    extractors: Optional[List[Extractor]] = None,
    evaluators: Optional[List[Evaluator]] = None,
    writers: Optional[List[Writer]] = None,
    add_metric_callbacks: Optional[
        List[AddMetricsCallbackType]
    ] = None,
) -> EvalResult

Runs TensorFlow model analysis on a pandas.DataFrame.

This function allows you to use TFMA with Pandas DataFrames. The dataframe must include a 'prediction' column for the predicted label and a 'label' column for the actual label (these default column names can be overridden via the prediction_key and label_key in the EvalConfig model specs).

In addition to a DataFrame, this function requires an eval_config, a tfma.EvalConfig object containing various configuration parameters (see config.proto for a comprehensive list)...

  • the metrics to compute
  • the slices to compute metrics on
  • the DataFrame's column names for example labels and predictions ('label' and 'prediction' by default)
  • confidence interval options

This function returns a tfma.EvalResult, which contains TFMA's computed metrics and can be used to generate plots with tfma.view.render_slicing_metrics.

Example usage:

model_specs = [
  tfma.ModelSpec(
      prediction_key='prediction',
      label_key='label')
]
metrics_specs = [
    tfma.MetricsSpec(metrics=[
      tfma.MetricConfig(class_name='Accuracy'),
      tfma.MetricConfig(class_name='ExampleCount')
    ])
]
slicing_specs = [
    tfma.SlicingSpec(),  # the empty slice represents overall dataset
    tfma.SlicingSpec(feature_keys=['language'])
]
eval_config = tfma.EvalConfig(
    model_specs=model_specs,
    metrics_specs=metrics_specs,
    slicing_specs=slicing_specs)
result = tfma.analyze_raw_data(df, eval_config)
tfma.view.render_slicing_metrics(result)

# Example with Fairness Indicators
from tensorflow_model_analysis.addons.fairness.post_export_metrics import fairness_indicators
from tensorflow_model_analysis.addons.fairness.view import widget_view
add_metrics_callbacks = [
    tfma.post_export_metrics.fairness_indicators(thresholds=[0.25, 0.5, 0.75])
]
result = tfma.analyze_raw_data(
    data=df,
    eval_config=eval_config,
    add_metric_callbacks=add_metrics_callbacks
)
widget_view.render_fairness_indicator(result)

Parameters

data: A pandas.DataFrame, where rows correspond to examples and columns correspond to features. One column must indicate a row's predicted label, and one column must indicate a row's actual label.
eval_config: A tfma.EvalConfig, which contains various configuration parameters including metrics, slices, and label/prediction column names.
output_path: Path to write EvalResult to.
extractors: Optional list of Extractors to apply to Extracts. Typically these will be added by calling the default_extractors function. If no extractors are provided, default_extractors (non-materialized) will be used.
evaluators: Optional list of Evaluators for evaluating Extracts. Typically these will be added by calling the default_evaluators function. If no evaluators are provided, default_evaluators will be used.
writers: Optional list of Writers for writing Evaluation output. Typically these will be added by calling the default_writers function. If no writers are provided, default_writers with add_metric_callbacks will be used.
add_metric_callbacks: Optional list of metric callbacks (if used).

Returns

A tfma.EvalResult to extract metrics or generate visualizations from.

Raises

KeyError: If the prediction or label columns are not found within the DataFrame.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def analyze_raw_data(
    data: pd.DataFrame,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    output_path: Optional[str] = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    add_metric_callbacks: Optional[List[types.AddMetricsCallbackType]] = None,
) -> view_types.EvalResult:
    """Runs TensorFlow model analysis on a pandas.DataFrame.

    This function allows you to use TFMA with Pandas DataFrames. The dataframe
    must include a 'predicted' column for the predicted label and a 'label' column
    for the actual label.

    In addition to a DataFrame, this function requires an eval_config, a
    `tfma.EvalConfig` object containing various configuration parameters (see
    [config.proto](https://github.com/tensorflow/model-analysis/blob/master/tensorflow_model_analysis/proto/config.proto)
    for a comprehensive list)...

    * the metrics to compute
    * the slices to compute metrics on
    * the DataFrame's column names for example labels and predictions ('label'
      and 'prediction' by default)
    * confidence interval options

    This function returns a `tfma.EvalResult`, which contains TFMA's computed
    metrics and can be used to generate plots with
    `tfma.view.render_slicing_metrics`.

    Example usage:

    ```python
    model_specs = [
      tfma.ModelSpec(
          prediction_key='prediction',
          label_key='label')
    ]
    metrics_specs = [
        tfma.MetricsSpec(metrics=[
          tfma.MetricConfig(class_name='Accuracy'),
          tfma.MetricConfig(class_name='ExampleCount')
        ])
    ]
    slicing_specs = [
        tfma.SlicingSpec(),  # the empty slice represents overall dataset
        tfma.SlicingSpec(feature_keys=['language'])
    ]
    eval_config = tfma.EvalConfig(
        model_specs=model_specs,
        metrics_specs=metrics_specs,
        slicing_specs=slicing_specs)
    result = tfma.analyze_raw_data(df, eval_config)
    tfma.view.render_slicing_metrics(result)

    # Example with Fairness Indicators
    from tensorflow_model_analysis.addons.fairness.post_export_metrics import
    fairness_indicators
    from tensorflow_model_analysis.addons.fairness.view import widget_view
    add_metrics_callbacks = [
        tfma.post_export_metrics.fairness_indicators(thresholds=[0.25, 0.5, 0.75])
    ]
    result = tfma.analyze_raw_data(
        data=df,
        metrics_specs=metrics_specs,
        slicing_specs=slicing_specs,
        add_metric_callbacks=add_metrics_callbacks
    )
    widget_view.render_fairness_indicator(result)
    ```

    Args:
    ----
      data: A pandas.DataFrame, where rows correspond to examples and columns
        correspond to features. One column must indicate a row's predicted label,
        and one column must indicate a row's actual label.
      eval_config: A `tfma.EvalConfig`, which contains various configuration
        parameters including metrics, slices, and label/prediction column names.
      output_path: Path to write EvalResult to.
      extractors: Optional list of Extractors to apply to Extracts. Typically
        these will be added by calling the default_extractors function. If no
        extractors are provided, default_extractors (non-materialized) will be
        used.
      evaluators: Optional list of Evaluators for evaluating Extracts. Typically
        these will be added by calling the default_evaluators function. If no
        evaluators are provided, default_evaluators will be used.
      writers: Optional list of Writers for writing Evaluation output. Typically
        these will be added by calling the default_writers function. If no writers
        are provided, default_writers with `add_metric_callbacks` will be used.
      add_metric_callbacks: Optional list of metric callbacks (if used).

    Returns:
    -------
      A tfma.EvalResult to extract metrics or generate visualizations from.

    Raises:
    ------
      KeyError: If the prediction or label columns are not found within the
        DataFrame.
    """
    for model_spec in eval_config.model_specs:  # pytype: disable=attribute-error
        model_spec.prediction_key = model_spec.prediction_key or "prediction"
        model_spec.label_key = model_spec.label_key or "label"
        if model_spec.prediction_key not in data.columns:
            raise KeyError(
                "The prediction_key column was not found. Looked for %s but found: %s"
                % (model_spec.prediction_key, list(data.columns))
            )
        if model_spec.label_key not in data.columns:
            raise KeyError(
                "The label_key column was not found. Looked for %s but found: %s"
                % (model_spec.label_key, list(data.columns))
            )

    # TODO(b/153570803): Validity check / assertions for dataframe structure
    if eval_config.slicing_specs is None:  # pytype: disable=attribute-error
        eval_config.slicing_specs = [config_pb2.SlicingSpec(feature_keys=[""])]
    if output_path is None:
        output_path = tempfile.mkdtemp()

    arrow_data = table_util.CanonicalizeRecordBatch(pa.RecordBatch.from_pandas(data))
    beam_data = beam.Create([arrow_data])

    if not writers:
        writers = default_writers(
            output_path,
            eval_config=eval_config,
            add_metric_callbacks=add_metric_callbacks,
        )

    with beam.Pipeline() as p:
        _ = (
            p
            | beam_data
            | "ExtractEvaluateAndWriteResults"
            >> ExtractEvaluateAndWriteResults(  # pylint: disable=no-value-for-parameter
                extractors=extractors,
                evaluators=evaluators,
                writers=writers,
                eval_config=eval_config,
                output_path=output_path,
            )
        )

    return load_eval_result(output_path)

default_eval_shared_model

default_eval_shared_model(
    eval_saved_model_path: str,
    add_metrics_callbacks: Optional[
        List[AddMetricsCallbackType]
    ] = None,
    include_default_metrics: Optional[bool] = True,
    example_weight_key: Optional[
        Union[str, Dict[str, str]]
    ] = None,
    additional_fetches: Optional[List[str]] = None,
    blacklist_feature_fetches: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    model_name: str = "",
    eval_config: Optional[EvalConfig] = None,
    custom_model_loader: Optional[ModelLoader] = None,
    rubber_stamp: Optional[bool] = False,
    resource_hints: Optional[Dict[str, Any]] = None,
    backend_config: Optional[Any] = None,
) -> EvalSharedModel

Returns default EvalSharedModel.


Parameters

eval_saved_model_path: Path to EvalSavedModel.
add_metrics_callbacks: Optional list of callbacks for adding additional metrics to the graph (see EvalSharedModel for more information on how to configure additional metrics). Metrics for example count and example weights will be added automatically. Only used if EvalSavedModel used.
include_default_metrics: DEPRECATED. Use eval_config.options.include_default_metrics.
example_weight_key: DEPRECATED. Use eval_config.model_specs.example_weight_key or eval_config.model_specs.example_weight_keys.
additional_fetches: Optional prefixes of additional tensors stored in signature_def.inputs that should be fetched at prediction time. The "features" and "labels" tensors are handled automatically and should not be included. Only used if EvalSavedModel used.
blacklist_feature_fetches: Optional list of tensor names in the features dictionary which should be excluded from the fetches request. This is useful in scenarios where features are large (e.g. images) and can lead to excessive memory use if stored. Only used if EvalSavedModel used.
tags: Optional model tags (e.g. 'serve' for serving or 'eval' for EvalSavedModel).
model_name: Optional name of the model being created (should match ModelSpecs.name). The name should only be provided if multiple models are being evaluated.
eval_config: Eval config.
custom_model_loader: Optional custom model loader for non-TF models.
rubber_stamp: True when this run is a first run without a baseline model while a baseline is configured; the diff thresholds will be ignored.
resource_hints: The beam resource hints to apply to the PTransform which runs inference for this model.
backend_config: Optional configuration of backend running model inference with some prediction extractors.
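
A minimal sketch for a serving SavedModel evaluated against an EvalConfig; the model path and spec name are placeholders:

import tensorflow_model_analysis as tfma

eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(name='candidate', label_key='label')])
eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path='/tmp/exported_model',
    eval_config=eval_config,
    model_name='candidate',  # must match the ModelSpec name
    tags=['serve'])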

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def default_eval_shared_model(
    eval_saved_model_path: str,
    add_metrics_callbacks: Optional[List[types.AddMetricsCallbackType]] = None,
    include_default_metrics: Optional[bool] = True,
    example_weight_key: Optional[Union[str, Dict[str, str]]] = None,
    additional_fetches: Optional[List[str]] = None,
    blacklist_feature_fetches: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    model_name: str = "",
    eval_config: Optional[config_pb2.EvalConfig] = None,
    custom_model_loader: Optional[types.ModelLoader] = None,
    rubber_stamp: Optional[bool] = False,
    resource_hints: Optional[Dict[str, Any]] = None,
    backend_config: Optional[Any] = None,
) -> types.EvalSharedModel:
    """Returns default EvalSharedModel.

    Args:
    ----
      eval_saved_model_path: Path to EvalSavedModel.
      add_metrics_callbacks: Optional list of callbacks for adding additional
        metrics to the graph (see EvalSharedModel for more information on how to
        configure additional metrics). Metrics for example count and example
        weights will be added automatically. Only used if EvalSavedModel used.
      include_default_metrics: DEPRECATED. Use
        eval_config.options.include_default_metrics.
      example_weight_key: DEPRECATED. Use
        eval_config.model_specs.example_weight_key or
        eval_config.model_specs.example_weight_keys.
      additional_fetches: Optional prefixes of additional tensors stored in
        signature_def.inputs that should be fetched at prediction time. The
        "features" and "labels" tensors are handled automatically and should not
        be included. Only used if EvalSavedModel used.
      blacklist_feature_fetches: Optional list of tensor names in the features
        dictionary which should be excluded from the fetches request. This is
        useful in scenarios where features are large (e.g. images) and can lead to
        excessive memory use if stored. Only used if EvalSavedModel used.
      tags: Optional model tags (e.g. 'serve' for serving or 'eval' for
        EvalSavedModel).
      model_name: Optional name of the model being created (should match
        ModelSpecs.name). The name should only be provided if multiple models are
        being evaluated.
      eval_config: Eval config.
      custom_model_loader: Optional custom model loader for non-TF models.
      rubber_stamp: True when this run is a first run without a baseline model
        while a baseline is configured, the diff thresholds will be ignored.
      resource_hints: The beam resource hints to apply to the PTransform which
        runs inference for this model.
      backend_config: Optional configuration of backend running model inference
        with *some* prediction extractors.
    """
    if not eval_config:
        # Default to the tfma eval model type unless the serving tag is used.
        is_baseline = False
        if tags and _LEGACY_EVAL_TAG in tags:
            model_type = constants.TFMA_EVAL
        elif tags and tf.saved_model.SERVING in tags:
            model_type = constants.TF_ESTIMATOR
        else:
            model_type = constants.TFMA_EVAL
        if tags is None:
            tags = [_LEGACY_EVAL_TAG]
    else:
        model_spec = model_util.get_model_spec(eval_config, model_name)
        if not model_spec:
            raise ValueError(
                f"ModelSpec for model name {model_name} not found in EvalConfig: "
                f"config={eval_config}"
            )
        is_baseline = model_spec.is_baseline
        model_type = model_util.get_model_type(model_spec, eval_saved_model_path, tags)
        if tags is None:
            # Default to serving unless tfma_eval is used.
            if model_type == constants.TFMA_EVAL:
                tags = [_LEGACY_EVAL_TAG]
            else:
                tags = [tf.saved_model.SERVING]
        if model_spec.example_weight_key or model_spec.example_weight_keys:
            example_weight_key = (
                model_spec.example_weight_key or model_spec.example_weight_keys
            )
        if eval_config.options.HasField("include_default_metrics"):
            include_default_metrics = eval_config.options.include_default_metrics.value

    model_loader = custom_model_loader
    if not model_loader and model_type in constants.VALID_TF_MODEL_TYPES:
        model_loader = types.ModelLoader(
            construct_fn=model_util.model_construct_fn(
                eval_saved_model_path=eval_saved_model_path,
                add_metrics_callbacks=add_metrics_callbacks,
                include_default_metrics=include_default_metrics,
                additional_fetches=additional_fetches,
                blacklist_feature_fetches=blacklist_feature_fetches,
                model_type=model_type,
                tags=tags,
            ),
            tags=tags,
        )

    return types.EvalSharedModel(
        model_name=model_name,
        model_type=model_type,
        model_path=eval_saved_model_path,
        add_metrics_callbacks=add_metrics_callbacks,
        include_default_metrics=include_default_metrics,
        example_weight_key=example_weight_key,
        additional_fetches=additional_fetches,
        model_loader=model_loader,
        rubber_stamp=rubber_stamp,
        is_baseline=is_baseline,
        resource_hints=resource_hints,
        backend_config=backend_config,
    )
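
For illustration, a minimal sketch of building an EvalSharedModel for a single Keras SavedModel with this helper; the model path and label key below are placeholders, not values defined by TFMA.

import tensorflow_model_analysis as tfma

# Hypothetical single-model config; adjust the path and label key for your model.
eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key="label")],
    slicing_specs=[tfma.SlicingSpec()],  # overall (unsliced) metrics
)

eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path="/path/to/saved_model",  # placeholder path
    eval_config=eval_config,
)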

default_evaluators

default_evaluators(
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
    eval_config: Optional[EvalConfig] = None,
    schema: Optional[Schema] = None,
    compute_confidence_intervals: Optional[bool] = False,
    min_slice_size: int = 1,
    serialize: bool = False,
    random_seed_for_testing: Optional[int] = None,
    config_version: Optional[int] = None,
) -> List[Evaluator]

Returns the default evaluators for use in ExtractAndEvaluate.


Args:
eval_shared_model: Optional shared model (single-model evaluation) or list of shared models (multi-model evaluation). Only required if there are metrics to be computed in-graph using the model.
eval_config: Eval config.
schema: A schema to use for customizing default evaluators.
compute_confidence_intervals: Deprecated (use eval_config).
min_slice_size: Deprecated (use eval_config).
serialize: Deprecated.
random_seed_for_testing: Provide for deterministic tests only.
config_version: Optional config version for this evaluation. This should not be explicitly set by users. It is only intended to be used in cases where the provided eval_config was generated internally, and thus not a reliable indicator of user intent.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def default_evaluators(  # pylint: disable=invalid-name
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    schema: Optional[schema_pb2.Schema] = None,
    compute_confidence_intervals: Optional[bool] = False,
    min_slice_size: int = 1,
    serialize: bool = False,
    random_seed_for_testing: Optional[int] = None,
    config_version: Optional[int] = None,
) -> List[evaluator.Evaluator]:
    """Returns the default evaluators for use in ExtractAndEvaluate.

    Args:
    ----
      eval_shared_model: Optional shared model (single-model evaluation) or list
        of shared models (multi-model evaluation). Only required if there are
        metrics to be computed in-graph using the model.
      eval_config: Eval config.
      schema: A schema to use for customizing default evaluators.
      compute_confidence_intervals: Deprecated (use eval_config).
      min_slice_size: Deprecated (use eval_config).
      serialize: Deprecated.
      random_seed_for_testing: Provide for deterministic tests only.
      config_version: Optional config version for this evaluation. This should not
        be explicitly set by users. It is only intended to be used in cases where
        the provided eval_config was generated internally, and thus not a reliable
        indicator of user intent.
    """
    disabled_outputs = []
    eval_shared_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model
    )
    if eval_config:
        eval_config = _update_eval_config_with_defaults(eval_config, eval_shared_model)
        disabled_outputs = eval_config.options.disabled_outputs.values
        if _model_types(eval_shared_models) == {constants.TF_LITE} or _model_types(
            eval_shared_models
        ) == {constants.TF_JS}:
            # no in-graph metrics present when tflite or tfjs is used.
            if eval_shared_models:
                eval_shared_models = [
                    v._replace(include_default_metrics=False)
                    for v in eval_shared_models
                ]
    if (
        constants.METRICS_KEY in disabled_outputs
        and constants.PLOTS_KEY in disabled_outputs
        and constants.ATTRIBUTIONS_KEY in disabled_outputs
    ):
        return []

    return [
        metrics_plots_and_validations_evaluator.MetricsPlotsAndValidationsEvaluator(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            schema=schema,
            random_seed_for_testing=random_seed_for_testing,
        )
    ]
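
As a sketch, the evaluators returned here can be passed to ExtractAndEvaluate (or to run_model_analysis via its evaluators argument); the model path and label key below are placeholders.

import tensorflow_model_analysis as tfma

eval_config = tfma.EvalConfig(model_specs=[tfma.ModelSpec(label_key="label")])
eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path="/path/to/saved_model",  # placeholder path
    eval_config=eval_config,
)

# A single MetricsPlotsAndValidationsEvaluator, or [] when metrics, plots,
# and attributions are all listed in eval_config.options.disabled_outputs.
evaluators = tfma.default_evaluators(
    eval_shared_model=eval_shared_model, eval_config=eval_config
)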

default_extractors

default_extractors(
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
    eval_config: Optional[EvalConfig] = None,
    slice_spec: Optional[List[SingleSliceSpec]] = None,
    materialize: Optional[bool] = None,
    tensor_adapter_config: Optional[
        TensorAdapterConfig
    ] = None,
    custom_predict_extractor: Optional[Extractor] = None,
    config_version: Optional[int] = None,
) -> List[Extractor]

Returns the default extractors for use in ExtractAndEvaluate.


Args:
eval_shared_model: Shared model (single-model evaluation) or list of shared models (multi-model evaluation). Required unless the predictions are provided alongside of the features (i.e. model-agnostic evaluations).
eval_config: Eval config.
slice_spec: Deprecated (use EvalConfig).
materialize: True to have extractors create materialized output.
tensor_adapter_config: Tensor adapter config which specifies how to obtain tensors from the Arrow RecordBatch. If None, an attempt will be made to create the tensors using default TensorRepresentations.
custom_predict_extractor: Optional custom predict extractor for non-TF models.
config_version: Optional config version for this evaluation. This should not be explicitly set by users. It is only intended to be used in cases where the provided eval_config was generated internally, and thus not a reliable indicator of user intent.

Raises:
NotImplementedError: If eval_config contains mixed serving and eval models.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def default_extractors(  # pylint: disable=invalid-name
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    materialize: Optional[bool] = None,
    tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
    custom_predict_extractor: Optional[extractor.Extractor] = None,
    config_version: Optional[int] = None,
) -> List[extractor.Extractor]:
    """Returns the default extractors for use in ExtractAndEvaluate.

    Args:
    ----
      eval_shared_model: Shared model (single-model evaluation) or list of shared
        models (multi-model evaluation). Required unless the predictions are
        provided alongside of the features (i.e. model-agnostic evaluations).
      eval_config: Eval config.
      slice_spec: Deprecated (use EvalConfig).
      materialize: True to have extractors create materialized output.
      tensor_adapter_config: Tensor adapter config which specifies how to obtain
        tensors from the Arrow RecordBatch. If None, an attempt will be made to
        create the tensors using default TensorRepresentations.
      custom_predict_extractor: Optional custom predict extractor for non-TF
        models.
      config_version: Optional config version for this evaluation. This should not
        be explicitly set by users. It is only intended to be used in cases where
        the provided eval_config was generated internally, and thus not a reliable
        indicator of user intent.

    Raises:
    ------
      NotImplementedError: If eval_config contains mixed serving and eval models.
    """
    if materialize is None:
        # TODO(b/172969312): Once analysis table is supported, remove defaulting
        #  to false unless 'analysis' is in disabled_outputs.
        materialize = False
    if slice_spec and eval_config:
        raise ValueError("slice_spec is deprecated, only use eval_config")

    if eval_config is not None:
        eval_config = _update_eval_config_with_defaults(eval_config, eval_shared_model)
    tensor_representations = None
    if tensor_adapter_config:
        tensor_representations = tensor_adapter_config.tensor_representations

    eval_shared_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model
    )
    slicing_extractors = []
    if _has_sql_slices(eval_config):
        slicing_extractors.append(
            sql_slice_key_extractor.SqlSliceKeyExtractor(eval_config)
        )
    slicing_extractors.extend(
        [
            unbatch_extractor.UnbatchExtractor(),
            slice_key_extractor.SliceKeyExtractor(
                eval_config=eval_config, materialize=materialize
            ),
        ]
    )

    extract_features = features_extractor.FeaturesExtractor(
        eval_config=eval_config, tensor_representations=tensor_representations
    )
    extract_labels = labels_extractor.LabelsExtractor(eval_config=eval_config)
    extract_example_weights = example_weights_extractor.ExampleWeightsExtractor(
        eval_config=eval_config
    )
    extract_materialized_predictions = (
        materialized_predictions_extractor.MaterializedPredictionsExtractor(
            eval_config=eval_config
        )
    )
    if eval_shared_model:
        model_types = _model_types(eval_shared_models)
        logging.info("eval_shared_models have model_types: %s", model_types)
        assert model_types is not None
        if (
            not model_types.issubset(constants.VALID_TF_MODEL_TYPES)
            and not custom_predict_extractor
        ):
            raise NotImplementedError(
                "either a custom_predict_extractor must be used or model type must "
                f"be one of: {str(constants.VALID_TF_MODEL_TYPES)}. evalconfig={eval_config}"
            )

        if model_types == {constants.MATERIALIZED_PREDICTION}:
            return [
                extract_features,
                extract_labels,
                extract_example_weights,
                extract_materialized_predictions,
            ] + slicing_extractors
        elif model_types == {constants.TF_LITE}:
            # TODO(b/163889779): Convert TFLite extractor to operate on batched
            # extracts. Then we can remove the input extractor.
            return [
                extract_features,
                transformed_features_extractor.TransformedFeaturesExtractor(
                    eval_config=eval_config, eval_shared_model=eval_shared_model
                ),
                extract_labels,
                extract_example_weights,
                (
                    custom_predict_extractor
                    or tflite_predict_extractor.TFLitePredictExtractor(
                        eval_config=eval_config, eval_shared_model=eval_shared_model
                    )
                ),
            ] + slicing_extractors
        elif constants.TF_LITE in model_types:
            raise NotImplementedError(
                "support for mixing tf_lite and non-tf_lite models is not "
                f"implemented: eval_config={eval_config}"
            )
        elif model_types == {constants.TF_JS}:
            return [
                extract_features,
                extract_labels,
                extract_example_weights,
                (
                    custom_predict_extractor
                    or tfjs_predict_extractor.TFJSPredictExtractor(
                        eval_config=eval_config, eval_shared_model=eval_shared_model
                    )
                ),
            ] + slicing_extractors
        elif constants.TF_JS in model_types:
            raise NotImplementedError(
                "support for mixing tf_js and non-tf_js models is not "
                f"implemented: eval_config={eval_config}"
            )
        else:
            extractors = [extract_features]
            if not custom_predict_extractor:
                extractors.append(
                    transformed_features_extractor.TransformedFeaturesExtractor(
                        eval_config=eval_config, eval_shared_model=eval_shared_model
                    )
                )
            extractors.extend(
                [
                    extract_labels,
                    extract_example_weights,
                    (
                        custom_predict_extractor
                        or predictions_extractor.PredictionsExtractor(
                            eval_config=eval_config, eval_shared_model=eval_shared_model
                        )
                    ),
                ]
            )
            extractors.extend(slicing_extractors)
            return extractors
    else:
        return [
            extract_features,
            extract_labels,
            extract_example_weights,
            extract_materialized_predictions,
        ] + slicing_extractors
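
A short sketch of obtaining the default extractor chain for a single TF model; the model path and label key are placeholders. The returned list preserves the order in which the extractors should run inside ExtractAndEvaluate.

import tensorflow_model_analysis as tfma

eval_config = tfma.EvalConfig(model_specs=[tfma.ModelSpec(label_key="label")])
eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path="/path/to/saved_model",  # placeholder path
    eval_config=eval_config,
)

# Features, (transformed) features, labels, example weights, predictions,
# and slicing extractors, in execution order.
extractors = tfma.default_extractors(
    eval_shared_model=eval_shared_model, eval_config=eval_config
)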

default_writers

default_writers(
    output_path: Optional[str],
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
    eval_config: Optional[EvalConfig] = None,
    display_only_data_location: Optional[str] = None,
    display_only_data_file_format: Optional[str] = None,
    output_file_format: str = "tfrecord",
    add_metric_callbacks: Optional[
        List[AddMetricsCallbackType]
    ] = None,
) -> List[Writer]

Returns the default writers for use in WriteResults.

Note: sharding will be enabled by default if an output_file_format is provided. Filenames will be <output_path>-SSSSS-of-NNNNN.<output_file_format>, where SSSSS is the shard number and NNNNN is the number of shards.


Args:
output_path: Output path.
eval_shared_model: Optional shared model (single-model evaluation) or list of shared models (multi-model evaluation). Required unless the predictions are provided alongside of the features (i.e. model-agnostic evaluations).
eval_config: Eval config for writing out config along with results. Also used to check for missing slices.
display_only_data_location: Optional path indicating where the examples were read from. This is used only for display purposes - data will not actually be read from this path.
display_only_data_file_format: Optional format of the input examples. This is used only for display purposes.
output_file_format: File format to use when saving files. Currently only 'tfrecord' is supported.
add_metric_callbacks: Optional list of metric callbacks (if used).

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def default_writers(
    output_path: Optional[str],
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    display_only_data_location: Optional[str] = None,
    display_only_data_file_format: Optional[str] = None,
    output_file_format: str = "tfrecord",
    add_metric_callbacks: Optional[List[types.AddMetricsCallbackType]] = None,
) -> List[writer.Writer]:  # pylint: disable=invalid-name
    """Returns the default writers for use in WriteResults.

    Note, sharding will be enabled by default if an output_file_format is
    provided. Filenames will be <output_path>-SSSSS-of-NNNNN.<output_file_format>
    where SSSSS is the shard number and NNNNN is the number of shards.

    Args:
    ----
      output_path: Output path.
      eval_shared_model: Optional shared model (single-model evaluation) or list
        of shared models (multi-model evaluation). Required unless the predictions
        are provided alongside of the features (i.e. model-agnostic evaluations).
      eval_config: Eval config for writing out config along with results. Also
        used for to check for missing slices.
      display_only_data_location: Optional path indicating where the examples were
        read from. This is used only for display purposes - data will not actually
        be read from this path.
      display_only_data_file_format: Optional format of the input examples. This
        is used only for display purposes.
      output_file_format: File format to use when saving files. Currently only
        'tfrecord' is supported.
      add_metric_callbacks: Optional list of metric callbacks (if used).
    """
    writers = []

    if not add_metric_callbacks:
        add_metric_callbacks = []
    # The add_metric_callbacks are used in the metrics and plots serialization
    # code to post process the metric data by calling populate_stats_and_pop.
    # While both the legacy (V1) and new (V2) evaluation implementations support
    # EvalSavedModels using add_metric_callbacks, this particular code is only
    # required for the legacy evaluation based on the MetricsAndPlotsEvaluator.
    # The V2 MetricsAndPlotsEvaluator output requires no additional processing.
    # Since the V1 code only supports a single EvalSharedModel, we only set the
    # add_metrics_callbacks if a dict is not passed.
    if (
        eval_shared_model
        and not isinstance(eval_shared_model, dict)
        and not isinstance(eval_shared_model, list)
    ):
        add_metric_callbacks = eval_shared_model.add_metrics_callbacks

    eval_shared_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model
    )

    if eval_config:
        model_locations = {}
        for v in eval_shared_models or [None]:
            k = "" if v is None else v.model_name
            model_locations[k] = (
                "<unknown>" if v is None or v.model_path is None else v.model_path
            )
        writers.append(
            eval_config_writer.EvalConfigWriter(
                output_path,
                eval_config=eval_config,
                data_location=display_only_data_location,
                data_file_format=display_only_data_file_format,
                model_locations=model_locations,
            )
        )

    output_paths = {
        constants.METRICS_KEY: os.path.join(output_path, constants.METRICS_KEY),
        constants.PLOTS_KEY: os.path.join(output_path, constants.PLOTS_KEY),
        constants.ATTRIBUTIONS_KEY: os.path.join(
            output_path, constants.ATTRIBUTIONS_KEY
        ),
        constants.VALIDATIONS_KEY: os.path.join(output_path, constants.VALIDATIONS_KEY),
    }
    writers.append(
        metrics_plots_and_validations_writer.MetricsPlotsAndValidationsWriter(
            output_paths=output_paths,
            # Empty EvalConfig supported for backwards compatibility.
            eval_config=eval_config or config_pb2.EvalConfig(),
            add_metrics_callbacks=add_metric_callbacks,
            output_file_format=output_file_format,
            rubber_stamp=model_util.has_rubber_stamp(eval_shared_models),
        )
    )
    return writers
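
A hedged sketch of wiring the default extractors, evaluators, and writers into a hand-built Beam pipeline (the pattern that run_model_analysis automates). All paths are placeholders, and the example assumes TFRecord input read through tfx_bsl's TFExampleRecord TFXIO.

import apache_beam as beam
import tensorflow_model_analysis as tfma
from tfx_bsl.tfxio import tf_example_record

eval_config = tfma.EvalConfig(model_specs=[tfma.ModelSpec(label_key="label")])
eval_shared_model = tfma.default_eval_shared_model(
    eval_saved_model_path="/path/to/saved_model",  # placeholder path
    eval_config=eval_config,
)
output_path = "/path/to/eval_output"  # placeholder path

tfxio = tf_example_record.TFExampleRecord(
    file_pattern="/path/to/examples*",  # placeholder pattern
    raw_record_column_name=tfma.ARROW_INPUT_COLUMN,
)

with beam.Pipeline() as p:
    _ = (
        p
        | "ReadExamples" >> tfxio.BeamSource()
        | "ToExtracts" >> tfma.BatchedInputsToExtracts()
        | "ExtractAndEvaluate" >> tfma.ExtractAndEvaluate(
            extractors=tfma.default_extractors(
                eval_shared_model=eval_shared_model, eval_config=eval_config),
            evaluators=tfma.default_evaluators(
                eval_shared_model=eval_shared_model, eval_config=eval_config))
        | "WriteResults" >> tfma.WriteResults(
            writers=tfma.default_writers(
                output_path=output_path,
                eval_shared_model=eval_shared_model,
                eval_config=eval_config)))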

is_batched_input

is_batched_input(
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
    eval_config: Optional[EvalConfig] = None,
    config_version: Optional[int] = None,
) -> bool

Returns true if batched input should be used.

We will keep supporting the legacy unbatched V1 PredictExtractor, as it parses the features and labels and is currently the only solution that allows slicing on transformed features. Eventually we should support transformed features via Keras preprocessing layers.


Args:
eval_shared_model: Shared model (single-model evaluation) or list of shared models (multi-model evaluation). Required unless the predictions are provided alongside of the features (i.e. model-agnostic evaluations).
eval_config: Eval config.
config_version: Optional config version for this evaluation. This should not be explicitly set by users. It is only intended to be used in cases where the provided eval_config was generated internally, and thus not a reliable indicator of user intent.

Returns:
A boolean indicating if batched extractors should be used.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def is_batched_input(
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    config_version: Optional[int] = None,
) -> bool:
    """Returns true if batched input should be used.

     We will keep supporting the legacy unbatched V1 PredictExtractor as it parses
     the features and labels, and is the only solution currently that allows for
     slicing on transformed features. Eventually we should have support for
     transformed features via keras preprocessing layers.

    Args:
    ----
      eval_shared_model: Shared model (single-model evaluation) or list of shared
        models (multi-model evaluation). Required unless the predictions are
        provided alongside of the features (i.e. model-agnostic evaluations).
      eval_config: Eval config.
      config_version: Optional config version for this evaluation. This should not
        be explicitly set by users. It is only intended to be used in cases where
        the provided eval_config was generated internally, and thus not a reliable
        indicator of user intent.

    Returns:
    -------
      A boolean indicating if batched extractors should be used.
    """
    eval_shared_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model
    )
    return not _is_legacy_eval(config_version, eval_shared_models, eval_config)

is_legacy_estimator

is_legacy_estimator(
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
) -> bool

Returns true if there is a legacy estimator.


Args:
eval_shared_model: Shared model (single-model evaluation) or list of shared models (multi-model evaluation). Required unless the predictions are provided alongside of the features (i.e. model-agnostic evaluations).

Returns:
A boolean indicating if the legacy predict extractor will be used.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def is_legacy_estimator(
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
) -> bool:
    """Returns true if there is a legacy estimator.

    Args:
    ----
      eval_shared_model: Shared model (single-model evaluation) or list of shared
        models (multi-model evaluation). Required unless the predictions are
        provided alongside of the features (i.e. model-agnostic evaluations).

    Returns:
    -------
      A boolean indicating if legacy predict extractor will be used.
    """
    eval_shared_models = model_util.verify_and_update_eval_shared_models(
        eval_shared_model
    )
    model_types = _model_types(eval_shared_models)
    return (
        model_types is not None
        and model_types == {constants.TFMA_EVAL}
        and all(_LEGACY_EVAL_TAG in m.model_loader.tags for m in eval_shared_models)
    )

load_attributions

load_attributions(
    output_path: str, output_file_format: str = "tfrecord"
) -> Iterator[AttributionsForSlice]

Read and deserialize the AttributionsForSlice records.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def load_attributions(
    output_path: str, output_file_format: str = "tfrecord"
) -> Iterator[AttributionsForSlice]:
    """Read and deserialize the AttributionsForSlice records."""
    for a in metrics_plots_and_validations_writer.load_and_deserialize_attributions(
        output_path, output_file_format
    ):
        yield a

load_eval_result

load_eval_result(
    output_path: str,
    output_file_format: Optional[str] = "tfrecord",
    model_name: Optional[str] = None,
) -> EvalResult

Loads EvalResult object for use with the visualization functions.


Args:
output_path: Output directory containing config, metrics, plots, etc.
output_file_format: Optional file extension to filter files by.
model_name: Optional model name. Required if multi-model evaluation was run.

Returns:
EvalResult object for use with the visualization functions.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def load_eval_result(
    output_path: str,
    output_file_format: Optional[str] = "tfrecord",
    model_name: Optional[str] = None,
) -> view_types.EvalResult:
    """Loads EvalResult object for use with the visualization functions.

    Args:
    ----
      output_path: Output directory containing config, metrics, plots, etc.
      output_file_format: Optional file extension to filter files by.
      model_name: Optional model name. Required if multi-model evaluation was run.

    Returns:
    -------
      EvalResult object for use with the visualization functions.
    """
    # Config, metrics, and plots files should all exist under the given output
    # directory, but fairness plugin has a use-case where only the metrics are
    # provided so we support all files as being optional (the EvalResult will have
    # corresponding None values for files that are not present).
    eval_config, data_location, file_format, model_locations = (
        eval_config_writer.load_eval_run(output_path)
    )
    metrics_list = []
    for p in metrics_plots_and_validations_writer.load_and_deserialize_metrics(
        output_path, output_file_format
    ):
        metrics = view_util.convert_metrics_proto_to_dict(p, model_name=model_name)
        if metrics is not None:
            metrics_list.append(metrics)
    plots_list = []
    for p in metrics_plots_and_validations_writer.load_and_deserialize_plots(
        output_path, output_file_format
    ):
        plots = view_util.convert_plots_proto_to_dict(p, model_name=model_name)
        if plots is not None:
            plots_list.append(plots)
    attributions_list = []
    for a in metrics_plots_and_validations_writer.load_and_deserialize_attributions(
        output_path, output_file_format
    ):
        attributions = view_util.convert_attributions_proto_to_dict(
            a, model_name=model_name
        )
        if attributions is not None:
            attributions_list.append(attributions)
    if not model_locations:
        model_location = ""
    elif model_name is None:
        model_location = list(model_locations.values())[0]
    else:
        model_location = model_locations[model_name]
    return view_types.EvalResult(  # pytype: disable=wrong-arg-types
        slicing_metrics=metrics_list,
        plots=plots_list,
        attributions=attributions_list,
        config=eval_config,
        data_location=data_location,
        file_format=file_format,
        model_location=model_location,
    )
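
A small usage sketch; the output directory below is a placeholder for a completed TFMA run.

import tensorflow_model_analysis as tfma

eval_result = tfma.load_eval_result("/path/to/eval_output")  # placeholder path

# slicing_metrics is a list of (slice_key, metrics-by-output) tuples.
for slice_key, metrics in eval_result.slicing_metrics:
    print(slice_key, metrics)

# In a notebook the result can also be passed to the visualization helpers,
# e.g. tfma.view.render_slicing_metrics(eval_result).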

load_eval_results

load_eval_results(
    output_paths: Union[str, List[str]],
    output_file_format: Optional[str] = "tfrecord",
    mode: str = MODEL_CENTRIC_MODE,
    model_name: Optional[str] = None,
) -> EvalResults

Loads results for multiple models or multiple data sets.


Args:
output_paths: A single path or list of output paths of completed tfma runs.
output_file_format: Optional file extension to filter files by.
mode: The mode of the evaluation. Currently, tfma.DATA_CENTRIC_MODE and tfma.MODEL_CENTRIC_MODE are supported.
model_name: Filters to only return results for the given model. If unset, all models are returned.

Returns:
An EvalResults containing the evaluation results serialized at output_paths. This can be used to construct a time series view.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def load_eval_results(
    output_paths: Union[str, List[str]],
    output_file_format: Optional[str] = "tfrecord",
    mode: str = constants.MODEL_CENTRIC_MODE,
    model_name: Optional[str] = None,
) -> view_types.EvalResults:
    """Loads results for multiple models or multiple data sets.

    Args:
    ----
      output_paths: A single path or list of output paths of completed tfma runs.
      output_file_format: Optional file extension to filter files by.
      mode: The mode of the evaluation. Currently, tfma.DATA_CENTRIC_MODE and
        tfma.MODEL_CENTRIC_MODE are supported.
      model_name: Filters to only return results for given model. If unset all
        models are returned.

    Returns:
    -------
      An EvalResults containing the evaluation results serialized at output_paths.
      This can be used to construct a time series view.
    """
    results = []
    if not isinstance(output_paths, list):
        output_paths = [output_paths]
    for output_path in output_paths:
        if model_name is None:
            _, _, _, model_locations = eval_config_writer.load_eval_run(output_path)
            model_names = list(model_locations)
        else:
            model_names = [model_name]
        for model_name in model_names:
            results.append(
                load_eval_result(output_path, output_file_format, model_name=model_name)
            )
    return make_eval_results(results, mode)
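
A short sketch of loading two completed runs of the same model over different data sets for a time-series comparison; the output paths are placeholders.

import tensorflow_model_analysis as tfma

eval_results = tfma.load_eval_results(
    ["/path/to/eval_output/run1", "/path/to/eval_output/run2"],  # placeholders
    mode=tfma.DATA_CENTRIC_MODE,
)

# In a notebook the combined results can be rendered as a time series,
# e.g. tfma.view.render_time_series(eval_results).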

load_metrics

load_metrics(
    output_path: str, output_file_format: str = "tfrecord"
) -> Iterator[MetricsForSlice]

Read and deserialize the MetricsForSlice records.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def load_metrics(
    output_path: str, output_file_format: str = "tfrecord"
) -> Iterator[MetricsForSlice]:
    """Read and deserialize the MetricsForSlice records."""
    for m in metrics_plots_and_validations_writer.load_and_deserialize_metrics(
        output_path, output_file_format
    ):
        yield m
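
A minimal sketch of iterating over the serialized metrics; the output directory is a placeholder, and the field names follow the MetricsForSlice proto.

import tensorflow_model_analysis as tfma

for metrics_for_slice in tfma.load_metrics("/path/to/eval_output"):  # placeholder
    print(metrics_for_slice.slice_key)
    for kv in metrics_for_slice.metric_keys_and_values:
        print(" ", kv.key.name, kv.value)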

load_plots

load_plots(
    output_path: str, output_file_format: str = "tfrecord"
) -> Iterator[PlotsForSlice]

Read and deserialize the PlotsForSlice records.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def load_plots(
    output_path: str, output_file_format: str = "tfrecord"
) -> Iterator[PlotsForSlice]:
    """Read and deserialize the PlotsForSlice records."""
    for p in metrics_plots_and_validations_writer.load_and_deserialize_plots(
        output_path, output_file_format
    ):
        yield p

load_validation_result

load_validation_result(
    output_path: str, output_file_format: str = ""
) -> ValidationResult

Read and deserialize the ValidationResult.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def load_validation_result(
    output_path: str, output_file_format: str = ""
) -> ValidationResult:
    """Read and deserialize the ValidationResult."""
    return metrics_plots_and_validations_writer.load_and_deserialize_validation_result(
        output_path, output_file_format
    )
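
A minimal sketch, assuming the run was configured with thresholds so that a validation result was written; the output directory is a placeholder.

import tensorflow_model_analysis as tfma

validation_result = tfma.load_validation_result("/path/to/eval_output")  # placeholder
if not validation_result.validation_ok:
    # Per-slice failure details are in metric_validations_per_slice.
    print(validation_result)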

make_eval_results

make_eval_results(
    results: List[EvalResult], mode: str
) -> EvalResults

Combines a list of TFMA evaluation results into an EvalResults object.


Args:
results: A list of TFMA evaluation results.
mode: The mode of the evaluation. Currently, tfma.DATA_CENTRIC_MODE and tfma.MODEL_CENTRIC_MODE are supported.

Returns:
A tfma.view.EvalResults object containing all evaluation results. This can be used to construct a time series view.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def make_eval_results(
    results: List[view_types.EvalResult], mode: str
) -> view_types.EvalResults:
    """Run model analysis for a single model on multiple data sets.

    Args:
    ----
      results: A list of TFMA evaluation results.
      mode: The mode of the evaluation. Currently, tfma.DATA_CENTRIC_MODE and
        tfma.MODEL_CENTRIC_MODE are supported.

    Returns:
    -------
      An `tfma.view.EvalResults` object containing all evaluation results. This
      can be used to construct a time series view.
    """
    return view_types.EvalResults(results, mode)

multiple_data_analysis

multiple_data_analysis(
    model_location: str, data_locations: List[str], **kwargs
) -> EvalResults

Run model analysis for a single model on multiple data sets.


Args:
model_location: The location of the exported eval saved model.
data_locations: A list of data set locations.
**kwargs: The args used for evaluation. See tfma.run_model_analysis() for details.

Returns:
A tfma.EvalResults containing all the evaluation results, in the same order as data_locations.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def multiple_data_analysis(
    model_location: str, data_locations: List[str], **kwargs
) -> view_types.EvalResults:
    """Run model analysis for a single model on multiple data sets.

    Args:
    ----
      model_location: The location of the exported eval saved model.
      data_locations: A list of data set locations.
      **kwargs: The args used for evaluation. See tfma.run_model_analysis() for
        details.

    Returns:
    -------
      A tfma.EvalResults containing all the evaluation results with the same order
      as data_locations.
    """
    results = []
    for d in data_locations:
        results.append(single_model_analysis(model_location, d, **kwargs))
    return view_types.EvalResults(results, constants.DATA_CENTRIC_MODE)

multiple_model_analysis

multiple_model_analysis(
    model_locations: List[str], data_location: str, **kwargs
) -> EvalResults

Run model analysis for multiple models on the same data set.


Args:
model_locations: A list of paths to the exported eval saved models.
data_location: The location of the data files.
**kwargs: The args used for evaluation. See tfma.single_model_analysis() for details.

Returns:
A tfma.EvalResults containing all the evaluation results, in the same order as model_locations.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def multiple_model_analysis(
    model_locations: List[str], data_location: str, **kwargs
) -> view_types.EvalResults:
    """Run model analysis for multiple models on the same data set.

    Args:
    ----
      model_locations: A list of paths to the export eval saved model.
      data_location: The location of the data files.
      **kwargs: The args used for evaluation. See tfma.single_model_analysis() for
        details.

    Returns:
    -------
      A tfma.EvalResults containing all the evaluation results with the same order
      as model_locations.
    """
    results = []
    for m in model_locations:
        results.append(single_model_analysis(m, data_location, **kwargs))
    return view_types.EvalResults(results, constants.MODEL_CENTRIC_MODE)
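
A brief sketch comparing two exported models on the same data set; both the model directories and the data location are placeholders. Additional keyword arguments are forwarded to single_model_analysis.

import tensorflow_model_analysis as tfma

eval_results = tfma.multiple_model_analysis(
    ["/path/to/model_v1", "/path/to/model_v2"],  # placeholder model dirs
    "/path/to/examples*",  # placeholder data location
)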

run_model_analysis

run_model_analysis(
    eval_shared_model: Optional[
        MaybeMultipleEvalSharedModels
    ] = None,
    eval_config: Optional[EvalConfig] = None,
    data_location: str = "",
    file_format: str = "tfrecords",
    output_path: Optional[str] = None,
    extractors: Optional[List[Extractor]] = None,
    evaluators: Optional[List[Evaluator]] = None,
    writers: Optional[List[Writer]] = None,
    pipeline_options: Optional[Any] = None,
    slice_spec: Optional[List[SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    min_slice_size: int = 1,
    random_seed_for_testing: Optional[int] = None,
    schema: Optional[Schema] = None,
) -> Union[EvalResult, EvalResults]

Runs TensorFlow model analysis.

It runs a Beam pipeline to compute the slicing metrics exported in TensorFlow Eval SavedModel and returns the results.

This is a simplified API for users who want to quickly get something running locally. Users who wish to create their own Beam pipelines can use the Evaluate PTransform instead.


Args:
eval_shared_model: Optional shared model (single-model evaluation) or list of shared models (multi-model evaluation). Only required if needed by default extractors, evaluators, or writers.
eval_config: Eval config.
data_location: The location of the data files.
file_format: The file format of the data; can be either 'text' or 'tfrecords' for now. By default, 'tfrecords' will be used.
output_path: The directory to output metrics and results to. If None, we use a temporary directory.
extractors: Optional list of Extractors to apply to Extracts. Typically these will be added by calling the default_extractors function. If no extractors are provided, default_extractors (non-materialized) will be used.
evaluators: Optional list of Evaluators for evaluating Extracts. Typically these will be added by calling the default_evaluators function. If no evaluators are provided, default_evaluators will be used.
writers: Optional list of Writers for writing Evaluation output. Typically these will be added by calling the default_writers function. If no writers are provided, default_writers will be used.
pipeline_options: Optional arguments to run the Pipeline, for instance whether to run directly.
slice_spec: Deprecated (use EvalConfig).
write_config: Deprecated (use EvalConfig).
compute_confidence_intervals: Deprecated (use EvalConfig).
min_slice_size: Deprecated (use EvalConfig).
random_seed_for_testing: Provide for deterministic tests only.
schema: Optional tf.Metadata schema of the input data.

Returns:
An EvalResult that can be used with the TFMA visualization functions.

Raises:
ValueError: If the file_format is unknown to us.

Source code in tensorflow_model_analysis/api/model_eval_lib.py
def run_model_analysis(
    eval_shared_model: Optional[types.MaybeMultipleEvalSharedModels] = None,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    data_location: str = "",
    file_format: str = "tfrecords",
    output_path: Optional[str] = None,
    extractors: Optional[List[extractor.Extractor]] = None,
    evaluators: Optional[List[evaluator.Evaluator]] = None,
    writers: Optional[List[writer.Writer]] = None,
    pipeline_options: Optional[Any] = None,
    slice_spec: Optional[List[slicer.SingleSliceSpec]] = None,
    write_config: Optional[bool] = True,
    compute_confidence_intervals: Optional[bool] = False,
    min_slice_size: int = 1,
    random_seed_for_testing: Optional[int] = None,
    schema: Optional[schema_pb2.Schema] = None,
) -> Union[view_types.EvalResult, view_types.EvalResults]:
    """Runs TensorFlow model analysis.

    It runs a Beam pipeline to compute the slicing metrics exported in TensorFlow
    Eval SavedModel and returns the results.

    This is a simplified API for users who want to quickly get something running
    locally. Users who wish to create their own Beam pipelines can use the
    Evaluate PTransform instead.

    Args:
    ----
      eval_shared_model: Optional shared model (single-model evaluation) or list
        of shared models (multi-model evaluation). Only required if needed by
        default extractors, evaluators, or writers.
      eval_config: Eval config.
      data_location: The location of the data files.
      file_format: The file format of the data, can be either 'text' or
        'tfrecords' for now. By default, 'tfrecords' will be used.
      output_path: The directory to output metrics and results to. If None, we use
        a temporary directory.
      extractors: Optional list of Extractors to apply to Extracts. Typically
        these will be added by calling the default_extractors function. If no
        extractors are provided, default_extractors (non-materialized) will be
        used.
      evaluators: Optional list of Evaluators for evaluating Extracts. Typically
        these will be added by calling the default_evaluators function. If no
        evaluators are provided, default_evaluators will be used.
      writers: Optional list of Writers for writing Evaluation output. Typically
        these will be added by calling the default_writers function. If no writers
        are provided, default_writers will be used.
      pipeline_options: Optional arguments to run the Pipeline, for instance
        whether to run directly.
      slice_spec: Deprecated (use EvalConfig).
      write_config: Deprecated (use EvalConfig).
      compute_confidence_intervals: Deprecated (use EvalConfig).
      min_slice_size: Deprecated (use EvalConfig).
      random_seed_for_testing: Provide for deterministic tests only.
      schema: Optional tf.Metadata schema of the input data.

    Returns:
    -------
      An EvalResult that can be used with the TFMA visualization functions.

    Raises:
    ------
      ValueError: If the file_format is unknown to us.
    """
    _assert_tensorflow_version()

    if output_path is None:
        output_path = tempfile.mkdtemp()
    if not tf.io.gfile.exists(output_path):
        tf.io.gfile.makedirs(output_path)

    if eval_config is None:
        config_version = 1
        eval_shared_models = model_util.verify_and_update_eval_shared_models(
            eval_shared_model
        )
        eval_config = _default_eval_config(
            eval_shared_models,
            slice_spec,
            write_config,
            compute_confidence_intervals,
            min_slice_size,
        )
    else:
        config_version = 2
        eval_config = _update_eval_config_with_defaults(eval_config, eval_shared_model)

    tensor_adapter_config = None
    with beam.Pipeline(options=pipeline_options) as p:
        if file_format == "tfrecords":
            if is_batched_input(eval_shared_model, eval_config, config_version):
                if is_legacy_estimator(eval_shared_model):
                    tfxio = raw_tf_record.RawTfRecordTFXIO(
                        file_pattern=data_location,
                        raw_record_column_name=constants.ARROW_INPUT_COLUMN,
                        telemetry_descriptors=["StandaloneTFMA"],
                    )
                else:
                    tfxio = tf_example_record.TFExampleRecord(
                        file_pattern=data_location,
                        schema=schema,
                        raw_record_column_name=constants.ARROW_INPUT_COLUMN,
                        telemetry_descriptors=["StandaloneTFMA"],
                    )
                    if schema is not None:
                        tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
                            arrow_schema=tfxio.ArrowSchema(),
                            tensor_representations=tfxio.TensorRepresentations(),
                        )
                data = p | "ReadFromTFRecordToArrow" >> tfxio.BeamSource()
            else:
                data = p | "ReadFromTFRecord" >> beam.io.ReadFromTFRecord(
                    file_pattern=data_location,
                    compression_type=beam.io.filesystem.CompressionTypes.AUTO,
                )
        elif file_format == "text":
            tfxio = raw_tf_record.RawBeamRecordTFXIO(
                physical_format="csv",
                raw_record_column_name=constants.ARROW_INPUT_COLUMN,
                telemetry_descriptors=["StandaloneTFMA"],
            )
            data = (
                p
                | "ReadFromText"
                >> beam.io.textio.ReadFromText(
                    data_location, coder=beam.coders.BytesCoder()
                )
                | "ConvertToArrow" >> tfxio.BeamSource()
            )
        else:
            raise ValueError(f"unknown file_format: {file_format}")

        # pylint: disable=no-value-for-parameter
        _ = data | "ExtractEvaluateAndWriteResults" >> ExtractEvaluateAndWriteResults(
            eval_config=eval_config,
            eval_shared_model=eval_shared_model,
            display_only_data_location=data_location,
            display_only_file_format=file_format,
            output_path=output_path,
            extractors=extractors,
            evaluators=evaluators,
            writers=writers,
            random_seed_for_testing=random_seed_for_testing,
            tensor_adapter_config=tensor_adapter_config,
            schema=schema,
            config_version=config_version,
        )
        # pylint: enable=no-value-for-parameter

    if len(eval_config.model_specs) <= 1:
        return load_eval_result(output_path)
    else:
        results = []
        for spec in eval_config.model_specs:
            results.append(load_eval_result(output_path, model_name=spec.name))
        return view_types.EvalResults(results, constants.MODEL_CENTRIC_MODE)
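
To close, a hedged end-to-end sketch using run_model_analysis; the paths, the label key, and the "country" slicing feature are placeholders, and the chosen metrics assume a binary classification model.

import tensorflow_model_analysis as tfma

eval_config = tfma.EvalConfig(
    model_specs=[tfma.ModelSpec(label_key="label")],  # placeholder label key
    slicing_specs=[
        tfma.SlicingSpec(),  # overall (unsliced) metrics
        tfma.SlicingSpec(feature_keys=["country"]),  # placeholder feature
    ],
    metrics_specs=tfma.metrics.specs_from_metrics(
        [tfma.metrics.ExampleCount(), tfma.metrics.AUC()]
    ),
)

eval_result = tfma.run_model_analysis(
    eval_shared_model=tfma.default_eval_shared_model(
        eval_saved_model_path="/path/to/saved_model",  # placeholder path
        eval_config=eval_config,
    ),
    eval_config=eval_config,
    data_location="/path/to/examples*",  # placeholder pattern
    output_path="/path/to/eval_output",  # placeholder path
)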