evaluator

`PeriodicEvaluator`

Periodic evaluation assesses the entire incremental word embeddings model's performance using an intrinsic NLP task-related test dataset after a set number, p, of instances has been processed and used for training. This allows for the continuous evaluation of the model's accuracy and helps identify areas for improvement.

Source code in rivertext/evaluator/eval.py

class PeriodicEvaluator:
    """Train an incremental word embeddings model and evaluate it periodically.

    The stream is consumed in batches. Each batch is passed to the model's
    ``learn_many`` method and, every time a configurable number of instances has
    been processed, the model's current embeddings are scored against a golden
    dataset using ``eval_func``. Scores are kept in ``store_results`` and, when
    an output path is configured, also persisted to a JSON file.

    Note that ``p`` means two different things in this class: in ``__init__`` it
    is the batch size handed to the ``DataLoader``, while in ``run`` it is the
    number of instances between two evaluations.
    """

    def __init__(
        self,
        dataset: IterableDataset,
        model: IWVBase,
        p: int = 32,
        golden_dataset: Callable = None,
        eval_func: Callable[[Dict, np.ndarray, np.ndarray], int] = None,
        path_output_file: str = None,
    ):
        """Create an instance of the PeriodicEvaluator class.

        Args:
            dataset: Stream to train.
            model: Model to train. It must provide ``learn_many`` and
                ``vocab2dict``; ``model_name`` is also read when results are
                written to a file.
            p: Batch size for the dataloader, by default 32.
            golden_dataset: Zero-argument callable returning the golden dataset
                relations. The returned object must expose ``X`` and ``y``.
                Although the default is None, a callable is required.
            eval_func: Function evaluator according to the golden dataset,
                called as ``eval_func(embeddings, X, y)``, by default None.
            path_output_file: Path of the JSON file where the results are
                stored, by default None (results are only kept in memory).

        Raises:
            TypeError: If ``golden_dataset`` is None.
            ValueError: If ``path_output_file`` does not end with ``.json``.
        """
        # Validate the arguments before doing any potentially expensive work.
        if golden_dataset is None:
            raise TypeError(
                "golden_dataset must be a callable that returns the golden dataset."
            )
        if path_output_file is not None and not path_output_file.endswith(".json"):
            raise ValueError(
                "the extension file must be an JSON, but you got: "
                f"{path_output_file}."
            )

        self.dataset = dataset
        self.dataloader = DataLoader(self.dataset, batch_size=p)
        self.model = model
        self.gold_relation = golden_dataset()
        self.evaluator = eval_func
        self.path_output_file = path_output_file
        self.store_results = []

    def run(self, p: int = 3200):
        """Train on the stream and evaluate the whole model every p instances.

        After each batch is learned, the number of processed instances is
        updated. As soon as that number reaches the next multiple of ``p`` the
        model is evaluated, so the evaluation period does not have to be a
        multiple of the batch size. At most one evaluation is performed per
        batch, even if the batch spans several multiples of ``p``.

        Args:
            p: Number of instances to process before evaluating the model,
                by default 3200.

        Raises:
            ValueError: If ``p`` is not a positive number.
        """
        if p <= 0:
            raise ValueError(f"p must be a positive integer, but you got: {p}.")

        seen = 0
        next_eval = p
        for batch in self.dataloader:
            self.model.learn_many(batch)
            seen += len(batch)
            if seen < next_eval:
                continue

            embs = self.model.vocab2dict()
            result = self.evaluator(embs, self.gold_relation.X, self.gold_relation.y)
            self.store_results.append(result)
            if self.path_output_file is not None:
                self._save_result(result)

            # Move the threshold to the first multiple of p strictly above
            # `seen`, skipping any multiples crossed by this same batch.
            next_eval += p * ((seen - next_eval) // p + 1)

    def _save_result(self, result: float):
        """Append a result to the JSON output file, creating it if needed.

        The file holds an object with the keys ``model_name`` and ``values``.
        If the file already exists, ``result`` is appended to its ``values``
        list and the file is rewritten. Nothing is done when no output path is
        configured.

        Args:
            result: Evaluation result to store.
        """
        if self.path_output_file is None:
            return

        try:
            with open(self.path_output_file, encoding="utf-8") as reader:
                data = json.load(reader)
        except FileNotFoundError:
            # First result for this path: start a fresh document.
            data = {"model_name": self.model.model_name, "values": []}

        data["values"].append(result)
        with open(self.path_output_file, "w", encoding="utf-8") as writer:
            json.dump(data, writer)

`__init__(dataset, model, p=32, golden_dataset=None, eval_func=None, path_output_file=None)`

Create an instance of the PeriodicEvaluator class.

Parameters:

Name	Type	Description	Default
`dataset`	`IterableDataset`	Stream to train.	required
`model`	`IWVBase`	Model to train.	required
`p`	`int`	Batch size for the dataloader, by default 32.	`32`
`golden_dataset`	`Callable`	Golden dataset relations, by default None	`None`
`eval_func`	`Callable[[Dict, np.ndarray, np.ndarray], int]`	Function evaluator according to the golden dataset, by default None.	`None`

Source code in rivertext/evaluator/eval.py

def __init__(
    self,
    dataset: IterableDataset,
    model: IWVBase,
    p: int = 32,
    golden_dataset: Callable = None,
    eval_func: Callable[[Dict, np.ndarray, np.ndarray], int] = None,
    path_output_file: str = None,
):
    """Create an instance of the PeriodicEvaluator class.

    Args:
        dataset: Stream to train.
        model: Model to train.
        p: Batch size for the dataloader, by default 32.
        golden_dataset: Zero-argument callable returning the golden dataset
            relations. Although the default is None, a callable is required.
        eval_func: Function evaluator according to the golden dataset, by
            default None.
        path_output_file: Path of the JSON file where the results are stored,
            by default None.

    Raises:
        TypeError: If ``golden_dataset`` is None.
        ValueError: If ``path_output_file`` does not end with ``.json``.
    """
    # Validate the arguments before doing any potentially expensive work.
    if golden_dataset is None:
        raise TypeError(
            "golden_dataset must be a callable that returns the golden dataset."
        )
    if path_output_file is not None and not path_output_file.endswith(".json"):
        raise ValueError(
            "the extension file must be an JSON, but you got: "
            f"{path_output_file}."
        )

    self.dataset = dataset
    self.dataloader = DataLoader(self.dataset, batch_size=p)
    self.model = model
    self.gold_relation = golden_dataset()
    self.evaluator = eval_func
    self.path_output_file = path_output_file
    self.store_results = []

`run(p=3200)`

Algorithm executes periodic assessments of the entire model every p instances, providing continuous evaluation and identification of areas for improvement.

Parameters:

Name	Type	Description	Default
`p`	`int`	Number of instances to process before evaluating the model, by default 3200.	`3200`

Source code in rivertext/evaluator/eval.py

def run(self, p: int = 3200):
    """Train on the stream and evaluate the whole model every p instances.

    After each batch is learned, the number of processed instances is updated.
    As soon as that number reaches the next multiple of ``p`` the model is
    evaluated, so the evaluation period does not have to be a multiple of the
    batch size. At most one evaluation is performed per batch, even if the
    batch spans several multiples of ``p``. Each result is appended to
    ``store_results`` and, when an output path is configured, saved to file.

    Args:
        p: Number of instances to process before evaluating the model,
            by default 3200.

    Raises:
        ValueError: If ``p`` is not a positive number.
    """
    if p <= 0:
        raise ValueError(f"p must be a positive integer, but you got: {p}.")

    seen = 0
    next_eval = p
    for batch in self.dataloader:
        self.model.learn_many(batch)
        seen += len(batch)
        if seen < next_eval:
            continue

        embs = self.model.vocab2dict()
        result = self.evaluator(embs, self.gold_relation.X, self.gold_relation.y)
        self.store_results.append(result)
        if self.path_output_file is not None:
            self._save_result(result)

        # Move the threshold to the first multiple of p strictly above `seen`,
        # skipping any multiples crossed by this same batch.
        next_eval += p * ((seen - next_eval) // p + 1)

evaluator

PeriodicEvaluator

__init__(dataset, model, p=32, golden_dataset=None, eval_func=None, path_output_file=None)

run(p=3200)

`PeriodicEvaluator`

`__init__(dataset, model, p=32, golden_dataset=None, eval_func=None, path_output_file=None)`

`run(p=3200)`