Skip to content

Getting started

Installation

RiverText is meant to work with Python 3.10 and above. Installation can be done via pip:

pip install rivertext

Feel free to open an issue on GitHub if you run into any trouble.

Usage

Incremental Word Context Matrix

>>> from rivertext.models.wcm import WordContextMatrix
>>> from torch.utils.data import DataLoader
>>> from tqdm import tqdm
>>> from rivertext.utils import TweetStream

>>> from web.datasets.similarity import fetch_MEN
>>> from web.evaluate import evaluate_similarity

>>> ts = TweetStream("/path/to/tweets.txt")

>>> wcm = WordContextMatrix(10000, 5, 500)

>>> dataloader = DataLoader(ts, batch_size=32)

>>> men = fetch_MEN()

>>> for batch in tqdm(dataloader):
...    wcm.learn_many(batch)

>>> embs = wcm.vocab2dict()
>>> result = evaluate_similarity(embs, men.X, men.y)
>>> print(f'Spearman Correlation: {result}')
Spearman Correlation: 0.08286971636085129

Incremental Word2Vec

>>> from torch.utils.data import DataLoader
>>> from tqdm import tqdm

>>> from rivertext.models.iw2v import IWord2Vec
>>> from rivertext.utils import TweetStream

>>> from web.datasets.similarity import fetch_MEN
>>> from web.evaluate import evaluate_similarity

>>> ts = TweetStream("/path/to/tweets.txt")
>>> men = fetch_MEN()
>>> dataloader = DataLoader(ts, batch_size=32)

>>> iw2v = IWord2Vec(window_size=3,
... emb_size=200,
... sg=1,
... neg_samples_sum=8,
... device="cuda:0"
... )

>>> for batch in tqdm(dataloader):
...    iw2v.learn_many(batch)

>>> embs = iw2v.vocab2dict()
>>> result = evaluate_similarity(embs, men.X, men.y)
>>> print(f'Spearman Correlation: {result}')
Spearman Correlation: 0.18149249269528014