Skip to content

Large Conversation

Open In Colab

Notebook Preparation

Screenshot of final results

For fastest processing

  1. Click Edit > Notebook settings
  2. Select T4 GPU and save.

The runtime will restart, and the PaCMAP and LocalMAP steps should run much quicker :)

import os

if os.environ.get("IS_GENERATING_DOCS", None):
    from itables import init_notebook_mode
    init_notebook_mode(all_interactive=True) # (1)!
  1. Renders interactive tables for dataframes, which look really nice on the documentation website.

Installation & Import

%pip install -qqq git+https://github.com/patcon/valency-anndata@main
import valency_anndata as val

Loading Data

# Loads a dataset of 33k German-speaking participants,
# run by the Aufstehen political party in 2018
adata = val.datasets.aufstehen(translate_to="en")
Downloading (incomplete total...): 0.00B [00:00, ?B/s]



Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]
adata.var
Loading ITables v2.7.0 from the init_notebook_mode cell... (need help?)

Running Vanilla Polis Pipeline

# Run the Polis steps and generate a "visual diff" for before and after (green = new)
with val.viz.schematic_diagram(diff_from=adata):
  # Takes ~1 minute
  val.tools.recipe_polis(adata)
/home/runner/work/valency-anndata/valency-anndata/.venv/lib/python3.10/site-packages/scanpy/preprocessing/_pca/__init__.py:226: FutureWarning: Argument `use_highly_variable` is deprecated, consider using the mask argument. Use_highly_variable=True can be called through mask_var="highly_variable". Use_highly_variable=False can be called through mask_var=None
  mask_var_param, mask_var = _handle_mask_var(

svg

val.viz.embedding(adata, basis="pca_polis", color="kmeans_polis")

png

Running Additional Projections: PaCMAP, LocalMAP, UMAP

# Takes ~1 min (~4 min without GPU)
val.tools.pacmap(adata, layer="X_masked_imputed_mean")
val.viz.embedding(adata, basis="pacmap", color="kmeans_polis")

png

# Takes ~3 min (~10 min without GPU)
val.tools.localmap(adata, layer="X_masked_imputed_mean")
val.viz.embedding(adata, basis="localmap", color="kmeans_polis")

png

# Takes ~2 min
val.preprocessing.neighbors(adata, use_rep="X_pca_polis")
val.tools.umap(adata)
/home/runner/work/valency-anndata/valency-anndata/.venv/lib/python3.10/site-packages/umap/spectral.py:548: UserWarning: Spectral initialisation failed! The eigenvector solver
failed. This is likely due to too small an eigengap. Consider
adding some noise or jitter to your data.

Falling back to random initialisation!
  warn(
val.viz.embedding(adata, basis="umap", color="kmeans_polis")

png

Animating 33k grouped participants

# Explore the differences between each projection based on the
# results of k-means clustering on the basic Polis PCA projections.
val.viz.jscatter(
    adata,
    use_reps=[
        "X_pca_polis",
        "X_pacmap",
        "X_localmap",
        "X_umap",
    ],
    color="kmeans_polis",
)

Coloring with data

# Calculate general vote metrics on all participants and statements.
with val.viz.schematic_diagram(diff_from=adata):
  val.preprocessing.calculate_qc_metrics(adata, inplace=True)

svg

# Explore the differences between each projection based on these metrics.
val.viz.jscatter(
    adata,
    use_reps=[
        "X_pca_polis",
        "X_pacmap",
        "X_localmap",
        "X_umap",
    ],
    color="pct_seen", # One of the vote metrics calculated above
)