Polis 2.0 Pipeline

%%capture
# Install the notebook's dependencies straight from GitHub:
#   - the `delphi/umap_narrative/polismath_commentgraph` subdirectory of
#     patcon/polis, from the `package-commentgraph` branch
#   - patcon/valency-anndata, from the `main` branch
# `%%capture` (which must stay the first line of the cell) hides the
# installer output.
%pip install --quiet \
"git+https://github.com/patcon/polis@package-commentgraph#subdirectory=delphi/umap_narrative/polismath_commentgraph" \
"git+https://github.com/patcon/valency-anndata@main"
import valency_anndata as val
# Polis report whose data is loaded into `adata` for the rest of the notebook.
report_url = "https://pol.is/report/r7wehfsmutrwndviddnii"
adata = val.datasets.polis.load(report_url)
Data was gathered using the Polis software (see: https://compdemocracy.org/polis
and https://github.com/compdemocracy/polis) and is sub-licensed under CC BY 4.0
with Attribution to The Computational Democracy Project.
The data and more information about how the data was collected can be found at
the following link: https://pol.is/report/r7wehfsmutrwndviddnii
# Run the Polis 2.0 statements recipe on `adata` inside the schematic-diagram
# context manager.
# NOTE(review): presumably the diagram shows what the recipe changed relative
# to the `diff_from` snapshot -- confirm against the valency-anndata docs.
with val.viz.schematic_diagram(diff_from=adata):
    val.tools.recipe_polis2_statements(adata)
modules.json: 0%| | 0.00/349 [00:00<?, ?B/s]
config_sentence_transformers.json: 0%| | 0.00/116 [00:00<?, ?B/s]
README.md: 0.00B [00:00, ?B/s]
sentence_bert_config.json: 0%| | 0.00/53.0 [00:00<?, ?B/s]
config.json: 0%| | 0.00/612 [00:00<?, ?B/s]
model.safetensors: 0%| | 0.00/90.9M [00:00<?, ?B/s]
Loading weights: 0%| | 0/103 [00:00<?, ?it/s]
tokenizer_config.json: 0%| | 0.00/350 [00:00<?, ?B/s]
vocab.txt: 0.00B [00:00, ?B/s]
tokenizer.json: 0.00B [00:00, ?B/s]
special_tokens_map.json: 0%| | 0.00/112 [00:00<?, ?B/s]
config.json: 0%| | 0.00/190 [00:00<?, ?B/s]

# Transpose .var and .obs axes for plotting, so the former .var entries
# become the points drawn on the "content_umap" basis.
transposed_adata = adata.transpose()

# One panel per entry in `color`.
val.viz.embedding(
    transposed_adata,
    basis="content_umap",
    color=["evoc_polis2_top", "moderation_state"],
)

%%capture
# datamapplot provides `create_interactive_plot`, used for the interactive
# data-map rendering; `%%capture` hides the installer output.
%pip install datamapplot
# Imported up front because both branches below call `display`.
from IPython.display import Image, display

# Toggle for this cell: True shows a static image fetched from the URL below
# (presumably a pre-rendered screenshot of the plot -- confirm); False builds
# the live interactive datamapplot figure from `adata`.
FAKE_RENDER_DATAMAPPLOT = True

if FAKE_RENDER_DATAMAPPLOT:
    display(Image(url="https://i.imgur.com/CMiO6nu.png", width=800))
else:
    import datamapplot
    import numpy as np

    # NOTE(review): assumes varm["evoc_polis2"] is laid out as
    # (n_statements, n_layers), so the transpose yields one row of cluster
    # ids per layer -- confirm against the recipe output.
    label_layers = adata.varm["evoc_polis2"].transpose()

    # Humanized cluster labels in layers (not real topics yet).
    # The layers are walked in reverse order and numbered from Zoom1 upward.
    label_layers_humanized = [
        [f"Zoom{zoom_level}:Group{group_id}" for group_id in row]
        for zoom_level, row in enumerate(reversed(label_layers), start=1)
    ]

    interactive_plot = datamapplot.create_interactive_plot(
        adata.varm["content_umap"],
        *label_layers_humanized,
        title="Bowling Green 2050",
        sub_title=f"{adata.shape[1]} statements",
        hover_text=adata.var["content"],
        enable_search=True,
        darkmode=True,
        height=500,
        # Needed when there are too many statements.
        # See: https://github.com/TutteInstitute/datamapplot/pull/67
        palette_theta_range=np.pi / 8,
    )

    # An expression nested inside if/else is not the cell's last top-level
    # expression, so the notebook would not auto-render the figure; display
    # it explicitly.
    display(interactive_plot)
