# Load a Polis conversation into an AnnData object from a report URL.
adata = val.datasets.polis.load("https://pol.is/report/r29kkytnipymd3exbynkd")
# These work similarly to the above source:
# - r29kkytnipymd3exbynkd
# - https://pol.is/3hfmicmybc
# - 3hfmicmybc
#
# NOTE: Technically, there are some differences between
# 1. data fetched from CSV exports (best source when we have report_id) and
# 2. data fetched from API endpoints (best we can do when we only have conversation_id)

# Show schematic diff from a fresh state.
val.viz.schematic_diagram(adata, diff_from=None)  # (1)!
adata  # (2)!
renders a visual version of our AnnData object (only partial summary for now)
renders a text version of our AnnData object with full summary
Data was gathered using the Polis software (see: https://compdemocracy.org/polis
and https://github.com/compdemocracy/polis) and is sub-licensed under CC BY 4.0
with Attribution to The Computational Democracy Project.
The data and more information about how the data was collected can be found at
the following link: https://pol.is/report/r29kkytnipymd3exbynkd
# Translate all statements into a common language. Let's try English.
# We could have added this during initial loading:
#
#   adata = val.datasets.polis.load("<report url>", translate_to="en")
#
# Done using unofficial [Google Translate] APIs, so no auth required!
# See: https://github.com/ssut/py-googletrans
val.datasets.polis.translate_statements(adata, translate_to="en")
adata.var
Loading ITables v2.7.0 from the init_notebook_mode cell...
(need help?)
# Run the full Polis reference pipeline on a copy, diagramming what changes.
polis_adata = adata.copy()

with val.viz.schematic_diagram(diff_from=polis_adata):
    # Normally recipe_polis writes PCA projections to X_pca_polis, but
    # we'll override the key to X_pca for simplicity of plotting.
    # (If we don't override, we must use val.scanpy.pl.embedding to plot.)
    val.tools.recipe_polis(
        polis_adata,
        key_added_pca="X_pca",
        key_added_kmeans="kmeans_polis",
    )

polis_adata
/home/runner/work/valency-anndata/valency-anndata/.venv/lib/python3.10/site-packages/scanpy/preprocessing/_pca/__init__.py:226: FutureWarning: Argument `use_highly_variable` is deprecated, consider using the mask argument. Use_highly_variable=True can be called through mask_var="highly_variable". Use_highly_variable=False can be called through mask_var=None
mask_var_param, mask_var = _handle_mask_var(
from valency_anndata.tools._polis import _zero_mask, _cluster_mask

# Variant pipeline: Polis-style masking + mean imputation, then PaCMAP.
pacmap_adata = adata.copy()

with val.viz.schematic_diagram(diff_from=pacmap_adata):
    _zero_mask(pacmap_adata)
    _cluster_mask(pacmap_adata)
    val.preprocessing.impute(
        pacmap_adata,
        strategy="mean",
        source_layer="X_masked",
        target_layer="X_masked_imputed_mean",
    )
    val.tools.pacmap(
        pacmap_adata,
        layer="X_masked_imputed_mean",
    )
    val.tools.kmeans(
        pacmap_adata,
        init="polis",  # BUG: fix this to accept kmeans++
        k_bounds=(2, 9),
        use_rep="X_pacmap",
        mask_obs="cluster_mask",
        key_added="kmeans_pacmap",
    )

pacmap_adata
# Same masking/imputation pipeline as the PaCMAP cell, but using LocalMap.
localmap_adata = adata.copy()

with val.viz.schematic_diagram(diff_from=localmap_adata):
    _zero_mask(localmap_adata)
    _cluster_mask(localmap_adata)
    val.preprocessing.impute(
        localmap_adata,
        strategy="mean",
        source_layer="X_masked",
        target_layer="X_masked_imputed_mean",
    )
    val.tools.localmap(
        localmap_adata,
        layer="X_masked_imputed_mean",
    )
    val.tools.kmeans(
        localmap_adata,
        init="polis",  # BUG: fix this to accept kmeans++
        k_bounds=(2, 9),
        use_rep="X_localmap",
        mask_obs="cluster_mask",
        key_added="kmeans_localmap",
    )

localmap_adata
# Prepare a rough Polis pipeline (not fully prepared)
val.preprocessing.impute(adata, strategy="mean")
val.tools.pca(adata, layer="X_imputed_mean")

# See how various participant QC metrics look.
# Participant rows are "observations", so use "obs" entries listed above.
val.viz.pca(
    adata,
    color=[
        'pct_agree',
        'pct_disagree',
        'pct_pass',
        'pct_engaged',
        'pct_agree_engaged',
        'pct_seen',
    ],
    ncols=2,
    size=20,
)
# Check out how different principal components look.
val.viz.pca(
    adata,
    color=['pct_engaged', 'pct_engaged', 'pct_agree_engaged', 'pct_agree_engaged'],
    dimensions=[(0, 1), (2, 3), (0, 1), (2, 3)],
    ncols=2,
)
/home/runner/work/valency-anndata/valency-anndata/.venv/lib/python3.10/site-packages/scipy/sparse/_index.py:210: SparseEfficiencyWarning: Changing the sparsity structure of a csr_matrix is expensive. lil and dok are more efficient.
self._set_arrayXarray(i, j, x)
# When we are executing prospective functionality that hasn't yet been written,
# we wrap it in a conditional and in the meantime output a placeholder/mockup.
#
# Thanks to Gregor Martynus (gr2m) for concept: https://github.com/gr2m/dreamcode.io


def dreamcode_exists(placeholder_image=None):
    """Guard for "dreamcode" cells — code that doesn't exist yet.

    Optionally renders a placeholder/mockup image, then always returns
    False so the guarded block is never executed.

    Parameters
    ----------
    placeholder_image : str, optional
        URL of a mockup image to display in place of real output.

    Returns
    -------
    bool
        Always False.
    """
    if placeholder_image:
        # Imported lazily so a bare dreamcode_exists() call works even
        # where IPython's display machinery isn't available.
        from IPython.display import Image, display

        display(Image(url=placeholder_image, width=500))
    # Dreamcode never exists yet.
    return False
if dreamcode_exists():
    # See: https://github.com/patcon/universal-polis-wrapper/
    adata = val.datasets.polis.load(source="https://pol.is/2demo")

    # This would augment Polis conversation data by importing
    # participant data from a typeform into anndata's adata.obs DataFrame
    # by opportunistically joining the data via any matched xids
    val.datasets.import_typeform(adata, typeform_id="wFXxYRdJ")
# Inspiration: https://scanpy.readthedocs.io/en/stable/api/datasets.html
if dreamcode_exists():
    # This would be a variant where likert scale data is loaded.
    # - this would use `val.preprocessing.convert_likert` (see below)
    # - this could either augment polis conversation data, or be used in lieu
    #   of Polis data, processing all data from Typeform responses.
    #
    # See: https://github.com/polis-community/red-dwarf/issues/89
    val.datasets.import_typeform(adata, typeform_id="wFXxYRdJ", likert_conversion=True)
    val.datasets.import_typeform(adata, typeform_id="wFXxYRdJ", likert_conversion="liberal")
    # Fixed typo: "conversative" -> "conservative" (matches the intended scheme name).
    val.datasets.import_typeform(adata, typeform_id="wFXxYRdJ", likert_conversion="conservative")

    # Extract polislike valence data from CIP Global Dialogues on AI.
    # - citizens of many countries surveyed over time.
    # - pre-generated synthetic agree/disagree/pass data.
    #
    # See: https://globaldialogues.ai/about
    # See: https://globaldialogues.ai/download-data
    adata = val.datasets.load_cip_global_dialogues()

    # Extract polislike valence data from UTokyo Asahi Survey (UTAS).
    # - pre-election survey of both voters and electoral candidates.
    # - long history, running most years since 2003.
    # - media collaboration gives rare high rate of politician responses.
    # - repeats a substantial number of questions across survey waves.
    #
    # See: https://www.masaki.j.u-tokyo.ac.jp/utas/utasindex_en.html
    # See: https://github.com/nishio/UTAS-UMAP
    adata = val.datasets.load_utokyo_asahi_survey()

    # Extract polislike valence data from the World Values Survey (WVS).
    # - well-known repeated survey of Schwartz human values.
    # - can specify data subset in various ways for convenience.
    #
    # See: https://www.worldvaluessurvey.org/WVSContents.jsp
    adata = val.datasets.load_world_values_survey(wave=8)
    adata = val.datasets.load_world_values_survey(years=1997)

    # Extract polislike valence data from European Social Survey (ESS).
    # - well-known repeated survey of Schwartz human values.
    # - has sections also focussed on current events.
    # - can specify data subset in various ways for convenience.
    #
    # See: https://ess.sikt.no/en/series/321b06ad-1b98-4b7d-93ad-ca8a24e8788a
    # See: https://github.com/ropensci/essurvey/issues/57#issuecomment-3643483042
    adata = val.datasets.load_european_social_survey(round=11)
if dreamcode_exists():
    # Process likert scale data into polislike/valence data. e.g., -1/0/+1
    # - there are different ways to convert likert scales:
    #   - liberal: 12345 => DDPAA
    #   - strict: 12345 => DPPPA
    # - there might be clever, per-participant ways to deduce thresholds
    # - this is used as basis of processing many other datasets
    val.preprocessing.convert_likert(vote_data, conversion_scheme="DDPAA")
if dreamcode_exists("https://imgur.com/t7G45jo.png"):
    # Inspiration: https://scanpy.readthedocs.io/en/stable/tutorials/basics/clustering.html#nearest-neighbor-graph-construction-and-visualization
    # Inspiration: https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.neighbors.html
    val.preprocessing.neighbors(adata)

    # Inspiration: https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.umap.html
    val.tools.umap(adata)

    # Inspiration: https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pl.umap.html
    val.viz.umap(
        adata,
        color="kmeans",
        # Setting a smaller point size to prevent overlap
        size=2,
    )
if dreamcode_exists("https://imgur.com/0dQuYJW.png"):
    # 1. Use an LLM to analyse all the statements for indications of geographic boundaries.
    # 2. Search Open Street Map for boundaries, and then extract via API and process.
    # 3. Choose the best representation of a boundary for each participant.
    # 4. Add centroid as latlon to adata.obsm["geospatial"]
    # See: https://github.com/patcon/kedro-polislike-pipelines/issues/1
    # See: https://main--68c53b7909ee2fb48f1979dd.chromatic.com/iframe.html?id=components-app-kedro-mode--kedro-mode-with-animation&args=kedroBaseUrl%3Ahttps__COLON____SLASH____SLASH__patcon__DOT__github__DOT__io__SLASH__kedro-polislike-pipelines-san-juan-islands__SLASH__&viewMode=story
    val.tools.extract_geo_boundaries(adata)
    val.viz.perspective_explorer(
        adata,
        use_reps=["X_mean_pca", "X_mean_localmap", "geospatial"],
    )