|
| 1 | +## A Dataset Documenting Representations of Machine Vision Technologies |
| 2 | +## in Artworks, Games and Narratives |
| 3 | +## |
| 4 | +# Scripts for handling the data in R. Scripts written by Jill Walker Rettberg. |
| 5 | +# March 18, 2022. |
| 6 | +# |
| 7 | + |
| 8 | +# The dataset can be found at: -------------------------------------------- |
| 9 | + |
| 10 | +# Rettberg, Jill Walker; Kronman, Linda; Solberg, Ragnhild; Gunderson, Marianne; |
| 11 | +# Bjørklund, Stein Magne; Stokkedal, Linn Heidi; Jacob, Kurdin; Markham, Annette, |
| 12 | +# 2022, "A Dataset Documenting Representations of Machine Vision Technologies |
| 13 | +# in Artworks, Games and Narratives", |
| 14 | +# https://doi.org/10.18710/2G0XKN, DataverseNO. |
| 15 | + |
| 16 | +# Set working directory and load tidyverse--------------------------------------------------- |
| 17 | + |
# These scripts assume that the data files are in a subdirectory
# of your working directory called data.
| 20 | +# |
| 21 | +#getwd() |
| 22 | +#setwd() |
| 23 | + |
| 24 | +# These scripts use the Tidyverse package. If you haven't already installed that, |
| 25 | +# remove the # from the following line and run it. |
| 26 | +# |
| 27 | +#install.packages("tidyverse") |
| 28 | +library(tidyverse) |
| 29 | + |
# Import creativeworks ----------------------------------------------------

# Import the file with explicit column types so categorical columns become
# factors with a fixed, known set of levels.

# TechRef and TechUsed share the same vocabulary of machine vision
# technologies. Define the levels once so the two columns cannot drift
# apart if the vocabulary is ever edited.
machine_vision_technologies <- c(
  "Holograms", "Augmented reality", "Ocular implant",
  "Emotion recognition", "Surveillance cameras", "AI",
  "Virtual reality", "Motion tracking", "Body scans",
  "Drones", "MicroscopeOrTelescope", "Biometrics",
  "Image generation", "Facial recognition",
  "Object recognition", "3D scans", "Machine learning",
  "Filtering", "Deepfake", "Camera", "Cameraphone",
  "Interactive panoramas", "Non-Visible Spectrum", "UGV",
  "Webcams", "Satellite images")

CreativeWorks <- read_csv("data/creativeworks.csv",
                          locale = locale(encoding = "UTF-8"),
                          col_types = cols(
                            WorkID = col_integer(),
                            WorkTitle = col_character(),
                            Sentiment = col_factor(levels = c(
                              "Exciting", "Flawed", "Helpful", "Neutral", "Wondrous",
                              "Hostile", "Oppressive", "Alien", "Creepy", "Subversive",
                              "Dangerous", "Intrusive", "Empowering", "Protective",
                              "Intimate", "Misleading", "Fun", "Overwhelming",
                              "Prosocial", "Disgusting")),
                            Topic = col_factor(levels = c(
                              "Nudity", "Social Media", "Romantic relationship", "Climate Change",
                              "Dystopian", "Horror", "Robots/androids", "Surveillance", "Automation",
                              "City", "Labour", "War", "Identity", "AI", "Animals", "Consciousness",
                              "Nature", "Companionship", "Competition", "Playful", "Family",
                              "Free will", "Physical violence", "Crime", "Hacking", "Conflict",
                              "Empathy", "Utopian", "Race", "Sex", "Cyborgs", "Inequality",
                              "Economy", "Grief", "Autonomous vehicles", "Gender")),
                            TechRef = col_factor(levels = machine_vision_technologies),
                            TechUsed = col_factor(levels = machine_vision_technologies)))
| 74 | + |
| 75 | + |
# Import characters.csv ---------------------------------------------------

# Read the character-level file, declaring every factor's levels explicitly
# so tables and plots use a consistent, predictable ordering. Age is read
# as an ordered factor (level order: Child, Young Adult, Adult, Elderly,
# Unknown).
Characters <- read_csv(
  "data/characters.csv",
  locale = locale(encoding = "UTF-8"),
  col_types = cols(
    CharacterID = col_integer(),
    Character = col_character(),
    Species = col_factor(levels = c(
      "Animal", "Cyborg", "Fictional", "Human", "Machine", "Unknown")),
    Gender = col_factor(levels = c(
      "Female", "Male", "Non-binary or Other", "Trans Woman", "Unknown")),
    RaceOrEthnicity = col_factor(levels = c(
      "Asian", "Black", "Person of Colour", "White",
      "Immigrant", "Indigenous", "Complex", "Unknown")),
    Age = col_factor(
      levels = c("Child", "Young Adult", "Adult", "Elderly", "Unknown"),
      ordered = TRUE),
    Sexuality = col_factor(levels = c(
      "Homosexual", "Heterosexual", "Bi-sexual", "Other", "Unknown")),
    IsGroup = col_logical(),
    IsCustomizable = col_logical()
  )
)
| 103 | + |
| 104 | + |
| 105 | +# Simplify character traits ----------------------------------------------- |
| 106 | + |
| 107 | +# Change some of the variables to simplify for analysis. This example merges |
| 108 | +# black, person of colour, indigenous, immigrant and complex into one category: |
| 109 | +# PoC, and merges machines and cyborgs into a value called Robot. Since our |
| 110 | +# categories for race and ethnicity do not follow the specific demographic |
# categories of a country, and there are very few cases of some of the values
# (like Indigenous), for some analyses combining categories will be better.
| 113 | +# The code can easily be adapted to combine or rename categories differently. |
| 114 | +# |
# Format here is:
# mutate(Column_name = recode(Column_name, "Old value" = "New value"))
| 117 | +# |
| 118 | +# Could remove customizable characters with this line: |
| 119 | +# filter(IsCustomizable == FALSE) %>% |
| 120 | +# |
| 121 | +# Convert "Unknown" values to NA. |
| 122 | +# |
| 123 | +# Select relevant columns. |
| 124 | + |
# Keep an untouched copy of the imported data before simplifying.
# NOTE(review): the original script piped from Orig_Characters without ever
# defining it (only Characters is created above), so it errored; create the
# backup here from the freshly imported table.
Orig_Characters <- Characters

Characters <- Orig_Characters %>%
  # Convert "Unknown" in any column to NA. replace() with a %in% mask works
  # column-wise for factor, character and logical columns alike, and does
  # not depend on data-frame support in na_if(), which newer dplyr rejects.
  mutate(across(everything(), ~ replace(., . %in% "Unknown", NA))) %>%
  select(Character, Species, Gender, Sexuality,
         RaceOrEthnicity, Age) %>%
  # recode() maps "old value" = "new value"; identity entries are kept so
  # the resulting factor level order matches the original script's output.
  mutate(RaceOrEthnicity = recode(RaceOrEthnicity,
                                  "Asian" = "Asian",
                                  "Black" = "PoC",
                                  "White" = "White",
                                  "Person of Colour" = "PoC",
                                  "Indigenous" = "PoC",
                                  "Immigrant" = "PoC",
                                  "Complex" = "PoC")) %>%
  mutate(Species = recode(Species,
                          "Human" = "Human",
                          "Machine" = "Robot",
                          "Cyborg" = "Robot",
                          "Fictional" = "Fictional",
                          "Animal" = "Animal"))
| 143 | + |
| 144 | + |
# Load situations.csv -----------------------------------------------------

# Every column is free text except the integer situation ID; no factors
# are needed here.
Situations <- read_csv(
  "data/situations.csv",
  locale = locale(encoding = "UTF-8"),
  col_types = cols(
    SituationID = col_integer(),
    Situation = col_character(),
    Genre = col_character(),
    Character = col_character(),
    Entity = col_character(),
    Technology = col_character(),
    Verb = col_character()
  )
)
| 159 | + |
| 160 | + |
# Merge characters.csv with situations.csv to see character actions -------

# Make a new dataframe called Verbs that shows all characters with their
# traits (species, age etc), what situations they are in and what actions
# they take when interacting with machine vision.
#
# full_join() keeps unmatched rows from both tables — the same semantics as
# base merge(..., all = TRUE) — and returns a tibble, consistent with the
# tidyverse style used in the rest of this script.
Verbs <- full_join(Situations, Characters, by = "Character")
| 168 | + |
| 169 | + |
# Contingency tables ------------------------------------------------------

# The following scripts transform Verbs into contingency tables where each
# row contains one verb and the number of times it is used in each of the
# genres (art, games, narratives) and who or what uses it. A new column
# `target` has also been added which is TRUE if the verb is active (ends
# in -ing) and FALSE if it is passive (ends in -ed).
#
Tech_verbs_contingency <- Verbs %>%
  filter(!is.na(Technology)) %>%
  select(Verb, Genre, Technology) %>%
  pivot_longer(cols = -Verb,
               names_to = "variable",
               values_to = "value") %>%
  group_by(Verb, value) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(names_from = "value", values_from = "n") %>%
  mutate(across(everything(), ~ replace(., is.na(.), 0))) %>% # NA -> 0: counts
  # Anchor with "$" so only verbs that END in -ing count as active; a bare
  # "ing" pattern would also match passives such as "Singed" or "Tinged".
  mutate(target = str_detect(Verb, "ing$"), .after = Verb) %>%
  relocate(Verb, target, Art, Game, Narrative) # put these cols first
| 190 | + |

# Same transformation for verbs attached to entities (non-character,
# non-technology actors).
Entity_verbs_contingency <- Verbs %>%
  filter(!is.na(Entity)) %>%
  select(Verb, Genre, Entity) %>%
  pivot_longer(cols = -Verb,
               names_to = "variable",
               values_to = "value") %>%
  group_by(Verb, value) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(names_from = "value", values_from = "n") %>%
  mutate(across(everything(), ~ replace(., is.na(.), 0))) %>% # NA -> 0: counts
  # "$" anchor: only verbs ENDING in -ing are active (see comment above).
  mutate(target = str_detect(Verb, "ing$"), .after = Verb) %>%
  relocate(Verb, target, Art, Game, Narrative)
| 204 | + |
# Same transformation for character verbs, counted across the character
# trait columns rather than genre.
Character_verbs_contingency <- Verbs %>%
  filter(!is.na(Character)) %>%
  select(Verb, Gender, Species, RaceOrEthnicity, Age, Sexuality) %>%
  pivot_longer(cols = -Verb,
               names_to = "variable",
               values_to = "value") %>%
  group_by(Verb, value) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(names_from = "value", values_from = "n") %>%
  mutate(across(everything(), ~ replace(., is.na(.), 0))) %>% # NA -> 0: counts
  # "$" anchor: only verbs ENDING in -ing are active (see comment above).
  mutate(target = str_detect(Verb, "ing$"), .after = Verb) %>%
  # The original called relocate() with no arguments, which is a no-op;
  # state the intent explicitly: Verb and target first, trait counts after.
  relocate(Verb, target)
| 217 | + |
# Write each contingency table to its own CSV file under data/.
list(tech = Tech_verbs_contingency,
     character = Character_verbs_contingency,
     entity = Entity_verbs_contingency) %>%
  iwalk(~ write_csv(.x, paste0("data/", .y, "_verbs_contingency.csv")))
| 221 | + |
# Make contingency table from situations showing verb use------------------
| 223 | + |
# Make contingency table SitCounts with a count of the times each verb is
# used by a Character, a Technology or an Entity. Then add a column
# VerbType stating whether the Verb is active (-ing) or passive (-ed).

SitCounts <- Situations %>%
  mutate(Tech = !is.na(Technology),
         Char = !is.na(Character),
         Ent = !is.na(Entity)) %>%
  select(Verb, Tech, Char, Ent) %>%
  pivot_longer(-Verb, names_to = "var", values_to = "value") %>%
  count(Verb, var, wt = value) %>%
  # Name id_cols explicitly: passing it positionally is deprecated since
  # tidyr 1.2.
  pivot_wider(id_cols = Verb, names_from = var, values_from = n) %>%
  mutate(VerbType = case_when(
    str_detect(Verb, "ing$") ~ "Active",
    str_detect(Verb, "ed$") ~ "Passive")) # anything else is left as NA
| 239 | + |
| 240 | +# Transform worksinfo.csv to a wide format table -------------------------- |
| 241 | + |
| 242 | +# If you find it easier to work with a wide table, with a row for each work |
| 243 | +# and columns for WorkID, Work_WikidataID, WorkTitle, Genre, Year, Creator, |
| 244 | +# Country, URL and IsSciFI, you can use this code to generate it. |
| 245 | +# Because many works have multiple creators, multiple countries and |
# even multiple URLs, the code creates a new column for each creator.
| 247 | + |
worksinfo <- read_csv("data/worksinfo.csv") %>%
  arrange(WorkID, Variable) %>%
  group_by(WorkID, Variable) %>%
  # Number each repeated variable within a work (Creator1, Creator2, ...,
  # Country1, ..., URL1, ...) so pivot_wider() gets unique column names.
  # paste0(Variable, dup_id) replaces the original 17-branch case_when and
  # generalizes to any number of creators/countries/URLs.
  mutate(dup_id = row_number(),
         Variable = as.character(Variable),
         Variable = if_else(Variable %in% c("Creator", "Country", "URL"),
                            paste0(Variable, dup_id),
                            Variable)) %>%
  select(-dup_id) %>%
  pivot_wider(names_from = Variable,
              values_from = Value) %>%
  # Fixed column order for the wide table. NOTE(review): this assumes the
  # data contains up to 8 creators, 5 countries and 3 URLs, as in the
  # published dataset; select() will error if any of these columns is
  # absent.
  select(WorkID, Work_WikidataID, WorkTitle, Genre, Year, Creator1,
         Country1, URL1, URL2, URL3, Country2, Country3, Country4,
         Country5, Creator2, Creator3, Creator4, Creator5, Creator6,
         Creator7, Creator8, IsSciFi) %>%
  ungroup()
# End of script.