Skip to content

Commit 09106c2

Browse files
committed
Initial commit
1 parent 3f94443 commit 09106c2

1 file changed

Lines changed: 279 additions & 0 deletions

File tree

machinevisionscripts.R

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
## A Dataset Documenting Representations of Machine Vision Technologies
2+
## in Artworks, Games and Narratives
3+
##
4+
# Scripts for handling the data in R. Scripts written by Jill Walker Rettberg.
5+
# March 18, 2022.
6+
#
7+
8+
# The dataset can be found at: --------------------------------------------
9+
10+
# Rettberg, Jill Walker; Kronman, Linda; Solberg, Ragnhild; Gunderson, Marianne;
11+
# Bjørklund, Stein Magne; Stokkedal, Linn Heidi; Jacob, Kurdin; Markham, Annette,
12+
# 2022, "A Dataset Documenting Representations of Machine Vision Technologies
13+
# in Artworks, Games and Narratives",
14+
# https://doi.org/10.18710/2G0XKN, DataverseNO.
15+
16+
# Set working directory and load tidyverse---------------------------------------------------
17+
18+
# These scripts assume that the data files are in a subdirectory
19+
# of your working directory called data.
20+
#
21+
#getwd()
22+
#setwd()
23+
24+
# These scripts use the Tidyverse package. If you haven't already installed that,
25+
# remove the # from the following line and run it.
26+
#
27+
#install.packages("tidyverse")
28+
library(tidyverse)
29+
30+
# Import creativeworks ----------------------------------------------------
31+
32+
33+
# Importing each file with factors etc
34+
35+
# Read data/creativeworks.csv as UTF-8. WorkID is parsed as an integer,
# WorkTitle as text, and the four tag columns (Sentiment, Topic, TechRef,
# TechUsed) as factors with explicitly listed levels.
#
# TechRef and TechUsed share one list of technologies, so that list is written
# once. local() keeps the helper vectors out of the global environment; only
# CreativeWorks is created.
CreativeWorks <- local({
  sentiment_levels <- c(
    "Exciting", "Flawed", "Helpful", "Neutral", "Wondrous",
    "Hostile", "Oppressive", "Alien", "Creepy", "Subversive",
    "Dangerous", "Intrusive", "Empowering", "Protective",
    "Intimate", "Misleading", "Fun", "Overwhelming",
    "Prosocial", "Disgusting"
  )

  topic_levels <- c(
    "Nudity", "Social Media", "Romantic relationship", "Climate Change",
    "Dystopian", "Horror", "Robots/androids", "Surveillance", "Automation",
    "City", "Labour", "War", "Identity", "AI", "Animals", "Consciousness",
    "Nature", "Companionship", "Competition", "Playful", "Family",
    "Free will", "Physical violence", "Crime", "Hacking", "Conflict",
    "Empathy", "Utopian", "Race", "Sex", "Cyborgs", "Inequality",
    "Economy", "Grief", "Autonomous vehicles", "Gender"
  )

  technology_levels <- c(
    "Holograms", "Augmented reality", "Ocular implant",
    "Emotion recognition", "Surveillance cameras", "AI",
    "Virtual reality", "Motion tracking", "Body scans",
    "Drones", "MicroscopeOrTelescope", "Biometrics",
    "Image generation", "Facial recognition",
    "Object recognition", "3D scans", "Machine learning",
    "Filtering", "Deepfake", "Camera", "Cameraphone",
    "Interactive panoramas", "Non-Visible Spectrum", "UGV",
    "Webcams", "Satellite images"
  )

  read_csv(
    "data/creativeworks.csv",
    locale = locale(encoding = "UTF-8"),
    col_types = cols(
      WorkID = col_integer(),
      WorkTitle = col_character(),
      Sentiment = col_factor(levels = sentiment_levels),
      Topic = col_factor(levels = topic_levels),
      TechRef = col_factor(levels = technology_levels),
      TechUsed = col_factor(levels = technology_levels)
    )
  )
})
74+
75+
76+
# Import characters.csv ---------------------------------------------------
77+
78+
# Read data/characters.csv as UTF-8. The trait columns are parsed as factors
# with explicitly listed levels; Age is the only ordered factor (Child <
# Young Adult < Adult < Elderly < Unknown). IsGroup and IsCustomizable are
# parsed as logicals.
#
# The column specification is built first and then handed to read_csv();
# local() keeps the helper out of the global environment.
Characters <- local({
  character_columns <- cols(
    CharacterID = col_integer(),
    Character = col_character(),
    Species = col_factor(
      levels = c("Animal", "Cyborg", "Fictional", "Human", "Machine", "Unknown")
    ),
    Gender = col_factor(
      levels = c(
        "Female", "Male", "Non-binary or Other", "Trans Woman", "Unknown"
      )
    ),
    RaceOrEthnicity = col_factor(
      levels = c(
        "Asian", "Black", "Person of Colour", "White",
        "Immigrant", "Indigenous", "Complex", "Unknown"
      )
    ),
    Age = col_factor(
      levels = c("Child", "Young Adult", "Adult", "Elderly", "Unknown"),
      ordered = TRUE
    ),
    Sexuality = col_factor(
      levels = c("Homosexual", "Heterosexual", "Bi-sexual", "Other", "Unknown")
    ),
    IsGroup = col_logical(),
    IsCustomizable = col_logical()
  )

  read_csv(
    "data/characters.csv",
    locale = locale(encoding = "UTF-8"),
    col_types = character_columns
  )
})
103+
104+
105+
# Simplify character traits -----------------------------------------------
106+
107+
# Change some of the variables to simplify for analysis. This example merges
108+
# black, person of colour, indigenous, immigrant and complex into one category:
109+
# PoC, and merges machines and cyborgs into a value called Robot. Since our
110+
# categories for race and ethnicity do not follow the specific demographic
111+
# categories of a country, and there are very few cases of some of the values
112+
# (like indigenous), for some analyses combining categories will be better.
113+
# The code can easily be adapted to combine or rename categories differently.
114+
#
115+
# Format here is:
116+
# mutate(New_column_name = recode(Old_column_name, "Old value" = "New value"))
117+
#
118+
# Could remove customizable characters with this line:
119+
# filter(IsCustomizable == FALSE) %>%
120+
#
121+
# Convert "Unknown" values to NA.
122+
#
123+
# Select relevant columns.
124+
125+
# Build the simplified Characters table used by the rest of the script.
#
# The import step stores the raw table in `Characters`, so keep a copy of it as
# `Orig_Characters` before overwriting `Characters` with the simplified
# version. (Previously `Orig_Characters` was read here without ever being
# created.) To drop customizable characters, filter `Orig_Characters` on
# IsCustomizable before the select() below.
Orig_Characters <- Characters

Characters <- Orig_Characters %>%
  # Keep only the name and the trait columns needed for analysis.
  select(Character, Species, Gender, Sexuality, RaceOrEthnicity, Age) %>%
  # Treat "Unknown" as missing. na_if() is applied column by column because
  # it works on vectors; passing a whole data frame is not supported by
  # current dplyr releases.
  mutate(across(everything(), ~ na_if(.x, "Unknown"))) %>%
  # recode() takes "old value" = "new value" pairs. Values that are not listed
  # are left unchanged; the identity pairs are kept so that every category is
  # visible in one place.
  mutate(
    RaceOrEthnicity = recode(
      RaceOrEthnicity,
      "Asian" = "Asian",
      "Black" = "PoC",
      "White" = "White",
      "Person of Colour" = "PoC",
      "Indigenous" = "PoC",
      "Immigrant" = "PoC",
      "Complex" = "PoC"
    ),
    Species = recode(
      Species,
      "Human" = "Human",
      "Machine" = "Robot",
      "Cyborg" = "Robot",
      "Fictional" = "Fictional",
      "Animal" = "Animal"
    )
  )
143+
144+
145+
# Load situations.csv -----------------------------------------------------
146+
147+
# Read data/situations.csv as UTF-8. Column types are given with readr's
# one-letter codes: "i" = integer, "c" = character. SituationID is the only
# integer column; the other six listed columns are read as text.
Situations <- read_csv(
  "data/situations.csv",
  locale = locale(encoding = "UTF-8"),
  col_types = cols(
    SituationID = "i",
    Situation = "c",
    Genre = "c",
    Character = "c",
    Entity = "c",
    Technology = "c",
    Verb = "c"
  )
)
159+
160+
161+
# Merge characters.csv with situations.csv to see character actions -------
162+
163+
# Make a new dataframe called Verbs that shows all characters with their traits
164+
# (species, age etc), what situations they are in and what actions they take
165+
# when interacting with machine vision.
166+
167+
# Join situations to character traits on the shared Character column.
# all = TRUE makes this a full outer join: rows from either table that have no
# match in the other are kept, with NA in the columns that come from the other
# table.
Verbs <- merge(
  x = Situations,
  y = Characters,
  by = "Character",
  all = TRUE
)
168+
169+
170+
# Contingency tables -----------------------------------------------------
171+
172+
# The following scripts transform Verbs into contingency tables where each row
173+
# contains one verb and the number of times it is used in each of the genres (art,
174+
# games, narratives) and who or what uses it. A new column has also been added
175+
# which is TRUE if the verb is active (ends in -ing) and FALSE if it is passive
176+
# (ends in -ed).
177+
#
178+
# Contingency table of verbs used by technologies: one row per Verb, one count
# column per Genre value and per Technology value.
Tech_verbs_contingency <- Verbs %>%
  filter(!is.na(Technology)) %>%
  select(Verb, Genre, Technology) %>%
  # Stack Genre and Technology into a single `value` column so that both can
  # be counted against Verb in one pass.
  pivot_longer(
    cols = -Verb,
    names_to = "variable",
    values_to = "value"
  ) %>%
  # count() returns an ungrouped table, so nothing downstream inherits a
  # leftover grouping by Verb.
  count(Verb, value) %>%
  # A Verb/value pair that never occurs is a count of 0, not a missing value.
  pivot_wider(names_from = value, values_from = n, values_fill = 0) %>%
  # target is TRUE when the verb ENDS in "ing" (active form). The pattern is
  # anchored with $ so that a passive verb which merely contains "ing" is not
  # flagged as active; this matches the "ing$" test used for SitCounts.
  mutate(target = str_detect(Verb, "ing$"), .after = Verb) %>%
  # Put the genre columns first. any_of() avoids an error if a genre happens
  # to be absent from the filtered data.
  relocate(Verb, target, any_of(c("Art", "Game", "Narrative")))
190+
191+
192+
# Contingency table of verbs used by entities: one row per Verb, one count
# column per Genre value and per Entity value.
Entity_verbs_contingency <- Verbs %>%
  filter(!is.na(Entity)) %>%
  select(Verb, Genre, Entity) %>%
  # Stack Genre and Entity into a single `value` column so that both can be
  # counted against Verb in one pass.
  pivot_longer(
    cols = -Verb,
    names_to = "variable",
    values_to = "value"
  ) %>%
  # count() returns an ungrouped table, so nothing downstream inherits a
  # leftover grouping by Verb.
  count(Verb, value) %>%
  # A Verb/value pair that never occurs is a count of 0, not a missing value.
  pivot_wider(names_from = value, values_from = n, values_fill = 0) %>%
  # target is TRUE when the verb ENDS in "ing" (active form). The pattern is
  # anchored with $ so that a passive verb which merely contains "ing" is not
  # flagged as active; this matches the "ing$" test used for SitCounts.
  mutate(target = str_detect(Verb, "ing$"), .after = Verb) %>%
  # Put the genre columns first. any_of() avoids an error if a genre happens
  # to be absent from the filtered data.
  relocate(Verb, target, any_of(c("Art", "Game", "Narrative")))
204+
205+
# Contingency table of verbs used by characters: one row per Verb, one count
# column per trait value (gender, species, race or ethnicity, age, sexuality).
Character_verbs_contingency <- Verbs %>%
  filter(!is.na(Character)) %>%
  select(Verb, Gender, Species, RaceOrEthnicity, Age, Sexuality) %>%
  # The trait columns are factors with different level sets, and Age is an
  # ordered factor. Convert them to plain text before stacking them into one
  # column, so the reshape does not depend on how factor types are combined.
  mutate(across(-Verb, as.character)) %>%
  pivot_longer(
    cols = -Verb,
    names_to = "variable",
    values_to = "value"
  ) %>%
  # count() returns an ungrouped table, so nothing downstream inherits a
  # leftover grouping by Verb.
  count(Verb, value) %>%
  # A Verb/value pair that never occurs is a count of 0, not a missing value.
  pivot_wider(names_from = value, values_from = n, values_fill = 0) %>%
  # target is TRUE when the verb ENDS in "ing" (active form). The pattern is
  # anchored with $ so that a passive verb which merely contains "ing" is not
  # flagged as active; this matches the "ing$" test used for SitCounts.
  # The trailing relocate() with no arguments was removed: it moved nothing.
  mutate(target = str_detect(Verb, "ing$"), .after = Verb)
217+
218+
# Save the three contingency tables as CSV files in the data subdirectory.
Tech_verbs_contingency %>%
  write_csv("data/tech_verbs_contingency.csv")

Character_verbs_contingency %>%
  write_csv("data/character_verbs_contingency.csv")

Entity_verbs_contingency %>%
  write_csv("data/entity_verbs_contingency.csv")
221+
222+
# Make contingency table from situations showing verb use-----------------------------------
223+
224+
# Make contingency table SitCounts with count of times each verb is used by
225+
# a Character, a Technology or an Entity. Then add a column VerbType stating
226+
# whether Verb is active or passive.
227+
228+
# For each verb, count in how many situations it is attached to a Technology
# (Tech), a Character (Char) or an Entity (Ent), then label the verb form.
SitCounts <- Situations %>%
  # TRUE where the situation names a technology / character / entity.
  mutate(
    Tech = !is.na(Technology),
    Char = !is.na(Character),
    Ent = !is.na(Entity)
  ) %>%
  select(Verb, Tech, Char, Ent) %>%
  pivot_longer(-Verb, names_to = "var", values_to = "value") %>%
  # wt = value sums the logical flags, i.e. counts the TRUE rows per Verb/var.
  count(Verb, var, wt = value) %>%
  # id_cols is named explicitly: passing it by position is deprecated in
  # current tidyr releases.
  pivot_wider(id_cols = Verb, names_from = var, values_from = n) %>%
  # Verbs ending in neither "ing" nor "ed" match no branch and get NA.
  mutate(
    VerbType = case_when(
      str_detect(Verb, "ing$") ~ "Active",
      str_detect(Verb, "ed$") ~ "Passive"
    )
  )
239+
240+
# Transform worksinfo.csv to a wide format table --------------------------
241+
242+
# If you find it easier to work with a wide table, with a row for each work
243+
# and columns for WorkID, Work_WikidataID, WorkTitle, Genre, Year, Creator,
244+
# Country, URL and IsSciFi, you can use this code to generate it.
245+
# Because many works have multiple creators, multiple countries and
246+
# even multiple URLs, the code creates a new numbered column for each creator,
# country and URL (Creator1, Creator2, ..., Country1, ..., URL1, ...).
247+
248+
# Reshape the long worksinfo.csv (one row per WorkID / Variable / Value) into
# a wide table with one row per work.
#
# Creator, Country and URL can occur several times for the same work. Each
# occurrence is numbered within its work (Creator1, Creator2, ...) so that
# every value gets its own column after widening. The suffix is computed from
# the row number instead of being enumerated case by case.
worksinfo <- read_csv("data/worksinfo.csv") %>%
  arrange(WorkID, Variable) %>%
  # Number repeated variables within each work: 1, 2, 3, ...
  group_by(WorkID, Variable) %>%
  mutate(dup_id = row_number()) %>%
  ungroup() %>%
  mutate(
    Variable = as.character(Variable),
    Variable = if_else(
      Variable %in% c("Creator", "Country", "URL"),
      paste0(Variable, dup_id),
      Variable
    )
  ) %>%
  select(-dup_id) %>%
  pivot_wider(
    names_from = Variable,
    values_from = Value
  ) %>%
  # Fixed output layout: up to 8 creators, 5 countries and 3 URLs per work.
  # As before, select() stops with an error if any of these columns is absent.
  select(
    WorkID, Work_WikidataID, WorkTitle, Genre, Year, Creator1,
    Country1, URL1, URL2, URL3, Country2, Country3, Country4,
    Country5, Creator2, Creator3, Creator4, Creator5, Creator6,
    Creator7, Creator8, IsSciFi
  )

0 commit comments

Comments
 (0)