-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpred_functions.R
More file actions
66 lines (61 loc) · 2.2 KB
/
pred_functions.R
File metadata and controls
66 lines (61 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
library(dplyr)
library(RSQLite)
#' Suggest next words from a 5-gram table using simple back-off.
#'
#' Splits `instr` on single spaces, keeps at most the last four non-empty
#' tokens and looks for rows of `codex` whose context columns equal them
#' (`w1`..`w4` for four words, `w2`..`w4` for three, and so on). Whenever a
#' context yields no rows, the oldest word is dropped and the shorter context
#' is tried, down to a single word. Matching is case-sensitive.
#'
#' @param instr A single character string with the words typed so far.
#' @param codex A data frame with columns `w1`, `w2`, `w3`, `w4` and `w5`.
#' @return The first rows (as selected by `head()`) of a table with columns
#'   `w5` and `cnt` (number of matching rows per `w5`), sorted by descending
#'   `cnt`. The table has zero rows when nothing matched or when `instr`
#'   contains no words.
dumbPred <- function(instr, codex) {
  inwords <- strsplit(instr, ' ')[[1]]
  # Repeated or leading spaces produce empty tokens; they can never be a word.
  inwords <- inwords[nzchar(inwords)]
  # Only four context columns exist, so only the last four words are usable.
  inwords <- tail(inwords, 4)

  # Start from an empty match so inputs with fewer than four words (or none)
  # no longer fail on an undefined `res`.
  res <- head(codex, 0)

  if (length(inwords) == 4) {
    print('Matching 4...')
    res <- codex %>%
      filter(w1 == inwords[1], w2 == inwords[2],
             w3 == inwords[3], w4 == inwords[4])
    # No hit: drop the oldest word and fall through to the 3-word lookup.
    if (nrow(res) == 0) {
      inwords <- inwords[-1]
    }
  }
  if (length(inwords) == 3) {
    print('Matching 3...')
    res <- codex %>%
      filter(w2 == inwords[1], w3 == inwords[2], w4 == inwords[3])
    if (nrow(res) == 0) {
      inwords <- inwords[-1]
    }
  }
  if (length(inwords) == 2) {
    print('Matching 2...')
    res <- codex %>% filter(w3 == inwords[1], w4 == inwords[2])
    if (nrow(res) == 0) {
      inwords <- inwords[-1]
    }
  }
  if (length(inwords) == 1) {
    print('Matching 1...')
    res <- codex %>% filter(w4 == inwords[1])
  }

  # Count how often each candidate follows the matched context.
  res %>%
    group_by(w5) %>%
    summarise(cnt = n()) %>%
    ungroup() %>%
    arrange(desc(cnt)) %>%
    head()
}
#' Suggest next words from a weighted 5-gram table using simple back-off.
#'
#' Lower-cases `instr`, splits it on single spaces, keeps at most the last
#' four non-empty tokens and looks for rows of `codex` whose context columns
#' equal them (`w1`..`w4` for four words, `w2`..`w4` for three, and so on).
#' Whenever a context yields no rows, the oldest word is dropped and the
#' shorter context is tried, down to a single word.
#'
#' @param instr A single character string with the words typed so far.
#' @param codex A data frame with columns `w1`, `w2`, `w3`, `w4`, `w5` and
#'   `C5Gram` (the weight summed per candidate).
#' @return `NULL` (invisibly) when `codex` has no rows. Otherwise a table with
#'   columns `w5` and `cnt` (sum of `C5Gram` per `w5`), sorted by descending
#'   `cnt`; it has zero rows when nothing matched or `instr` has no words.
dumbPred2 <- function(instr, codex) {
  # An empty table cannot predict anything; callers test for NULL.
  if (!(nrow(codex) > 0)) {
    return(invisible(NULL))
  }

  inwords <- strsplit(tolower(instr), ' ')[[1]]
  # Repeated or leading spaces produce empty tokens; they can never be a word.
  inwords <- inwords[nzchar(inwords)]
  # Only four context columns exist, so only the last four words are usable.
  inwords <- tail(inwords, 4)

  # Start from zero rows rather than a placeholder row of `codex`, so that an
  # input with no usable words cannot produce a prediction from unrelated data.
  res <- head(codex, 0)

  if (length(inwords) == 4) {
    res <- codex %>%
      filter(w1 == inwords[1], w2 == inwords[2],
             w3 == inwords[3], w4 == inwords[4])
    # No hit: drop the oldest word and fall through to the 3-word lookup.
    if (nrow(res) == 0) {
      inwords <- inwords[-1]
    }
  }
  if (length(inwords) == 3) {
    res <- codex %>%
      filter(w2 == inwords[1], w3 == inwords[2], w4 == inwords[3])
    if (nrow(res) == 0) {
      inwords <- inwords[-1]
    }
  }
  if (length(inwords) == 2) {
    res <- codex %>% filter(w3 == inwords[1], w4 == inwords[2])
    if (nrow(res) == 0) {
      inwords <- inwords[-1]
    }
  }
  if (length(inwords) == 1) {
    res <- codex %>% filter(w4 == inwords[1])
  }

  # Return the whole ranked table as the final expression. No `return()` is
  # placed inside the pipeline, so the result does not depend on how the pipe
  # evaluates its left-hand side.
  res %>%
    group_by(w5) %>%
    summarise(cnt = sum(C5Gram)) %>%
    ungroup() %>%
    arrange(desc(cnt))
}
#' Return three next-word suggestions for a piece of free text.
#'
#' Strips every character that is not alphanumeric or a space from `x`, keeps
#' the last four words and passes them to `dumbPred2()` together with the
#' global `con`. Sentence markers (`<SEN>` / `<sen>`) are removed from the
#' candidates. Each call appends one row (query, result) to the global
#' `searchlist`; the result is logged as `NONE` when no candidate was found.
#'
#' @param x A single character string with the text typed so far.
#' @return A character vector of three suggestions. Missing positions are
#'   filled from the default words `the`, `on`, `a`, so the result never
#'   contains `NA`.
predWrap <- function(x) {
  fallback <- c('the', 'on', 'a')

  instr <- strsplit(gsub("[^[:alnum:] ]", "", x), " +")[[1]]
  instr <- paste(tail(instr, n = 4), collapse = ' ')

  res1 <- dumbPred2(instr, con)

  candidates <- character(0)
  if (!is.null(res1)) {
    # Work on the w5 column as a plain vector so the result may be a tibble,
    # a data frame or a matrix with a `w5` column.
    res1 <- as.data.frame(res1, stringsAsFactors = FALSE)
    candidates <- as.character(res1[["w5"]])
    # Sentence markers are not words a user can type next.
    keep <- !is.na(candidates) & !(candidates %in% c('<SEN>', '<sen>'))
    candidates <- candidates[keep]
  }

  if (length(candidates) == 0) {
    searchlist <<- rbind(searchlist, cbind(instr, 'NONE'))
    return(fallback)
  }

  # Pad with the defaults (skipping ones already suggested) instead of
  # returning NA when fewer than three candidates are available.
  res2 <- head(c(candidates, setdiff(fallback, candidates)), 3)
  searchlist <<- rbind(searchlist, cbind(instr, paste(res2, collapse = ', ')))
  res2
}