Skip to content

Commit 6dffcb5

Browse files
committed
Add support for running LDA
1 parent 4b79265 commit 6dffcb5

File tree

4 files changed

+165
-1
lines changed

4 files changed

+165
-1
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Runs LDA (CellMembrane::RunLDA) on every seurat object listed in `seuratObjects`, saves each
# result as 'lda.<datasetId>.rds', and writes a tab-delimited manifest ('ldaFiles.txt') of the
# files that were produced.
#
# NOTE(review): `seuratObjects`, `maxAllowableCells`, `datasetIdToOutputFileId`, `printName` and
# `bindArgs` are not defined in this chunk; they are presumably supplied by the enclosing
# script/template before this code runs - confirm.

# Resolve the thread count. Sys.getenv() always returns a character string, so coerce it to keep
# nCores numeric regardless of which branch is taken.
if (Sys.getenv('SEURAT_MAX_THREADS') != '') {
  nCores <- as.integer(Sys.getenv('SEURAT_MAX_THREADS'))
  if (is.na(nCores)) {
    stop(paste0('SEURAT_MAX_THREADS is not a valid integer: ', Sys.getenv('SEURAT_MAX_THREADS')))
  }
} else {
  nCores <- 1
}

# Manifest of outputs: one row per dataset
ldaFiles <- data.frame(DatasetId = character(), FileName = character())

for (datasetId in names(seuratObjects)) {
  printName(datasetId)
  seuratObj <- readRDS(seuratObjects[[datasetId]])

  # Fail fast when the object is larger than the configured limit (a value <= 0 or NULL disables the check)
  if (!is.null(maxAllowableCells) && maxAllowableCells > 0 && ncol(seuratObj) > maxAllowableCells) {
    stop(paste0('The object has ', ncol(seuratObj), ' cells, which is above the maxAllowableCells: ', maxAllowableCells))
  }

  ldaResults <- bindArgs(CellMembrane::RunLDA, seuratObj)()

  # Scalar lookup: a plain if/else is used instead of the vectorised ifelse()
  if (datasetId %in% names(datasetIdToOutputFileId)) {
    outputFileId <- datasetIdToOutputFileId[[datasetId]]
  } else {
    outputFileId <- NA
  }
  ldaResults$OutputFileId <- outputFileId

  fileName <- paste0('lda.', datasetId, '.rds')
  saveRDS(ldaResults, file = fileName)

  ldaFiles <- rbind(ldaFiles, data.frame(DatasetId = datasetId, FileName = fileName))

  # Cleanup
  rm(seuratObj)
  gc()
}

# No header and no quoting: each line is exactly "<DatasetId>\t<FileName>"
write.table(ldaFiles, file = 'ldaFiles.txt', row.names = FALSE, col.names = FALSE, quote = FALSE, sep = '\t')

singlecell/resources/web/singlecell/panel/PoolImportPanel.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ Ext4.define('SingleCell.panel.PoolImportPanel', {
222222
},
223223

224224
citeSeqPanel: function(val, panel) {
225-
if (val && (val.toLowerCase() === 'no' || val.toLowerCase() === 'n')) {
225+
if (val && (val.toLowerCase() === 'no' || val.toLowerCase() === 'n' || val.toLowerCase() === 'na' || val.toLowerCase() === 'n/a')) {
226226
return null;
227227
}
228228
else if (val && (val.toLowerCase() === 'yes' || val.toLowerCase() === 'y')) {

singlecell/src/org/labkey/singlecell/SingleCellModule.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ public static void registerPipelineSteps()
212212
SequencePipelineService.get().registerPipelineStep(new IntegrateData.Provider());
213213
SequencePipelineService.get().registerPipelineStep(new CustomUCell.Provider());
214214
SequencePipelineService.get().registerPipelineStep(new RunSDA.Provider());
215+
SequencePipelineService.get().registerPipelineStep(new RunLDA.Provider());
215216
}
216217

217218
@Override
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
package org.labkey.singlecell.pipeline.singlecell;
2+
3+
import au.com.bytecode.opencsv.CSVReader;
4+
import org.apache.commons.lang3.StringUtils;
5+
import org.json.old.JSONObject;
6+
import org.labkey.api.pipeline.PipelineJobException;
7+
import org.labkey.api.reader.Readers;
8+
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
9+
import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider;
10+
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
11+
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
12+
import org.labkey.api.singlecell.pipeline.SeuratToolParameter;
13+
import org.labkey.api.singlecell.pipeline.SingleCellStep;
14+
15+
import java.io.File;
16+
import java.io.IOException;
17+
import java.util.Arrays;
18+
import java.util.List;
19+
import java.util.Set;
20+
import java.util.stream.Collectors;
21+
22+
public class RunLDA extends AbstractCellMembraneStep
23+
{
24+
public RunLDA(PipelineContext ctx, RunLDA.Provider provider)
25+
{
26+
super(provider, ctx);
27+
}
28+
29+
public static class Provider extends AbstractPipelineStepProvider<SingleCellStep>
30+
{
31+
public Provider()
32+
{
33+
super("RunLDA", "Run LDA", "CellMembrane/LDA", "This will run LDA on the seurat object.", Arrays.asList(
34+
SeuratToolParameter.create("ntopics", "Num Topics", "The number of topics to generate. Can either be a single value or a comma-separated list.", "textfield", new JSONObject(){{
35+
36+
}}, 30, null, true, true).delimiter(","),
37+
SeuratToolParameter.create("maxAllowableCells", "Max Allowable Cells", "If the input cells are above this value, an error will be thrown. This is designed to limit the amount of data passed to LDA. Use -1 for no limit.", "ldk-integerfield", new JSONObject(){{
38+
put("minValue", -1);
39+
}}, 150000, null, true),
40+
SeuratToolParameter.create("varFeatures", "# Variable Features", "The number of variable features to select.", "ldk-integerfield", new JSONObject(){{
41+
put("minValue", 0);
42+
}}, 5000),
43+
SeuratToolParameter.create(SEURAT_THREADS, "Max Threads", "The number of threads to use. Cannot be higher than the threads allocated to the job.", "ldk-integerfield", new JSONObject(){{
44+
put("minValue", 0);
45+
}}, 8)
46+
), Arrays.asList("/sequenceanalysis/field/TrimmingTextArea.js"), null);
47+
}
48+
49+
@Override
50+
public RunLDA create(PipelineContext ctx)
51+
{
52+
return new RunLDA(ctx, this);
53+
}
54+
}
55+
56+
@Override
57+
public boolean createsSeuratObjects()
58+
{
59+
return false;
60+
}
61+
62+
@Override
63+
public String getFileSuffix()
64+
{
65+
return "lda";
66+
}
67+
68+
@Override
69+
public Output execute(SequenceOutputHandler.JobContext ctx, List<SeuratObjectWrapper> inputObjects, String outputPrefix) throws PipelineJobException
70+
{
71+
Output output = super.execute(ctx, inputObjects, outputPrefix);
72+
73+
File saved = new File(ctx.getOutputDir(), "ldaFiles.txt");
74+
if (!saved.exists())
75+
{
76+
throw new PipelineJobException("Unable to find file: " + saved.getPath());
77+
}
78+
79+
try (CSVReader reader = new CSVReader(Readers.getReader(saved), '\t'))
80+
{
81+
String[] line;
82+
while ((line = reader.readNext()) != null)
83+
{
84+
File rds = new File(ctx.getOutputDir(), line[1]);
85+
if (!rds.exists())
86+
{
87+
throw new PipelineJobException("Unable to find file: " + rds.getPath());
88+
}
89+
90+
final String datasetId = line[0];
91+
Set<SeuratObjectWrapper> wrappers = inputObjects.stream().filter(x -> datasetId.equals(x.getDatasetId())).collect(Collectors.toSet());
92+
if (wrappers.size() == 0)
93+
{
94+
throw new PipelineJobException("Unable to find seurat object wrapper for: " + datasetId);
95+
}
96+
else if (wrappers.size() > 1)
97+
{
98+
throw new PipelineJobException("More than one seurat object wrapper matched: " + datasetId + ", found: " + wrappers.stream().map(SeuratObjectWrapper::getDatasetId).collect(Collectors.joining(", ")));
99+
}
100+
101+
SeuratObjectWrapper wrapper = wrappers.iterator().next();
102+
103+
SequenceOutputFile so = new SequenceOutputFile();
104+
so.setFile(rds);
105+
so.setCategory("LDA Results");
106+
so.setLibrary_id(ctx.getSequenceSupport().getCachedGenomes().iterator().next().getGenomeId());
107+
so.setReadset(wrapper.getReadsetId());
108+
so.setName(wrapper.getDatasetName() == null ? wrapper.getDatasetId() : wrapper.getDatasetName() + ": LDA Analysis");
109+
110+
String jobDescription = StringUtils.trimToNull(ctx.getParams().optString("jobDescription"));
111+
if (jobDescription != null)
112+
{
113+
so.setDescription(jobDescription);
114+
}
115+
116+
ctx.getFileManager().addSequenceOutput(so);
117+
}
118+
}
119+
catch (IOException e)
120+
{
121+
throw new PipelineJobException(e);
122+
}
123+
124+
ctx.getFileManager().addIntermediateFile(saved);
125+
126+
return output;
127+
}
128+
129+
}
130+
131+

0 commit comments

Comments
 (0)