Add support for running LDA

bbimber · bbimber · commit 6dffcb56ec15 · 2023-02-01T14:23:04.000-08:00
diff --git a/singlecell/resources/chunks/RunLDA.R b/singlecell/resources/chunks/RunLDA.R
@@ -0,0 +1,32 @@
+# Runs CellMembrane::RunLDA on each input seurat object, saves each result as an RDS file,
+# and writes a headerless, tab-delimited manifest (ldaFiles.txt) of DatasetId / FileName pairs.
+# seuratObjects, maxAllowableCells, datasetIdToOutputFileId, printName() and bindArgs() are not
+# defined in this chunk and are expected to be supplied by the enclosing script.
+
+# Sys.getenv() always returns a character string, so convert it; otherwise nCores would be
+# text when the variable is set but numeric when it is not.
+# NOTE(review): nCores is not referenced again in this chunk; presumably bindArgs() forwards it to RunLDA - confirm.
+maxThreads <- Sys.getenv('SEURAT_MAX_THREADS')
+if (maxThreads != '') {
+    nCores <- as.integer(maxThreads)
+} else {
+    nCores <- 1
+}
+
+ldaFiles <- data.frame(DatasetId = character(), FileName = character())
+
+for (datasetId in names(seuratObjects)) {
+    printName(datasetId)
+    seuratObj <- readRDS(seuratObjects[[datasetId]])
+
+    # Refuse inputs above the configured limit. A NULL, NA or non-positive limit disables the check.
+    cellCount <- ncol(seuratObj)
+    if (!is.null(maxAllowableCells) && !is.na(maxAllowableCells) && maxAllowableCells > 0 && cellCount > maxAllowableCells) {
+        stop(paste0('The object has ', cellCount, ' cells, which is above the maxAllowableCells: ', maxAllowableCells))
+    }
+
+    ldaResults <- bindArgs(CellMembrane::RunLDA, seuratObj)()
+
+    # Tag the result with the output file ID mapped to this dataset, or NA when there is no mapping
+    if (datasetId %in% names(datasetIdToOutputFileId)) {
+        outputFileId <- datasetIdToOutputFileId[[datasetId]]
+    } else {
+        outputFileId <- NA
+    }
+    ldaResults$OutputFileId <- outputFileId
+
+    fileName <- paste0('lda.', datasetId, '.rds')
+    saveRDS(ldaResults, file = fileName)
+
+    ldaFiles <- rbind(ldaFiles, data.frame(DatasetId = datasetId, FileName = fileName))
+
+    # Cleanup: drop both the input object and the saved results before the next dataset
+    rm(seuratObj, ldaResults)
+    gc()
+}
+
+# No header and no quoting: each row is exactly DatasetId<TAB>FileName
+write.table(ldaFiles, file = 'ldaFiles.txt', row.names = FALSE, col.names = FALSE, quote = FALSE, sep = '\t')
diff --git a/singlecell/resources/web/singlecell/panel/PoolImportPanel.js b/singlecell/resources/web/singlecell/panel/PoolImportPanel.js
@@ -222,7 +222,7 @@ Ext4.define('SingleCell.panel.PoolImportPanel', {
         },
 
         citeSeqPanel: function(val, panel) {
-            if (val && (val.toLowerCase() === 'no' || val.toLowerCase() === 'n')) {
+            if (val && (val.toLowerCase() === 'no' || val.toLowerCase() === 'n' || val.toLowerCase() === 'na' || val.toLowerCase() === 'n/a')) {
                 return null;
             }
             else if (val && (val.toLowerCase() === 'yes' || val.toLowerCase() === 'y')) {
diff --git a/singlecell/src/org/labkey/singlecell/SingleCellModule.java b/singlecell/src/org/labkey/singlecell/SingleCellModule.java
@@ -212,6 +212,7 @@ public static void registerPipelineSteps()
         SequencePipelineService.get().registerPipelineStep(new IntegrateData.Provider());
         SequencePipelineService.get().registerPipelineStep(new CustomUCell.Provider());
         SequencePipelineService.get().registerPipelineStep(new RunSDA.Provider());
+        SequencePipelineService.get().registerPipelineStep(new RunLDA.Provider());
     }
 
     @Override
diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunLDA.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunLDA.java
@@ -0,0 +1,131 @@
+package org.labkey.singlecell.pipeline.singlecell;
+
+import au.com.bytecode.opencsv.CSVReader;
+import org.apache.commons.lang3.StringUtils;
+import org.json.old.JSONObject;
+import org.labkey.api.pipeline.PipelineJobException;
+import org.labkey.api.reader.Readers;
+import org.labkey.api.sequenceanalysis.SequenceOutputFile;
+import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
+import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
+import org.labkey.api.singlecell.pipeline.SeuratToolParameter;
+import org.labkey.api.singlecell.pipeline.SingleCellStep;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class RunLDA extends AbstractCellMembraneStep
+{
+    public RunLDA(PipelineContext ctx, RunLDA.Provider provider)
+    {
+        super(provider, ctx);
+    }
+
+    public static class Provider extends AbstractPipelineStepProvider<SingleCellStep>
+    {
+        public Provider()
+        {
+            super("RunLDA", "Run LDA", "CellMembrane/LDA", "This will run LDA on the seurat object.", Arrays.asList(
+                    SeuratToolParameter.create("ntopics", "Num Topics", "The number of topics to generate. Can either be a single value or a comma-separated list.", "textfield", new JSONObject(){{
+
+                    }}, 30, null, true, true).delimiter(","),
+                    SeuratToolParameter.create("maxAllowableCells", "Max Allowable Cells", "If the input cells are above this value, an error will be thrown. This is designed to limit the amount of data passed to LDA. Use -1 for no limit.", "ldk-integerfield", new JSONObject(){{
+                        put("minValue", -1);
+                    }}, 150000, null, true),
+                    SeuratToolParameter.create("varFeatures", "# Variable Features", "The number of variable features to select.", "ldk-integerfield", new JSONObject(){{
+                        put("minValue", 0);
+                    }}, 5000),
+                    SeuratToolParameter.create(SEURAT_THREADS, "Max Threads", "The number of threads to use. Cannot be higher than the threads allocated to the job.", "ldk-integerfield", new JSONObject(){{
+                        put("minValue", 0);
+                    }}, 8)
+                ), Arrays.asList("/sequenceanalysis/field/TrimmingTextArea.js"), null);
+        }
+
+        @Override
+        public RunLDA create(PipelineContext ctx)
+        {
+            return new RunLDA(ctx, this);
+        }
+    }
+
+    @Override
+    public boolean createsSeuratObjects()
+    {
+        return false;
+    }
+
+    @Override
+    public String getFileSuffix()
+    {
+        return "lda";
+    }
+
+    @Override
+    public Output execute(SequenceOutputHandler.JobContext ctx, List<SeuratObjectWrapper> inputObjects, String outputPrefix) throws PipelineJobException
+    {
+        Output output = super.execute(ctx, inputObjects, outputPrefix);
+
+        File saved = new File(ctx.getOutputDir(), "ldaFiles.txt");
+        if (!saved.exists())
+        {
+            throw new PipelineJobException("Unable to find file: " + saved.getPath());
+        }
+
+        try (CSVReader reader = new CSVReader(Readers.getReader(saved), '\t'))
+        {
+            String[] line;
+            while ((line = reader.readNext()) != null)
+            {
+                File rds = new File(ctx.getOutputDir(), line[1]);
+                if (!rds.exists())
+                {
+                    throw new PipelineJobException("Unable to find file: " + rds.getPath());
+                }
+
+                final String datasetId = line[0];
+                Set<SeuratObjectWrapper> wrappers = inputObjects.stream().filter(x -> datasetId.equals(x.getDatasetId())).collect(Collectors.toSet());
+                if (wrappers.size() == 0)
+                {
+                    throw new PipelineJobException("Unable to find seurat object wrapper for: " + datasetId);
+                }
+                else if (wrappers.size() > 1)
+                {
+                    throw new PipelineJobException("More than one seurat object wrapper matched: " + datasetId + ", found: " + wrappers.stream().map(SeuratObjectWrapper::getDatasetId).collect(Collectors.joining(", ")));
+                }
+
+                SeuratObjectWrapper wrapper = wrappers.iterator().next();
+
+                SequenceOutputFile so = new SequenceOutputFile();
+                so.setFile(rds);
+                so.setCategory("LDA Results");
+                so.setLibrary_id(ctx.getSequenceSupport().getCachedGenomes().iterator().next().getGenomeId());
+                so.setReadset(wrapper.getReadsetId());
+                so.setName(wrapper.getDatasetName() == null ? wrapper.getDatasetId() : wrapper.getDatasetName() + ": LDA Analysis");
+
+                String jobDescription = StringUtils.trimToNull(ctx.getParams().optString("jobDescription"));
+                if (jobDescription != null)
+                {
+                    so.setDescription(jobDescription);
+                }
+
+                ctx.getFileManager().addSequenceOutput(so);
+            }
+        }
+        catch (IOException e)
+        {
+            throw new PipelineJobException(e);
+        }
+
+        ctx.getFileManager().addIntermediateFile(saved);
+
+        return output;
+    }
+
+}
+
+

Original file line number	Diff line number	Diff line change
`@@ -222,7 +222,7 @@ Ext4.define('SingleCell.panel.PoolImportPanel', {`
`222`	`222`	`},`
`223`	`223`
`224`	`224`	`citeSeqPanel: function(val, panel) {`
`225`		`- if (val && (val.toLowerCase() === 'no' \|\| val.toLowerCase() === 'n')) {`
	`225`	`+ if (val && (val.toLowerCase() === 'no' \|\| val.toLowerCase() === 'n' \|\| val.toLowerCase() === 'na' \|\| val.toLowerCase() === 'n/a')) {`
`226`	`226`	`return null;`
`227`	`227`	`}`
`228`	`228`	`else if (val && (val.toLowerCase() === 'yes' \|\| val.toLowerCase() === 'y')) {`
Original file line number	Diff line number	Diff line change
`@@ -212,6 +212,7 @@ public static void registerPipelineSteps()`
`212`	`212`	`SequencePipelineService.get().registerPipelineStep(new IntegrateData.Provider());`
`213`	`213`	`SequencePipelineService.get().registerPipelineStep(new CustomUCell.Provider());`
`214`	`214`	`SequencePipelineService.get().registerPipelineStep(new RunSDA.Provider());`
	`215`	`+ SequencePipelineService.get().registerPipelineStep(new RunLDA.Provider());`
`215`	`216`	`}`
`216`	`217`
`217`	`218`	`@Override`