Skip to content

Commit 743aa49

Browse files
committed
Change the pattern of nimble fastq-to-bam
1 parent 82f1dc6 commit 743aa49

File tree

4 files changed

+121
-14
lines changed

4 files changed

+121
-14
lines changed

SequenceAnalysis/pipeline_code/extra_tools_install.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,3 +333,20 @@ then
333333
else
334334
echo "Already installed"
335335
fi
336+
337+
338+
# Install primer3_core (primer3 v2.6.1) into LKTOOLS_DIR.
# Skipped when the binary already exists, unless FORCE_REINSTALL is non-empty.
# Variable expansions are quoted so a tools path containing spaces or glob
# characters is not word-split or expanded by the shell.
if [[ ! -e "${LKTOOLS_DIR}/primer3_core" || ! -z "${FORCE_REINSTALL}" ]];
then
    echo "Cleaning up previous installs"
    # The trailing glob is left outside the quotes so it still expands.
    rm -Rf "${LKTOOLS_DIR}"/primer3_core*
    rm -Rf primer3*
    rm -Rf v2.6.1.tar.gz

    wget https://github.com/primer3-org/primer3/archive/refs/tags/v2.6.1.tar.gz
    tar -xf v2.6.1.tar.gz
    # NOTE(review): the working directory is left inside the source tree after this block — confirm nothing later relies on it.
    cd primer3-2.6.1/src
    make
    install primer3_core "${LKTOOLS_DIR}/"
else
    echo "Already installed"
fi

singlecell/src/org/labkey/singlecell/run/CellRangerGexCountStep.java

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import org.apache.commons.io.FileUtils;
66
import org.apache.commons.lang3.StringUtils;
77
import org.apache.commons.lang3.tuple.Pair;
8+
import org.apache.logging.log4j.Logger;
89
import org.jetbrains.annotations.Nullable;
910
import org.json.JSONObject;
1011
import org.labkey.api.collections.CaseInsensitiveHashMap;
@@ -47,15 +48,19 @@
4748
import org.labkey.api.writer.PrintWriters;
4849
import org.labkey.singlecell.SingleCellSchema;
4950

51+
import java.io.BufferedReader;
5052
import java.io.File;
5153
import java.io.IOException;
54+
import java.nio.file.Files;
5255
import java.util.ArrayList;
5356
import java.util.Arrays;
5457
import java.util.Collection;
5558
import java.util.Date;
5659
import java.util.HashMap;
5760
import java.util.List;
5861
import java.util.Map;
62+
import java.util.regex.Matcher;
63+
import java.util.regex.Pattern;
5964

6065
public class CellRangerGexCountStep extends AbstractAlignmentPipelineStep<CellRangerWrapper> implements AlignmentStep
6166
{
@@ -613,4 +618,88 @@ public void complete(SequenceAnalysisJobSupport support, AnalysisModel model, Co
613618
}
614619
}
615620
}
621+
622+
public enum Chemistry
623+
{
624+
// See: https://kb.10xgenomics.com/s/article/115004506263-What-is-a-barcode-inclusion-list-formerly-barcode-whitelist
625+
// cellranger-x.y.z/lib/python/cellranger/barcodes/
626+
FivePE_V3("Single Cell 5' PE v3", "3M-5pgex-jan-2023.txt.gz"),
627+
FivePE_V2("Single Cell 5' PE v2", "737k-august-2016.txt");
628+
629+
final String _label;
630+
final String _inclusionListFile;
631+
632+
Chemistry(String label, String inclusionListFile)
633+
{
634+
_label = label;
635+
_inclusionListFile = inclusionListFile;
636+
}
637+
638+
public File getInclusionListFile(Logger logger) throws PipelineJobException
639+
{
640+
File exe = new CellRangerWrapper(logger).getExe();
641+
if (Files.isSymbolicLink(exe.toPath()))
642+
{
643+
try
644+
{
645+
exe = Files.readSymbolicLink(exe.toPath()).toFile();
646+
}
647+
catch (IOException e)
648+
{
649+
throw new PipelineJobException(e);
650+
}
651+
}
652+
653+
File il = new File(exe.getParentFile(), "lib/python/cellranger/barcodes/" + _inclusionListFile);
654+
if (!il.exists())
655+
{
656+
throw new PipelineJobException("Unable to find file: " + il.getPath());
657+
}
658+
659+
return il;
660+
}
661+
662+
public static Chemistry getByLabel(String label)
663+
{
664+
for (Chemistry c : Chemistry.values())
665+
{
666+
if (c._label.equals(label))
667+
{
668+
return c;
669+
}
670+
}
671+
672+
throw new IllegalArgumentException("Unknown chemistry: " + label);
673+
}
674+
}
675+
676+
public static Chemistry inferChemistry(File cloupeFile) throws PipelineJobException
677+
{
678+
File html = new File(cloupeFile.getPath().replaceAll("_cloupe.cloupe$", "_web_summary.html"));
679+
if (!html.exists())
680+
{
681+
throw new IllegalArgumentException("Missing file: " + html.getPath());
682+
}
683+
684+
final Pattern pattern = Pattern.compile("\\[\"Chemistry\",\"(.*?)\"],");
685+
try (BufferedReader reader = Readers.getReader(html))
686+
{
687+
String line;
688+
while ((line = reader.readLine()) != null)
689+
{
690+
Matcher m = pattern.matcher(line);
691+
if (m.find())
692+
{
693+
String chem = m.group(1);
694+
return Chemistry.getByLabel(chem);
695+
}
696+
}
697+
}
698+
catch (IOException e)
699+
{
700+
throw new PipelineJobException(e);
701+
}
702+
703+
throw new IllegalArgumentException("Unable to infer chemistry for file: " + html.getPath());
704+
}
616705
}

singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ public AlignmentOutput performAlignment(Readset rs, List<File> inputFastqs1, @Nu
8686
AlignmentOutputImpl output = new AlignmentOutputImpl();
8787

8888
boolean throwIfNotFound = getProvider().getParameterByName(REQUIRE_CACHED_BARCODES).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
89-
File cachedBarcodes = getCachedBarcodeFile(rs, throwIfNotFound);
89+
File loupeFile = getCachedLoupeFile(rs, throwIfNotFound);
9090

9191
File localBam;
92-
if (cachedBarcodes == null)
92+
if (loupeFile == null)
9393
{
9494
localBam = performCellRangerAlignment(output, rs, inputFastqs1, inputFastqs2, outputDirectory, referenceGenome, basename, readGroupId, platformUnit);
9595
}
@@ -109,12 +109,12 @@ public AlignmentOutput performAlignment(Readset rs, List<File> inputFastqs1, @Nu
109109

110110
private File createNimbleBam(AlignmentOutputImpl output, Readset rs, List<File> inputFastqs1, List<File> inputFastqs2) throws PipelineJobException
111111
{
112-
File cellBarcodeUmiMap = getCachedBarcodeFile(rs, true);
112+
File loupeFile = getCachedLoupeFile(rs, true);
113113

114-
return NimbleHelper.runFastqToBam(output, getPipelineCtx(), rs, inputFastqs1, inputFastqs2, cellBarcodeUmiMap);
114+
return NimbleHelper.runFastqToBam(output, getPipelineCtx(), rs, inputFastqs1, inputFastqs2, loupeFile);
115115
}
116116

117-
private File getCachedBarcodeFile(Readset rs, boolean throwIfNotFound) throws PipelineJobException
117+
private File getCachedLoupeFile(Readset rs, boolean throwIfNotFound) throws PipelineJobException
118118
{
119119
Map<Integer, Integer> map = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, PipelineJob.createObjectMapper().getTypeFactory().constructParametricType(Map.class, Integer.class, Integer.class));
120120
Integer dataId = map.get(rs.getReadsetId());
@@ -137,14 +137,14 @@ private File getCachedBarcodeFile(Readset rs, boolean throwIfNotFound) throws Pi
137137
return ret;
138138
}
139139

140-
private ExpData findCellBarcodeFiles(Readset rs) throws PipelineJobException
140+
private ExpData findLoupeFile(Readset rs) throws PipelineJobException
141141
{
142142
Container targetContainer = getPipelineCtx().getJob().getContainer().isWorkbookOrTab() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer();
143143
UserSchema us = QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), targetContainer, SingleCellSchema.SEQUENCE_SCHEMA_NAME);
144144
TableInfo ti = us.getTable("outputfiles");
145145

146146
SimpleFilter sf = new SimpleFilter(FieldKey.fromString("readset"), rs.getRowId());
147-
sf.addCondition(FieldKey.fromString("category"), NimbleHelper.CATEGORY_CB);
147+
sf.addCondition(FieldKey.fromString("category"), CellRangerGexCountStep.LOUPE_CATEGORY);
148148
List<Integer> cbs = new TableSelector(ti, PageFlowUtil.set("dataid"), sf, new Sort("-rowid")).getArrayList(Integer.class);
149149
if (!cbs.isEmpty())
150150
{
@@ -199,19 +199,19 @@ public void init(SequenceAnalysisJobSupport support) throws PipelineJobException
199199
}
200200

201201
// Try to find the 10x Loupe file for each readset:
202-
HashMap<Integer, Integer> readsetToBarcodes = new HashMap<>();
202+
HashMap<Integer, Integer> readsetToLoupe = new HashMap<>();
203203
for (Readset rs : support.getCachedReadsets())
204204
{
205-
ExpData f = findCellBarcodeFiles(rs);
205+
ExpData f = findLoupeFile(rs);
206206
if (f != null)
207207
{
208208
support.cacheExpData(f);
209-
readsetToBarcodes.put(rs.getReadsetId(), f.getRowId());
209+
readsetToLoupe.put(rs.getReadsetId(), f.getRowId());
210210
}
211211
}
212212

213-
support.cacheObject(CACHE_KEY, readsetToBarcodes);
213+
support.cacheObject(CACHE_KEY, readsetToLoupe);
214214
}
215215

216-
private static final String CACHE_KEY = "nimble.cb";
216+
private static final String CACHE_KEY = "nimble.loupe";
217217
}

singlecell/src/org/labkey/singlecell/run/NimbleHelper.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ private static File getNimbleDoneFile(File parentDir, String resumeString)
594594
return new File(parentDir, "nimble." + resumeString + ".done");
595595
}
596596

597-
public static File runFastqToBam(PipelineStepOutput output, PipelineContext ctx, Readset rs, List<File> inputFastqs1, List<File> inputFastqs2, File cellBarcodeUmiMap) throws PipelineJobException
597+
public static File runFastqToBam(PipelineStepOutput output, PipelineContext ctx, Readset rs, List<File> inputFastqs1, List<File> inputFastqs2, File loupeFile) throws PipelineJobException
598598
{
599599
List<File> outputBams = new ArrayList<>();
600600
int bamIdx = 0;
@@ -627,7 +627,8 @@ public static File runFastqToBam(PipelineStepOutput output, PipelineContext ctx,
627627
args.add(inputFastqs2.get(bamIdx).getPath());
628628

629629
args.add("--map");
630-
args.add(cellBarcodeUmiMap.getPath());
630+
CellRangerGexCountStep.Chemistry chem = CellRangerGexCountStep.inferChemistry(loupeFile);
631+
args.add(chem.getInclusionListFile(ctx.getLogger()).getPath());
631632

632633
args.add("--output");
633634
args.add(outputBam.getPath());

0 commit comments

Comments
 (0)