Java source code examples: htsjdk.variant.variantcontext.VariantContextBuilder
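All of the examples below construct immutable VariantContext records through VariantContextBuilder's fluent API. As a baseline, here is a minimal sketch (coordinates and alleles are placeholders; htsjdk imports are omitted, as in the snippets below):
// Minimal usage sketch: a biallelic SNP at 1:100.
final Allele ref = Allele.create("A", true);   // reference allele
final Allele alt = Allele.create("T", false);  // alternate allele
final VariantContext vc = new VariantContextBuilder()
        .source("example")
        .chr("1").start(100).stop(100)         // stop == start for a SNP
        .alleles(Arrays.asList(ref, alt))
        .make();                               // builds the immutable VariantContext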
Example 1
@Test
public void filterOutHetSinglton2() {
VariantContextBuilder b = new VariantContextBuilder();
Allele a1 = Allele.create("A", true);
Allele a2 = Allele.create("T", false);
final List<Allele> allelesHet = new ArrayList<>(Arrays.asList(a1,a2));
final List<Allele> allelesRef = new ArrayList<>(Arrays.asList(a1,a1));
final List<Allele> allelesAlt = new ArrayList<>(Arrays.asList(a2,a2));
// this does not have a het singleton.
Collection<Genotype> genotypes = new ArrayList<>();
genotypes.add(GenotypeBuilder.create("donor1", allelesRef));
genotypes.add(GenotypeBuilder.create("donor2", allelesHet));
genotypes.add(GenotypeBuilder.create("donor3", allelesHet));
VariantContext vc = b.alleles(allelesHet).start(1).stop(1).chr("1").genotypes(genotypes).make();
Assert.assertNotNull(vc);
Iterator<VariantContext> underlyingIterator = Collections.emptyIterator();
VariantContextSingletonFilter f = new VariantContextSingletonFilter(underlyingIterator, true);
boolean t1 = f.filterOut(vc);
Assert.assertTrue(t1);
}
Example 2
/**
 * Applies the provided structural variant filters to a variant context.
 *
 * Each filter is tested against the variant; the names of all failing
 * filters are set as the FILTER field of the returned variant context.
 * If every filter passes, the original variant context is returned unchanged.
 */
private static VariantContext applyFilters(final VariantContext variantContext,
final List<StructuralVariantFilter> filters) {
final Set<String> appliedFilters = new HashSet<>();
for (final StructuralVariantFilter filter : filters) {
if ( !filter.test(variantContext) )
appliedFilters.add(filter.getName());
}
if (appliedFilters.isEmpty())
return variantContext;
else {
return new VariantContextBuilder(variantContext).filters(appliedFilters).make();
}
}
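The example above uses filters(Set); Examples 11, 14, and 20 below combine the builder's other filter-related methods. A condensed sketch of all of them (vc is a placeholder for an existing VariantContext; imports omitted as elsewhere):
// Filter handling on VariantContextBuilder.
final VariantContextBuilder b = new VariantContextBuilder(vc);
b.unfiltered();                                        // clear the FILTER field entirely (missing, ".")
b.filter("LowQual");                                   // add one named filter
b.filters(new HashSet<>(Arrays.asList("q10", "s50"))); // replace the whole filter set
b.passFilters();                                       // mark the site explicitly as PASS
final VariantContext filtered = b.make();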
Example 3
/**
* Loads an external cnv call list and returns the results in an SVIntervalTree. NB: the contig indices in the SVIntervalTree
* are based on the sequence indices in the SAM header, _NOT_ the ReadMetadata (which we might not have access to at this
* time).
*/
public static SVIntervalTree<VariantContext> loadCNVCalls(final String cnvCallsFile,
final SAMFileHeader headerForReads) {
Utils.validate(cnvCallsFile != null, "Can't load null CNV calls file");
try ( final FeatureDataSource<VariantContext> dataSource = new FeatureDataSource<>(cnvCallsFile, null, 0, null) ) {
final String sampleId = SVUtils.getSampleId(headerForReads);
validateCNVcallDataSource(headerForReads, sampleId, dataSource);
final SVIntervalTree<VariantContext> cnvCallTree = new SVIntervalTree<>();
Utils.stream(dataSource.iterator())
.map(vc -> new VariantContextBuilder(vc).genotypes(vc.getGenotype(sampleId)).make()) // forces a decode of the genotype for serialization purposes
.map(vc -> new Tuple2<>(new SVInterval(headerForReads.getSequenceIndex(vc.getContig()), vc.getStart(), vc.getEnd()),vc))
.forEach(pv -> cnvCallTree.put(pv._1(), pv._2()));
return cnvCallTree;
}
}
Example 4
@Test
public void testLeftAlignWithVaryingMaxDistances() {
final byte[] refSequence = new byte[200];
Arrays.fill(refSequence, 0, 100, (byte) 'C');
Arrays.fill(refSequence, 100, 200, (byte) 'A');
final String contig = "1";
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, refSequence.length);
final SimpleInterval interval = new SimpleInterval(contig, 1, refSequence.length);
final ReferenceBases refBases = new ReferenceBases(refSequence, interval);
final ReferenceDataSource referenceDataSource = new ReferenceMemorySource(refBases, header.getSequenceDictionary());
final ReferenceContext referenceContext = new ReferenceContext(referenceDataSource, interval);
final List<Allele> alleles = Arrays.asList(Allele.create("AA", true), Allele.create("A", false));
for (int maxShift : new int[] {0, 1, 5, 10, 30}) {
for (int start : new int[]{101, 105, 109, 110, 111, 115, 120, 130, 141}) {
final VariantContext vc = new VariantContextBuilder("source", contig, start, start + 1, alleles).make();
final VariantContext realignedV = LeftAlignAndTrimVariants.leftAlignAndTrim(vc, referenceContext, maxShift, true);
Assert.assertEquals(realignedV.getStart(), Math.max(start - maxShift, 100));
}
}
}
Example 5
@DataProvider(name = "testUsingADDataProvider")
public Object[][] testUsingADDataProvider() {
final Allele A = Allele.create("A", true);
final Allele C = Allele.create("C");
final List<Allele> AA = Arrays.asList(A, A);
final List<Allele> AC = Arrays.asList(A, C);
final Genotype gAC = new GenotypeBuilder("1", AC).AD(new int[]{5,5}).make();
final Genotype gAA = new GenotypeBuilder("2", AA).AD(new int[]{10,0}).make();
final VariantContext vc = new VariantContextBuilder("test", "20", 10, 10, AC).genotypes(Arrays.asList(gAA, gAC)).make();
return new Object[][] {
{vc, gAC, new double[]{0.5}},
{vc, gAA, new double[]{0.0}}
};
}
Example 6
@Test(dataProvider = "MaxMinIndelSize")
public void maxIndelSizeTest(final int size, final int otherSize, final int max, final int min, final String op) {
final byte[] largerAllele = Utils.dupBytes((byte) 'A', size+1);
final byte[] smallerAllele = Utils.dupBytes((byte) 'A', 1);
final List<Allele> alleles = new ArrayList<>(2);
final Allele ref = Allele.create(op.equals("I") ? smallerAllele : largerAllele, true);
final Allele alt = Allele.create(op.equals("D") ? smallerAllele : largerAllele, false);
alleles.add(ref);
alleles.add(alt);
final VariantContext vc = new VariantContextBuilder("test", "1", 10, 10 + ref.length() - 1, alleles).make();
final boolean hasIndelTooLargeOrSmall = SelectVariants.containsIndelLargerOrSmallerThan(vc, max, min);
Assert.assertEquals(hasIndelTooLargeOrSmall, size > max || size < min);
}
Example 7
@Test
public void testDoesReadHaveN() {
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
final String contig = "chr1";
final int pos = 100;
final VariantContext vc = new VariantContextBuilder().chr(contig).start(pos).stop(pos).alleles("A","C").make();
final GATKRead nonOverlappingUpstreamRead = ArtificialReadUtils.createArtificialRead(header, "read", 0, 96, new byte[] {'A','N','G'}, new byte[] {20,20,20}, "3M");
Assert.assertFalse(CountNs.doesReadHaveN(nonOverlappingUpstreamRead, vc));
final GATKRead nonOverlappingDownstreamRead = ArtificialReadUtils.createArtificialRead(header, "read", 0, 101, new byte[] {'A','N','G'}, new byte[] {20,20,20}, "3M");
Assert.assertFalse(CountNs.doesReadHaveN(nonOverlappingDownstreamRead, vc));
final GATKRead spanningDeletionRead = ArtificialReadUtils.createArtificialRead(header, "read", 0, 95, new byte[] {'N','N','N','N','N','N'}, new byte[] {20,20,20,20,20,20}, "3M10D3M");
Assert.assertFalse(CountNs.doesReadHaveN(spanningDeletionRead, vc));
final GATKRead notN = ArtificialReadUtils.createArtificialRead(header, "read", 0, 99, new byte[] {'A','C','G'}, new byte[] {20,20,20}, "3M");
Assert.assertFalse(CountNs.doesReadHaveN(notN, vc));
final GATKRead yesN = ArtificialReadUtils.createArtificialRead(header, "read", 0, 99, new byte[] {'A','N','G'}, new byte[] {20,20,20}, "3M");
Assert.assertTrue(CountNs.doesReadHaveN(yesN, vc));
}
Example 8
/**
* Convert a segment (as annotated interval) to a {@link VariantContext}. The variant context will have a ref allele
* of the starting base of the segment. The end position will be in the attribute {@link VCFConstants#END_KEY}.
* Any remaining annotations in the annotated interval will be converted to string attributes. The segment call
* will be the alternate allele. Currently, the alternate allele will be one of neutral, amplification, or deletion.
*
* The variant context will use symbolic alleles for the calls (copy neutral, amplification, or deletion).
 * See {@link CalledCopyRatioSegment.Call} for insertions and deletions.
*
* @param segment Never {@code null}
* @param referenceContext Never {@code null}
* @return Never {@code null}
*/
public static VariantContext convert(final AnnotatedInterval segment, final ReferenceContext referenceContext) {
Utils.nonNull(segment);
Utils.nonNull(referenceContext);
final SimpleInterval startPointInterval = new SimpleInterval(segment.getContig(), segment.getStart(), segment.getStart());
final Allele refAlleleAtStart = Allele.create(referenceContext.getBases(startPointInterval), true);
final CalledCopyRatioSegment.Call call = retrieveCall(segment);
final VariantContextBuilder variantContextBuilder = new VariantContextBuilder().chr(segment.getContig())
.start(segment.getStart())
.stop(segment.getEnd())
.alleles(Arrays.asList(
Allele.create(refAlleleAtStart, false),
convertActualSegCallToAllele(call)
))
.attribute(VCFConstants.END_KEY, segment.getEnd());
// Grab every field in the annotated interval and make it a string attribute
segment.getAnnotations().keySet().stream().forEach(k -> variantContextBuilder.attribute(k, segment.getAnnotationValue(k)));
return variantContextBuilder
.make();
}
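Example 8 above, and Examples 18 and 29 below, all rely on the same VCF convention for events whose alt sequence is not spelled out: a symbolic alternate allele plus an END info attribute that agrees with the builder's stop position. A condensed sketch of that pattern (allele and coordinates are placeholders):
// Symbolic-allele record: an event on 1:100-200 reported as <DEL> with an explicit END.
final Allele refAllele = Allele.create("N", true);        // anchor base as the reference allele
final Allele symbolicAlt = Allele.create("<DEL>", false); // symbolic alternate allele
final VariantContext sv = new VariantContextBuilder()
        .chr("1").start(100).stop(200)                    // stop must agree with the END attribute
        .alleles(Arrays.asList(refAllele, symbolicAlt))
        .attribute(VCFConstants.END_KEY, 200)
        .make();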
Example 9
/**
 * Convert a short duplication call, i.e. one whose duplicated range is < 50 bp, to an insertion call.
 */
@VisibleForTesting
static VariantContext postProcessConvertShortDupToIns(final VariantContext simple) {
final String type = simple.getAttributeAsString(SVTYPE, "");
if ( type.equals(SimpleSVType.SupportedType.DUP.name()) ) {
final SimpleInterval duplicatedRegion = new SimpleInterval(simple.getAttributeAsString(DUP_REPEAT_UNIT_REF_SPAN, ""));
if (duplicatedRegion.size() > EVENT_SIZE_THRESHOLD) {
return simple;
} else {
return new VariantContextBuilder(simple)
.alleles(Arrays.asList(simple.getReference(), altSymbAlleleIns))
.rmAttribute(SVTYPE)
.attribute(SVTYPE, SimpleSVType.SupportedType.INS.name())
.make();
}
} else
return simple;
}
Example 10
public void writeOutRecalibrationTable(final VariantContextWriter recalWriter, final SAMSequenceDictionary seqDictionary) {
// we need to sort in coordinate order in order to produce a valid VCF
Collections.sort( data, VariantDatum.getComparator(seqDictionary) );
// create dummy alleles to be used
List<Allele> alleles = Arrays.asList(Allele.create("N", true), Allele.create("<VQSR>", false));
for( final VariantDatum datum : data ) {
if (VRAC.useASannotations)
alleles = Arrays.asList(datum.referenceAllele, datum.alternateAllele); //use the alleles to distinguish between multiallelics in AS mode
VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getEnd(), alleles);
builder.attribute(VCFConstants.END_KEY, datum.loc.getEnd());
builder.attribute(GATKVCFConstants.VQS_LOD_KEY, String.format("%.4f", datum.lod));
builder.attribute(GATKVCFConstants.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"));
if ( datum.atTrainingSite ) builder.attribute(GATKVCFConstants.POSITIVE_LABEL_KEY, true);
if ( datum.atAntiTrainingSite ) builder.attribute(GATKVCFConstants.NEGATIVE_LABEL_KEY, true);
recalWriter.add(builder.make());
}
}
Example 11
@Override
protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {
final VariantContextBuilder builder = new VariantContextBuilder(variant);
if (removeOldFilters) {
builder.unfiltered();
}
if (variant.hasAttribute(infoKey)) {
final double score = Double.parseDouble((String) variant.getAttribute(infoKey));
if (variant.isSNP() && snpCutoffs.size() != 0 && isTrancheFiltered(score, snpCutoffs)) {
builder.filter(filterStringFromScore(SNPString, score, snpTranches, snpCutoffs));
filteredSnps++;
} else if (variant.isIndel() && indelCutoffs.size() != 0 && isTrancheFiltered(score, indelCutoffs)) {
builder.filter(filterStringFromScore(INDELString, score, indelTranches, indelCutoffs));
filteredIndels++;
}
}
if (builder.getFilters() == null || builder.getFilters().size() == 0){
builder.passFilters();
}
vcfWriter.add(builder.make());
}
Example 12
@Override
protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {
String sv = scoreScan.nextLine();
String[] scoredVariant = sv.split("\\t");
if (variant.getContig().equals(scoredVariant[CONTIG_INDEX])
&& Integer.toString(variant.getStart()).equals(scoredVariant[POS_INDEX])
&& variant.getReference().getBaseString().equals(scoredVariant[REF_INDEX])
&& variant.getAlternateAlleles().toString().equals(scoredVariant[ALT_INDEX])) {
final VariantContextBuilder builder = new VariantContextBuilder(variant);
if (scoredVariant.length > KEY_INDEX) {
builder.attribute(scoreKey, scoredVariant[KEY_INDEX]);
}
vcfWriter.add(builder.make());
} else {
String errorMsg = "Score file out of sync with original VCF. Score file has:" + sv;
errorMsg += "\n But VCF has:" + variant.toStringWithoutGenotypes();
throw new GATKException(errorMsg);
}
}
Example 13
private VariantContext buildVariantContextWithDroppedAnnotationsRemoved(final VariantContext vc) {
if (infoAnnotationsToDrop.isEmpty() && genotypeAnnotationsToDrop.isEmpty()) {
return vc;
}
final VariantContextBuilder rmAnnotationsBuilder = new VariantContextBuilder(vc);
for (String infoField : infoAnnotationsToDrop) {
rmAnnotationsBuilder.rmAttribute(infoField);
}
if (!genotypeAnnotationsToDrop.isEmpty()) {
final ArrayList<Genotype> genotypesToWrite = new ArrayList<>();
for (Genotype genotype : vc.getGenotypes()) {
final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(genotype).noAttributes();
final Map<String, Object> attributes = new HashMap<>(genotype.getExtendedAttributes());
for (String genotypeAnnotation : genotypeAnnotationsToDrop) {
attributes.remove(genotypeAnnotation);
}
genotypeBuilder.attributes(attributes);
genotypesToWrite.add(genotypeBuilder.make());
}
rmAnnotationsBuilder.genotypes(GenotypesContext.create(genotypesToWrite));
}
return rmAnnotationsBuilder.make();
}
Example 14
/**
 * Adds the new allele filters to the existing allele filters in the vc. Computes whether there are
 * new site filters and updates the filter in the vc; if there are no site filters, sets the site filter to PASS.
 * Sets the filters for each allele and calculates the intersection of the allele filters to set on the variant:
 * PASS if the intersection is empty.
* @param vc The variant context to add the filters to, both at the allele and site level
* @param newAlleleFilters filters to be applied to each allele, the intersection of these filters are applied at the site level
* @param invalidatePreviousFilters whether existing filters should be removed
* @return The updated variant context
*/
public static VariantContext addAlleleAndSiteFilters(VariantContext vc, List<Set<String>> newAlleleFilters, boolean invalidatePreviousFilters) {
if (newAlleleFilters.isEmpty()) {
return vc;
}
List<List<String>> currentAlleleFilters = decodeASFilters(vc);
if (!currentAlleleFilters.isEmpty() && newAlleleFilters.size() != currentAlleleFilters.size()) {
// log an error
return vc;
}
if (currentAlleleFilters.isEmpty() || invalidatePreviousFilters) {
currentAlleleFilters = new ArrayList<>(Collections.nCopies(newAlleleFilters.size(), Collections.singletonList(GATKVCFConstants.SITE_LEVEL_FILTERS)));
}
ListIterator<List<String>> currentAlleleFiltersIt = currentAlleleFilters.listIterator();
List<List<String>> updatedAlleleFilters = newAlleleFilters.stream().map(newfilters -> addAlleleFilters(currentAlleleFiltersIt.next(), newfilters.stream().collect(Collectors.toList()))).collect(Collectors.toList());
String encodedFilters = encodeASFilters(updatedAlleleFilters);
VariantContextBuilder vcb = new VariantContextBuilder(vc).attribute(GATKVCFConstants.AS_FILTER_STATUS_KEY, encodedFilters);
if (invalidatePreviousFilters) {
vcb.unfiltered();
}
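// the collect below computes the intersection of all per-allele filter sets:
// seed with a copy of the first set, then retainAll each remaining set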
Set<String> siteFiltersToAdd = newAlleleFilters.stream().skip(1)
.collect(()->new HashSet<>(newAlleleFilters.get(0)), Set::retainAll, Set::retainAll);
siteFiltersToAdd.stream().forEach(filter -> vcb.filter(filter));
if ((vcb.getFilters() == null || vcb.getFilters().isEmpty()) && !invalidatePreviousFilters) {
vcb.passFilters();
}
return vcb.make();
}
Example 15
private void run() {
fileWriter.writeHeader(fileReader.getFileHeader());
for (final VariantContext context : fileReader) {
final VariantContextBuilder builder = new VariantContextBuilder(context);
final List<Genotype> genotypes = Lists.newArrayList();
for (int genotypeIndex = 0; genotypeIndex < context.getGenotypes().size(); genotypeIndex++) {
Genotype genotype = context.getGenotype(genotypeIndex);
if (genotype.hasAD() && genotype.hasDP()) {
int[] ad = genotype.getAD();
int total = 0;
boolean updateRecord = false;
for (int i = 0; i < ad.length; i++) {
int adValue = ad[i];
if (adValue < 0) {
updateRecord = true;
ad[i] = Math.abs(adValue);
}
total += Math.abs(adValue);
}
if (updateRecord) {
LOGGER.info("Updated entry at {}:{}", context.getContig(), context.getStart());
Genotype updated = new GenotypeBuilder(genotype).AD(ad).DP(total).make();
genotypes.add(updated);
}
else {
genotypes.add(genotype);
}
} else {
// carry over genotypes lacking AD or DP untouched; without this branch they would be dropped from the output
genotypes.add(genotype);
}
}
builder.genotypes(genotypes);
fileWriter.add(builder.make());
}
}
Example 16
/**
 * Depending on how the ref segment is present in the alt arrangement (if at all), the logic is as follows (order is important):
* <ul>
* <li>
 * if the ref segment appears inverted and is large enough
* <ul>
* <li> INV call is warranted </li>
 * <li> INS call(s) before and after the INV, if the inserted sequence is long enough </li>
* </ul>
* </li>
*
* <li>
 * otherwise, if the ref segment is present as-is, i.e. no deletion call can be made,
* make insertion calls when possible
* </li>
* <li>
* otherwise
* <ul>
* <li> if the segment is large enough, make a DEL call, and insertion calls when possible </li>
* <li> otherwise a single fat INS call</li>
* </ul>
* </li>
* </ul>
*
* <p>
* Note that the above logic has a bias towards getting INV calls, because
* when the (large enough) reference segment appears both as-is and inverted,
* the above logic will emit at least an INV call,
 * whereas the (inverted) duplication(s) could also be reported as a DUP call, but...
* </p>
*/
@Override
List<VariantContext> extract(final VariantContext complexVC, final ReferenceMultiSparkSource reference) {
final List<String> segments = SVUtils.getAttributeAsStringList(complexVC, CPX_SV_REF_SEGMENTS);
if (segments.isEmpty()) return whenZeroSegments(complexVC, reference);
final SimpleInterval refSegment = new SimpleInterval(segments.get(0));
final List<String> altArrangement = SVUtils.getAttributeAsStringList(complexVC, CPX_EVENT_ALT_ARRANGEMENTS);
final int altSeqLength = complexVC.getAttributeAsString(SEQ_ALT_HAPLOTYPE, "").length();
final List<VariantContextBuilder> result = new ArrayList<>();
final int asIsAppearanceIdx = altArrangement.indexOf("1");
final int invertedAppearanceIdx = altArrangement.indexOf("-1");
if (invertedAppearanceIdx != -1 && refSegment.size() > EVENT_SIZE_THRESHOLD) { // inversion call
whenInversionIsWarranted(refSegment, invertedAppearanceIdx, altArrangement, reference, result);
} else if (asIsAppearanceIdx != -1) { // no inverted appearance, or inverted but not large enough; meanwhile it appears as-is, so no deletion
whenNoDeletionIsAllowed(refSegment, asIsAppearanceIdx, altArrangement, altSeqLength, reference, result);
} else { // no as-is appearance && (an inverted appearance may be present but not large enough)
whenNoInvAndNoAsIsAppearance(refSegment, altSeqLength, reference, result);
}
final String sourceID = complexVC.getID();
final List<String> evidenceContigs = SVUtils.getAttributeAsStringList(complexVC, CONTIG_NAMES);
final List<String> mappingQualities = SVUtils.getAttributeAsStringList(complexVC, MAPPING_QUALITIES);
final int maxAlignLength = complexVC.getAttributeAsInt(MAX_ALIGN_LENGTH, 0);
return result.stream()
.map(vc -> vc.attribute(CPX_EVENT_KEY, sourceID).attribute(CONTIG_NAMES, evidenceContigs)
.attribute(MAPPING_QUALITIES, mappingQualities)
.attribute(MAX_ALIGN_LENGTH, maxAlignLength).make())
.collect(Collectors.toList());
}
Example 17
@NotNull
private VariantContext enrich(@NotNull final VariantContext context) {
VariantContextBuilder builder =
new VariantContextBuilder(context).attribute(HOTSPOT_FLAG, false).attribute(NEAR_HOTSPOT_FLAG, false);
if (hotspotEnrichment.isOnHotspot(context)) {
return builder.attribute(HOTSPOT_FLAG, true).make();
} else if (hotspotEnrichment.isNearHotspot(context)) {
return builder.attribute(NEAR_HOTSPOT_FLAG, true).make();
}
return builder.make();
}
Example 18
/**
 * Note that {@code delRange} is expected to be pre-processed to be VCF-spec compatible,
* e.g. if chr1:101-200 is deleted, then {@code delRange} should be chr1:100-200
 * @param delRange the deleted range, including the leading padding base
*/
@VisibleForTesting
static VariantContextBuilder makeDeletion(final SimpleInterval delRange, final Allele refAllele) {
return new VariantContextBuilder()
.chr(delRange.getContig()).start(delRange.getStart()).stop(delRange.getEnd())
.alleles(Arrays.asList(refAllele, altSymbAlleleDel))
.id(makeID(SimpleSVType.SupportedType.DEL.name(), delRange.getContig(), delRange.getStart(), delRange.getEnd()))
.attribute(VCFConstants.END_KEY, delRange.getEnd())
.attribute(SVLEN, - delRange.size() + 1)
.attribute(SVTYPE, SimpleSVType.SupportedType.DEL.name());
}
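A usage sketch of this helper, matching the padding convention in its javadoc (the interval and the reference base are illustrative):
// Deleting chr1:101-200: delRange includes the padding base at position 100.
final SimpleInterval delRange = new SimpleInterval("1", 100, 200);
final Allele paddedRef = Allele.create("A", true);  // reference base at 1:100 (illustrative)
final VariantContext del = makeDeletion(delRange, paddedRef).make();
// Resulting record: END=200, SVLEN=-100 (100 deleted bases), SVTYPE=DEL.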
Example 19
private static void whenInversionIsWarranted(final SimpleInterval refSegment, final int invertedAppearanceIdx,
final List<String> altArrangement, final ReferenceMultiSparkSource reference,
final List<VariantContextBuilder> result) {
final Allele anchorBaseRefAllele = getAnchorBaseRefAllele(refSegment.getContig(), refSegment.getStart(), reference);
result.add( makeInversion(refSegment, anchorBaseRefAllele) );
// further check if alt seq length is long enough to trigger an insertion as well,
// but guard against case smallIns1 + INV + smallIns2, in theory one could annotate the inversion
// with micro-insertions if that's the case, but we try to have minimal annotations here
final Allele anchorBaseRefAlleleFront = getAnchorBaseRefAllele(refSegment.getContig(), refSegment.getStart() - 1, reference);
final Allele anchorBaseRefAlleleRear = getAnchorBaseRefAllele(refSegment.getContig(), refSegment.getEnd(), reference);
extractFrontAndRearInsertions(refSegment, invertedAppearanceIdx, altArrangement,
anchorBaseRefAlleleFront, anchorBaseRefAlleleRear, result);
}
Example 20
@NotNull
private static VariantContext addRecoveryDetails(@NotNull final VariantContext context, @NotNull final String recoveryMethod,
@NotNull final List<String> recoveryFilters) {
return new VariantContextBuilder(context).unfiltered()
.attribute(RECOVERED, true)
.attribute(RECOVERY_METHOD, recoveryMethod)
.attribute(RECOVERY_FILTER, recoveryFilters)
.make();
}
Example 21
@NotNull
private static VariantContext dummyContext() {
final Collection<Allele> alleles = Lists.newArrayList(Allele.create("N", true));
return new VariantContextBuilder("purple", "2", 1, 1, alleles)
.noGenotypes()
.make();
}
Example 22
private void writeHeaderAndBadVariant(final VariantContextWriter writer) {
final VariantContextBuilder vcBuilder = new VariantContextBuilder(
"chr1","1", 1, 1, Arrays.asList(Allele.create("A", true)));
vcBuilder.attribute("fake", new Object());
final VariantContext vc = vcBuilder.make();
final VCFHeader vcfHeader = new VCFHeader();
writer.writeHeader(vcfHeader);
writer.add(vc);
}
Example 23
private VariantContext transformAttributesToStandardNames(final VariantContext vc) {
final Map<String,Object> transformedAttributes = vc.getAttributes().entrySet().stream()
.collect(Collectors.toMap(e-> aliasToKeyMapping.getOrDefault(e.getKey(), e.getKey()), e -> e.getValue()));
final VariantContextBuilder vcb = new VariantContextBuilder(vc).attributes(transformedAttributes);
return vcb.make();
}
Example 24
private VariantContext makeVC() {
final GenotypesContext testGC = GenotypesContext.create(2);
final Allele refAllele = Allele.create("A", true);
final Allele altAllele = Allele.create("T");
return (new VariantContextBuilder())
.alleles(Arrays.asList(refAllele, altAllele)).chr("1").start(15L).stop(15L).genotypes(testGC).make();
}
Example 25
private static void writeHotspots(@NotNull String hotspotVcf, @NotNull Map<ViccEntry, ViccExtractionResult> resultsPerEntry) {
VariantContextWriter writer = new VariantContextWriterBuilder().setOutputFile(hotspotVcf)
.setOutputFileType(VariantContextWriterBuilder.OutputType.VCF)
.setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER)
.modifyOption(Options.INDEX_ON_THE_FLY, false)
.build();
VCFHeader header = new VCFHeader(Sets.newHashSet(), Lists.newArrayList());
writer.writeHeader(header);
for (Map.Entry<VariantHotspot, HotspotAnnotation> entry : convertAndSort(resultsPerEntry).entrySet()) {
VariantHotspot hotspot = entry.getKey();
HotspotAnnotation annotation = entry.getValue();
List<Allele> hotspotAlleles = buildAlleles(hotspot);
VariantContext variantContext = new VariantContextBuilder().noGenotypes()
.source("VICC")
.chr(hotspot.chromosome())
.start(hotspot.position())
.alleles(hotspotAlleles)
.computeEndFromAlleles(hotspotAlleles, (int) hotspot.position())
.attribute("sources", annotation.sources())
.attribute("feature",
ProteinKeyFormatter.toProteinKey(annotation.gene(), annotation.transcript(), annotation.proteinAnnotation()))
.make();
LOGGER.debug("Writing {}", variantContext);
writer.add(variantContext);
}
writer.close();
}
Example 26
@NotNull
static VariantContext simplifyVariant(@NotNull final VariantContext variant, @NotNull final String sampleName) {
// Force GT to 0/1 even for variants with multiple alts
final List<Allele> outputVariantAlleles = variant.getAlleles().subList(0, 2);
final Genotype genotype = new GenotypeBuilder(sampleName, outputVariantAlleles).DP(getDP(variant)).AD(getAD(variant)).make();
return new VariantContextBuilder(variant).genotypes(genotype).make();
}
Example 27
@Test(dataProvider = "gvcfCases")
public void testEnableDisableGVCFWriting(boolean writeGvcf, String extension) throws IOException {
List<VariantContext> vcs = new ArrayList<>();
for(int i = 1; i <= 10; i++) {
final Allele A = Allele.create("A", true);
final VariantContext vc = new VariantContextBuilder("hand crafted", "1", i, i,
Arrays.asList(A, Allele.NON_REF_ALLELE))
.genotypes(new GenotypeBuilder(SAMPLE).alleles(Arrays.asList(A, A)).DP(10).GQ(10).PL(new int[]{0, 60, 10}).make())
.make();
vcs.add(vc);
}
final JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
final File output = createTempFile(outputFileName, extension);
VariantsSparkSink.writeVariants(ctx, output.toString(), ctx.parallelize(vcs), getHeader(), writeGvcf, Arrays.asList(100), 2, 1, true, true);
checkFileExtensionConsistentWithContents(output.toString(), true);
new CommandLineProgramTest(){
@Override
public String getTestedToolName(){
return IndexFeatureFile.class.getSimpleName();
}
}.runCommandLine(new String[]{"-I", output.getAbsolutePath()});
final List<VariantContext> writtenVcs = readVariants(output.toString());
//if we are actually writing a gvcf, all the variant blocks will be merged into a single homref block
Assert.assertEquals(writtenVcs.size(), writeGvcf ? 1 : 10);
Assert.assertEquals(writtenVcs.stream().mapToInt(VariantContext::getStart).min().getAsInt(), 1);
Assert.assertEquals(writtenVcs.stream().mapToInt(VariantContext::getEnd).max().getAsInt(), 10);
}
Example 28
@Override
public void apply(final AnnotatedInterval segment, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
// Convert segment into a VariantContext while honoring the funcotation engine's necessary conversions.
final VariantContext segmentVariantContext = funcotatorEngine.getDefaultVariantTransformer()
.andThen(getTransformAttributesToStandardNames())
.apply(AnnotatedIntervalToSegmentVariantContextConverter.convert(segment, referenceContext));
// Get the correct reference for B37/HG19 compliance:
// This is necessary because of the variant transformation that gets applied in VariantWalkerBase::apply.
final ReferenceContext correctReferenceContext = funcotatorEngine.getCorrectReferenceContext(segmentVariantContext, referenceContext);
// funcotate
// The resulting funcotation map should only have one transcript ID (which is the "no transcript" ID).
final FuncotationMap funcotationMap = funcotatorEngine.createFuncotationMapForSegment(segmentVariantContext, correctReferenceContext, featureContext);
// This will propagate input variant context attributes to the output
for (final String txId : funcotationMap.getTranscriptList()) {
funcotationMap.add(txId, FuncotatorUtils.createFuncotationsFromMetadata(segmentVariantContext, createMetadata(), "FUNCOTATE_SEGMENTS"));
}
// Force the final output to have the same contig convention as the input.
final VariantContext finalVC = new VariantContextBuilder(segmentVariantContext)
.chr(segment.getContig())
.make();
// write the variant context
outputRenderer.write(finalVC, funcotationMap);
}
Example 29
/**
 * Note that {@code delRange} is expected to be VCF-spec compatible,
* e.g. if chr1:101-200 is deleted, then {@code delRange} should be chr1:100-200
*/
static final VariantContextBuilder makeDeletion(final SimpleInterval delRange, final Allele refAllele, final boolean isFromDupContraction) {
return new VariantContextBuilder()
.chr(delRange.getContig()).start(delRange.getStart()).stop(delRange.getEnd())
.alleles(Arrays.asList(refAllele, DEL_SYMB_ALLELE))
.id(makeID((isFromDupContraction? DUP_TAN_CONTRACTION_INTERNAL_ID_START_STRING : SimpleSVType.SupportedType.DEL.name()), delRange.getContig(), delRange.getStart(), delRange.getContig(), delRange.getEnd(), ""))
.attribute(VCFConstants.END_KEY, delRange.getEnd())
.attribute(SVLEN, - delRange.size() + 1)
.attribute(SVTYPE, SimpleSVType.SupportedType.DEL.name());
}