from __future__ import annotations
from typing import Any, Sequence
class HeaderItem(dict):
    """The base class of header items.

    A structured header line such as ``##INFO=<ID=DP,Number=1,...>`` is kept
    as a mapping of its ``key=value`` pairs. Attribute access is routed to
    the mapping: ``item.ID`` reads ``item["ID"]`` and ``item.raw = x`` stores
    ``item["raw"]``. As a consequence ``raw`` (the unparsed source line,
    ``None`` unless the item was built by :meth:`from_str`) is itself an
    entry of the mapping.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Routed through __setattr__, so this creates/overwrites the "raw"
        # entry of the mapping.
        self.raw = None

    @staticmethod
    def _split_pairs(body: str) -> list[str]:
        """Split ``body`` on the commas that are not inside double quotes.

        Inside a quoted value a backslash protects the next character, so an
        escaped quote does not end the value. Empty pieces are dropped.
        """
        pairs = []
        current = []
        in_quotes = False
        escaped = False
        for char in body:
            if escaped:
                escaped = False
            elif char == "\\" and in_quotes:
                escaped = True
            elif char == '"':
                in_quotes = not in_quotes
            elif char == "," and not in_quotes:
                pairs.append("".join(current))
                current = []
                continue
            current.append(char)
        pairs.append("".join(current))
        return [pair for pair in pairs if pair]

    @classmethod
    def from_str(cls, line: str):
        """Parse a ``##KEY=<k1=v1,k2=v2,...>`` header line.

        Args:
            line: The header line; a trailing line ending is ignored for
                parsing but preserved in ``raw``.

        Returns:
            An instance of ``cls`` holding every ``key=value`` pair found
            between ``<`` and the last ``>``. The ``Description`` value is
            stored without its surrounding double quotes; every other value
            is stored exactly as written.

        Raises:
            ValueError: If a comma-separated piece has no ``=``.
        """
        obj = cls()
        obj.raw = line
        line = line.rstrip("\r\n")
        start = line.find("<") + 1
        end = line.rfind(">")
        if end < start:
            # No closing bracket after the opening one: parse to the end.
            end = len(line)
        for item in cls._split_pairs(line[start:end]):
            key, sep, value = item.partition("=")
            if not sep:
                raise ValueError(
                    f"Malformed header item {item!r} in line: {line!r}"
                )
            if (
                key == "Description"
                and len(value) >= 2
                and value[0] == value[-1] == '"'
            ):
                value = value[1:-1]
            obj[key] = value
        return obj

    def __setattr__(self, name: str, value: Any) -> None:
        """Store ``value`` as the mapping entry ``name``."""
        return super().__setitem__(name, value)

    def __getattr__(self, name: str) -> Any:
        """Return the mapping entry ``name``.

        Only called when normal attribute lookup fails. A missing entry is
        reported as ``AttributeError`` so that ``hasattr``, ``getattr`` with
        a default and ``copy.deepcopy`` keep working on header items.
        """
        try:
            return super().__getitem__(name)
        except KeyError:
            raise AttributeError(name) from None
class HeaderInfo(HeaderItem):
    """The INFO items in the header.

    Represents a line of the form
    ``##INFO=<ID=...,Number=...,Type=...,Description="...">``.
    """

    kind = "info"

    def __str__(self):
        """Format the item as an ``##INFO`` header line, without line ending.

        Only ``ID``, ``Number``, ``Type`` and ``Description`` are written; a
        missing one raises ``KeyError``.
        """
        return (
            f"##INFO=<ID={self['ID']},"
            f"Number={self['Number']},"
            f"Type={self['Type']},"
            f"Description=\"{self['Description']}\">"
        )

    @staticmethod
    def is_type(raw: str) -> bool:
        """Tell whether ``raw`` is an ``##INFO`` header line.

        The key (the text between ``##`` and the first ``=``) must be exactly
        ``INFO``. A key that merely starts with those letters, such as
        ``##INFOX=...``, is not matched here; ``HeaderGeneral.is_type``
        accepts such a line, so the two predicates never both match.
        """
        return raw.startswith("##") and raw[2:].split("=", 1)[0] == "INFO"
class HeaderFormat(HeaderItem):
    """The FORMAT items in the header.

    Represents a line of the form
    ``##FORMAT=<ID=...,Number=...,Type=...,Description="...">``.
    """

    kind = "format"

    def __str__(self):
        """Format the item as a ``##FORMAT`` header line, without line ending.

        Only ``ID``, ``Number``, ``Type`` and ``Description`` are written; a
        missing one raises ``KeyError``.
        """
        return (
            f"##FORMAT=<ID={self['ID']},"
            f"Number={self['Number']},"
            f"Type={self['Type']},"
            f"Description=\"{self['Description']}\">"
        )

    @staticmethod
    def is_type(raw: str) -> bool:
        """Tell whether ``raw`` is a ``##FORMAT`` header line.

        The key (the text between ``##`` and the first ``=``) must be exactly
        ``FORMAT``. A key that merely starts with those letters is not
        matched here; ``HeaderGeneral.is_type`` accepts such a line, so the
        two predicates never both match.
        """
        return raw.startswith("##") and raw[2:].split("=", 1)[0] == "FORMAT"
class HeaderFilter(HeaderItem):
    """The FILTER items in the header.

    Represents a line of the form ``##FILTER=<ID=...,Description="...">``.
    """

    kind = "filter"

    def __str__(self):
        """Format the item as a ``##FILTER`` header line, without line ending.

        Only ``ID`` and ``Description`` are written; a missing one raises
        ``KeyError``.
        """
        return (
            f"##FILTER=<ID={self['ID']},"
            f"Description=\"{self['Description']}\">"
        )

    @staticmethod
    def is_type(raw: str) -> bool:
        """Tell whether ``raw`` is a ``##FILTER`` header line.

        The key (the text between ``##`` and the first ``=``) must be exactly
        ``FILTER``. A key that merely starts with those letters is not
        matched here; ``HeaderGeneral.is_type`` accepts such a line, so the
        two predicates never both match.
        """
        return raw.startswith("##") and raw[2:].split("=", 1)[0] == "FILTER"
class HeaderContig(HeaderItem):
    """The contig items in the header.

    Represents a line of the form ``##contig=<ID=...,length=...>``.
    """

    kind = "contig"

    def __str__(self):
        """Format the item as a ``##contig`` header line, without line ending.

        ``ID`` is always written (``KeyError`` if absent). ``length`` is
        written only when the item has one, so a contig declared without a
        length can still be serialised. Other attributes are not written.
        """
        attributes = [f"ID={self['ID']}"]
        if "length" in self:
            attributes.append(f"length={self['length']}")
        return f"##contig=<{','.join(attributes)}>"

    @staticmethod
    def is_type(raw: str) -> bool:
        """Tell whether ``raw`` is a ``##contig`` header line.

        The key (the text between ``##`` and the first ``=``) must be exactly
        ``contig``. A key that merely starts with those letters is not
        matched here; ``HeaderGeneral.is_type`` accepts such a line, so the
        two predicates never both match.
        """
        return raw.startswith("##") and raw[2:].split("=", 1)[0] == "contig"
class HeaderGeneral(HeaderItem):
    """The general items in the header.

    Covers plain ``##key=value`` lines; the two parts are stored under the
    entries ``"key"`` and ``"value"``.
    """

    kind = "header"

    @classmethod
    def from_str(cls, line: str):
        """Build an item from a ``##key=value`` line.

        The untouched ``line`` is kept as ``raw``. The text after the leading
        two characters is split at its first ``=``; a line without ``=``
        raises ``ValueError``.
        """
        item = cls()
        item.raw = line
        content = line.rstrip("\r\n")[2:]
        key, value = content.split("=", 1)
        item["key"] = key
        item["value"] = value
        return item

    def __str__(self):
        """Format the item back as ``##key=value``, without line ending."""
        key = self["key"]
        value = self["value"]
        return f"##{key}={value}"

    @staticmethod
    def is_type(raw: str) -> bool:
        """Tell whether ``raw`` is a ``##`` line that is not structured.

        A line qualifies when it starts with ``##`` and the text before its
        first ``=`` is none of ``INFO``, ``FILTER``, ``FORMAT``, ``contig``.
        """
        structured_keys = ("INFO", "FILTER", "FORMAT", "contig")
        if raw.startswith("##"):
            return raw[2:].partition("=")[0] not in structured_keys
        return False
class Fields(list):
    """The fields/column names.

    A list of the column names found on the ``#CHROM ...`` line, with the
    leading ``#`` removed.
    """

    kind = "fields"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The unparsed source line; only set by from_str.
        self.raw = None

    @classmethod
    def from_str(cls, line: str):
        """Build the column list from a tab-separated ``#...`` line.

        The untouched ``line`` is kept as ``raw``; the line ending and the
        first character are dropped before splitting on tabs.
        """
        fields = cls()
        fields.raw = line
        columns = line.rstrip("\r\n")[1:].split("\t")
        fields.extend(columns)
        return fields

    def __str__(self):
        """Format the columns back as a ``#``-prefixed, tab-joined line."""
        joined = "\t".join(self)
        return f"#{joined}"

    @property
    def samples(self):
        """The column names from the tenth column on."""
        return self[9:]

    @staticmethod
    def is_type(raw: str) -> bool:
        """Tell whether ``raw`` starts with ``#CHROM``."""
        return raw.startswith("#CHROM")
class Info(dict):
    """The INFO of the variant.

    Maps each INFO key to its value as a string; a flag (a key written
    without ``=``) maps to ``True``.
    """

    @classmethod
    def from_str(cls, infostr: str):
        """Parse a ``;``-separated INFO column.

        Args:
            infostr: The INFO column text, e.g. ``"DP=10;DB;AF=0.5"``.

        Returns:
            The parsed mapping. A lone ``"."`` (the missing-value marker)
            and empty segments (empty input, doubled or trailing ``;``)
            produce no entry instead of a flag named ``"."`` or ``""``.
        """
        obj = cls()
        if infostr == ".":
            return obj
        for part in infostr.split(";"):
            if not part:
                continue
            name, sep, value = part.partition("=")
            # No "=" at all means the entry is a flag.
            obj[name] = value if sep else True
        return obj

    def __str__(self) -> str:
        """Format the mapping back as an INFO column.

        ``True`` values are written as bare flags, ``False`` values are
        skipped, everything else as ``key=value``. When nothing is left to
        write the result is ``"."`` rather than an empty string, so the
        column is never empty in a tab-separated record.
        """
        text = ";".join(
            k if v is True else f"{k}={v}"
            for k, v in self.items()
            if v is not False
        )
        return text or "."
class Format(list):
    """The FORMAT of the variant.

    A list of the keys found in the ``:``-separated FORMAT column.
    """

    @classmethod
    def from_str(cls, formatstr: str):
        """Build the key list by splitting ``formatstr`` on ``:``."""
        keys = formatstr.split(":")
        return cls(keys)

    def __str__(self) -> str:
        """Join the keys back with ``:``."""
        separator = ":"
        return separator.join(self)
class Alt(list):
    """The ALT of the variant.

    A list of the alleles found in the ``,``-separated ALT column.
    """

    @classmethod
    def from_str(cls, altstr: str):
        """Build the allele list by splitting ``altstr`` on ``,``."""
        return cls(altstr.split(","))

    def __str__(self) -> str:
        """Join the alleles back with ``,``.

        An empty list is written as ``"."`` rather than an empty string, so
        the column is never empty in a tab-separated record.
        """
        return ",".join(self) or "."
class Filter(list):
    """The FILTER of the variant.

    A list of the entries found in the ``;``-separated FILTER column.
    """

    @classmethod
    def from_str(cls, filtstr: str):
        """Build the filter list by splitting ``filtstr`` on ``;``."""
        return cls(filtstr.split(";"))

    def __str__(self) -> str:
        """Join the entries back with ``;``.

        An empty list is written as ``"."`` rather than an empty string, so
        the column is never empty in a tab-separated record.
        """
        return ";".join(self) or "."
class Sample(dict):
    """One sample of the variant.

    Maps each FORMAT key to the value this sample has for it. The FORMAT
    passed at construction is kept to fix the order used when formatting.
    """

    def __init__(self, values: Sequence[str], format: Format):
        """Pair ``values`` with the keys of ``format``, position by position.

        Pairing stops at the shorter of the two, so a sample with fewer
        values than FORMAT keys simply has no entry for the trailing keys.
        """
        super().__init__()
        self._format = format
        for name, value in zip(format, values):
            self[name] = value

    @property
    def format(self):
        """The FORMAT object this sample was built with."""
        return self._format

    @classmethod
    def from_str(cls, value_str: str, format: Format):
        """Build a sample from a ``:``-separated sample column."""
        return cls(value_str.split(":"), format)

    @classmethod
    def from_strs(cls, value_strs: Sequence[str], format: Format):
        """Build a sample from already separated values."""
        return cls(value_strs, format)

    def __str__(self) -> str:
        """Format the sample as a ``:``-separated column, in FORMAT order.

        Keys without an entry (or holding ``None``) at the end of the FORMAT
        are left out; such keys in the middle are written as ``"."`` to keep
        the later values in position. A sample with nothing to write is
        ``"."``. Values are converted with ``str`` so non-string values that
        were assigned later can be written too.
        """
        values = [self.get(name) for name in self._format]
        while values and values[-1] is None:
            values.pop()
        if not values:
            return "."
        return ":".join("." if value is None else str(value) for value in values)
class Samples(list):
    """The samples of the variant.

    A list of the per-sample entries of one record, together with the FORMAT
    they were built with.
    """

    def __init__(self, samples: Sequence[Sample], format: Format):
        super().__init__(samples)
        self._format = format

    @property
    def format(self):
        """The FORMAT object passed at construction."""
        return self._format

    @classmethod
    def from_str(cls, sample_str: str, format: Format):
        """Build from the tab-separated text of all sample columns."""
        columns = sample_str.split("\t")
        parsed = [Sample.from_str(column, format) for column in columns]
        return cls(parsed, format)

    @classmethod
    def from_strs(cls, sample_strs: Sequence[str], format: Format):
        """Build from one ``:``-separated string per sample."""
        parsed = [Sample.from_str(column, format) for column in sample_strs]
        return cls(parsed, format)

    @classmethod
    def from_strss(cls, sample_strss: Sequence[Sequence[str]], format: Format):
        """Build from one sequence of already separated values per sample."""
        parsed = [Sample.from_strs(values, format) for values in sample_strss]
        return cls(parsed, format)

    def __str__(self) -> str:
        """Join the string form of every sample with tabs."""
        return "\t".join(map(str, self))
class Variant:
    """One variant record (a data line).

    Holds the eight fixed columns plus the FORMAT and the samples, and the
    unparsed source line in ``raw`` when the record was built by
    :meth:`from_str` (``None`` otherwise).
    """

    kind = "variant"

    def __init__(
        self,
        chrom: str,
        pos: int,
        id: str,
        ref: str,
        alt: Alt,
        qual: str,
        filter: Filter,
        info: Info,
        format: Format,
        samples: Samples,
    ):
        self.chrom = chrom
        self.pos = pos
        self.id = id
        self.ref = ref
        self.alt = alt
        self.qual = qual
        self.filter = filter
        self.info = info
        self.format = format
        self.samples = samples
        self.raw = None

    @classmethod
    def from_strs(
        cls,
        chrom: str,
        pos: int | str,
        id: str,
        ref: str,
        alt: str | Sequence[str],
        qual: str,
        filter: str | Sequence[str],
        info: str | dict,
        format: str | Sequence[str],
        samples: str | Sequence[str] | Sequence[Sequence[str]],
    ):
        """Build a record from column values given as text or sequences.

        Each of ``alt``, ``filter``, ``info`` and ``format`` is parsed when
        given as a string and wrapped in its container type otherwise;
        ``pos`` is converted with ``int``. ``samples`` may be the
        tab-separated text of all sample columns, one string per sample, or
        one sequence of values per sample. An empty ``format`` or ``samples``
        (empty string or empty sequence) yields an empty container, which is
        how a record without genotype columns is expressed.
        """
        if isinstance(format, str):
            format = Format.from_str(format) if format else Format()
        else:
            format = Format(format)

        if not samples:
            # Nothing to inspect: indexing samples[0] would fail here.
            samples = Samples([], format)
        elif isinstance(samples, str):
            samples = Samples.from_str(samples, format)
        elif isinstance(samples[0], str):
            samples = Samples.from_strs(samples, format)
        else:
            samples = Samples.from_strss(samples, format)

        obj = cls(
            chrom,
            int(pos),
            id,
            ref,
            Alt.from_str(alt) if isinstance(alt, str) else Alt(alt),
            qual,
            Filter.from_str(filter)
            if isinstance(filter, str)
            else Filter(filter),
            Info.from_str(info) if isinstance(info, str) else Info(info),
            format,
            samples,
        )
        return obj

    @classmethod
    def from_str(cls, variant_line: str):
        """Parse one tab-separated data line.

        The untouched line is kept as ``raw``. Columns 1-8 are required (a
        shorter line raises ``IndexError``, a non-integer position
        ``ValueError``). The FORMAT column and the sample columns are
        optional: a line that stops after INFO gets an empty FORMAT and no
        samples.
        """
        raw = variant_line
        items = variant_line.rstrip("\r\n").split("\t")
        chrom = items[0]
        pos = int(items[1])
        id = items[2]
        ref = items[3]
        alt = Alt.from_str(items[4])
        qual = items[5]
        filter = Filter.from_str(items[6])
        info = Info.from_str(items[7])
        format = Format.from_str(items[8]) if len(items) > 8 else Format()
        samples = Samples.from_strs(items[9:], format)
        obj = cls(
            chrom,
            pos,
            id,
            ref,
            alt,
            qual,
            filter,
            info,
            format,
            samples,
        )
        obj.raw = raw
        return obj

    def __str__(self):
        """Format the record as a tab-separated line, without line ending.

        The FORMAT and sample columns are left out when both are empty, so a
        record without genotype columns does not end in empty columns.
        """
        columns = [
            self.chrom,
            self.pos,
            self.id,
            self.ref,
            self.alt,
            self.qual,
            self.filter,
            self.info,
        ]
        if self.format or self.samples:
            columns.append(self.format)
            columns.append(self.samples)
        return "\t".join(str(column) for column in columns)

    def __repr__(self):
        return f"Variant({self.chrom}, {self.pos}, {self.id})"

    @staticmethod
    def is_type(raw: str) -> bool:
        """Tell whether ``raw`` is a data line, i.e. does not start with ``#``."""
        return not raw.startswith("#")