pyCirclize: Python中的圆形可视化
目录
概述
pyCirclize是一个基于matplotlib实现的圆形可视化Python包。 该包的开发目的是在Python中轻松且美观地绘制圆形图表,如Circos图和和弦图。 此外,还实现了生物信息学领域有用的基因组和系统发育树可视化方法。 pyCirclize受到circlize和pyCircos的启发。 更详细的文档可在此处获取。
图1 pyCirclize示例图库
安装
安装需要Python 3.8或更高版本。
安装PyPI包:
pip install pycirclize
安装conda-forge包:
conda install -c conda-forge pycirclize
API使用
API的使用方法在文档的以下各节中有详细说明。
代码示例
1. Circos图
from pycirclize import Circos
import numpy as np
# Example 1: basic Circos plot with line, scatter and bar tracks plus links.
# Fix the RNG seed so the random y values are the same on every run.
np.random.seed(0)

# Initialize Circos sectors
sectors = {"A": 10, "B": 15, "C": 12, "D": 20, "E": 15}
circos = Circos(sectors, space=5)

for sector in circos.sectors:
    # Plot the sector name
    sector.text(f"扇区: {sector.name}", r=110, size=15)

    # Create x positions and random y values
    x = np.arange(sector.start, sector.end) + 0.5
    y = np.random.randint(0, 100, len(x))

    # Plot lines
    track1 = sector.add_track((80, 100), r_pad_ratio=0.1)
    track1.xticks_by_interval(interval=1)
    track1.axis()
    track1.line(x, y)

    # Plot points
    track2 = sector.add_track((55, 75), r_pad_ratio=0.1)
    track2.axis()
    track2.scatter(x, y)

    # Plot bars
    track3 = sector.add_track((30, 50), r_pad_ratio=0.1)
    track3.axis()
    track3.bar(x, y)

# Plot links; these are drawn once, after every sector's tracks exist,
# so they stay outside the per-sector loop.
circos.link(("A", 0, 3), ("B", 15, 12))
circos.link(("B", 0, 3), ("C", 7, 11), color="skyblue")
circos.link(("C", 2, 5), ("E", 15, 12), color="chocolate", direction=1)
circos.link(
    ("D", 3, 5),
    ("D", 18, 15),
    color="lime",
    ec="black",
    lw=0.5,
    hatch="//",
    direction=2,
)
circos.link(
    ("D", 8, 10),
    ("E", 2, 8),
    color="violet",
    ec="red",
    lw=1.0,
    ls="dashed",
)

circos.savefig("example01.png")
2. Circos图(基因组学)
from pycirclize import Circos
from pycirclize.utils import fetch_genbank_by_accid
from pycirclize.parser import Genbank
# Example 2: circular genome plot of a plasmid GenBank record.
# Download the `NC_002483` E.coli plasmid GenBank file
gbk_fetch_data = fetch_genbank_by_accid("NC_002483")
gbk = Genbank(gbk_fetch_data)

# Initialize the Circos instance with the genome size
circos = Circos(sectors={gbk.name: gbk.range_size})
circos.text(f"大肠杆菌K-12 质粒F\n\n{gbk.name}", size=14)
circos.rect(r_lim=(90, 100), fc="lightgrey", ec="none", alpha=0.5)
sector = circos.sectors[0]

# Plot forward-strand CDS
f_cds_track = sector.add_track((95, 100))
f_cds_feats = gbk.extract_features("CDS", target_strand=1)
f_cds_track.genomic_features(f_cds_feats, plotstyle="arrow", fc="salmon", lw=0.5)

# Plot reverse-strand CDS
r_cds_track = sector.add_track((90, 95))
r_cds_feats = gbk.extract_features("CDS", target_strand=-1)
r_cds_track.genomic_features(r_cds_feats, plotstyle="arrow", fc="skyblue", lw=0.5)

# Plot 'gene' qualifier labels where present
labels, label_pos_list = [], []
for feat in gbk.extract_features("CDS"):
    start = int(feat.location.start)
    end = int(feat.location.end)
    # Anchor each label at the midpoint of its feature
    label_pos = (start + end) / 2
    # Features without a 'gene' qualifier yield None and are skipped
    gene_name = feat.qualifiers.get("gene", [None])[0]
    if gene_name is not None:
        labels.append(gene_name)
        label_pos_list.append(label_pos)
f_cds_track.xticks(label_pos_list, labels, label_size=6, label_orientation="vertical")

# Plot xticks (interval = 10 Kb)
r_cds_track.xticks_by_interval(
    10000, outer=False, label_formatter=lambda v: f"{v/1000:.1f} Kb"
)

circos.savefig("example02.png")
3. 和弦图
from pycirclize import Circos
import pandas as pd
# Example 3: chord diagram built from a matrix dataframe.
# Create the matrix dataframe (3 x 6)
matrix_df = pd.DataFrame(
    [
        [10, 16, 7, 7, 10, 8],
        [4, 9, 10, 12, 12, 7],
        [17, 13, 7, 4, 20, 4],
    ],
    index=["F1", "F2", "F3"],
    columns=["T1", "T2", "T3", "T4", "T5", "T6"],
)

# Initialize Circos from the matrix to plot the chord diagram
label_kws = {"size": 12}
link_kws = {"ec": "black", "lw": 0.5, "direction": 1}
circos = Circos.initialize_from_matrix(
    matrix_df,
    space=5,
    cmap="tab10",
    label_kws=label_kws,
    link_kws=link_kws,
)

circos.savefig("example03.png")
4. 系统发育树
from pycirclize import Circos
from pycirclize.utils import load_example_tree_file, ColorCycler
from matplotlib.lines import Line2D
# Example 4: circular phylogenetic tree with per-group colors and a legend.
# Initialize Circos from the phylogenetic tree
tree_file = load_example_tree_file("large_example.nwk")
circos, tv = Circos.initialize_from_tree(
    tree_file,
    r_lim=(30, 100),
    leaf_label_size=5,
    line_kws=dict(color="lightgrey", lw=1.0),
)

# Define the group -> species mapping used for tree annotation.
# Each list holds a minimal set of species, enough to pin down the group's
# most recent common ancestor (MRCA) node.
# The keys are shown verbatim as legend labels, so they must name the clade
# actually spanned by the listed species:
#   - colugo + gelada  -> Euarchonta (not simians)
#   - rabbit + vole    -> Glires (not rodents only)
#   - mole + seal      -> Laurasiatheria (not all of Eutheria)
group_name2species_list = {
    "单孔目": ["Tachyglossus_aculeatus", "Ornithorhynchus_anatinus"],
    "有袋目": ["Monodelphis_domestica", "Vombatus_ursinus"],
    "貧齒目": ["Choloepus_didactylus", "Dasypus_novemcinctus"],
    "非洲獸目": ["Trichechus_manatus", "Chrysochloris_asiatica"],
    "真灵长大目": ["Galeopterus_variegatus", "Theropithecus_gelada"],
    "啮型类": ["Oryctolagus_cuniculus", "Microtus_oregoni"],
    "劳亚兽总目": ["Talpa_occidentalis", "Mirounga_leonina"],
}

# Set tree line color and label color, one color per group
ColorCycler.set_cmap("tab10")
group_name2color = {name: ColorCycler() for name in group_name2species_list}
for group_name, species_list in group_name2species_list.items():
    color = group_name2color[group_name]
    tv.set_node_line_props(species_list, color=color, apply_label_color=True)

# Plot the figure and place the legend at the center
fig = circos.plotfig()
_ = circos.ax.legend(
    handles=[Line2D([], [], label=n, color=c) for n, c in group_name2color.items()],
    labelcolor=group_name2color.values(),
    fontsize=6,
    loc="center",
    bbox_to_anchor=(0.5, 0.5),
)
fig.savefig("example04.png")
5. 雷达图
from pycirclize import Circos
import pandas as pd
# Example 5: radar chart of RPG job parameters.
# Create the RPG jobs parameter dataframe (3 jobs, 7 parameters)
job_names = ["英雄", "战士", "法师"]
param_names = ["生命值", "魔法值", "攻击力", "防御力", "特攻", "特防", "速度"]
param_values = [
    [80, 80, 80, 80, 80, 80, 80],
    [90, 20, 95, 95, 30, 30, 80],
    [60, 90, 20, 20, 100, 90, 50],
]
df = pd.DataFrame(data=param_values, index=job_names, columns=param_names)

# Initialize the Circos instance for the radar chart plot
circos = Circos.radar_chart(df, vmax=100, marker_size=6, grid_interval_ratio=0.2)

# Plot the figure and place the legend at the upper right
fig = circos.plotfig()
_ = circos.ax.legend(loc="upper right", fontsize=10)
fig.savefig("example05.png")
尚未实现的功能
以下是其他Circos绘图工具已实现、但pyCirclize尚未实现的功能列表。 作者可能会在有兴趣时实现它们。
- 绘制直方图
- 绘制箱线图
- 绘制小提琴图
- 绘制曲线文本
- 调整重叠标签位置