TraceML
Polyaxon 的 ML/数据跟踪、可视化、可解释性、漂移检测和仪表板引擎。
安装
pip install traceml
如果您想使用跟踪功能,还需要安装 polyaxon:
pip install polyaxon traceml
[进行中] 本地沙盒
即将推出
离线使用
您可以启用离线模式以在没有 API 的情况下跟踪运行:
export POLYAXON_OFFLINE="true"
或者传递离线标志
from traceml import tracking
tracking.init(..., is_offline=True, ...)
在 Python 脚本中的简单使用
import random
import traceml as tracking
tracking.init(
is_offline=True,
project='quick-start',
name="my-new-run",
description="trying TraceML",
tags=["examples"],
artifacts_path="path/to/artifacts/repo"
)
# 跟踪一些数据引用
tracking.log_data_ref(content=X_train, name='x_train')
tracking.log_data_ref(content=y_train, name='y_train')
# 跟踪输入
tracking.log_inputs(
batch_size=64,
dropout=0.2,
learning_rate=0.001,
optimizer="Adam"
)
def get_loss(step):
result = 10 / (step + 1)
noise = (random.random() - 0.5) * 0.5 * result
return result + noise
# 跟踪指标
for step in range(100):
loss = get_loss(step)
tracking.log_metrics(
loss=loss,
accuracy=(100 - loss) / 100.0,
)
# 跟踪一些一次性的结果
tracking.log_outputs(validation_score=0.66)
# 可选择手动停止跟踪过程
tracking.stop()
与深度学习、机器学习库及框架的集成
Keras
您可以使用 TraceML 的回调自动保存所有指标并收集输出和模型,还可以使用日志方法跟踪其他信息:
from traceml import tracking
from traceml.integrations.keras import Callback
tracking.init(
is_offline=True,
project='tracking-project',
name="keras-run",
description="trying TraceML & Keras",
tags=["examples"],
artifacts_path="path/to/artifacts/repo"
)
tracking.log_inputs(
batch_size=64,
dropout=0.2,
learning_rate=0.001,
optimizer="Adam"
)
tracking.log_data_ref(content=x_train, name='x_train')
tracking.log_data_ref(content=y_train, name='y_train')
tracking.log_data_ref(content=x_test, name='x_test')
tracking.log_data_ref(content=y_test, name='y_test')
# ...
model.fit(
x_train,
y_train,
validation_data=(X_test, y_test),
epochs=epochs,
batch_size=100,
callbacks=[Callback()],
)
PyTorch
您可以使用跟踪模块记录 PyTorch 实验的指标、输入和输出:
from traceml import tracking
tracking.init(
is_offline=True,
project='tracking-project',
name="pytorch-run",
description="trying TraceML & PyTorch",
tags=["examples"],
artifacts_path="path/to/artifacts/repo"
)
tracking.log_inputs(
batch_size=64,
dropout=0.2,
learning_rate=0.001,
optimizer="Adam"
)
# 指标
for batch_idx, (data, target) in enumerate(train_loader):
output = model(data)
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
tracking.log_metrics(loss=loss)
asset_path = tracking.get_outputs_path('model.ckpt')
torch.save(model.state_dict(), asset_path)
# 记录模型
tracking.log_artifact_ref(asset_path, framework="pytorch", ...)
Tensorflow
您可以使用跟踪模块记录 TensorFlow 和分布式 TensorFlow 实验的指标、输出和模型:
from traceml import tracking
from traceml.integrations.tensorflow import Callback
tracking.init(
is_offline=True,
project='tracking-project',
name="tf-run",
description="trying TraceML & Tensorflow",
tags=["examples"],
artifacts_path="path/to/artifacts/repo"
)
tracking.log_inputs(
batch_size=64,
dropout=0.2,
learning_rate=0.001,
optimizer="Adam"
)
# 记录模型
estimator.train(hooks=[Callback(log_image=True, log_histo=True, log_tensor=True)])
Fastai
您可以使用跟踪模块记录 Fastai 实验的指标、输出和模型:
from traceml import tracking
from traceml.integrations.fastai import Callback
tracking.init(
is_offline=True,
project='tracking-project',
name="fastai-run",
description="trying TraceML & Fastai",
tags=["examples"],
artifacts_path="path/to/artifacts/repo"
)
# 记录模型指标
learn.fit(..., cbs=[Callback()])
Pytorch Lightning
您可以使用跟踪模块记录 PyTorch Lightning 实验的指标、输出和模型:
from traceml import tracking
from traceml.integrations.pytorch_lightning import Callback
tracking.init(
is_offline=True,
project='tracking-project',
name="pytorch-lightning-run",
description="trying TraceML & Lightning",
tags=["examples"],
artifacts_path="path/to/artifacts/repo"
)
...
trainer = pl.Trainer(
gpus=0,
progress_bar_refresh_rate=20,
max_epochs=2,
logger=Callback(),
)
HuggingFace
您可以使用跟踪模块记录 HuggingFace 实验的指标、输出和模型:
from traceml import tracking
from traceml.integrations.hugging_face import Callback
tracking.init(
is_offline=True,
project='tracking-project',
name="hg-run",
description="trying TraceML & HuggingFace",
tags=["examples"],
artifacts_path="path/to/artifacts/repo"
)
...
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset if training_args.do_train else None,
eval_dataset=eval_dataset if training_args.do_eval else None,
callbacks=[Callback],
# ...
)
跟踪工件
import altair as alt
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
from bokeh.plotting import figure
from vega_datasets import data
from traceml import tracking
def plot_mpl_figure(step):
np.random.seed(19680801)
data = np.random.randn(2, 100)
figure, axs = plt.subplots(2, 2, figsize=(5, 5))
axs[0, 0].hist(data[0])
axs[1, 0].scatter(data[0], data[1])
axs[0, 1].plot(data[0], data[1])
axs[1, 1].hist2d(data[0], data[1])
tracking.log_mpl_image(figure, 'mpl_image', step=step)
def log_bokeh(step):
factors = ["a", "b", "c", "d", "e", "f", "g", "h"]
x = [50, 40, 65, 10, 25, 37, 80, 60]
dot = figure(title="Categorical Dot Plot", tools="", toolbar_location=None,
y_range=factors, x_range=[0, 100])
dot.segment(0, factors, x, factors, line_width=2, line_color="green", )
dot.circle(x, factors, size=15, fill_color="orange", line_color="green", line_width=3, )
factors = ["foo 123", "bar:0.2", "baz-10"]
x = ["foo 123", "foo 123", "foo 123", "bar:0.2", "bar:0.2", "bar:0.2", "baz-10", "baz-10",
"baz-10"]
y = ["foo 123", "bar:0.2", "baz-10", "foo 123", "bar:0.2", "baz-10", "foo 123", "bar:0.2",
"baz-10"]
colors = [
"#0B486B", "#79BD9A", "#CFF09E",
"#79BD9A", "#0B486B", "#79BD9A",
"#CFF09E", "#79BD9A", "#0B486B"
]
hm = figure(title="Categorical Heatmap", tools="hover", toolbar_location=None,
x_range=factors, y_range=factors)
hm.rect(x, y, color=colors, width=1, height=1)
tracking.log_bokeh_chart(name='confusion-bokeh', figure=hm, step=step)
def log_altair(step):
source = data.cars()
brush = alt.selection(type='interval')
points = alt.Chart(source).mark_point().encode(
x='Horsepower:Q',
y='Miles_per_Gallon:Q',
color=alt.condition(brush, 'Origin:N', alt.value('lightgray'))
).add_selection(
brush
)
bars = alt.Chart(source).mark_bar().encode(
y='Origin:N',
color='Origin:N',
x='count(Origin):Q'
).transform_filter(
brush
)
chart = points & bars
tracking.log_altair_chart(name='altair_chart', figure=chart, step=step)
def log_plotly(step):
df = px.data.tips()
fig = px.density_heatmap(df, x="total_bill", y="tip", facet_row="sex", facet_col="smoker")
tracking.log_plotly_chart(name="2d-hist", figure=fig, step=step)
plot_mpl_figure(100)
log_bokeh(100)
log_altair(100)
log_plotly(100)
数据帧跟踪
摘要
对 pandas 数据帧 describe 函数的扩展。
该模块包含了 DataFrameSummary 对象,它扩展了 describe() 函数:
- 属性
- dfs.columns_stats:每列的计数、唯一值、缺失值、缺失百分比和类型
- dfs.columns_types:列类型的计数
- dfs[column]:更深入的列摘要
- 功能
- summary():扩展了 describe() 函数,并包含 columns_stats 的值
DataFrameSummary 期望获得一个 pandas DataFrame 进行总结。
from traceml.summary.df import DataFrameSummary
dfs = DataFrameSummary(df)
获取列类型
dfs.columns_types
numeric 9
bool 3
categorical 2
unique 1
date 1
constant 1
dtype: int64
获取列统计
dfs.columns_stats
A B C D E
counts 5802 5794 5781 5781 4617
uniques 5802 3 5771 128 121
missing 0 8 21 21 1185
missing_perc 0% 0.14% 0.36% 0.36% 20.42%
types unique categorical numeric numeric numeric
获取单个列摘要,例如数值列
# 我们也可以用数字 A[1] 访问列
dfs['A']
std 0.2827146
max 1.072792
min 0
variance 0.07992753
mean 0.5548516
5% 0.1603367
25% 0.3199776
50% 0.4968588
75% 0.8274732
95% 1.011255
iqr 0.5074956
kurtosis -1.208469
skewness 0.2679559
sum 3207.597
mad 0.2459508
cv 0.5095319
zeros_num 11
zeros_perc 0,1%
deviating_of_mean 21
deviating_of_mean_perc 0.36%
deviating_of_median 21
deviating_of_median_perc 0.36%
top_correlations {u'D': 0.702240243124, u'E': -0.663}
counts 5781
uniques 5771
missing 21
missing_perc 0.36%
types numeric
Name: A, dtype: object
[进行中] 摘要
- 添加列间的总结分析,例如 dfs[[1, 2]]
[进行中] 可视化
- 添加用 matplotlib 的总结可视化。
- 添加用 plotly 的总结可视化。
- 添加用 altair 的总结可视化。
- 添加预定义的分析报告。
[进行中] 目录和版本
- 添加持久化摘要和链接到特定版本的可能性。
- 集成质量库。