Feature Engineering
Script: examples/feature_engineering.py.
Inputs
A tiny in-memory ete3.Tree created inside the script.
Feature order
["node_time", "time_bin", "branch_length", "is_tip", "is_internal"].
Run command
Run the script from the repository root:
python examples/feature_engineering.py
The script creates a TreeFeatureEngineer, writes features onto each node,
and prints a compact node-by-node listing.
Expected output
Stable stdout markers include:
Feature engineering summary
Feature order:
root: node_time=
Files written
None.
Optional dependencies
None.
Failure modes
Invalid feature names or tree inputs fail through the existing
TreeFeatureEngineer validation paths.
Source
"""Self-contained TreeFeatureEngineer example."""
from ete3 import Tree
from phylognn.data import TreeFeatureEngineer
# Features requested from the engineer; main() passes this list as
# ``feature_names`` and prints it verbatim as the "Feature order" line.
FEATURE_NAMES = [
    "node_time",
    "time_bin",
    "branch_length",
    "is_tip",
    "is_internal",
]
def build_demo_tree() -> Tree:
    """Build the small in-memory tree used by this example.

    Returns:
        An ``ete3.Tree`` parsed from a hard-coded Newick string with tips
        ``A``, ``B``, ``D`` and labelled inner nodes ``C`` and ``root``.
    """
    # NOTE(review): format=1 is presumably what lets ete3 read the inner
    # node labels (C, root) as names -- confirm against the ete3 docs.
    newick = "((A:1.0,B:1.5)C:0.5,D:2.0)root:0.0;"
    return Tree(newick, format=1)
def main() -> None:
    """Annotate the demo tree with features and print one line per node.

    Creates a ``TreeFeatureEngineer`` with six time bins, asks it to add the
    features listed in ``FEATURE_NAMES`` to the demo tree (``origin_time=4.0``,
    no rescaling, in place), then prints a header, the feature order, and a
    compact summary of every node visited in the engineer's traversal order.
    """
    engineer = TreeFeatureEngineer(num_time_bins=6)
    annotated = engineer.add_features(
        build_demo_tree(),
        origin_time=4.0,
        feature_names=FEATURE_NAMES,
        rescale=False,
        inplace=True,
    )

    print("Feature engineering summary")
    print(f"Feature order: {FEATURE_NAMES}")

    for node in annotated.traverse(engineer.traversal_strategy):
        # Nodes without a name are shown under a generic placeholder label.
        label = node.name
        if not label:
            label = "internal"

        summary = (
            f"{label}: "
            f"node_time={node.node_time:.2f}, "
            f"time_bin={int(node.time_bin)}, "
            f"branch_length={node.branch_length:.2f}, "
            f"is_tip={int(node.is_tip)}, "
            f"is_internal={int(node.is_internal)}"
        )
        print(summary)
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()