Skip to content

Commit

Permalink
use regex on Bio.Nexus.Trees.Tree parser (#4624)
Browse files Browse the repository at this point in the history
  • Loading branch information
jrom99 committed May 3, 2024
1 parent 40f0a08 commit 24e0c3e
Showing 1 changed file with 36 additions and 17 deletions.
53 changes: 36 additions & 17 deletions Bio/Nexus/Trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""

import random
import re
import sys
from . import Nodes

Expand All @@ -23,6 +24,10 @@
# Markers that open and close a special node comment inside a tree string.
NODECOMMENT_START = "[&"
NODECOMMENT_END = "]"

# Tokenizer for tree strings: matches the node-comment start/end markers,
# parentheses and commas. The whole alternation is one capturing group, so
# ``_re_block_delimiters.split(text)`` keeps the delimiters in the result
# list, interleaved with the text found between them.
_re_block_delimiters = re.compile(
    "("
    + "|".join(
        (
            re.escape(NODECOMMENT_START),
            re.escape(NODECOMMENT_END),
            "[(),]",
        )
    )
    + ")"
)


class TreeError(Exception):
"""Provision for the management of Tree exceptions."""
Expand Down Expand Up @@ -85,48 +90,62 @@ def _parse(self, tree):
# Remove any leading/trailing white space - want any string starting
# with " (..." should be recognised as a leaf, "(..."
tree = tree.strip()

if tree.count("(") != tree.count(")"):
raise TreeError("Parentheses do not match in (sub)tree: " + tree)
if tree.count("(") == 0: # a leaf
# check if there's a colon, or a special comment, or both after the taxon name
nodecomment = tree.find(NODECOMMENT_START)
colon = tree.find(":")
if colon == -1 and nodecomment == -1: # none
return [tree, [None]]
branch, comment = tree, [None]
elif colon == -1 and nodecomment > -1: # only special comment
return [tree[:nodecomment], self._get_values(tree[nodecomment:])]
branch, comment = tree[:nodecomment], self._get_values(
tree[nodecomment:]
)
elif colon > -1 and nodecomment == -1: # only numerical values
return [tree[:colon], self._get_values(tree[colon + 1 :])]
branch, comment = tree[:colon], self._get_values(tree[colon + 1 :])
elif (
colon < nodecomment
): # taxon name ends at first colon or with special comment
return [tree[:colon], self._get_values(tree[colon + 1 :])]
branch, comment = tree[:colon], self._get_values(tree[colon + 1 :])
else:
return [tree[:nodecomment], self._get_values(tree[nodecomment:])]
branch, comment = tree[:nodecomment], self._get_values(
tree[nodecomment:]
)

return [branch, comment]
else:
closing = tree.rfind(")")
val = self._get_values(tree[closing + 1 :])
if not val:
val = [None]

subtrees = []
plevel = 0
prev = 1
incomment = False
for p in range(1, closing):
if not incomment and tree[p] == "(":
plevel += 1
elif not incomment and tree[p] == ")":
plevel -= 1
elif tree[p:].startswith(NODECOMMENT_START):
incomment = True
elif incomment and tree[p] == NODECOMMENT_END:
incomment = False
elif not incomment and tree[p] == "," and plevel == 0:
subtrees.append(tree[prev:p])
prev = p + 1

blocks = _re_block_delimiters.split(tree[1:closing])

for idx, blk in enumerate(blocks):
if not incomment:
if blk == "(":
plevel += 1
elif blk == ")":
plevel -= 1
elif blk == NODECOMMENT_START:
incomment = True
elif blk == "," and plevel == 0:
p = sum(len(blk) for blk in blocks[: idx + 1])
subtrees.append(tree[prev:p])
prev = p + 1
elif blk == NODECOMMENT_END:
incomment = False
subtrees.append(tree[prev:closing])

subclades = [self._parse(subtree) for subtree in subtrees]

return [subclades, val]

def _add_subtree(self, parent_id=None, tree=None):
Expand Down

0 comments on commit 24e0c3e

Please sign in to comment.