Source code for opentree.ot_object

"""OT object. High level wrapper for OpenTree calls"""
#!/usr/bin/env python3
import sys
import os
from .ws_wrapper import (OTWebServicesError,
                         WebServiceRunMode,
                         )
from .ot_ws_wrapper import OTWebServiceWrapper
import logging

# from .nexson_helpers import extract_tree_nexson, extract_otu_nexson, detect_nexson_version

FILES_SERVER_URL = 'files'


[docs]class FilesServerWrapper(OTWebServiceWrapper): """ This class provides a mid-level wrapper for interaction with OT web services and data. """ def __init__(self, api_endpoint=FILES_SERVER_URL, run_mode=WebServiceRunMode.RUN): super(FilesServerWrapper, self).__init__(api_endpoint=api_endpoint, run_mode=run_mode) def get_subproblem_scaffold_tree(self, synth_id): url_frag = 'synthesis/{s}/{s}/subproblems/subproblems-scaffold-only.tre'.format(s=synth_id) return self._call_api(url_frag, http_method='GET', headers='text') def get_subproblem_size_info(self, synth_id): url_frag = 'synthesis/{s}/{s}/subproblems/subproblem_size_summary.json'.format(s=synth_id) return self._call_api(url_frag, http_method='GET') def get_subproblem_solution(self, synth_id, ott_id): url_frag = 'synthesis/{s}/{s}/subproblem_solutions/ott{o}.tre'.format(s=synth_id, o=ott_id) return self._call_api(url_frag, http_method='GET', headers='text') def get_reversed_subproblem_solution(self, synth_id, ott_id): url_frag = 'synthesis/{s}/{s}/reversed_subproblem_solutions/ott{o}.tre'.format(s=synth_id, o=ott_id) return self._call_api(url_frag, http_method='GET', headers='text') def get_subproblem_trees(self, synth_id, ott_id): url_frag = 'synthesis/{s}/{s}/subproblems/ott{o}.tre'.format(s=synth_id, o=ott_id) return self._call_api(url_frag, http_method='GET', headers='text')
_default_api_endpoint = None _default_run_mode = None def default_open_tree_obj(): global _default_api_endpoint, _default_run_mode if _default_api_endpoint is None: _default_api_endpoint = os.environ.get('DEFAULT_OT_API_ENDPOINT', 'production') _default_run_mode = WebServiceRunMode.RUN return OpenTree(api_endpoint=_default_api_endpoint, run_mode=_default_run_mode)
[docs]class OpenTree(object): """ This class provides a high-level wrapper for interaction with OT web services and data. The method names are intended to be clear to a wide variety of users, rather than necessarily matching the API calls directly. """ def __init__(self, api_endpoint='production', run_mode=WebServiceRunMode.RUN): global _default_api_endpoint, _default_run_mode if _default_api_endpoint is None: _default_api_endpoint = api_endpoint _default_run_mode = run_mode self._api_endpoint = api_endpoint self._run_mode = run_mode self._ws = None self._files_server = None @property def files_server(self): if self._files_server is None: self._files_server = FilesServerWrapper(run_mode=self._run_mode) return self._files_server @property def ws(self): if self._ws is None: self._ws = OTWebServiceWrapper(api_endpoint=self._api_endpoint, run_mode=self._run_mode) return self._ws def get_subproblem_scaffold_tree(self, synth_id): return self.files_server.get_subproblem_scaffold_tree(synth_id) def get_subproblem_size_info(self, synth_id): return self.files_server.get_subproblem_size_info(synth_id) def get_subproblem_solution(self, synth_id, ott_id): return self.files_server.get_subproblem_solution(synth_id, ott_id) def get_subproblem_trees(self, synth_id, ott_id): return self.files_server.get_subproblem_trees(synth_id, ott_id) def get_reversed_subproblem_solution(self, synth_id, ott_id): return self.files_server.get_reversed_subproblem_solution(synth_id, ott_id)
[docs] def about(self): """ Get information about the Open Tree of Life taxonomy and the synthetic tree. """ tax_about = self.ws.taxonomy_about() tree_about = self.ws.tree_of_life_about() return {'taxonomy_about': tax_about.response_dict, 'synth_tree_about': tree_about.response_dict }
[docs] def get_study(self, study_id): """ Get a study and its associated metadata. Parameters ---------- study_id : single character value The study id from Open Tree of Life. """ return self.ws.study(study_id)
[docs] def get_tree(self, study_id, tree_id, tree_format="nexson", label_format="ot:originallabel", demand_success=False): """ Get a source tree from phylesystem and its associated metadata. Parameters ---------- study_id : single character value The study id from Open Tree of Life. tree_id : single character value The tree id of a tree within the study id provided. tree_format : single character value Must be one of "newick", "nexson", "nexus", or "object" If tree format is newick or nexus, returns tree as string in that format. If "nexson", returns semi-useless tree nexson w/o OTUS. label_format : single character value Must be one of "ot:originallabel", "ot:ottid", or "ot:otttaxonname". "ot:originallabel" returns the tree with tip labels as it was originally submitted to phylesystem by a curator. "ot:ottid" returns a tree with tip labels corresponding to the matching ott id. "ot:otttaxonname" returns a tree with tip labels corresponding to the matching ott taxon name. demand_success : boolean Whether to return an error or return a somewhat failed output silently. """ if tree_format not in ["newick", "nexson", "nexus", "object"]: raise ValueError('"{}" not recognized as a valid tree_format'.format(tree_format)) if tree_format == 'object': ws_rec = self.ws.study(study_id, demand_success=False) def efn(rd): nexs = rd['data'] return ws_rec._to_object_converter.tree_from_nexson(nexs, tree_id=tree_id, label_format=label_format) ws_rec._tree_from_response_extractor = efn else: ws_rec = self.ws.tree(study_id, tree_id, tree_format, label_format, demand_success) from .ws_wrapper import extract_content_from_raw_text_method_dict if tree_format in ('newick', 'nexus'): ws_rec._tree_from_response_extractor = extract_content_from_raw_text_method_dict return ws_rec
[docs] def get_otus(self, study_id): """ Get OTUs from a study in the Open Tree of Life Phylesystem. Parameters ---------- study_id : single character value The study id from Open Tree of Life. """ return self.ws.otus(study_id)
[docs] def conflict_info(self, study_id, tree_id, compare_to='synth'): """ Get node status data from any tree in the Open Tree of Life Phylesystem. Parameters ---------- study_id : single character value The study id from Open Tree of Life. tree_id : single character value The tree id of a tree within the study id provided. compare_to : a single character value Usually, you want this to be 'synth', to compare to the synthetic tree. Alternatively, you can compare your tree to any other tree in phylesystem. """ return self.ws.conflict(study_id, tree_id, compare_to, demand_success=False)
[docs] def conflict_str(self, tree_str, compare_to='synth'): """ Get node status data from a newick string tree with ott_ids as labels, following the rough format: "(('_nd1_ott770315','newick_nd2_ott417950')'_nd3_','_nd4_ott158484')'_nd5';". Parameters ---------- tree_str: a tree in 'conflict formatted' newick string compare_to : a single character value Usually, you want this to be 'synth', to compare to the synthetic tree. Alternatively, you can compare your tree to any other tree in phylesystem. """ return self.ws.conflict_from_newick(tree_str, compare_to, demand_success=False)
[docs] def studies_properties(self): """ Get properties that can be used to search across studies and trees in phylesystem. """ return self.ws.studies_properties()
[docs] def find_studies(self, value, search_property, exact=False, verbose=False): """ Get study ids that match a certain value of a given search property. Parameters ---------- value : single character value The study id from Open Tree of Life. search_property : single character value Any value from studies_properties. exact : boolean verbose : boolean """ return self.ws.studies_find_studies(value, search_property=search_property, exact=exact, verbose=verbose)
[docs] def find_trees(self, value, search_property, exact=False, verbose=False): """ Get trees that match a certain value of a given search property. Parameters ---------- value : single character value The study id from Open Tree of Life. search_property : single character value Any value from studies_properties. exact : boolean verbose : boolean Example ------- """ return self.ws.studies_find_trees(value, search_property=search_property, exact=exact, verbose=verbose)
[docs] def taxon_info(self, ott_id=None, source_id=None, include_lineage=False, include_children=False, include_terminal_descendants=False): """ Get taxonomic information for a given taxon in the Open Tree taxonomy. Parameters ---------- ott_id : single character value The OTT id of a taxon. source_id : maybe single character value include_lineage : boolean include_children : boolean include_terminal_descendant : boolean """ return self.ws.taxonomy_taxon_info(ott_id=ott_id, source_id=source_id, include_lineage=include_lineage, include_children=include_children, include_terminal_descendants=include_terminal_descendants)
[docs] def taxon_mrca(self, ott_ids=None): """ Get the node corresponding to the most recent commom ancestor (mrca) of a taxon in the synthetic Open Tree of Life tree. Notes from Luna: Does it work with just one id? Since it is not always a taxon mrca, should it be called get_mrca? Parameters ---------- ott_ids : maybe single character value """ while True: call_record = self.ws.taxonomy_mrca(ott_ids=ott_ids) if call_record: return call_record else: msgtemplate = 'Call to taxonomy/mrca failed with the message "{}"' message = call_record.response_dict['message'] raise OTWebServicesError(msgtemplate.format(message))
## cull_unknown_ids_from_args does not work on taxon_mrca call_record # self._cull_unknown_ids_from_args(call_record, [], ott_ids)
[docs] def taxon_subtree(self, ott_id=None, label_format="name_and_id"): """Get a subtree of a particular taxon """ return self.ws.taxonomy_subtree(ott_id=ott_id, label_format=label_format)
[docs] def tnrs_contexts(self): """Get a list of taxonomic contexts that can be used to constraint a TNRS match. """ return self.ws.tnrs_contexts()
[docs] def tnrs_infer_context(self, names): """Infer taxonomic context for names via a TNRS (Taxonomic Name Resolution Service) match. """ return self.ws.tnrs_infer_context(names)
[docs] def tnrs_match(self, names, context_name=None, do_approximate_matching=False, include_suppressed=False): """Match taxon names to Open Tree Taxonomy using TNRS (Taxonomic Name Resolution Service). """ return self.ws.tnrs_match_names(names, context_name=context_name, do_approximate_matching=do_approximate_matching, include_suppressed=include_suppressed)
[docs] def tnrs_autocomplete(self, name, context_name=None, include_suppressed=False): """Taxonomic name resolution service autocomplete """ return self.ws.tnrs_autocomplete_name(name, context_name=context_name, include_suppressed=include_suppressed)
[docs] def synth_node_info(self, node_ids=None, node_id=None, ott_id=None, include_lineage=False): """Get information of a node """ if isinstance(node_ids, str): node_ids = [node_ids] if ott_id: if isinstance(ott_id, str): ott_id=ott_id.strip('ott') ott_id = int(ott_id) return self.ws.tree_of_life_node_info(node_ids=node_ids, node_id=node_id, ott_id=ott_id, include_lineage=include_lineage)
[docs] def synth_subtree(self, node_id=None, ott_id=None, tree_format="newick", label_format="name_and_id", height_limit=None): """Get a subtree """ return self.ws.tree_of_life_subtree(node_id=node_id, ott_id=ott_id, tree_format=tree_format, label_format=label_format, height_limit=height_limit)
[docs] def synth_induced_tree(self, node_ids=None, ott_ids=None, label_format="name_and_id", ignore_unknown_ids=False): """Get an induced subtree """ while True: call_record = self.ws.tree_of_life_induced_subtree(node_ids=node_ids, ott_ids=ott_ids, label_format=label_format) if call_record: return call_record if not ignore_unknown_ids: msgtemplate = 'Call to tree_of_life/induced_subtree failed with the message "{}"' message = call_record.response_dict['message'] if call_record.response_dict['unknown']: unknown_ids = call_record.response_dict['unknown'] message = message + "\nFull list of unknown/unrecognized query ids:\n {} \n".format("\n".join(unknown_ids)) raise OTWebServicesError(msgtemplate.format(message)) msgtemplate = 'Call to tree_of_life/induced_subtree failed with the message "{}"' self._cull_unknown_ids_from_args(call_record, node_ids, ott_ids)
[docs] def synth_mrca(self, node_ids=None, ott_ids=None, ignore_unknown_ids=True): """Get the most recent common ancestor of a group of taxa on the synthetic Open Tree of Life """ while True: assert (ott_ids or node_ids) call_record = self.ws.tree_of_life_mrca(node_ids=node_ids, ott_ids=ott_ids) if call_record: return call_record if not ignore_unknown_ids: msgtemplate = 'Call to tree_of_life/mrca failed with the message "{}"' message = call_record.response_dict['message'] raise OTWebServicesError(msgtemplate.format(message)) self._cull_unknown_ids_from_args(call_record, node_ids, ott_ids) if not ott_ids and not node_ids: msgtemplate = 'Call to tree_of_life/mrca failed as all ids were pruned' raise OTWebServicesError(msgtemplate)
# noinspection PyMethodMayBeStatic def _cull_unknown_ids_from_args(self, call_record, node_ids, ott_ids, output='err'): """Cull unknown ids from arguments """ assert ('unknown' in call_record.response_dict), call_record.response_dict unknown_ids = call_record.response_dict['unknown'] logging.debug("\nUnknown/unrecognized query ids (skipped):\n {} \n".format("\n".join(unknown_ids))) for u in unknown_ids: if node_ids and u in node_ids: node_ids.remove(u) else: assert u.startswith('ott') ui = int(u[3:]) if ott_ids and (ui in ott_ids): ott_ids.remove(ui) if ott_ids and (str(ui) in ott_ids): ott_ids.remove(str(ui))
[docs] def get_ottid_from_gbifid(self, gbif_id): """Returns an ott id for a gbif id ott_id is set to 'None' if the gbif id is not found in the Open Tree Taxanomy """ assert int(gbif_id) gbiftax = "gbif:{}".format(int(gbif_id)) res = self.taxon_info(source_id=gbiftax) if res.status_code == 200: ott_id = int(res.response_dict['ott_id']) return ott_id if res.status_code == 400: msgtemplate = 'Call to taxon_info failed with the message "{}"' message = res.response_dict['message'] raise OTWebServicesError(msgtemplate.format(message))
[docs] def get_citations(self, studies): """Returns study citations from a list of study or tree ids """ cites = [] for study in studies: if '@' in study: studyid = study.split('@')[0] treeid = study.split('@')[1] opentree_url = "https://tree.opentreeoflife.org/curator/study/view/{}?tab=trees&tree={}" opentree_url = opentree_url.format(studyid, treeid) else: studyid = study opentree_url = "https://tree.opentreeoflife.org/curator/study/view/{}".format(studyid) studyres = self.find_studies(studyid, search_property='ot:studyId', verbose=True) new_cite = studyres.response_dict.get('matched_studies', None) if new_cite: cites.append(opentree_url + '\n' + new_cite[0].get('ot:studyPublicationReference', '') + '\n' + new_cite[0].get('ot:studyPublication', '') + '\n') return "\n".join(cites)
[docs] def get_ottid_from_name(self, spp_name): """Returns an ott id for a string - requires exact match. ott_id is set to 'None' if the name is not found in the Open Tree Txanomy """ res = self.tnrs_match([spp_name], do_approximate_matching=False) if res.status_code == 200: if len(res.response_dict['results'][0]['matches']) > 0: tax = res.response_dict['results'][0]['matches'][0].get('taxon') ott_id = int(tax.get('ott_id')) return ott_id else: # sys.stderr.write("Exact match to name {} not found in taxonomy.\n".format(spp_name)) # sys.stderr.write("""Try using `resp = OT.tnrs_match(["{}"], do_approximate_matching=True)`\n # resp.response_dict \n # to find fuzzy matches\n""".format(spp_name)) return None msgtemplate = 'Call to tnrs_match failed with the message "{}"' message = res.response_dict['message'] raise OTWebServicesError(msgtemplate.format(message))
[docs] def get_matchdict_from_taxlist(self, list_of_taxa): """ Input: a list of taxon names Returns: matches - a dictionary of name:ott_id and failed - a set of the names that were not found. """ matches = dict() failed = set() for tax in list_of_taxa: tax = tax.strip() if tax != '': ott_id = self.get_ottid_from_name(tax) if ott_id is None: failed.add(tax) else: matches[tax] = 'ott{}'.format(ott_id) return matches, failed