How do I gather a full Entity ontology in Python?


It is common to want to get all Entity records and associated concepts in Python for processing or analysis.

Here is a simple example that runs a Data Lab action to export the matching Entities and their related concepts to JSON.

The export result is a ZIP file containing JSON files (100k Entities per file) covering the queried Entities and the related concepts of interest. Attachments and LODs include their download URLs and thumbnail URLs where available.


You will want to install our Python library to facilitate this process.

pip install bruce-models

Python
import bruce_models as BM
import time
import json

# ID of the Entity Type whose Entities will be exported.
ENTITY_TYPE_ID = "YOUR_ENTITY_TYPE_ID"

# Create the API client used to talk to your account.
bruce = BM.BruceApi(
    dict(account_id="YOUR_ACCOUNT_ID", session_id="your_session_id")
)

def start_job(bruce, entity_type_id):
    """
    Kick off a Data Lab "export-json" action for a single Entity Type.

    Posts the selection/action payload to "v3/datalab/runAction" and returns
    the "ID" field of the response (None when the response has no "ID").
    """

    # Primary selection: filter on the given Entity Type ID.
    type_filter = {
        "EntityType": {"EntityType.ID": entity_type_id},
        "IncludeChildren": False,
        "LogicOperator": "AND",
        "key": "entityType",
    }
    # Action: JSON export, expanding the listed related concepts.
    export_action = {
        "ActionType": "export-json",
        "Expand": "EntityType,LOD,Source,Relation,Attachment",
        "Scenario": None,
    }
    payload = {
        "PrimarySelection": {"Items": [type_filter]},
        "Action": export_action,
    }

    response = bruce.POST("v3/datalab/runAction", data=payload)
    return response.get("ID")

def await_job(bruce, action_id, *, poll_interval=5, timeout=None):
    """
    Wait for a Data Lab job to finish and return its final record.

    Polls "pendingAction/{action_id}" until the reported "Status" is no
    longer "IN_PROGRESS".

    Args:
        bruce: API client exposing GET(url).
        action_id: ID of the pending action to watch.
        poll_interval: Seconds to sleep between polls. Defaults to 5.
        timeout: Optional maximum number of seconds to keep waiting while
            the job is still "IN_PROGRESS". None (the default) waits
            indefinitely.

    Returns:
        The pending-action response whose "Status" is "COMPLETE".

    Raises:
        TimeoutError: If timeout is set and elapses while the job is still
            "IN_PROGRESS".
        Exception: If the job stops with any status other than "COMPLETE".
    """

    deadline = None if timeout is None else time.monotonic() + timeout

    # Poll until the job is done.
    # Can take about a minute to start, so even small exports need some waiting.
    while True:
        res = bruce.GET(f"pendingAction/{action_id}")
        status = res.get("Status")
        if status != "IN_PROGRESS":
            break
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Job {action_id} still {status} after {timeout} seconds"
            )
        print(f"Job status: {status}. Waiting for completion...")
        time.sleep(poll_interval)

    print(f"Job status: {status}.")
    if status != "COMPLETE":
        raise Exception(f"Job failed with status: {status}")
    return res

def parse_results(bruce, action):
    """
    Report the outcome of a finished Data Lab job.

    In this example we just print the ZIP download URL, read from the "URL"
    field of the action's "Result".

    Args:
        bruce: API client. Not used by this example.
        action: Pending-action record of the finished job; its "Result"
            entry is read.
    """

    result = action.get("Result")
    if not result:
        print("No result found.")
        return

    # Decode "Result" only when it arrives as JSON text. An already-decoded
    # value is used as-is, since the API is moving to a v3 request that is
    # always a JSON response.
    if isinstance(result, (str, bytes, bytearray)):
        result = json.loads(result)
    print("Download URL:", result.get("URL"))

# Run the export end to end: start the job, wait for it to finish,
# then report its result.
job_id = start_job(bruce, ENTITY_TYPE_ID)
action = await_job(bruce, job_id)
parse_results(bruce, action)