Source code for ayx_python_sdk.core.utils

# Copyright (C) 2022 Alteryx, Inc. All rights reserved.
#
# Licensed under the ALTERYX SDK AND API LICENSE AGREEMENT;
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    https://www.alteryx.com/alteryx-sdk-and-api-license-agreement
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions definitions for plugin SDK."""

import datetime
from typing import Dict

from ayx_python_sdk.core.field import Field, FieldType

import pyarrow as pa


[docs]def to_date(stdate: str) -> "datetime.date": """Turn a date represented by a string to a Python date. (only the date part and not the time of the day) """ return datetime.datetime.strptime(stdate, "%m/%d/%Y").date()
[docs]def to_time(sttime: str) -> "datetime.time": """Turn a date represented by a string to a Python time.""" return datetime.datetime.strptime(sttime, "%H:%M:%S").time()
[docs]def to_datetime(stdatetime: str) -> "datetime.datetime": """Turn a date represented by a string to a Python datetime.""" return datetime.datetime.strptime(stdatetime, "%m/%d/%Y %H:%M:%S")
[docs]def is_spatial(metadata: "pa.Metadata") -> bool: """Return true if the given arrow type is a spatial object (string + 'ayx' meta info).""" return metadata.get(b"ayx.source", None) == b"WKT"
[docs]def create_schema(col_meta: Dict = {}) -> "pa.Schema": """Create a Python Arrow Schema given a Dict of Metadata.""" fields = [] for name, data in col_meta.items(): if isinstance(data, dict): fd = Field( name, field_type=data.get("type", FieldType.string), size=data.get("size", 0), scale=data.get("scale", 0), source=data.get("source", ""), description=data.get("description", ""), ) else: fd = Field(name, data) fields.append(fd.to_arrow()) return pa.schema(fields)
[docs]def get_ayx_meta(metaname: str) -> str: """Add ayx. prefix to metadata name.""" if metaname in ["type", "size", "scale", "source", "description"]: return "ayx." + metaname raise ValueError("bad metadata name: " + metaname)
[docs]def set_metadata( tbl: "pa.Table", col_meta: Dict = {}, schema: "pa.Schema" = None ) -> "pa.Table": """Store column-level metadata as byte strings. Column-level metadata is stored in the table columns schema fields. To update the metadata, first new fields are created for all columns. Next a schema is created using the new fields and updated table metadata. Finally a new table is created by replacing the old one's schema, but without copying any data. Args: ---- tbl (pyarrow.Table): The table to store metadata in col_meta: A dictionary with column metadata in the form { 'column_1': {'type': FieldType.int64, 'size': 8}, 'column_2': {'size': 64, 'source': 'something'} } """ # Create updated column fields with new metadata if schema: return pa.Table.from_arrays(list(tbl.itercolumns()), schema=schema) if col_meta: fields = [] for col in tbl.schema: if col.name in col_meta: # Get updated column metadata metadata = col.metadata.copy() or {} for k, v in col_meta[col.name].items(): metadata[get_ayx_meta(k).encode("utf-8")] = str(v).encode("utf-8") # Update field with updated metadata col = pa.field( col.name, col.type, nullable=col.nullable, metadata=metadata ) fields.append(col) # Create new schema with updated field metadata schema = pa.schema(fields) # With updated schema build new table (shouldn't copy data) # tbl = pa.Table.from_batches(tbl.to_batches(), schema) tbl = pa.Table.from_arrays(list(tbl.itercolumns()), schema=schema) return tbl
[docs]def decode_metadata(metadata: Dict) -> Dict: """Arrow stores metadata keys and values as bytes.""" if not metadata: # None or {} are not decoded return metadata decoded = {} for k, v in metadata.items(): key = k.decode("utf-8") if key[0:4] == "ayx.": key = key[4:] val = v.decode("utf-8") decoded[key] = val return decoded
[docs]def get_metadata(tbl: "pa.Table", col_name: str = "") -> Dict: """Get all column metadata as dicts or just one column, given col_name.""" if col_name: for col in tbl.schema: if col.name == col_name: return decode_metadata(col.metadata) return {} else: return {col.name: decode_metadata(col.metadata) for col in tbl.schema}