Skip to content

Timefence

explain()

gauthierpiarrette/timefence

explain()¶

Preview the join logic that build() will use, without executing any queries.

`explain(labels, features, *, max_lookback=DEFAULT_MAX_LOOKBACK, max_staleness=None, join='strict')` ¶

Preview join logic without executing.

Source code in src/timefence/engine.py

def explain(
    labels: Labels,
    features: Sequence[Feature | FeatureSet],
    *,
    max_lookback: str | timedelta = DEFAULT_MAX_LOOKBACK,
    max_staleness: str | timedelta | None = None,
    join: str = "strict",
) -> ExplainResult:
    """Preview the per-feature join plan without running the feature joins.

    The only query executed is a ``COUNT(*)`` over the labels (Parquet path or
    registered DataFrame) so the plan can report how many label rows it covers.

    Args:
        labels: Label definition. ``labels.path`` takes precedence over
            ``labels.df``; when both are ``None`` the label count is 0.
        features: Features and/or feature sets; flattened with
            ``flatten_features`` before planning.
        max_lookback: Maximum feature age as a duration string or
            ``timedelta``. When ``parse_duration`` returns a falsy value,
            ``DEFAULT_MAX_LOOKBACK_DAYS`` days is used instead.
        max_staleness: Accepted but not read by this function.
        join: ``"strict"`` selects ``<``; any other value selects ``<=``.

    Returns:
        An ``ExplainResult`` whose ``plan`` holds one dict per flattened
        feature with the keys ``name``, ``source``, ``join_condition``,
        ``window``, ``embargo_str``, ``strategy`` and ``sql``.
    """
    max_lookback_td = parse_duration(max_lookback) or timedelta(
        days=DEFAULT_MAX_LOOKBACK_DAYS
    )
    flat_features = flatten_features(features)

    # Only open a DuckDB connection when there is something to count.
    if labels.path is None and labels.df is None:
        label_count = 0
    else:
        conn = duckdb.connect()
        try:
            if labels.path is not None:
                count_row = conn.execute(
                    f"SELECT COUNT(*) FROM read_parquet({_ql(labels.path)})"
                ).fetchone()
            else:
                conn.register("__lbl", labels.df)
                count_row = conn.execute("SELECT COUNT(*) FROM __lbl").fetchone()
        finally:
            conn.close()
        label_count = count_row[0]

    result = ExplainResult(label_count=label_count)

    # These do not depend on the individual feature, so compute them once.
    lookback_str = format_duration(max_lookback_td)
    # NOTE(review): every value other than "strict" is treated as inclusive,
    # including typos -- confirm whether validation happens upstream.
    op = "<" if join == "strict" else "<="
    key_placeholder = "{K}"
    time_placeholder = "{T}"

    for feat in flat_features:
        embargo_str = format_duration(feat.embargo) or "none"
        strategy = "asof" if _use_asof_strategy(feat) else "row_number"
        has_embargo = feat.embargo.total_seconds() > 0

        # The embargo shifts the upper bound of the window back from label_time.
        if has_embargo:
            embargo_clause = f" - INTERVAL '{embargo_str}'"
            window_end = f"label_time - {embargo_str}"
        else:
            embargo_clause = ""
            window_end = "label_time"

        join_cond = f"feature_time {op} label_time{embargo_clause}"
        window = f"[label_time - {lookback_str}, {window_end})"

        source_ref = str(feat.source.path) if feat.source.path else feat.source.name

        if feat.mode == "columns":
            cols = ", ".join(feat._columns.values())
            ts = feat.source.timestamp
            # NOTE(review): only the first source key appears in the example
            # SQL; composite keys are not spelled out here.
            key_col = feat.source_keys[0]
            example_sql = (
                f"SELECT {key_col}, {ts} AS feature_time, {cols}\n"
                f"FROM '{source_ref}'\n"
                f"WHERE {key_col} = {key_placeholder}\n"
                f"  AND {ts} {op} {time_placeholder}{embargo_clause}\n"
                f"  AND {ts} >= {time_placeholder} - INTERVAL '{lookback_str}'\n"
                f"ORDER BY {ts} DESC\nLIMIT 1"
            )
        elif feat.mode == "sql":
            example_sql = f"WITH feature AS (\n  {feat._sql_text.strip()}\n)\nSELECT * FROM feature\n..."
        else:
            # Callables such as functools.partial objects or callable instances
            # have no __name__; fall back to the type name instead of raising.
            transform = feat._transform
            transform_name = getattr(transform, "__name__", type(transform).__name__)
            example_sql = f"-- Python transform: {transform_name}"

        result.plan.append(
            {
                "name": feat.name,
                "source": source_ref,
                "join_condition": join_cond,
                "window": window,
                "embargo_str": embargo_str if has_embargo else "none",
                "strategy": strategy,
                "sql": example_sql,
            }
        )

    return result

Example¶

import timefence

result = timefence.explain(
    labels=labels,
    features=[rolling_spend, user_country],
    max_lookback="365d",
    join="strict",
)

print(result)

Sample output¶

JOIN PLAN for 5000 label rows

For each label row (keys, label_time):

  1. rolling_spend_30d
     Source:  data/transactions.parquet
     Join:    feature_time < label_time - INTERVAL '1d'
     Window:  [label_time - 365d, label_time - 1d)
     Embargo: 1d
     Strategy: row_number
     SQL:
       SELECT user_id, created_at AS feature_time, amount
       FROM 'data/transactions.parquet'
       WHERE user_id = {K}
         AND created_at < {T} - INTERVAL '1d'
         AND created_at >= {T} - INTERVAL '365d'
       ORDER BY created_at DESC
       LIMIT 1

  2. country
     Source:  data/users.parquet
     Join:    feature_time < label_time
     Window:  [label_time - 365d, label_time)
     Embargo: none
     Strategy: asof
     SQL:
       SELECT user_id, updated_at AS feature_time, country
       FROM 'data/users.parquet'
       WHERE user_id = {K}
         AND updated_at < {T}
         AND updated_at >= {T} - INTERVAL '365d'
       ORDER BY updated_at DESC
       LIMIT 1

{K} and {T} are placeholders for the entity key and label time of each row.

Parameters¶

Parameter	Type	Default	Description
`labels`	`Labels`	required	Label definition.
`features`	`Sequence[Feature | FeatureSet]`	required	Features to explain.
`max_lookback`	`str | timedelta`	`"365d"`	Maximum feature age.
`max_staleness`	`str | timedelta | None`	`None`	Maximum staleness threshold. Accepted, but not used in the plan produced by the source shown above.
`join`	`str`	`"strict"`	`"strict"` (`<`) or `"inclusive"` (`<=`).

Returns: ExplainResult¶

Attribute	Type	Description
`.label_count`	`int`	Number of label rows.
`.plan`	`list[dict]`	Per-feature join plan (see below).

Each item in .plan contains:

Key	Type	Description
`name`	`str`	Feature name.
`source`	`str`	Source file path or name.
`join_condition`	`str`	The temporal join condition (e.g., `feature_time < label_time`).
`window`	`str`	The valid feature window (e.g., `[label_time - 365d, label_time)`).
`embargo_str`	`str`	Embargo duration or `"none"`.
`strategy`	`str`	`"asof"` or `"row_number"`.
`sql`	`str`	Example SQL for this feature.