Skip to content

Store

Local directory for build tracking and feature-level caching.

Store(path=DEFAULT_STORE_PATH)

Local directory that tracks builds and manifests.

Parameters:

Name Type Description Default
path str | Path

Directory path for the store (default: ".timefence").

DEFAULT_STORE_PATH
Source code in src/timefence/store.py
def __init__(self, path: str | Path = DEFAULT_STORE_PATH):
    """Bind the store to a directory and run the directory-setup helper.

    Args:
        path: Location of the store directory, given as a string or a
            ``Path``. Defaults to ``DEFAULT_STORE_PATH``.
    """
    store_root = Path(path)
    self.path = store_root
    # The helper is called after ``self.path`` is assigned; presumably it
    # creates the store's sub-directories (its body is not shown here).
    self._ensure_dirs()

save_build(manifest)

Save a build manifest and create a symlink to the output.

Source code in src/timefence/store.py
def save_build(self, manifest: dict[str, Any]) -> Path:
    """Save a build manifest and create a symlink to the output.

    The build ID is the current UTC time formatted as ``%Y%m%dT%H%M%SZ``.
    If a directory for that ID already exists (for example, two builds saved
    within the same second), a numeric suffix such as ``-01`` is appended so
    that an earlier build is never overwritten. Suffixed IDs still sort after
    the unsuffixed ID, so name-ordered listings stay chronological.

    Args:
        manifest: Build manifest. It is modified in place: its ``"build_id"``
            key is set to the ID chosen for this build. Values that are not
            JSON-serializable are written via ``str()``.

    Returns:
        Path to the written ``build.json`` file.
    """
    import contextlib

    builds_root = self.path / "builds"
    builds_root.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    build_id = timestamp
    collisions = 0
    while True:
        build_dir = builds_root / build_id
        try:
            # mkdir without exist_ok claims the ID atomically: it fails if
            # another build already owns this directory.
            build_dir.mkdir()
        except FileExistsError:
            collisions += 1
            build_id = f"{timestamp}-{collisions:02d}"
        else:
            break

    manifest["build_id"] = build_id
    manifest_path = build_dir / "build.json"
    manifest_path.write_text(json.dumps(manifest, indent=2, default=str))

    # Create symlink to output file. ``or {}`` tolerates an explicit
    # ``"output": None`` entry in the manifest.
    output_path = (manifest.get("output") or {}).get("path")
    if output_path:
        output_abs = Path(output_path).resolve()
        if output_abs.exists():
            link_path = build_dir / output_abs.name
            # Best effort: the link is a convenience, so a failure to create
            # it (e.g. symlinks not permitted) must not fail the save.
            with contextlib.suppress(OSError):
                link_path.symlink_to(output_abs)

    return manifest_path

list_builds()

List all builds in the store, newest first.

Source code in src/timefence/store.py
def list_builds(self) -> list[dict[str, Any]]:
    """Return every stored build manifest, newest first.

    Entries under ``<store>/builds`` are visited in descending path order;
    because build directories are named after timestamp-based build IDs,
    that order puts the most recent build first. Entries without a
    ``build.json`` file are skipped.

    Returns:
        The parsed manifests, or an empty list when the ``builds``
        directory does not exist.
    """
    root = self.path / "builds"
    if not root.exists():
        return []

    manifest_files = (
        entry / "build.json" for entry in sorted(root.iterdir(), reverse=True)
    )
    return [
        json.loads(candidate.read_text())
        for candidate in manifest_files
        if candidate.exists()
    ]

get_build(build_id)

Get a specific build manifest by ID.

Source code in src/timefence/store.py
def get_build(self, build_id: str) -> dict[str, Any] | None:
    """Get a specific build manifest by ID."""
    manifest_path = self.path / "builds" / build_id / "build.json"
    if manifest_path.exists():
        return json.loads(manifest_path.read_text())
    return None

content_hash(path) staticmethod

Compute full SHA-256 content hash of a file.

Source code in src/timefence/store.py
@staticmethod
def content_hash(path: str | Path) -> str:
    """Compute full SHA-256 content hash of a file."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            h.update(chunk)
    return f"sha256:{h.hexdigest()}"

cached_content_hash(path)

Content hash with (path, size, mtime_ns) caching for speed.

Source code in src/timefence/store.py
def cached_content_hash(self, path: str | Path) -> str:
    """Content hash with (path, size, mtime_ns) caching for speed."""
    path = Path(path).resolve()
    cache_file = self.path / "cache" / "hashes.json"

    cache: dict[str, str] = {}
    if cache_file.exists():
        cache = json.loads(cache_file.read_text())

    stat = path.stat()
    cache_key = f"{path}:{stat.st_size}:{stat.st_mtime_ns}"

    if cache_key in cache:
        return cache[cache_key]

    content_hash = self.content_hash(path)
    cache[cache_key] = content_hash
    cache_file.write_text(json.dumps(cache, indent=2))
    return content_hash

feature_cache_key(definition_hash, source_content_hash, embargo)

Compute a cache key for a single feature computation.

Source code in src/timefence/store.py
def feature_cache_key(
    self,
    definition_hash: str,
    source_content_hash: str | None,
    embargo: str | None,
) -> str:
    """Derive the cache key for one feature computation.

    The key is a truncated SHA-256 hex digest over the feature's definition
    hash, its source content hash, its embargo and the package version,
    joined with ``":"``.

    Args:
        definition_hash: Hash identifying the feature definition.
        source_content_hash: Content hash of the feature's source data;
            a falsy value contributes an empty string.
        embargo: Embargo value; a falsy value contributes ``"0d"``.

    Returns:
        The first ``CACHE_KEY_LENGTH`` characters of the hex digest.
    """
    components = (
        definition_hash,
        source_content_hash or "",
        embargo or "0d",
        __version__,
    )
    key_input = ":".join(f"{part}" for part in components)
    digest = hashlib.sha256(key_input.encode()).hexdigest()
    return digest[:CACHE_KEY_LENGTH]

feature_cache_path(feature_name, cache_key)

Path where a cached feature table would be stored.

Source code in src/timefence/store.py
def feature_cache_path(self, feature_name: str, cache_key: str) -> Path:
    """Return the location used for a feature's cached table.

    Args:
        feature_name: Name of the feature.
        cache_key: Cache key for this particular computation.

    Returns:
        ``<store>/cache/features/<feature_name>__<cache_key>.parquet``.
        The file is not created or checked for existence here.
    """
    file_name = f"{feature_name}__{cache_key}.parquet"
    return self.path.joinpath("cache", "features", file_name)

build_cache_key(label_content_hash, feature_cache_keys, max_lookback, max_staleness, join_mode, on_missing)

Compute a cache key for an entire build.

Source code in src/timefence/store.py
def build_cache_key(
    self,
    label_content_hash: str | None,
    feature_cache_keys: list[str],
    max_lookback: str | None,
    max_staleness: str | None,
    join_mode: str,
    on_missing: str,
) -> str:
    """Derive the cache key for a whole build.

    The key is a truncated SHA-256 hex digest over the label content hash,
    the sorted feature cache keys and the build parameters, joined with
    ``":"``. Sorting makes the key independent of feature order.

    Args:
        label_content_hash: Content hash of the labels; a falsy value
            contributes an empty string.
        feature_cache_keys: Cache keys of the features in the build.
        max_lookback: Build parameter, included as formatted text.
        max_staleness: Build parameter, included as formatted text.
        join_mode: Build parameter, included as formatted text.
        on_missing: Build parameter, included as formatted text.

    Returns:
        The first ``CACHE_KEY_LENGTH`` characters of the hex digest.
    """
    components = (
        label_content_hash or "",
        sorted(feature_cache_keys),
        max_lookback,
        max_staleness,
        join_mode,
        on_missing,
    )
    key_input = ":".join(f"{part}" for part in components)
    digest = hashlib.sha256(key_input.encode()).hexdigest()
    return digest[:CACHE_KEY_LENGTH]

find_cached_build(build_cache_key)

Find a previous build matching this cache key.

Source code in src/timefence/store.py
def find_cached_build(self, build_cache_key: str) -> dict[str, Any] | None:
    """Find a previous build matching this cache key."""
    for build in self.list_builds():
        if build.get("build_cache_key") == build_cache_key:
            output_path = build.get("output", {}).get("path")
            if output_path and Path(output_path).exists():
                return build
    return None

How it works

When you pass a Store to build(), Timefence:

  1. Hashes inputs — content hash (SHA-256) of source files, feature definitions, embargo values, and build parameters.
  2. Checks feature cache — if a feature's inputs haven't changed, the cached intermediate table is loaded instead of recomputed.
  3. Checks build cache — if all features + labels + parameters match a previous build, the entire result is returned immediately.
  4. Saves the manifest — every build writes a JSON manifest with full provenance to .timefence/builds/<build_id>/build.json.

Directory structure

.timefence/
├── builds/
│   ├── 20240315T120000Z/
│   │   ├── build.json        # Full build manifest
│   │   └── train.parquet     # Symlink to output file
│   └── 20240316T090000Z/
│       └── build.json
└── cache/
    ├── hashes.json            # Content hash cache (path:size:mtime_ns → hash)
    └── features/
        ├── rolling_spend__a1b2c3d4.parquet
        └── user_country__e5f6g7h8.parquet

Example

import timefence

# NOTE: `labels`, `rolling_spend`, `user_country` and `login_count` are
# assumed to be defined earlier; their definitions are not part of this example.
store = timefence.Store(".timefence")

# First build: computes everything, caches results
result = timefence.build(
    labels=labels,
    features=[rolling_spend, user_country],
    output="train.parquet",
    store=store,
)

# Second build (same inputs): returns cached result in milliseconds
result = timefence.build(
    labels=labels,
    features=[rolling_spend, user_country],
    output="train.parquet",
    store=store,
)

# Add a new feature: only the new one is computed, others loaded from cache
result = timefence.build(
    labels=labels,
    features=[rolling_spend, user_country, login_count],
    output="train.parquet",
    store=store,
)

Build history

store = timefence.Store(".timefence")

# List all past builds (newest first)
builds = store.list_builds()
for b in builds:
    # Assumes each manifest carries output.row_count and duration_seconds;
    # the store only adds "build_id" itself — TODO confirm against build().
    print(f"{b['build_id']}  {b['output']['row_count']} rows  {b['duration_seconds']:.1f}s")

# Get a specific build by ID (returns None when the ID is unknown)
manifest = store.get_build("20240315T120000Z")
if manifest:
    print(manifest["features"])    # Feature-level stats
    print(manifest["parameters"])  # max_lookback, join, etc.
    print(manifest["audit"])       # Post-build audit result

Cache invalidation

Cache keys are recomputed from content hashes on every build. The cache is automatically invalidated when:

Change What happens
Source data changes (any byte) Feature recomputed from scratch
Feature definition changes (SQL, columns, transform) Feature recomputed
Embargo value changes Feature recomputed
Timefence version changes Feature recomputed
Labels change Build recomputed (features may still be cached)
max_lookback / max_staleness / join / on_missing change Build recomputed

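To manually clear the feature and content-hash caches, delete the .timefence/cache/ directory. Build-level cache lookups read the manifests under .timefence/builds/, so removing cache/ alone does not discard previous builds: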

rm -rf .timefence/cache/

Methods

Method Returns Description
.save_build(manifest) Path Save build manifest, return manifest path.
.list_builds() list[dict] List all builds (newest first).
.get_build(build_id) dict \| None Get a specific build manifest by ID.
.content_hash(path) str Compute SHA-256 hash of a file (e.g., "sha256:abc123...").

Parameters

Parameter Type Default Description
path str \| Path ".timefence" Directory path for the store.

Tip

Add .timefence/ to your .gitignore. The store is local-only and should not be committed.