GitHub AI-Agent Star Velocity

Executed Notebook

This notebook asks how much developer attention is visible in a sampled set of GitHub stargazer timestamps. Star velocity is useful for launch and attention analysis, but it is sensitive to repository choice, token limits, pagination depth, bots, and the covered window.

The main outputs are a repo basket, a source card, a coverage table, and a residual-event view over the fetched stargazer window.

In [1]
from pathlib import Path
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from examples.hot_trends.data import (
    HotTrendDataError,
    append_real_snapshot,
    build_arxiv_monthly_counts,
    fetch_coingecko_market_chart,
    fetch_defillama_stablecoin_chains,
    fetch_github_repo_metadata,
    fetch_github_stargazers,
    fetch_huggingface_models,
    fetch_wikipedia_pageviews,
    source_audit_table,
)
from examples.hot_trends.decomposition import (
    component_summary,
    decompose_table,
    editorial_priority,
    residual_event_table,
)
from examples.hot_trends.scoring import article_publication_phrasing

# Show up to 80 columns and 80 rows when a DataFrame is displayed.
for _option in ("display.max_columns", "display.max_rows"):
    pd.set_option(_option, 80)
# Turn on grid lines for every matplotlib axes by default.
plt.rcParams["axes.grid"] = True

# Cache and output locations, relative to the current working directory.
CACHE_DIR = Path("examples/hot_trends/cache")
OUTPUT_DIR = Path("examples/hot_trends/outputs")
for _directory in (CACHE_DIR, OUTPUT_DIR):
    # Create missing parents; an already-existing directory is not an error.
    _directory.mkdir(parents=True, exist_ok=True)

def save_table(df, name):
    """Write *df* as ``<name>.csv`` under ``OUTPUT_DIR`` and print the saved path.

    The index is not written. The printed path uses forward slashes
    regardless of platform.
    """
    destination = OUTPUT_DIR.joinpath(f"{name}.csv")
    df.to_csv(destination, index=False)
    print(f"saved: {destination.as_posix()}")

1. Select repositories

Use a small basket for unauthenticated runs. Set GITHUB_TOKEN for higher rate limits and record the repository list because basket selection changes the result.

In [2]
# Repository basket as "owner/name" slugs; changing this list changes every
# downstream table and plot, so keep it recorded with the outputs.
repos = [
    "langchain-ai/langchain",
    "microsoft/autogen",
    "crewAIInc/crewAI",
    "browser-use/browser-use",
    "modelcontextprotocol/servers",
]
# Optional GitHub token; None when GITHUB_TOKEN is not set in the environment.
token = os.getenv("GITHUB_TOKEN")
pd.DataFrame(repos, columns=["repo"])

2. Fetch repository metadata

In [3]
# Output column -> key read from the metadata returned by the helper.
api_fields = {
    "stars": "stargazers_count",
    "forks": "forks_count",
    "open_issues": "open_issues_count",
    "pushed_at": "pushed_at",
}
metadata_rows = []
for repo in repos:
    meta = fetch_github_repo_metadata(repo, token=token)
    row = {"repo": repo}
    # .get() leaves a None in the row when a key is absent from the response.
    row.update((column, meta.get(field)) for column, field in api_fields.items())
    row["source"] = "GitHub REST API"
    row["endpoint"] = f"https://api.github.com/repos/{repo}"
    metadata_rows.append(row)
# One row per repository, most-starred first.
metadata = pd.DataFrame(metadata_rows)
metadata = metadata.sort_values(by="stars", ascending=False)
metadata

3. Fetch sampled stargazer timestamps

This notebook fetches a bounded number of stargazer pages for each repository. The result is a sampled history window: it is neither a complete star history nor a measure of production adoption. Increase pages or run a scheduled collection for deeper coverage.

In [4]
# Sampling depth passed to the stargazer helper for every repository.
# NOTE(review): presumably this caps the sample at pages * per_page rows per
# repository; confirm against fetch_github_stargazers.
STARGAZER_PAGES = 3
PER_PAGE = 100

# Provenance record for this run, built before fetching so the access date
# reflects when collection started.
source_record = {
    "source": "GitHub REST API",
    "endpoint": "https://api.github.com/repos/{repo}/stargazers",
    "access_date": pd.Timestamp.today().date().isoformat(),
    "query_params": f"repos={len(repos)}; pages={STARGAZER_PAGES}; per_page={PER_PAGE}; token_used={bool(token)}",
    "time_range": "derived from fetched starred_at timestamps below",
    "cache_path": "not cached; outputs saved to examples/hot_trends/outputs",
    "interpretation_scope": "star velocity measures developer attention in the fetched window; stars are not production usage",
}
source_card = pd.DataFrame([source_record])

# Fetch each repository's stargazer sample and stack them into one frame.
star_rows = []
for repo in repos:
    sg = fetch_github_stargazers(
        repo,
        pages=STARGAZER_PAGES,
        per_page=PER_PAGE,
        token=token,
    )
    star_rows.append(sg)
stars = pd.concat(star_rows, ignore_index=True)
display(source_card)
stars.head(20)

4. Convert stargazer timestamps to daily velocity

In [5]
# Truncate each star timestamp to its day, then count stars per repo per day.
stars["date"] = pd.to_datetime(stars["starred_at"]).dt.normalize()
raw_daily = stars.groupby(["repo", "date"]).size().reset_index(name="count")

# Expand each repository to a gap-free daily calendar between its first and
# last observed day; days with no stars inside that window get a count of 0.
calendar_rows = []
for repo, sub in raw_daily.groupby("repo"):
    calendar = pd.date_range(start=sub["date"].min(), end=sub["date"].max(), freq="D")
    indexed = sub.set_index("date")
    filled = indexed.reindex(calendar, fill_value=0)
    filled = filled.rename_axis("date").reset_index()
    # The reindex also zero-filled the repo column on new rows; restore it.
    filled["repo"] = repo
    calendar_rows.append(filled[["repo", "date", "count"]])
daily = pd.concat(calendar_rows, ignore_index=True)

# Store dates as plain YYYY-MM-DD strings in both the calendar-filled and the
# observed-only tables.
for frame in (daily, raw_daily):
    frame["date"] = frame["date"].dt.date.astype(str)
daily.head(20)

Visualization: GitHub star velocity

The x-axis is the fetched stargazer window and the y-axis is stars per day after missing calendar days are filled with zero. Treat the line as attention in the sampled window; do not compare repositories without checking coverage.

In [6]
# Plot the 7-day average star velocity per repository over its fetched window.
star_velocity = daily.copy()
star_velocity["date"] = pd.to_datetime(star_velocity["date"])
# date x repo matrix of daily counts. A NaN here means the day lies outside
# that repository's fetched window (never sampled), which is different from a
# sampled day with zero stars (already stored as 0 in `daily`).
velocity_raw = star_velocity.pivot_table(index="date", columns="repo", values="count", aggfunc="sum").sort_index()
# Zero-filled matrix, kept under its original name with its original contents.
velocity_pivot = velocity_raw.fillna(0)
# Smooth on the NaN-preserving matrix so zeros from unsampled days do not pull
# down the average at the start of a repository's window, then blank every day
# the repository was not sampled so the line covers only the fetched window.
velocity_smoothed = velocity_raw.rolling(7, min_periods=1).mean().where(velocity_raw.notna())
ax = velocity_smoothed.plot(figsize=(11, 4), title="7-day average GitHub star velocity")
ax.set_ylabel("stars/day")
ax.set_xlabel("date")
plt.tight_layout()
plt.show()

5. Decompose star velocity if enough days exist

In [7]
# Audit table over the observed (not zero-filled) days, from the project helper.
coverage = source_audit_table(raw_daily, value_col="count", entity_col="repo", time_col="date")

# Per-repository size of the zero-filled calendar and total stars fetched,
# keyed as "series" so it can be merged onto the audit table.
per_repo = daily.groupby("repo")
calendar_coverage = per_repo.agg(
    calendar_days=("date", "nunique"),
    total_fetched_stars=("count", "sum"),
)
calendar_coverage = calendar_coverage.reset_index().rename(columns={"repo": "series"})
coverage = coverage.merge(calendar_coverage, on="series", how="left")
coverage["pages_per_repo"] = STARGAZER_PAGES
coverage["per_page"] = PER_PAGE

# Only repositories with at least 14 calendar days are decomposed.
ready = coverage.loc[coverage["calendar_days"] >= 14, "series"].tolist()
if not ready:
    # Nothing to decompose: leave empty frames and a status row for the export.
    components = pd.DataFrame()
    summary = pd.DataFrame([{"status": "not_enough_stargazer_calendar_days", "required": "increase pages or run repeated snapshots"}])
    events = pd.DataFrame()
else:
    components = decompose_table(
        daily[daily["repo"].isin(ready)],
        entity_col="repo",
        time_col="date",
        value_col="count",
        method="MA_BASELINE",
        period=7,
        trend_window=5,
        transform="log1p",
    )
    summary = editorial_priority(
        component_summary(components, entity_col="repo", time_col="date"),
        entity_col="repo",
    )
    events = residual_event_table(
        components,
        entity_col="repo",
        time_col="date",
        top_n=20,
        trim_edges=7,
    )
coverage

Visualization: GitHub coverage and residual events

Coverage bars show fetched stargazer days by repository. The residual panel highlights unusually large deviations after calendar zero-fill and edge trimming; inspect the event date before treating it as launch evidence.

In [8]
# Left panel: observed stargazer days per repository. Right panel: residual
# events, or a placeholder title when there are none.
coverage_plot = coverage.sort_values(by="observations")
fig, axes = plt.subplots(1, 2, figsize=(12, 4.5))
coverage_plot.plot.barh(
    x="series",
    y="observations",
    ax=axes[0],
    color="tab:blue",
    legend=False,
    title="Observed stargazer days",
)
# NOTE(review): this reference line sits at 14 on the "observations" axis,
# while the decomposition cell gates on calendar_days >= 14 - confirm the two
# are meant to be compared.
axes[0].axvline(14, color="tab:red", linestyle="--", linewidth=1.0)
axes[0].set_ylabel("")
if not events.empty:
    event_plot = events.copy()
    event_plot["date"] = pd.to_datetime(event_plot["date"])
    # Use the bar-chart order for the y positions so both panels line up.
    repo_order = coverage_plot["series"].tolist()
    repo_categories = pd.Categorical(event_plot["repo"], categories=repo_order, ordered=True)
    event_plot["repo_code"] = repo_categories.codes
    # Marker area grows with |z|; colour encodes the signed z-score.
    marker_sizes = 50 + event_plot["abs_residual_z"] * 35
    sc = axes[1].scatter(
        event_plot["date"],
        event_plot["repo_code"],
        s=marker_sizes,
        c=event_plot["residual_z"],
        cmap="RdBu_r",
    )
    axes[1].set_yticks(range(len(repo_order)))
    axes[1].set_yticklabels(repo_order)
    axes[1].set_title("Residual star-velocity events")
    fig.colorbar(sc, ax=axes[1], label="residual z")
else:
    axes[1].axis("off")
    axes[1].set_title("No residual events until coverage threshold is met")
plt.tight_layout()
plt.show()
In [9]
# Display the decomposition summary, or the single collection-status row when
# no repository reached the calendar-day threshold.
summary

6. Publication language

In [10]
# Load the publication phrasing table from the scoring helper and display it;
# it is written to CSV with the other outputs in the final cell.
phrasing = article_publication_phrasing()
phrasing
In [11]
# Tables to export, in write order, paired with their output file stems.
exports = [
    (source_card, "04_github_source_card"),
    (metadata, "04_github_repo_metadata"),
    (coverage, "04_github_stargazer_coverage"),
    (daily, "04_github_star_velocity_daily"),
    (summary, "04_github_decomposition_or_collection_status"),
]
# Residual events are written only when at least one was found.
if not events.empty:
    exports.append((events, "04_github_residual_events"))
exports.append((phrasing, "04_github_publication_phrasing"))
for table, stem in exports:
    save_table(table, stem)