Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,86 @@ jobs:
run: |
composer self-update && composer --version
composer install --prefer-dist
# TEMPORARY: diagnose intermittent `cURL error 28` timeouts to ps.pndsn.com
# from the self-hosted runner. Remove once the runner egress issue is resolved.
- name: Diagnose ps.pndsn.com connectivity
continue-on-error: true
run: |
echo "=== Runner public egress IP ==="
curl -s --max-time 15 https://api.ipify.org || echo "(could not determine egress IP)"
echo
echo "=== DNS resolution for ps.pndsn.com ==="
getent hosts ps.pndsn.com || nslookup ps.pndsn.com || true
echo
echo "=== Proxy-related environment ==="
env | grep -iE 'proxy' || echo "(no proxy env vars set)"
echo
echo "=== 10x probe of https://ps.pndsn.com/time/0 (catch intermittent hang) ==="
for i in $(seq 1 10); do
echo "--- attempt $i ---"
curl -sS -o /dev/null --max-time 15 \
-w 'http_code=%{http_code} dns=%{time_namelookup}s connect=%{time_connect}s tls=%{time_appconnect}s total=%{time_total}s\n' \
"https://ps.pndsn.com/time/0" \
|| echo "attempt $i FAILED (curl exit $?)"
done
echo
echo "=== Burst probe A: 200x time/0, 20 concurrent (keyless baseline) ==="
# Cheap, unauthenticated endpoint. Establishes that the network path is fine
# under concurrent load. time/0 is NOT subject to per-key publish throttling.
burst_log="$(mktemp)"
seq 1 200 | xargs -P 20 -I {} \
curl -sS -o /dev/null --max-time 15 \
-w 'http_code=%{http_code} total=%{time_total}s\n' \
"https://ps.pndsn.com/time/0" \
>> "$burst_log" 2>&1 || true
echo "--- HTTP status code distribution ---"
grep -oE 'http_code=[0-9]+' "$burst_log" | sort | uniq -c || true
echo "--- slowest 5 ---"
sort -t= -k3 -rn "$burst_log" | head -5 || true
echo
echo "=== Burst probe B: 200x PUBLISH, 20 concurrent (the actual failing op) ==="
# Publishes to the real keyset — the same operation that times out in CI.
# If THIS burst shows 429 / 000 / ~10s stalls while probe A stayed fast,
# the cause is per-key publish throttling, not the network or the runner.
pub_log="$(mktemp)"
seq 1 200 | xargs -P 20 -I {} \
curl -sS -o /dev/null --max-time 15 \
-w 'http_code=%{http_code} total=%{time_total}s\n' \
"https://ps.pndsn.com/publish/$PUBLISH_KEY/$SUBSCRIBE_KEY/0/diag-burst/0/%22x%22?uuid=diag-burst" \
>> "$pub_log" 2>&1 || true
echo "--- HTTP status code distribution ---"
grep -oE 'http_code=[0-9]+' "$pub_log" | sort | uniq -c || true
echo "--- non-200 / slow (>1s) responses ---"
grep -E 'http_code=(000|4|5)|total=[1-9]' "$pub_log" || echo "(none — all fast 200s)"
echo "--- slowest 5 ---"
sort -t= -k3 -rn "$pub_log" | head -5 || true
# TEMPORARY: raw `curl` CLI control that reuses ONE keep-alive connection
# across many publishes (one curl process, N urls via -K). This is the
# PHP-free reproducer for the server team: if it hangs every ~10th here on
# the dense CI runner, the every-10th black-hole is reproducible with no
# PHP/Guzzle/SDK involvement at all. Locally (high RTT) it ran 200/200
# clean — this step tests it on the same runner/pool that fails for Guzzle.
# Remove once the timeout cause is confirmed/fixed.
- name: Diagnose connection reuse (raw curl CLI)
continue-on-error: true
run: |
curl --version | head -1
DIAG_ITERATIONS=200 ./scripts/diag-curl.sh
# TEMPORARY: PHP-level probe that REUSES one Guzzle client across many
# requests, exactly like the SDK does for a whole PHPUnit run. The per-request
# curl probes in the connectivity step open a fresh connection each time and
# so cannot reproduce a stale/half-closed keep-alive socket — the prime suspect for
# "connected fine, then 0 bytes received after 10s" (cURL error 28).
# DIAG_DELAY_MS inserts an idle gap to provoke LB idle-timeout of pooled
# sockets. Remove once the timeout cause is confirmed/fixed.
- name: Diagnose Guzzle connection reuse (PHP)
continue-on-error: true
run: |
# Reuses one Guzzle client like the SDK. The keyless time/0 baseline
# should be 0 failures; the publish run reproduces the cURL-28 timeout.
# The built-in spacing sweep then tells us whether spacing publishes
# further apart makes the failures disappear (== server rate limit).
DIAG_ITERATIONS=200 DIAG_DELAY_MS=0 php scripts/diag-guzzle.php
- name: Run unit tests
run: composer test
- name: Cancel workflow runs for commit on error
Expand Down
2 changes: 1 addition & 1 deletion examples/Snippets.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
$pubnub->history()
->channel("my_channel")
->count(100)
->start(-1)
->start(1)
->end(13847168819178600)
->reverse(true)
->sync();
Expand Down
128 changes: 128 additions & 0 deletions scripts/diag-curl.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/usr/bin/env bash
#
# TEMPORARY diagnostic: reproduce the "every ~10th publish hangs 10s / 0 bytes"
# issue using the raw `curl` CLI instead of PHP/Guzzle.
#
# This is a CONTROL for scripts/diag-guzzle.php. The only way to reproduce the
# hang is to REUSE ONE keep-alive connection across many requests. A fresh curl
# per request opens a new socket every time and never reproduces it -- that is
# why the earlier per-request curl probes looked healthy.
#
# curl reuses the connection when you give it MANY URLs in ONE invocation, so
# this fires all N publishes down a single connection (like the SDK does).
#
# Usage (from project root):
# set -a; source .env.dev; set +a
# ./scripts/diag-curl.sh
#
# Env:
# PUBLISH_KEY, SUBSCRIBE_KEY (required)
# DIAG_ITERATIONS (optional, default 200)
#
# Output per request line: #<index> code=<http_code> time=<time_total>s port=<local_port> ip=<remote_ip> conns=<num_connects>
# - http_code 000 = no response (the hang / cURL error 28)
# - http_code 429 = Balancer rate limit (fast) — curl reports these every
# ~10th publish; the PHP/Guzzle path black-holes (000/10s)
# at the same cadence instead. See the investigation doc.
# - repeated local_port = same reused socket (keep-alive working)
# - a new local_port = curl had to reopen after a failure

set -u

# Inputs come from the environment; default to empty so the check below can
# report a friendly error instead of tripping `set -u`.
PUBLISH_KEY="${PUBLISH_KEY:-}"
SUBSCRIBE_KEY="${SUBSCRIBE_KEY:-}"
ITER="${DIAG_ITERATIONS:-200}"

if [[ -z "$PUBLISH_KEY" || -z "$SUBSCRIBE_KEY" ]]; then
  echo "ERROR: set PUBLISH_KEY and SUBSCRIBE_KEY (e.g. 'set -a; source .env.dev; set +a')" >&2
  exit 1
fi

# ITER drives the arithmetic loop below. A non-numeric value would make that
# loop fail (and a leading zero would be read as octal), leaving an empty
# config file that curl is then run against. Reject anything that is not a
# plain positive decimal integer up front.
if [[ ! "$ITER" =~ ^[1-9][0-9]*$ ]]; then
  echo "ERROR: DIAG_ITERATIONS must be a positive integer (got '${ITER}')" >&2
  exit 1
fi

URL="https://ps.pndsn.com/publish/${PUBLISH_KEY}/${SUBSCRIBE_KEY}/0/diag-curl/0/%22x%22?uuid=diag-curl"

# Build a curl-config file with N url entries -> ONE curl process, ONE connection.
CFG="$(mktemp)" || {
  echo "ERROR: could not create temporary curl config file" >&2
  exit 1
}
trap 'rm -f "$CFG"' EXIT
# Redirect the whole loop once instead of reopening the file per iteration.
for ((i = 0; i < ITER; i++)); do
  printf 'url = "%s"\n' "$URL"
done > "$CFG"

#######################################
# Fire every URL in the curl config file through ONE curl process and
# summarise the per-request results.
# Globals:   ITER (read) - iteration count, used for the banner only
#            CFG  (read) - path of the curl config file holding the URLs
# Arguments: $1   - human-readable label for the banner
#            $2.. - extra options passed straight to curl (e.g. --http1.1)
# Outputs:   one line per request plus a summary block, on stdout
# Returns:   status of the final awk stage; curl's own exit status is not
#            propagated because this script does not enable pipefail
#######################################
run_probe() {
  local label="$1"
  shift
  echo ""
  echo "=== ${label} (${ITER} iterations) ==="

  # -K $CFG         : all N URLs, reusing the connection
  # --max-time 12   : per-request ceiling (>10s server timeout so the hang shows as 000)
  # -s -o /dev/null : discard bodies (see the note below: not fully effective)
  # -w ...\n        : one machine-readable line per request
  #
  # Every -w line carries the sentinel "STAT::" and ONLY sentinel lines are
  # parsed. Response bodies still reach stdout on this multi-URL path and can
  # merge with the metadata line (an earlier version miscounted every-10th
  # HTTP 429s as ok=200, e.g. `code={"status":429...`).
  # NOTE(review): bodies most likely leak because curl pairs each -o with a
  # single URL, so the lone -o below only covers the first one - confirm
  # against the curl manual before relying on it.
  #
  # The sentinel must NOT start with '@': curl reads a -w value beginning with
  # '@' as a FILENAME (`-w @file`). So use "STAT::", not "@@STAT@@".
  #
  # Bodies often lack a trailing newline, which would glue the next "STAT::"
  # onto the end of the body line. The format therefore leads with \n, and awk
  # uses index() to find the sentinel ANYWHERE on the line; whatever precedes
  # it is discarded.
  #
  # Fields after STAT:: -> http_code | time_total | local_port | remote_ip | num_connects
  #   * They are split on SINGLE spaces. remote_ip is empty when no connection
  #     was established; collapsing runs of blanks would shift the fields,
  #     fail the field-count check and silently drop exactly the failures
  #     this probe exists to count.
  #   * num_connects is the number of NEW connects made for that one transfer
  #     (per curl's --write-out documentation), not a running total, so the
  #     summary adds it up across all requests.
  curl "$@" -K "$CFG" \
    --max-time 12 \
    -s -o /dev/null \
    -w '\nSTAT:: %{http_code} %{time_total} %{local_port} %{remote_ip} %{num_connects}\n' \
    | awk '
      {
        at = index($0, "STAT:: ")
        if (at == 0) next                 # body text with no sentinel: ignore
        n = split(substr($0, at + 7), f, /[ ]/)
        if (n < 5) next                   # malformed / truncated sentinel: skip
        code = f[1]; t = f[2]; port = f[3]; ip = f[4]; nconn = f[5]
        if (ip == "") ip = "-"            # never connected: no remote address
        idx = total                       # 0-based request index among sentinel lines
        printf " #%-3d code=%s time=%ss port=%s ip=%s conns=%s\n", idx, code, t, port, ip, nconn
        total++
        connects += nconn + 0             # per-transfer value, so accumulate
        ipseen[ip]++
        portseen[port] = 1
        if (code == "000" || code == "") {          # hang / no response (cURL 28)
          hang++
          hangidx = hangidx (hangidx == "" ? "" : ",") idx
          ipfail[ip]++
        } else if (code == "429") {                 # Balancer rate limit (fast)
          rl++
          rlidx = rlidx (rlidx == "" ? "" : ",") idx
          ipok[ip]++                                # server answered fast; not a hang
        } else {
          ok++
          ipok[ip]++
        }
      }
      END {
        ndist = 0
        for (k in portseen) ndist++
        printf "\n ok=%d rate-limited(429)=%d hung(000)=%d distinct-sockets=%d tcp-connects-opened=%d (total=%d)\n",
          ok + 0, rl + 0, hang + 0, ndist, connects + 0, total + 0
        printf " (reuse proof: tcp-connects-opened << total => keep-alive working; ideally 1 + one reconnect per hang)\n"
        printf " per-backend (non-hang/hung):"
        for (ip in ipseen) printf " %s=%d/%d", ip, ipok[ip] + 0, ipfail[ip] + 0
        printf "\n"
        if (rl + 0 > 0) printf " rate-limited(429) indices: %s\n", rlidx
        if (hang + 0 > 0) printf " hung(000) indices: %s\n", hangidx
        if (rl + 0 == 0 && hang + 0 == 0) printf " (no 429s, no hangs)\n"
      }'
}

# Banner: what this run does and what to compare it with.
printf '%s\n' \
  "######## raw curl reuse diagnostic ########" \
  "One curl process reuses one keep-alive connection across ${ITER} publishes." \
  "Compare against scripts/diag-guzzle.php (the PHP/Guzzle path)."

# Single probe, pinned to HTTP/1.1.
run_probe "publish (HTTP/1.1)" --http1.1

# Blank separator line, then the closing marker.
printf '\n%s\n' "######## done ########"
Loading
Loading