package build_event_handler

import (
	"context"
	"crypto/md5"
	"encoding/binary"
	"flag"
	"fmt"
	"io"
	"net/url"
	"path"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/Masterminds/semver/v3"
	"github.com/buildbuddy-io/buildbuddy/proto/build_event_stream"
	"github.com/buildbuddy-io/buildbuddy/proto/command_line"
	"github.com/buildbuddy-io/buildbuddy/server/build_event_protocol/accumulator"
	"github.com/buildbuddy-io/buildbuddy/server/build_event_protocol/build_status_reporter"
	"github.com/buildbuddy-io/buildbuddy/server/build_event_protocol/invocation_format"
	"github.com/buildbuddy-io/buildbuddy/server/build_event_protocol/target_tracker"
	"github.com/buildbuddy-io/buildbuddy/server/endpoint_urls/build_buddy_url"
	"github.com/buildbuddy-io/buildbuddy/server/endpoint_urls/cache_api_url"
	"github.com/buildbuddy-io/buildbuddy/server/environment"
	"github.com/buildbuddy-io/buildbuddy/server/eventlog"
	"github.com/buildbuddy-io/buildbuddy/server/interfaces"
	"github.com/buildbuddy-io/buildbuddy/server/metrics"
	"github.com/buildbuddy-io/buildbuddy/server/olapdbconfig"
	"github.com/buildbuddy-io/buildbuddy/server/remote_cache/digest"
	"github.com/buildbuddy-io/buildbuddy/server/remote_cache/hit_tracker"
	"github.com/buildbuddy-io/buildbuddy/server/remote_cache/scorecard"
	"github.com/buildbuddy-io/buildbuddy/server/tables"
	"github.com/buildbuddy-io/buildbuddy/server/util/alert"
	"github.com/buildbuddy-io/buildbuddy/server/util/authutil"
	"github.com/buildbuddy-io/buildbuddy/server/util/background"
	"github.com/buildbuddy-io/buildbuddy/server/util/capabilities"
	"github.com/buildbuddy-io/buildbuddy/server/util/log"
	"github.com/buildbuddy-io/buildbuddy/server/util/paging"
	"github.com/buildbuddy-io/buildbuddy/server/util/perms"
	"github.com/buildbuddy-io/buildbuddy/server/util/proto"
	"github.com/buildbuddy-io/buildbuddy/server/util/protofile"
	"github.com/buildbuddy-io/buildbuddy/server/util/redact"
	"github.com/buildbuddy-io/buildbuddy/server/util/status"
	"github.com/buildbuddy-io/buildbuddy/server/util/subdomain"
	"github.com/buildbuddy-io/buildbuddy/server/util/terminal"
	"github.com/buildbuddy-io/buildbuddy/server/util/urlutil"
	"github.com/buildbuddy-io/buildbuddy/server/util/usageutil"
	"github.com/buildbuddy-io/buildbuddy/server/util/uuid"
	"github.com/google/shlex"
	"github.com/prometheus/client_golang/prometheus"
	"golang.org/x/sync/errgroup"
	"google.golang.org/protobuf/types/known/fieldmaskpb"

	apipb "github.com/buildbuddy-io/buildbuddy/proto/api/v1"
	bepb "github.com/buildbuddy-io/buildbuddy/proto/build_events"
	capb "github.com/buildbuddy-io/buildbuddy/proto/cache"
	inpb "github.com/buildbuddy-io/buildbuddy/proto/invocation"
	inspb "github.com/buildbuddy-io/buildbuddy/proto/invocation_status"
	pgpb "github.com/buildbuddy-io/buildbuddy/proto/pagination"
	pepb "github.com/buildbuddy-io/buildbuddy/proto/publish_build_event"
	rspb "github.com/buildbuddy-io/buildbuddy/proto/resource"
	sipb "github.com/buildbuddy-io/buildbuddy/proto/stored_invocation"
	uidpb "github.com/buildbuddy-io/buildbuddy/proto/user_id"
	api_common "github.com/buildbuddy-io/buildbuddy/server/api/common"
	gitutil "github.com/buildbuddy-io/buildbuddy/server/util/git"
	gstatus "google.golang.org/grpc/status"
)

// Tunables and protocol constants used by the build event handler and its
// background workers.
const (
	// Chunk size of 100KB.
	// NOTE(review): not referenced in the visible part of this file, and the
	// storage.chunk_file_size_bytes flag defaults to 3MB instead - confirm
	// where (and whether) this constant is still used.
	defaultChunkFileSizeBytes = 1000 * 100 // 100KB

	// How many workers to spin up for writing cache stats to the DB.
	numStatsRecorderWorkers = 8

	// How many workers to spin up for looking up invocations before
	// webhooks are notified.
	numWebhookInvocationLookupWorkers = 8
	// How many workers to spin up for notifying webhooks.
	numWebhookNotifyWorkers = 16

	// How long to wait before giving up on webhook requests.
	webhookNotifyTimeout = 1 * time.Minute

	// Default number of actions shown by bazel
	defaultActionsShown = 8

	// Exit code in Finished event indicating that the build was interrupted
	// (i.e. killed by user).
	InterruptedExitCode = 8

	// First sequence number that we expect to see in the ordered build
	// event stream.
	firstExpectedSequenceNumber = 1

	// Skip unimportant events if more than this many are received in a
	// single build event stream.
	maxEventCount = 100_000

	// Max total pattern length to include in the Expanded event returned to the
	// UI.
	maxPatternLengthBytes = 10_000
)

// Command-line flags controlling how build event data is stored and when
// invocation stats are flushed.
var (
	chunkFileSizeBytes      = flag.Int("storage.chunk_file_size_bytes", 3_000_000 /* 3 MB */, "How many bytes to buffer in memory before flushing a chunk of build protocol data to disk.")
	enableChunkedEventLogs  = flag.Bool("storage.enable_chunked_event_logs", true, "If true, Event logs will be stored separately from the invocation proto in chunks.")
	disablePersistArtifacts = flag.Bool("storage.disable_persist_cache_artifacts", false, "If disabled, buildbuddy will not persist cache artifacts in the blobstore. This may make older invocations not display properly.")
	writeToOLAPDBEnabled    = flag.Bool("app.enable_write_to_olap_db", true, "If enabled, complete invocations will be flushed to OLAP DB")

	// Applied by the stats recorder relative to the time a stats task was
	// created, not relative to when a worker picks the task up.
	cacheStatsFinalizationDelay = flag.Duration("cache_stats_finalization_delay", 500*time.Millisecond, "The time allowed for all metrics collectors across all apps to flush their local cache stats to the backing storage, before finalizing stats in the DB.")
)

// cacheArtifactsBlobstorePath is the "artifacts/cache" path segment that is
// joined between the invocation ID and the artifact's URI path when a cache
// artifact is persisted to the blobstore.
var cacheArtifactsBlobstorePath = path.Join("artifacts", "cache")

// PersistArtifacts describes the cache artifacts that should be copied to the
// blobstore after an invocation has been finalized.
//
// URIs holds the artifact locations to persist; FinalizeInvocation fills it
// with the build tool log URIs followed by the test output URIs (and leaves it
// empty when persisting is disabled by flag).
//
// NOTE(review): TestActionOutputs is neither set nor read in the visible part
// of this file - confirm its meaning against the rest of the file.
type PersistArtifacts struct {
	URIs              []*url.URL
	TestActionOutputs bool
}

// BuildEventHandler opens EventChannels for invocations and keeps track of
// them so that they can be cancelled and waited on at shutdown.
//
// statsRecorder is shared by every channel this handler opens. openChannels
// is incremented for each opened channel and decremented when the channel is
// closed. cancelFnsByInvID holds the cancel func of the context handed to the
// most recently opened channel for each invocation ID.
type BuildEventHandler struct {
	env              environment.Env
	statsRecorder    *statsRecorder
	openChannels     *sync.WaitGroup
	cancelFnsByInvID sync.Map // map of string invocationID => context.CancelFunc
}

// NewBuildEventHandler builds a BuildEventHandler together with the stats
// recorder and webhook notifier that run behind it, starts both background
// workers, and registers a shutdown function that stops the handler, the
// recorder and the notifier, in that order.
func NewBuildEventHandler(env environment.Env) *BuildEventHandler {
	channels := &sync.WaitGroup{}
	// Finalized invocations flow from the stats recorder to the webhook
	// notifier over this buffered channel.
	finalized := make(chan *invocationJWT, 4096)

	recorder := newStatsRecorder(env, channels, finalized)
	notifier := newWebhookNotifier(env, finalized)
	recorder.Start()
	notifier.Start()

	// cancelFnsByInvID is left at its zero value, which is an empty, ready to
	// use sync.Map.
	handler := &BuildEventHandler{
		env:           env,
		statsRecorder: recorder,
		openChannels:  channels,
	}

	shutdown := func(ctx context.Context) error {
		handler.Stop()
		recorder.Stop()
		notifier.Stop()
		return nil
	}
	env.GetHealthChecker().RegisterShutdownFunction(shutdown)
	return handler
}

// OpenChannel creates an EventChannel for the invocation with the given ID.
//
// If a cancel func is already registered for iid (i.e. an earlier channel for
// the same invocation is still open), that earlier channel's context is
// cancelled first. The returned channel gets its own cancellable context
// derived from ctx, and its cancel func is registered under iid so that Stop
// can cancel it at shutdown. The channel starts at attempt 1.
//
// NOTE(review): the Load/Store pair below is not atomic, and onClose deletes
// whatever entry is currently stored under iid. It looks like an older channel
// closing after a newer one was opened for the same iid would remove the newer
// channel's cancel func from the map - confirm whether that matters for
// shutdown.
func (b *BuildEventHandler) OpenChannel(ctx context.Context, iid string) interfaces.BuildEventChannel {
	invocation := &inpb.Invocation{InvocationId: iid}
	buildEventAccumulator := accumulator.NewBEValues(invocation)
	// Cancel the context of any channel that is still registered for this
	// invocation ID before replacing it.
	val, ok := b.cancelFnsByInvID.Load(iid)
	if ok {
		cancelFn := val.(context.CancelFunc)
		cancelFn()
	}

	ctx, cancel := context.WithCancel(ctx)
	b.cancelFnsByInvID.Store(iid, cancel)

	// Track the channel as open until its Close method runs onClose.
	b.openChannels.Add(1)
	onClose := func() {
		b.openChannels.Done()
		b.cancelFnsByInvID.Delete(iid)
	}

	return &EventChannel{
		env:            b.env,
		statsRecorder:  b.statsRecorder,
		ctx:            ctx,
		pw:             nil,
		beValues:       buildEventAccumulator,
		redactor:       redact.NewStreamingRedactor(b.env),
		statusReporter: build_status_reporter.NewBuildStatusReporter(b.env, buildEventAccumulator),
		targetTracker:  target_tracker.NewTargetTracker(b.env, buildEventAccumulator),
		collector:      b.env.GetMetricsCollector(),
		apiTargetMap:   make(api_common.TargetMap),

		hasReceivedEventWithOptions: false,
		hasReceivedStartedEvent:     false,
		unprocessedStartingEvents:   make(map[string]struct{}),
		bufferedEvents:              make([]*inpb.InvocationEvent, 0),
		logWriter:                   nil,
		onClose:                     onClose,
		attempt:                     1,
	}
}

// Stop cancels the context of every invocation that currently has a cancel
// func registered with this handler, logging each cancellation.
func (b *BuildEventHandler) Stop() {
	cancelOne := func(k, v interface{}) bool {
		invocationID, cancel := k.(string), v.(context.CancelFunc)
		log.Infof("Cancelling invocation %q because server received shutdown signal", invocationID)
		cancel()
		// Returning true keeps Range iterating over the remaining entries.
		return true
	}
	b.cancelFnsByInvID.Range(cancelOne)
}

// invocationJWT represents an invocation ID as well as the JWT granting access
// to it. It should only be used for background tasks that need access to the
// JWT after the build event stream is already closed.
//
// attempt is the attempt number copied from the invocation proto at the time
// the stats task is enqueued.
type invocationJWT struct {
	id      string
	jwt     string
	attempt uint64
}

// recordStatsTask contains the info needed to record the stats for an
// invocation. These tasks are enqueued to statsRecorder and executed in the
// background.
//
// persist lists the cache artifacts to copy to the blobstore once stats have
// been recorded. invocationStatus is the invocation's status at enqueue time;
// only tasks with a COMPLETE status are flushed to the OLAP DB.
type recordStatsTask struct {
	*invocationJWT
	// createdAt is the time at which this task was created.
	createdAt time.Time
	// files contains a mapping of file digests to file name metadata for files
	// referenced in the BEP.
	files            map[string]*build_event_stream.File
	persist          *PersistArtifacts
	invocationStatus inspb.InvocationStatus
}

// statsRecorder listens for finalized invocations and copies cache stats from
// the metrics collector to the DB.
//
// Tasks are submitted with Enqueue and consumed by the worker goroutines
// launched in Start, which run in eg. Stop waits on openChannels, marks the
// recorder as stopped, closes tasks so the workers drain and exit, and finally
// closes onStatsRecorded.
type statsRecorder struct {
	env          environment.Env
	openChannels *sync.WaitGroup
	// onStatsRecorded is a channel for this statsRecorder to notify after
	// recording stats for each invocation. Invocations sent on this channel are
	// considered "finalized".
	onStatsRecorded chan<- *invocationJWT
	eg              errgroup.Group

	mu      sync.Mutex // protects(tasks, stopped)
	tasks   chan *recordStatsTask
	stopped bool
}

// newStatsRecorder returns a statsRecorder with a buffered task queue. The
// recorder does not process anything until Start is called.
func newStatsRecorder(env environment.Env, openChannels *sync.WaitGroup, onStatsRecorded chan<- *invocationJWT) *statsRecorder {
	recorder := &statsRecorder{tasks: make(chan *recordStatsTask, 4096)}
	recorder.env = env
	recorder.openChannels = openChannels
	recorder.onStatsRecorded = onStatsRecorded
	return recorder
}

// Enqueue schedules the stats of the given invocation to be recorded in the
// background once they are available.
//
// The call never blocks: if the recorder has already been stopped, or if the
// task buffer is full, an alert is raised and the task is dropped.
func (r *statsRecorder) Enqueue(ctx context.Context, invocation *inpb.Invocation, persist *PersistArtifacts) {
	r.mu.Lock()
	defer r.mu.Unlock()

	if r.stopped {
		alert.UnexpectedEvent(
			"stats_recorder_finalize_after_shutdown",
			"Invocation %q was marked finalized after the stats recorder was shut down.",
			invocation.GetInvocationId())
		return
	}

	// Capture the JWT now; the workers run on the server context, which does
	// not carry the caller's auth info.
	token := r.env.GetAuthenticator().TrustedJWTFromAuthContext(ctx)
	task := &recordStatsTask{
		invocationJWT: &invocationJWT{
			id:      invocation.GetInvocationId(),
			attempt: invocation.Attempt,
			jwt:     token,
		},
		createdAt:        time.Now(),
		files:            scorecard.ExtractFiles(invocation),
		invocationStatus: invocation.GetInvocationStatus(),
		persist:          persist,
	}

	select {
	case r.tasks <- task:
		// Queued; one of the workers will pick it up.
	default:
		alert.UnexpectedEvent(
			"stats_recorder_channel_buffer_full",
			"Failed to write cache stats: stats recorder task buffer is full")
	}
}

// Start launches the fixed pool of worker goroutines that consume stats
// tasks. Each worker runs until the task channel is closed and drained.
func (r *statsRecorder) Start() {
	ctx := r.env.GetServerContext()

	worker := func() error {
		defer metrics.StatsRecorderWorkers.Dec()
		for task := range r.tasks {
			r.handleTask(ctx, task)
		}
		return nil
	}

	for n := 0; n < numStatsRecorderWorkers; n++ {
		// The gauge is bumped before the goroutine is scheduled and
		// decremented by the worker itself when it exits.
		metrics.StatsRecorderWorkers.Inc()
		r.eg.Go(worker)
	}
}

// lookupInvocation fetches the invocation row for ij from the invocation DB,
// authenticating the lookup with the JWT stored alongside the invocation ID.
func (r *statsRecorder) lookupInvocation(ctx context.Context, ij *invocationJWT) (*tables.Invocation, error) {
	authedCtx := r.env.GetAuthenticator().AuthContextFromTrustedJWT(ctx, ij.jwt)
	invocationDB := r.env.GetInvocationDB()
	return invocationDB.LookupInvocation(authedCtx, ij.id)
}

// flushInvocationStatsToOLAPDB writes the finalized invocation, and then the
// executions collected for it, to the OLAP DB.
//
// It is a no-op (returning nil) when no OLAP DB handle is configured or when
// writing to the OLAP DB is disabled by flag. After the invocation row has
// been written, executions are only processed when an execution collector is
// configured; in that case the collected executions are always deleted from
// the collector on return, because the flush is not retried.
//
// Returns an error if the invocation lookup fails, if the invocation cannot be
// written, if a batch of executions cannot be read from the collector, or if a
// batch of executions cannot be written to the OLAP DB. A failure to register
// the invocation with the collector is only logged.
func (r *statsRecorder) flushInvocationStatsToOLAPDB(ctx context.Context, ij *invocationJWT) error {
	if r.env.GetOLAPDBHandle() == nil || !*writeToOLAPDBEnabled {
		return nil
	}
	inv, err := r.lookupInvocation(ctx, ij)
	if err != nil {
		return status.InternalErrorf("failed to look up invocation for invocation id %q: %s", ij.id, err)
	}

	if err := r.env.GetOLAPDBHandle().FlushInvocationStats(ctx, inv); err != nil {
		return err
	}
	// Temporary logging for debugging clickhouse missing data.
	log.CtxInfo(ctx, "Successfully wrote invocation to clickhouse")

	if r.env.GetExecutionCollector() == nil {
		return nil
	}

	// Always clean up executions in Collector because we are not retrying
	defer func() {
		err := r.env.GetExecutionCollector().DeleteExecutions(ctx, inv.InvocationID)
		if err != nil {
			log.CtxErrorf(ctx, "failed to clean up executions in collector: %s", err)
		}
	}()

	if !olapdbconfig.WriteExecutionsToOLAPDBEnabled() {
		return nil
	}

	// Add the invocation to redis to signal to the executors that they can flush
	// complete Executions into clickhouse directly, in case the PublishOperation
	// is received after the Invocation is complete.
	storedInv := toStoredInvocation(inv)
	if err := r.env.GetExecutionCollector().AddInvocation(ctx, storedInv); err != nil {
		log.CtxErrorf(ctx, "failed to write the complete Invocation to redis: %s", err)
	} else {
		log.CtxInfo(ctx, "Successfully wrote invocation to redis")
	}

	// Copy executions from the collector to the OLAP DB in fixed-size batches.
	// Both indexes passed to GetExecutions are inclusive, so each batch covers
	// [startIndex, startIndex+batchSize-1].
	const batchSize = 50_000
	var startIndex int64
	for {
		endIndex := startIndex + batchSize - 1
		executions, err := r.env.GetExecutionCollector().GetExecutions(ctx, inv.InvocationID, startIndex, endIndex)
		if err != nil {
			return status.InternalErrorf("failed to read executions for invocation_id = %q, startIndex = %d, endIndex = %d from Redis: %s", inv.InvocationID, startIndex, endIndex, err)
		}
		if len(executions) == 0 {
			break
		}
		// Surface write failures to the caller instead of silently stopping:
		// the remaining batches are abandoned either way, but the caller can
		// at least log that the executions were not flushed.
		if err := r.env.GetOLAPDBHandle().FlushExecutionStats(ctx, storedInv, executions); err != nil {
			return status.WrapErrorf(err, "failed to flush executions for invocation_id = %q, startIndex = %d, endIndex = %d to OLAP DB", inv.InvocationID, startIndex, endIndex)
		}
		log.CtxInfof(ctx, "successfully wrote %d executions", len(executions))
		// A short batch means the collector has no more executions.
		if len(executions) < batchSize {
			break
		}
		startIndex += batchSize
	}

	return nil
}

// handleTask records the stats for a single finalized invocation. In order, it:
//
//  1. waits until the cache stats finalization delay has elapsed since the
//     task was created,
//  2. copies the collected cache stats into the invocation row and writes the
//     scorecard blob,
//  3. updates the invocation in the primary DB,
//  4. flushes the invocation to the OLAP DB (complete invocations only),
//  5. cleans up the collected cache stats,
//  6. notifies the onStatsRecorded channel without blocking, and
//  7. copies the task's cache artifacts to the blobstore.
//
// Steps 6 and 7 are skipped when the DB update reports that the row was not
// updated. Failures are logged rather than returned.
//
// NOTE(review): task.persist is dereferenced without a nil check; the only
// visible caller (FinalizeInvocation) always passes a non-nil value.
func (r *statsRecorder) handleTask(ctx context.Context, task *recordStatsTask) {
	start := time.Now()
	defer func() {
		metrics.StatsRecorderDuration.Observe(float64(time.Since(start).Microseconds()))
	}()

	// Apply the finalization delay relative to when the invocation was marked
	// finalized, rather than relative to now. Otherwise each worker would be
	// unnecessarily throttled.
	time.Sleep(time.Until(task.createdAt.Add(*cacheStatsFinalizationDelay)))
	// Only the ID, the attempt and (below) the cache stats fields are set on
	// the row passed to UpdateInvocation.
	ti := &tables.Invocation{InvocationID: task.invocationJWT.id, Attempt: task.invocationJWT.attempt}
	ctx = log.EnrichContext(ctx, log.InvocationIDKey, task.invocationJWT.id)
	if stats := hit_tracker.CollectCacheStats(ctx, r.env, task.invocationJWT.id); stats != nil {
		fillInvocationFromCacheStats(stats, ti)
	} else {
		log.CtxInfo(ctx, "cache stats is not available.")
	}
	if sc := hit_tracker.ScoreCard(ctx, r.env, task.invocationJWT.id); sc != nil {
		scorecard.FillBESMetadata(sc, task.files)
		if err := scorecard.Write(ctx, r.env, task.invocationJWT.id, task.invocationJWT.attempt, sc); err != nil {
			log.CtxErrorf(ctx, "Error writing scorecard blob: %s", err)
		}
	}

	// NOTE(review): when UpdateInvocation returns an error, updated is
	// presumably false as well, so the "pre-empted" warning further down is
	// also logged and the webhook is skipped - confirm that this is intended.
	updated, err := r.env.GetInvocationDB().UpdateInvocation(ctx, ti)
	if err != nil {
		log.CtxErrorf(ctx, "Failed to write cache stats to primaryDB: %s", err)
	}

	if task.invocationStatus == inspb.InvocationStatus_COMPLETE_INVOCATION_STATUS {
		// only flush complete invocation to clickhouse.
		err = r.flushInvocationStatsToOLAPDB(ctx, task.invocationJWT)
		if err != nil {
			log.CtxErrorf(ctx, "Failed to flush stats to clickhouse: %s", err)
		}
	} else {
		log.CtxInfof(ctx, "skipped writing stats to clickhouse, invocationStatus = %s", task.invocationStatus)
	}
	// Cleanup regardless of whether the stats are flushed successfully to
	// the DB (since we won't retry the flush and we don't need these stats
	// for any other purpose).
	hit_tracker.CleanupCacheStats(ctx, r.env, task.invocationJWT.id)
	if !updated {
		log.CtxWarningf(ctx, "Attempt %d of invocation pre-empted by more recent attempt, no cache stats flushed.", task.invocationJWT.attempt)
		// Don't notify the webhook; the more recent attempt should trigger
		// the notification when it is finalized.
		return
	}

	// Once cache stats are populated, notify the onStatsRecorded channel in
	// a non-blocking fashion.
	select {
	case r.onStatsRecorded <- task.invocationJWT:
		break
	default:
		alert.UnexpectedEvent(
			"webhook_channel_buffer_full",
			"Failed to notify webhook: channel buffer is full",
		)
	}

	// Artifacts are copied using the credentials of the user that published
	// the invocation.
	ctx = r.env.GetAuthenticator().AuthContextFromTrustedJWT(ctx, task.invocationJWT.jwt)
	eg, ctx := errgroup.WithContext(ctx)
	eg.SetLimit(50) // Max concurrency when copying files from cache->blobstore.
	for _, uri := range task.persist.URIs {
		// Per-iteration copy so that the goroutine below captures this URI.
		uri := uri
		rn, err := digest.ParseDownloadResourceName(uri.Path)
		if err != nil {
			log.CtxErrorf(ctx, "Unparseable artifact URI: %s", err)
			continue
		}
		if rn.IsEmpty() {
			continue
		}
		eg.Go(func() error {
			// When persisting artifacts, make sure we associate the cache
			// requests with the app, not bazel.
			ctx := usageutil.WithLocalServerLabels(ctx)

			fullPath := path.Join(task.invocationJWT.id, cacheArtifactsBlobstorePath, uri.Path)
			// Only persist artifacts from caches that are hosted on the BuildBuddy
			// domain (but only if we know it).
			if cache_api_url.String() == "" || urlutil.GetDomain(uri.Hostname()) == urlutil.GetDomain(cache_api_url.WithPath("").Hostname()) {
				if err := persistArtifact(ctx, r.env, uri, fullPath); err != nil {
					log.CtxError(ctx, err.Error())
				}
			}
			// Failures are logged above and never returned, so one failed
			// artifact does not cancel the remaining copies.
			return nil
		})
	}
	if err := eg.Wait(); err != nil {
		log.CtxErrorf(ctx, "Failed to persist cache artifacts to blobstore: %s", err)
	}
}

// Stop shuts the stats recorder down. It blocks until every open EventChannel
// has been closed, marks the recorder as stopped, closes the task queue so
// that the workers drain the remaining tasks and exit, waits for the workers,
// and finally closes the onStatsRecorded channel to signal downstream
// consumers that no more invocations will be finalized.
func (r *statsRecorder) Stop() {
	// Wait for all EventHandler channels to be closed to ensure there will be no
	// more calls to Enqueue.
	// TODO(bduffany): This has a race condition where the server can be shutdown
	// just after the stream request is accepted by the server but before calling
	// openChannels.Add(1). Can fix this by explicitly waiting for the gRPC server
	// shutdown to finish, which ensures all streaming requests have terminated.
	log.Info("StatsRecorder: waiting for EventChannels to be closed before shutting down")
	r.openChannels.Wait()

	log.Info("StatsRecorder: shutting down")
	// stopped is set and tasks is closed under the same lock that Enqueue
	// holds, so Enqueue never sends on the closed channel.
	r.mu.Lock()
	r.stopped = true
	close(r.tasks)
	r.mu.Unlock()

	if err := r.eg.Wait(); err != nil {
		log.Error(err.Error())
	}

	// All workers have exited, so nothing can send on this channel anymore.
	close(r.onStatsRecorded)
}

// persistArtifact streams the bytestream resource at uri into the blobstore
// under the given path. The blobstore writer is committed and then closed on
// success; on a stream or commit failure it is closed without committing.
// Every failure is returned wrapped with the destination path and source URI.
func persistArtifact(ctx context.Context, env environment.Env, uri *url.URL, path string) error {
	writer, err := env.GetBlobstore().Writer(ctx, path)
	if err != nil {
		return status.WrapErrorf(
			err,
			"Failed to open writer to blobstore for path %s to persist cache artifact at %s",
			path,
			uri.String(),
		)
	}

	err = env.GetPooledByteStreamClient().StreamBytestreamFile(ctx, uri, writer)
	if err != nil {
		writer.Close()
		return status.WrapErrorf(
			err,
			"Failed to stream to blobstore for path %s to persist cache artifact at %s",
			path,
			uri.String(),
		)
	}

	err = writer.Commit()
	if err != nil {
		writer.Close()
		return status.WrapErrorf(
			err,
			"Failed to commit to blobstore for path %s to persist cache artifact at %s",
			path,
			uri.String(),
		)
	}

	err = writer.Close()
	if err != nil {
		return status.WrapErrorf(
			err,
			"Failed to close blobstore writer for path %s to persist cache artifact at %s",
			path,
			uri.String(),
		)
	}
	return nil
}

// notifyWebhookTask is a unit of work for the webhook notify workers: a single
// webhook to be notified about a single finalized invocation. One task is
// created per configured webhook for each finalized invocation.
type notifyWebhookTask struct {
	// hook is the webhook to notify of a completed invocation.
	hook interfaces.Webhook
	// invocationJWT contains the invocation ID and JWT for the invocation.
	*invocationJWT
	// invocation is the complete invocation looked up from the invocationJWT.
	invocation *inpb.Invocation
}

// notifyWithTimeout invokes the task's webhook for the task's invocation,
// bounded by webhookNotifyTimeout and authenticated as the user from the build
// event stream. The elapsed time is recorded in the webhook notify duration
// metric regardless of the outcome.
func notifyWithTimeout(ctx context.Context, env environment.Env, t *notifyWebhookTask) error {
	began := time.Now()
	defer func() {
		elapsedUsec := time.Since(began).Microseconds()
		metrics.WebhookNotifyDuration.Observe(float64(elapsedUsec))
	}()

	timeoutCtx, cancel := context.WithTimeout(ctx, webhookNotifyTimeout)
	defer cancel()

	// The hook runs with the credentials carried by the invocation's JWT.
	authedCtx := env.GetAuthenticator().AuthContextFromTrustedJWT(timeoutCtx, t.invocationJWT.jwt)
	return t.hook.NotifyComplete(authedCtx, t.invocation)
}

// webhookNotifier listens for invocations to be finalized (including stats)
// and notifies webhooks.
//
// Work flows through two worker pools: the lookup workers (lookupGroup) read
// finalized invocations, look each one up and push one task per configured
// webhook onto tasks; the notify workers (notifyGroup) consume tasks and call
// the webhooks.
type webhookNotifier struct {
	env environment.Env
	// invocations is a channel of finalized invocations. On each invocation
	// sent to this channel, we notify all configured webhooks.
	invocations <-chan *invocationJWT

	tasks       chan *notifyWebhookTask
	lookupGroup errgroup.Group
	notifyGroup errgroup.Group
}

// newWebhookNotifier returns a webhookNotifier that reads finalized
// invocations from the given channel. No workers run until Start is called.
func newWebhookNotifier(env environment.Env, invocations <-chan *invocationJWT) *webhookNotifier {
	notifier := &webhookNotifier{tasks: make(chan *notifyWebhookTask, 4096)}
	notifier.env = env
	notifier.invocations = invocations
	return notifier
}

// Start launches the two worker pools of the notifier: the lookup workers,
// which turn finalized invocations into webhook tasks, and the notify workers,
// which execute those tasks. Both pools run on the server context.
func (w *webhookNotifier) Start() {
	ctx := w.env.GetServerContext()

	// Listen for invocations that have been finalized and start a notify
	// webhook task for each webhook.
	lookupWorker := func() error {
		defer metrics.WebhookInvocationLookupWorkers.Dec()
		for ij := range w.invocations {
			err := w.lookupAndCreateTask(ctx, ij)
			if err == nil {
				continue
			}
			log.Warningf("Failed to lookup invocation before notifying webhook: %s", err)
		}
		return nil
	}
	w.lookupGroup = errgroup.Group{}
	for n := 0; n < numWebhookInvocationLookupWorkers; n++ {
		metrics.WebhookInvocationLookupWorkers.Inc()
		w.lookupGroup.Go(lookupWorker)
	}

	// Deliver each queued task to its webhook, logging (not returning) failures.
	notifyWorker := func() error {
		defer metrics.WebhookNotifyWorkers.Dec()
		for task := range w.tasks {
			taskCtx := log.EnrichContext(ctx, log.InvocationIDKey, task.invocation.GetInvocationId())
			if err := notifyWithTimeout(taskCtx, w.env, task); err != nil {
				log.CtxWarningf(taskCtx, "Failed to notify invocation webhook: %s", err)
			}
		}
		return nil
	}
	w.notifyGroup = errgroup.Group{}
	for n := 0; n < numWebhookNotifyWorkers; n++ {
		metrics.WebhookNotifyWorkers.Inc()
		w.notifyGroup.Go(notifyWorker)
	}
}

// lookupAndCreateTask loads the finalized invocation identified by ij and
// queues one notify task per configured webhook. Disconnected invocations are
// skipped. The time spent is recorded in the lookup duration metric.
//
// Returns the lookup error, if any. Sending on the task channel blocks while
// its buffer is full.
func (w *webhookNotifier) lookupAndCreateTask(ctx context.Context, ij *invocationJWT) error {
	began := time.Now()
	defer func() {
		elapsedUsec := time.Since(began).Microseconds()
		metrics.WebhookInvocationLookupDuration.Observe(float64(elapsedUsec))
	}()

	inv, err := w.lookupInvocation(ctx, ij)
	switch {
	case err != nil:
		return err
	case inv.GetInvocationStatus() == inspb.InvocationStatus_DISCONNECTED_INVOCATION_STATUS:
		// Don't call webhooks for disconnected invocations.
		return nil
	}

	for _, webhook := range w.env.GetWebhooks() {
		task := &notifyWebhookTask{
			hook:          webhook,
			invocationJWT: ij,
			invocation:    inv,
		}
		w.tasks <- task
	}
	return nil
}

// Stop shuts the notifier down in pipeline order: it waits for the lookup
// workers to finish (they exit once the invocations channel has been closed
// and drained), closes the task channel, and then waits for the notify workers
// to finish the remaining tasks.
func (w *webhookNotifier) Stop() {
	// Make sure we are done sending tasks on the task channel before we close it.
	if err := w.lookupGroup.Wait(); err != nil {
		log.Error(err.Error())
	}
	close(w.tasks)

	if err := w.notifyGroup.Wait(); err != nil {
		log.Error(err.Error())
	}
}

// lookupInvocation loads the full invocation proto for ij, authenticated with
// the JWT that accompanies it.
//
// When detailed cache stats are enabled, the invocation's scorecard is filled
// in with action cache misses read from the cache scorecard; a failure to read
// the scorecard is only logged and the invocation is still returned.
func (w *webhookNotifier) lookupInvocation(ctx context.Context, ij *invocationJWT) (*inpb.Invocation, error) {
	ctx = w.env.GetAuthenticator().AuthContextFromTrustedJWT(ctx, ij.jwt)
	inv, err := LookupInvocation(w.env, ctx, ij.id)
	if err != nil {
		return nil, err
	}
	if !hit_tracker.DetailedStatsEnabled() {
		return inv, nil
	}

	// If detailed cache stats are enabled, the invocation will be missing the
	// scorecard misses field (with only AC misses) that we used to populate.
	// Populate these here for backwards compatibility.
	pageToken, err := paging.EncodeOffsetLimit(&pgpb.OffsetLimit{Limit: hit_tracker.CacheMissScoreCardLimit})
	if err != nil {
		return nil, status.InternalErrorf("failed to encode page token: %s", err)
	}

	// Restrict the results to action cache reads that were not found.
	missFilter := &capb.GetCacheScoreCardRequest_Filter{
		Mask: &fieldmaskpb.FieldMask{
			Paths: []string{
				"cache_type",
				"request_type",
				"response_type",
			},
		},
		CacheType:    rspb.CacheType_AC,
		RequestType:  capb.RequestType_READ,
		ResponseType: capb.ResponseType_NOT_FOUND,
	}
	scoreCardReq := &capb.GetCacheScoreCardRequest{
		InvocationId: ij.id,
		PageToken:    pageToken,
		Filter:       missFilter,
	}

	scoreCard, err := scorecard.GetCacheScoreCard(ctx, w.env, scoreCardReq)
	if err != nil {
		log.Warningf("Failed to read cache scorecard for invocation %q: %s", scoreCardReq.InvocationId, err)
		return inv, nil
	}
	inv.ScoreCard = &capb.ScoreCard{Misses: scoreCard.GetResults()}
	return inv, nil
}

// isFinalEvent reports whether the ordered build event is a
// ComponentStreamFinished event.
func isFinalEvent(obe *pepb.OrderedBuildEvent) bool {
	_, finished := obe.Event.Event.(*bepb.BuildEvent_ComponentStreamFinished)
	return finished
}

// isFirstStartedEvent reports whether the event is a Started event and no
// Started event has been received on this channel yet.
func (e *EventChannel) isFirstStartedEvent(bazelBuildEvent *build_event_stream.BuildEvent) bool {
	if e.hasReceivedStartedEvent {
		return false
	}
	switch bazelBuildEvent.Payload.(type) {
	case *build_event_stream.BuildEvent_Started:
		return true
	default:
		return false
	}
}

// isFirstEventWithOptions reports whether the event is the first one on this
// channel that carries options: either a Started event with a non-empty
// options description, or an OptionsParsed event, provided no event with
// options has been received before.
func (e *EventChannel) isFirstEventWithOptions(bazelBuildEvent *build_event_stream.BuildEvent) bool {
	payload := bazelBuildEvent.Payload
	if started, ok := payload.(*build_event_stream.BuildEvent_Started); ok {
		return started.Started.OptionsDescription != "" && !e.hasReceivedEventWithOptions
	}
	if _, ok := payload.(*build_event_stream.BuildEvent_OptionsParsed); ok {
		return !e.hasReceivedEventWithOptions
	}
	return false
}

// isWorkspaceStatusEvent reports whether the event carries a WorkspaceStatus
// payload.
func isWorkspaceStatusEvent(bazelBuildEvent *build_event_stream.BuildEvent) bool {
	_, isWorkspaceStatus := bazelBuildEvent.Payload.(*build_event_stream.BuildEvent_WorkspaceStatus)
	return isWorkspaceStatus
}

// readBazelEvent unmarshals the Bazel event wrapped in the ordered build event
// into out. It returns an error if the ordered build event does not wrap a
// Bazel event, or if unmarshalling fails.
func readBazelEvent(obe *pepb.OrderedBuildEvent, out *build_event_stream.BuildEvent) error {
	wrapped, isBazelEvent := obe.Event.Event.(*bepb.BuildEvent_BazelEvent)
	if !isBazelEvent {
		return fmt.Errorf("Not a bazel event %s", obe)
	}
	return wrapped.BazelEvent.UnmarshalTo(out)
}

// EventChannel holds the per-stream state used to process the build events of
// one invocation. Instances are created by BuildEventHandler.OpenChannel.
//
// ctx is the cancellable context created in OpenChannel; it is cancelled when
// a newer channel is opened for the same invocation ID or when the handler is
// stopped. onClose is the callback run by Close. attempt starts at 1.
// initialSequenceNumber records the sequence number of the first event seen,
// with 0 meaning that no event has been seen yet.
type EventChannel struct {
	ctx            context.Context
	env            environment.Env
	pw             *protofile.BufferedProtoWriter
	beValues       *accumulator.BEValues
	redactor       *redact.StreamingRedactor
	statusReporter *build_status_reporter.BuildStatusReporter
	targetTracker  *target_tracker.TargetTracker
	statsRecorder  *statsRecorder
	collector      interfaces.MetricsCollector
	apiTargetMap   api_common.TargetMap

	startedEvent                     *build_event_stream.BuildEvent_Started
	bufferedEvents                   []*inpb.InvocationEvent
	unprocessedStartingEvents        map[string]struct{}
	numDroppedEventsBeforeProcessing uint64
	initialSequenceNumber            int64
	hasReceivedEventWithOptions      bool
	hasReceivedStartedEvent          bool
	logWriter                        *eventlog.EventLogWriter
	onClose                          func()
	attempt                          uint64

	// isVoid determines whether all EventChannel operations are NOPs. This is set
	// when we're retrying an invocation that is already complete, or is
	// incomplete but was created too far in the past.
	isVoid bool
}

// Context returns the context associated with this channel. It is the
// cancellable context that was created when the channel was opened.
func (e *EventChannel) Context() context.Context {
	return e.ctx
}

// Close runs the channel's onClose callback. For channels created by
// OpenChannel, the callback marks the channel as no longer open and removes
// the cancel func registered for the invocation ID.
func (e *EventChannel) Close() {
	e.onClose()
}

// FinalizeInvocation writes the final state of the invocation to the primary
// DB and enqueues it for background stats recording.
//
// It is a no-op returning nil when the channel is void. Otherwise it finalizes
// the accumulated build event values, flushes the proto writer and closes the
// event log writer (when present), updates the invocation row, flushes API
// facets, reports a disconnect status when applicable, and hands the
// invocation to the stats recorder together with the artifacts to persist.
//
// Returns any error from flushing, closing the log writer, converting the
// invocation, or updating the DB. If the DB reports that the row was not
// updated, the channel is voided and a Canceled error is returned.
func (e *EventChannel) FinalizeInvocation(iid string) error {
	if e.isVoid {
		return nil
	}

	// NOTE(review): presumably this lets finalization continue for up to 10s
	// after e.ctx has been cancelled - confirm against the background package.
	ctx, cancel := background.ExtendContextForFinalization(e.ctx, 10*time.Second)
	defer cancel()

	e.beValues.Finalize(ctx)

	invocation := e.beValues.Invocation()
	invocation.Attempt = e.attempt
	invocation.HasChunkedEventLogs = e.logWriter != nil

	if e.pw != nil {
		if err := e.pw.Flush(ctx); err != nil {
			return err
		}
	}

	if e.logWriter != nil {
		if err := e.logWriter.Close(ctx); err != nil {
			return err
		}
		invocation.LastChunkId = e.logWriter.GetLastChunkId(ctx)
	}

	ti, err := e.tableInvocationFromProto(invocation, iid)
	if err != nil {
		return err
	}
	// Metrics are recorded before the DB update, so they are also emitted for
	// attempts whose update fails or is pre-empted.
	e.recordInvocationMetrics(ti)
	updated, err := e.env.GetInvocationDB().UpdateInvocation(ctx, ti)
	if err != nil {
		return err
	}
	if !updated {
		e.isVoid = true
		return status.CanceledErrorf("Attempt %d of invocation %s pre-empted by more recent attempt, invocation not finalized.", e.attempt, iid)
	}

	e.flushAPIFacets(iid)

	// Report a disconnect only if we successfully updated the invocation.
	// This reduces the likelihood that the disconnected invocation's status
	// will overwrite any statuses written by a more recent attempt.
	if invocation.GetInvocationStatus() == inspb.InvocationStatus_DISCONNECTED_INVOCATION_STATUS {
		log.CtxWarning(ctx, "Reporting disconnected status for invocation")
		e.statusReporter.ReportDisconnect(ctx)
	}

	// Collect the artifacts to copy to the blobstore: build tool logs first,
	// then test outputs. The list stays empty when persisting is disabled.
	persist := &PersistArtifacts{}
	if !*disablePersistArtifacts {
		testOutputURIs := e.beValues.TestOutputURIs()
		persist.URIs = make([]*url.URL, 0, len(testOutputURIs))
		persist.URIs = append(persist.URIs, e.beValues.BuildToolLogURIs()...)
		persist.URIs = append(persist.URIs, testOutputURIs...)
	}

	e.statsRecorder.Enqueue(ctx, invocation, persist)
	log.CtxInfof(ctx, "Finalized invocation in primary DB and enqueued for stats recording (status: %s)", invocation.GetInvocationStatus())
	return nil
}

// fillInvocationFromCacheStats copies every cache stat field from cacheStats
// onto the corresponding field of the invocation row ti.
func fillInvocationFromCacheStats(cacheStats *capb.CacheStats, ti *tables.Invocation) {
	// Action cache counters.
	ti.ActionCacheHits = cacheStats.GetActionCacheHits()
	ti.ActionCacheMisses = cacheStats.GetActionCacheMisses()
	ti.ActionCacheUploads = cacheStats.GetActionCacheUploads()

	// CAS counters.
	ti.CasCacheHits = cacheStats.GetCasCacheHits()
	ti.CasCacheMisses = cacheStats.GetCasCacheMisses()
	ti.CasCacheUploads = cacheStats.GetCasCacheUploads()

	// Download totals and throughput.
	ti.TotalDownloadSizeBytes = cacheStats.GetTotalDownloadSizeBytes()
	ti.TotalDownloadTransferredSizeBytes = cacheStats.GetTotalDownloadTransferredSizeBytes()
	ti.TotalDownloadUsec = cacheStats.GetTotalDownloadUsec()
	ti.DownloadThroughputBytesPerSecond = cacheStats.GetDownloadThroughputBytesPerSecond()

	// Upload totals and throughput.
	ti.TotalUploadSizeBytes = cacheStats.GetTotalUploadSizeBytes()
	ti.TotalUploadTransferredSizeBytes = cacheStats.GetTotalUploadTransferredSizeBytes()
	ti.TotalUploadUsec = cacheStats.GetTotalUploadUsec()
	ti.UploadThroughputBytesPerSecond = cacheStats.GetUploadThroughputBytesPerSecond()

	// Action execution time, split by cached vs. uncached actions.
	ti.TotalCachedActionExecUsec = cacheStats.GetTotalCachedActionExecUsec()
	ti.TotalUncachedActionExecUsec = cacheStats.GetTotalUncachedActionExecUsec()
}

// invocationStatusLabel maps the invocation's status to a metrics label:
// "success" or "failure" for complete invocations (depending on ti.Success),
// "disconnected" for disconnected ones, and "unknown" for everything else.
func invocationStatusLabel(ti *tables.Invocation) string {
	switch ti.InvocationStatus {
	case int64(inspb.InvocationStatus_COMPLETE_INVOCATION_STATUS):
		if !ti.Success {
			return "failure"
		}
		return "success"
	case int64(inspb.InvocationStatus_DISCONNECTED_INVOCATION_STATUS):
		return "disconnected"
	default:
		return "unknown"
	}
}

// getGroupIDForMetrics returns the group ID of the user authenticated on the
// channel's context, or the anonymous user identifier when no authenticated
// user can be resolved.
func (e *EventChannel) getGroupIDForMetrics() string {
	if user, err := e.env.GetAuthenticator().AuthenticatedUser(e.ctx); err == nil {
		return user.GetGroupID()
	}
	return interfaces.AuthAnonymousUser
}

// recordInvocationMetrics emits the per-invocation Prometheus metrics: the
// invocation counter and the two duration observations, one labelled by Bazel
// command and one labelled by group ID.
func (e *EventChannel) recordInvocationMetrics(ti *tables.Invocation) {
	label := invocationStatusLabel(ti)
	durationUsec := float64(ti.DurationUsec)

	countLabels := prometheus.Labels{
		metrics.InvocationStatusLabel: label,
		metrics.BazelExitCode:         ti.BazelExitCode,
		metrics.BazelCommand:          ti.Command,
	}
	metrics.InvocationCount.With(countLabels).Inc()

	durationLabels := prometheus.Labels{
		metrics.InvocationStatusLabel: label,
		metrics.BazelCommand:          ti.Command,
	}
	metrics.InvocationDurationUs.With(durationLabels).Observe(durationUsec)

	exportedLabels := prometheus.Labels{
		metrics.InvocationStatusLabel: label,
		metrics.GroupID:               e.getGroupIDForMetrics(),
	}
	metrics.InvocationDurationUsExported.With(exportedLabels).Observe(durationUsec)
}

// md5Int64 hashes text with MD5 and returns the first 8 bytes of the digest
// interpreted as a big-endian integer. The result may be negative, since the
// unsigned 64-bit value is reinterpreted as int64.
func md5Int64(text string) int64 {
	digest := md5.Sum([]byte(text))
	prefix := binary.BigEndian.Uint64(digest[:8])
	return int64(prefix)
}

// HandleEvent processes a single build event stream request and records the
// event count and handling duration metrics, labelled with the numeric gRPC
// status code of the result. The error from processing is returned unchanged.
func (e *EventChannel) HandleEvent(event *pepb.PublishBuildToolEventStreamRequest) error {
	began := time.Now()
	err := e.handleEvent(event)
	elapsed := time.Since(began)

	byStatus := prometheus.Labels{
		metrics.StatusLabel: fmt.Sprintf("%d", gstatus.Code(err)),
	}
	metrics.BuildEventCount.With(byStatus).Inc()
	metrics.BuildEventHandlerDurationUs.With(byStatus).Observe(float64(elapsed.Microseconds()))
	return err
}

// handleEvent processes one event from the build event stream.
//
// In order, it:
//   - returns nil immediately if the channel has been voided;
//   - records the first sequence number seen and voids the channel if that
//     number is greater than firstExpectedSequenceNumber;
//   - returns nil without further work for final events (per isFinalEvent);
//   - decodes the Bazel event and, for a Finished event carrying the
//     interrupted exit code, asks the remote execution service (if any) to
//     cancel executions for the invocation;
//   - on the first Started event, records the Bazel version metric and
//     registers the "starting" events that must be processed before build
//     metadata is written;
//   - on the first event with options, authenticates, creates the invocation
//     row, and sets up the proto writer and (optionally) the event log writer;
//   - otherwise, while either the options event or the Started event is still
//     missing, buffers the event (keeping at most 100) and returns;
//   - finally processes any buffered events followed by the current event.
//
// Note that event.OrderedBuildEvent and its StreamId are dereferenced without
// nil checks.
func (e *EventChannel) handleEvent(event *pepb.PublishBuildToolEventStreamRequest) error {
	if e.isVoid {
		return nil
	}

	seqNo := event.OrderedBuildEvent.SequenceNumber
	streamID := event.OrderedBuildEvent.StreamId
	iid := streamID.InvocationId

	// A zero value means no event has been seen yet on this channel.
	if e.initialSequenceNumber == 0 {
		e.initialSequenceNumber = seqNo
	}
	// We only allow initial sequence numbers greater than one in the case where
	// Bazel failed to receive all of our ACKs after we finalized an invocation
	// (marking it complete). In that case we just void the channel and ACK all
	// events without doing any work.
	if e.initialSequenceNumber > firstExpectedSequenceNumber {
		// TODO: once https://github.com/bazelbuild/bazel/pull/18437 lands in
		// Bazel, log an error if the client attempt number is 1 in this case,
		// since today we're relying on Bazel to always start sending events
		// starting from sequence number 1 in the first attempt.
		log.Infof("Voiding EventChannel for invocation %s: build event stream starts with sequence number > %d (%d), which likely means Bazel is retrying an invocation that we already finalized.", iid, firstExpectedSequenceNumber, e.initialSequenceNumber)
		e.isVoid = true
		return nil
	}

	// Final events are not decoded or stored here.
	if isFinalEvent(event.OrderedBuildEvent) {
		return nil
	}

	var bazelBuildEvent build_event_stream.BuildEvent
	if err := readBazelEvent(event.OrderedBuildEvent, &bazelBuildEvent); err != nil {
		log.CtxWarningf(e.ctx, "error reading bazel event: %s", err)
		return err
	}

	invocationEvent := &inpb.InvocationEvent{
		EventTime:      event.OrderedBuildEvent.Event.EventTime,
		BuildEvent:     &bazelBuildEvent,
		SequenceNumber: event.OrderedBuildEvent.SequenceNumber,
	}

	// Bazel sends an Interrupted exit code in the finished event if the user cancelled the build.
	// Use that signal to cancel any actions that are currently in the remote execution system.
	if f, ok := bazelBuildEvent.Payload.(*build_event_stream.BuildEvent_Finished); ok {
		if f.Finished.GetExitCode().GetCode() == InterruptedExitCode && e.env.GetRemoteExecutionService() != nil {
			// Cancellation is best-effort: a failure is logged, not returned.
			if err := e.env.GetRemoteExecutionService().Cancel(e.ctx, iid); err != nil {
				log.CtxWarningf(e.ctx, "Could not cancel executions for invocation %q: %s", iid, err)
			}
		}
	}
	if seqNo == 1 {
		log.CtxDebugf(e.ctx, "First event! sequence: %d invocation_id: %s, project_id: %s, notification_keywords: %s", seqNo, iid, event.ProjectId, event.NotificationKeywords)
	}

	if e.isFirstStartedEvent(&bazelBuildEvent) {
		// The assertion result is ignored; this relies on isFirstStartedEvent
		// only returning true for Started payloads.
		started, _ := bazelBuildEvent.Payload.(*build_event_stream.BuildEvent_Started)

		// Report the version as "major.minor", or "unknown" if the build tool
		// version is not valid semver.
		parsedVersion, err := semver.NewVersion(started.Started.GetBuildToolVersion())
		version := "unknown"
		if err == nil {
			version = fmt.Sprintf("%d.%d", parsedVersion.Major(), parsedVersion.Minor())
		}
		metrics.InvocationsByBazelVersionCount.With(
			prometheus.Labels{metrics.BazelVersion: version}).Inc()

		e.hasReceivedStartedEvent = true
		// Track the Started event itself plus the announced children of the
		// types below; processSingleEvent removes entries as they are
		// processed and writes build metadata once the set is empty.
		e.unprocessedStartingEvents[bazelBuildEvent.Id.String()] = struct{}{}
		for _, child := range bazelBuildEvent.Children {
			switch child.Id.(type) {
			case *build_event_stream.BuildEventId_OptionsParsed:
				e.unprocessedStartingEvents[child.String()] = struct{}{}
			case *build_event_stream.BuildEventId_WorkspaceStatus:
				e.unprocessedStartingEvents[child.String()] = struct{}{}
			case *build_event_stream.BuildEventId_BuildMetadata:
				e.unprocessedStartingEvents[child.String()] = struct{}{}
			case *build_event_stream.BuildEventId_StructuredCommandLine:
				e.unprocessedStartingEvents[child.String()] = struct{}{}
			case *build_event_stream.BuildEventId_UnstructuredCommandLine:
				e.unprocessedStartingEvents[child.String()] = struct{}{}
			case *build_event_stream.BuildEventId_WorkflowConfigured:
				e.unprocessedStartingEvents[child.String()] = struct{}{}
			}
		}
	}
	// If this is the first event with options, keep track of the project ID and save any notification keywords.
	if e.isFirstEventWithOptions(&bazelBuildEvent) {
		e.hasReceivedEventWithOptions = true
		log.CtxDebugf(e.ctx, "Received options! sequence: %d invocation_id: %s", seqNo, iid)

		authenticated, err := e.authenticateEvent(&bazelBuildEvent)
		if err != nil {
			return err
		}

		// IP rules and the per-subdomain base URL only apply to authenticated
		// requests.
		if authenticated {
			if irs := e.env.GetIPRulesService(); irs != nil {
				if err := irs.Authorize(e.ctx); err != nil {
					return err
				}
			}
			baseBBURL, err := subdomain.ReplaceURLSubdomain(e.ctx, e.env, build_buddy_url.String())
			if err != nil {
				return err
			}
			e.statusReporter.SetBaseBuildBuddyURL(baseBBURL)
		}

		invocationUUID, err := uuid.StringToBytes(iid)
		if err != nil {
			return err
		}
		ti := &tables.Invocation{
			InvocationID:     iid,
			InvocationUUID:   invocationUUID,
			InvocationStatus: int64(inspb.InvocationStatus_PARTIAL_INVOCATION_STATUS),
			RedactionFlags:   redact.RedactionFlagStandardRedactions,
			Attempt:          e.attempt,
		}
		if *enableChunkedEventLogs {
			ti.LastChunkId = eventlog.EmptyId
		}

		created, err := e.env.GetInvocationDB().CreateInvocation(e.ctx, ti)
		if err != nil {
			return err
		}
		if !created {
			// We failed to retry an existing invocation
			log.CtxWarningf(e.ctx, "Voiding EventChannel for invocation %s: invocation already exists and is either completed or was last updated over 4 hours ago, so may not be retried.", iid)
			e.isVoid = true
			return nil
		}
		// Adopt the attempt number from ti as it stands after CreateInvocation.
		e.attempt = ti.Attempt
		e.ctx = log.EnrichContext(e.ctx, "invocation_attempt", fmt.Sprintf("%d", e.attempt))
		log.CtxInfof(e.ctx, "Created invocation %q, attempt %d", ti.InvocationID, ti.Attempt)
		// A configured chunk size of 0 selects the default.
		chunkFileSizeBytes := *chunkFileSizeBytes
		if chunkFileSizeBytes == 0 {
			chunkFileSizeBytes = defaultChunkFileSizeBytes
		}
		e.pw = protofile.NewBufferedProtoWriter(
			e.env.GetBlobstore(),
			GetStreamIdFromInvocationIdAndAttempt(iid, e.attempt),
			chunkFileSizeBytes,
		)
		if *enableChunkedEventLogs {
			numLinesToRetain := getNumActionsFromOptions(&bazelBuildEvent)
			if numLinesToRetain != 0 {
				// the number of lines curses can overwrite is 3 + the ui_actions shown:
				// 1 for the progress tracker, 1 for each action, and 2 blank lines.
				// 0 indicates that curses is not being used.
				numLinesToRetain += 3
			}
			e.logWriter = eventlog.NewEventLogWriter(
				e.ctx,
				e.env.GetBlobstore(),
				e.env.GetKeyValStore(),
				e.env.GetPubSub(),
				eventlog.GetEventLogPubSubChannel(iid),
				eventlog.GetEventLogPathFromInvocationIdAndAttempt(iid, e.attempt),
				numLinesToRetain,
			)
		}
		// Since this is the first event with options and we just parsed the API key,
		// now is a good time to record invocation usage for the group. Check that
		// this is the first attempt of this invocation, to guarantee that we
		// don't increment the usage on invocation retries.
		if ut := e.env.GetUsageTracker(); ut != nil && ti.Attempt == 1 {
			incrementInvocationUsage(e.ctx, ut)
		}
	} else if !e.hasReceivedEventWithOptions || !e.hasReceivedStartedEvent {
		// Not ready to process yet: hold the event. The buffer keeps at most
		// 100 events; beyond that the oldest is discarded and counted as
		// dropped.
		e.bufferedEvents = append(e.bufferedEvents, invocationEvent)
		if len(e.bufferedEvents) > 100 {
			e.numDroppedEventsBeforeProcessing++
			e.bufferedEvents = e.bufferedEvents[1:]
		}
		return nil
	}

	// Process buffered events.
	// If one of them fails, the buffer is left as-is and the error is returned.
	for _, event := range e.bufferedEvents {
		if err := e.processSingleEvent(event, iid); err != nil {
			return err
		}
	}
	e.bufferedEvents = nil

	// Process regular events.
	return e.processSingleEvent(invocationEvent, iid)
}

// authenticateEvent reports whether the channel's context is authenticated.
//
// If the context already carries an authenticated user, it returns true
// without inspecting the event. Otherwise it looks for an API key in the
// event's options: with no key it returns (false, nil); with a key it
// replaces e.ctx with the context produced by the authenticator and returns
// (true, nil), unless that context carries an auth error, in which case the
// error is returned.
func (e *EventChannel) authenticateEvent(bazelBuildEvent *build_event_stream.BuildEvent) (bool, error) {
	authenticator := e.env.GetAuthenticator()
	if u, err := authenticator.AuthenticatedUser(e.ctx); err == nil && u != nil {
		return true, nil
	}

	opts, err := extractOptions(bazelBuildEvent)
	if err != nil {
		return false, err
	}
	key, err := authutil.ParseAPIKeyFromString(opts)
	if err != nil {
		return false, err
	}
	if key == "" {
		return false, nil
	}

	e.ctx = authenticator.AuthContextFromAPIKey(e.ctx, key)
	switch authErr := e.ctx.Value(interfaces.AuthContextUserErrorKey).(type) {
	case nil:
		return true, nil
	case error:
		return false, authErr
	default:
		// The stored value is not an error; wrap its printed form.
		return false, status.UnknownError(fmt.Sprintf("%v", authErr))
	}
}

// processSingleEvent runs one invocation event through the processing
// pipeline: redaction, in-memory accumulation, build log extraction, target
// tracking, status reporting, API facet collection, and persistence to the
// proto stream. Once the last outstanding starting event has been processed,
// it also writes the accumulated build metadata to the DB.
func (e *EventChannel) processSingleEvent(event *inpb.InvocationEvent, iid string) error {
	buildEvent := event.BuildEvent

	if err := e.redactor.RedactAPIKey(e.ctx, buildEvent); err != nil {
		return err
	}
	if err := e.redactor.RedactMetadata(buildEvent); err != nil {
		return err
	}
	// Keep a subset of invocation fields in memory.
	if err := e.beValues.AddEvent(buildEvent); err != nil {
		return err
	}

	if progress, ok := buildEvent.Payload.(*build_event_stream.BuildEvent_Progress); ok && e.logWriter != nil {
		logBytes := append([]byte(progress.Progress.Stderr), []byte(progress.Progress.Stdout)...)
		if _, err := e.logWriter.Write(e.ctx, logBytes); err != nil {
			log.Errorf("Error writing build logs for event: %s\nEvent: %s", err, event)
		}
		// The log goes to blobstore separately, so strip it from the copy
		// that is written to the protostream.
		progress.Progress.Stderr = ""
		progress.Progress.Stdout = ""
	}

	e.targetTracker.TrackTargetsForEvent(e.ctx, buildEvent)
	e.statusReporter.ReportStatusForEvent(e.ctx, buildEvent)

	// Facet collection failures are logged but do not fail the event.
	if err := e.collectAPIFacets(iid, buildEvent); err != nil {
		log.CtxWarningf(e.ctx, "Error collecting API facets: %s", err)
	}

	if e.pw != nil {
		if err := e.pw.WriteProtoToStream(e.ctx, event); err != nil {
			return err
		}

		// Flush right after the workspace status event so that clients
		// fetching an in-progress build have command line options and
		// workspace info to show, and also whenever more than a minute has
		// passed since the last write.
		shouldFlush := isWorkspaceStatusEvent(buildEvent) || e.pw.TimeSinceLastWrite().Minutes() > 1
		if shouldFlush {
			if err := e.pw.Flush(e.ctx); err != nil {
				return err
			}
		}
	}

	if len(e.unprocessedStartingEvents) == 0 {
		return nil
	}
	eventID := buildEvent.Id.String()
	if _, pending := e.unprocessedStartingEvents[eventID]; !pending {
		return nil
	}
	delete(e.unprocessedStartingEvents, eventID)
	if len(e.unprocessedStartingEvents) > 0 {
		return nil
	}
	// All starting events are in: update the invocation in the DB so it can be
	// searched by commit SHA, user name, etc. while still in progress.
	return e.writeBuildMetadata(e.ctx, iid)
}

// apiFacetsExpiration is the expiration passed to the collector for the
// target and action entries written by flushAPIFacets and collectAPIFacets.
const apiFacetsExpiration = 1 * time.Hour

// flushAPIFacets serializes every target in e.apiTargetMap and stores it in
// the collector under a key derived from the authenticated user's group ID,
// the invocation ID and the target label. It is a no-op (returning nil) when
// there is no collector, when the API service is absent or has its cache
// disabled, or when no authenticated user is available.
func (e *EventChannel) flushAPIFacets(iid string) error {
	if e.collector == nil {
		return nil
	}
	if api := e.env.GetAPIService(); api == nil || !api.CacheEnabled() {
		return nil
	}
	user, err := e.env.GetAuthenticator().AuthenticatedUser(e.ctx)
	if err != nil || user == nil {
		return nil
	}

	for label, target := range e.apiTargetMap {
		serialized, err := proto.Marshal(target)
		if err != nil {
			return err
		}
		targetKey := api_common.TargetLabelKey(user.GetGroupID(), iid, label)
		if err := e.collector.Set(e.ctx, targetKey, string(serialized), apiFacetsExpiration); err != nil {
			return err
		}
	}
	return nil
}

// collectAPIFacets feeds the event into e.apiTargetMap and, if the event
// yields an action, appends the serialized action to a collector list keyed
// by group ID, invocation ID and target label, then refreshes that key's
// expiration. It is a no-op (returning nil) when there is no collector, when
// the API service is absent or has its cache disabled, or when no
// authenticated user is available.
func (e *EventChannel) collectAPIFacets(iid string, event *build_event_stream.BuildEvent) error {
	if e.collector == nil {
		return nil
	}
	if api := e.env.GetAPIService(); api == nil || !api.CacheEnabled() {
		return nil
	}
	user, err := e.env.GetAuthenticator().AuthenticatedUser(e.ctx)
	if err != nil || user == nil {
		return nil
	}

	e.apiTargetMap.ProcessEvent(iid, event)

	action := api_common.FillActionFromBuildEvent(event, &apipb.Action{
		Id: &apipb.Action_Id{
			InvocationId: iid,
		},
	})
	if action == nil {
		// Not an action event; nothing more to record.
		return nil
	}
	action = api_common.FillActionOutputFilesFromBuildEvent(event, action)

	serialized, err := proto.Marshal(action)
	if err != nil {
		return err
	}
	actionKey := api_common.ActionLabelKey(user.GetGroupID(), iid, action.GetTargetLabel())
	if err := e.collector.ListAppend(e.ctx, actionKey, string(serialized)); err != nil {
		return err
	}
	return e.collector.Expire(e.ctx, actionKey, apiFacetsExpiration)
}

// writeBuildMetadata converts the accumulated invocation values into a table
// row (stamped with the channel's attempt number and, when a log writer is
// present, its last chunk ID) and updates the invocation in the DB. If the DB
// reports that nothing was updated, the channel is voided and a Canceled
// error is returned.
func (e *EventChannel) writeBuildMetadata(ctx context.Context, invocationID string) error {
	invocationDB := e.env.GetInvocationDB()
	inv := e.beValues.Invocation()
	if e.logWriter != nil {
		inv.LastChunkId = e.logWriter.GetLastChunkId(ctx)
	}

	row, err := e.tableInvocationFromProto(inv, "" /*=blobID*/)
	if err != nil {
		return err
	}
	row.Attempt = e.attempt

	switch updated, err := invocationDB.UpdateInvocation(ctx, row); {
	case err != nil:
		return err
	case !updated:
		e.isVoid = true
		return status.CanceledErrorf("Attempt %d of invocation %s pre-empted by more recent attempt, no build metadata written.", e.attempt, invocationID)
	}
	return nil
}

// GetNumDroppedEvents returns the number of events that were discarded from
// the pre-processing buffer because it overflowed before processing could
// begin.
func (e *EventChannel) GetNumDroppedEvents() uint64 {
	return e.numDroppedEventsBeforeProcessing
}

// GetInitialSequenceNumber returns the sequence number of the first event
// handled by this channel, or 0 if none has been recorded yet.
func (e *EventChannel) GetInitialSequenceNumber() int64 {
	return e.initialSequenceNumber
}

// extractOptions returns the command line options carried by the event as a
// single string: the options description for a Started event, or the parsed
// command line joined with spaces for an OptionsParsed event. Any other
// payload yields an empty string. The returned error is always nil.
func extractOptions(event *build_event_stream.BuildEvent) (string, error) {
	if started, ok := event.Payload.(*build_event_stream.BuildEvent_Started); ok {
		return started.Started.OptionsDescription, nil
	}
	if parsed, ok := event.Payload.(*build_event_stream.BuildEvent_OptionsParsed); ok {
		return strings.Join(parsed.OptionsParsed.CmdLine, " "), nil
	}
	return "", nil
}

// getNumActionsFromOptions determines how many in-progress actions Bazel's
// curses UI will display, based on the options carried by the event.
//
// It returns:
//   - 0 if the last --curses value is "no" (curses is not in use);
//   - the last --ui_actions_shown value, raised to 1 if it is smaller;
//   - defaultActionsShown if the options cannot be extracted or split, if
//     --ui_actions_shown is absent, or if its value is not an integer.
//
// Only the "--name=value" form of each flag is recognized.
func getNumActionsFromOptions(event *build_event_stream.BuildEvent) int {
	options, err := extractOptions(event)
	if err != nil {
		// err is an error, so it must be formatted with %v rather than %d.
		log.Warningf("Could not extract options for ui_actions_shown, defaulting to %d: %v", defaultActionsShown, err)
		return defaultActionsShown
	}
	optionsList, err := shlex.Split(options)
	if err != nil {
		log.Warningf("Could not shlex split options '%s' for ui_actions_shown, defaulting to %d: %v", options, defaultActionsShown, err)
		return defaultActionsShown
	}
	actionsShownValues := getOptionValues(optionsList, "ui_actions_shown")
	cursesValues := getOptionValues(optionsList, "curses")
	if len(cursesValues) > 0 {
		// When a flag is repeated, the last occurrence wins.
		curses := cursesValues[len(cursesValues)-1]
		if curses == "no" {
			return 0
		} else if curses != "yes" && curses != "auto" {
			log.Warningf("Unrecognized argument to curses, assuming auto: %v", curses)
		}
	}
	if len(actionsShownValues) > 0 {
		n, err := strconv.Atoi(actionsShownValues[len(actionsShownValues)-1])
		if err != nil {
			// Fall through to the default below.
			log.Warningf("Invalid argument to ui_actions_shown, defaulting to %d: %v", defaultActionsShown, err)
		} else if n < 1 {
			return 1
		} else {
			return n
		}
	}
	return defaultActionsShown
}

// getOptionValues returns, in order, the value of every "--<optionName>=<value>"
// entry in options. Scanning stops at the first bare "--" entry. The result
// is an empty, non-nil slice when nothing matches.
func getOptionValues(options []string, optionName string) []string {
	prefix := "--" + optionName + "="
	matches := make([]string, 0)
	for _, arg := range options {
		if arg == "--" {
			// Everything after "--" is not an option.
			return matches
		}
		if !strings.HasPrefix(arg, prefix) {
			continue
		}
		matches = append(matches, arg[len(prefix):])
	}
	return matches
}

// invocationEventCB is a callback invoked once per invocation event while a
// stored event stream is being read. Returning a non-nil error stops the read
// and propagates that error to the caller.
type invocationEventCB func(*inpb.InvocationEvent) error

// streamRawInvocationEvents reads the stored proto stream identified by
// streamID from the blobstore and passes each event to callback in order. It
// returns nil once the reader reports io.EOF, or the first error returned by
// either the reader or the callback.
func streamRawInvocationEvents(env environment.Env, ctx context.Context, streamID string, callback invocationEventCB) error {
	reader := protofile.NewBufferedProtoReader(env.GetBlobstore(), streamID, func() proto.Message {
		return &inpb.InvocationEvent{}
	})
	for {
		msg, err := reader.ReadProto(ctx)
		switch {
		case err == io.EOF:
			return nil
		case err != nil:
			return err
		}
		if cbErr := callback(msg.(*inpb.InvocationEvent)); cbErr != nil {
			return cbErr
		}
	}
}

// LookupInvocation fetches the invocation with the given ID together with its
// events, which are buffered in memory and attached to the returned
// invocation. When events do not need to be held in memory, prefer
// LookupInvocationWithCallback.
func LookupInvocation(env environment.Env, ctx context.Context, iid string) (*inpb.Invocation, error) {
	var collected []*inpb.InvocationEvent
	// Some buggy rulesets mark intermediate output files as important
	// outputs, producing very large BES streams that use a ton of memory and
	// cannot be displayed by the browser. Once maxEventCount events have been
	// collected, keep only important events so the invocation stays
	// displayable.
	collect := func(ev *inpb.InvocationEvent) error {
		if len(collected) < maxEventCount || accumulator.IsImportantEvent(ev.BuildEvent) {
			collected = append(collected, ev)
		}
		return nil
	}

	inv, err := LookupInvocationWithCallback(ctx, env, iid, collect)
	if err != nil {
		return nil, err
	}
	inv.Event = collected
	return inv, nil
}

// LookupInvocationWithCallback looks up an invocation but uses a callback for
// events instead of buffering events into the events list.
//
// The invocation row is read from the DB and converted to a proto. Two tasks
// then run concurrently in an errgroup: one loads the cache scorecard (unless
// detailed stats are enabled), the other streams the stored events, trims
// them for display, and hands each one to cb. An error from cb or from reading
// the stream aborts the lookup; a scorecard read failure is only logged.
//
// TODO: switch to using this API wherever possible.
func LookupInvocationWithCallback(ctx context.Context, env environment.Env, iid string, cb invocationEventCB) (*inpb.Invocation, error) {
	ti, err := env.GetInvocationDB().LookupInvocation(ctx, iid)
	if err != nil {
		return nil, err
	}

	// If this is an incomplete invocation, attempt to fill cache stats
	// from counters rather than trying to read them from invocation b/c
	// they won't be set yet.
	if ti.InvocationStatus == int64(inspb.InvocationStatus_PARTIAL_INVOCATION_STATUS) {
		if cacheStats := hit_tracker.CollectCacheStats(ctx, env, iid); cacheStats != nil {
			fillInvocationFromCacheStats(cacheStats, ti)
		}
	}

	invocation := TableInvocationToProto(ti)
	streamID := GetStreamIdFromInvocationIdAndAttempt(iid, ti.Attempt)

	// scoreCard is written only by the first goroutine below and read only
	// after eg.Wait() returns.
	var scoreCard *capb.ScoreCard
	eg, ctx := errgroup.WithContext(ctx)
	eg.Go(func() error {
		// When detailed stats are enabled, the scorecard is not inlined in the
		// invocation.
		if !hit_tracker.DetailedStatsEnabled() {
			// The cache ScoreCard is not stored in the table invocation, so we do this lookup
			// after converting the table invocation to a proto invocation.
			if ti.InvocationStatus == int64(inspb.InvocationStatus_PARTIAL_INVOCATION_STATUS) {
				scoreCard = hit_tracker.ScoreCard(ctx, env, iid)
			} else {
				sc, err := scorecard.Read(ctx, env, iid, ti.Attempt)
				if err != nil {
					// Best-effort: the invocation is still returned without a
					// scorecard.
					log.Warningf("Failed to read scorecard for invocation %s: %s", iid, err)
				} else {
					scoreCard = sc
				}
			}
		}
		return nil
	})

	eg.Go(func() error {
		// The console buffer is only rendered from progress events when the
		// invocation does not have chunked event logs.
		var screenWriter *terminal.ScreenWriter
		if !invocation.HasChunkedEventLogs {
			screenWriter = terminal.NewScreenWriter()
		}
		var redactor *redact.StreamingRedactor
		if ti.RedactionFlags&redact.RedactionFlagStandardRedactions != redact.RedactionFlagStandardRedactions {
			// only redact if we hadn't redacted enough, only parse again if we redact
			redactor = redact.NewStreamingRedactor(env)
		}
		beValues := accumulator.NewBEValues(invocation)
		// events is never appended to in this function (events are delivered
		// through cb instead), so invocation.Event is set to an empty, non-nil
		// slice below.
		events := []*inpb.InvocationEvent{}
		structuredCommandLines := []*command_line.CommandLine{}
		err := streamRawInvocationEvents(env, ctx, streamID, func(event *inpb.InvocationEvent) error {
			// Events are re-redacted and re-accumulated into beValues only
			// when the stored stream lacks the standard redactions.
			if redactor != nil {
				if err := redactor.RedactAPIKeysWithSlowRegexp(ctx, event.BuildEvent); err != nil {
					return err
				}
				if err := redactor.RedactMetadata(event.BuildEvent); err != nil {
					return err
				}
				if err := beValues.AddEvent(event.BuildEvent); err != nil {
					return err
				}
			}

			switch p := event.BuildEvent.Payload.(type) {
			case *build_event_stream.BuildEvent_Started:
				// Drop child pattern expanded events since this list can be
				// very long and we don't render these currently.
				event.BuildEvent.Children = nil
			case *build_event_stream.BuildEvent_Expanded:
				if len(event.BuildEvent.GetId().GetPattern().GetPattern()) > 0 {
					pattern, truncated := TruncateStringSlice(event.BuildEvent.GetId().GetPattern().GetPattern(), maxPatternLengthBytes)
					// NOTE(review): this is overwritten on every Expanded event
					// with a non-empty pattern, so the final value reflects
					// only the last such event - confirm that is intended.
					invocation.PatternsTruncated = truncated
					event.BuildEvent.GetId().GetPattern().Pattern = pattern
				}
				// Don't return child TargetConfigured events to the UI; the UI
				// only cares about the actual TargetConfigured event payloads.
				event.BuildEvent.Children = nil
				// UI doesn't render TestSuiteExpansions yet (though we probably
				// should at some point?) So don't return these either.
				p.Expanded.TestSuiteExpansions = nil
			case *build_event_stream.BuildEvent_Progress:
				if screenWriter != nil {
					screenWriter.Write([]byte(p.Progress.Stderr))
					screenWriter.Write([]byte(p.Progress.Stdout))
				}
				// Don't serve progress event contents to the UI since they are too
				// large. Instead, logs are available either via the
				// console_buffer field or the separate logs RPC.
				p.Progress.Stderr = ""
				p.Progress.Stdout = ""
			case *build_event_stream.BuildEvent_StructuredCommandLine:
				structuredCommandLines = append(structuredCommandLines, p.StructuredCommandLine)
			}

			// The callback sees the event after the trimming above.
			if err := cb(event); err != nil {
				return err
			}
			return nil
		})
		if err != nil {
			return err
		}

		invocation.Event = events
		// TODO: Can we remove this StructuredCommandLine field? These are
		// already available in the events list.
		invocation.StructuredCommandLine = structuredCommandLines
		if screenWriter != nil {
			invocation.ConsoleBuffer = string(screenWriter.Render())
		}
		return nil
	})

	if err := eg.Wait(); err != nil {
		return nil, err
	}

	invocation.ScoreCard = scoreCard
	return invocation, nil
}

// tableInvocationFromProto builds the DB row for the invocation proto p,
// recording blobID as its blob ID.
//
// The repo URL is stored in normalized form when normalization succeeds and
// verbatim otherwise. Permissions come from the authenticated group on the
// channel's context, with OTHERS_READ added when the proto is marked public.
// The row is always flagged as having the standard redactions applied.
//
// An error is returned if the invocation ID cannot be converted to UUID
// bytes, if the tags cannot be joined, or if the group permissions cannot be
// determined.
func (e *EventChannel) tableInvocationFromProto(p *inpb.Invocation, blobID string) (*tables.Invocation, error) {
	invocationUUID, err := uuid.StringToBytes(p.InvocationId)
	if err != nil {
		return nil, err
	}

	row := &tables.Invocation{
		InvocationID:              p.InvocationId,
		InvocationUUID:            invocationUUID,
		Success:                   p.Success,
		User:                      p.User,
		DurationUsec:              p.DurationUsec,
		Host:                      p.Host,
		RepoURL:                   p.RepoUrl,
		BranchName:                p.BranchName,
		CommitSHA:                 p.CommitSha,
		Role:                      p.Role,
		Command:                   p.Command,
		ActionCount:               p.ActionCount,
		BlobID:                    blobID,
		InvocationStatus:          int64(p.InvocationStatus),
		LastChunkId:               p.LastChunkId,
		RedactionFlags:            redact.RedactionFlagStandardRedactions,
		Attempt:                   p.Attempt,
		BazelExitCode:             p.BazelExitCode,
		DownloadOutputsOption:     int64(p.DownloadOutputsOption),
		RemoteExecutionEnabled:    p.RemoteExecutionEnabled,
		UploadLocalResultsEnabled: p.UploadLocalResultsEnabled,
	}

	// Prefer the normalized repo URL; keep the raw one if it can't be parsed.
	if normalized, err := gitutil.NormalizeRepoURL(p.RepoUrl); err == nil {
		row.RepoURL = normalized.String()
	}
	if p.Pattern != nil {
		row.Pattern = invocation_format.ShortFormatPatterns(p.Pattern)
	}

	joinedTags, err := invocation_format.JoinTags(p.Tags)
	if err != nil {
		return nil, err
	}
	row.Tags = joinedTags

	groupPerms, err := perms.ForAuthenticatedGroup(e.ctx, e.env)
	if err != nil {
		return nil, err
	}
	row.Perms = groupPerms.Perms
	if p.ReadPermission == inpb.InvocationPermission_PUBLIC {
		row.Perms |= perms.OTHERS_READ
	}
	return row, nil
}

// TableInvocationToProto converts a DB invocation row into the invocation
// proto returned to clients.
//
// The blob ID is not exposed in the proto. The read permission is PUBLIC when
// the row's perms include OTHERS_READ and GROUP otherwise. The stored pattern
// string is split on ", ", and HasChunkedEventLogs is set whenever the row has
// a non-empty last chunk ID.
func TableInvocationToProto(i *tables.Invocation) *inpb.Invocation {
	readPermission := inpb.InvocationPermission_GROUP
	if i.Perms&perms.OTHERS_READ > 0 {
		readPermission = inpb.InvocationPermission_PUBLIC
	}

	cacheStats := &capb.CacheStats{
		ActionCacheHits:                   i.ActionCacheHits,
		ActionCacheMisses:                 i.ActionCacheMisses,
		ActionCacheUploads:                i.ActionCacheUploads,
		CasCacheHits:                      i.CasCacheHits,
		CasCacheMisses:                    i.CasCacheMisses,
		CasCacheUploads:                   i.CasCacheUploads,
		TotalDownloadSizeBytes:            i.TotalDownloadSizeBytes,
		TotalDownloadTransferredSizeBytes: i.TotalDownloadTransferredSizeBytes,
		TotalUploadSizeBytes:              i.TotalUploadSizeBytes,
		TotalUploadTransferredSizeBytes:   i.TotalUploadTransferredSizeBytes,
		TotalDownloadUsec:                 i.TotalDownloadUsec,
		TotalUploadUsec:                   i.TotalUploadUsec,
		TotalCachedActionExecUsec:         i.TotalCachedActionExecUsec,
		TotalUncachedActionExecUsec:       i.TotalUncachedActionExecUsec,
		DownloadThroughputBytesPerSecond:  i.DownloadThroughputBytesPerSecond,
		UploadThroughputBytesPerSecond:    i.UploadThroughputBytesPerSecond,
	}

	out := &inpb.Invocation{
		InvocationId:              i.InvocationID,
		Success:                   i.Success,
		User:                      i.User,
		DurationUsec:              i.DurationUsec,
		Host:                      i.Host,
		RepoUrl:                   i.RepoURL,
		BranchName:                i.BranchName,
		CommitSha:                 i.CommitSHA,
		Role:                      i.Role,
		Command:                   i.Command,
		ActionCount:               i.ActionCount,
		InvocationStatus:          inspb.InvocationStatus(i.InvocationStatus),
		CreatedAtUsec:             i.Model.CreatedAtUsec,
		UpdatedAtUsec:             i.Model.UpdatedAtUsec,
		ReadPermission:            readPermission,
		CreatedWithCapabilities:   capabilities.FromInt(i.CreatedWithCapabilities),
		Acl:                       perms.ToACLProto(&uidpb.UserId{Id: i.UserID}, i.GroupID, i.Perms),
		CacheStats:                cacheStats,
		LastChunkId:               i.LastChunkId,
		HasChunkedEventLogs:       i.LastChunkId != "",
		Attempt:                   i.Attempt,
		BazelExitCode:             i.BazelExitCode,
		DownloadOutputsOption:     inpb.DownloadOutputsOption(i.DownloadOutputsOption),
		RemoteExecutionEnabled:    i.RemoteExecutionEnabled,
		UploadLocalResultsEnabled: i.UploadLocalResultsEnabled,
	}
	if i.Pattern != "" {
		out.Pattern = strings.Split(i.Pattern, ", ")
	}
	// No validation here: the user gets whatever tags the DB holds, and any
	// error from splitting them is deliberately ignored.
	out.Tags, _ = invocation_format.SplitAndTrimAndDedupeTags(i.Tags, false)
	return out
}

// GetStreamIdFromInvocationIdAndAttempt returns the ID of the stored event
// stream for the given invocation attempt: "<iid>/<attempt>" for a non-zero
// attempt, or just iid when attempt is 0 (invocations recorded before attempt
// tracking existed have no attempt suffix).
func GetStreamIdFromInvocationIdAndAttempt(iid string, attempt uint64) string {
	if attempt != 0 {
		suffix := strconv.FormatUint(attempt, 10)
		return iid + "/" + suffix
	}
	return iid
}

// toStoredInvocation copies the identifying and searchable fields of an
// invocation row (ID, user, pattern, role, branch, commit, repo URL, command,
// status, success and tags) into a StoredInvocation proto.
func toStoredInvocation(inv *tables.Invocation) *sipb.StoredInvocation {
	stored := &sipb.StoredInvocation{}
	stored.InvocationId = inv.InvocationID
	stored.User = inv.User
	stored.Pattern = inv.Pattern
	stored.Role = inv.Role
	stored.BranchName = inv.BranchName
	stored.CommitSha = inv.CommitSHA
	stored.RepoUrl = inv.RepoURL
	stored.Command = inv.Command
	stored.InvocationStatus = inv.InvocationStatus
	stored.Success = inv.Success
	stored.Tags = inv.Tags
	return stored
}

// incrementInvocationUsage records one invocation against the usage labels
// computed from ctx. Failures to compute the labels or to increment the
// counter are logged as warnings and otherwise ignored.
func incrementInvocationUsage(ctx context.Context, ut interfaces.UsageTracker) {
	usageLabels, err := usageutil.Labels(ctx)
	if err != nil {
		log.CtxWarningf(ctx, "Failed to compute invocation usage labels: %s", err)
		return
	}
	oneInvocation := &tables.UsageCounts{Invocations: 1}
	if err := ut.Increment(ctx, usageLabels, oneInvocation); err != nil {
		log.CtxWarningf(ctx, "Failed to increment invocation usage: %s", err)
	}
}

// TruncateStringSlice returns the longest prefix of strs whose elements, when
// joined with a single space (" "), occupy at most charLimit bytes. The
// second result reports whether any elements were cut off. The returned slice
// shares its backing array with strs.
func TruncateStringSlice(strs []string, charLimit int) (truncatedList []string, truncated bool) {
	used := 0
	for idx := range strs {
		need := len(strs[idx])
		if idx != 0 {
			// Every element after the first is rendered with a leading
			// space in the UI, which counts toward the limit.
			need++
		}
		if used+need > charLimit {
			return strs[:idx], true
		}
		used += need
	}
	return strs, false
}
