diff --git a/workhorse/.golangci.yml b/workhorse/.golangci.yml
index 3a5c55db164c634e503b751edf62c12e297456cd..0d1aaa9068a697c6520c414588264e313a4844d9 100644
--- a/workhorse/.golangci.yml
+++ b/workhorse/.golangci.yml
@@ -134,6 +134,7 @@ linters:
             - github.com/modelcontextprotocol/go-sdk
             - github.com/go-redsync/redsync
             - github.com/go-redsync/redsync/v4/redis/goredis/v9
+            - golang.org/x/text/transform
             # gRPC and Protocol Buffers
             - google.golang.org/grpc
             - google.golang.org/grpc/codes
diff --git a/workhorse/go.mod b/workhorse/go.mod
index c48c369fee7e601767c8c1dccdcdd5e443d5d4b3..420c9aa7d95edcc5d36f562e54a5e429022a66b5 100644
--- a/workhorse/go.mod
+++ b/workhorse/go.mod
@@ -36,6 +36,7 @@ require (
 	gocloud.dev v0.43.0
 	golang.org/x/image v0.28.0
 	golang.org/x/oauth2 v0.30.0
+	golang.org/x/text v0.28.0
 	google.golang.org/grpc v1.73.0
 	google.golang.org/protobuf v1.36.6
 )
@@ -161,7 +162,6 @@ require (
 	golang.org/x/net v0.42.0 // indirect
 	golang.org/x/sync v0.16.0 // indirect
 	golang.org/x/sys v0.35.0 // indirect
-	golang.org/x/text v0.28.0 // indirect
 	golang.org/x/time v0.12.0 // indirect
 	golang.org/x/tools v0.35.0 // indirect
 	golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
diff --git a/workhorse/internal/jsonstream/transform.go b/workhorse/internal/jsonstream/transform.go
new file mode 100644
index 0000000000000000000000000000000000000000..c2c5b15bbcffa2dc5d85884233d7e91fde709123
--- /dev/null
+++ b/workhorse/internal/jsonstream/transform.go
@@ -0,0 +1,269 @@
+// Package jsonstream provides utilities for streaming JSON transformations.
+// It enables context-aware modifications of JSON data while maintaining
+// streaming performance, without loading entire documents into memory.
+package jsonstream
+
+import (
+	"bytes"
+	"io"
+
+	"golang.org/x/text/transform"
+)
+
+// JSONTransformWriter is an io.Writer that performs context-aware replacements
+// in streaming JSON data. Only the string value that directly follows the
+// configured key is rewritten; all other bytes are passed through unchanged.
+//
+// Example:
+//
+//	tw := jsonstream.NewJSONTransformWriter(w, "old.com", "new.com", `"tarball"`)
+//	defer tw.Close()
+//	io.Copy(tw, reader)
+//
+// This will transform:
+//
+//	{"tarball": "https://old.com/package.tgz"}
+//
+// Into:
+//
+//	{"tarball":"https://new.com/package.tgz"}
+type JSONTransformWriter struct {
+	writer *transform.Writer
+}
+
+// jsonTransformer is the transform.Transformer behind JSONTransformWriter.
+//
+// inString and escape describe only bytes that Transform has already consumed:
+// whether the stream is inside a string literal that is being passed through,
+// and whether the next byte of that literal is escaped. Bytes handed back to
+// the caller with transform.ErrShortSrc or transform.ErrShortDst are presented
+// again on the next call, so they must never be reflected in this state.
+type jsonTransformer struct {
+	key  []byte
+	from []byte
+	to   []byte
+
+	inString bool
+	escape   bool
+}
+
+var _ transform.Transformer = (*jsonTransformer)(nil)
+
+// NewJSONTransformWriter creates a new JSONTransformWriter that replaces the
+// first occurrence of `from` with `to` in JSON string values associated with
+// the specified key. A rewritten pair is emitted as key:"value", without the
+// whitespace that surrounded the colon in the input.
+//
+// Parameters:
+//   - w: The underlying writer where transformed JSON will be written
+//   - from: The string to replace
+//   - to: The replacement string
+//   - key: The JSON key to target, exactly as it appears in the stream
+//     (e.g., `"tarball"` including quotes)
+//
+// A targeted key/value pair has to be examined as a whole, so it must fit in
+// the buffers of the underlying transform.Writer; otherwise the writer cannot
+// make progress and Write returns an error.
+func NewJSONTransformWriter(w io.Writer, from, to, key string) *JSONTransformWriter {
+	transformer := &jsonTransformer{
+		key:  []byte(key),
+		from: []byte(from),
+		to:   []byte(to),
+	}
+
+	return &JSONTransformWriter{
+		writer: transform.NewWriter(w, transformer),
+	}
+}
+
+// Transform implements transform.Transformer. It copies src to dst, rewriting
+// every complete `key: "value"` pair whose value contains `from`.
+//
+// Progress is reported incrementally: bytes that cannot belong to a targeted
+// pair are copied as far as dst allows, so neither a large src nor a long
+// string literal has to fit in dst at once. transform.ErrShortSrc is returned
+// only while a possible targeted pair is still incomplete and more input may
+// follow.
+func (jt *jsonTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	for nSrc < len(src) {
+		if nDst == len(dst) {
+			return nDst, nSrc, transform.ErrShortDst
+		}
+		rest := src[nSrc:]
+
+		// Inside a pass-through string literal: copy up to its closing quote.
+		if jt.inString {
+			n := jt.copyString(dst[nDst:], rest)
+			nDst += n
+			nSrc += n
+			continue
+		}
+
+		// Outside a string literal: copy everything up to the next quote.
+		if rest[0] != '"' {
+			run := bytes.IndexByte(rest, '"')
+			if run < 0 {
+				run = len(rest)
+			}
+			n := copy(dst[nDst:], rest[:run])
+			nDst += n
+			nSrc += n
+			continue
+		}
+
+		// A string literal starts here; it may be the targeted key.
+		replacement, consumed, needMore := jt.rewritePair(rest, atEOF)
+		switch {
+		case needMore:
+			return nDst, nSrc, transform.ErrShortSrc
+		case consumed == 0:
+			// Not a pair to rewrite: pass the literal through unchanged.
+			dst[nDst] = '"'
+			nDst++
+			nSrc++
+			jt.inString = true
+		case len(replacement) > len(dst)-nDst:
+			return nDst, nSrc, transform.ErrShortDst
+		default:
+			nDst += copy(dst[nDst:], replacement)
+			nSrc += consumed
+		}
+	}
+
+	return nDst, nSrc, nil
+}
+
+// copyString copies bytes of the current string literal from src to dst,
+// stopping after the closing quote or when either slice is exhausted. It
+// updates the string state for exactly the bytes it copied and returns their
+// count.
+func (jt *jsonTransformer) copyString(dst, src []byte) int {
+	n := len(src)
+	if len(dst) < n {
+		n = len(dst)
+	}
+
+	for i := 0; i < n; i++ {
+		b := src[i]
+		dst[i] = b
+
+		switch {
+		case jt.escape:
+			jt.escape = false // this byte is escaped and has no special meaning
+		case b == '\\':
+			jt.escape = true // the next byte is escaped
+		case b == '"':
+			jt.inString = false
+			return i + 1
+		}
+	}
+
+	return n
+}
+
+// rewritePair inspects data, which starts at the opening quote of a string
+// literal, and reports how to handle it:
+//   - needMore: data may be the start of a targeted pair but ends too early to
+//     decide; only reported when atEOF is false
+//   - consumed > 0: data starts with a targeted pair of that length whose value
+//     contains `from`; replacement holds the rewritten pair
+//   - otherwise: the literal is not part of a pair that needs rewriting
+//
+// rewritePair does not modify the transformer state.
+func (jt *jsonTransformer) rewritePair(data []byte, atEOF bool) (replacement []byte, consumed int, needMore bool) {
+	if len(data) < len(jt.key) {
+		return nil, 0, !atEOF && bytes.HasPrefix(jt.key, data)
+	}
+	if !bytes.HasPrefix(data, jt.key) {
+		return nil, 0, false
+	}
+
+	// Expect: key \s* : \s* " value "
+	pos := skipSpace(data, len(jt.key))
+	if pos == len(data) {
+		return nil, 0, !atEOF
+	}
+	if data[pos] != ':' {
+		return nil, 0, false
+	}
+
+	pos = skipSpace(data, pos+1)
+	if pos == len(data) {
+		return nil, 0, !atEOF
+	}
+	if data[pos] != '"' {
+		return nil, 0, false
+	}
+
+	valueStart := pos + 1
+	valueEnd := closingQuote(data, valueStart)
+	if valueEnd < 0 {
+		return nil, 0, !atEOF
+	}
+
+	value := data[valueStart:valueEnd]
+	at := bytes.Index(value, jt.from)
+	if at < 0 {
+		return nil, 0, false
+	}
+
+	// Rebuild: key + :" + prefix + to + suffix + "
+	replacement = make([]byte, 0, len(jt.key)+len(value)-len(jt.from)+len(jt.to)+3)
+	replacement = append(replacement, jt.key...)
+	replacement = append(replacement, ':', '"')
+	replacement = append(replacement, value[:at]...)
+	replacement = append(replacement, jt.to...)
+	replacement = append(replacement, value[at+len(jt.from):]...)
+	replacement = append(replacement, '"')
+
+	return replacement, valueEnd + 1, false
+}
+
+// skipSpace returns the index of the first byte at or after pos that is not
+// whitespace, or len(data) if there is none.
+func skipSpace(data []byte, pos int) int {
+	for pos < len(data) {
+		switch data[pos] {
+		case ' ', '\t', '\n', '\r', '\f':
+			pos++
+		default:
+			return pos
+		}
+	}
+
+	return pos
+}
+
+// closingQuote returns the index of the first unescaped double quote at or
+// after start, or -1 if data ends before the string literal is closed.
+func closingQuote(data []byte, start int) int {
+	escaped := false
+	for i := start; i < len(data); i++ {
+		switch {
+		case escaped:
+			escaped = false
+		case data[i] == '\\':
+			escaped = true
+		case data[i] == '"':
+			return i
+		}
+	}
+
+	return -1
+}
+
+// Reset clears the transformer state
+func (jt *jsonTransformer) Reset() {
+	jt.inString = false
+	jt.escape = false
+}
+
+// Write transforms p and writes the result to the underlying writer.
+func (tw *JSONTransformWriter) Write(p []byte) (n int, err error) {
+	return tw.writer.Write(p)
+}
+
+// Close flushes any buffered data and completes the transformation
+func (tw *JSONTransformWriter) Close() error {
+	return tw.writer.Close()
+}
diff --git a/workhorse/internal/jsonstream/transform_test.go b/workhorse/internal/jsonstream/transform_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..2db6f6893ababd4233890c22ca8f10aa92a3d300
--- /dev/null
+++ b/workhorse/internal/jsonstream/transform_test.go
@@ -0,0 +1,92 @@
+package jsonstream
+
+import (
+	"bytes"
+	"encoding/json"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestJSONTransformWriter(t *testing.T) {
+	var output bytes.Buffer
+	tw := NewJSONTransformWriter(&output, "old.com", "new.com", `"tarball"`)
+
+	input := []byte(`{"name": "foo","versions": {"1.0.0": {"dist": {"shasum": "b7fe281f2dfc22a653b4cd34dd9c54906642fb26","tarball": "https://old.com/foo-1.0.0.tgz"},"name": "foo","version": "1.0.0"}}}`)
+
+	// Write to transformer
+	_, err := tw.Write(input)
+	require.NoError(t, err)
+
+	// Close to flush any remaining data
+	err = tw.Close()
+	require.NoError(t, err)
+
+	// Parse the output JSON
+	var result map[string]interface{}
+	err = json.Unmarshal(output.Bytes(), &result)
+	require.NoError(t, err)
+
+	// Navigate to the nested tarball field
+	versions := result["versions"].(map[string]interface{})
+	version := versions["1.0.0"].(map[string]interface{})
+	dist := version["dist"].(map[string]interface{})
+	tarball := dist["tarball"].(string)
+	require.Equal(t, "https://new.com/foo-1.0.0.tgz", tarball)
+
+	// Verify other fields unchanged
+	name := result["name"].(string)
+	require.Equal(t, "foo", name)
+}
+
+func TestJSONTransformWriterChunkedWrites(t *testing.T) {
+	input := `{"name": "foo", "tarball" : "https://old.com/foo.tgz", "n": 1}`
+	expected := `{"name": "foo", "tarball":"https://new.com/foo.tgz", "n": 1}`
+
+	for _, chunkSize := range []int{1, 2, 3, 7, len(input)} {
+		var output bytes.Buffer
+		tw := NewJSONTransformWriter(&output, "old.com", "new.com", `"tarball"`)
+
+		for start := 0; start < len(input); start += chunkSize {
+			end := start + chunkSize
+			if end > len(input) {
+				end = len(input)
+			}
+
+			_, err := tw.Write([]byte(input[start:end]))
+			require.NoError(t, err)
+		}
+		require.NoError(t, tw.Close())
+
+		require.Equal(t, expected, output.String(), "chunk size %d", chunkSize)
+	}
+}
+
+func TestJSONTransformWriterLargeWrite(t *testing.T) {
+	readme := strings.Repeat("x", 10000)
+	input := `{"readme": "` + readme + `","tarball": "https://old.com/foo.tgz"}`
+	expected := `{"readme": "` + readme + `","tarball":"https://new.com/foo.tgz"}`
+
+	var output bytes.Buffer
+	tw := NewJSONTransformWriter(&output, "old.com", "new.com", `"tarball"`)
+
+	_, err := tw.Write([]byte(input))
+	require.NoError(t, err)
+	require.NoError(t, tw.Close())
+
+	require.Equal(t, expected, output.String())
+}
+
+func TestJSONTransformWriterOnlyRewritesTargetValue(t *testing.T) {
+	input := `{"tarball": "https://other.com/foo.tgz", "homepage": "https://old.com", "quoted": "\"tarball\": \"old.com\""}`
+
+	var output bytes.Buffer
+	tw := NewJSONTransformWriter(&output, "old.com", "new.com", `"tarball"`)
+
+	_, err := tw.Write([]byte(input))
+	require.NoError(t, err)
+	require.NoError(t, tw.Close())
+
+	require.Equal(t, input, output.String())
+}