Fix URL parsing error when a non-http(s) URL contains a % symbol outside of the percent-encoded sequence

This commit is contained in:
DarthSim
2024-11-28 17:45:24 +03:00
parent bbcef29e11
commit 505faf5452
3 changed files with 27 additions and 12 deletions

View File

@@ -8,6 +8,7 @@
### Fixed
- Fix detecting of width and height of HEIF images that include `irot` boxes.
- Set `Error` status for errorred traces in OpenTelemetry.
- Fix URL parsing error when a non-http(s) URL contains a `%` symbol outside of the percent-encoded sequence.
- (pro) Fix opject detection accuracy when using YOLOv8 or YOLOv10 models.
- (pro) Fix usage of the `obj` and `objw` gravity types inside the `crop` processing option.
- (pro) Fix detecting of width and height when orientation is specified in EXIF but EXIF info is not requested.

View File

@@ -7,7 +7,6 @@ import (
"io"
"net/http"
"net/http/cookiejar"
"net/url"
"strings"
"time"
@@ -134,23 +133,27 @@ func headersToStore(res *http.Response) map[string]string {
func BuildImageRequest(ctx context.Context, imageURL string, header http.Header, jar *cookiejar.Jar) (*http.Request, context.CancelFunc, error) {
reqCtx, reqCancel := context.WithTimeout(ctx, time.Duration(config.DownloadTimeout)*time.Second)
// Non-http(s) URLs may contain percent symbol outside of the percent-encoded sequences.
// Parsing such URLs will fail with an error.
// To prevent this, we replace all percent symbols with %25.
//
// Also, such URLs may contain a hash symbol, which is a fragment identifier.
// We replace them with %23 to prevent cutting off the fragment part.
// Since we already replaced all percent symbols, we won't mix up %23 that were in the original URL
// and %23 that appeared after the replacement.
//
// We will revert these replacements in `transport/common.GetBucketAndKey`.
if !strings.HasPrefix(imageURL, "http://") && !strings.HasPrefix(imageURL, "https://") {
imageURL = strings.ReplaceAll(imageURL, "%", "%25")
imageURL = strings.ReplaceAll(imageURL, "#", "%23")
}
req, err := http.NewRequestWithContext(reqCtx, "GET", imageURL, nil)
if err != nil {
reqCancel()
return nil, func() {}, ierrors.New(404, err.Error(), msgSourceImageIsUnreachable)
}
// S3, GCS, etc object keys may contain `#` symbol.
// `url.ParseRequestURI` unlike `url.Parse` does not cut-off the fragment part from the URL path.
if req.URL.Scheme != "http" && req.URL.Scheme != "https" {
u, err := url.ParseRequestURI(imageURL)
if err != nil {
reqCancel()
return nil, func() {}, ierrors.New(404, err.Error(), msgSourceImageIsUnreachable)
}
req.URL = u
}
if _, ok := enabledSchemes[req.URL.Scheme]; !ok {
reqCancel()
return nil, func() {}, ierrors.New(

View File

@@ -21,5 +21,16 @@ func GetBucketAndKey(u *url.URL) (bucket, key string) {
key = strings.TrimLeft(key, "/")
// We replaced all `%` with `%25` in `imagedata.BuildImageRequest` to prevent parsing errors.
// Also, we replaced all `#` with `%23` to prevent cutting off the fragment part.
// We need to revert these replacements.
//
// It's important to revert %23 first because the original URL may also contain %23,
// and we don't want to mix them up.
bucket = strings.ReplaceAll(bucket, "%23", "#")
bucket = strings.ReplaceAll(bucket, "%25", "%")
key = strings.ReplaceAll(key, "%23", "#")
key = strings.ReplaceAll(key, "%25", "%")
return
}