# ------------------------------------------------------------ # Copyright (c) 2013-2019 # Q.W.Page Associates Inc. # www.qwpage.com # All rights reserved. # ------------------------------------------------------------ /* { */ } ::namespace eval ::qw::http {}; ::proc ::qw::http::formatQuery {args} { /* { 2.33.3 We are overriding the method taken from the http package version 2.4.5. http::formatQuery uses a number of regsub commands and it was way too slow when parsing nv2_open_message.log. It took 5.5 minutes to parse. When using cpp_format_query it took about 100 milliseconds. Utimately cpp_format_query replaces the call to mapReply. */ } set result "" set sep "" foreach i $args { append result $sep [[::qw::system] cpp_http_mapReply .query $i]; #rwb_change append result $sep [mapReply $i] if {[string equal $sep "="]} { set sep & } else { set sep = } } return $result } ::if {0} { ::if {$::tcl_version==8.6} { /* { We have to require http first so that we can override it. Otherwise, when loading on demand, we get an "unknown namespace" error. */ } ::package require http; ::proc ::http::geturl {url args} { ::set rwb1_debug 0; ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.00";} variable http variable urlTypes variable defaultCharset variable defaultKeepalive variable strict # Initialize the state variable, an array. We'll return the name of this # array as the token for the transaction. if {![info exists http(uid)]} { set http(uid) 0 } set token [namespace current]::[incr http(uid)] ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.00.0,token==$token";} variable $token upvar 0 $token state reset $token # Process command options. array set state { -binary false -blocksize 8192 -queryblocksize 8192 -validate 0 -headers {} -timeout 0 -type application/x-www-form-urlencoded -queryprogress {} -protocol 1.1 binary 0 state connecting meta {} coding {} currentsize 0 totalsize 0 querylength 0 queryoffset 0 type text/html body {} status "" http "" connection close accept-types {} } ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.01";} set state(-keepalive) $defaultKeepalive set state(-strict) $strict # These flags have their types verified [Bug 811170] array set type { -binary boolean -blocksize integer -queryblocksize integer -strict boolean -timeout integer -validate boolean } ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.02";} set state(charset) $defaultCharset set options { -binary -blocksize -channel -command -handler -headers -keepalive -method -myaddr -progress -protocol -query -queryblocksize -querychannel -queryprogress -strict -timeout -type -validate } set usage [join [lsort $options] ", "] set options [string map {- ""} $options] set pat ^-(?:[join $options |])$ foreach {flag value} $args { ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.03,flag==$flag,value==$value";} if {[regexp -- $pat $flag]} { # Validate numbers ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.04";} if { [info exists type($flag)] && ![string is $type($flag) -strict $value] } { unset $token return -code error \ "Bad value for $flag ($value), must be $type($flag)" } set state($flag) $value } else { unset $token return -code error "Unknown option $flag, can be: $usage" } ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.05";} } # Make sure -query and -querychannel aren't both specified set isQueryChannel [info exists state(-querychannel)] set isQuery [info exists state(-query)] if {$isQuery && $isQueryChannel} { unset $token ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.06";} return -code error "Can't combine -query and -querychannel options!" } ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.07";} # Validate URL, determine the server host and port, and check proxy case # Recognize user:pass@host URLs also, although we do not do anything with # that info yet. # URLs have basically four parts. # First, before the colon, is the protocol scheme (e.g. http) # Second, for HTTP-like protocols, is the authority # The authority is preceded by // and lasts up to (but not including) # the following / or ? and it identifies up to four parts, of which # only one, the host, is required (if an authority is present at all). # All other parts of the authority (user name, password, port number) # are optional. # Third is the resource name, which is split into two parts at a ? # The first part (from the single "/" up to "?") is the path, and the # second part (from that "?" up to "#") is the query. *HOWEVER*, we do # not need to separate them; we send the whole lot to the server. # Both, path and query are allowed to be missing, including their # delimiting character. # Fourth is the fragment identifier, which is everything after the first # "#" in the URL. The fragment identifier MUST NOT be sent to the server # and indeed, we don't bother to validate it (it could be an error to # pass it in here, but it's cheap to strip). # # An example of a URL that has all the parts: # # http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes # # The "http" is the protocol, the user is "jschmoe", the password is # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes". # # Note that the RE actually combines the user and password parts, as # recommended in RFC 3986. Indeed, that RFC states that putting passwords # in URLs is a Really Bad Idea, something with which I would agree utterly. # # From a validation perspective, we need to ensure that the parts of the # URL that are going to the server are correctly encoded. This is only # done if $state(-strict) is true (inherited from $::http::strict). ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.08";} set URLmatcher {(?x) # this is _expanded_ syntax ^ (?: (\w+) : ) ? # (?: // (?: ( [^@/\#?]+ # ) @ )? ( # [^/:\#?]+ | # host name or IPv4 address \[ [^/\#?]+ \] # IPv6 address in square brackets ) (?: : (\d+) )? # )? ( [/\?] [^\#]*)? # (including query) (?: \# (.*) )? # $ } # Phase one: parse if {![regexp -- $URLmatcher $url -> proto user host port srvurl]} { unset $token ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.09";} return -code error "Unsupported URL: $url" } # Phase two: validate ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.10";} set host [string trim $host {[]}]; # strip square brackets from IPv6 address ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.10.0,proto==$proto";} ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.10.1,user==$user";} ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.10.2,host==$host";} ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.10.3,port==$port";} ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.10.4,srvurl==$srvurl";} if {$host eq ""} { # Caller has to provide a host name; we do not have a "default host" # that would enable us to handle relative URLs. unset $token ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.11";} return -code error "Missing host part: $url" # Note that we don't check the hostname for validity here; if it's # invalid, we'll simply fail to resolve it later on. } if {$port ne "" && $port > 65535} { unset $token ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.12";} return -code error "Invalid port number: $port" } # The user identification and resource identification parts of the URL can # have encoded characters in them; take care! if {$user ne ""} { # Check for validity according to RFC 3986, Appendix A set validityRE {(?xi) ^ (?: [-\w.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+ $ } ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.13";} if {$state(-strict) && ![regexp -- $validityRE $user]} { unset $token # Provide a better error message in this error case if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} { ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.14";} return -code error \ "Illegal encoding character usage \"$bad\" in URL user" } ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.15";} { return -code error "Illegal characters in URL user" } } ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.16";} } if {$srvurl ne ""} { # RFC 3986 allows empty paths (not even a /), but servers # return 400 if the path in the HTTP request doesn't start # with / , so add it here if needed. if {[string index $srvurl 0] ne "/"} { set srvurl /$srvurl } # Check for validity according to RFC 3986, Appendix A set validityRE {(?xi) ^ # Path part (already must start with / character) (?: [-\w.~!$&'()*+,;=:@/] | %[0-9a-f][0-9a-f] )* # Query part (optional, permits ? characters) (?: \? (?: [-\w.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )? $ } if {$state(-strict) && ![regexp -- $validityRE $srvurl]} { unset $token # Provide a better error message in this error case if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} { return -code error \ "Illegal encoding character usage \"$bad\" in URL path" } return -code error "Illegal characters in URL path" } ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.17";} } else { set srvurl / } if {$proto eq ""} { set proto http } if {![info exists urlTypes($proto)]} { unset $token return -code error "Unsupported URL type \"$proto\"" } set defport [lindex $urlTypes($proto) 0] set defcmd [lindex $urlTypes($proto) 1] if {$port eq ""} { set port $defport } if {![catch {$http(-proxyfilter) $host} proxy]} { set phost [lindex $proxy 0] set pport [lindex $proxy 1] } # OK, now reassemble into a full URL set url ${proto}:// if {$user ne ""} { append url $user append url @ } append url $host if {$port != $defport} { append url : $port } append url $srvurl # Don't append the fragment! set state(url) $url # If a timeout is specified we set up the after event and arrange for an # asynchronous socket connection. set sockopts [list -async] if {$state(-timeout) > 0} { set state(after) [after $state(-timeout) \ [list http::reset $token timeout]] } # If we are using the proxy, we must pass in the full URL that includes # the server name. if {[info exists phost] && ($phost ne "")} { set srvurl $url set targetAddr [list $phost $pport] } else { set targetAddr [list $host $port] } # Proxy connections aren't shared among different hosts. ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.18,host==$host,port==$port";} set state(socketinfo) $host:$port # See if we are supposed to use a previously opened channel. if {$state(-keepalive)} { variable socketmap ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.19";} if {[info exists socketmap($state(socketinfo))]} { if {[catch {fconfigure $socketmap($state(socketinfo))}]} { Log "WARNING: socket for $state(socketinfo) was closed" unset socketmap($state(socketinfo)) } else { set sock $socketmap($state(socketinfo)) Log "reusing socket $sock for $state(socketinfo)" catch {fileevent $sock writable {}} catch {fileevent $sock readable {}} } } # don't automatically close this connection socket set state(connection) {} } ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.20";} if {![info exists sock]} { # Pass -myaddr directly to the socket command if {[info exists state(-myaddr)]} { lappend sockopts -myaddr $state(-myaddr) } ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.20.0,defcmd==$defcmd";} ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.20.1,sockopts==$sockopts";} ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.20.2,targetAddr==$targetAddr";} if {[catch {eval $defcmd $sockopts $targetAddr} sock]} { # something went wrong while trying to establish the connection. # Clean up after events and such, but DON'T call the command # callback (if available) because we're going to throw an # exception from here instead. ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.20.3,sock==$sock";} set state(sock) $sock ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.20.4";} Finish $token "" 1 ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.20.5";} cleanup $token ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.20.6";} return -code error $sock } } ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.21,\$sock==$sock";} set state(sock) $sock Log "Using $sock for $state(socketinfo)" \ [expr {$state(-keepalive)?"keepalive":""}] if {$state(-keepalive)} { set socketmap($state(socketinfo)) $sock } if {![info exists phost]} { set phost "" } ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.21.0,srvurl==$srvurl";} fileevent $sock writable [list http::Connect $token $proto $phost $srvurl] ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.21.1";} # Wait for the connection to complete. ::if {$rwb1_debug} { ::puts "state-----------------------------------------------------"; ::foreach Name [::array names state] { ::puts "rwb1_debug,state($Name)==$state($Name)"; } ::puts "state-----------------------------------------------------"; } if {![info exists state(-command)]} { # geturl does EVERYTHING asynchronously, so if the user # calls it synchronously, we just do a wait here. ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.22";} http::wait $token ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.23";} if {![info exists state]} { # If we timed out then Finish has been called and the users # command callback may have cleaned up the token. If so we end up # here with nothing left to do. ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.24";} return $token } elseif {$state(status) eq "error"} { # Something went wrong while trying to establish the connection. # Clean up after events and such, but DON'T call the command # callback (if available) because we're going to throw an # exception from here instead. set err [lindex $state(error) 0] cleanup $token ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.25";} return -code error $err } } ::if {$rwb1_debug} {::puts "rwb1_debug,http::geturl,1000.26";} return $token } } }