Changeset 3d18e9d
- Timestamp:
- 13/04/2012 13:05:25 (10 years ago)
- Branches:
- master
- Children:
- 0e7bdd1
- Parents:
- cf97a98
- git-author:
- Eric van der Vlist <vdv@dyomedea.com> (13/04/2012 13:05:25)
- git-committer:
- Eric van der Vlist <vdv@dyomedea.com> (13/04/2012 13:05:25)
- File:
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
archiver/pipelines/actions/archive-resource.xpl
r750ccaa r3d18e9d 3 3 <p:param name="data" type="input"/> 4 4 5 <!-- Fetch the resource --> 6 <p:processor name="oxf:url-generator"> 7 <p:input name="config" transform="oxf:xslt" href="#data"> 8 <config xsl:version="2.0"> 9 <url> 10 <xsl:value-of select="/action/@url"/> 11 </url> 12 <header> 13 <name>User-Agent</name> 14 <value> 15 <xsl:value-of select="doc('oxf:/config.xml')/config/user-agent"/> 16 </value> 17 </header> 18 <mode>archive</mode> 19 </config> 20 </p:input> 21 <p:output name="data" id="archive" debug="archive"/> 22 </p:processor> 23 24 25 <!-- Store the archive in the database --> 5 <!-- Look if the resource has already been archived for that set --> 26 6 <p:processor name="oxf:pipeline"> 27 7 <p:input name="config" href="/data-access.xpl"/> … … 30 10 <relpath> 31 11 <xsl:value-of select="/action/@directory"/> 32 <xsl: value-of select="/action/@filename"/>12 <xsl:text>index.xml</xsl:text> 33 13 </relpath> 34 <operation>write</operation> 35 <type>document</type> 14 <operation>read</operation> 15 <type>xquery</type> 16 <parameter name="url" type="string"> 17 <xsl:value-of select="/action/@url"/> 18 </parameter> 36 19 </config> 37 20 </p:input> 38 <p:input name="param" href="#archive"/> 39 <p:output name="data" id="response2"/> 21 <p:input name="param"> 22 <xquery><![CDATA[ 23 24 boolean(//archive[@url = $(url)]) 25 26 ]]></xquery> 27 </p:input> 28 <p:output name="data" id="duplicate" debug="duplicate"/> 40 29 </p:processor> 41 30 42 <p:processor name="oxf:null-serializer"> 43 <p:input name="data" href="#response2"/> 44 </p:processor> 45 46 47 <!-- Test the type of document to see if it needs to be rewritten --> 48 <p:choose href="#archive"> 49 50 <!-- HTML document : need to update the links... 
--> 51 <p:when test="/archive/response/document/@content-type=('text/html', 'text/css')"> 52 53 <!-- Call the corresponding pipeline to extract the links and rewrite them --> 54 <p:processor name="oxf:url-generator"> 55 <p:input name="config" transform="oxf:xslt" href="#archive"> 56 <config xsl:version="2.0"> 57 <url> 58 <xsl:text>oxf:/actions/mediatypes/</xsl:text> 59 <xsl:value-of select="substring-after(/archive/response/document/@content-type, '/')"/> 60 <xsl:text>.xpl</xsl:text> 61 </url> 62 </config> 63 </p:input> 64 <p:output name="data" id="pipeline"/> 65 </p:processor> 66 67 <p:processor name="oxf:pipeline"> 68 <p:input name="config" href="#pipeline"/> 69 <p:input name="archive" href="#archive"/> 70 <p:output name="rewritten" id="rewritten"/> 71 <p:output name="links" id="links"/> 72 </p:processor> 73 74 75 <!-- Store the rewritten document in the database --> 76 <p:processor name="oxf:pipeline"> 77 <p:input name="config" href="/data-access.xpl"/> 78 <p:input name="data" transform="oxf:xslt" href="#data"> 79 <config xsl:version="2.0"> 80 <relpath> 81 <xsl:value-of select="/action/@directory"/> 82 <xsl:text>rewritten-</xsl:text> 83 <xsl:value-of select="/action/@filename"/> 84 </relpath> 85 <operation>write</operation> 86 <type>document</type> 87 </config> 88 </p:input> 89 <p:input name="param" href="#rewritten"/> 90 <p:output name="data" id="response3"/> 91 </p:processor> 92 <p:processor name="oxf:null-serializer"> 93 <p:input name="data" href="#response3"/> 94 </p:processor> 95 96 97 98 <!-- Update the archive index --> 99 <p:processor name="oxf:pipeline"> 100 <p:input name="config" href="/data-access.xpl"/> 101 <p:input name="data" transform="oxf:xslt" href="#data"> 102 <config xsl:version="2.0"> 103 <relpath> 104 <xsl:value-of select="/action/@directory"/> 105 <xsl:text>index.xml</xsl:text> 106 </relpath> 107 <operation>write</operation> 108 <type>xquery</type> 109 <parameter name="url" type="string"> 110 <xsl:value-of select="/action/@url"/> 111 
</parameter> 112 <parameter name="filename" type="string"> 113 <xsl:value-of select="/action/@filename"/> 114 </parameter> 115 <parameter name="filename-rewritten" type="string"> 116 <xsl:text>rewritten-</xsl:text> 117 <xsl:value-of select="/action/@filename"/> 118 </parameter> 119 </config> 120 </p:input> 121 <p:input name="param"> 122 <xquery><![CDATA[ 123 for $as in /archive-set 124 return 125 update 126 insert <archive url=$(url) href=$(filename) href-rewritten=$(filename-rewritten) dateTime="{current-dateTime()}"/> 127 into $as 128 ]]></xquery> 129 </p:input> 130 <p:output name="data" id="response1"/> 131 </p:processor> 132 <p:processor name="oxf:null-serializer"> 133 <p:input name="data" href="#response1"/> 134 </p:processor> 135 136 <!-- Update the queue --> 137 <p:processor name="oxf:pipeline"> 138 <p:input name="config" href="/data-access.xpl"/> 139 <p:input name="data" transform="oxf:xslt" href="aggregate('root', #data, #links)"> 140 <config xsl:version="2.0"> 141 <relpath>queue.xml</relpath> 142 <operation>write</operation> 143 <type>xquery</type> 144 <parameter name="directory" type="string"> 145 <xsl:value-of select="/root/action/@directory"/> 146 </parameter> 147 <parameter name="uuid" type="string"> 148 <xsl:value-of select="/root/action/@uuid"/> 149 </parameter> 150 <parameter name="priority" type="string"> 151 <xsl:value-of select="/root/action/@priority"/> 152 </parameter> 153 <parameter name="links" type="node-set"> 154 <xsl:copy-of select="/root/links"/> 155 </parameter> 156 </config> 157 </p:input> 158 <p:input name="param"> 159 <xquery><![CDATA[ 160 declare namespace util = "http://exist-db.org/xquery/util"; 161 declare variable $links := $(links); 162 163 for $q in /queue[$links/link/@abs-href] 164 return 165 update 166 insert 167 for $href in distinct-values($links/link/@abs-href) 168 let $link := $links/link[@abs-href = $href][1] 169 return <action priority=$(priority) uuid="{util:uuid()}" type="archive-resource" url="{$link/@abs-href}" 
directory=$(directory) filename="{$link/@filename}"/> 170 into $q, 171 172 for $a in /queue/action where $a/@uuid = $(uuid) return 173 update 174 delete $a 175 176 ]]></xquery> 177 </p:input> 178 <p:output name="data" id="response4" debug="response"/> 179 </p:processor> 180 <p:processor name="oxf:null-serializer"> 181 <p:input name="data" href="#response4"/> 182 </p:processor> 183 184 185 </p:when> 186 187 <!-- Otherwise: no need to rewrite --> 188 <p:otherwise> 189 <!-- Update the archive index --> 190 <p:processor name="oxf:pipeline"> 191 <p:input name="config" href="/data-access.xpl"/> 192 <p:input name="data" transform="oxf:xslt" href="#data"> 193 <config xsl:version="2.0"> 194 <relpath> 195 <xsl:value-of select="/action/@directory"/> 196 <xsl:text>index.xml</xsl:text> 197 </relpath> 198 <operation>write</operation> 199 <type>xquery</type> 200 <parameter name="url" type="string"> 201 <xsl:value-of select="/action/@url"/> 202 </parameter> 203 <parameter name="filename" type="string"> 204 <xsl:value-of select="/action/@filename"/> 205 </parameter> 206 </config> 207 </p:input> 208 <p:input name="param"> 209 <xquery><![CDATA[ 210 for $as in /archive-set 211 return 212 update 213 insert <archive url=$(url) href=$(filename) dateTime="{current-dateTime()}"/> 214 into $as 215 ]]></xquery> 216 </p:input> 217 <p:output name="data" id="response1"/> 218 </p:processor> 219 <p:processor name="oxf:null-serializer"> 220 <p:input name="data" href="#response1"/> 221 </p:processor> 222 31 <p:choose href="#duplicate"> 32 33 <p:when test="/*/* = 'true'"> 34 <!-- Already archived, nothing to do --> 223 35 <!-- Update the queue --> 224 36 <p:processor name="oxf:pipeline"> … … 236 48 <p:input name="param"> 237 49 <xquery><![CDATA[ 238 50 239 51 for $a in /queue/action where $a/@uuid = $(uuid) return 240 52 update … … 248 60 <p:input name="data" href="#response4"/> 249 61 </p:processor> 250 62 </p:when> 63 64 <p:otherwise> 65 <!-- Otherwise, archive the resource... 
--> 66 <!-- Fetch the resource --> 67 <p:processor name="oxf:url-generator"> 68 <p:input name="config" transform="oxf:xslt" href="#data"> 69 <config xsl:version="2.0"> 70 <url> 71 <xsl:value-of select="/action/@url"/> 72 </url> 73 <header> 74 <name>User-Agent</name> 75 <value> 76 <xsl:value-of select="doc('oxf:/config.xml')/config/user-agent"/> 77 </value> 78 </header> 79 <mode>archive</mode> 80 </config> 81 </p:input> 82 <p:output name="data" id="archive" debug="archive"/> 83 </p:processor> 84 85 86 <!-- Store the archive in the database --> 87 <p:processor name="oxf:pipeline"> 88 <p:input name="config" href="/data-access.xpl"/> 89 <p:input name="data" transform="oxf:xslt" href="#data"> 90 <config xsl:version="2.0"> 91 <relpath> 92 <xsl:value-of select="/action/@directory"/> 93 <xsl:value-of select="/action/@filename"/> 94 </relpath> 95 <operation>write</operation> 96 <type>document</type> 97 </config> 98 </p:input> 99 <p:input name="param" href="#archive"/> 100 <p:output name="data" id="response2"/> 101 </p:processor> 102 103 <p:processor name="oxf:null-serializer"> 104 <p:input name="data" href="#response2"/> 105 </p:processor> 106 107 108 <!-- Test the type of document to see if it needs to be rewritten --> 109 <p:choose href="#archive"> 110 111 <!-- HTML document : need to update the links... 
--> 112 <p:when test="/archive/response/document/@content-type=('text/html', 'text/css')"> 113 114 <!-- Call the corresponding pipeline to extract the links and rewrite them --> 115 <p:processor name="oxf:url-generator"> 116 <p:input name="config" transform="oxf:xslt" href="#archive"> 117 <config xsl:version="2.0"> 118 <url> 119 <xsl:text>oxf:/actions/mediatypes/</xsl:text> 120 <xsl:value-of select="substring-after(/archive/response/document/@content-type, '/')"/> 121 <xsl:text>.xpl</xsl:text> 122 </url> 123 </config> 124 </p:input> 125 <p:output name="data" id="pipeline"/> 126 </p:processor> 127 128 <p:processor name="oxf:pipeline"> 129 <p:input name="config" href="#pipeline"/> 130 <p:input name="archive" href="#archive"/> 131 <p:output name="rewritten" id="rewritten"/> 132 <p:output name="links" id="links"/> 133 </p:processor> 134 135 136 <!-- Store the rewritten document in the database --> 137 <p:processor name="oxf:pipeline"> 138 <p:input name="config" href="/data-access.xpl"/> 139 <p:input name="data" transform="oxf:xslt" href="#data"> 140 <config xsl:version="2.0"> 141 <relpath> 142 <xsl:value-of select="/action/@directory"/> 143 <xsl:text>rewritten-</xsl:text> 144 <xsl:value-of select="/action/@filename"/> 145 </relpath> 146 <operation>write</operation> 147 <type>document</type> 148 </config> 149 </p:input> 150 <p:input name="param" href="#rewritten"/> 151 <p:output name="data" id="response3"/> 152 </p:processor> 153 <p:processor name="oxf:null-serializer"> 154 <p:input name="data" href="#response3"/> 155 </p:processor> 156 157 158 159 <!-- Update the archive index --> 160 <p:processor name="oxf:pipeline"> 161 <p:input name="config" href="/data-access.xpl"/> 162 <p:input name="data" transform="oxf:xslt" href="#data"> 163 <config xsl:version="2.0"> 164 <relpath> 165 <xsl:value-of select="/action/@directory"/> 166 <xsl:text>index.xml</xsl:text> 167 </relpath> 168 <operation>write</operation> 169 <type>xquery</type> 170 <parameter name="url" type="string"> 171 
<xsl:value-of select="/action/@url"/> 172 </parameter> 173 <parameter name="filename" type="string"> 174 <xsl:value-of select="/action/@filename"/> 175 </parameter> 176 <parameter name="filename-rewritten" type="string"> 177 <xsl:text>rewritten-</xsl:text> 178 <xsl:value-of select="/action/@filename"/> 179 </parameter> 180 </config> 181 </p:input> 182 <p:input name="param"> 183 <xquery><![CDATA[ 184 for $as in /archive-set 185 return 186 update 187 insert <archive url=$(url) href=$(filename) href-rewritten=$(filename-rewritten) dateTime="{current-dateTime()}"/> 188 into $as 189 ]]></xquery> 190 </p:input> 191 <p:output name="data" id="response1"/> 192 </p:processor> 193 <p:processor name="oxf:null-serializer"> 194 <p:input name="data" href="#response1"/> 195 </p:processor> 196 197 <!-- Update the queue --> 198 <p:processor name="oxf:pipeline"> 199 <p:input name="config" href="/data-access.xpl"/> 200 <p:input name="data" transform="oxf:xslt" href="aggregate('root', #data, #links)"> 201 <config xsl:version="2.0"> 202 <relpath>queue.xml</relpath> 203 <operation>write</operation> 204 <type>xquery</type> 205 <parameter name="directory" type="string"> 206 <xsl:value-of select="/root/action/@directory"/> 207 </parameter> 208 <parameter name="uuid" type="string"> 209 <xsl:value-of select="/root/action/@uuid"/> 210 </parameter> 211 <parameter name="priority" type="string"> 212 <xsl:value-of select="/root/action/@priority"/> 213 </parameter> 214 <parameter name="links" type="node-set"> 215 <xsl:copy-of select="/root/links"/> 216 </parameter> 217 </config> 218 </p:input> 219 <p:input name="param"> 220 <xquery><![CDATA[ 221 declare namespace util = "http://exist-db.org/xquery/util"; 222 declare variable $links := $(links); 223 224 for $q in /queue[$links/link/@abs-href] 225 return 226 update 227 insert 228 for $href in distinct-values($links/link/@abs-href) 229 let $link := $links/link[@abs-href = $href][1] 230 return <action priority=$(priority) uuid="{util:uuid()}" 
type="archive-resource" url="{$link/@abs-href}" directory=$(directory) filename="{$link/@filename}"/> 231 into $q, 232 233 for $a in /queue/action where $a/@uuid = $(uuid) return 234 update 235 delete $a 236 237 ]]></xquery> 238 </p:input> 239 <p:output name="data" id="response4" debug="response"/> 240 </p:processor> 241 <p:processor name="oxf:null-serializer"> 242 <p:input name="data" href="#response4"/> 243 </p:processor> 244 245 246 </p:when> 247 248 <!-- Otherwise: no need to rewrite --> 249 <p:otherwise> 250 <!-- Update the archive index --> 251 <p:processor name="oxf:pipeline"> 252 <p:input name="config" href="/data-access.xpl"/> 253 <p:input name="data" transform="oxf:xslt" href="#data"> 254 <config xsl:version="2.0"> 255 <relpath> 256 <xsl:value-of select="/action/@directory"/> 257 <xsl:text>index.xml</xsl:text> 258 </relpath> 259 <operation>write</operation> 260 <type>xquery</type> 261 <parameter name="url" type="string"> 262 <xsl:value-of select="/action/@url"/> 263 </parameter> 264 <parameter name="filename" type="string"> 265 <xsl:value-of select="/action/@filename"/> 266 </parameter> 267 </config> 268 </p:input> 269 <p:input name="param"> 270 <xquery><![CDATA[ 271 for $as in /archive-set 272 return 273 update 274 insert <archive url=$(url) href=$(filename) dateTime="{current-dateTime()}"/> 275 into $as 276 ]]></xquery> 277 </p:input> 278 <p:output name="data" id="response1"/> 279 </p:processor> 280 <p:processor name="oxf:null-serializer"> 281 <p:input name="data" href="#response1"/> 282 </p:processor> 283 284 <!-- Update the queue --> 285 <p:processor name="oxf:pipeline"> 286 <p:input name="config" href="/data-access.xpl"/> 287 <p:input name="data" transform="oxf:xslt" href="#data"> 288 <config xsl:version="2.0"> 289 <relpath>queue.xml</relpath> 290 <operation>write</operation> 291 <type>xquery</type> 292 <parameter name="uuid" type="string"> 293 <xsl:value-of select="/action/@uuid"/> 294 </parameter> 295 </config> 296 </p:input> 297 <p:input 
name="param"> 298 <xquery><![CDATA[ 299 300 for $a in /queue/action where $a/@uuid = $(uuid) return 301 update 302 delete $a 303 304 ]]></xquery> 305 </p:input> 306 <p:output name="data" id="response4" debug="response"/> 307 </p:processor> 308 <p:processor name="oxf:null-serializer"> 309 <p:input name="data" href="#response4"/> 310 </p:processor> 311 312 </p:otherwise> 313 314 </p:choose> 251 315 </p:otherwise> 252 253 316 </p:choose> 254 317 318 319 255 320 </p:config>
Note: See TracChangeset for help on using the changeset viewer.