source: archiver/pipelines/actions/archive-resource.xpl @ 11027c0

Revision 11027c0, 12.0 KB checked in by Eric van der Vlist <vdv@dyomedea.com>, 14 months ago (diff)

Moving action pipelines in their own directory

  • Property mode set to 100644
Line 
1<p:config xmlns:p="http://www.orbeon.com/oxf/pipeline" xmlns:oxf="http://www.orbeon.com/oxf/processors" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:saxon="http://saxon.sf.net/">
2
3  <p:param name="data" type="input"/>
4
5  <!-- Fetch the resource -->
6  <p:processor name="oxf:url-generator">
7    <p:input name="config" transform="oxf:xslt" href="#data">
8      <config xsl:version="2.0">
9        <url>
10          <xsl:value-of select="/action/@url"/>
11        </url>
12        <header>
13          <name>User-Agent</name>
14          <value>
15            <xsl:value-of select="doc('oxf:/config.xml')/config/user-agent"/>
16          </value>
17        </header>
18        <mode>archive</mode>
19      </config>
20    </p:input>
21    <p:output name="data" id="archive" debug="archive"/>
22  </p:processor>
23
24
25  <!-- Store the archive in the database -->
26  <p:processor name="oxf:pipeline">
27    <p:input name="config" href="/data-access.xpl
28"/>
29    <p:input name="data" transform="oxf:xslt" href="#data">
30      <config xsl:version="2.0">
31        <relpath>
32          <xsl:value-of select="/action/@directory"/>
33          <xsl:value-of select="/action/@filename"/>
34        </relpath>
35        <operation>write</operation>
36        <type>document</type>
37      </config>
38    </p:input>
39    <p:input name="param" href="#archive"/>
40    <p:output name="data" id="response2"/>
41  </p:processor>
42
43  <p:processor name="oxf:null-serializer">
44    <p:input name="data" href="#response2"/>
45  </p:processor>
46
47
48  <!-- Test the type of document to see if it needs to be rewritten -->
49  <p:choose href="#archive">
50
51    <!-- HTML document: its links need to be updated -->
52    <p:when test="/archive/response/document/@content-type='text/html'">
53
54      <!-- Store the document -->
55      <p:processor name="oxf:file-serializer">
56        <p:input name="config">
57          <config>
58            <scope>session</scope>
59          </config>
60        </p:input>
61        <p:input name="data" href="#archive#xpointer(/archive/response/document)"/>
62        <p:output name="data" id="url-written"/>
63      </p:processor>
64
65      <!-- And read it as HTML -->
66      <p:processor name="oxf:url-generator">
67        <p:input name="config" transform="oxf:xslt" href="#url-written">
68          <config xsl:version="2.0">
69            <url>
70              <xsl:value-of select="/*"/>
71            </url>
72            <mode>html</mode>
73          </config>
74        </p:input>
75        <p:output name="data" id="html" debug="html"/>
76      </p:processor>
77
78      <!-- Get a list of links to update -->
79      <!-- TODO: support links in inline CSS -->
80      <p:processor name="oxf:unsafe-xslt">
81        <p:input name="data" href="#html"/>
82        <p:input name="request" href="#archive#xpointer(/archive/request)"/>
83        <p:input name="config">
84          <xsl:stylesheet version="2.0">
85            <xsl:variable name="base" select="doc('input:request')/request/location"/>
86            <xsl:template match="/">
87              <links>
88                <xsl:variable name="links" as="node()*">
89                  <xsl:apply-templates/>
90                </xsl:variable>
91                <xsl:for-each-group select="$links" group-by="@href">
92                  <xsl:variable name="abs-href" select="resolve-uri(@href, $base)"/>
93                  <xsl:variable name="tokens" select="tokenize($abs-href, '/')"/>
94                  <xsl:variable name="last-token" select="$tokens[last()]"/>
95                  <xsl:variable name="tokens2" select="tokenize($last-token, '\.')"/>
96                  <xsl:variable name="extension" select="$tokens2[last()]"/>
97                  <link abs-href="{$abs-href}" new-href="{saxon:string-to-hexBinary(substring($abs-href, 1, string-length($abs-href) - string-length($extension) - 1), 'utf-8')}.{$extension}"
98                    filename="{saxon:string-to-hexBinary($abs-href, 'utf-8')}.xml">
99                    <xsl:copy-of select="@*"/>
100                  </link>
101                </xsl:for-each-group>
102              </links>
103            </xsl:template>
104            <xsl:template match="text()"/>
105            <xsl:template match="link[@rel='stylesheet']">
106              <link>
107                <xsl:copy-of select="@*"/>
108              </link>
109            </xsl:template>
110            <xsl:template match="img">
111              <link href="{@src}" type="image/*"/>
112            </xsl:template>
113            <xsl:template match="script[@src]">
114              <link href="{@src}" type="{@type}"/>
115            </xsl:template>
116          </xsl:stylesheet>
117        </p:input>
118        <p:output name="data" id="links" debug="links"/>
119      </p:processor>
120
121      <!-- Update the links -->
122      <p:processor name="oxf:unsafe-xslt">
123        <p:input name="data" href="#html"/>
124        <p:input name="request" href="#archive#xpointer(/archive/request)"/>
125        <p:input name="links" href="#links"/>
126        <p:input name="config">
127          <xsl:stylesheet version="2.0">
128            <xsl:variable name="links" select="doc('input:links')/links"/>
129            <xsl:variable name="base" select="doc('input:request')/request/location"/>
130            <xsl:key name="link" match="link" use="@href"/>
131            <xsl:template match="@*|node()">
132              <xsl:copy>
133                <xsl:apply-templates select="@*|node()"/>
134              </xsl:copy>
135            </xsl:template>
136            <xsl:template match="link[@rel='stylesheet']/@href|img/@src|script/@src">
137              <xsl:attribute name="{name(.)}">
138                <xsl:value-of select="$links/key('link', current())/@new-href"/>
139              </xsl:attribute>
140            </xsl:template>
141            <xsl:template match="link[@rel!='stylesheet']/@href|a/@href">
142              <xsl:attribute name="{name(.)}">
143                <xsl:value-of select="resolve-uri(., $base)"/>
144              </xsl:attribute>
145            </xsl:template>
146          </xsl:stylesheet>
147        </p:input>
148        <p:output name="data" id="rewritten" debug="rewritten"/>
149      </p:processor>
150
151      <!-- Store the rewritten document in the database -->
152      <p:processor name="oxf:pipeline">
153        <p:input name="config" href="/data-access.xpl
154"/>
155        <p:input name="data" transform="oxf:xslt" href="#data">
156          <config xsl:version="2.0">
157            <relpath>
158              <xsl:value-of select="/action/@directory"/>
159              <xsl:text>rewritten-</xsl:text>
160              <xsl:value-of select="/action/@filename"/>
161            </relpath>
162            <operation>write</operation>
163            <type>document</type>
164          </config>
165        </p:input>
166        <p:input name="param" href="#rewritten"/>
167        <p:output name="data" id="response3"/>
168      </p:processor>
169      <p:processor name="oxf:null-serializer">
170        <p:input name="data" href="#response3"/>
171      </p:processor>
172
173
174
175      <!-- Update the archive index -->
176      <p:processor name="oxf:pipeline">
177        <p:input name="config" href="/data-access.xpl
178"/>
179        <p:input name="data" transform="oxf:xslt" href="#data">
180          <config xsl:version="2.0">
181            <relpath>
182              <xsl:value-of select="/action/@directory"/>
183              <xsl:text>index.xml</xsl:text>
184            </relpath>
185            <operation>write</operation>
186            <type>xquery</type>
187            <parameter name="url" type="string">
188              <xsl:value-of select="/action/@url"/>
189            </parameter>
190            <parameter name="filename" type="string">
191              <xsl:value-of select="/action/@filename"/>
192            </parameter>
193            <parameter name="filename-rewritten" type="string">
194              <xsl:text>rewritten-</xsl:text>
195              <xsl:value-of select="/action/@filename"/>
196            </parameter>
197          </config>
198        </p:input>
199        <p:input name="param">
200          <xquery><![CDATA[
201for $as in /archive-set
202    return
203      update
204        insert <archive url=$(url) href=$(filename) href-rewritten=$(filename-rewritten) dateTime="{current-dateTime()}"/>
205        into $as               
206                ]]></xquery>
207        </p:input>
208        <p:output name="data" id="response1"/>
209      </p:processor>
210      <p:processor name="oxf:null-serializer">
211        <p:input name="data" href="#response1"/>
212      </p:processor>
213
214      <!-- Update the queue -->
215      <p:processor name="oxf:pipeline">
216        <p:input name="config" href="/data-access.xpl
217"/>
218        <p:input name="data" transform="oxf:xslt" href="aggregate('root', #data, #links)">
219          <config xsl:version="2.0">
220            <relpath>queue.xml</relpath>
221            <operation>write</operation>
222            <type>xquery</type>
223            <parameter name="directory" type="string">
224              <xsl:value-of select="/root/action/@directory"/>
225            </parameter>
226            <parameter name="uuid" type="string">
227              <xsl:value-of select="/root/action/@uuid"/>
228            </parameter>
229            <parameter name="priority" type="string">
230              <xsl:value-of select="/root/action/@priority"/>
231            </parameter>
232            <parameter name="links" type="node-set">
233              <xsl:copy-of select="/root/links"/>
234            </parameter>
235          </config>
236        </p:input>
237        <p:input name="param">
238          <xquery><![CDATA[
239declare namespace util = "http://exist-db.org/xquery/util";
240declare variable $links := $(links);
241
242for $q in /queue return
243    update
244        insert
245          for $href in distinct-values($links/link/@abs-href)
246            let $link := $links/link[@abs-href = $href][1]
247            return <action priority=$(priority) uuid="{util:uuid()}" type="archive-resource" url="{$link/@abs-href}" directory=$(directory) filename="{$link/@filename}"/>
248        into $q,
249       
250for $a in /queue/action where $a/@uuid = $(uuid) return
251    update
252        delete $a
253       
254                ]]></xquery>
255        </p:input>
256        <p:output name="data" id="response4" debug="response"/>
257      </p:processor>
258      <p:processor name="oxf:null-serializer">
259        <p:input name="data" href="#response4"/>
260      </p:processor>
261
262
263    </p:when>
264
265    <!-- Otherwise: no need to rewrite -->
266    <p:otherwise>
267      <!-- Update the archive index -->
268      <p:processor name="oxf:pipeline">
269        <p:input name="config" href="/data-access.xpl
270"/>
271        <p:input name="data" transform="oxf:xslt" href="#data">
272          <config xsl:version="2.0">
273            <relpath>
274              <xsl:value-of select="/action/@directory"/>
275              <xsl:text>index.xml</xsl:text>
276            </relpath>
277            <operation>write</operation>
278            <type>xquery</type>
279            <parameter name="url" type="string">
280              <xsl:value-of select="/action/@url"/>
281            </parameter>
282            <parameter name="filename" type="string">
283              <xsl:value-of select="/action/@filename"/>
284            </parameter>
285          </config>
286        </p:input>
287        <p:input name="param">
288          <xquery><![CDATA[
289for $as in /archive-set
290    return
291      update
292        insert <archive url=$(url) href=$(filename) dateTime="{current-dateTime()}"/>
293        into $as               
294                ]]></xquery>
295        </p:input>
296        <p:output name="data" id="response1"/>
297      </p:processor>
298      <p:processor name="oxf:null-serializer">
299        <p:input name="data" href="#response1"/>
300      </p:processor>
301
302      <!-- Update the queue -->
303      <p:processor name="oxf:pipeline">
304        <p:input name="config" href="/data-access.xpl
305"/>
306        <p:input name="data" transform="oxf:xslt" href="#data">
307          <config xsl:version="2.0">
308            <relpath>queue.xml</relpath>
309            <operation>write</operation>
310            <type>xquery</type>
311            <parameter name="uuid" type="string">
312              <xsl:value-of select="/action/@uuid"/>
313            </parameter>
314          </config>
315        </p:input>
316        <p:input name="param">
317          <xquery><![CDATA[
318
319for $a in /queue/action where $a/@uuid = $(uuid) return
320    update
321        delete $a
322       
323                ]]></xquery>
324        </p:input>
325        <p:output name="data" id="response4" debug="response"/>
326      </p:processor>
327      <p:processor name="oxf:null-serializer">
328        <p:input name="data" href="#response4"/>
329      </p:processor>
330
331    </p:otherwise>
332
333  </p:choose>
334
335</p:config>
Note: See TracBrowser for help on using the repository browser.