Changeset 5b162a6
- Timestamp:
- 27/04/2012 17:34:18 (10 years ago)
- Branches:
- master
- Children:
- 9bce34f
- Parents:
- 466d447
- git-author:
- Eric van der Vlist <vdv@dyomedea.com> (27/04/2012 17:34:18)
- git-committer:
- Eric van der Vlist <vdv@dyomedea.com> (27/04/2012 17:34:18)
- Location:
- archiver/pipelines/actions
- Files:
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
archiver/pipelines/actions/package-heritrix-warc.xpl
r466d447 r5b162a6 78 78 <p:output name="data" id="index" debug="index"/> 79 79 </p:processor> 80 81 80 81 82 83 84 <!-- Loop over the WARC file to store and transform documents --> 85 <p:for-each href="#warc-xml" select="/warc/record[headers/header[@name='Content-Type'] = 'application/http; msgtype=response' and content/status/status = 200]" root="root" id="loop"> 86 <p:processor name="oxf:xslt"> 87 <p:input name="data" href="aggregate('root', current(), #index)" debug="aggregate"/> 88 <p:input name="config"> 89 <resource xsl:version="2.0"> 90 <xsl:copy-of select="/root/index/resource[uri = /root/record/headers/header[@name = 'WARC-Target-URI']]/*"/> 91 </resource> 92 </p:input> 93 <p:output name="data" id="index-entry" debug="index-entry"/> 94 </p:processor> 95 <p:choose href="#index-entry"> 96 <p:when test="/entry/embeds"> 97 <!-- The resource has embedded content and must be rewritten --> 98 <p:processor name="oxf:identity"> 99 <p:input name="data" href="current()#xpointer(/record/content/document)"/> 100 <p:output name="data" id="document"/> 101 </p:processor> 102 </p:when> 103 <p:otherwise> 104 <!-- The resource can be stored --> 105 <p:processor name="oxf:identity"> 106 <p:input name="data" href="current()#xpointer(/record/content/document)"/> 107 <p:output name="data" id="document"/> 108 </p:processor> 109 </p:otherwise> 110 </p:choose> 111 <p:processor name="oxf:file-serializer"> 112 <p:input name="config"> 113 <config> 114 <scope>request</scope> 115 </config> 116 </p:input> 117 <p:input name="data" href="#document"/> 118 <p:output name="data" id="doc-location" debug="doc-location"/> 119 </p:processor> 120 <p:processor name="oxf:identity"> 121 <p:input name="data" href="aggregate('doc', #index-entry, #doc-location)"/> 122 <p:output name="data" ref="loop"/> 123 </p:processor> 124 </p:for-each> 82 125 83 126 <p:processor name="oxf:null-serializer"> 84 <p:input name="data" href="#index"/> 85 </p:processor> 86 87 88 <!-- Loop over the WARC file to store and 
transform documents --> 89 <!-- <p:for-each href="#warc-xml" select="/warc/record[header[name='Content-Type'] = 'application/http; msgtype=response' and content/status/status = 200]"> </p:for-each> 90 --> 127 <p:input name="data" href="#loop" debug="loop"/> 128 </p:processor> 129 130 131 91 132 <!-- Store the WARC in a temp file --> 92 133 <p:processor name="oxf:file-serializer"> -
archiver/pipelines/actions/resource-index.xslt
r466d447 r5b162a6 1 1 <?xml version="1.0" encoding="UTF-8"?> 2 2 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:owk="http://owark.org/xslt/" xmlns:xd="http://www.oxygenxml.com/ns/doc/xsl" 3 exclude-result-prefixes="xs xd " version="2.0"> 3 exclude-result-prefixes="xs xd owk" version="2.0"> 4 4 <xd:doc scope="stylesheet"> 5 5 <xd:desc>
Note: See TracChangeset for help on using the changeset viewer.