Changeset 9bce34f
- Timestamp:
- 27/04/2012 18:29:15 (10 years ago)
- Branches:
- master
- Children:
- c79bd8e
- Parents:
- 5b162a6
- git-author:
- Eric van der Vlist <vdv@dyomedea.com> (27/04/2012 18:29:15)
- git-committer:
- Eric van der Vlist <vdv@dyomedea.com> (27/04/2012 18:29:15)
- Location:
- archiver/pipelines/actions
- Files:
-
- 2 added
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
archiver/pipelines/actions/package-heritrix-warc.xpl
r5b162a6 r9bce34f 94 94 </p:processor> 95 95 <p:choose href="#index-entry"> 96 <p:when test="/ entry/embeds">96 <p:when test="/resource/embeds"> 97 97 <!-- The resource has embedded content and must be rewritten --> 98 <p:processor name="oxf:identity"> 99 <p:input name="data" href="current()#xpointer(/record/content/document)"/> 100 <p:output name="data" id="document"/> 98 99 <!-- Call the corresponding pipeline --> 100 <p:processor name="oxf:url-generator"> 101 <p:input name="config" transform="oxf:xslt" href="#index-entry"> 102 <config xsl:version="2.0"> 103 <url> 104 <xsl:text>oxf:/actions/mediatypes/warc-</xsl:text> 105 <xsl:value-of select="/resource/type"/> 106 <xsl:text>.xpl</xsl:text> 107 </url> 108 </config> 109 </p:input> 110 <p:output name="data" id="pipeline"/> 101 111 </p:processor> 112 113 <p:processor name="oxf:pipeline"> 114 <p:input name="config" href="#pipeline"/> 115 <p:input name="record" href="current()"/> 116 <p:input name="index" href="#index"/> 117 <p:input name="index-entry" href="#index-entry"/> 118 <p:output name="rewritten" id="document" debug="rewritten"/> 119 </p:processor> 120 102 121 </p:when> 103 122 <p:otherwise> … … 124 143 </p:for-each> 125 144 126 <p:processor name="oxf:null-serializer">127 <p:input name="data" href="#loop" debug="loop"/>128 </p:processor>129 130 145 131 146 … … 142 157 143 158 <p:processor name="oxf:zip"> 144 <p:input name="data" transform="oxf:unsafe-xslt" href="aggregate('root', #warc-location )">159 <p:input name="data" transform="oxf:unsafe-xslt" href="aggregate('root', #warc-location, #loop)"> 145 160 <files xsl:version="2.0" file-name="archive.zip"> 146 161 <file name="archive.warc"> 147 162 <xsl:value-of select="/root/url"/> 148 163 </file> 149 <!--<xsl:for-each select="/root/files/file[url]"> 150 <xsl:choose> 151 <xsl:when test="position()=1"> 152 <!-\- TODO: support non HTML documents... 
-\-> 153 <file name="rewritten/index.html"> 154 <xsl:value-of select="url"/> 155 </file> 156 </xsl:when> 157 <xsl:otherwise> 158 <xsl:variable name="tokens" select="tokenize(archive/@url, '/')"/> 159 <xsl:variable name="last-token" select="$tokens[last()]"/> 160 <xsl:variable name="tokens2" select="tokenize($last-token, '\.')"/> 161 <xsl:variable name="extension" select="$tokens2[last()]"/> 162 <file name="rewritten/{saxon:string-to-hexBinary(substring(archive/@url, 1, string-length(archive/@url) - string-length($extension) - 1), 'utf-8')}.{$extension}"> 163 <xsl:value-of select="url"/> 164 </file> 165 </xsl:otherwise> 166 </xsl:choose> 167 </xsl:for-each>--> 164 <xsl:for-each select="/root/root/doc"> 165 <file name="rewritten/{resource/local-name}"> 166 <xsl:value-of select="url"/> 167 </file> 168 </xsl:for-each> 168 169 </files> 169 170 </p:input> -
archiver/pipelines/actions/resource-index.xslt
r5b162a6 r9bce34f 59 59 <xsl:value-of select="owk:unique-local-name(.)"/> 60 60 </local-name> 61 <type> 62 <xsl:choose> 63 <xsl:when test="content-type = 'text/html'">html</xsl:when> 64 <xsl:when test="content-type = 'application/xhtml+xml'">html</xsl:when> 65 <xsl:when test="content-type = 'text/plain'">text</xsl:when> 66 <xsl:otherwise> 67 <xsl:value-of select="substring-after(content-type, '/')"/> 68 </xsl:otherwise> 69 </xsl:choose> 70 </type> 61 71 <xsl:apply-templates select="." mode="redirect"/> 62 72 <xsl:apply-templates select="/log/entry[referer = current()/uri and ends-with(discovery-path, 'E')]" mode="embedding"/>
Note: See TracChangeset for help on using the changeset viewer.