Changeset 5b162a6


Ignore:
Timestamp:
27/04/2012 17:34:18 (10 years ago)
Author:
Eric van der Vlist <vdv@dyomedea.com>
Branches:
master
Children:
9bce34f
Parents:
466d447
git-author:
Eric van der Vlist <vdv@dyomedea.com> (27/04/2012 17:34:18)
git-committer:
Eric van der Vlist <vdv@dyomedea.com> (27/04/2012 17:34:18)
Message:

WARC mail extract loop

Location:
archiver/pipelines/actions
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • archiver/pipelines/actions/package-heritrix-warc.xpl

    r466d447 r5b162a6  
    7878        <p:output name="data" id="index" debug="index"/> 
    7979    </p:processor> 
    80      
    81      
     80 
     81 
     82 
     83 
     84    <!-- Loop over the WARC records that are HTTP responses with status 200, to store and transform their documents; per-iteration results are aggregated under a "root" element and exposed as #loop --> 
     85    <p:for-each href="#warc-xml" select="/warc/record[headers/header[@name='Content-Type'] = 'application/http; msgtype=response' and content/status/status = 200]" root="root" id="loop"> 
     86        <p:processor name="oxf:xslt"> <!-- Copy the content of the index resource whose uri equals this record's WARC-Target-URI header into a new resource element --> 
     87            <p:input name="data" href="aggregate('root', current(), #index)" debug="aggregate"/> 
     88            <p:input name="config"> 
     89                <resource xsl:version="2.0"> 
     90                    <xsl:copy-of select="/root/index/resource[uri = /root/record/headers/header[@name = 'WARC-Target-URI']]/*"/> 
     91                </resource> 
     92            </p:input> 
     93            <p:output name="data" id="index-entry" debug="index-entry"/> 
     94        </p:processor> 
     95        <p:choose href="#index-entry"> 
     96            <p:when test="/entry/embeds"> 
     97                <!-- The resource has embedded content and must be rewritten; for now the document is only passed through oxf:identity unchanged. NOTE(review): #index-entry is generated above with a resource root element, so the test /entry/embeds looks like it can never match; confirm the intended path --> 
     98                <p:processor name="oxf:identity"> 
     99                    <p:input name="data" href="current()#xpointer(/record/content/document)"/> 
     100                    <p:output name="data" id="document"/> 
     101                </p:processor> 
     102            </p:when> 
     103            <p:otherwise> 
     104                <!-- No embedded content was detected: the document can be stored as is --> 
     105                <p:processor name="oxf:identity"> 
     106                    <p:input name="data" href="current()#xpointer(/record/content/document)"/> 
     107                    <p:output name="data" id="document"/> 
     108                </p:processor> 
     109            </p:otherwise> 
     110        </p:choose> 
     111        <p:processor name="oxf:file-serializer"> 
     112            <p:input name="config"> 
     113                <config> 
     114                    <scope>request</scope> 
     115                </config> 
     116            </p:input> 
     117            <p:input name="data" href="#document"/> 
     118            <p:output name="data" id="doc-location" debug="doc-location"/> 
     119        </p:processor> 
     120        <p:processor name="oxf:identity"> <!-- Pair the index entry with the file-serializer output under a doc element as this iteration's result --> 
     121            <p:input name="data" href="aggregate('doc', #index-entry, #doc-location)"/> 
     122            <p:output name="data" ref="loop"/> 
     123        </p:processor> 
     124    </p:for-each> 
    82125 
    83126    <p:processor name="oxf:null-serializer"> 
    84         <p:input name="data" href="#index"/> 
    85     </p:processor> 
    86  
    87  
    88     <!-- Loop over the WARC file to store and transform documents --> 
    89 <!--    <p:for-each href="#warc-xml" select="/warc/record[header[name='Content-Type'] = 'application/http; msgtype=response' and content/status/status = 200]"> </p:for-each> 
    90 --> 
     127        <p:input name="data" href="#loop" debug="loop"/> 
     128    </p:processor> 
     129 
     130 
     131 
    91132    <!-- Store the WARC in a temp file --> 
    92133    <p:processor name="oxf:file-serializer"> 
  • archiver/pipelines/actions/resource-index.xslt

    r466d447 r5b162a6  
    11<?xml version="1.0" encoding="UTF-8"?> 
    22<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:owk="http://owark.org/xslt/" xmlns:xd="http://www.oxygenxml.com/ns/doc/xsl" 
    3     exclude-result-prefixes="xs xd" version="2.0"> 
     3    exclude-result-prefixes="xs xd owk" version="2.0"> 
    44    <xd:doc scope="stylesheet"> 
    55        <xd:desc> 
Note: See TracChangeset for help on using the changeset viewer.