Question : Grouping Data in XML

Hi,

I am a SQL Server developer without much knowledge of XML and XSL.

I receive a bunch of XML files on a daily basis.  The data is to do with sales from an EPOS till.  So, for example, the file might contain details of bottles of Coke.  The file I receive is big because the transactions have not been consolidated.  I would like to consolidate them.  Here is a sample of incoming data:

1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129:
130:
131:
132:
133:
134:
135:
136:
137:
138:
139:
140:
<!--
  Sample of the unconsolidated incoming data.
  It holds four SalesHeader blocks: 2010-07-18/Loc1, 2010-07-19/Loc1 and two
  separate blocks for 2010-07-19/Loc2. Each block lists individual
  SalesTransaction rows. The last block also contains a row with negative
  amounts and SaleType 99 (presumably a refund or void; the post does not say)
  and a Diet Coke row (PLU 64).
-->
<MainHeader xmlns="">
    <OrganisationHeader>        
        <SalesHeader>
            <SalesDate>2010-07-18</SalesDate>
            <Location>Loc1</Location>
            <RevenueCentre>Rev1</RevenueCentre>
            <ActionIfDataExists>1</ActionIfDataExists>
            <SalesTransaction>
                <PLU>63</PLU>
                <Description>Coke</Description>
                <Quantity>1</Quantity>
                <VAT>0.2</VAT>
                <TotalGrossSales>2.20</TotalGrossSales>
                <NetSalesPrice>2.00</NetSalesPrice>
                <GrossSalesPrice>2.20</GrossSalesPrice>
                <TotalNetSales>2.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>0</SaleType>
            </SalesTransaction>
            <SalesTransaction>
                <PLU>63</PLU>
                <Description>Coke</Description>
                <Quantity>1</Quantity>
                <VAT>0.2</VAT>
                <TotalGrossSales>2.20</TotalGrossSales>
                <NetSalesPrice>2.00</NetSalesPrice>
                <GrossSalesPrice>2.20</GrossSalesPrice>
                <TotalNetSales>2.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>0</SaleType>
            </SalesTransaction>
            
        </SalesHeader>
        <SalesHeader>
            <SalesDate>2010-07-19</SalesDate>
            <Location>Loc1</Location>
            <RevenueCentre>Rev1</RevenueCentre>
            <ActionIfDataExists>1</ActionIfDataExists>
            <SalesTransaction>
                <PLU>63</PLU>
                <Description>Coke</Description>
                <Quantity>1</Quantity>
                <VAT>0.2</VAT>
                <TotalGrossSales>2.20</TotalGrossSales>
                <NetSalesPrice>2.00</NetSalesPrice>
                <GrossSalesPrice>2.20</GrossSalesPrice>
                <TotalNetSales>2.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>0</SaleType>
            </SalesTransaction>
            <SalesTransaction>
                <PLU>63</PLU>
                <Description>Coke</Description>
                <Quantity>1</Quantity>
                <VAT>0.2</VAT>
                <TotalGrossSales>2.20</TotalGrossSales>
                <NetSalesPrice>2.00</NetSalesPrice>
                <GrossSalesPrice>2.20</GrossSalesPrice>
                <TotalNetSales>2.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>0</SaleType>
            </SalesTransaction>
            
        </SalesHeader>
        <SalesHeader>
            <SalesDate>2010-07-19</SalesDate>
            <Location>Loc2</Location>
            <RevenueCentre>Rev1</RevenueCentre>
            <ActionIfDataExists>1</ActionIfDataExists>
            <SalesTransaction>
                <PLU>63</PLU>
                <Description>Coke</Description>
                <Quantity>1</Quantity>
                <VAT>0.2</VAT>
                <TotalGrossSales>2.20</TotalGrossSales>
                <NetSalesPrice>2.00</NetSalesPrice>
                <GrossSalesPrice>2.20</GrossSalesPrice>
                <TotalNetSales>2.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>0</SaleType>
            </SalesTransaction>
            <SalesTransaction>
                <PLU>63</PLU>
                <Description>Coke</Description>
                <Quantity>1</Quantity>
                <VAT>0.2</VAT>
                <TotalGrossSales>2.20</TotalGrossSales>
                <NetSalesPrice>2.00</NetSalesPrice>
                <GrossSalesPrice>2.20</GrossSalesPrice>
                <TotalNetSales>2.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>0</SaleType>
            </SalesTransaction>
            
        </SalesHeader>
        <SalesHeader>
            <SalesDate>2010-07-19</SalesDate>
            <Location>Loc2</Location>
            <RevenueCentre>Rev1</RevenueCentre>
            <ActionIfDataExists>1</ActionIfDataExists>
            <SalesTransaction>
                <PLU>63</PLU>
                <Description>Coke</Description>
                <Quantity>-1</Quantity>
                <VAT>-0.2</VAT>
                <TotalGrossSales>-2.20</TotalGrossSales>
                <NetSalesPrice>-2.00</NetSalesPrice>
                <GrossSalesPrice>-2.20</GrossSalesPrice>
                <TotalNetSales>-2.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>99</SaleType>
            </SalesTransaction>
            <SalesTransaction>
                <PLU>63</PLU>
                <Description>Coke</Description>
                <Quantity>1</Quantity>
                <VAT>0.2</VAT>
                <TotalGrossSales>2.20</TotalGrossSales>
                <NetSalesPrice>2.00</NetSalesPrice>
                <GrossSalesPrice>2.20</GrossSalesPrice>
                <TotalNetSales>2.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>0</SaleType>
            </SalesTransaction>
            <SalesTransaction>
                <PLU>64</PLU>
                <Description>Diet Coke</Description>
                <Quantity>1</Quantity>
                <VAT>0.3</VAT>
                <TotalGrossSales>3.30</TotalGrossSales>
                <NetSalesPrice>3.00</NetSalesPrice>
                <GrossSalesPrice>3.30</GrossSalesPrice>
                <TotalNetSales>3.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>0</SaleType>
            </SalesTransaction>            
        </SalesHeader>
    </OrganisationHeader>
</MainHeader>



My desired output would group certain tags together - namely,

<SalesDate>, <ActionIfDataExists>, <Location>, <RevenueCentre>, <CategoryCode>, <PLU>, <SaleType>, <NetSalesPrice>, <GrossSalesPrice>

The quantities would be summed, as would the sales figures, and I would end up with something looking like this:

1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
<!--
  Desired consolidated output.
  There is one SalesHeader per SalesDate/Location/RevenueCentre combination.
  Within it there is one SalesTransaction per distinct PLU/SaleType/price
  combination, with Quantity, VAT, TotalGrossSales and TotalNetSales summed
  over the rows that were merged.
  The two 2010-07-19/Loc2 headers of the incoming sample are merged, so its
  three SaleType 0 Coke rows become a single row with Quantity 3, while the
  SaleType 99 row stays separate.
-->
<MainHeader xmlns="">
    <OrganisationHeader>        
        <SalesHeader>
            <SalesDate>2010-07-18</SalesDate>
            <Location>Loc1</Location>
            <RevenueCentre>Rev1</RevenueCentre>
            <ActionIfDataExists>1</ActionIfDataExists>
            <SalesTransaction>
                <PLU>63</PLU>
                <Description>Coke</Description>
                <Quantity>2</Quantity>
                <VAT>0.4</VAT>
                <TotalGrossSales>4.40</TotalGrossSales>
                <NetSalesPrice>2.00</NetSalesPrice>
                <GrossSalesPrice>2.20</GrossSalesPrice>
                <TotalNetSales>4.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>0</SaleType>
            </SalesTransaction>
         </SalesHeader>
        <SalesHeader>
            <SalesDate>2010-07-19</SalesDate>
            <Location>Loc1</Location>
            <RevenueCentre>Rev1</RevenueCentre>
            <ActionIfDataExists>1</ActionIfDataExists>
            <SalesTransaction>
                <PLU>63</PLU>
                <Description>Coke</Description>
                <Quantity>2</Quantity>
                <VAT>0.4</VAT>
                <TotalGrossSales>4.40</TotalGrossSales>
                <NetSalesPrice>2.00</NetSalesPrice>
                <GrossSalesPrice>2.20</GrossSalesPrice>
                <TotalNetSales>4.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>0</SaleType>
            </SalesTransaction>
        </SalesHeader>
        <SalesHeader>
            <SalesDate>2010-07-19</SalesDate>
            <Location>Loc2</Location>
            <RevenueCentre>Rev1</RevenueCentre>
            <ActionIfDataExists>1</ActionIfDataExists>
            <SalesTransaction>
                <PLU>63</PLU>
                <Description>Coke</Description>
                <Quantity>3</Quantity>
                <VAT>0.6</VAT>
                <TotalGrossSales>6.60</TotalGrossSales>
                <NetSalesPrice>2.00</NetSalesPrice>
                <GrossSalesPrice>2.20</GrossSalesPrice>
                <TotalNetSales>6.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>0</SaleType>
            </SalesTransaction>
            <SalesTransaction>
                <PLU>63</PLU>
                <Description>Coke</Description>
                <Quantity>-1</Quantity>
                <VAT>-0.2</VAT>
                <TotalGrossSales>-2.20</TotalGrossSales>
                <NetSalesPrice>-2.00</NetSalesPrice>
                <GrossSalesPrice>-2.20</GrossSalesPrice>
                <TotalNetSales>-2.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>99</SaleType>
            </SalesTransaction>     
            <SalesTransaction>
                <PLU>64</PLU>
                <Description>Diet Coke</Description>
                <Quantity>1</Quantity>
                <VAT>0.3</VAT>
                <TotalGrossSales>3.30</TotalGrossSales>
                <NetSalesPrice>3.00</NetSalesPrice>
                <GrossSalesPrice>3.30</GrossSalesPrice>
                <TotalNetSales>3.00</TotalNetSales>
                <CategoryCode>174</CategoryCode>
                <SaleType>0</SaleType>
            </SalesTransaction>               
         </SalesHeader>
    </OrganisationHeader>
</MainHeader>



I have examples of incoming files that, once imported into SQL Server, can be grouped into 190 records, versus 8850 transactions in the XML.  I would really like to import consolidated data to take some strain off the SQL box.

I know I need to use something like "for-each-group", but I'm not sure where to even start with this!

TIA

Answer : Grouping Data in XML

Here is an XSLT stylesheet that I believe generates the output you need:
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
<?xml version="1.0"?>
<!--
  Consolidates EPOS sales data using Muenchian grouping (XSLT 1.0).

  Input:  /MainHeader/OrganisationHeader/SalesHeader/SalesTransaction
  Output: the same structure, where
    * SalesHeader elements sharing SalesDate, Location, RevenueCentre and
      ActionIfDataExists are merged into one header, and
    * within each merged header, SalesTransaction elements sharing PLU,
      Description, CategoryCode, SaleType, NetSalesPrice and GrossSalesPrice
      are collapsed into one row whose Quantity, VAT, TotalGrossSales and
      TotalNetSales are the sums over the group.

  Every field that is copied from the first row of a group is part of that
  group's key, so a copied value is always valid for the whole group.

  Key parts are joined with '|' so that adjacent values cannot run together
  (without a separator, "Loc1" + "2" and "Loc" + "12" give the same key).
  The '|' character is assumed not to occur in any key field.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="xml" indent="yes"/>

  <!-- Header group key. -->
  <xsl:key name="sh" match="SalesHeader"
           use="concat(SalesDate,'|',Location,'|',RevenueCentre,'|',ActionIfDataExists)"/>

  <!-- Transaction group key: the header key parts followed by the per-row grouping fields. -->
  <xsl:key name="st" match="SalesTransaction"
           use="concat(../SalesDate,'|',../Location,'|',../RevenueCentre,'|',../ActionIfDataExists,'|',PLU,'|',Description,'|',CategoryCode,'|',SaleType,'|',NetSalesPrice,'|',GrossSalesPrice)"/>

  <xsl:template match="/MainHeader/OrganisationHeader">
    <MainHeader xmlns="">
      <OrganisationHeader>
        <!-- Visit only the first SalesHeader of each header group. -->
        <xsl:for-each select="SalesHeader[generate-id(.)=generate-id(key('sh',concat(SalesDate,'|',Location,'|',RevenueCentre,'|',ActionIfDataExists))[1])]">
          <xsl:variable name="cur-header"
                        select="concat(SalesDate,'|',Location,'|',RevenueCentre,'|',ActionIfDataExists)"/>
          <SalesHeader>
            <xsl:copy-of select="SalesDate"/>
            <xsl:copy-of select="Location"/>
            <xsl:copy-of select="RevenueCentre"/>
            <xsl:copy-of select="ActionIfDataExists"/>
            <!-- Across all headers of this group, visit only the first row of each transaction group. -->
            <xsl:for-each select="key('sh',$cur-header)/SalesTransaction[generate-id(.)=generate-id(key('st',concat(../SalesDate,'|',../Location,'|',../RevenueCentre,'|',../ActionIfDataExists,'|',PLU,'|',Description,'|',CategoryCode,'|',SaleType,'|',NetSalesPrice,'|',GrossSalesPrice))[1])]">
              <xsl:variable name="cur-trans"
                            select="concat(../SalesDate,'|',../Location,'|',../RevenueCentre,'|',../ActionIfDataExists,'|',PLU,'|',Description,'|',CategoryCode,'|',SaleType,'|',NetSalesPrice,'|',GrossSalesPrice)"/>
              <xsl:variable name="rows" select="key('st',$cur-trans)"/>
              <SalesTransaction>
                <xsl:copy-of select="PLU"/>
                <xsl:copy-of select="Description"/>
                <Quantity><xsl:value-of select="sum($rows/Quantity)"/></Quantity>
                <!-- sum() works in floating point, so the money totals go through
                     format-number to drop rounding noise and keep a fixed layout.
                     VAT prints one or two decimals (0.4, 0.35); totals always print two. -->
                <VAT><xsl:value-of select="format-number(sum($rows/VAT),'0.0#')"/></VAT>
                <TotalGrossSales><xsl:value-of select="format-number(sum($rows/TotalGrossSales),'#0.00')"/></TotalGrossSales>
                <xsl:copy-of select="NetSalesPrice"/>
                <xsl:copy-of select="GrossSalesPrice"/>
                <TotalNetSales><xsl:value-of select="format-number(sum($rows/TotalNetSales),'#0.00')"/></TotalNetSales>
                <xsl:copy-of select="CategoryCode"/>
                <xsl:copy-of select="SaleType"/>
              </SalesTransaction>
            </xsl:for-each>
          </SalesHeader>
        </xsl:for-each>
      </OrganisationHeader>
    </MainHeader>
  </xsl:template>
</xsl:stylesheet>
Random Solutions  
 
programming4us programming4us