#!/usr/bin/perl
#
# Partitions a WXR export into several smaller files, each of which is a
# complete document: the original header (everything before the first
# <item>), a run of whole items, and a closing footer.
#
# Usage: script path-to-exported-xml [limit-in-KB (default: 500KB)]
#
# The parser is line oriented: it expects the <item>, </item> and </channel>
# tags to sit on lines of their own. Anything else on such a line is dropped.

use warnings;
use strict;

our $CF        = undef;         # handle of the chunk file being written, if any
our $target    = '.';           # directory the chunk files are created in
our $limit     = 500 * 1024;    # maximum size of one chunk file; the input is
                                # read without a decoding layer, so length()
                                # counts bytes
our $header    = '';            # text preceding the first <item>; copied to
                                # the top of every chunk
our $curr_file = 0;             # number used in the next chunk's file name

# Debug hook. Does nothing unless the print below is uncommented.
sub d {
    #print @_, "\n";
}

# Closes the current chunk file, if one is open, and forgets its handle.
# Dies when the close fails, because buffered write errors surface there.
sub _close_chunk {
    return if !defined $CF;
    close $CF or die("Couldn't close chunk file: $!");
    $CF = undef;
    return;
}

# Closes the current chunk (if any) and starts "$target/export<N>.xml",
# writing the shared header into it and advancing the chunk counter.
sub next_file {
    _close_chunk();

    my $path = "$target/export$curr_file.xml";
    open $CF, '>', $path or die("Couldn't open $path for writing: $!");
    print {$CF} $header;
    $curr_file++;
    return;
}

# Splits the WXR file $export into chunk files no larger than $limit.
#
# Dies when the input cannot be opened, when header plus footer alone exceed
# the limit, or when a single item cannot fit into an otherwise empty chunk.
# Warns when the input ends before </channel> was seen.
sub part {
    my ($export) = @_;

    open my $in, '<', $export or die("Couldn't open $export: $!");

    # The </channel> line is consumed below and nothing after it is read, so
    # the closing tags have to be supplied here.
    # NOTE(review): assumes the document ends with </rss> after </channel>.
    my $footer = "</channel>\n</rss>\n";

    # Space left for items in one chunk. Kept in a local so the global
    # $limit is not altered as a side effect of calling part().
    my $room = $limit - length($footer);

    my $did_header  = 0;     # true once the first <item> has been seen
    my $curr_length = 0;     # size of the items written to the current chunk
    my $curr_item   = '';    # text of the item being collected

    while (my $line = <$in>) {
        if (!$did_header) {
            if ($line =~ /<item>/) {
                d('item');
                $did_header = 1;

                my $header_length = length($header);
                die("Cannot fit header in limit!") if $header_length > $room;

                # The header is charged against the budget once, here, so
                # $curr_length only ever has to track item bytes.
                $room -= $header_length;

                next_file();

                # The line carrying the opening tag is not stored, so the
                # tag is re-emitted as the start of the collected item.
                $curr_item = "<item>\n";
            }
            else {
                d('header');
                $header .= $line;
            }
            next;
        }

        if ($line =~ m{</channel>}) {
            # End of the channel: finish the last chunk and stop reading.
            print {$CF} $footer;
            _close_chunk();
            last;
        }

        if ($line =~ m{</item>}) {
            # End of an item; the closing tag's line is not stored either.
            $curr_item .= "</item>\n";
            my $curr_item_length = length($curr_item);
            die("Cannot fit this item in limit!") if $curr_item_length > $room;

            d("$curr_length $curr_item_length $room");
            if ($curr_length + $curr_item_length > $room) {
                # The item does not fit into this chunk: finish it and
                # continue in a fresh one.
                print {$CF} $footer;
                next_file();
                $curr_length = 0;
            }

            print {$CF} $curr_item;
            $curr_length += $curr_item_length;
            $curr_item = '';
        }
        else {
            $curr_item .= $line;
        }
    }

    close $in;

    # Still holding an open chunk means </channel> never showed up.
    if (defined $CF) {
        warn("Input ended before </channel>; the last chunk is incomplete\n");
        _close_chunk();
    }

    return;
}

if (!defined($ARGV[0])) {
    die("Partitions WXR files in smaller, still importable chunks. Usage:\n$0 path-to-exported-xml [limit-in-KB (default: 500KB)]\n");
}

if (defined($ARGV[1])) {
    # Reject anything that is not a positive number instead of silently
    # treating it as a limit of zero.
    die("Limit must be a positive number of KB, got '$ARGV[1]'\n")
        unless $ARGV[1] =~ /\A\d+(?:\.\d+)?\z/ && $ARGV[1] > 0;
    $limit = 1024 * $ARGV[1];
}

part($ARGV[0]);