# Copyright (C) 2023 Masaya YAMAGUCHI

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

use strict;
use utf8;

# Standard streams carry UTF-8 text.
binmode STDIN, ':utf8';
# ':raw' is applied before ':utf8' on STDOUT only; presumably this is meant to
# keep newline translation (e.g. a CRLF layer) out of the generated XML --
# NOTE(review): confirm the intent.
binmode STDOUT, ':raw:utf8';
binmode STDERR, ':utf8';


# First command-line argument: the metadata table. It is read through a
# UTF-16 decoding layer and parsed further down as tab-separated text.
my $filename = shift(@ARGV);

# Fail with a usage line rather than an open() error on an undefined name.
die "Usage: $0 METADATA_FILE XML_FILE\n" if(!defined($filename));

# Include $! so the reason (missing file, permissions, ...) is reported.
open(META, "<: encoding(utf16)", $filename) || die "Cannot open metadata file: $filename: $!\n";

# Attribute names written onto each rewritten corpus tag. The N-th name is
# paired with the N-th tab-separated field of the matching metadata record,
# so the order here must follow the column order of the metadata file.
my @attributes = qw(
    id
    sex
    env
    theme
    native
    history
    level
    chr_grade
    grammar_grade
);

# Element name of the corpus tag whose opening line gets rewritten.
my $corpus_name = 'jlcc';


# Metadata index: first tab-separated field of a record => the whole record
# line (kept unsplit, with its trailing CR/LF characters removed).
my %metadata = ();

while (defined(my $record = <META>)) {
    $record =~ s/[\r\n]+$//;

    # Lines starting with this label are skipped (presumably the header row).
    next if ($record =~ /^学習者の/);

    my @fields = split(/\t/, $record);
    $metadata{$fields[0]} = $record;
}

# Second command-line argument: the XML corpus file. Every line is copied to
# STDOUT unchanged, except lines that start with `<CORPUS id="...">`; those
# are replaced by a tag carrying the metadata fields of that id as attributes.
$filename = shift(@ARGV);
die "Usage: $0 METADATA_FILE XML_FILE\n" if(!defined($filename));
open(XML, "<: encoding(utf8)", $filename) || die "Cannot open xml file: $filename: $!\n";

while(my $line = <XML>){
    # \Q...\E keeps the corpus name literal inside the pattern.
    if($line =~ /^(<\Q$corpus_name\E) id=\"(.+?)\"/){
        my $newTag = $1;
        my $id = $2;

        # Test for presence of the key, not truth of the value, so a
        # record whose stored line happens to be false is still accepted.
        die "no meta data: $id" if(!exists($metadata{$id}));

        my @metaRecord = split("\t", $metadata{$id});

        # Pair the i-th attribute name with the i-th metadata field.
        # split() drops trailing empty fields, so a short record yields
        # fewer values than names; those attributes are emitted as "".
        for my $i (0 .. $#attributes){
            next if($attributes[$i] eq "");
            my $value = defined($metaRecord[$i]) ? $metaRecord[$i] : "";
            $newTag .= " $attributes[$i]=\"$value\"";
        }

        # NOTE(review): whatever followed id="..." on the input line is
        # discarded, and values are written without XML escaping; both
        # match the previous behavior -- confirm this is intended.
        print "$newTag>\n";
    } else {
        print $line;
    }
}

close(XML);
