【C#】2つのDictionaryを1つにマージする

2020-09-11

2つの Dictionary を 1つの Dictionary にマージして1つにまとめる方法の紹介です。

確認環境
実装コード
Linqでマージする
実行速度の比較

確認環境

確認環境は以下の通りです（とはいっても、どの環境でも動きます）。

.NET Core 3.1
Windows10(Core-i7 3770K)
VisualStudio2019

実装コード

以下のコードで2つの Dictionary をマージして新しい Dictionary を作成して取得できます。

/// <summary>
/// Merges two dictionaries into a newly created <see cref="Dictionary{TKey, TValue}"/>.
/// Neither input is modified. When a key exists in both dictionaries,
/// the value from <paramref name="a"/> is kept.
/// </summary>
/// <typeparam name="TKey">Key type of both dictionaries.</typeparam>
/// <typeparam name="TValue">Value type of both dictionaries.</typeparam>
/// <param name="a">Dictionary whose entries take priority.</param>
/// <param name="b">Dictionary whose entries are used only for keys missing from <paramref name="a"/>.</param>
/// <returns>A new dictionary holding the entries of both inputs.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="a"/> or <paramref name="b"/> is <c>null</c>.
/// </exception>
public static IDictionary<TKey, TValue> Marge<TKey, TValue>(IDictionary<TKey, TValue> a,
                                                            IDictionary<TKey, TValue> b)
{
    // Fail fast with the offending parameter name instead of a NullReferenceException
    // raised from inside the loops. (Fully qualified so no extra using is required.)
    if (a is null)
    {
        throw new System.ArgumentNullException(nameof(a));
    }

    if (b is null)
    {
        throw new System.ArgumentNullException(nameof(b));
    }

    // The result holds at least a.Count entries, so reserve that much up front.
    var table = new Dictionary<TKey, TValue>(a.Count);
    foreach (var item in a)
    {
        table[item.Key] = item.Value;
    }

    foreach (var item in b)
    {
        // TryAdd leaves an existing entry untouched, so a's value wins,
        // and it needs only one lookup instead of ContainsKey + indexer.
        table.TryAdd(item.Key, item.Value);
    }

    return table;
}

// Usage

// Prepare the data
var d1 = new Dictionary<string, int> { ["a"] = 1, ["b"] = 2 };
var d2 = new Dictionary<string, int> { ["b"] = 3, ["c"] = 4 };

// Merge. Marge returns IDictionary<TKey, TValue>, so the result is declared with that type.
IDictionary<string, int> d3 = Marge(d1, d2);
// > a : 1
// > b : 2   (the key exists in both; the value from d1 is kept)
// > c : 4

また、自分の Dictionary に別の Dictionary をマージするには以下のように拡張メソッドを記述します。

public static class DictionaryExtension
{
    /// <summary>
    /// Merges the entries of <paramref name="b"/> into the current
    /// <see cref="IDictionary{TKey, TValue}"/> in place.
    /// Entries of <paramref name="b"/> whose key already exists in
    /// <paramref name="a"/> are skipped, so existing values are never overwritten.
    /// </summary>
    /// <typeparam name="TKey">Key type of both dictionaries.</typeparam>
    /// <typeparam name="TValue">Value type of both dictionaries.</typeparam>
    /// <param name="a">Dictionary that receives the new entries (modified).</param>
    /// <param name="b">Dictionary that supplies the entries (not modified).</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="a"/> or <paramref name="b"/> is <c>null</c>.
    /// </exception>
    public static void Marge<TKey, TValue>(this IDictionary<TKey, TValue> a,
                                                IDictionary<TKey, TValue> b)
    {
        // Report the offending parameter instead of failing with a
        // NullReferenceException part-way through the loop.
        // (Fully qualified so no extra using is required.)
        if (a is null)
        {
            throw new System.ArgumentNullException(nameof(a));
        }

        if (b is null)
        {
            throw new System.ArgumentNullException(nameof(b));
        }

        foreach (var item in b)
        {
            // Only fill in keys that a does not have yet.
            if (!a.ContainsKey(item.Key))
            {
                a[item.Key] = item.Value;
            }
        }
    }
}

// Usage

// Prepare the data
var d1 = new Dictionary<string, int> { ["a"] = 1, ["b"] = 2 };
var d2 = new Dictionary<string, int> { ["b"] = 3, ["c"] = 4 };

// Merge d2 into d1 (d1 itself is modified)
d1.Marge(d2);
// > a : 1
// > b : 2   (already present in d1, so the value from d2 is ignored)
// > c : 4

副作用がないので最初のほうがおすすめですが、新しいテーブルを作成するときに倍メモリが必要なので、テーブルの中身が変わってもいいなら拡張メソッドの書き方のほうがおすすめです。

Linqでマージする

以下の記事の方法を使えば Linq で簡単にマージできるのですが、正直あまりお勧めはしません。Linq は注意しないと処理コストがかかる場合が多いです。というか、これくらいなら普通に foreach を書いたほうが良いと思います。

C# の Dictionary 同士を簡単にマージする方法

参考までに Linq のクエリー式でマージすると以下のようになります。

// Merge with a LINQ query expression.
// Entries of d1 whose key also exists in d2 are filtered out first, then all of d2
// is appended - so for a duplicated key the value from d2 ends up in the result.
var onlyInD1 = from entry in d1
               where !d2.ContainsKey(entry.Key)
               select entry;
var marged = onlyInD1.Concat(d2);
var map = marged.ToDictionary(entry => entry.Key, entry => entry.Value);

こんな感じで拡張メソッドがあると ToDictionary がシンプルにできるかもしれません。

// IEnumerableExtension.cs
public static class IEnumerableExtension
{
    /// <summary>
    /// Builds a <see cref="Dictionary{TKey, TValue}"/> from a sequence of
    /// <see cref="KeyValuePair{TKey, TValue}"/>, using each pair's key and value as-is.
    /// </summary>
    /// <param name="self">Sequence of key/value pairs to convert.</param>
    /// <returns>A dictionary containing one entry per pair.</returns>
    public static Dictionary<TKey, TValue>
        ToDictionary<TKey, TValue>(this IEnumerable<KeyValuePair<TKey, TValue>> self)
        => Enumerable.ToDictionary(self, pair => pair.Key, pair => pair.Value);

    /// <summary>
    /// Builds a <see cref="Dictionary{TKey, TValue}"/> from a sequence of two-element
    /// tuples: the first element becomes the key, the second the value.
    /// </summary>
    /// <param name="self">Sequence of (key, value) tuples to convert.</param>
    /// <returns>A dictionary containing one entry per tuple.</returns>
    public static Dictionary<TKey, TValue>
        ToDictionary<TKey, TValue>(this IEnumerable<(TKey k, TValue v)> self)
        => Enumerable.ToDictionary(self, tuple => tuple.k, tuple => tuple.v);
}

実行速度の比較

余談ですが、記事中の処理の実行速度を計測した結果を載せておきます。

測定対象
- 普通に foreach でマージする (1)
- Linq を使った方法 (2) ～ (5)
- Linq のクエリ式を使用 (6)

結論から書きますが、（この程度なら foreach で回したほうがいい節がありますが）Linq でも実行速度は（気を付ければ）だいたい同じになります。

Linq は GroupBy すると急に遅くなります（というか、一般的に Linq のメソッドを何個も連結すると相応に遅くなっていきます）。

// Program.cs
/// <summary>
/// Micro-benchmark comparing six ways of merging two dictionaries.
/// Every variant returns a new dictionary and keeps the value from the
/// first dictionary when a key exists in both.
/// </summary>
internal class Program
{
    // Number of entries generated for each test dictionary.
    private const int TableSize = 1000;

    // Number of untimed rounds executed before the measurement starts.
    private const int WarmUpCount = 100;

    /// <summary>
    /// Runs every merge variant <c>count</c> times on freshly generated test data
    /// and prints the accumulated and per-call time of each variant.
    /// </summary>
    /// <param name="args">Command line arguments (not used).</param>
    public static void Main(string[] args)
    {
        // The variants under test; index + 1 is the number shown in the report.
        var targets = new Func<IDictionary<string, int>, IDictionary<string, int>, IDictionary<string, int>>[]
        {
            Marge1, // (1) plain foreach
            Marge2, // (2) Qiita approach (1)
            Marge3, // (3) Qiita approach (2)
            Marge4, // (4) Qiita approach (revised)
            Marge5, // (5) Kogane Blog approach
            Marge6, // (6) LINQ query expression
        };

        // Run everything a few times before measuring so that first-call costs
        // are (presumably) not attributed to whichever variant happens to run first.
        for (int i = 0; i < WarmUpCount; i++)
        {
            // Test data shared by all variants in this round
            var dic1 = CreateTable(TableSize);
            var dic2 = CreateTable(TableSize);
            foreach (var target in targets)
            {
                target(dic1, dic2);
            }
        }

        // Receives the size of every merged result; its length is printed as "Tmp" below.
        var list = new List<int>();

        int count = 5000; // number of measured rounds
        var stopwatches = new Stopwatch[targets.Length];
        for (int t = 0; t < stopwatches.Length; t++)
        {
            stopwatches[t] = new Stopwatch();
        }

        for (int i = 0; i < count; i++)
        {
            // Test data shared by all variants in this round (generation is not timed)
            var dic1 = CreateTable(TableSize);
            var dic2 = CreateTable(TableSize);

            for (int t = 0; t < targets.Length; t++)
            {
                var target = targets[t];
                var stopwatch = stopwatches[t];

                // Start/Stop accumulate, so each stopwatch ends up with the total over all rounds.
                stopwatch.Start();
                var item = target(dic1, dic2);
                stopwatch.Stop();
                list.Add(item.Count);
            }
        }

        // Show the results. Each report line is built on a single source line:
        // an ordinary interpolated string must not contain a raw line break.
        Console.WriteLine($"Count={count}, Tmp={list.Count}");
        for (int t = 0; t < stopwatches.Length; t++)
        {
            double totalMs = stopwatches[t].Elapsed.TotalMilliseconds;
            Console.WriteLine($"({t + 1}) Total={totalMs:F3}ms, per={totalMs / count:F3}ms");
        }

        // Sample output published with the original version of this benchmark
        // (the generated keys are GUID strings, so the duplicate-key handling of
        // Marge1 / Marge6 is not expected to come into play here):
        // (1) Total=1038.604ms, per=0.208ms ◎
        // (2) Total=1905.880ms, per=0.381ms ◎
        // (3) Total=2332.546ms, per=0.467ms △
        // (4) Total=1048.628ms, per=0.210ms ◎
        // (5) Total=2224.486ms, per=0.445ms △
        // (6) Total=1030.684ms, per=0.206ms ◎

        // (1) = (6) = (4) >>>> (2) = (3) = (5)
        // -> it gets noticeably slower as soon as GroupBy is involved
    }

    /// <summary>
    /// Creates test data: a dictionary with <paramref name="count"/> entries whose
    /// keys are freshly generated GUID strings and whose values are all
    /// <paramref name="count"/>.
    /// </summary>
    /// <param name="count">Number of entries to generate; zero or less gives an empty dictionary.</param>
    private static Dictionary<string, int> CreateTable(int count)
    {
        // Filled directly so that this file does not depend on a helper declared elsewhere.
        var table = new Dictionary<string, int>();
        for (int i = 0; i < count; i++)
        {
            table.Add(Guid.NewGuid().ToString(), count);
        }

        return table;
    }

    /// <summary>
    /// (1) Merge with plain foreach loops.
    /// For a key present in both dictionaries the value from <paramref name="a"/> is kept,
    /// the same as in the other variants.
    /// </summary>
    public static IDictionary<TKey, TValue>
        Marge1<TKey, TValue>(IDictionary<TKey, TValue> a, IDictionary<TKey, TValue> b)
    {
        var table = new Dictionary<TKey, TValue>();
        foreach (var item in a)
        {
            table[item.Key] = item.Value;
        }

        foreach (var item in b)
        {
            // TryAdd does nothing when the key already exists, so a's value is not overwritten.
            table.TryAdd(item.Key, item.Value);
        }

        return table;
    }

    /// <summary>
    /// (2) Qiita approach (1): concatenate, group by key, take the first value of each group.
    /// <paramref name="a"/> is enumerated first, so its value is the one kept for a duplicated key.
    /// </summary>
    /// <remarks>Source: https://qiita.com/Nossa/items/802b0e0de927c0cfec05</remarks>
    public static IDictionary<TKey, TValue>
        Marge2<TKey, TValue>(IDictionary<TKey, TValue> a, IDictionary<TKey, TValue> b)
    {
        return a.Concat(b)
                .GroupBy(c => c.Key)
                .ToDictionary(c => c.Key, c => c.FirstOrDefault().Value);
    }

    /// <summary>
    /// (3) Qiita approach (2).
    /// </summary>
    /// <remarks>
    /// NOTE(review): as written here this body is identical to <c>Marge2</c>;
    /// confirm against the referenced article whether a different variant was intended.
    /// </remarks>
    public static IDictionary<TKey, TValue>
        Marge3<TKey, TValue>(IDictionary<TKey, TValue> a, IDictionary<TKey, TValue> b)
    {
        return a.Concat(b)
                .GroupBy(c => c.Key)
                .ToDictionary(c => c.Key, c => c.FirstOrDefault().Value);
    }

    /// <summary>
    /// (4) Qiita approach (revised): append only those entries of <paramref name="b"/>
    /// whose key is missing from <paramref name="a"/>, without grouping.
    /// </summary>
    public static IDictionary<TKey, TValue>
        Marge4<TKey, TValue>(IDictionary<TKey, TValue> a, IDictionary<TKey, TValue> b)
    {
        return a.Concat(b.Where(pair => !a.ContainsKey(pair.Key)))
                .ToDictionary(pair => pair.Key, pair => pair.Value);
    }

    /// <summary>
    /// (5) Kogane Blog approach.
    /// </summary>
    /// <remarks>
    /// Source: https://baba-s.hatenablog.com/entry/2019/09/09/215700
    /// NOTE(review): as written here this body is identical to <c>Marge2</c>;
    /// confirm against the referenced article whether a different variant was intended.
    /// </remarks>
    public static IDictionary<TKey, TValue>
        Marge5<TKey, TValue>(IDictionary<TKey, TValue> a, IDictionary<TKey, TValue> b)
    {
        return a.Concat(b)
                .GroupBy(c => c.Key)
                .ToDictionary(c => c.Key, c => c.FirstOrDefault().Value);
    }

    /// <summary>
    /// (6) Merge with a LINQ query expression.
    /// Only the entries of <paramref name="b"/> whose key is missing from
    /// <paramref name="a"/> are appended, so a's value is kept for a duplicated key,
    /// the same as in the other variants.
    /// </summary>
    public static IDictionary<TKey, TValue>
        Marge6<TKey, TValue>(IDictionary<TKey, TValue> a, IDictionary<TKey, TValue> b)
    {
        var marged = a.Concat(from p2 in b where !a.ContainsKey(p2.Key) select p2);
        return marged.ToDictionary(p => p.Key, p => p.Value);
    }
}

Dictionary を短時間に数千回も結合するような処理は仕様の方を考え直した方がいいと思いますが実行結果は最終的に同じなのに実行速度が倍近く違うというのは多少気にはなります。特に、モバイル上で実行される可能性を考えるとわざわざ倍遅い方法を採用することはないと思います。