stackprobe7s_memo

何処にも披露する見込みの無いものを書き落とす場所

CsvFile*.cs

RDBを使わずにCSVでなんとかしてみた結果できた副産物。
特筆するようなところはないと思う。
概ね自分用。
 
CsvFileDBTable.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using Charlotte.Commons;
using Charlotte.Utilities;

namespace Charlotte.Tools
{
	public class CsvFileDBTable
	{
		// Constraints:
		// -- Every row must have at least one column.
		// -- The first column is the ID (no duplicates).
		// -- The ID must not be an empty string.

		// Note:
		// -- The delete buffer takes priority -- when the same ID is present in both the delete buffer and the add-or-update buffer, the row is treated as deleted.

		// Row-count threshold: Delete / AddOrUpdate flush once a buffer holds more rows than this.
		private static int BUFFER_SIZE_MAX = 1000;
		// Memory-load threshold: Delete / AddOrUpdate flush once a buffer's estimated load exceeds this.
		private static int MEMORY_LOAD_MAX = 50000000; // 50 MB

		/// <summary>
		/// Debug hook: replaces the row-count flush threshold (BUFFER_SIZE_MAX).
		/// The threshold is static, so the new value applies to every instance.
		/// </summary>
		/// <param name="value">New threshold; stored as-is without validation.</param>
		public static void DEBUG_SetBufferSizeMax(int value)
		{
			CsvFileDBTable.BUFFER_SIZE_MAX = value;
		}

		/// <summary>
		/// Debug hook: replaces the memory-load flush threshold (MEMORY_LOAD_MAX).
		/// The threshold is static, so the new value applies to every instance.
		/// </summary>
		/// <param name="value">New threshold; stored as-is without validation.</param>
		public static void DEBUG_SetMemoryLoadMax(int value)
		{
			CsvFileDBTable.MEMORY_LOAD_MAX = value;
		}

		/// <summary>
		/// Rough estimate of the memory needed to hold the given rows:
		/// 100 per row, 100 per cell and 2 per character.
		/// </summary>
		/// <param name="rows">Rows to measure.</param>
		/// <returns>Estimated load (rough value).</returns>
		private static int RowsToMemoryLoad(string[][] rows)
		{
			int rowOverhead = rows.Length * 100;

			int contentLoad = rows.Sum(row =>
			{
				int cellOverhead = row.Length * 100;
				int charLoad = row.Sum(cell => cell.Length * 2);

				return cellOverhead + charLoad;
			});

			return rowOverhead + contentLoad; // rough value
		}

		/// <summary>
		/// Rough memory estimate for a single row; delegates to RowsToMemoryLoad
		/// with a one-element row array.
		/// </summary>
		/// <param name="row">Row to measure.</param>
		/// <returns>Estimated load (rough value).</returns>
		private static int RowToMemoryLoad(string[] row)
		{
			string[][] singleRow = { row };

			return RowsToMemoryLoad(singleRow);
		}

		// Path of the main CSV file: the result of SCommon.MakeFullPath(file), set in the constructor.
		private string FilePath;

		/// <summary>
		/// Path of the buffer file that records the IDs of deleted rows
		/// (the main file path with "_DeleteBuffer.csv" appended).
		/// Note: the same ID may appear more than once in the buffer.
		/// </summary>
		private string DeleteBufferFilePath
		{
			get
			{
				string mainFile = this.FilePath;

				return string.Concat(mainFile, "_DeleteBuffer.csv");
			}
		}

		/// <summary>
		/// Path of the buffer file that records added or updated rows
		/// (the main file path with "_UpdateBuffer.csv" appended).
		/// Note: the same ID may appear more than once in the buffer (the later record wins).
		/// </summary>
		private string UpdateBufferFilePath
		{
			get
			{
				string mainFile = this.FilePath;

				return string.Concat(mainFile, "_UpdateBuffer.csv");
			}
		}

		// Number of rows in the delete buffer file (incremented by Delete, reset to 0 on flush).
		private int DeleteBufferSize;
		// Running memory-load estimate of the delete buffer (accumulated via RowToMemoryLoad).
		private int DeleteMemoryLoad;
		// Number of rows in the update buffer file (incremented by AddOrUpdate, reset to 0 on flush).
		private int UpdateBufferSize;
		// Running memory-load estimate of the update buffer (accumulated via RowToMemoryLoad).
		private int UpdateMemoryLoad;

		/// <summary>
		/// Opens (creating if necessary) the table stored in the given CSV file.
		/// The main file and both buffer files are created empty when they do not exist,
		/// then the buffer counters are restored from whatever the buffer files already contain.
		/// </summary>
		/// <param name="file">Path of the main CSV file (passed through SCommon.MakeFullPath).</param>
		public CsvFileDBTable(string file)
		{
			this.FilePath = SCommon.MakeFullPath(file);

			// ----

			if (!File.Exists(this.FilePath))
				File.WriteAllBytes(this.FilePath, SCommon.EMPTY_BYTES);

			if (!File.Exists(this.DeleteBufferFilePath))
				File.WriteAllBytes(this.DeleteBufferFilePath, SCommon.EMPTY_BYTES);

			if (!File.Exists(this.UpdateBufferFilePath))
				File.WriteAllBytes(this.UpdateBufferFilePath, SCommon.EMPTY_BYTES);

			using (CsvFileReader reader = new CsvFileReader(this.DeleteBufferFilePath))
			{
				string[][] rows = reader.ReadToEnd();

				this.DeleteBufferSize = rows.Length;

				// Measure the rows already read: a second ReadToEnd() on the exhausted
				// reader returns no rows and would always yield a load of 0.
				this.DeleteMemoryLoad = RowsToMemoryLoad(rows);
			}

			using (CsvFileReader reader = new CsvFileReader(this.UpdateBufferFilePath))
			{
				string[][] rows = reader.ReadToEnd();

				this.UpdateBufferSize = rows.Length;

				// Same as above: reuse the rows instead of reading the drained reader again.
				this.UpdateMemoryLoad = RowsToMemoryLoad(rows);
			}
		}

		/// <summary>
		/// Read-only scan over all rows.
		/// Use FilterAll when rows have to be deleted or updated.
		/// Rows of the update buffer are visited first (only the last record of each ID),
		/// followed by the rows of the main file. IDs listed in the delete buffer and IDs
		/// already visited are skipped.
		/// </summary>
		/// <param name="reaction">Row callback; returning false stops the scan.</param>
		public void ForEach(Predicate<string[]> reaction)
		{
			if (reaction == null)
				throw new Exception("Bad reaction");

			// IDs to skip: deleted ones first, later also the ones already handed to the callback.
			HashSet<string> skipIDs = new HashSet<string>();

			using (CsvFileReader reader = new CsvFileReader(this.DeleteBufferFilePath))
			{
				foreach (string[] deletedRow in reader.ReadToEnd())
				{
					skipIDs.Add(deletedRow[0]);
				}
			}

			string[][] bufferedRows;

			using (CsvFileReader reader = new CsvFileReader(this.UpdateBufferFilePath))
			{
				bufferedRows = reader.ReadToEnd();
			}

			// Walk the buffer backwards so that the last update of an ID wins.
			for (int index = bufferedRows.Length - 1; 0 <= index; index--)
			{
				string[] row = bufferedRows[index];

				if (skipIDs.Contains(row[0]))
					continue;

				if (!reaction(row))
					return;

				skipIDs.Add(row[0]);
			}

			using (CsvFileReader reader = new CsvFileReader(this.FilePath))
			{
				string[] row;

				while ((row = reader.ReadRow()) != null)
				{
					if (skipIDs.Contains(row[0]))
						continue;

					if (!reaction(row))
						break;
				}
			}
		}

		/// <summary>
		/// Full scan that may delete or update rows.
		/// Row filter contract:
		/// -- to leave a row untouched == return the argument as it is.
		/// -- to update a row == return the new row -- the first column (ID) must not be changed.
		/// -- to delete a row == return null.
		/// The surviving rows replace the main file and both buffers are emptied.
		/// </summary>
		/// <param name="filter">Row filter.</param>
		public void FilterAll(Func<string[], string[]> filter)
		{
			if (filter == null)
				throw new Exception("Bad filter");

			using (WorkingDir wd = new WorkingDir())
			{
				string tempFile = wd.MakePath();

				using (CsvFileWriter writer = new CsvFileWriter(tempFile))
				{
					this.ForEach(row =>
					{
						string[] filtered = filter(row);

						if (filtered == null) // the row is deleted
							return true;

						bool valid =
							1 <= filtered.Length &&
							filtered.All(v => v != null) &&
							filtered[0] == row[0]; // the first column (ID) must match

						if (!valid)
							throw new Exception("Bad newRow");

						writer.WriteRow(filtered);
						return true;
					});
				}

				// Replace the main file with the filtered result.
				SCommon.DeletePath(this.FilePath);
				File.Move(tempFile, this.FilePath);
			}

			// Everything buffered has been merged into the main file.
			File.WriteAllBytes(this.DeleteBufferFilePath, SCommon.EMPTY_BYTES);
			File.WriteAllBytes(this.UpdateBufferFilePath, SCommon.EMPTY_BYTES);

			this.DeleteBufferSize = this.DeleteMemoryLoad = 0;
			this.UpdateBufferSize = this.UpdateMemoryLoad = 0;
		}

		/// <summary>
		/// Collects up to <paramref name="limit"/> matching rows in scan order.
		/// The scan stops as soon as one more matching row than the limit is found.
		/// </summary>
		/// <param name="match">Row predicate; true selects the row.</param>
		/// <param name="limit">Maximum number of rows returned (1 to SCommon.IMAX).</param>
		/// <param name="overflow">Set to true when more than limit rows matched.</param>
		/// <returns>The matching rows, at most limit of them.</returns>
		public List<string[]> Search(Predicate<string[]> match, int limit, out bool overflow)
		{
			bool badParams =
				match == null ||
				limit < 1 || SCommon.IMAX < limit;

			if (badParams)
				throw new Exception("Bad params");

			List<string[]> found = new List<string[]>();
			bool limitExceeded = false; // an out parameter cannot be captured by the lambda

			this.ForEach(row =>
			{
				if (!match(row)) // not a search target
					return true;

				if (found.Count < limit)
				{
					found.Add(row);
					return true;
				}
				limitExceeded = true;
				return false;
			});

			overflow = limitExceeded;
			return found;
		}

		/// <summary>
		/// Returns the first <paramref name="limit"/> matching rows in <paramref name="comp"/> order.
		/// Candidates are kept in memory and trimmed back to limit rows whenever the
		/// list grows past a rough upper bound, so at most about 1.5 x limit rows are held.
		/// </summary>
		/// <param name="match">Row predicate; true selects the row.</param>
		/// <param name="comp">Sort order of the result.</param>
		/// <param name="limit">Maximum number of rows returned (1 to SCommon.IMAX).</param>
		/// <param name="count">Total number of rows that matched.</param>
		/// <returns>The first limit matching rows in sorted order.</returns>
		public List<string[]> Search(Predicate<string[]> match, Comparison<string[]> comp, int limit, out int count)
		{
			bool badParams =
				match == null ||
				comp == null ||
				limit < 1 || SCommon.IMAX < limit;

			if (badParams)
				throw new Exception("Bad params");

			int trimThreshold = Math.Max(limit + limit / 2, 100); // rough limit

			List<string[]> candidates = new List<string[]>();
			int matched = 0;

			this.ForEach(row =>
			{
				if (!match(row)) // not a search target
					return true;

				if (trimThreshold < candidates.Count)
				{
					// Too many candidates: keep only the best limit rows.
					candidates.Sort(comp);
					candidates.RemoveRange(limit, candidates.Count - limit);
				}
				candidates.Add(row);
				matched++;
				return true;
			});

			candidates.Sort(comp);

			if (limit < candidates.Count)
				candidates.RemoveRange(limit, candidates.Count - limit);

			count = matched;
			return candidates;
		}

		/// <summary>
		/// Paged search: writes every matching row to a temporary file, sorts that file
		/// with CsvFileSorter and returns the rows at positions offset .. offset + limit - 1.
		/// </summary>
		/// <param name="match">Row predicate; true selects the row.</param>
		/// <param name="comp">Sort order of the result.</param>
		/// <param name="offset">Number of sorted rows to skip (0 to SCommon.IMAX).</param>
		/// <param name="limit">Maximum number of rows returned (at least 1; offset + limit must not exceed SCommon.IMAX).</param>
		/// <param name="count">Total number of rows that matched.</param>
		/// <returns>The requested page of sorted rows.</returns>
		public List<string[]> Search(Predicate<string[]> match, Comparison<string[]> comp, int offset, int limit, out int count)
		{
			bool badParams =
				match == null ||
				comp == null ||
				offset < 0 || SCommon.IMAX < offset ||
				limit < 1 || SCommon.IMAX - offset < limit;

			if (badParams)
				throw new Exception("Bad params");

			List<string[]> page = new List<string[]>();
			int matched = 0;

			using (WorkingDir wd = new WorkingDir())
			{
				string tempFile = wd.MakePath();

				using (CsvFileWriter writer = new CsvFileWriter(tempFile))
				{
					this.ForEach(row =>
					{
						if (!match(row)) // not a search target
							return true;

						writer.WriteRow(row);
						matched++;
						return true;
					});
				}

				CsvFileSorter.Sort(tempFile, comp);

				using (CsvFileReader reader = new CsvFileReader(tempFile))
				{
					for (int index = 0; index < matched; index++)
					{
						string[] row = reader.ReadRow();

						if (row == null)
							throw null; // never

						if (offset <= index) // reached the first row of the page
						{
							page.Add(row);

							if (limit <= page.Count) // the page is full
								break;
						}
					}
				}
			}

			count = matched;
			return page;
		}

		/// <summary>
		/// Streaming search: writes every matching row to a temporary file, sorts that file
		/// with CsvFileSorter and hands the sorted rows to <paramref name="reaction"/> one by one.
		/// </summary>
		/// <param name="match">Row predicate; true selects the row.</param>
		/// <param name="comp">Order in which the rows are delivered.</param>
		/// <param name="reaction">Row callback; returning false stops the delivery.</param>
		public void Search(Predicate<string[]> match, Comparison<string[]> comp, Predicate<string[]> reaction)
		{
			bool badParams =
				match == null ||
				comp == null ||
				reaction == null;

			if (badParams)
				throw new Exception("Bad params");

			using (WorkingDir wd = new WorkingDir())
			{
				string tempFile = wd.MakePath();

				using (CsvFileWriter writer = new CsvFileWriter(tempFile))
				{
					this.ForEach(row =>
					{
						bool selected = match(row);

						if (selected)
							writer.WriteRow(row);

						return true;
					});
				}

				CsvFileSorter.Sort(tempFile, comp);

				using (CsvFileReader reader = new CsvFileReader(tempFile))
				{
					string[] row;

					// Stop at the end of the file or when the callback asks to stop.
					while ((row = reader.ReadRow()) != null && reaction(row))
					{ }
				}
			}
		}

		/// <summary>
		/// Deletes a row by appending its ID to the delete buffer.
		/// Not suited to deleting many rows;
		/// consider FilterAll for mass deletion.
		/// </summary>
		/// <param name="id">ID of the row to delete.</param>
		public void Delete(string id)
		{
			if (string.IsNullOrEmpty(id))
				throw new Exception("Bad id");

			string[] bufferRow = new string[] { id };

			using (CsvFileWriter writer = new CsvFileWriter(this.DeleteBufferFilePath, true))
			{
				writer.WriteRow(bufferRow);
			}

			this.DeleteBufferSize++;
			this.DeleteMemoryLoad += RowToMemoryLoad(bufferRow);

			bool bufferFull =
				BUFFER_SIZE_MAX < this.DeleteBufferSize ||
				MEMORY_LOAD_MAX < this.DeleteMemoryLoad;

			if (bufferFull)
				this.Flush();
		}

		/// <summary>
		/// Adds a row, or updates the row with the same ID, by appending it to the update buffer.
		/// An added or updated row is placed (moved) before the first row.
		/// Not suited to adding or updating many rows;
		/// consider BulkInsert for mass insertion and FilterAll for mass update.
		/// </summary>
		/// <param name="row">Row to add or update.</param>
		public void AddOrUpdate(string[] row)
		{
			bool valid =
				row != null &&
				1 <= row.Length && // at least one column (the first column is the unique ID)
				row.All(v => v != null) &&
				row[0] != ""; // the ID must not be empty

			if (!valid)
				throw new Exception("Bad row");

			// The delete buffer takes priority, so pending deletions must be applied first.
			if (0 < this.DeleteBufferSize)
				this.Flush();

			using (CsvFileWriter writer = new CsvFileWriter(this.UpdateBufferFilePath, true))
			{
				writer.WriteRow(row);
			}

			this.UpdateBufferSize++;
			this.UpdateMemoryLoad += RowToMemoryLoad(row);

			bool bufferFull =
				BUFFER_SIZE_MAX < this.UpdateBufferSize ||
				MEMORY_LOAD_MAX < this.UpdateMemoryLoad;

			if (bufferFull)
				this.Flush();
		}

		/// <summary>
		/// Merges both buffers into the main file: the rows produced by ForEach are written
		/// to a temporary file which then replaces the main file, and the buffers are emptied.
		/// Does nothing when both buffers are empty.
		/// </summary>
		private void Flush()
		{
			bool nothingBuffered =
				this.DeleteBufferSize == 0 &&
				this.UpdateBufferSize == 0;

			if (nothingBuffered) // no buffered rows -> no flush needed
				return;

			using (WorkingDir wd = new WorkingDir())
			{
				string tempFile = wd.MakePath();

				using (CsvFileWriter writer = new CsvFileWriter(tempFile))
				{
					this.ForEach(liveRow =>
					{
						writer.WriteRow(liveRow);
						return true;
					});
				}

				// Replace the main file with the merged result.
				SCommon.DeletePath(this.FilePath);
				File.Move(tempFile, this.FilePath);
			}

			File.WriteAllBytes(this.DeleteBufferFilePath, SCommon.EMPTY_BYTES);
			File.WriteAllBytes(this.UpdateBufferFilePath, SCommon.EMPTY_BYTES);

			this.DeleteBufferSize = this.DeleteMemoryLoad = 0;
			this.UpdateBufferSize = this.UpdateMemoryLoad = 0;
		}

		/// <summary>
		/// Flushes the buffers and then sorts the main file in place with CsvFileSorter.
		/// </summary>
		/// <param name="comp">Row comparison defining the order.</param>
		public void Sort(Comparison<string[]> comp)
		{
			if (comp == null)
				throw new Exception("Bad comp");

			this.Flush();

			// Source and destination are the same file: sort in place.
			CsvFileSorter.Sort(this.FilePath, this.FilePath, comp);
		}

		/// <summary>
		/// Removes every row: empties the main file and both buffer files and resets the counters.
		/// </summary>
		public void Truncate()
		{
			string[] files = new string[]
			{
				this.FilePath,
				this.DeleteBufferFilePath,
				this.UpdateBufferFilePath,
			};

			foreach (string file in files)
			{
				File.WriteAllBytes(file, SCommon.EMPTY_BYTES);
			}

			this.DeleteBufferSize = this.DeleteMemoryLoad = 0;
			this.UpdateBufferSize = this.UpdateMemoryLoad = 0;
		}

		/// <summary>
		/// Appends many rows directly to the main file after flushing the buffers.
		/// Rows are pulled from <paramref name="reader"/> until it returns null.
		/// Duplicate IDs are NOT checked.
		/// </summary>
		/// <param name="reader">Row source; returns null when there are no more rows.</param>
		public void BulkInsert(Func<string[]> reader)
		{
			if (reader == null)
				throw new Exception("Bad reader");

			this.Flush();

			using (CsvFileWriter writer = new CsvFileWriter(this.FilePath, true))
			{
				string[] row;

				while ((row = reader()) != null) // null == end of input
				{
					// ID duplication is not checked.

					bool valid =
						1 <= row.Length && // at least one column (the first column is the unique ID)
						row.All(v => v != null) &&
						row[0] != ""; // the ID must not be empty

					if (!valid)
						throw new Exception("Bad row");

					writer.WriteRow(row);
				}
			}
		}
	}
}

 
CsvFileSorter.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using Charlotte.Commons;
using Charlotte.Utilities;

namespace Charlotte.Tools
{
	/// <summary>
	/// Sorts CSV files that may not fit in memory: the input is split into chunks that
	/// are sorted in memory and written to temporary run files, and the run files are
	/// then merged two at a time until a single sorted file remains.
	/// </summary>
	public static class CsvFileSorter
	{
		// Upper bound of the estimated memory load of one in-memory chunk.
		private static int MEMORY_LOAD_MAX = 200000000; // 200 MB

		/// <summary>
		/// Debug hook: replaces the chunk memory-load threshold (MEMORY_LOAD_MAX).
		/// </summary>
		/// <param name="value">New threshold; stored as-is without validation.</param>
		public static void DEBUG_SetMemoryLoadMax(int value)
		{
			MEMORY_LOAD_MAX = value;
		}

		/// <summary>
		/// Debug aid: row counts of the run files produced by the most recent Sort call.
		/// </summary>
		public static List<int> DEBUG_LastRowCountList = new List<int>();

		/// <summary>
		/// Rough estimate of the memory needed to hold one row:
		/// 100 per row, 100 per cell and 2 per character.
		/// </summary>
		private static int RowToMemoryLoad(string[] row)
		{
			return 100 + row.Length * 100 + row.Sum(v => v.Length) * 2; // rough value
		}

		/// <summary>
		/// Sorts a CSV file in place.
		/// </summary>
		/// <param name="file">File to sort; it is overwritten with the sorted rows.</param>
		/// <param name="comp">Row comparison defining the order.</param>
		public static void Sort(string file, Comparison<string[]> comp)
		{
			Sort(file, file, comp);
		}

		/// <summary>
		/// Sorts the rows of <paramref name="rFile"/> and writes them to <paramref name="wFile"/>
		/// (both may be the same file). An input without rows produces an empty output file.
		/// </summary>
		/// <param name="rFile">Input CSV file; must exist.</param>
		/// <param name="wFile">Output CSV file; must not be an existing directory.</param>
		/// <param name="comp">Row comparison defining the order.</param>
		public static void Sort(string rFile, string wFile, Comparison<string[]> comp)
		{
			rFile = SCommon.MakeFullPath(rFile);
			wFile = SCommon.MakeFullPath(wFile);

			if (!File.Exists(rFile))
				throw new Exception("no rFile");

			if (Directory.Exists(wFile))
				throw new Exception("Bad wFile");

			if (comp == null)
				throw new Exception("Bad comp");

			using (WorkingDir wd = new WorkingDir())
			{
				Queue<string> runFiles = SplitIntoSortedRuns(rFile, comp, wd);

				if (runFiles.Count == 0)
				{
					File.WriteAllBytes(wFile, SCommon.EMPTY_BYTES);
				}
				else
				{
					while (2 <= runFiles.Count)
					{
						string runFile1 = runFiles.Dequeue();
						string runFile2 = runFiles.Dequeue();
						string mergedFile = wd.MakePath();

						MergeRuns(runFile1, runFile2, mergedFile, comp);

						// The inputs are no longer needed; removing them right away keeps the
						// temporary disk usage from growing with every merge pass.
						File.Delete(runFile1);
						File.Delete(runFile2);

						runFiles.Enqueue(mergedFile);
					}
					SCommon.DeletePath(wFile);
					File.Move(runFiles.Dequeue(), wFile);
				}
			}
		}

		/// <summary>
		/// Reads rFile chunk by chunk, sorts each chunk in memory and writes it to its own
		/// temporary file. Returns the temporary files in creation order.
		/// </summary>
		private static Queue<string> SplitIntoSortedRuns(string rFile, Comparison<string[]> comp, WorkingDir wd)
		{
			Queue<string> runFiles = new Queue<string>();

			DEBUG_LastRowCountList.Clear();

			using (CsvFileReader reader = new CsvFileReader(rFile))
			{
				bool endOfFile = false;

				while (!endOfFile)
				{
					List<string[]> rows = new List<string[]>();
					long memoryLoad = 0; // long: the running total must not wrap around

					for (; ; )
					{
						string[] row = reader.ReadRow();

						if (row == null)
						{
							endOfFile = true;
							break;
						}
						rows.Add(row);
						memoryLoad += RowToMemoryLoad(row);

						if (MEMORY_LOAD_MAX < memoryLoad) // the chunk is full
							break;
					}
					if (1 <= rows.Count)
					{
						rows.Sort(comp);

						string runFile = wd.MakePath();

						using (CsvFileWriter writer = new CsvFileWriter(runFile))
						{
							writer.WriteRows(rows);
						}
						runFiles.Enqueue(runFile);

						DEBUG_LastRowCountList.Add(rows.Count);
					}
				}
			}
			return runFiles;
		}

		/// <summary>
		/// Merges two sorted run files into wFile, keeping the order defined by comp.
		/// </summary>
		private static void MergeRuns(string rFile1, string rFile2, string wFile, Comparison<string[]> comp)
		{
			using (CsvFileReader reader1 = new CsvFileReader(rFile1))
			using (CsvFileReader reader2 = new CsvFileReader(rFile2))
			using (CsvFileWriter writer = new CsvFileWriter(wFile))
			{
				string[] row1 = reader1.ReadRow();
				string[] row2 = reader2.ReadRow();

				while (row1 != null && row2 != null)
				{
					int ret = comp(row1, row2);

					// When both rows compare equal (ret == 0) both are written, row1 first.
					if (ret <= 0)
					{
						writer.WriteRow(row1);
						row1 = reader1.ReadRow();
					}
					if (0 <= ret)
					{
						writer.WriteRow(row2);
						row2 = reader2.ReadRow();
					}
				}

				// At most one of the inputs still has rows; copy the remainder.
				while (row1 != null)
				{
					writer.WriteRow(row1);
					row1 = reader1.ReadRow();
				}
				while (row2 != null)
				{
					writer.WriteRow(row2);
					row2 = reader2.ReadRow();
				}
			}
		}
	}
}

 
CsvFileReader.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using Charlotte.Commons;

namespace Charlotte.Utilities
{
	/// <summary>
	/// Sequential reader for delimiter-separated text files.
	/// Cells may be enclosed in double quotes (a doubled quote inside stands for one quote);
	/// every carriage return in the input is ignored and '\n' ends a row.
	/// </summary>
	public class CsvFileReader : IDisposable
	{
		public const char DELIMITER_COMMA = ','; // for .csv
		public const char DELIMITER_SPACE = ' '; // for .ssv
		public const char DELIMITER_TAB = '\t';  // for .tsv

		private char Delimiter;
		private StreamReader Reader;

		/// <summary>
		/// Opens a comma-separated file using SCommon.ENCODING_SJIS.
		/// </summary>
		public CsvFileReader(string file)
			: this(file, SCommon.ENCODING_SJIS)
		{ }

		/// <summary>
		/// Opens a comma-separated file using the given encoding.
		/// </summary>
		public CsvFileReader(string file, Encoding encoding)
			: this(file, encoding, DELIMITER_COMMA)
		{ }

		/// <summary>
		/// Opens a file using the given encoding and cell delimiter.
		/// </summary>
		public CsvFileReader(string file, Encoding encoding, char delimiter)
		{
			this.Delimiter = delimiter;
			this.Reader = new StreamReader(file, encoding);
		}

		// Most recently read character (-1 at the end of the stream); never '\r'.
		private int LastChar;

		/// <summary>
		/// Reads the next character, skipping carriage returns.
		/// </summary>
		/// <returns>The character read, or -1 at the end of the stream.</returns>
		private int ReadChar()
		{
			this.LastChar = this.Reader.Read();

			while (this.LastChar == '\r')
			{
				this.LastChar = this.Reader.Read();
			}
			return this.LastChar;
		}

		// Whether the cell read last was enclosed in double quotes.
		private bool EnclosedCell;

		/// <summary>
		/// Reads one cell. Afterwards LastChar holds the character that ended the cell
		/// (the delimiter, '\n' or -1).
		/// </summary>
		private string ReadCell()
		{
			StringBuilder cell = new StringBuilder();
			int first = this.ReadChar();

			if (first == '"')
			{
				for (; ; )
				{
					if (this.ReadChar() == -1)
						break;

					// A quote either closes the cell or is the first half of an escaped quote ("").
					if (this.LastChar == '"' && this.ReadChar() != '"')
						break;

					cell.Append((char)this.LastChar);
				}
				this.EnclosedCell = true;
			}
			else
			{
				for (int chr = first; chr != -1 && chr != '\n' && chr != this.Delimiter; chr = this.ReadChar())
				{
					cell.Append((char)chr);
				}
				this.EnclosedCell = false;
			}
			return cell.ToString();
		}

		/// <summary>
		/// Reads the next row.
		/// </summary>
		/// <returns>The cells of the row, or null at the end of the file.</returns>
		public string[] ReadRow()
		{
			List<string> cells = new List<string>();

			cells.Add(this.ReadCell());

			while (this.LastChar != -1 && this.LastChar != '\n')
			{
				cells.Add(this.ReadCell());
			}

			// A single empty, unquoted cell that ends at the end of the stream means no row was left.
			bool endOfFile =
				this.LastChar == -1 &&
				cells.Count == 1 &&
				cells[0] == "" &&
				!this.EnclosedCell;

			if (endOfFile)
				return null;

			return cells.ToArray();
		}

		/// <summary>
		/// Reads all remaining rows.
		/// </summary>
		/// <returns>The remaining rows (empty when none are left).</returns>
		public string[][] ReadToEnd()
		{
			List<string[]> rows = new List<string[]>();
			string[] row;

			while ((row = this.ReadRow()) != null)
			{
				rows.Add(row);
			}
			return rows.ToArray();
		}

		/// <summary>
		/// Closes the underlying reader; further calls do nothing.
		/// </summary>
		public void Dispose()
		{
			if (this.Reader == null)
				return;

			this.Reader.Dispose();
			this.Reader = null;
		}
	}
}

 
CsvFileWriter.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using Charlotte.Commons;

namespace Charlotte.Utilities
{
	/// <summary>
	/// Sequential writer for delimiter-separated text files.
	/// Cells containing a double quote, a line feed or the delimiter are enclosed in
	/// double quotes (with embedded quotes doubled); rows are terminated by '\n'.
	/// </summary>
	public class CsvFileWriter : IDisposable
	{
		public const char DELIMITER_COMMA = ','; // for .csv
		public const char DELIMITER_SPACE = ' '; // for .ssv
		public const char DELIMITER_TAB = '\t';  // for .tsv

		private char Delimiter;
		private StreamWriter Writer;

		/// <summary>
		/// Opens a comma-separated file using SCommon.ENCODING_SJIS.
		/// </summary>
		public CsvFileWriter(string file, bool append = false)
			: this(file, append, SCommon.ENCODING_SJIS)
		{ }

		/// <summary>
		/// Opens a comma-separated file using the given encoding.
		/// </summary>
		public CsvFileWriter(string file, bool append, Encoding encoding)
			: this(file, append, encoding, DELIMITER_COMMA)
		{ }

		/// <summary>
		/// Opens a file using the given encoding and cell delimiter.
		/// </summary>
		public CsvFileWriter(string file, bool append, Encoding encoding, char delimiter)
		{
			this.Delimiter = delimiter;
			this.Writer = new StreamWriter(file, append, encoding);
		}

		/// <summary>
		/// Whether the next cell to be written is the first cell of its row.
		/// </summary>
		private bool FirstCell = true;

		/// <summary>
		/// Writes one cell, preceded by the delimiter unless it is the first cell of the row.
		/// </summary>
		public void WriteCell(string cell)
		{
			if (!this.FirstCell)
				this.Writer.Write(this.Delimiter);

			this.FirstCell = false;

			bool needsQuoting =
				cell.Contains('"') ||
				cell.Contains('\n') ||
				cell.Contains(this.Delimiter);

			if (!needsQuoting)
			{
				this.Writer.Write(cell);
				return;
			}

			// Enclose the cell and double every embedded quote.
			this.Writer.Write('"');
			this.Writer.Write(cell.Replace("\"", "\"\""));
			this.Writer.Write('"');
		}

		/// <summary>
		/// Terminates the current row.
		/// </summary>
		public void EndRow()
		{
			this.Writer.Write('\n');
			this.FirstCell = true;
		}

		/// <summary>
		/// Writes several cells without terminating the row.
		/// </summary>
		public void WriteCells(IList<string> cells)
		{
			foreach (string value in cells)
			{
				this.WriteCell(value);
			}
		}

		/// <summary>
		/// Writes the given cells and terminates the row.
		/// </summary>
		public void WriteRow(IList<string> row)
		{
			this.WriteCells(row);
			this.EndRow();
		}

		/// <summary>
		/// Writes every given row.
		/// </summary>
		public void WriteRows(IList<string[]> rows)
		{
			foreach (string[] cells in rows)
			{
				this.WriteRow(cells);
			}
		}

		/// <summary>
		/// Closes the underlying writer; further calls do nothing.
		/// </summary>
		public void Dispose()
		{
			if (this.Writer == null)
				return;

			this.Writer.Dispose();
			this.Writer = null;
		}
	}
}