2017-03-03 7 views
1

C#のStreamReaderでXMLファイルのエンコーディングを検出する

私のXMLファイルには、次のようなデータが含まれています:

<Data> 
    <Field> 
     <Name>BarcodeCapture_0</Name> 
     <Type>SimpleIndex</Type> 
     <DataType>DataMatrix</DataType> 
     <Value>DEA&#x2;"&#x1;¡CV°)Ñ  &#x2;võ Fƒ´  20100410050</Value> 
    </Field> 
</Data> 

StreamReaderを継承したクラスを使用し、読み取りメソッドをオーバーライドして、&#x2; のようなXMLで許容されない文字を取り除くようにしています。

これがそのクラスです:

/// <summary>
/// A <see cref="StreamReader"/> that drops every character that is not a
/// legal XML 1.0 character, so that the text handed to an XML parser never
/// contains prohibited raw characters.
/// </summary>
/// <remarks>
/// Filtering is applied to the decoded characters returned by the base
/// reader. It does not touch numeric character references such as
/// <c>&amp;#x2;</c>, because those are made of ordinary, legal characters.
/// NOTE(review): the base reader returns UTF-16 code units, and the XML 1.0
/// check rejects the surrogate range 0xD800-0xDFFF, so characters outside
/// the Basic Multilingual Plane are stripped as well - confirm this is
/// acceptable for the data being read.
/// </remarks>
public class CustomStreamReader : StreamReader
{
    // Value returned by Read()/Peek() when no more characters are available.
    private const int EOF = -1;

    /// <summary>Creates a filtering reader over an existing stream.</summary>
    public CustomStreamReader(Stream stream) : base(stream)
    {
    }

    /// <summary>Creates a filtering reader over the file at <paramref name="path"/>.</summary>
    public CustomStreamReader(string path) : base(path)
    {
    }

    /// <summary>
    /// Creates a filtering reader over the file at <paramref name="path"/>
    /// using the given character encoding.
    /// </summary>
    public CustomStreamReader(string path, Encoding encoding) : base(path, encoding)
    {
    }

    /// <summary>
    /// Get whether an integer represents a legal XML 1.0 or 1.1 character. See
    /// the specification at w3.org for these characters.
    /// </summary>
    /// <param name="xmlVersion">
    /// The version number as a string. Use "1.0" for XML 1.0 character
    /// validation, and use "1.1" for XML 1.1 character validation.
    /// </param>
    /// <param name="character">The character value to test.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="character"/> is allowed by the
    /// selected XML version; otherwise <c>false</c>.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="xmlVersion"/> is neither "1.0" nor "1.1".
    /// </exception>
    public static bool IsLegalXmlChar(string xmlVersion, int character)
    {
        switch (xmlVersion)
        {
            case "1.1": // http://www.w3.org/TR/xml11/#charsets
                {
                    return
                    !(
                        character <= 0x8 ||
                        character == 0xB ||
                        character == 0xC ||
                        (character >= 0xE && character <= 0x1F) ||
                        (character >= 0x7F && character <= 0x84) ||
                        (character >= 0x86 && character <= 0x9F) ||
                        character > 0x10FFFF
                    );
                }
            case "1.0": // http://www.w3.org/TR/REC-xml/#charsets
                {
                    return
                    (
                        character == 0x9 /* == '\t' == 9 */ ||
                        character == 0xA /* == '\n' == 10 */ ||
                        character == 0xD /* == '\r' == 13 */ ||
                        (character >= 0x20 && character <= 0xD7FF) ||
                        (character >= 0xE000 && character <= 0xFFFD) ||
                        (character >= 0x10000 && character <= 0x10FFFF)
                    );
                }
            default:
                {
                    // The format string has one placeholder, so the offending
                    // value must be supplied; otherwise string.Format itself
                    // throws FormatException and hides the real error.
                    throw new ArgumentOutOfRangeException
                        ("xmlVersion", string.Format("'{0}' is not a valid XML version.", xmlVersion));
                }
        }
    }

    /// <summary>
    /// Get whether an integer represents a legal XML 1.0 character. See the
    /// specification at w3.org for these characters.
    /// </summary>
    /// <param name="character">The character value to test.</param>
    /// <returns><c>true</c> when the value is a legal XML 1.0 character.</returns>
    public static bool IsLegalXmlChar(int character)
    {
        return CustomStreamReader.IsLegalXmlChar("1.0", character);
    }

    /// <summary>
    /// Reads the next legal XML 1.0 character, skipping prohibited ones.
    /// </summary>
    /// <returns>The character read, or -1 at the end of the input.</returns>
    public override int Read()
    {
        int nextCharacter;

        do
        {
            if ((nextCharacter = base.Read()) == EOF)
            {
                // End of input: stop, and return EOF to the caller.
                break;
            }
        }
        // Skip the character if it's prohibited, and try the next.
        while (!CustomStreamReader.IsLegalXmlChar(nextCharacter));

        return nextCharacter;
    }

    /// <summary>
    /// Returns the next legal XML 1.0 character without consuming it.
    /// </summary>
    /// <remarks>
    /// Prohibited characters found in front of the next legal one ARE
    /// consumed from the underlying reader, so this method is not free of
    /// side effects.
    /// </remarks>
    /// <returns>The next legal character, or -1 when none is available.</returns>
    public override int Peek()
    {
        int nextCharacter;

        do
        {
            // See what the next character is.
            nextCharacter = base.Peek();
        }
        while
        (
            // If it's prohibited XML, consume it from the stream and look at
            // the following one. The Read() result is EOF only when the
            // input is exhausted, which ends the loop with -1.
            !CustomStreamReader.IsLegalXmlChar(nextCharacter) &&
            (nextCharacter = base.Read()) != EOF
        );

        return nextCharacter;
    }

    // The methods below re-implement the buffered reads on top of the
    // filtering Read()/Peek() above, so every read path is filtered.

    /// <summary>
    /// Reads up to <paramref name="count"/> filtered characters into
    /// <paramref name="buffer"/>, starting at <paramref name="index"/>.
    /// </summary>
    /// <returns>
    /// The number of characters stored; 0 at the end of the input or when
    /// <paramref name="count"/> is 0.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="index"/> or <paramref name="count"/> is negative.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The range described by <paramref name="index"/> and
    /// <paramref name="count"/> does not fit in <paramref name="buffer"/>.
    /// </exception>
    public override int Read(char[] buffer, int index, int count)
    {
        if (buffer == null)
        {
            throw new ArgumentNullException("buffer");
        }
        if (index < 0)
        {
            throw new ArgumentOutOfRangeException("index");
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count");
        }
        if ((buffer.Length - index) < count)
        {
            throw new ArgumentException("index and count do not describe a valid range in buffer.");
        }

        // A pre-checked loop: with count == 0 nothing is consumed and
        // nothing is written (a do/while would read one character and
        // could write past the end of the buffer).
        int charsRead = 0;
        while (charsRead < count)
        {
            int next = this.Read();
            if (next == EOF)
            {
                break;
            }
            buffer[index + charsRead++] = (char)next;
        }
        return charsRead;
    }

    /// <summary>
    /// Reads filtered characters until <paramref name="count"/> have been
    /// stored or the input ends.
    /// </summary>
    /// <returns>The total number of characters stored.</returns>
    public override int ReadBlock(char[] buffer, int index, int count)
    {
        int lastRead;
        int total = 0;
        do
        {
            total += lastRead = this.Read(buffer, index + total, count - total);
        }
        while ((lastRead > 0) && (total < count));
        return total;
    }

    /// <summary>
    /// Reads one line of filtered characters. A line ends at "\r", "\n" or
    /// "\r\n"; the terminator is not included in the result.
    /// </summary>
    /// <returns>The line, or null when the input is already exhausted.</returns>
    public override string ReadLine()
    {
        StringBuilder builder = new StringBuilder();
        while (true)
        {
            int next = this.Read();
            switch (next)
            {
                case -1:
                    if (builder.Length > 0)
                    {
                        return builder.ToString();
                    }
                    return null;

                case 13:
                case 10:
                    // Treat "\r\n" as a single terminator.
                    if ((next == 13) && (this.Peek() == 10))
                    {
                        this.Read();
                    }
                    return builder.ToString();
            }
            builder.Append((char)next);
        }
    }

    /// <summary>
    /// Reads all remaining filtered characters.
    /// </summary>
    /// <returns>The remaining text; an empty string at the end of the input.</returns>
    public override string ReadToEnd()
    {
        int charsRead;
        char[] buffer = new char[0x1000];
        StringBuilder builder = new StringBuilder(0x1000);
        while ((charsRead = this.Read(buffer, 0, buffer.Length)) != 0)
        {
            builder.Append(buffer, 0, charsRead);
        }
        return builder.ToString();
    }
}

XMLのデシリアライズ側のコードは次のとおりです:

// Deserialize through the filtering reader; the using block closes the file
// once deserialization has finished (or failed).
ScanTransaction result;
using (CustomStreamReader fStream_scanTransaction_XML = new CustomStreamReader(scanTransactionFilePath, Encoding.UTF8))
{
    XmlSerializer s = new XmlSerializer(typeof(ScanTransaction));
    // Deserialize returns object, so the result is cast to the target type.
    result = (ScanTransaction)s.Deserialize(fStream_scanTransaction_XML);
}

問題は、StreamReaderがエンコーディングを検出できないため、これらの文字が削除されず、XMLのデシリアライズが失敗してしまうことです。

+1

ちょっとした提案: base64文字列に変換し、xmlはbase64文字列とエンコーディングタイプのみを格納します。元に戻すことは安全です。 –

+0

元のデータと異なるデータを逆シリアル化することは本当に良い考えですか?あなたは "違法な" xml文字を取り除いていますが、元のデータには存在していました... – xanatos

+0

@xanatos はい、元のデータはデシリアライズ後に破棄されます。問題は、このような処理をしないとプロセス全体がブロックされてしまうことです –

答えて

0

試してみてください。

// Read the file as UTF-8 text and hand it to the serializer through an
// XmlTextReader instead of a plain TextReader.
using (StreamReader fileReader = new StreamReader("XMLFile1.xml", Encoding.UTF8))
{
    using (XmlTextReader xmlReader = new XmlTextReader(fileReader))
    {
        XmlSerializer serializer = new XmlSerializer(typeof(ScanTransaction));
        object deserialized = serializer.Deserialize(xmlReader);
        ScanTransaction result = (ScanTransaction)deserialized;
    }
}

「特別な」StreamReaderも必要ありません。XmlTextReaderは不正な文字をチェックしません(Normalizationというbooleanプロパティでこれを制御できますが、デフォルトではfalseなので、不正な文字はチェックされません)

+0

@ xanatosありがとうございます –

関連する問題